def get_logger(name, level):
    """
    Creates a logger that writes to a log file and to the console.

    The log file is ``ModelLearn.get_logger_path() + name + '.log'``.
    Handlers are attached only the first time a given ``name`` is
    configured: ``logging.getLogger`` returns the same object for the same
    name, so attaching handlers on every call would emit each record once
    per call and open one more file handle each time.

    Parameters
    ----------
    name : string
        name of the logger; also the base name of the log file

    level : string or int
        threshold passed to ``Logger.setLevel`` (applied on every call)

    Returns
    -------
    logger : logging.Logger
        the configured logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    if logger.handlers:
        # Already configured by an earlier call: do not stack duplicates.
        return logger

    log_dir = ModelLearn.get_logger_path()
    # NOTE(review): check_dir_exists presumably creates the directory when
    # it is missing -- confirm against its definition.
    check_dir_exists(log_dir)

    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # Both handlers let everything through; the logger's own level filters.
    for handler in (logging.FileHandler(log_dir + name + '.log'),
                    logging.StreamHandler()):
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
def export_train(name, Xtrain, Ytrain, export_X=False):
    """
    Exports training data into a csv file.

    The file is written to
    ``ModelLearn.get_output_path() + name + '/train_.csv'``. Its first row is
    a header with one name per column: ``Y0, Y1, ...`` followed, when
    ``export_X`` is set, by ``X0, X1, ...``.

    Parameters
    ----------
    name : string
        name of the sub-directory (under the output path) to write into

    Xtrain : ndarray
        X of training data; must be 2-D when ``export_X`` is True

    Ytrain : ndarray
        Y of training data; must be 2-D (its second axis sets the number of
        ``Y`` columns)

    export_X : boolean
        whether to export 'X'. If False, only ``Ytrain`` will be exported

    :return: None
    """
    path = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``path`` if needed.
    check_dir_exists(path)
    file_name = 'train_'

    columns = ['Y%d' % j for j in range(Ytrain.shape[1])]
    if export_X:
        data = np.hstack((Ytrain, Xtrain))
        columns += ['X%d' % j for j in range(Xtrain.shape[1])]
    else:
        data = Ytrain

    # Join with the delimiter so the header has exactly as many fields as
    # each data row (no dangling trailing comma).
    np.savetxt(path + file_name + '.csv', data, header=','.join(columns),
               delimiter=',', comments='')
def init_logger(name, output_to_disk=True):
    """
    Set up the module-level ``logger`` for an experiment run.

    The logger is fetched by ``name``, set to ``LOG_LEVEL`` and given a
    stream handler. When ``output_to_disk`` is true a per-run folder named
    ``<name>_<dd-Mon-YYYY_HHhMMmSSs>_<pid>`` is placed under ``OUTPUT_PATH``,
    remembered in the module-level ``_log_folder_path``, and a file handler
    writing ``<name>.log`` inside it is attached before the stream handler.

    Parameters
    ----------
    name : str
        The name of the experiment currently being run.
    output_to_disk : bool
        Whether to also log to a file in a freshly named run folder.
    """
    global logger
    global _log_folder_path

    record_format = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    def _attach(handler):
        # Handlers pass every record; the logger level does the filtering.
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(record_format)
        logger.addHandler(handler)

    logger = logging.getLogger(name)
    logger.setLevel(LOG_LEVEL)

    if output_to_disk:
        timestamp = datetime.datetime.now().strftime('%d-%b-%Y_%Hh%Mm%Ss')
        pid_suffix = '_%d' % os.getpid()
        run_folder = name + '_' + timestamp + pid_suffix
        _log_folder_path = os.path.join(OUTPUT_PATH, run_folder)
        # NOTE(review): presumably creates the folder if absent -- confirm.
        util.check_dir_exists(_log_folder_path)
        _attach(logging.FileHandler(os.path.join(_log_folder_path, name + '.log')))

    _attach(logging.StreamHandler())
def get_logger(path, name, level):
    """
    Creates a logger that writes to a log file and to the console.

    The log file is ``path + '/' + name + '.log'``. Handlers are attached
    only the first time a given ``name`` is configured: ``logging.getLogger``
    returns the same object for the same name, so attaching handlers on
    every call would emit each record once per call and open one more file
    handle each time.

    Parameters
    ----------
    path : string
        directory in which the log file is saved

    name : string
        name of the logger; also the base name of the log file

    level : string or int
        threshold passed to ``Logger.setLevel`` (applied on every call)

    Returns
    -------
    logger : logging.Logger
        the configured logger
    """
    logger = logging.getLogger(name)
    logger.setLevel(level)
    if logger.handlers:
        # Already configured by an earlier call: do not stack duplicates.
        return logger

    # NOTE(review): check_dir_exists presumably creates ``path`` when it is
    # missing -- confirm against its definition.
    check_dir_exists(path)

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    # Both handlers let everything through; the logger's own level filters.
    for handler in (logging.FileHandler(path + '/' + name + '.log'),
                    logging.StreamHandler()):
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(formatter)
        logger.addHandler(handler)
    return logger
def export_test(name, X, Ytrue, Ypred, Yvar_pred, nlpd, pred_names=[''], export_X=False):
    """
    Exports test data and the predictions into a csv file.

    The file is written to
    ``ModelLearn.get_output_path() + name + '/test_.csv'``. Columns appear in
    this order: ``Ytrue*``, ``Ypred_<pred>_*``, ``Yvar_pred_<pred>_*``,
    ``nlpd``, ``NLPD_*`` and, when ``export_X`` is set, ``X*``. The header
    row holds one name per column.

    Parameters
    ----------
    name : string
        name of the sub-directory (under the output path) to write into

    X : ndarray
        X test for which prediction have been made (2-D)

    Ytrue : ndarray
        The true values of 'Y' (2-D)

    Ypred : list of ndarray
        Predictions at the test points; the list is spliced into the output
        column blocks and ``Ypred[0]`` sets the number of prediction columns
        per entry of ``pred_names``

    Yvar_pred : list of ndarray
        Variance of the prediction, laid out like ``Ypred``

    nlpd : ndarray
        NLPD of the predictions (2-D); the first column is labelled ``nlpd``
        and the remaining ones ``NLPD_0``, ``NLPD_1``, ...

    pred_names : list
        not necessary. It should be ['']. Only read, never modified.

    export_X : boolean
        Whether to export 'X' to the csv file. If False, 'X' will not be
        exported into the csv file (useful in large datasets).
    """
    path = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``path`` if needed.
    check_dir_exists(path)
    file_name = 'test_'

    # Column blocks in file order; the lists of arrays are spliced in.
    blocks = [Ytrue]
    blocks += Ypred
    blocks += Yvar_pred
    blocks.append(nlpd)

    columns = ['Ytrue%d' % j for j in range(Ytrue.shape[1])]
    columns += ['Ypred_%s_%d' % (m, j)
                for m in pred_names for j in range(Ypred[0].shape[1])]
    columns += ['Yvar_pred_%s_%d' % (m, j)
                for m in pred_names for j in range(Yvar_pred[0].shape[1])]
    columns.append('nlpd')
    columns += ['NLPD_%d' % j for j in range(nlpd.shape[1] - 1)]

    if export_X:
        blocks.append(X)
        columns += ['X%d' % j for j in range(X.shape[1])]

    # Join with the delimiter so the header has exactly as many fields as
    # each data row (no dangling trailing comma).
    np.savetxt(path + file_name + '.csv', np.hstack(blocks),
               header=','.join(columns), delimiter=',', comments='')
def callback(model, current_iter, total_evals, delta_m, delta_s, obj_track):
    """
    Checkpoints the model and the optimisation state to disk.

    Two pickle files are written into
    ``ModelLearn.get_output_path() + name + '/'``: ``model.dump`` holds
    ``model.image()`` and ``opt.dump`` holds a dict with the arguments of
    this call plus ``model.objective_function()`` under ``'obj_fun'``.
    ``name`` is not a parameter; it is resolved from the surrounding scope.

    Parameters
    ----------
    model : model
        object providing ``image()`` and ``objective_function()``

    current_iter, total_evals, delta_m, delta_s, obj_track
        values stored verbatim in ``opt.dump`` under keys of the same name
    """
    path = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``path`` if needed.
    check_dir_exists(path)

    # Build each payload before opening its file, so a failure while
    # computing it does not truncate an existing checkpoint.
    model_image = model.image()
    # Binary mode because pickles are byte streams; ``with`` guarantees the
    # data is flushed and the handle closed.
    with open(path + 'model.dump', 'wb') as model_file:
        pickle.dump(model_image, model_file)

    opt_state = {
        'current_iter': current_iter,
        'total_evals': total_evals,
        'delta_m': delta_m,
        'delta_s': delta_s,
        'obj_track': obj_track,
        'obj_fun': model.objective_function()
    }
    with open(path + 'opt.dump', 'wb') as opt_file:
        pickle.dump(opt_state, opt_file)
def callback(model, current_iter, total_evals, delta_m, delta_s, obj_track):
    """
    Checkpoints the model and the optimisation state to disk.

    Two pickle files are written into
    ``ModelLearn.get_output_path() + name + '/'``: ``model.dump`` holds
    ``model.image()`` and ``opt.dump`` holds a dict with the arguments of
    this call plus ``model.objective_function()`` under ``'obj_fun'``.
    ``name`` is not a parameter; it is resolved from the surrounding scope.

    Parameters
    ----------
    model : model
        object providing ``image()`` and ``objective_function()``

    current_iter, total_evals, delta_m, delta_s, obj_track
        values stored verbatim in ``opt.dump`` under keys of the same name
    """
    path = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``path`` if needed.
    check_dir_exists(path)

    # Build each payload before opening its file, so a failure while
    # computing it does not truncate an existing checkpoint.
    model_image = model.image()
    # Binary mode because pickles are byte streams; ``with`` guarantees the
    # data is flushed and the handle closed.
    with open(path + 'model.dump', 'wb') as model_file:
        pickle.dump(model_image, model_file)

    opt_state = {
        'current_iter': current_iter,
        'total_evals': total_evals,
        'delta_m': delta_m,
        'delta_s': delta_s,
        'obj_track': obj_track,
        'obj_fun': model.objective_function()
    }
    with open(path + 'opt.dump', 'wb') as opt_file:
        pickle.dump(opt_state, opt_file)
def export_track(name, track):
    """
    Writes the trajectory of the objective function to a csv file.

    The file is ``ModelLearn.get_output_path() + name + '/obj_track_.csv'``
    and starts with the header line ``objective``.

    Parameters
    ----------
    name : string
        name of the sub-directory (under the output path) to write into

    track : list
        trajectory of the objective function
    """
    out_dir = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``out_dir`` if needed.
    check_dir_exists(out_dir)
    target = out_dir + 'obj_track_' + '.csv'
    # Wrap, then transpose: a flat track of length n becomes an (n, 1)
    # column, i.e. one objective value per csv row.
    column = np.transpose(np.array([track]))
    np.savetxt(target, column, header='objective', delimiter=',', comments='')
def export_configuration(name, config):
    """
    Writes the model / optimisation configuration to a csv file.

    The file is ``ModelLearn.get_output_path() + name + '/config_.csv'`` and
    contains a header row with the keys of ``config`` followed by a single
    row with the matching values.

    Parameters
    ----------
    name : string
        Name of the sub-directory (under the output path) to write into

    config : dictionary
        Configuration to be exported
    """
    out_dir = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``out_dir`` if needed.
    check_dir_exists(out_dir)
    target = out_dir + 'config_' + '.csv'
    with open(target, 'wb') as handle:
        dict_writer = csv.DictWriter(handle, fieldnames=config.keys())
        dict_writer.writeheader()
        dict_writer.writerow(config)
def export_track(name, track):
    """
    Writes the trajectory of the objective function to a csv file.

    The file is ``ModelLearn.get_output_path() + name + '/obj_track_.csv'``
    and starts with the header line ``objective``.

    Parameters
    ----------
    name : string
        name of the sub-directory (under the output path) to write into

    track : list
        trajectory of the objective function
    """
    out_dir = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``out_dir`` if needed.
    check_dir_exists(out_dir)
    target = out_dir + 'obj_track_' + '.csv'
    # Wrap, then transpose: a flat track of length n becomes an (n, 1)
    # column, i.e. one objective value per csv row.
    column = np.transpose(np.array([track]))
    np.savetxt(target, column, header='objective', delimiter=',', comments='')
def __init__(self, id, request, state):
    """
    Initialise the step and run its prerequisite checks.

    ``self.results`` receives one entry per check, in this order: the
    step's own ``_is_not_installed`` check, the Python version check, the
    data directory existence and writability checks, then one module
    availability check per required module. Each entry is unpacked as a
    3-tuple whose middle element is the outcome; ``self.failed`` is True
    when any outcome equals ``False``.
    """
    Step.__init__(self, id, request, state)

    required_modules = ('pywsgi',
                        'SpiffGuard',
                        'SpiffIntegrator',
                        'SpiffSignal',
                        'SpiffWarehouse',
                        'SpiffWikiMarkup')

    checks = [self._is_not_installed(),
              util.check_python_version((2, 3, 0, '', 0)),
              util.check_dir_exists(config.data_dir),
              util.check_is_writable(config.data_dir)]
    for module_name in required_modules:
        checks.append(util.check_python_module_exists(module_name))
    self.results = checks

    # NOTE(review): first/third tuple members look like a label and a hint;
    # only the middle (outcome) member is used here.
    outcomes = [outcome for _label, outcome, _hint in self.results]
    self.failed = False in outcomes
def export_model(model, name):
    """
    Writes the parameters of a model to a csv file.

    The file is ``ModelLearn.get_output_path() + name + '/model_.csv'``. Its
    first row is ``#model`` followed by the model's class; every later row
    pairs a parameter name with its value. The output directory is checked
    even when ``model`` is None, but no file is written in that case.

    Parameters
    ----------
    model : model
        the model to be exported; must provide ``get_all_params()`` and
        ``get_all_param_names()``

    name : string
        name of the sub-directory (under the output path) to write into
    """
    out_dir = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``out_dir`` if needed.
    check_dir_exists(out_dir)
    if model is None:
        return

    target = out_dir + 'model_' + '.csv'
    with open(target, 'w') as fp:
        writer = csv.writer(fp, delimiter=',')
        writer.writerow(['#model', model.__class__])
        values = model.get_all_params()
        labels = model.get_all_param_names()
        for idx, value in enumerate(values):
            writer.writerow([labels[idx], value])
def init_logger(name, output_to_disk=True):
    """
    Set up the module-level ``logger`` for an experiment run.

    The logger is fetched by ``name``, set to ``LOG_LEVEL`` and given a
    stream handler. When ``output_to_disk`` is true a per-run folder named
    ``<name>_<dd-Mon-YYYY_HHhMMmSSs>_<pid>`` is placed under ``OUTPUT_PATH``,
    remembered in the module-level ``_log_folder_path``, and a file handler
    writing ``<name>.log`` inside it is attached before the stream handler.

    Parameters
    ----------
    name : str
        The name of the experiment currently being run.
    output_to_disk : bool
        Whether to also log to a file in a freshly named run folder.
    """
    global logger
    global _log_folder_path

    record_format = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')

    def _attach(handler):
        # Handlers pass every record; the logger level does the filtering.
        handler.setLevel(logging.DEBUG)
        handler.setFormatter(record_format)
        logger.addHandler(handler)

    logger = logging.getLogger(name)
    logger.setLevel(LOG_LEVEL)

    if output_to_disk:
        timestamp = datetime.datetime.now().strftime('%d-%b-%Y_%Hh%Mm%Ss')
        pid_suffix = '_%d' % os.getpid()
        run_folder = name + '_' + timestamp + pid_suffix
        _log_folder_path = os.path.join(OUTPUT_PATH, run_folder)
        # NOTE(review): presumably creates the folder if absent -- confirm.
        util.check_dir_exists(_log_folder_path)
        log_file = os.path.join(_log_folder_path, name + '.log')
        _attach(logging.FileHandler(log_file))

    _attach(logging.StreamHandler())
def plot_output(name, infile_path, model_names, filter):
    """
    Reads predictions from csv files and generates plots and output csv.

    For every sub-directory listed in ``model_names`` the configuration,
    prediction and training files are read from ``infile_path``; their file
    names come from ``model_logging.CONFIG_FILE_NAME``,
    ``model_logging.PREDICTIONS_FILE_NAME`` and
    ``model_logging.TRAINING_FILE_NAME``::

        ``infile_path`` /
                    ../any_name/
                                ../<config>, <predictions>, <training>
                    ../any_name2
                                ../<config>, <predictions>, <training>

    Depending on the likelihood named in the configuration (key ``'ll'``)
    these quantities are collected per configuration:

    * ``MSSE`` -- per output: mean squared error divided by the mean squared
      deviation of the truth from the training mean of that output
    * ``SSE``  -- per test point: squared error of output 0 divided by the
      mean squared deviation of the truth from the training mean
    * ``ER``   -- misclassification rate
    * ``NLPD`` -- read directly from the ``NLPD_*`` prediction columns
    * ``intensity`` -- rescaled input, predicted mean, predicted variance
      and true value for output 0

    The data behind every graph is also written to ``../graph_data`` as
    ``<name>_<graph>_data.csv`` so that outputs can be reproduced.

    Parameters
    ----------
    name : string
        name of the csv files to which data will be exported

    infile_path : string
        the folder which contains csv for configs and test and train

    model_names : list
        name of the sub-directories in ``infile_path`` to consider

    filter : callable
        a filter which will be applied in config files to filter which
        configs should be considered. For example,
        lambda x: x['method'] == 'full' will only consider outputs which
        used 'full' method. ``None`` keeps every configuration.
    """
    graphs = {
        'SSE': {},
        'MSSE': {},
        'NLPD': {},
        'ER': {},
        'intensity': {},
    }
    graph_n = {}  # number of runs seen per configuration string

    for model_name in model_names:
        model_dir = infile_path + model_name + '/'
        data_config = PlotOutput.read_config(model_dir + model_logging.CONFIG_FILE_NAME)
        if filter is not None and not filter(data_config):
            continue

        data_test = pandas.read_csv(model_dir + model_logging.PREDICTIONS_FILE_NAME)
        # One 'true_Y*' column per output dimension.
        dim = sum(1 for column in data_test.columns if column.startswith('true_Y'))

        data_train = pandas.read_csv(model_dir + model_logging.TRAINING_FILE_NAME)
        Y_mean = data_train['Y_0'].mean()

        # Each array has one row per output dimension.
        Ypred = np.array([data_test['predicted_Y_%d' % d] for d in range(dim)])
        Ytrue = np.array([data_test['true_Y_%d' % d] for d in range(dim)])
        Yvar = np.array([data_test['predicted_variance_%d' % d] for d in range(dim)])

        # NOTE(review): assumes config_to_str is a pure formatter, so one
        # call per model is equivalent to calling it at every use.
        config_key = PlotOutput.config_to_str(data_config)
        graph_n[config_key] = graph_n.get(config_key, 0) + 1
        likelihood = data_config['ll']

        if likelihood == CogLL.__name__:
            for i in range(Ytrue.shape[0]):
                output_key = config_key + '_' + str(i)
                Y_mean = data_train['Y_' + str(i)].mean()
                PlotOutput.add_to_list(
                    graphs['MSSE'], output_key,
                    ((Ypred[i] - Ytrue[i]) ** 2).mean() / ((Y_mean - Ytrue[i]) ** 2).mean())
                NLPD = np.array(data_test['NLPD_' + str(i)])
                PlotOutput.add_to_list(graphs['NLPD'], output_key, NLPD)

        if likelihood in [UnivariateGaussian.__name__, WarpLL.__name__]:
            NLPD = np.array(data_test['NLPD_0'])
            PlotOutput.add_to_list(
                graphs['SSE'], config_key,
                (Ypred[0] - Ytrue[0]) ** 2 / ((Y_mean - Ytrue[0]) ** 2).mean())
            PlotOutput.add_to_list(graphs['NLPD'], config_key, NLPD)

        if likelihood == LogisticLL.__name__:
            NLPD = np.array(data_test['NLPD_0'])
            # Labels are -1 / 1; predictions are thresholded at 0.5.
            wrong = (((Ypred[0] > 0.5) & (Ytrue[0] == -1)) |
                     ((Ypred[0] < 0.5) & (Ytrue[0] == 1)))
            PlotOutput.add_to_list(graphs['ER'], config_key, np.array([wrong.mean()]))
            PlotOutput.add_to_list(graphs['NLPD'], config_key, NLPD)

        if likelihood == SoftmaxLL.__name__:
            NLPD = np.array(data_test['NLPD_0'])
            mismatch = np.argmax(Ytrue, axis=0) != np.argmax(Ypred, axis=0)
            PlotOutput.add_to_list(graphs['ER'], config_key, np.array([mismatch.mean()]))
            PlotOutput.add_to_list(graphs['NLPD'], config_key, NLPD)

        if likelihood == LogGaussianCox.__name__:
            X0 = np.array([data_test['X_0']])
            # NOTE(review): the 365 / 1851.2026 rescaling looks like a
            # day-offset to calendar-year conversion -- confirm.
            PlotOutput.add_to_list(
                graphs['intensity'], config_key,
                np.array([X0[0, :] / 365 + 1851.2026, Ypred[0, :], Yvar[0, :], Ytrue[0, :]]).T)

    for n, g in graphs.items():
        if not g:
            continue

        # Every non-empty graph writes into this folder, so make sure it is
        # there before the first csv is written.
        check_dir_exists('../graph_data/')
        data_file = '../graph_data/' + name + '_' + n + '_data.csv'

        ion()
        for k in g.keys():
            if k in graph_n:
                print('%s n:  %s' % (k, graph_n[k]))

        if n in ['SSE', 'NLPD']:
            frame = DataFrame(dict((k, Series(v)) for k, v in g.items()))
            frame.plot(kind='box', title=n)
            frame.to_csv(data_file, index=False)

        if n in ['ER', 'MSSE']:
            frame = DataFrame(dict((k, Series(v)) for k, v in g.items()))
            frame.to_csv(data_file, index=False)
            means = frame.mean()
            errors = frame.std()
            ax = means.plot(kind='bar', yerr=errors, title=n)
            patches, labels = ax.get_legend_handles_labels()
            ax.legend(patches, labels, loc='lower center')

        if n in ['intensity']:
            # The x grid and the true values are taken from the first entry.
            first = list(g.values())[0]
            X = first[:, 0]
            true_data = DataFrame({'x': X, 'y': first[:, 3]})
            true_data.to_csv('../graph_data/' + name + '_' + 'true_y' + '_data.csv', index=False)
            plt.figure()
            # NOTE: no per-model curves are drawn on this figure; only the
            # data needed to draw them is exported.
            frames = [DataFrame({'x': X,
                                 'm': v[:, 1],
                                 'v': v[:, 2],
                                 'model_sp': [k] * X.shape[0]})
                      for k, v in g.items()]
            # One concat instead of growing a frame inside the loop.
            graph_data = pandas.concat(frames)
            plt.legend(loc='upper center')
            graph_data.to_csv(data_file, index=False)

        show(block=True)
def plot_output(name, infile_path, model_names, filter):
    """
    Reads predictions from csv files and generates plots and output csv.

    For every sub-directory listed in ``model_names`` the configuration,
    prediction and training files are read from ``infile_path``; their file
    names come from ``model_logging.CONFIG_FILE_NAME``,
    ``model_logging.PREDICTIONS_FILE_NAME`` and
    ``model_logging.TRAINING_FILE_NAME``::

        ``infile_path`` /
                    ../any_name/
                                ../<config>, <predictions>, <training>
                    ../any_name2
                                ../<config>, <predictions>, <training>

    Depending on the likelihood named in the configuration (key ``'ll'``)
    these quantities are collected per configuration:

    * ``MSSE`` -- per output: mean squared error divided by the mean squared
      deviation of the truth from the training mean of that output
    * ``SSE``  -- per test point: squared error of output 0 divided by the
      mean squared deviation of the truth from the training mean
    * ``ER``   -- misclassification rate
    * ``NLPD`` -- read directly from the ``NLPD_*`` prediction columns
    * ``intensity`` -- rescaled input, predicted mean, predicted variance
      and true value for output 0

    The data behind every graph is also written to ``../graph_data`` as
    ``<name>_<graph>_data.csv`` so that outputs can be reproduced.

    Parameters
    ----------
    name : string
        name of the csv files to which data will be exported

    infile_path : string
        the folder which contains csv for configs and test and train

    model_names : list
        name of the sub-directories in ``infile_path`` to consider

    filter : callable
        a filter which will be applied in config files to filter which
        configs should be considered. For example,
        lambda x: x['method'] == 'full' will only consider outputs which
        used 'full' method. ``None`` keeps every configuration.
    """
    graphs = {
        'SSE': {},
        'MSSE': {},
        'NLPD': {},
        'ER': {},
        'intensity': {},
    }
    graph_n = {}  # number of runs seen per configuration string

    for model_name in model_names:
        model_dir = infile_path + model_name + '/'
        data_config = PlotOutput.read_config(
            model_dir + model_logging.CONFIG_FILE_NAME)
        if filter is not None and not filter(data_config):
            continue

        data_test = pandas.read_csv(
            model_dir + model_logging.PREDICTIONS_FILE_NAME)
        # One 'true_Y*' column per output dimension.
        dim = sum(1 for column in data_test.columns
                  if column.startswith('true_Y'))

        data_train = pandas.read_csv(
            model_dir + model_logging.TRAINING_FILE_NAME)
        Y_mean = data_train['Y_0'].mean()

        # Each array has one row per output dimension.
        Ypred = np.array(
            [data_test['predicted_Y_%d' % d] for d in range(dim)])
        Ytrue = np.array(
            [data_test['true_Y_%d' % d] for d in range(dim)])
        Yvar = np.array(
            [data_test['predicted_variance_%d' % d] for d in range(dim)])

        # NOTE(review): assumes config_to_str is a pure formatter, so one
        # call per model is equivalent to calling it at every use.
        config_key = PlotOutput.config_to_str(data_config)
        graph_n[config_key] = graph_n.get(config_key, 0) + 1
        likelihood = data_config['ll']

        if likelihood == CogLL.__name__:
            for i in range(Ytrue.shape[0]):
                output_key = config_key + '_' + str(i)
                Y_mean = data_train['Y_' + str(i)].mean()
                PlotOutput.add_to_list(
                    graphs['MSSE'], output_key,
                    ((Ypred[i] - Ytrue[i])**2).mean() /
                    ((Y_mean - Ytrue[i])**2).mean())
                NLPD = np.array(data_test['NLPD_' + str(i)])
                PlotOutput.add_to_list(graphs['NLPD'], output_key, NLPD)

        if likelihood in [UnivariateGaussian.__name__, WarpLL.__name__]:
            NLPD = np.array(data_test['NLPD_0'])
            PlotOutput.add_to_list(
                graphs['SSE'], config_key,
                (Ypred[0] - Ytrue[0])**2 / ((Y_mean - Ytrue[0])**2).mean())
            PlotOutput.add_to_list(graphs['NLPD'], config_key, NLPD)

        if likelihood == LogisticLL.__name__:
            NLPD = np.array(data_test['NLPD_0'])
            # Labels are -1 / 1; predictions are thresholded at 0.5.
            wrong = (((Ypred[0] > 0.5) & (Ytrue[0] == -1)) |
                     ((Ypred[0] < 0.5) & (Ytrue[0] == 1)))
            PlotOutput.add_to_list(
                graphs['ER'], config_key, np.array([wrong.mean()]))
            PlotOutput.add_to_list(graphs['NLPD'], config_key, NLPD)

        if likelihood == SoftmaxLL.__name__:
            NLPD = np.array(data_test['NLPD_0'])
            mismatch = (np.argmax(Ytrue, axis=0) !=
                        np.argmax(Ypred, axis=0))
            PlotOutput.add_to_list(
                graphs['ER'], config_key, np.array([mismatch.mean()]))
            PlotOutput.add_to_list(graphs['NLPD'], config_key, NLPD)

        if likelihood == LogGaussianCox.__name__:
            X0 = np.array([data_test['X_0']])
            # NOTE(review): the 365 / 1851.2026 rescaling looks like a
            # day-offset to calendar-year conversion -- confirm.
            PlotOutput.add_to_list(
                graphs['intensity'], config_key,
                np.array([
                    X0[0, :] / 365 + 1851.2026,
                    Ypred[0, :],
                    Yvar[0, :],
                    Ytrue[0, :]
                ]).T)

    for n, g in graphs.items():
        if not g:
            continue

        # Every non-empty graph writes into this folder, so make sure it is
        # there before the first csv is written.
        check_dir_exists('../graph_data/')
        data_file = '../graph_data/' + name + '_' + n + '_data.csv'

        ion()
        for k in g.keys():
            if k in graph_n:
                print('%s n:  %s' % (k, graph_n[k]))

        if n in ['SSE', 'NLPD']:
            frame = DataFrame(dict((k, Series(v)) for k, v in g.items()))
            frame.plot(kind='box', title=n)
            frame.to_csv(data_file, index=False)

        if n in ['ER', 'MSSE']:
            frame = DataFrame(dict((k, Series(v)) for k, v in g.items()))
            frame.to_csv(data_file, index=False)
            means = frame.mean()
            errors = frame.std()
            ax = means.plot(kind='bar', yerr=errors, title=n)
            patches, labels = ax.get_legend_handles_labels()
            ax.legend(patches, labels, loc='lower center')

        if n in ['intensity']:
            # The x grid and the true values are taken from the first entry.
            first = list(g.values())[0]
            X = first[:, 0]
            true_data = DataFrame({'x': X, 'y': first[:, 3]})
            true_data.to_csv(
                '../graph_data/' + name + '_' + 'true_y' + '_data.csv',
                index=False)
            plt.figure()
            # NOTE: no per-model curves are drawn on this figure; only the
            # data needed to draw them is exported.
            frames = [
                DataFrame({
                    'x': X,
                    'm': v[:, 1],
                    'v': v[:, 2],
                    'model_sp': [k] * X.shape[0]
                }) for k, v in g.items()
            ]
            # One concat instead of growing a frame inside the loop.
            graph_data = pandas.concat(frames)
            plt.legend(loc='upper center')
            graph_data.to_csv(data_file, index=False)

        show(block=True)
def export_test(name, X, Ytrue, Ypred, Yvar_pred, nlpd, pred_names=[''], export_X=False):
    """
    Exports test data and the predictions into a csv file.

    The file is written to
    ``ModelLearn.get_output_path() + name + '/test_.csv'``. Columns appear in
    this order: ``Ytrue*``, ``Ypred_<pred>_*``, ``Yvar_pred_<pred>_*``,
    ``nlpd``, ``NLPD_*`` and, when ``export_X`` is set, ``X*``. The header
    row holds one name per column.

    Parameters
    ----------
    name : string
        name of the sub-directory (under the output path) to write into

    X : ndarray
        X test for which prediction have been made (2-D)

    Ytrue : ndarray
        The true values of 'Y' (2-D)

    Ypred : list of ndarray
        Predictions at the test points; the list is spliced into the output
        column blocks and ``Ypred[0]`` sets the number of prediction columns
        per entry of ``pred_names``

    Yvar_pred : list of ndarray
        Variance of the prediction, laid out like ``Ypred``

    nlpd : ndarray
        NLPD of the predictions (2-D); the first column is labelled ``nlpd``
        and the remaining ones ``NLPD_0``, ``NLPD_1``, ...

    pred_names : list
        not necessary. It should be ['']. Only read, never modified.

    export_X : boolean
        Whether to export 'X' to the csv file. If False, 'X' will not be
        exported into the csv file (useful in large datasets).
    """
    path = ModelLearn.get_output_path() + name + '/'
    # NOTE(review): check_dir_exists presumably creates ``path`` if needed.
    check_dir_exists(path)
    file_name = 'test_'

    # Column blocks in file order; the lists of arrays are spliced in.
    blocks = [Ytrue]
    blocks += Ypred
    blocks += Yvar_pred
    blocks.append(nlpd)

    columns = ['Ytrue%d' % j for j in range(Ytrue.shape[1])]
    columns += ['Ypred_%s_%d' % (m, j)
                for m in pred_names for j in range(Ypred[0].shape[1])]
    columns += ['Yvar_pred_%s_%d' % (m, j)
                for m in pred_names for j in range(Yvar_pred[0].shape[1])]
    columns.append('nlpd')
    columns += ['NLPD_%d' % j for j in range(nlpd.shape[1] - 1)]

    if export_X:
        blocks.append(X)
        columns += ['X%d' % j for j in range(X.shape[1])]

    # Join with the delimiter so the header has exactly as many fields as
    # each data row (no dangling trailing comma).
    np.savetxt(path + file_name + '.csv', np.hstack(blocks),
               header=','.join(columns), delimiter=',', comments='')