# Plot mean inside/outside temperature over overlapping sliding windows.
# Each slice covers 2/steps of the total duration, advancing by 1/steps.
slices = [(begin + duration * step / steps,
           begin + duration * (step + 2) / steps)
          for step in range(0, steps - 2)]
# Re-center each date in the middle of its averaging window.
# BUG FIX: midpoint is win_start + (win_end - win_start) / 2; the original
# used (begin - end) / 2, which placed the date *before* the window start.
# (Renamed tuple vars also avoid shadowing the outer begin/end.)
slicedDates = [win_start + (win_end - win_start) / 2
               for (win_start, win_end) in slices]
idx = pandas.to_datetime(slicedDates, unit='s', utc=True)
# mean_std(...)[0] is the per-window mean series — TODO confirm against helper.
df1 = DataFrame(
    {
        'inside': mean_std(blairInside, slices)[0],
        'outside': mean_std(blairOutside, slices)[0]
    },
    index=idx,
    columns=['inside', 'outside'])
df1.plot(kind='line')
# Show local (New York) wall-clock time on the x axis.
plt.gca().xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%H:%M', tz=timezone("America/New_York")))


def fill_error_tube(b, color):
    # Shade the band mean +/- one std-dev around the plotted line.
    (mean, error) = mean_std(b, slices)
    plt.fill_between(df1.index, mean - error, mean + error, color=color)


fill_error_tube(blairInside, [0.5, 0.5, 0.5, 0.5])
fill_error_tube(blairOutside, [0.5, 0.5, 0.5, 0.5])
plt.ylabel("Temperatur [Celsius]", fontsize=20)
plt.xlabel("Time [Hours]", fontsize=20)
# Filter to rows with a valid timestamp and sensor id 2.
filt = lambda b: b[np.logical_and(b['time'] != 0, b['sid'] == 2)]
blairOutside = filt(blairOutsideAll)
blairInside = filt(blairInsideAll)
begin = blairInside['time'][0]
end = blairInside['time'][-1]
duration = end - begin
steps = 300
# Overlapping sliding windows: each covers 2/steps of the duration,
# advancing by 1/steps.
slices = [(begin + duration * step / steps,
           begin + duration * (step + 2) / steps)
          for step in range(0, steps - 2)]
# Re-center each date in the middle of its averaging window.
# BUG FIX: midpoint is win_start + (win_end - win_start) / 2; the original
# used (begin - end) / 2, which placed the date *before* the window start.
# Renaming the tuple vars also stops the comprehension from shadowing (and,
# under Python 2, clobbering) the outer begin/end variables above.
slicedDates = [win_start + (win_end - win_start) / 2
               for (win_start, win_end) in slices]
idx = pandas.to_datetime(slicedDates, unit='s', utc=True)
# mean_std(...)[0] is the per-window mean series — TODO confirm against helper.
df1 = DataFrame(
    {
        'inside': mean_std(blairInside, slices)[0],
        'outside': mean_std(blairOutside, slices)[0]
    },
    index=idx,
    columns=['inside', 'outside'])
df1.plot(kind='line')
# Show local (New York) wall-clock time on the x axis.
plt.gca().xaxis.set_major_formatter(
    matplotlib.dates.DateFormatter('%H:%M', tz=timezone("America/New_York")))


def fill_error_tube(b, color):
    # Shade the band mean +/- one std-dev around the plotted line.
    (mean, error) = mean_std(b, slices)
    plt.fill_between(df1.index, mean - error, mean + error, color=color)


fill_error_tube(blairInside, [0.5, 0.5, 0.5, 0.5])
fill_error_tube(blairOutside, [0.5, 0.5, 0.5, 0.5])
plt.ylabel("Temperatur [Celsius]", fontsize=20)
plt.xlabel("Time [Hours]", fontsize=20)
#plt.show()
print("feature-column.py metric=" + args.metric + " out=" + args.out)


def read_ssv(fname):
    """Read a whitespace-separated eval file and return rows as token lists.

    For 'galago_eval' rows are returned as-is; for 'trec_eval' the first two
    columns are swapped so both formats yield [query_id, run_tag, ...].
    Returns None for any other args.format value (original behavior kept).
    """
    # FIX: use a context manager so the file handle is closed deterministically
    # (the original open(...) was never closed).
    with open(fname, 'r') as f:
        lines = [line.split() for line in f]
    if args.format.lower() == 'galago_eval':
        return lines
    elif args.format.lower() == 'trec_eval':
        # trec_eval puts the run tag first; swap to match galago_eval order.
        return [[line[1], line[0]] + line[2:] for line in lines]


namestsv = read_ssv(args.names)
# row[2][8:] strips a fixed 8-char prefix from the display name —
# TODO confirm the expected prefix against the names file format.
namesDict = {row[0]: row[2][8:] for row in namestsv}
for run in args.runs:
    # FIX: read each run file once instead of twice (the original called
    # read_ssv(run) separately for values and labels).
    tsv = read_ssv(run)
    values = [float(row[2]) for row in tsv if row[0] in namesDict]
    labels = [namesDict[row[0]] for row in tsv if row[0] in namesDict]
    df2 = DataFrame(values, index=labels, columns=[os.path.basename(run)])
    plt.figure()
    df2.plot(kind='bar', color=['1.0', '0.70', '0.0', '0.50'])
    plt.ylabel(args.metric, fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=10)
    plt.xticks(rotation=90)
    plt.savefig(args.out + os.path.basename(run) + '.pdf', bbox_inches='tight')
    # plt.show()
def plot_output(name, infile_path, model_names, filter):
    """
    Reads predictions from csv files and generates plots and output csv.

    Input csv files should be in the infile_path with following structure:
    ``infile_path`` /
        ../any_name/
            ../config.csv, test_.csv,train_.csv
        ../any_name2
            ../config.csv, test_.csv,train_.csv

    The function also exports the data used to generate graphs as csv files
    the following folder: ../graph_data
    these csv files can be used to reproduce outputs.

    Parameters
    ----------
    name : string
        name of the csv files to which data will be exported
    infile_path : string
        the folder which contains csv for configs and test and train
    model_names : list
        name of the sub-directories in ``infile_path`` to consider
    filter : callable
        a filter which will be applied in config files to filter which configs
        should be considered. For example, lambda x: x['method'] == 'full'
        will only consider outputs which used 'full' method
    """
    # One bucket of series per metric; keys are config-string labels.
    graphs = {
        'SSE': {},
        'MSSE': {},
        'NLPD': {},
        'ER': {},
        'intensity': {},
    }
    # graph_n counts how many model runs contributed to each config label.
    graph_n = {}
    for m in model_names:
        data_config = PlotOutput.read_config(
            infile_path + m + '/' + model_logging.CONFIG_FILE_NAME)
        if filter is None or filter(data_config):
            data_test = pandas.read_csv(
                infile_path + m + '/' + model_logging.PREDICTIONS_FILE_NAME)
            cols = data_test.columns
            # Infer output dimensionality from the number of true_Y* columns.
            dim = 0
            for element in cols:
                if element.startswith('true_Y'):
                    dim += 1
            data_train = pandas.read_csv(infile_path + m + '/' +
                                         model_logging.TRAINING_FILE_NAME)
            Y_mean = data_train['Y_0'].mean()
            # Stack per-dimension prediction/truth/variance columns into
            # (dim, n_points) arrays.
            Ypred = np.array(
                [data_test['predicted_Y_%d' % (d)] for d in range(dim)])
            Ytrue = np.array(
                [data_test['true_Y_%d' % (d)] for d in range(dim)])
            Yvar = np.array([
                data_test['predicted_variance_%d' % (d)] for d in range(dim)
            ])
            if not (PlotOutput.config_to_str(data_config) in graph_n.keys()):
                graph_n[PlotOutput.config_to_str(data_config)] = 0
            graph_n[PlotOutput.config_to_str(data_config)] += 1
            # Which metrics apply depends on the likelihood ('ll') used.
            if data_config['ll'] in [CogLL.__name__]:
                # Multi-output case: one MSSE/NLPD series per output dim,
                # each normalised by the train-mean predictor's SSE.
                for i in range(Ytrue.shape[0]):
                    Y_mean = data_train['Y_' + str(i)].mean()
                    PlotOutput.add_to_list(
                        graphs['MSSE'],
                        PlotOutput.config_to_str(data_config) + '_' + str(i),
                        ((Ypred[i] - Ytrue[i])**2).mean() /
                        ((Y_mean - Ytrue[i])**2).mean())
                    NLPD = np.array(data_test['NLPD_' + str(i)])
                    PlotOutput.add_to_list(
                        graphs['NLPD'],
                        PlotOutput.config_to_str(data_config) + '_' + str(i),
                        NLPD)
            if data_config['ll'] in [
                    UnivariateGaussian.__name__, WarpLL.__name__
            ]:
                # Regression: per-point standardised squared error + NLPD.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['SSE'], PlotOutput.config_to_str(data_config),
                    (Ypred[0] - Ytrue[0])**2 /
                    ((Y_mean - Ytrue[0])**2).mean())
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            if data_config['ll'] in [LogisticLL.__name__]:
                # Binary classification: error rate with labels in {-1, 1}
                # and predictions thresholded at 0.5.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['ER'], PlotOutput.config_to_str(data_config),
                    np.array([
                        (((Ypred[0] > 0.5) & (Ytrue[0] == -1))
                         | ((Ypred[0] < 0.5) & (Ytrue[0] == 1))).mean()
                    ]))
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            if data_config['ll'] in [SoftmaxLL.__name__]:
                # Multi-class: error rate via argmax over output dims.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['ER'], PlotOutput.config_to_str(data_config),
                    np.array([(np.argmax(Ytrue, axis=0) != np.argmax(
                        Ypred, axis=0)).mean()]))
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            if data_config['ll'] in [LogGaussianCox.__name__]:
                # Point process: store (x, mean, var, truth) rows; the x
                # rescaling /365 + 1851.2026 presumably converts days to
                # calendar years for the coal-mining dataset — TODO confirm.
                X0 = np.array([data_test['X_0']])
                PlotOutput.add_to_list(
                    graphs['intensity'],
                    PlotOutput.config_to_str(data_config),
                    np.array([
                        X0[0, :] / 365 + 1851.2026, Ypred[0, :], Yvar[0, :],
                        Ytrue[0, :]
                    ]).T)
    # NOTE(review): iteritems / print-statement below are Python 2 only.
    for n, g in graphs.iteritems():
        if g:
            ion()
            for k in g.keys():
                if k in graph_n.keys():
                    print k, 'n: ', graph_n[k]
            if n in ['SSE', 'NLPD']:
                # Distributions per config: box plot + raw data export.
                g = DataFrame(
                    dict([(k, Series(v)) for k, v in g.iteritems()]))
                ax = g.plot(kind='box', title=n)
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv',
                         index=False)
            if n in ['ER', 'MSSE']:
                # Scalar metrics per config: bar plot of mean with std error
                # bars + raw data export.
                g = DataFrame(
                    dict([(k, Series(v)) for k, v in g.iteritems()]))
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv',
                         index=False)
                m = g.mean()
                errors = g.std()
                ax = m.plot(kind='bar', yerr=errors, title=n)
                patches, labels = ax.get_legend_handles_labels()
                ax.legend(patches, labels, loc='lower center')
            if n in ['intensity']:
                # Intensity curves: export the shared x/truth once, then one
                # (x, mean, variance) frame per model. Assumes all models
                # share the same x grid (taken from the first entry).
                X = g.values()[0][:, 0]
                true_data = DataFrame({'x': X, 'y': g.values()[0][:, 3]})
                true_data.to_csv('../graph_data/' + name + '_' + 'true_y' +
                                 '_data.csv', index=False)
                plt.figure()
                color = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
                c = 0
                check_dir_exists('../graph_data/')
                graph_data = DataFrame()
                for k, v in g.iteritems():
                    # plt.plot(X, v[:, 1], hold=True, color=color[c], label=k)
                    # plt.fill_between(X, v[:, 1] - 2 * np.sqrt(v[:, 2]), v[:, 1] + 2 * np.sqrt(v[:, 2]), alpha=0.2, facecolor=color[c])
                    graph_data = graph_data.append(
                        DataFrame({
                            'x': X,
                            'm': v[:, 1],
                            'v': v[:, 2],
                            'model_sp': [k] * X.shape[0]
                        }))
                    c += 1
                plt.legend(loc='upper center')
                graph_data.to_csv('../graph_data/' + name + '_' + n +
                                  '_data.csv', index=False)
    show(block=True)
def plot_output(name, infile_path, model_names, filter):
    """
    Reads predictions from csv files and generates plots and output csv.

    Input csv files should be in the infile_path with following structure:
    ``infile_path`` /
        ../any_name/
            ../config.csv, test_.csv,train_.csv
        ../any_name2
            ../config.csv, test_.csv,train_.csv

    The function also exports the data used to generate graphs as csv files
    the following folder: ../graph_data
    these csv files can be used to reproduce outputs.

    Parameters
    ----------
    name : string
        name of the csv files to which data will be exported
    infile_path : string
        the folder which contains csv for configs and test and train
    model_names : list
        name of the sub-directories in ``infile_path`` to consider
    filter : callable
        a filter which will be applied in config files to filter which configs
        should be considered. For example, lambda x: x['method'] == 'full'
        will only consider outputs which used 'full' method
    """
    # One bucket of series per metric; keys are config-string labels.
    graphs = {
        'SSE': {},
        'MSSE': {},
        'NLPD': {},
        'ER': {},
        'intensity': {},
    }
    # graph_n counts how many model runs contributed to each config label.
    graph_n = {}
    for m in model_names:
        data_config = PlotOutput.read_config(infile_path + m + '/' +
                                             model_logging.CONFIG_FILE_NAME)
        if filter is None or filter(data_config):
            data_test = pandas.read_csv(infile_path + m + '/' +
                                        model_logging.PREDICTIONS_FILE_NAME)
            cols = data_test.columns
            # Infer output dimensionality from the number of true_Y* columns.
            dim = 0
            for element in cols:
                if element.startswith('true_Y'):
                    dim += 1
            data_train = pandas.read_csv(infile_path + m + '/' +
                                         model_logging.TRAINING_FILE_NAME)
            Y_mean = data_train['Y_0'].mean()
            # Stack per-dimension prediction/truth/variance columns into
            # (dim, n_points) arrays.
            Ypred = np.array([data_test['predicted_Y_%d' % (d)]
                              for d in range(dim)])
            Ytrue = np.array([data_test['true_Y_%d' % (d)]
                              for d in range(dim)])
            Yvar = np.array([data_test['predicted_variance_%d' % (d)]
                             for d in range(dim)])
            if not (PlotOutput.config_to_str(data_config) in graph_n.keys()):
                graph_n[PlotOutput.config_to_str(data_config)] = 0
            graph_n[PlotOutput.config_to_str(data_config)] += 1
            # Which metrics apply depends on the likelihood ('ll') used.
            if data_config['ll'] in [CogLL.__name__]:
                # Multi-output case: one MSSE/NLPD series per output dim,
                # each normalised by the train-mean predictor's SSE.
                for i in range(Ytrue.shape[0]):
                    Y_mean = data_train['Y_' + str(i)].mean()
                    PlotOutput.add_to_list(
                        graphs['MSSE'],
                        PlotOutput.config_to_str(data_config) + '_' + str(i),
                        ((Ypred[i] - Ytrue[i])**2).mean() /
                        ((Y_mean - Ytrue[i]) ** 2).mean())
                    NLPD = np.array(data_test['NLPD_' + str(i)])
                    PlotOutput.add_to_list(
                        graphs['NLPD'],
                        PlotOutput.config_to_str(data_config) + '_' + str(i),
                        NLPD)
            if data_config['ll'] in [UnivariateGaussian.__name__,
                                     WarpLL.__name__]:
                # Regression: per-point standardised squared error + NLPD.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['SSE'], PlotOutput.config_to_str(data_config),
                    (Ypred[0] - Ytrue[0])**2 /
                    ((Y_mean - Ytrue[0]) **2).mean())
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            if data_config['ll'] in [LogisticLL.__name__]:
                # Binary classification: error rate with labels in {-1, 1}
                # and predictions thresholded at 0.5.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['ER'], PlotOutput.config_to_str(data_config),
                    np.array([(((Ypred[0] > 0.5) & (Ytrue[0] == -1)) |
                               ((Ypred[0] < 0.5) & (Ytrue[0] == 1))
                               ).mean()]))
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            if data_config['ll'] in [SoftmaxLL.__name__]:
                # Multi-class: error rate via argmax over output dims.
                NLPD = np.array(data_test['NLPD_0'])
                PlotOutput.add_to_list(
                    graphs['ER'], PlotOutput.config_to_str(data_config),
                    np.array(
                        [(np.argmax(Ytrue, axis=0) !=
                          np.argmax(Ypred, axis=0)).mean()]))
                PlotOutput.add_to_list(
                    graphs['NLPD'], PlotOutput.config_to_str(data_config),
                    NLPD)
            if data_config['ll'] in [LogGaussianCox.__name__]:
                # Point process: store (x, mean, var, truth) rows; the x
                # rescaling /365+1851.2026 presumably converts days to
                # calendar years for the coal-mining dataset — TODO confirm.
                X0 = np.array([data_test['X_0']])
                PlotOutput.add_to_list(
                    graphs['intensity'],
                    PlotOutput.config_to_str(data_config),
                    np.array([X0[0,:]/365+1851.2026, Ypred[0, :],
                              Yvar[0, :], Ytrue[0, :]]).T)
    # NOTE(review): iteritems / print-statement below are Python 2 only.
    for n, g in graphs.iteritems():
        if g:
            ion()
            for k in g.keys():
                if k in graph_n.keys():
                    print k, 'n: ', graph_n[k]
            if n in ['SSE', 'NLPD']:
                # Distributions per config: box plot + raw data export.
                g= DataFrame(dict([(k,Series(v))
                                   for k,v in g.iteritems()]))
                ax = g.plot(kind='box', title=n)
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv',
                         index=False)
            if n in ['ER', 'MSSE']:
                # Scalar metrics per config: bar plot of mean with std error
                # bars + raw data export.
                g= DataFrame(dict([(k,Series(v))
                                   for k,v in g.iteritems()]))
                check_dir_exists('../graph_data/')
                g.to_csv('../graph_data/' + name + '_' + n + '_data.csv',
                         index=False)
                m = g.mean()
                errors = g.std()
                ax =m.plot(kind='bar', yerr=errors, title=n)
                patches, labels = ax.get_legend_handles_labels()
                ax.legend(patches, labels, loc='lower center')
            if n in ['intensity']:
                # Intensity curves: export the shared x/truth once, then one
                # (x, mean, variance) frame per model. Assumes all models
                # share the same x grid (taken from the first entry).
                X = g.values()[0][:, 0]
                true_data = DataFrame({'x': X, 'y': g.values()[0][:, 3]})
                true_data.to_csv('../graph_data/' + name + '_' + 'true_y' +
                                 '_data.csv', index=False)
                plt.figure()
                color = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'w']
                c = 0
                check_dir_exists('../graph_data/')
                graph_data = DataFrame()
                for k,v in g.iteritems():
                    # plt.plot(X, v[:, 1], hold=True, color=color[c], label=k)
                    # plt.fill_between(X, v[:, 1] - 2 * np.sqrt(v[:, 2]), v[:, 1] + 2 * np.sqrt(v[:, 2]), alpha=0.2, facecolor=color[c])
                    graph_data = graph_data.append(DataFrame(
                        {'x': X,
                         'm' : v[:, 1],
                         'v' :v[:, 2],
                         'model_sp' :[k] * X.shape[0]}
                        ))
                    c += 1
                plt.legend(loc='upper center')
                graph_data.to_csv('../graph_data/' + name + '_' + n +
                                  '_data.csv', index=False)
    show(block=True)