def testNumpyParseDataInputs(self):
    # parse_data is handed a plain nested list here; the test only passes
    # when that call is rejected with a TypeError. Any other exception
    # propagates unchanged and is reported as an error by the test runner.
    try:
        parse_data([[0, 1], [2, 3]])
    except TypeError:
        # We want an error here!
        raised_type_error = True
    else:
        raised_type_error = False
    assert raised_type_error
def scatterplot_files(targetfile, targetcol, eventcol, modelfile,
                      modeloutputcol, **kwargs):
    '''
    scatterplot_files(targetfile, targetcol, eventcol, modelfile, modeloutputcol)

    Plots model output against target values and saves the plots as EPS.

    Takes two files because the target data and model data is allowed to be
    in different files. Events are ONLY taken from target data. Both files
    are read as comma-separated, row 0 is passed to parse_data as a row to
    ignore (presumably the header), and normalization is switched off.

    Arguments:
    targetfile     - path of the file holding the target and event columns
    targetcol      - column selector for the targets (given to parse_data)
    eventcol       - column selector for the events (given to parse_data)
    modelfile      - path of the file holding the model output
    modeloutputcol - column selector for the model output
    **kwargs       - accepted for backward compatibility, currently unused

    The C-index between targets and model output is printed and embedded in
    the file names. Note that this function overwrites the global
    matplotlib setting 'figure.figsize'.

    Raises ValueError if the two files yield a different number of rows.

    Writes two files to the current working directory, where <model> and
    <target> are the file base names without extension:
    scatter_cens_cind_<cindex>_<model>_<target>.eps
    scatter_nocens_<cindex>_<model>_<target>.eps
    '''
    # Calculate suitable size for the figure for use in LaTeX
    fig_width_pt = 396.0  # Get this from LaTeX using \showthe\columnwidth
    inches_per_pt = 1.0 / 72.27  # Convert pt to inch
    golden_mean = (sqrt(5) - 1.0) / 2.0  # Aesthetic ratio
    fig_width = fig_width_pt * inches_per_pt  # width in inches
    fig_height = fig_width * golden_mean  # height in inches
    plt.rcParams['figure.figsize'] = [fig_width, fig_height]

    # Targets and events both come from the target file.
    target_data = np.array(read_data_file(targetfile, ","))
    T, _ = parse_data(target_data, inputcols=(targetcol, eventcol),
                      ignorerows=[0], normalize=False)
    X = T[:, 0]
    events = T[:, 1]

    # Only the model output column is taken from the model file.
    model_data = np.array(read_data_file(modelfile, ","))
    D, _ = parse_data(model_data, inputcols=[modeloutputcol],
                      ignorerows=[0], normalize=False)
    Y = D[:, 0]

    # Without this check a mismatch surfaces as an opaque broadcasting error
    # below, or, for a single-row model file, is silently broadcast.
    if len(X) != len(Y):
        raise ValueError(
            "Target file has {0} rows but model file has {1} rows".format(
                len(X), len(Y)))

    # Pair each model output with the event flag of the matching target row.
    outputs = np.empty((len(X), 2), dtype='float')
    outputs[:, 0] = Y
    outputs[:, 1] = events

    c_index = get_C_index(T, outputs)
    print("C-Index between these files is: {0}".format(c_index))

    model_stem = os.path.splitext(os.path.basename(modelfile))[0]
    target_stem = os.path.splitext(os.path.basename(targetfile))[0]

    # First plot: events are handed to scatter.
    scatter(X, Y, events=events, x_label='Targets', y_label='Model output',
            gridsize=30, mincnt=0, show_plot=False)
    plt.savefig('scatter_cens_cind_{cindex}_{0}_{1}.eps'.format(
        model_stem, target_stem, cindex=c_index))

    # Second plot: same data without the event information.
    scatter(X, Y, x_label='Targets', y_label='Model output',
            gridsize=30, mincnt=0, show_plot=False)
    plt.savefig('scatter_nocens_{cindex}_{0}_{1}.eps'.format(
        model_stem, target_stem, cindex=c_index))
def scatterplot_files(targetfile, targetcol, eventcol, modelfile,
                      modeloutputcol, **kwargs):
    '''
    scatterplot_files(targetfile, targetcol, eventcol, modelfile, modeloutputcol)

    Plots model output against target values and saves the plots as EPS.

    Takes two files because the target data and model data is allowed to be
    in different files. Events are ONLY taken from target data. Both files
    are read as comma-separated, row 0 is passed to parse_data as a row to
    ignore (presumably the header), and normalization is switched off.

    Arguments:
    targetfile     - path of the file holding the target and event columns
    targetcol      - column selector for the targets (given to parse_data)
    eventcol       - column selector for the events (given to parse_data)
    modelfile      - path of the file holding the model output
    modeloutputcol - column selector for the model output
    **kwargs       - accepted for backward compatibility, currently unused

    The C-index between targets and model output is printed and embedded in
    the file names. Note that this function overwrites the global
    matplotlib setting 'figure.figsize'.

    Raises ValueError if the two files yield a different number of rows.

    Writes two files to the current working directory, where <model> and
    <target> are the file base names without extension:
    scatter_cens_cind_<cindex>_<model>_<target>.eps
    scatter_nocens_<cindex>_<model>_<target>.eps
    '''
    # Calculate suitable size for the figure for use in LaTeX
    fig_width_pt = 396.0  # Get this from LaTeX using \showthe\columnwidth
    inches_per_pt = 1.0 / 72.27  # Convert pt to inch
    golden_mean = (sqrt(5) - 1.0) / 2.0  # Aesthetic ratio
    fig_width = fig_width_pt * inches_per_pt  # width in inches
    fig_height = fig_width * golden_mean  # height in inches
    plt.rcParams['figure.figsize'] = [fig_width, fig_height]

    # Targets and events both come from the target file.
    target_data = np.array(read_data_file(targetfile, ","))
    T, _ = parse_data(target_data, inputcols=(targetcol, eventcol),
                      ignorerows=[0], normalize=False)
    X = T[:, 0]
    events = T[:, 1]

    # Only the model output column is taken from the model file.
    model_data = np.array(read_data_file(modelfile, ","))
    D, _ = parse_data(model_data, inputcols=[modeloutputcol],
                      ignorerows=[0], normalize=False)
    Y = D[:, 0]

    # Without this check a mismatch surfaces as an opaque broadcasting error
    # below, or, for a single-row model file, is silently broadcast.
    if len(X) != len(Y):
        raise ValueError(
            "Target file has {0} rows but model file has {1} rows".format(
                len(X), len(Y)))

    # Pair each model output with the event flag of the matching target row.
    outputs = np.empty((len(X), 2), dtype='float')
    outputs[:, 0] = Y
    outputs[:, 1] = events

    c_index = get_C_index(T, outputs)
    print("C-Index between these files is: {0}".format(c_index))

    model_stem = os.path.splitext(os.path.basename(modelfile))[0]
    target_stem = os.path.splitext(os.path.basename(targetfile))[0]

    # First plot: events are handed to scatter.
    scatter(X, Y, events=events, x_label='Targets', y_label='Model output',
            gridsize=30, mincnt=0, show_plot=False)
    plt.savefig('scatter_cens_cind_{cindex}_{0}_{1}.eps'.format(
        model_stem, target_stem, cindex=c_index))

    # Second plot: same data without the event information.
    scatter(X, Y, x_label='Targets', y_label='Model output',
            gridsize=30, mincnt=0, show_plot=False)
    plt.savefig('scatter_nocens_{cindex}_{0}_{1}.eps'.format(
        model_stem, target_stem, cindex=c_index))
cb.set_label('log10(N)') if plotSlope: ax.plot(sorted_x_y[:, 0], slope*sorted_x_y[:, 0] + cut, 'r-') #Print slope #ax.scatter(sorted_x_y[:, 0], sorted_x_y[:, 1], c='g') ax.set_xlabel(x_label) ax.set_ylabel(y_label) ax.set_title(title) if axWasNone and show_plot: show() if __name__ == '__main__': import sys if len(sys.argv) < 2: #filename = "/home/gibson/jonask/Projects/Kaplan-Meier/genetic.csv" #filename = "/home/gibson/jonask/Projects/Kaplan-Meier/censored_3node.csv" #filename = "/home/gibson/jonask/Projects/Experiments/src/cox_com_3tanh_output" filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/publication_data/ann/" + \ ".test_.15_tanh_1323233958_Two_thirds_of_the_n4369_dataset_with_logs_lymf_5YEAR.cvs" else: filename = sys.argv[1] data = np.array(read_data_file(filename, "\t")) D, t = parse_data(data, inputcols = (0, 1, 2), ignorerows = [0], normalize = False) kaplanmeier(D, 0, 2, 1, show_plot = False) scatter(D[:, 0], D[:, 1], D[:, 2], x_label = 'Target Data', y_label = 'Model Correlation', show_plot = False) scatter(D[:, 0], D[:, 1], x_label = 'Target Data', y_label = 'Model Output')