def test_model_arrays(savefile, filename, P, T, **kwargs):
    """Run a pickled committee over ``P`` and save its risk outputs.

    The committee is unpickled from ``savefile`` and its ``risk_eval`` method
    is called once per row of ``P``. The results are written to
    ``test_<savefile stem>_<filename stem>.cvs`` (relative path, so it ends up
    in the current working directory). When targets are supplied, the C-index
    is printed and a Kaplan-Meier plot is produced via ``kaplanmeier``.

    Args:
        savefile: Path to the pickled committee.
        filename: Only its base name (without extension) is used, to build the
            names of the output files.
        P: Iterable of input rows; each row is passed to ``risk_eval``.
        T: Targets, indexed as ``T[:, 0]`` (passed as ``time_array``) and
            ``T[:, 1]`` (passed as ``event_array``). May be ``None`` or empty.
        **kwargs: Forwarded unchanged to ``kaplanmeier``.

    Returns:
        The array of outputs when ``T`` is ``None`` or empty; otherwise the
        name of the written output file. (The two branches return different
        types; this is kept for backward compatibility.)
    """
    # Pickle data is binary: the file must be opened in 'rb' mode, text mode
    # corrupts or rejects it depending on platform/interpreter.
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # committee files from a trusted source.
    with open(savefile, 'rb') as FILE:
        master_com = pickle.load(FILE)
    print("Committee size: {0}".format(len(master_com)))

    save_stem = os.path.splitext(os.path.basename(savefile))[0]
    data_stem = os.path.splitext(os.path.basename(filename))[0]
    # NOTE(review): '.cvs' looks like a typo for '.csv'; left unchanged because
    # other tooling may already look for this exact file name.
    output_file = 'test_{0}_{1}.cvs'.format(save_stem, data_stem)

    # Need double brackets for dimensions to be right for numpy
    outputs = numpy.array([[master_com.risk_eval(inputs)] for inputs in P])

    if T is None or len(T) == 0:
        # No targets available: only the raw outputs can be reported.
        with open(output_file, 'w') as F:
            F.write("Outputs\n")
            for o in outputs:
                F.write("{0}\n".format(o[0]))
        return outputs

    c_index = get_C_index(T, outputs)
    print("C-Index: {0}".format(c_index))

    # The module already treats plt as possibly falsy (see the savefig guard
    # below), so the figure settings are guarded the same way.
    if plt:
        # Calculate suitable size for the figure for use in LaTeX
        fig_width_pt = 396.0  # Get this from LaTeX using \showthe\columnwidth
        inches_per_pt = 1.0 / 72.27  # Convert pt to inch
        golden_mean = (sqrt(5) - 1.0) / 2.0  # Aesthetic ratio
        fig_width = fig_width_pt * inches_per_pt  # width in inches
        fig_height = fig_width * golden_mean  # height in inches
        plt.rcParams['figure.figsize'] = [fig_width, fig_height]

    kaplanmeier(time_array=T[:, 0], event_array=T[:, 1], output_array=outputs,
                threshold=None, show_plot=False, bestcut=False, **kwargs)

    if plt:
        plt.savefig('kaplanmeier_{0}_{1}.eps'.format(save_stem, data_stem))

    with open(output_file, 'w') as F:
        F.write("Targets,Outputs,Events\n")
        for t, o in zip(T, outputs):
            F.write("{0},{1},{2}\n".format(t[0], o[0], t[1]))

    return output_file
def experiment(net, P, T, vP, vT, filename, epochs, learning_rate):
    """Train ``net`` with ``traingd`` and plot Kaplan-Meier curves of its outputs.

    Args:
        net: Network to train; must offer ``sim``.
        P: Training inputs.
        T: Training targets, indexed as ``T[:, 0]`` (passed as ``time_array``)
            and ``T[:, 1]`` (passed as ``event_array``).
        vP: Validation inputs; may be ``None`` or empty.
        vT: Validation targets, same column layout as ``T``.
        filename: Only used in the log message.
        epochs: Passed to ``traingd``.
        learning_rate: Passed to ``traingd``.

    Returns:
        The network returned by ``traingd``, or the network that was passed in
        if training raised ``FloatingPointError``.
    """

    def plot_curves(targets, predictions):
        # Only the first output column is plotted.
        kaplanmeier(time_array=targets[:, 0],
                    event_array=targets[:, 1],
                    output_array=predictions[:, 0])

    run_info = ' ' + str(epochs) + ", rate: " + str(learning_rate)
    logger.info("Running experiment for: " + filename + run_info)

    with_event = T[:, 1].sum()
    print("Number of patients with events: " + str(with_event))
    censored = (1 - T[:, 1]).sum()
    print("Number of censored patients: " + str(censored))

    # The result is not used below; the call is kept as in the original.
    generate_timeslots(T)

    training_pair = (P, T)
    validation_pair = (vP, vT)
    try:
        net = traingd(net, training_pair, validation_pair, epochs,
                      learning_rate, block_size=100, error_module=cox_error)
    except FloatingPointError:
        # Training is abandoned; the network is evaluated as it was.
        print('Aaawww....')

    train_outputs = net.sim(P)
    logger.info("C index = " + str(get_C_index(T, train_outputs)))
    plot_curves(T, train_outputs)

    has_validation = vP is not None and len(vP) > 0
    if has_validation:
        plot_curves(vT, net.sim(vP))

    return net
def survival_stat(filename, thresholds = None):
    """Print the C-index for a comma-separated result file and plot its curves.

    The file is read with ``read_data_file`` and parsed with ``parse_data``
    (input columns 2 through 10, row 0 ignored, no normalisation). Columns 2
    and 3 of the parsed data are used as targets, and columns -1 and 3 as the
    outputs handed to ``get_C_index``.

    Args:
        filename: Path of the data file.
        thresholds: Passed to ``kaplanmeier`` as ``threshold``.
    """
    raw_rows = read_data_file(filename, ",")
    table, _unused_targets = parse_data(
        np.array(raw_rows),
        inputcols=tuple(range(2, 11)),
        ignorerows=[0],
        normalize=False,
    )

    target_cols = table[:, (2, 3)]
    output_cols = table[:, (-1, 3)]
    c_value = get_C_index(target_cols, output_cols)

    print("C-index: " + str(c_value))
    inverse = 1 / c_value
    print("Genetic error: " + str(inverse))

    cut = kaplanmeier(table, 2, 3, -1, threshold=thresholds)
    print("Threshold dividing the set in two equal pieces: " + str(cut))

    if plt:
        plt.show()
def committee_test():
    """Interactively train a feed-forward committee and report its C-indices.

    Settings (network size, committee size, population size, mutation rate,
    input columns, censoring cutoff, test-set fraction, generations) are read
    from the console. The data file is parsed, split with
    ``get_validation_set`` and a committee is trained with
    ``train_committee``/``train_evolutionary``. C-indices are printed and,
    when ``plt`` is available, Kaplan-Meier plots are shown. Optionally the
    committee output for every row of the data file is printed.
    """

    def ask(prompt, default):
        # NOTE(review): this relies on input() evaluating the typed text and
        # raising SyntaxError on an empty line (Python 2 behaviour) - confirm
        # the interpreter version before porting.
        try:
            return input(prompt)
        except SyntaxError:
            return default

    netsize = ask('Number of hidden nodes? [1]: ', 1)
    comsize = ask('Committee size? [1]: ', 1)
    pop_size = ask('Population size? [100]: ', 100)
    mutation_rate = ask('Please input a mutation rate (0.05): ', 0.05)

    filename = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"

    columns = ask("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n",
                  (2, -4, -3, -2, -1))

    P, T = parse_file(filename, targetcols=[4, 5], inputcols=columns, ignorerows=[0], normalize=True)

    # Remove tail censored
    cutoff = ask('Cutoff for censored data? [9999 years]: ', 9999)
    P, T = copy_without_censored(P, T, cutoff)

    # Divide into training and test sets. Any unusable answer falls back to
    # no test set; Ctrl-C is no longer swallowed as it was by a bare except.
    try:
        test_size = float(input('Size of test set (not used in training)? Input in fractions. Default is [0.0]: '))
    except (SyntaxError, NameError, TypeError, ValueError, EOFError):
        test_size = 0.0
    ((TP, TT), (VP, VT)) = get_validation_set(P, T, validation_size=test_size, binary_column=1)
    print("Length of training set: " + str(len(TP)))
    print("Length of test set: " + str(len(VP)))

    epochs = ask("\nNumber of generations (1): ", 1)

    com = build_feedforward_committee(comsize, len(P[0]), netsize, 1, output_function='linear')

    # Train on the training split only: the prompt above promises that the
    # test set is not used in training, so passing the full P, T here would
    # leak the held-out patients into the committee.
    # 1 is the column in the target array which holds the binary censoring information
    test_errors, vald_errors, data_sets = train_committee(
        com, train_evolutionary, TP, TT, 1, epochs,
        error_function=c_index_error,
        population_size=pop_size,
        mutation_chance=mutation_rate)
    # First 0 gives training sets, second 0 gives inputs.
    com.set_training_sets([data_set[0][0] for data_set in data_sets])

    print('\nTest C_indices, Validation C_indices:')
    for terr, verr in zip(test_errors.values(), vald_errors.values()):
        print(str(1 / terr) + ", " + str(1 / verr))

    if plt:
        # Need double brackets for dimensions to be right for numpy
        outputs = numpy.array([[com.risk_eval(inputs)] for inputs in TP])
        kaplanmeier(time_array=TT[:, 0], event_array=TT[:, 1], output_array=outputs[:, 0], threshold=0.5)
        train_c_index = get_C_index(TT, outputs)
        print("\nC-index on the training set: " + str(train_c_index))
        if len(VP) > 0:
            outputs = numpy.array([[com.risk_eval(inputs)] for inputs in VP])
            test_c_index = get_C_index(VT, outputs)
            kaplanmeier(time_array=VT[:, 0], event_array=VT[:, 1], output_array=outputs[:, 0], threshold=0.5)
            print("C-index on the test set: " + str(test_c_index))

        plt.show()

    try:
        answer = input("\nDo you wish to print committee risk output? ['n']: ")
    except (SyntaxError, NameError):
        answer = 'n'

    if answer != 'n' and answer != 'no':
        inputs = read_data_file(filename)
        P, T = parse_file(filename, targetcols=[4, 5], inputcols=columns, ignorerows=[0], normalize=True)
        outputs = [[com.risk_eval(patient)] for patient in P]
        # Rows present in the raw file but not in P get a header cell in
        # front so that raw rows and outputs line up.
        while len(inputs) > len(outputs):
            outputs.insert(0, ["net_output"])

        print("\n")
        for raw_cols, net_cols in zip(inputs, outputs):
            # Every raw column is followed by a comma; output columns are
            # appended without a separator, as before.
            line = "".join(str(col) + ',' for col in raw_cols)
            line += "".join(str(col) for col in net_cols)
            print(line)
def plotKM(targets, outputs, cut):
    """Draw Kaplan-Meier curves for ``outputs`` without showing the plot.

    Args:
        targets: Indexed as ``targets[:, 0]`` (passed as ``time_array``) and
            ``targets[:, 1]`` (passed as ``event_array``).
        outputs: Passed to ``kaplanmeier`` as ``output_array``.
        cut: Passed to ``kaplanmeier`` as ``threshold``.
    """
    times = targets[:, 0]
    events = targets[:, 1]
    kaplanmeier(
        time_array=times,
        event_array=events,
        output_array=outputs,
        threshold=cut,
        show_plot=False,
    )
def test_model_arrays(savefile, filename, P, T, **kwargs):
    """Run a pickled committee over ``P`` and save its risk outputs.

    The committee is unpickled from ``savefile`` and its ``risk_eval`` method
    is called once per row of ``P``. The results are written to
    ``test_<savefile stem>_<filename stem>.cvs`` (relative path, so it ends up
    in the current working directory). When targets are supplied, the C-index
    is printed and a Kaplan-Meier plot is produced via ``kaplanmeier``.

    Args:
        savefile: Path to the pickled committee.
        filename: Only its base name (without extension) is used, to build the
            names of the output files.
        P: Iterable of input rows; each row is passed to ``risk_eval``.
        T: Targets, indexed as ``T[:, 0]`` (passed as ``time_array``) and
            ``T[:, 1]`` (passed as ``event_array``). May be ``None`` or empty.
        **kwargs: Forwarded unchanged to ``kaplanmeier``.

    Returns:
        The array of outputs when ``T`` is ``None`` or empty; otherwise the
        name of the written output file. (The two branches return different
        types; this is kept for backward compatibility.)
    """
    # Pickle data is binary: the file must be opened in "rb" mode, text mode
    # corrupts or rejects it depending on platform/interpreter.
    # NOTE(review): pickle.load can execute arbitrary code; only load
    # committee files from a trusted source.
    with open(savefile, "rb") as FILE:
        master_com = pickle.load(FILE)
    print("Committee size: {0}".format(len(master_com)))

    save_stem = os.path.splitext(os.path.basename(savefile))[0]
    data_stem = os.path.splitext(os.path.basename(filename))[0]
    # NOTE(review): ".cvs" looks like a typo for ".csv"; left unchanged because
    # other tooling may already look for this exact file name.
    output_file = "test_{0}_{1}.cvs".format(save_stem, data_stem)

    # Need double brackets for dimensions to be right for numpy
    outputs = numpy.array([[master_com.risk_eval(inputs)] for inputs in P])

    if T is None or len(T) == 0:
        # No targets available: only the raw outputs can be reported.
        with open(output_file, "w") as F:
            F.write("Outputs\n")
            for o in outputs:
                F.write("{0}\n".format(o[0]))
        return outputs

    c_index = get_C_index(T, outputs)
    print("C-Index: {0}".format(c_index))

    # The module already treats plt as possibly falsy (see the savefig guard
    # below), so the figure settings are guarded the same way.
    if plt:
        # Calculate suitable size for the figure for use in LaTeX
        fig_width_pt = 396.0  # Get this from LaTeX using \showthe\columnwidth
        inches_per_pt = 1.0 / 72.27  # Convert pt to inch
        golden_mean = (sqrt(5) - 1.0) / 2.0  # Aesthetic ratio
        fig_width = fig_width_pt * inches_per_pt  # width in inches
        fig_height = fig_width * golden_mean  # height in inches
        plt.rcParams["figure.figsize"] = [fig_width, fig_height]

    kaplanmeier(
        time_array=T[:, 0],
        event_array=T[:, 1],
        output_array=outputs,
        threshold=None,
        show_plot=False,
        bestcut=False,
        **kwargs
    )

    if plt:
        plt.savefig("kaplanmeier_{0}_{1}.eps".format(save_stem, data_stem))

    with open(output_file, "w") as F:
        F.write("Targets,Outputs,Events\n")
        for t, o in zip(T, outputs):
            F.write("{0},{1},{2}\n".format(t[0], o[0], t[1]))

    return output_file
def plotKM(targets, outputs, cut):
    """Plot Kaplan-Meier curves for ``outputs`` split at ``cut``, without showing them.

    Args:
        targets: Column 0 is handed to ``kaplanmeier`` as ``time_array`` and
            column 1 as ``event_array``.
        outputs: Handed to ``kaplanmeier`` as ``output_array``.
        cut: Handed to ``kaplanmeier`` as ``threshold``.
    """
    km_arguments = dict(
        time_array=targets[:, 0],
        event_array=targets[:, 1],
        output_array=outputs,
        threshold=cut,
        show_plot=False,
    )
    kaplanmeier(**km_arguments)
def train_single():
    """Interactively train a single feed-forward network on one of the studies.

    Settings (hidden nodes, population size, mutation rate, study, input
    columns, censoring cutoff, number of cross-validation pieces,
    generations) are read from the console. The chosen data file is parsed
    and split with ``get_cross_validation_sets``, and the network is trained
    on every split in turn with ``test``. When ``plt`` is available,
    Kaplan-Meier plots are shown per split. Optionally the network output is
    written via ``print_output``.
    """

    def ask(prompt, default):
        # NOTE(review): this relies on input() evaluating the typed text and
        # raising SyntaxError on an empty line (Python 2 behaviour) - confirm
        # the interpreter version before porting.
        try:
            return input(prompt)
        except SyntaxError:
            return default

    netsize = ask('Number of hidden nodes? [1]: ', 1)
    pop_size = ask('Population size? [100]: ', 100)
    mutation_rate = ask('Please input a mutation rate (0.05): ', 0.05)

    SB22 = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_SB22.txt"
    Benmargskohorten = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_Benmargskohorten.txt"
    SB91b = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset_SB91b.txt"
    all_studies = "/home/gibson/jonask/Dropbox/Ann-Survival-Phd/Two_thirds_of_SA_1889_dataset.txt"

    # Real data
    print("Studies to choose from:")
    print("1: SB22")
    print("2: Benmargskohorten")
    print("3: SB91b")
    print("0: All combined (default)")

    study = ask("Which study to train on? [0]: ", 0)
    if study == 1:
        filename = SB22
    elif study == 2:
        filename = Benmargskohorten
    elif study == 3:
        filename = SB91b
    else:
        filename = all_studies

    columns = ask("Which columns to include? (Do NOT forget trailing comma if only one column is used, e.g. '3,'\nAvailable columns are: 2, -4, -3, -2, -1. Just press ENTER for all columns.\n",
                  (2, -4, -3, -2, -1))

    # Only the chosen study is parsed. The former per-study "comparison"
    # dictionary was never read, so parsing the other files was wasted work.
    P, T = parse_file(filename, targetcols=[4, 5], inputcols=columns, ignorerows=[0], normalize=True)

    # Remove tail censored
    cutoff = ask('Cutoff for censored data? [9999 years]: ', 9999)
    P, T = copy_without_censored(P, T, cutoff)

    # Divide into validation sets. Any unusable answer falls back to a single
    # piece; Ctrl-C is no longer swallowed as it was by a bare except.
    try:
        pieces = input('Size of validation set? Input denominator (1 for no validation set). Default is 1/[1] parts: ')
    except (SyntaxError, NameError, TypeError, ValueError, EOFError):
        pieces = 1
    TandV = get_cross_validation_sets(P, T, pieces, binary_column=1)

    # Network part
    p = len(P[0])  # number of input covariates
    net = build_feedforward(p, netsize, 1, output_function='linear')

    epochs = ask("Number of generations (1): ", 1)

    # NOTE(review): plotting is assumed to happen once per split (inside the
    # loop) - confirm against the original indentation.
    for ((tP, tT), (vP, vT)) in TandV:
        # The same network object is trained further on every split.
        net = test(net, tP, tT, vP, vT, filename, epochs,
                   population_size=pop_size, mutation_rate=mutation_rate)

        if plt:
            outputs = net.sim(tP)
            threshold = kaplanmeier(time_array=tT[:, 0], event_array=tT[:, 1], output_array=outputs[:, 0])
            if len(vP) > 0:
                # Reuse the training threshold so both plots split at the
                # same output value.
                outputs = net.sim(vP)
                kaplanmeier(time_array=vT[:, 0], event_array=vT[:, 1], output_array=outputs[:, 0], threshold=threshold)
            print("\nThreshold dividing the training set in two equal pieces: " + str(threshold))

            raw_input("\nPress enter to show plots...")
            plt.show()

    try:
        answer = input("Do you wish to print network output? Enter filename, or 'no' / 'n'. ['n']: ")
    except (SyntaxError, NameError):
        answer = 'n'

    # Decide whether output is wanted before looking at the file system, so
    # that an existing file literally named 'n' or 'no' cannot turn a decline
    # into a write.
    if answer != 'n' and answer != 'no':
        if os.path.exists(answer):
            print("File exists. Will add random number to front")
            answer = str(random.randint(0, 123456)) + answer
        print_output(answer, net, filename, targetcols=[4, 5], inputcols=columns, ignorerows=[0], normalize=True)