def main():
    args = parser.parse_args()
    graph = get_graph_from_hyperparameters(args.pred)
    batch_size = 32

    allele_sequence_data, max_allele_length = load_allele_sequence_data(
        HOME_PATH + '/pan_allele/files/pseudo/pseudo_sequences.fasta')
    predictions = read_tcell_predictions(
        HOME_PATH + '/pan_allele/files/iedb-tcell-2009-negative.csv',
        HOME_PATH + '/pan_allele/files/iedb-tcell-2009-positive.csv')

    # Skip HLA-C alleles
    allele_list = sorted(x for x in predictions.keys() if not x.startswith('C'))

    # Load the saved weights for each epoch in the requested range and score them
    epoch_start, epoch_end = map(int, args.epoch.split(','))
    for epoch in range(epoch_start, epoch_end):
        graph.load_weights(HOME_PATH + '/weights' + str(args.max_ic50) + '/' + args.pred
                           + '/weights' + str(batch_size) + '_' + str(epoch))
        Y_true = []
        Y_pred = []
        for allele in allele_list:
            for peptide in predictions[allele].keys():
                # Score only peptides of length 8-11
                if 7 < len(peptide) < 12:
                    Y_true.append(predictions[allele][peptide])
                    Y_pred.append(make_prediction(peptide, allele_sequence_data[allele], graph))
        score = scores(Y_true, Y_pred)
        print epoch, ','.join(map(str, score[1:]))
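# The model's raw output lives on a normalized log-IC50 scale, and the scripts
# below recover nanomolar affinities via max_ic50 ** (1 - y). A minimal sketch
# of that transform and its implied inverse encoding; the encode_ic50 helper
# and the 20000 nM default are assumptions for illustration, not part of
# these scripts (they take max_ic50 from args.max_ic50).
import numpy as np

MAX_IC50 = 20000.0  # assumed default, for illustration only

def encode_ic50(ic50, max_ic50=MAX_IC50):
    # Map IC50 (nM) into [0, 1]; strong binders (low IC50) map near 1.
    return 1.0 - np.log(ic50) / np.log(max_ic50)

def decode_output(y, max_ic50=MAX_IC50):
    # Inverse transform used throughout these scripts: max_ic50 ** (1 - y).
    return max_ic50 ** (1.0 - y)

print decode_output(encode_ic50(50.0))  # round-trips to ~50 nM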
def main():
    args = parser.parse_args()
    max_ic50 = args.max_ic50

    # IEDB binding data
    allele_groups, df = load_binding_data(BINDING_DATA_PATH, max_ic50=max_ic50, peptide_length=9)

    # Graph is initialized here so that pseudo sequences are built accordingly
    graph = get_graph_from_hyperparameters(args.pred)

    # Allele sequence data
    allele_sequence_data, max_allele_length = load_allele_sequence_data(SEQUENCE_DATA_PATH)
    allele_list = sorted(create_allele_list(allele_groups, allele_sequence_data))

    # Blind data from a txt file that aggregates data for all alleles
    blind_allele_groups, blind_df = load_binding_data('blind_data.txt', max_ic50=max_ic50, peptide_length=None)
    blind_allele_list = sorted(create_allele_list(blind_allele_groups, allele_sequence_data))

    nb_iter = 50  # number of networks in the ensemble
    preds_allele = defaultdict(float)  # running ensemble mean per allele; += starts from 0.0
    actual_allele = {}

    for i in range(nb_iter):
        # get_model_data shuffles the data, so no further shuffling is needed
        peptides, mhc, Y = get_model_data(allele_list,
                                          allele_sequence_data,
                                          allele_groups,
                                          peptide_length=9,
                                          mhc_length=max_allele_length)

        # Split peptides, MHC sequences, and binding values into training and test sets
        peptides_train, peptides_test = split_train_test(peptides, 5)
        mhc_train, mhc_test = split_train_test(mhc, 5)
        Y_train, Y_test = split_train_test(Y, 5)

        # Fit a freshly initialized graph model
        graph = get_graph_from_hyperparameters(args.pred)
        graph.fit({'peptide': peptides_train, 'mhc': mhc_train, 'output': Y_train},
                  batch_size=32,
                  nb_epoch=12,
                  verbose=0)

        # Accumulate predictions for each blind allele
        for allele in blind_allele_list:
            print i, allele
            predictions = read_blind_predictions(HOME_PATH + '/combined-test-data/' + allele + '.csv')
            peptides = predictions.keys()
            preds = np.array([make_prediction(peptide, allele_sequence_data[allele], graph)
                              for peptide in peptides])
            meas = np.array([predictions[peptide]['meas'] for peptide in peptides])
            preds_allele[allele] += preds / nb_iter
            actual_allele[allele] = meas

    # Average the metrics over all alleles
    calculated_metrics = np.zeros(6)
    for allele in blind_allele_list:
        Y_pred_allele = max_ic50 ** (1 - preds_allele[allele])
        Y_true_allele = actual_allele[allele]
        calculated_metrics += scores(Y_true_allele, Y_pred_allele)  # sum metrics across alleles
    print calculated_metrics / len(blind_allele_list)  # mean over alleles
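# The ensemble mean above is built incrementally: each of the nb_iter networks
# adds preds / nb_iter, so after the loop preds_allele[allele] equals the mean
# prediction over all networks. A minimal numpy sketch of the same
# accumulation, using hypothetical toy predictions:
import numpy as np

nb_iter = 3
network_preds = [np.array([0.2, 0.8]),
                 np.array([0.4, 0.6]),
                 np.array([0.3, 0.7])]

running = 0.0
for preds in network_preds:
    running += preds / nb_iter        # each network contributes 1/nb_iter of its output

print running                         # [ 0.3  0.7]
print np.mean(network_preds, axis=0)  # identical result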
def main():
    # args.pred is one of "conv", "ffn_concat", "ffn_mult"
    args = parser.parse_args()
    graph = get_graph_from_hyperparameters(args.pred)
    allele_sequence_data, max_allele_length = load_allele_sequence_data(SEQUENCE_DATA_PATH)

    predictors = ['mhcflurry', 'netmhcpan', 'netmhc', 'smmpmbec_cpp']

    allele_list = ['A0101', 'A0201', 'A0202', 'A0203', 'A0206', 'A0301',
                   'A1101', 'A2301', 'A2402', 'A2501', 'A2601', 'A2602',
                   'A2603', 'A2902', 'A3001', 'A3002', 'A3101', 'A3201',
                   'A3301', 'A6801', 'A6802', 'A6901', 'A8001', 'B0702',
                   'B0801', 'B0802', 'B0803', 'B1501', 'B1503', 'B1509',
                   'B1517', 'B1801', 'B2703', 'B2705', 'B3501', 'B3801',
                   'B3901', 'B4001', 'B4002', 'B4402', 'B4403', 'B4501',
                   'B4601', 'B5101', 'B5301', 'B5401', 'B5701', 'B5801']

    # Load the saved weights for each epoch in the requested range
    batch_size = 32
    epoch_start, epoch_end = map(int, args.epoch.split(','))
    for epoch in range(epoch_start, epoch_end):
        graph.load_weights(HOME_PATH + '/weights' + str(args.max_ic50) + '/' + args.pred
                           + '/weights' + str(batch_size) + '_' + str(epoch))

        # Initialize per-predictor accumulators
        data_len = sum(len(read_blind_predictions(HOME_PATH + '/combined-test-data/' + allele + '.csv').keys())
                       for allele in allele_list)
        Y_true_all = np.zeros(data_len)
        total_metrics = {val: np.zeros(data_len) for val in predictors}
        calculated_metrics = {val: np.zeros(6) for val in predictors}
        pos = 0

        # Calculate metrics per allele
        for allele in allele_list:
            filename = HOME_PATH + '/combined-test-data/' + allele + '.csv'
            predictions = read_blind_predictions(filename)
            peptides = predictions.keys()
            for peptide in peptides:
                predictions[peptide]['mhcflurry'] = args.max_ic50 ** (1 - make_prediction(peptide, allele_sequence_data[allele], graph))

            df_pred = pd.DataFrame(predictions)
            Y_true_allele = np.array(df_pred.loc['meas'])
            Y_true_all[pos:pos + len(peptides)] = Y_true_allele
            if args.allele_info:
                # ic50_cutoff: binder cutoff, presumably defined at module level
                print "\n=====", allele, sum(Y_true_allele <= ic50_cutoff), len(Y_true_allele), "===="
            for val in predictors:
                Y_pred_allele = np.array(df_pred.loc[val])
                calculated_metrics[val] += scores(Y_true_allele, Y_pred_allele)
                if args.allele_info:
                    print val, scores(Y_true_allele, Y_pred_allele)
            pos += len(peptides)

        # Report per-predictor metrics averaged over alleles
        print '\n', epoch,
        for val in predictors:
            calculated_metrics[val] = calculated_metrics[val] / len(allele_list)
            print val, ',', ','.join(map(str, calculated_metrics[val][1:]))
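# read_blind_predictions evidently returns a dict keyed by peptide, each value
# a dict holding the measured affinity under 'meas' plus one entry per
# predictor. pd.DataFrame on such a dict-of-dicts makes peptides the columns
# and the inner keys the index, which is why df_pred.loc['meas'] selects the
# measurements. A sketch with hypothetical peptides and values:
import pandas as pd

predictions = {
    'SIINFEKL':  {'meas': 120.0, 'netmhc': 95.0, 'netmhcpan': 110.0},
    'GILGFVFTL': {'meas':  30.0, 'netmhc': 45.0, 'netmhcpan':  25.0},
}

df_pred = pd.DataFrame(predictions)  # columns = peptides, index = 'meas' + predictors
print df_pred.loc['meas']            # measured IC50 for every peptide
print df_pred.loc['netmhc']          # one predictor's scores for every peptide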