def run(self, burn_in=None, thinning=None, minimum_TN=None):
    """Evaluate the classifier fold by fold, choosing K with a line search.

    The mask ``self.M`` is split into ``self.folds`` test folds (and the
    matching training masks).  For every fold a ``LineSearch`` over
    ``self.values_K`` is run on the training mask, the K that is best
    according to ``self.quality_metric`` is selected, and a model with
    that K is trained and scored via ``self.run_model``.  Intermediate and
    final results are written to ``self.fout``; the fold number and the
    final summary are also printed.

    Arguments:
        burn_in, thinning, minimum_TN: forwarded unchanged to both
            ``LineSearch.search`` and ``self.run_model``.

    Returns nothing; results are only logged.
    """
    # Options shared by the line search and the final per-fold model.
    sampling_options = dict(
        burn_in=burn_in,
        thinning=thinning,
        minimum_TN=minimum_TN,
    )

    test_folds = mask.compute_folds_attempts(
        I=self.I,
        J=self.J,
        no_folds=self.folds,
        attempts=attempts_generate_M,
        M=self.M,
    )
    training_folds = mask.compute_Ms(test_folds)

    # One list of per-fold scores for every performance measure.
    performances_test = dict((name, []) for name in measures)

    fold_pairs = zip(training_folds, test_folds)
    for fold_number, (train, test) in enumerate(fold_pairs, 1):
        print("Fold %s." % fold_number)

        # Line search over the candidate values of K on the training mask.
        line_search = LineSearch(
            classifier=self.classifier,
            values_K=self.values_K,
            R=self.R,
            M=train,
            priors=self.priors,
            initUV=self.init_UV,
            iterations=self.iterations,
            restarts=self.restarts,
        )
        line_search.search(**sampling_options)

        # Log every model fit, then the K that wins on the chosen metric.
        all_performances = line_search.all_values(metric=self.quality_metric)
        fits_line = "All model fits for fold %s, metric %s: %s.\n" % (
            fold_number, self.quality_metric, all_performances)
        self.fout.write(fits_line)
        self.fout.flush()

        best_K = line_search.best_value(metric=self.quality_metric)
        self.fout.write("Best K for fold %s: %s.\n" % (fold_number, best_K))

        # Retrain with the selected K and score on the held-out test fold.
        performance = self.run_model(train, test, best_K, **sampling_options)
        self.fout.write("Performance: %s.\n\n" % performance)
        self.fout.flush()

        for name in measures:
            performances_test[name].append(performance[name])

    # Summarise across folds and log the result.
    average_performance_test = self.compute_average_performance(performances_test)
    message = "Average performance: %s. \nPerformances test: %s." % (
        average_performance_test, performances_test)
    print(message)
    self.fout.write(message)
    self.fout.flush()
# Load the remaining drug feature datasets, filtered by `drugs`.
R_targets, M_targets = load_data_filter(
    location_features_drugs + "drug_targets.txt", drugs)
R_1d2d, M_1d2d = load_data_filter(
    location_features_drugs + "drug_1d2d.txt", drugs)
#R_1d2d_std, M_1d2d_std = load_data_filter(location_features_drugs+"drug_1d2d_std.txt", drugs)

# Feature matrices grouped per entity type.
features_drugs = [R_fp, R_targets, R_1d2d]
features_cell_lines = [R_cnv, R_mutation]


''' Split the mask M into folds '''
no_folds = 10
I, J = R_main.shape
ATTEMPTS_GENERATE_M = 100

# Seed both random number generators before generating the folds.
numpy.random.seed(0)
random.seed(0)
folds_test = mask.compute_folds_attempts(
    I=I,
    J=J,
    no_folds=no_folds,
    attempts=ATTEMPTS_GENERATE_M,
    M=M_main,
)
folds_training = mask.compute_Ms(folds_test)


''' Function for assembling features X '''
def assemble_X(Rs_rows, Rs_cols, M):
    """Build the feature matrix for all entries (i, j) selected by M.

    Entries are visited in row-major order over range(I) x range(J) (the
    module-level dimensions) and kept when M[i, j] is truthy.  The feature
    vector of an entry is the concatenation of R[i, :] for every R in
    Rs_rows, followed by R[j, :] for every R in Rs_cols.

    Returns a numpy array with one row per selected entry.
    """
    selected = [
        (row, col)
        for row in range(0, I)
        for col in range(0, J)
        if M[row, col]
    ]

    datapoints = []
    for row, col in selected:
        features = []
        for R_row in Rs_rows:
            features.extend(R_row[row, :])
        for R_col in Rs_cols:
            features.extend(R_col[col, :])
        datapoints.append(features)
    return numpy.array(datapoints)


''' Function for assembling outcomes y '''
'Sm': 'least', 'G': 'least', 'lambdat': 'exp', 'tau': 'exp' } alpha = [1., 1., 1., 1.] # GDSC, CTRP, CCLE IC, CCLE EC K = {'Cell_lines': 10, 'Drugs': 10} file_performance = 'results/hmf_d_mtf.txt' D, C = [], [] n = 0 ''' Split the folds. For each, obtain a list for the test set of (i,j,real,pred) values. ''' i_j_real_pred = [] folds_test = mask.compute_folds_attempts(I=I, J=J, no_folds=no_folds, attempts=1000, M=M_gdsc) folds_training = mask.compute_Ms(folds_test) for i, (train, test) in enumerate(zip(folds_training, folds_test)): print "Fold %s." % (i + 1) ''' Predict values. ''' R = [(R_gdsc, train, 'Cell_lines', 'Drugs', alpha[0]), (R_ctrp, M_ctrp, 'Cell_lines', 'Drugs', alpha[1]), (R_ccle_ic, M_ccle_ic, 'Cell_lines', 'Drugs', alpha[2]), (R_ccle_ec, M_ccle_ec, 'Cell_lines', 'Drugs', alpha[3])] HMF = HMF_Gibbs(R=R, C=C, D=D, K=K,