Esempio n. 1
0
    def run(self,burn_in=None,thinning=None,minimum_TN=None):
        folds_test = mask.compute_folds_attempts(I=self.I,J=self.J,no_folds=self.folds,attempts=attempts_generate_M,M=self.M)
        folds_training = mask.compute_Ms(folds_test)

        performances_test = {measure:[] for measure in measures}
        for i,(train,test) in enumerate(zip(folds_training,folds_test)):
            print "Fold %s." % (i+1)
            
            # Run the line search
            line_search = LineSearch(
                classifier=self.classifier,
                values_K=self.values_K,
                R=self.R,
                M=train,
                priors=self.priors,
                initUV=self.init_UV,
                iterations=self.iterations,
                restarts=self.restarts)
            line_search.search(burn_in=burn_in,thinning=thinning,minimum_TN=minimum_TN)
            
            # Store the model fits, and find the best one according to the metric    
            all_performances = line_search.all_values(metric=self.quality_metric)
            self.fout.write("All model fits for fold %s, metric %s: %s.\n" % (i+1,self.quality_metric,all_performances)) 
            self.fout.flush()
            
            best_K = line_search.best_value(metric=self.quality_metric)
            self.fout.write("Best K for fold %s: %s.\n" % (i+1,best_K))
            
            # Train a model with this K and measure performance on the test set
            performance = self.run_model(train,test,best_K,burn_in=burn_in,thinning=thinning,minimum_TN=minimum_TN)
            self.fout.write("Performance: %s.\n\n" % performance)
            self.fout.flush()
            
            for measure in measures:
                performances_test[measure].append(performance[measure])
        
        # Store the final performances and average
        average_performance_test = self.compute_average_performance(performances_test)
        message = "Average performance: %s. \nPerformances test: %s." % (average_performance_test,performances_test)
        print message
        self.fout.write(message)        
        self.fout.flush()
Esempio n. 2
0
R_targets,  M_targets =  load_data_filter(location_features_drugs+"drug_targets.txt",      drugs)
R_1d2d,     M_1d2d =     load_data_filter(location_features_drugs+"drug_1d2d.txt",         drugs)
#R_1d2d_std, M_1d2d_std = load_data_filter(location_features_drugs+"drug_1d2d_std.txt",     drugs)

features_drugs = [R_fp, R_targets, R_1d2d]
features_cell_lines = [R_cnv, R_mutation]


''' Split the mask M into folds '''
no_folds = 10
I,J = R_main.shape
ATTEMPTS_GENERATE_M = 100

numpy.random.seed(0)
random.seed(0)
folds_test = mask.compute_folds_attempts(I=I,J=J,no_folds=no_folds,attempts=ATTEMPTS_GENERATE_M,M=M_main)
folds_training = mask.compute_Ms(folds_test)


''' Function for assembling features X '''
def assemble_X(Rs_rows,Rs_cols,M):
    indices = [(i,j) for i,j in itertools.product(range(0,I),range(0,J)) if M[i,j]]
    X = [[] for datapoint in range(0,len(indices))]
    for n,(i,j) in enumerate(indices):
        for R in Rs_rows:
            X[n] += list(R[i,:])
        for R in Rs_cols:
            X[n] += list(R[j,:])
    return numpy.array(X)

''' Function for assembling outcomes y '''
Esempio n. 3
0
    'Sm': 'least',
    'G': 'least',
    'lambdat': 'exp',
    'tau': 'exp'
}

alpha = [1., 1., 1., 1.]  # GDSC, CTRP, CCLE IC, CCLE EC
K = {'Cell_lines': 10, 'Drugs': 10}
file_performance = 'results/hmf_d_mtf.txt'
D, C = [], []
n = 0
''' Split the folds. For each, obtain a list for the test set of (i,j,real,pred) values. '''
i_j_real_pred = []
folds_test = mask.compute_folds_attempts(I=I,
                                         J=J,
                                         no_folds=no_folds,
                                         attempts=1000,
                                         M=M_gdsc)
folds_training = mask.compute_Ms(folds_test)

for i, (train, test) in enumerate(zip(folds_training, folds_test)):
    print "Fold %s." % (i + 1)
    ''' Predict values. '''
    R = [(R_gdsc, train, 'Cell_lines', 'Drugs', alpha[0]),
         (R_ctrp, M_ctrp, 'Cell_lines', 'Drugs', alpha[1]),
         (R_ccle_ic, M_ccle_ic, 'Cell_lines', 'Drugs', alpha[2]),
         (R_ccle_ec, M_ccle_ec, 'Cell_lines', 'Drugs', alpha[3])]
    HMF = HMF_Gibbs(R=R,
                    C=C,
                    D=D,
                    K=K,