def get_model_accuracy_filter_feature_size(self,expression_file, ic50_file,feature_size,num_permutations,drug):
     scikit_data,scikit_target = dfm.get_expression_scikit_data_target_for_drug(expression_file,ic50_file,drug,normalized=True,trimmed=True,threshold=None)
     for i in range(0,num_permutations):
         try:
             shuffled_data,shuffled_target = dfm.shuffle_scikit_data_target(scikit_data,scikit_target)
             accuracy = cv.cross_val_score_filter_feature_selection(self.model,cv.trim_X_num_features,feature_size,shuffled_data,shuffled_target,cv=5)
             yield accuracy.mean()
         except:
             yield 0.0
def get_accuracy_and_runtime_vs_num_generations(expression_file,ic50_file,num_features,generation_range,num_permutations,num_threads):
    drug = "SMAP"

    scikit_data,scikit_target = dfm.get_expression_scikit_data_target_for_drug(expression_file,ic50_file,drug,normalized=True,trimmed=True,threshold=None)
    p = Pool(num_threads)
    scores = p.map(wrap, [(g,scikit_data,scikit_target,num_features,num_permutations) for g in generation_range])
    scores = {g : scores[i] for i,g in enumerate(generation_range)}
    print(scores)
    return scores
    def get_model_accuracy_RFE(self,expression_file,ic50_file,target_features,num_permutations,drug):

        scikit_data,scikit_target = dfm.get_expression_scikit_data_target_for_drug(expression_file,ic50_file,drug,normalized=True,trimmed=True,threshold=None)
        step_length = int(len(scikit_data.tolist()[0]) / 100) + 1
        for i in xrange(0,num_permutations):
            try:
                shuffled_data,shuffled_target = dfm.shuffle_scikit_data_target(scikit_data,scikit_target)
                selector = RFE(self.model,target_features,step=step_length)
                yield cross_val_score(selector,shuffled_data,shuffled_target,cv=5).mean()
            except:
                yield 0.0
 def get_cross_validation_time(self,expression_file, ic50_file,feature_size,num_permutations,drug):
     scikit_data,scikit_target = dfm.get_expression_scikit_data_target_for_drug(expression_file,ic50_file,drug,normalized=True,trimmed=True,threshold=None)
     for i in range(0,num_permutations):
         try:
             shuffled_data,shuffled_target = dfm.shuffle_scikit_data_target(scikit_data,scikit_target)
             start_time = datetime.datetime.now()
             cv.cross_val_score_filter_feature_selection(self.model,cv.trim_X_num_features,feature_size,shuffled_data,shuffled_target,cv=5)
             end_time = datetime.datetime.now()
             yield float((end_time - start_time).microseconds) / 100000
         except:
             yield 0.0
    def get_predictions_full_CCLE_dataset_rfe(self,expression_file,ic50_file,target_features,drug):

        scikit_data,scikit_target = dfm.get_expression_scikit_data_target_for_drug(expression_file,ic50_file,drug,normalized=True,trimmed=True,threshold=None)
        step_length = int(len(scikit_data.tolist()[0]) / 100) + 1

        model = RFE(self.model,target_features,step=step_length)
        model.fit(scikit_data,scikit_target)

        expression_frame = dfm.normalize_expression_frame(dfm.get_cell_line_expression_frame(expression_file))
        cell_lines = expression_frame.columns
        testing_data = dfm.get_scikit_data(expression_frame)

        predictions = model.predict(testing_data)

        top_features = [expression_frame.index[i] for i in xrange(0,len(expression_frame.index)) if model.support_[i]]


        return cell_lines,predictions,top_features