def get_model_accuracy_filter_feature_size(self, expression_file, ic50_file, feature_size, num_permutations, drug):
    """Yield one mean 5-fold cross-validation score per shuffled permutation.

    The data/target pair for ``drug`` is loaded once through
    ``dfm.get_expression_scikit_data_target_for_drug`` (normalized, trimmed,
    no threshold).  For each permutation the pair is reshuffled and scored
    with ``cv.cross_val_score_filter_feature_selection`` using ``self.model``,
    ``cv.trim_X_num_features`` and ``feature_size``.

    Args:
        expression_file: Passed through to the ``dfm`` loader.
        ic50_file: Passed through to the ``dfm`` loader.
        feature_size: Passed to the filter feature-selection step.
        num_permutations: Number of shuffled evaluations to run.
        drug: Passed through to the ``dfm`` loader.

    Yields:
        The mean of the cross-validation scores for each permutation, or
        ``0.0`` when shuffling or scoring that permutation raised.
    """
    scikit_data, scikit_target = dfm.get_expression_scikit_data_target_for_drug(
        expression_file, ic50_file, drug,
        normalized=True, trimmed=True, threshold=None)
    for _ in range(num_permutations):
        try:
            shuffled_data, shuffled_target = dfm.shuffle_scikit_data_target(
                scikit_data, scikit_target)
            accuracy = cv.cross_val_score_filter_feature_selection(
                self.model, cv.trim_X_num_features, feature_size,
                shuffled_data, shuffled_target, cv=5)
            mean_accuracy = accuracy.mean()
        except Exception:
            # Best-effort: a failed permutation is reported as 0.0.  Only
            # Exception is caught so KeyboardInterrupt/SystemExit still
            # propagate.
            mean_accuracy = 0.0
        # Yield outside the try block: if the consumer closes the generator,
        # GeneratorExit is raised here and must not be intercepted by a
        # handler that yields again (that raises RuntimeError).
        yield mean_accuracy
def get_model_accuracy_RFE(self, expression_file, ic50_file, target_features, num_permutations, drug):
    """Yield one mean 5-fold cross-validation score per permutation using RFE.

    The data/target pair for ``drug`` is loaded once through
    ``dfm.get_expression_scikit_data_target_for_drug`` (normalized, trimmed,
    no threshold).  For each permutation the pair is reshuffled, ``self.model``
    is wrapped in an ``RFE`` selector and the selector is scored with
    ``cross_val_score``.

    Args:
        expression_file: Passed through to the ``dfm`` loader.
        ic50_file: Passed through to the ``dfm`` loader.
        target_features: Number of features RFE should keep.
        num_permutations: Number of shuffled evaluations to run.
        drug: Passed through to the ``dfm`` loader.

    Yields:
        The mean of the cross-validation scores for each permutation, or
        ``0.0`` when shuffling, selector construction or scoring raised.
    """
    scikit_data, scikit_target = dfm.get_expression_scikit_data_target_for_drug(
        expression_file, ic50_file, drug,
        normalized=True, trimmed=True, threshold=None)
    # RFE step: one hundredth of the length of the first row, plus one so the
    # step is never zero.
    step_length = len(scikit_data.tolist()[0]) // 100 + 1
    # range (not xrange) matches the sibling methods and also runs on Python 3.
    for _ in range(num_permutations):
        try:
            shuffled_data, shuffled_target = dfm.shuffle_scikit_data_target(
                scikit_data, scikit_target)
            # n_features_to_select is passed by keyword: newer scikit-learn
            # releases reject it as a positional argument.
            selector = RFE(self.model,
                           n_features_to_select=target_features,
                           step=step_length)
            mean_accuracy = cross_val_score(
                selector, shuffled_data, shuffled_target, cv=5).mean()
        except Exception:
            # Best-effort: a failed permutation is reported as 0.0.  Only
            # Exception is caught so KeyboardInterrupt/SystemExit still
            # propagate.
            mean_accuracy = 0.0
        # Yield outside the try block so GeneratorExit (raised here when the
        # consumer closes the generator) is never swallowed by the handler.
        yield mean_accuracy
def get_cross_validation_time(self, expression_file, ic50_file, feature_size, num_permutations, drug):
    """Yield the wall-clock duration of one cross-validation run per permutation.

    The data/target pair for ``drug`` is loaded once through
    ``dfm.get_expression_scikit_data_target_for_drug`` (normalized, trimmed,
    no threshold).  For each permutation the pair is reshuffled and only the
    call to ``cv.cross_val_score_filter_feature_selection`` is timed; the
    shuffle is excluded from the measurement.

    Args:
        expression_file: Passed through to the ``dfm`` loader.
        ic50_file: Passed through to the ``dfm`` loader.
        feature_size: Passed to the filter feature-selection step.
        num_permutations: Number of timed evaluations to run.
        drug: Passed through to the ``dfm`` loader.

    Yields:
        Elapsed time in seconds as a float, or ``0.0`` when shuffling or
        scoring that permutation raised.
    """
    scikit_data, scikit_target = dfm.get_expression_scikit_data_target_for_drug(
        expression_file, ic50_file, drug,
        normalized=True, trimmed=True, threshold=None)
    for _ in range(num_permutations):
        try:
            shuffled_data, shuffled_target = dfm.shuffle_scikit_data_target(
                scikit_data, scikit_target)
            start_time = datetime.datetime.now()
            cv.cross_val_score_filter_feature_selection(
                self.model, cv.trim_X_num_features, feature_size,
                shuffled_data, shuffled_target, cv=5)
            # total_seconds() covers the whole interval.  The previous
            # `.microseconds / 100000` read only the sub-second component
            # (dropping whole seconds) and used a divisor that does not
            # convert microseconds to seconds.
            elapsed = (datetime.datetime.now() - start_time).total_seconds()
        except Exception:
            # Best-effort: a failed permutation is reported as 0.0.  Only
            # Exception is caught so KeyboardInterrupt/SystemExit still
            # propagate.
            elapsed = 0.0
        # Yield outside the try block so GeneratorExit (raised here when the
        # consumer closes the generator) is never swallowed by the handler.
        yield elapsed
def acc_and_run(g, scikit_data, scikit_target, num_features, num_permutations):
    """Measure accuracy and run time of a NEAT classifier over shuffled runs.

    For each permutation a fresh ``n.NeatClassifier(max_generations=g)`` is
    built, the data/target pair is reshuffled with
    ``dfm.shuffle_scikit_data_target`` and the model is scored with
    ``cv.cross_val_score_filter_feature_selection`` (5 folds,
    ``cv.trim_X_num_features`` with ``num_features``).  The timed interval
    covers model construction, shuffling and cross-validation.

    Args:
        g: Value passed as ``max_generations`` to the classifier.
        scikit_data: Data passed to the shuffle helper.
        scikit_target: Target passed to the shuffle helper.
        num_features: Passed to the filter feature-selection step.
        num_permutations: Number of runs; zero or less yields an empty list.

    Returns:
        A list with one ``(mean_accuracy, elapsed_seconds)`` tuple per
        permutation.  A permutation that raised is recorded as
        ``(0.0, 1000.0)`` and its exception type is printed.
    """
    results = []
    # range (not xrange) so the function also runs on Python 3.
    for _ in range(num_permutations):
        try:
            start_time = datetime.datetime.now()
            model = n.NeatClassifier(max_generations=g)
            shuffled_data, shuffled_target = dfm.shuffle_scikit_data_target(
                scikit_data, scikit_target)
            acc = cv.cross_val_score_filter_feature_selection(
                model, cv.trim_X_num_features, num_features,
                shuffled_data, shuffled_target, cv=5)
            # total_seconds() covers the whole interval.  The previous
            # `.microseconds / 100000` dropped whole seconds and did not
            # convert microseconds to seconds.
            elapsed = (datetime.datetime.now() - start_time).total_seconds()
            outcome = (acc.mean(), elapsed)
        except Exception as exc:
            # Best-effort: record a sentinel for the failed run and report
            # the exception type.  KeyboardInterrupt/SystemExit propagate.
            outcome = (0.0, 1000.0)
            print(type(exc))
        results.append(outcome)
    return results