def test_custom_score():
    """Check that MDR.score applies a user-supplied scoring function.

    The model is fitted and scored on the same 15-sample data set. The
    expected values are the ones for 9 matching predictions out of 15:
    ``accuracy_score`` gives 9/15, ``zero_one_loss`` gives 1 - 9/15, and
    ``zero_one_loss`` with ``normalize=False`` gives 15 - 9.
    """
    samples = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    labels = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(samples, labels)

    expected_accuracy = 9. / 15

    accuracy = model.score(
        features=samples, classes=labels, scoring_function=accuracy_score)
    assert accuracy == expected_accuracy

    error_rate = model.score(
        features=samples, classes=labels, scoring_function=zero_one_loss)
    assert error_rate == 1 - expected_accuracy

    # Extra keyword arguments (here normalize=False) are passed to score
    # together with the scoring function; the expected result is a count.
    error_count = model.score(
        features=samples, classes=labels, scoring_function=zero_one_loss,
        normalize=False)
    assert error_count == 15 - 9
def test_score():
    """Check the default score returned by MDR.score.

    The model is fitted and scored on the same 15-sample data set, calling
    ``score`` with no scoring function; the expected result is 9/15.
    """
    samples = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    labels = np.array([1] * 10 + [0] * 5)

    model = MDR()
    model.fit(samples, labels)

    default_score = model.score(samples, labels)
    assert default_score == 9. / 15
# NOTE(review): the two appends below index m2 with `i`, so they presumably
# belong to an enclosing loop whose header is outside this excerpt -- confirm
# their indentation against the surrounding code before merging.
n_way_results.append( (m2[i])[1] )
# n_way_results = tuple(n_way_results)
n_way_features.append( (m2[i])[2] )
# n_way_features = tuple(n_way_features)

# Pair every collected result with its feature entry and keep the entry
# that belongs to the largest result. Because the results are dict keys,
# equal results collapse to the last pair seen.
d1 = dict(zip(n_way_results, n_way_features))
max_val = max(d1)
# Direct lookup replaces scanning all items for the key equal to max_val.
max_feat = d1[max_val]

# Restrict both splits to the selected feature entry.
xtr = xtr[max_feat]
xte = xte[max_feat]

#clf.fit(mymdr.transform(xtr.values), training_classes)
#print('ekf + mdr: ', clf.score(mymdr.transform(xte.values), testing_classes))

# Fit MDR on the reduced training split and report its score on the
# reduced testing split.
mymdr.fit(xtr.values, training_classes)
print('ekf + mdr: ', mymdr.score(xte.values, testing_classes))

# Disabled experiments, kept for reference.
#randex = random.randint(0,3)
#selector = SelectKBest(f_classif, k=5)
# Feature selection with EKF
#xtr = _ekf(training_features, ekf_index=2)
#xte = _ekf(testing_features, ekf_index=2)
##full_data_0 = _ekf(individuals, ekf_index=0)
##full_data_2 = _ekf(individuals, ekf_index=2)
#
#xtr_2 = selector.fit_transform(training_features, training_classes)
#xte_2 = selector.fit_transform(testing_features, testing_classes)
a5000_01h, a5000_02h, a5000_04h ] dataset_names = [ 'a10_005h', 'a10_01h', 'a10_02h', 'a10_04h', 'a100_005h', 'a100_01h', 'a100_02h', 'a100_04h', 'a1000_005h', 'a1000_01h', 'a1000_02h', 'a1000_04h', 'a5000_005h', 'a5000_01h', 'a5000_02h', 'a5000_04h' ] output_txt = '/home/ansohn/Python/venvs/mdr/gametes_logs/target_scores.txt' with open(output_txt, 'w') as t1: for i in range(16): # print(dataset) dataset = gametes_all[i] dataset_name = dataset_names[i] load_dataset = pd.read_csv(dataset, sep='\t') phenotype = load_dataset['Class'].values individuals = load_dataset.drop('Class', axis=1) individuals = individuals[['M0P0', 'M0P1']].values for i in range(30): X_train, X_test, y_train, y_test = train_test_split( individuals, phenotype, train_size=0.75, test_size=0.25) target_pipeline = MDR() target_pipeline.fit(X_train, y_train) t1.write('{}\t{}\tmdr-perfect\n'.format( dataset_name, target_pipeline.score(X_test, y_test)))