def test_mdr_fit():
    """Check the class-count matrix and feature map produced by MDR.fit().

    The training set holds four distinct feature combinations; the test
    verifies how many instances of each class were counted per combination
    and which label each combination was mapped to.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    # Ten instances labelled 1 followed by five instances labelled 0.
    classes = np.array([1] * 10 + [0] * 5)

    mdr = MDR()
    mdr.fit(features, classes)

    # One entry per distinct feature combination seen in training.
    assert len(mdr.class_count_matrix) == 4
    assert len(mdr.feature_map) == 4

    # (feature combination, class label, expected instance count)
    expected_counts = [
        ((2, 0), 1, 1),
        ((0, 0), 0, 3),
        ((0, 0), 1, 6),
        ((1, 1), 0, 2),
        ((0, 1), 1, 3),
    ]
    for combination, label, count in expected_counts:
        assert mdr.class_count_matrix[combination][label] == count

    # (feature combination, label it is expected to map to)
    expected_labels = [
        ((2, 0), 1),
        ((0, 0), 1),
        ((1, 1), 0),
        ((0, 1), 1),
    ]
    for combination, label in expected_labels:
        assert mdr.feature_map[combination] == label
def test_custom_score():
    """Check that MDR.score() reports the value of a caller-supplied metric.

    The fitted model is scored on its own training data with
    ``accuracy_score`` and ``zero_one_loss``; the last call also passes
    ``normalize=False`` through ``score``.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    # Ten instances labelled 1 followed by five instances labelled 0.
    classes = np.array([1] * 10 + [0] * 5)

    mdr = MDR()
    mdr.fit(features, classes)

    # Each metric is evaluated and checked before the next one is requested.
    accuracy = mdr.score(features=features, classes=classes,
                         scoring_function=accuracy_score)
    assert accuracy == 9. / 15

    error_rate = mdr.score(features=features, classes=classes,
                           scoring_function=zero_one_loss)
    assert error_rate == 1 - 9. / 15

    # With normalize=False the expected value is a count rather than a fraction.
    error_count = mdr.score(features=features, classes=classes,
                            scoring_function=zero_one_loss, normalize=False)
    assert error_count == 15 - 9
def test_mdr_fit_raise_ValueError():
    """Check that MDR.fit() raises ValueError for non-binary class labels.

    Two invalid label vectors are tried against the same MDR instance: one
    containing three distinct labels and one containing a single label.

    Raises:
        AssertionError: if ``fit`` accepts either label vector without
            raising ``ValueError``. The failure is raised explicitly (rather
            than via ``assert False``) so that it carries a diagnostic
            message and is not stripped when Python runs with ``-O``.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])

    # (description used in the failure message, invalid label vector)
    invalid_label_sets = [
        ('three distinct class labels',
         np.array([1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0])),
        ('a single class label',
         np.array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])),
    ]

    # The same estimator is reused for both attempts, as in the original test.
    mdr = MDR()
    for description, classes in invalid_label_sets:
        try:
            mdr.fit(features, classes)
        except ValueError:
            pass  # Expected: only binary classification is supported.
        else:
            raise AssertionError(
                'MDR.fit() did not raise ValueError for ' + description)
def test_mdr_transform():
    """Check that MDR.transform() maps new feature rows to the expected labels.

    The expected output is a single-column 2-D structure with one label per
    row of ``test_features``.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    # Ten instances labelled 1 followed by five instances labelled 0.
    classes = np.array([1] * 10 + [0] * 5)

    mdr = MDR()
    mdr.fit(features, classes)

    # Includes combinations absent from the training data: [2, 2] and [1, 0].
    test_features = np.array([
        [2, 2], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 0], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 1], [1, 0], [0, 0], [1, 0], [0, 0],
    ])
    new_features = mdr.transform(test_features)

    # One expected label per test row, wrapped so each label is its own row.
    expected_labels = [0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1]
    expected_column = [[label] for label in expected_labels]
    assert np.array_equal(new_features, expected_column)
def test_fit():
    """Check the state built by MDR.fit(): labels, class fraction, counts, map.

    Verifies the number of unique labels, the class fraction, the per-class
    instance counts for every feature combination (including one that never
    occurs in training), and the label assigned to each combination.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    # Ten instances labelled 1 followed by five instances labelled 0.
    classes = np.array([1] * 10 + [0] * 5)

    mdr = MDR()
    mdr.fit(features, classes)

    assert len(mdr.unique_labels) == 2
    assert mdr.class_fraction == 1. / 3.

    # The size checks run before any lookup of an unseen combination below.
    # NOTE(review): presumably class_count_matrix is a defaultdict, in which
    # case the (2, 2) lookups would insert a key -- confirm before reordering.
    assert len(mdr.class_count_matrix) == 4
    assert len(mdr.feature_map) == 4

    # (feature combination, class label, expected instance count)
    expected_counts = [
        ((2, 0), 0, 0),
        ((2, 0), 1, 1),
        ((0, 0), 0, 3),
        ((0, 0), 1, 6),
        ((1, 1), 0, 2),
        ((1, 1), 1, 0),
        ((0, 1), 0, 0),
        ((0, 1), 1, 3),
        # (2, 2) never appears in the training data.
        ((2, 2), 0, 0),
        ((2, 2), 1, 0),
    ]
    for combination, label, count in expected_counts:
        assert mdr.class_count_matrix[combination][label] == count

    # (feature combination, label it is expected to map to)
    expected_labels = [
        ((2, 0), 1),
        ((0, 0), 0),
        ((1, 1), 0),
        ((0, 1), 1),
    ]
    for combination, label in expected_labels:
        assert mdr.feature_map[combination] == label
def test_transform():
    """Check that MDR.transform() maps new feature rows to the expected labels.

    The expected output is a flat sequence with one label per row of
    ``test_features``.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    # Ten instances labelled 1 followed by five instances labelled 0.
    classes = np.array([1] * 10 + [0] * 5)

    mdr = MDR()
    mdr.fit(features, classes)

    # Includes combinations absent from the training data: [2, 2] and [1, 0].
    test_features = np.array([
        [2, 2], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 0], [1, 1], [0, 0], [0, 0], [0, 0],
        [0, 1], [1, 0], [0, 0], [1, 0], [0, 0],
    ])
    new_features = mdr.transform(test_features)

    # Only the [0, 1] row (position 10) is expected to map to label 1.
    expected_labels = [
        0, 0, 0, 0, 0,
        0, 0, 0, 0, 0,
        1, 0, 0, 0, 0,
    ]
    assert np.array_equal(new_features, expected_labels)
def test_score():
    """Check the value MDR.score() returns when no scoring function is given.

    The model is fitted and then scored on the same fifteen instances; the
    expected default score is 9/15.
    """
    features = np.array([
        [2, 0], [0, 0], [0, 1], [0, 0], [0, 0],
        [0, 0], [0, 1], [0, 0], [0, 0], [0, 1],
        [0, 0], [0, 0], [0, 0], [1, 1], [1, 1],
    ])
    # Ten instances labelled 1 followed by five instances labelled 0.
    classes = np.array([1] * 10 + [0] * 5)

    mdr = MDR()
    mdr.fit(features, classes)

    default_score = mdr.score(features, classes)
    assert default_score == 9. / 15
# Collect, for every entry of m2, the value at index 1 and the value at
# index 2 into the two running lists (both lists are created outside this
# fragment and may already hold items).
for i in range(len(m2)):
    n_way_results.append(m2[i][1])
    # n_way_results = tuple(n_way_results)
    n_way_features.append(m2[i][2])
    # n_way_features = tuple(n_way_features)

# Pair each result with its feature selection. Because the results are used
# as dictionary keys, a later entry with the same result replaces an earlier one.
d1 = dict(zip(n_way_results, n_way_features))
max_val = max(d1)
# Pick the feature selection stored under the largest result.
max_feat = [feats for result, feats in d1.items() if result == max_val][0]

# Restrict both splits to the selected features.
xtr = xtr[max_feat]
xte = xte[max_feat]

#clf.fit(mymdr.transform(xtr.values), training_classes)
#print('ekf + mdr: ', clf.score(mymdr.transform(xte.values), testing_classes))

# Fit MDR on the reduced training split and report its score on the test split.
mymdr.fit(xtr.values, training_classes)
print('ekf + mdr: ', mymdr.score(xte.values, testing_classes))

#randex = random.randint(0,3)
#selector = SelectKBest(f_classif, k=5)

# Feature selection with EKF
#xtr = _ekf(training_features, ekf_index=2)
#xte = _ekf(testing_features, ekf_index=2)
##full_data_0 = _ekf(individuals, ekf_index=0)
##full_data_2 = _ekf(individuals, ekf_index=2)
#
#xtr_2 = selector.fit_transform(training_features, training_classes)
a5000_01h, a5000_02h, a5000_04h ] dataset_names = [ 'a10_005h', 'a10_01h', 'a10_02h', 'a10_04h', 'a100_005h', 'a100_01h', 'a100_02h', 'a100_04h', 'a1000_005h', 'a1000_01h', 'a1000_02h', 'a1000_04h', 'a5000_005h', 'a5000_01h', 'a5000_02h', 'a5000_04h' ] output_txt = '/home/ansohn/Python/venvs/mdr/gametes_logs/target_scores.txt' with open(output_txt, 'w') as t1: for i in range(16): # print(dataset) dataset = gametes_all[i] dataset_name = dataset_names[i] load_dataset = pd.read_csv(dataset, sep='\t') phenotype = load_dataset['Class'].values individuals = load_dataset.drop('Class', axis=1) individuals = individuals[['M0P0', 'M0P1']].values for i in range(30): X_train, X_test, y_train, y_test = train_test_split( individuals, phenotype, train_size=0.75, test_size=0.25) target_pipeline = MDR() target_pipeline.fit(X_train, y_train) t1.write('{}\t{}\tmdr-perfect\n'.format( dataset_name, target_pipeline.score(X_test, y_test)))