/
svm_hmp_2_feature_plot.py
64 lines (47 loc) · 2.32 KB
/
svm_hmp_2_feature_plot.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import pylab as pl
import numpy as np
import sklearn.preprocessing
import mothur_files
import smo
def svm_hmp_2_feature_plot():
print('hazzah!')
shared_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.shared';
design_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.mix.design';
shared_data = mothur_files.load_shared_file(shared_file_path)
design_data = mothur_files.load_design_file(design_file_path)
otu1 = 'Otu29878'
otu2 = 'Otu29552'
# where are Otu29741 and Otu29678
n_otu1 = shared_data.otu_column_names.index(otu1)
n_otu2 = shared_data.otu_column_names.index(otu2)
print('{} is on column {}'.format(otu1, n_otu1))
print('{} is on column {}'.format(otu2, n_otu2))
print('shape of design_data.class_number_for_row {}'.format(design_data.class_number_for_row.shape))
class_zero = design_data.class_number_for_row == 2.0
class_one = design_data.class_number_for_row == 1.0
print('class zero count: {}'.format(np.sum(class_zero)))
print('class one count: {}'.format(np.sum(class_one)))
two_labels = np.logical_or(class_zero, class_one)
print('shape of two_labels: {}'.format(two_labels.shape));
label_index = np.arange(design_data.class_number_for_row.shape[0])
reduced_label_index = label_index[two_labels[:,0]]
print('reduced_label_index: {}'.format(reduced_label_index))
two_labels_otu_frequency = shared_data.otu_frequency[reduced_label_index,:]
print('shape of two_labels_otu_frequency: {}'.format(two_labels_otu_frequency.shape))
reduced_otu_frequency = two_labels_otu_frequency[:,[n_otu1, n_otu2]]
print('shaped of reduced_otu_frequency: {}'.format(reduced_otu_frequency.shape))
#print('reduced_otu_frequency:\n{}'.format(reduced_otu_frequency))
scaler = sklearn.preprocessing.StandardScaler()
# the scaler returns a copy by default
#X = scaler.fit_transform(reduced_otu_frequency)
#exit()
# the next line is pretty good
# smo.smo(reduced_otu_frequency, design_data.class_number_for_row[two_labels], 0.5)
smo.smo(reduced_otu_frequency, design_data.class_number_for_row[two_labels], 0.5)
pl.xlabel(otu1)
pl.ylabel(otu2)
pl.gca().set_xticklabels([])
pl.gca().set_yticklabels([])
pl.show()
if __name__ == '__main__':
svm_hmp_2_feature_plot()