def evaluate_svm():
    """Evaluate SVM classifiers on mothur shared/design data.

    Reads the shared-file and design-file paths from the command line,
    standardizes the OTU frequency matrix, and runs
    ``support_vector_machine`` once per kernel over a small
    hyperparameter grid.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("shared_file_path", help="<path to shared file>")
    parser.add_argument("design_file_path", help="<path to design file>")
    args = parser.parse_args()
    print("shared file path: {0.shared_file_path}".format(args))
    print("design file path: {0.design_file_path}".format(args))

    shared_data = mothur_files.load_shared_file(args.shared_file_path)
    design_data = mothur_files.load_design_file(args.design_file_path)

    # StandardScaler returns a scaled copy by default, so the loaded
    # frequency matrix is left untouched.
    X = sklearn.preprocessing.StandardScaler().fit_transform(
        shared_data.otu_frequency)
    y = design_data.class_number_for_row[:, 0]
    y_labels = [
        design_data.class_number_to_name[n]
        for n in sorted(design_data.class_number_to_name)
    ]

    # Hyperparameter grids shared across the kernels below.
    C_range = 10.0 ** np.arange(-3, 3)
    gamma_range = 10.0 ** np.arange(-5, -3)
    degree_range = np.arange(1, 5)
    coef0_range = np.arange(-3.0, 3.0)

    kernel_grids = (
        ("linear", dict(C=C_range)),
        ("rbf", dict(gamma=gamma_range, C=C_range)),
        ("poly", dict(C=C_range, degree=degree_range, coef0=coef0_range)),
        ("sigmoid", dict(C=C_range, coef0=coef0_range)),
    )
    for kernel, param_grid in kernel_grids:
        support_vector_machine(X, y, y_labels, kernel, param_grid, shared_data)
def svm_hmp_2_feature_plot():
    """Plot an SMO-trained SVM over two hand-picked OTU features.

    Loads the HMP stool shared/design files from hard-coded paths, keeps
    only the samples whose class number is 1.0 or 2.0 and only the two
    OTU columns of interest, then hands the reduced matrix to the
    project's ``smo.smo`` routine and labels the resulting plot.

    NOTE(review): this function is defined twice in this file; the later
    definition shadows this one at import time.
    """
    print('hazzah!')
    shared_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.shared'
    design_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.mix.design'
    shared_data = mothur_files.load_shared_file(shared_file_path)
    design_data = mothur_files.load_design_file(design_file_path)

    otu1 = 'Otu29878'
    otu2 = 'Otu29552'
    # where are Otu29741 and Otu29678
    n_otu1 = shared_data.otu_column_names.index(otu1)
    n_otu2 = shared_data.otu_column_names.index(otu2)
    print('{} is on column {}'.format(otu1, n_otu1))
    print('{} is on column {}'.format(otu2, n_otu2))
    print('shape of design_data.class_number_for_row {}'.format(
        design_data.class_number_for_row.shape))

    # Boolean masks for the two classes of interest.  The original locals
    # were named class_zero/class_one, but the values compared against are
    # the class numbers 2.0 and 1.0; the names below say what is tested.
    # (The printed "class zero count" label is kept byte-for-byte even
    # though it reports the class-2.0 count.)
    class_two_mask = design_data.class_number_for_row == 2.0
    class_one_mask = design_data.class_number_for_row == 1.0
    print('class zero count: {}'.format(np.sum(class_two_mask)))
    print('class one count: {}'.format(np.sum(class_one_mask)))
    two_labels = np.logical_or(class_two_mask, class_one_mask)
    print('shape of two_labels: {}'.format(two_labels.shape))

    # Row indices of the samples belonging to either class.
    label_index = np.arange(design_data.class_number_for_row.shape[0])
    reduced_label_index = label_index[two_labels[:, 0]]
    print('reduced_label_index: {}'.format(reduced_label_index))

    two_labels_otu_frequency = shared_data.otu_frequency[reduced_label_index, :]
    print('shape of two_labels_otu_frequency: {}'.format(
        two_labels_otu_frequency.shape))
    # Keep only the two OTU columns being plotted.
    reduced_otu_frequency = two_labels_otu_frequency[:, [n_otu1, n_otu2]]
    # Fixed typo in the printed message: was "shaped of".
    print('shape of reduced_otu_frequency: {}'.format(
        reduced_otu_frequency.shape))

    # 0.5 is presumably the SVM penalty parameter -- TODO confirm against
    # smo.smo's signature.
    smo.smo(reduced_otu_frequency,
            design_data.class_number_for_row[two_labels], 0.5)
    pl.xlabel(otu1)
    pl.ylabel(otu2)
    pl.gca().set_xticklabels([])
    pl.gca().set_yticklabels([])
    pl.show()
def select_features():
    """Run three feature-selection routines on the HMP stool data.

    Loads the shared and design files from relative ``data/`` paths, then
    hands the OTU matrix, column names, and partition names to the
    elastic-net and SVM feature selectors in turn.

    NOTE(review): the loaders are unpacked positionally here, while other
    functions in this file use attribute-style access on the returned
    objects -- confirm which API ``mothur_files`` currently exposes.
    """
    shared_path = 'data/Stool.0.03.subsample.0.03.filter.shared'
    design_path = 'data/Stool.0.03.subsample.0.03.filter.mix.design'

    (shared_label_names,
     shared_group_names,
     otu_column_names,
     shared_data) = mothur_files.load_shared_file(shared_path)
    design_group_names, design_partition_names = \
        mothur_files.load_design_file(design_path)

    # Same arguments for every selector; preserve the original call order.
    for selector in (elastic_net,
                     linear_support_vector_machine,
                     rbf_support_vector_machine):
        selector(shared_data, otu_column_names, design_partition_names)
def svm_hmp_2_feature_plot():
    """Plot an SMO-trained SVM over two hand-picked OTU features.

    Same routine as the earlier definition of this name in this file (a
    duplicate definition -- this later one wins at import time): restrict
    the HMP stool samples to class numbers 1.0 and 2.0, keep two OTU
    columns, run ``smo.smo`` on the reduced matrix, and label the plot.

    Cleanups: removed trailing semicolons, removed commented-out dead
    code and the unused StandardScaler, and fixed the "shaped of" typo
    in one printed message.
    """
    print('hazzah!')
    shared_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.shared'
    design_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.mix.design'
    shared_data = mothur_files.load_shared_file(shared_file_path)
    design_data = mothur_files.load_design_file(design_file_path)

    otu1 = 'Otu29878'
    otu2 = 'Otu29552'
    # where are Otu29741 and Otu29678
    n_otu1 = shared_data.otu_column_names.index(otu1)
    n_otu2 = shared_data.otu_column_names.index(otu2)
    print('{} is on column {}'.format(otu1, n_otu1))
    print('{} is on column {}'.format(otu2, n_otu2))
    print('shape of design_data.class_number_for_row {}'.format(
        design_data.class_number_for_row.shape))

    # Masks for the two classes of interest; note the values compared
    # against are class numbers 2.0 and 1.0 (the printed "class zero"
    # label is kept byte-for-byte but reports the class-2.0 count).
    class_two_mask = design_data.class_number_for_row == 2.0
    class_one_mask = design_data.class_number_for_row == 1.0
    print('class zero count: {}'.format(np.sum(class_two_mask)))
    print('class one count: {}'.format(np.sum(class_one_mask)))
    two_labels = np.logical_or(class_two_mask, class_one_mask)
    print('shape of two_labels: {}'.format(two_labels.shape))

    # Row indices of the retained samples.
    label_index = np.arange(design_data.class_number_for_row.shape[0])
    reduced_label_index = label_index[two_labels[:, 0]]
    print('reduced_label_index: {}'.format(reduced_label_index))

    two_labels_otu_frequency = shared_data.otu_frequency[reduced_label_index, :]
    print('shape of two_labels_otu_frequency: {}'.format(
        two_labels_otu_frequency.shape))
    reduced_otu_frequency = two_labels_otu_frequency[:, [n_otu1, n_otu2]]
    print('shape of reduced_otu_frequency: {}'.format(
        reduced_otu_frequency.shape))

    # 0.5 is presumably the SVM penalty parameter -- TODO confirm against
    # smo.smo's signature.
    smo.smo(reduced_otu_frequency,
            design_data.class_number_for_row[two_labels], 0.5)
    pl.xlabel(otu1)
    pl.ylabel(otu2)
    pl.gca().set_xticklabels([])
    pl.gca().set_yticklabels([])
    pl.show()
def evaluate_enet():
    """Run the one-vs-one elastic net on shared/design files named on
    the command line.

    Parses the two file paths, echoes them, loads the data via
    ``mothur_files``, and delegates to ``elastic_net_ovo``.

    NOTE(review): this function is defined twice in this file; the later
    definition shadows this one at import time.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument("shared_file_path")
    argparser.add_argument("design_file_path")
    args = argparser.parse_args()
    print("shared file path: {0.shared_file_path}".format(args))
    print("design file path: {0.design_file_path}".format(args))
    shared_data = mothur_files.load_shared_file(args.shared_file_path)
    design_data = mothur_files.load_design_file(args.design_file_path)
    # Commented-out scaling/label-extraction dead code removed;
    # elastic_net_ovo receives the loaded data objects directly.
    elastic_net_ovo(shared_data, design_data)
def evaluate_enet():
    """Evaluate the one-vs-one elastic net.

    Takes the shared-file and design-file paths as positional
    command-line arguments, echoes them, loads both files through
    ``mothur_files``, and passes the results to ``elastic_net_ovo``.
    """
    parser = argparse.ArgumentParser()
    for arg_name in ("shared_file_path", "design_file_path"):
        parser.add_argument(arg_name)
    args = parser.parse_args()

    print("shared file path: {0.shared_file_path}".format(args))
    print("design file path: {0.design_file_path}".format(args))

    # Shared file is loaded before the design file, as in the original.
    elastic_net_ovo(
        mothur_files.load_shared_file(args.shared_file_path),
        mothur_files.load_design_file(args.design_file_path),
    )