def evaluate_svm():
    """Load shared/design files named on the command line and grid-search
    SVM classifiers over four kernels."""
    parser = argparse.ArgumentParser()
    parser.add_argument("shared_file_path", help="<path to shared file>")
    parser.add_argument("design_file_path", help="<path to design file>")
    args = parser.parse_args()

    print("shared file path: {0.shared_file_path}".format(args))
    print("design file path: {0.design_file_path}".format(args))

    shared_data = mothur_files.load_shared_file(args.shared_file_path)
    design_data = mothur_files.load_design_file(args.design_file_path)

    # StandardScaler returns a scaled copy by default
    X = sklearn.preprocessing.StandardScaler().fit_transform(
        shared_data.otu_frequency)
    y = design_data.class_number_for_row[:, 0]
    y_labels = [
        design_data.class_number_to_name[n]
        for n in sorted(design_data.class_number_to_name)
    ]

    # hyper-parameter search grids
    C_range = 10.0 ** np.arange(-3, 3)
    gamma_range = 10.0 ** np.arange(-5, -3)
    degree_range = np.arange(1, 5)
    coef0_range = np.arange(-3.0, 3.0)

    # one grid search per kernel, in the same order as before
    kernel_grids = (
        ("linear", dict(C=C_range)),
        ("rbf", dict(gamma=gamma_range, C=C_range)),
        ("poly", dict(C=C_range, degree=degree_range, coef0=coef0_range)),
        ("sigmoid", dict(C=C_range, coef0=coef0_range)),
    )
    for kernel, grid in kernel_grids:
        support_vector_machine(X, y, y_labels, kernel, grid, shared_data)
def svm_hmp_2_feature_plot():
    """Train an SMO SVM on two hand-picked OTU features and plot the result.

    Loads hard-coded HMP stool shared/design files, keeps only the rows
    whose class label is 1.0 or 2.0, restricts the features to the two OTU
    columns named below, runs smo.smo on the reduced data, and shows the
    plot with the OTU names as axis labels.

    Fixes vs. original: removed an unused StandardScaler instance and dead
    commented-out code; corrected the 'shaped of' typo in one message.
    """
    print('hazzah!')

    shared_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.shared'
    design_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.mix.design'

    shared_data = mothur_files.load_shared_file(shared_file_path)
    design_data = mothur_files.load_design_file(design_file_path)

    otu1 = 'Otu29878'
    otu2 = 'Otu29552'

    # where are Otu29741 and Otu29678
    n_otu1 = shared_data.otu_column_names.index(otu1)
    n_otu2 = shared_data.otu_column_names.index(otu2)

    print('{} is on column {}'.format(otu1, n_otu1))
    print('{} is on column {}'.format(otu2, n_otu2))

    print('shape of design_data.class_number_for_row {}'.format(
        design_data.class_number_for_row.shape))
    # NOTE(review): despite its name, class_zero selects label 2.0 — confirm
    class_zero = design_data.class_number_for_row == 2.0
    class_one = design_data.class_number_for_row == 1.0
    print('class zero count: {}'.format(np.sum(class_zero)))
    print('class one count: {}'.format(np.sum(class_one)))
    two_labels = np.logical_or(class_zero, class_one)
    print('shape of two_labels: {}'.format(two_labels.shape))
    label_index = np.arange(design_data.class_number_for_row.shape[0])
    reduced_label_index = label_index[two_labels[:, 0]]
    print('reduced_label_index: {}'.format(reduced_label_index))

    # keep only the rows belonging to the two selected classes
    two_labels_otu_frequency = shared_data.otu_frequency[
        reduced_label_index, :]
    print('shape of two_labels_otu_frequency: {}'.format(
        two_labels_otu_frequency.shape))

    # keep only the two selected OTU columns
    reduced_otu_frequency = two_labels_otu_frequency[:, [n_otu1, n_otu2]]
    print('shape of reduced_otu_frequency: {}'.format(
        reduced_otu_frequency.shape))

    smo.smo(reduced_otu_frequency,
            design_data.class_number_for_row[two_labels], 0.5)

    pl.xlabel(otu1)
    pl.ylabel(otu2)
    pl.gca().set_xticklabels([])
    pl.gca().set_yticklabels([])
    pl.show()
def select_features():
    """Run three feature-selection methods on the HMP stool data set."""
    shared_path = 'data/Stool.0.03.subsample.0.03.filter.shared'
    design_path = 'data/Stool.0.03.subsample.0.03.filter.mix.design'

    (shared_label_names, shared_group_names,
     otu_column_names, shared_data) = mothur_files.load_shared_file(
        shared_path)

    (design_group_names,
     design_partition_names) = mothur_files.load_design_file(design_path)

    # apply each selector in turn to the same data
    for selector in (elastic_net,
                     linear_support_vector_machine,
                     rbf_support_vector_machine):
        selector(shared_data, otu_column_names, design_partition_names)
def svm_hmp_2_feature_plot():
    """Train an SMO SVM on two hand-picked OTU features and plot the result.

    NOTE(review): this is a byte-near duplicate of the earlier
    svm_hmp_2_feature_plot definition in this file; defining it again simply
    rebinds the name. Consider deleting one copy.

    Fixes vs. original: removed un-Pythonic trailing semicolons, an unused
    StandardScaler instance, and dead commented-out code; corrected the
    'shaped of' typo in one message.
    """
    print('hazzah!')

    shared_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.shared'
    design_file_path = '/home/jlynch/gsoc2013/data/Stool.0.03.subsample.0.03.filter.mix.design'

    shared_data = mothur_files.load_shared_file(shared_file_path)
    design_data = mothur_files.load_design_file(design_file_path)

    otu1 = 'Otu29878'
    otu2 = 'Otu29552'

    # where are Otu29741 and Otu29678
    n_otu1 = shared_data.otu_column_names.index(otu1)
    n_otu2 = shared_data.otu_column_names.index(otu2)

    print('{} is on column {}'.format(otu1, n_otu1))
    print('{} is on column {}'.format(otu2, n_otu2))

    print('shape of design_data.class_number_for_row {}'.format(
        design_data.class_number_for_row.shape))
    # NOTE(review): despite its name, class_zero selects label 2.0 — confirm
    class_zero = design_data.class_number_for_row == 2.0
    class_one = design_data.class_number_for_row == 1.0
    print('class zero count: {}'.format(np.sum(class_zero)))
    print('class one count: {}'.format(np.sum(class_one)))
    two_labels = np.logical_or(class_zero, class_one)
    print('shape of two_labels: {}'.format(two_labels.shape))
    label_index = np.arange(design_data.class_number_for_row.shape[0])
    reduced_label_index = label_index[two_labels[:, 0]]
    print('reduced_label_index: {}'.format(reduced_label_index))

    # keep only the rows belonging to the two selected classes
    two_labels_otu_frequency = shared_data.otu_frequency[reduced_label_index, :]
    print('shape of two_labels_otu_frequency: {}'.format(
        two_labels_otu_frequency.shape))

    # keep only the two selected OTU columns
    reduced_otu_frequency = two_labels_otu_frequency[:, [n_otu1, n_otu2]]
    print('shape of reduced_otu_frequency: {}'.format(
        reduced_otu_frequency.shape))

    smo.smo(reduced_otu_frequency,
            design_data.class_number_for_row[two_labels], 0.5)

    pl.xlabel(otu1)
    pl.ylabel(otu2)
    pl.gca().set_xticklabels([])
    pl.gca().set_yticklabels([])
    pl.show()
# Exemple #5
# 0
def select_features():
    """Apply elastic net and two SVM feature selectors to the stool data.

    NOTE: duplicates the select_features defined earlier in this file;
    defining it again rebinds the name.
    """
    shared_file = 'data/Stool.0.03.subsample.0.03.filter.shared'
    design_file = 'data/Stool.0.03.subsample.0.03.filter.mix.design'

    shared_label_names, shared_group_names, otu_column_names, shared_data = (
        mothur_files.load_shared_file(shared_file))

    design_group_names, design_partition_names = (
        mothur_files.load_design_file(design_file))

    elastic_net(shared_data, otu_column_names, design_partition_names)
    linear_support_vector_machine(
        shared_data, otu_column_names, design_partition_names)
    rbf_support_vector_machine(
        shared_data, otu_column_names, design_partition_names)
# Exemple #6
# 0
def evaluate_enet():
    """Load shared/design files named on the command line and run a
    one-vs-one elastic net on them.

    Fixes vs. original: removed dead commented-out code; added argument
    help strings for consistency with evaluate_svm.
    """
    argparser = argparse.ArgumentParser()
    argparser.add_argument("shared_file_path", help="<path to shared file>")
    argparser.add_argument("design_file_path", help="<path to design file>")
    args = argparser.parse_args()

    print("shared file path: {0.shared_file_path}".format(args))
    print("design file path: {0.design_file_path}".format(args))

    shared_data = mothur_files.load_shared_file(args.shared_file_path)
    design_data = mothur_files.load_design_file(args.design_file_path)

    elastic_net_ovo(shared_data, design_data)
def evaluate_enet():
    """Run one-vs-one elastic net on the shared/design files whose paths
    are given as positional command-line arguments.

    NOTE: duplicates the evaluate_enet defined earlier in this file.
    """
    cli = argparse.ArgumentParser()
    for positional in ("shared_file_path", "design_file_path"):
        cli.add_argument(positional)
    args = cli.parse_args()

    print("shared file path: {0.shared_file_path}".format(args))
    print("design file path: {0.design_file_path}".format(args))

    # load order preserved: shared file first, then design file
    shared_data = mothur_files.load_shared_file(args.shared_file_path)
    design_data = mothur_files.load_design_file(args.design_file_path)

    elastic_net_ovo(shared_data, design_data)