Exemple #1
0
def run_main(method, fs_functions, score_name, n_clfs=5, dataset_name="PC4"):
    """Run one training method on a dataset and return the saved plot.

    Args:
        method: Method identifier forwarded to ``methods.run_method``.
        fs_functions: Forwarded unchanged to ``methods.run_method``.
        score_name: Forwarded to ``methods.run_method`` and also used as the
            y-axis label.
        n_clfs: Forwarded to ``methods.run_method`` (default 5).
        dataset_name: Name handed to ``ut.read_dataset``; also used as the
            plot title.

    Returns:
        A ``BytesIO`` buffer, rewound to position 0, holding whatever
        ``pl.savefig`` wrote (presumably the current matplotlib figure --
        confirm what ``pl`` is bound to in this module).
    """
    banner = "\nDATASET: %s\nMETHOD: %s\n" % (dataset_name, method)
    print(banner)

    # Fixed seed for NumPy's global RNG, set before anything else runs.
    np.random.seed(1)

    # Step 1: load the dataset. The third returned value is not used here.
    features, labels, _ = ut.read_dataset("datasets/",
                                          dataset_name=dataset_name)

    # Step 2: run the training method with the caller's settings.
    run_options = {
        "n_clfs": n_clfs,
        "fs_functions": fs_functions,
        "score_name": score_name,
    }
    methods.run_method(method, features, labels, **run_options)

    # Decorate the plot only after the method has run, as before.
    pl.title(dataset_name)
    pl.ylabel(score_name)
    pl.legend(loc="best")

    # Save into memory and rewind so the caller can read from the start.
    buffer = BytesIO()
    pl.savefig(buffer)
    buffer.seek(0)
    return buffer
Exemple #2
0
    # NOTE(review): `parser` is created above this excerpt; the arguments
    # behind `args.method` and `args.fs_functions` are presumably registered
    # there as well -- confirm against the full file.
    parser.add_argument('-d', '--dataset_name', default="ant")

    parser.add_argument('-n', '--n_clfs', default=5, type=int)

    # The score name is mandatory and restricted to these two values.
    parser.add_argument('-s', '--score_name', required=True,
                        choices=["auc", "gmeans"])

    args = parser.parse_args()
    method = args.method
    dataset_name = args.dataset_name
    fs_functions = args.fs_functions
    n_clfs = args.n_clfs
    score_name = args.score_name

    print("\nDATASET: %s\nMETHOD: %s\n" % (dataset_name, method))
    # Fixed seed for NumPy's global RNG, set before the dataset is read.
    np.random.seed(1)

    ##### 1. ------ GET DATASET
    X, y, ft_names = ut.read_dataset("datasets/", dataset_name=dataset_name)
    pl.title(dataset_name)
    # Label the y-axis with the score that was actually requested. The label
    # used to be the fixed string "AUC", which was wrong for "gmeans" runs.
    # Upper-casing keeps the "auc" case rendering as "AUC" exactly as before.
    pl.ylabel(score_name.upper())

    ##### 2. ------- RUN TRAINING METHOD
    methods.run_method(method, X, y, n_clfs=n_clfs,
                       fs_functions=fs_functions,
                       score_name=score_name)

    pl.legend(loc="best")
    pl.show()
Exemple #3
0
def _resolve_chain(pdb_structure, chain, arg_name):
    """Return the chain id to use for ``pdb_structure``.

    Args:
        pdb_structure: Structure whose chains are listed via ``get_chains``.
        chain: Chain id requested by the user; falsy means "not given".
        arg_name: Name of the command-line argument (``'chain1'`` or
            ``'chain2'``), used only in the error message.

    Returns:
        ``chain`` if it is one of the structure's chains, or the first chain
        of the structure when ``chain`` is falsy.

    Exits the process (via ``exit()``) after printing a message when a chain
    id was given but is not present in the structure.
    """
    chains = get_chains(pdb_structure)
    if not chain:
        # Default to the first chain of this same structure.
        return chains[0]
    if chain not in chains:
        print(
            'Input argument {} {} does not match any chains in the input structure ({}). Aborting...'
            .format(arg_name, chain, chains))
        exit()
    return chain


def run_rawted(args):
    """Compare two structures for every tree descriptor / method pair.

    Reads ``trees``, ``methods``, ``structure1``, ``chain1``, ``structure2``,
    ``chain2`` and ``save_folder`` from ``args``. For each tree descriptor it
    builds one tree per structure and a ``ted_matrix`` via
    ``zss_with_descriptor``. For each method it then calls ``run_method`` and
    prints the resulting RMSD and PSI rounded to 4 decimals.

    When ``save_folder`` is truthy, ``run_method`` additionally receives
    ``save_args`` holding copies of both structures, the chain ids and two
    output file names inside ``save_folder``.

    Args:
        args: Namespace-like object carrying the attributes listed above.

    Returns:
        None. Results are printed to stdout.
    """
    trees = args.trees
    methods = args.methods

    structure_file1 = args.structure1
    chain1 = args.chain1
    structure_file2 = args.structure2
    chain2 = args.chain2

    save_folder = args.save_folder
    pdb_structure1 = load_pdb_structure(structure_file1)
    pdb_structure2 = load_pdb_structure(structure_file2)

    dssr1 = DSSRWrapper.load(structure_file1)
    dssr2 = DSSRWrapper.load(structure_file2)
    clean_dssr_output()

    # Each chain id is resolved against its OWN structure. The default for
    # chain1 used to be taken from structure 2, which could select a chain id
    # that does not exist in structure 1 and break the lookup below.
    chain1 = _resolve_chain(pdb_structure1, chain1, 'chain1')
    chain2 = _resolve_chain(pdb_structure2, chain2, 'chain2')

    pdb_chain1 = pdb_structure1[0][chain1]
    pdb_chain2 = pdb_structure2[0][chain2]
    dssr_nts1 = dssr1.nts_for_chain(chain1)
    dssr_nts2 = dssr2.nts_for_chain(chain2)

    for tree_descriptor in trees:
        # The text before the first comma of the descriptor selects the tree
        # version.
        tree_version = tree_descriptor.split(',')[0]
        tree1 = tree_from_version(tree_version).from_nts(dssr_nts1, pdb_chain1)
        tree2 = tree_from_version(tree_version).from_nts(dssr_nts2, pdb_chain2)

        ted_matrix = zss_with_descriptor(tree1, tree2, tree_descriptor)

        # Descriptor with commas replaced by dashes, for output file names;
        # it does not depend on the method, so compute it once per tree.
        descriptor_tag = tree_descriptor.replace(',', '-')

        for method in methods:
            save_args = None
            if save_folder:
                # Fresh copies of both structures for every method run.
                save_args = {
                    'structure1': pdb_structure1.copy(),
                    'structure2': pdb_structure2.copy(),
                    'chain1': chain1,
                    'chain2': chain2,
                    'filename1': '{}/{}_{}_s1.pdb'.format(
                        save_folder, descriptor_tag, method.lower()),
                    'filename2': '{}/{}_{}_s2.pdb'.format(
                        save_folder, descriptor_tag, method.lower()),
                }

            # Coordinates are re-extracted for every method, as before.
            rmsd, psi, _, _ = run_method(
                method,
                coordinates_from_pdb_chain(pdb_chain1),
                coordinates_from_pdb_chain(pdb_chain2),
                tree1,
                tree2,
                ted_matrix,
                save_args=save_args)
            print('--------------------')
            print('Rawted finished.')
            print('Structure 1: {}'.format(structure_file1))
            print('Structure 2: {}'.format(structure_file2))
            print('Tree descriptor: {}'.format(tree_descriptor))
            print('Method: {}'.format(method))
            print('--------------------')
            print('>> Results <<')
            print('RMSD: {}'.format(round(rmsd, 4)))
            print('PSI: {}'.format(round(psi, 4)))
            print('--------------------')
    # NOTE(review): this run of statements appears to belong to a different
    # routine than the code above it (its own header is not visible here).
    # `method` and `fs_functions` are used below but are not assigned in these
    # lines -- confirm where they come from.
    n_clfs = args.n_clfs
    problem = args.problem
    use_unbalanced_data = args.unbalanced
    # Fixed seed for NumPy's global RNG, set before the dataset is read.
    np.random.seed(1)

    # GET DATASET
    feature_matrix, facies_vector = ut.read_dataset()
    # draw_data_histogram(facies_vector)
    ut.get_feature_statistics(feature_matrix)

    # Preprocessing
    feature_matrix = preprocessing.normalize(feature_matrix, facies_vector)

    # convert the dataset to be a binary classification problem for class x over rest
    # (presumably the literal 2 is the class treated as "x" -- TODO confirm)
    if problem == "ovr":
        facies_vector = ut.convert_to_binary_classification(2, facies_vector)

    # Balancing is applied unless the caller asked for the unbalanced data.
    if not use_unbalanced_data:
        feature_matrix, facies_vector = preprocessing.balance(feature_matrix, facies_vector)

    # RUN TRAINING METHOD
    print("Method: ", method)

    methods.run_method(method, feature_matrix, facies_vector, n_clfs=n_clfs,
                       fs_functions=fs_functions)

    pl.legend(loc="best")
    pl.show()