Esempio n. 1
0
 def create_folded_dataset(self, window_size):
     """Build a ``dataset.FoldedDataset`` for the given *window_size*.

     The positive/negative pair comes from ``feature.create_dataset`` applied
     to this instance's ``bindingResidueData`` and ``pssmData``; the fold
     count, undersampling and shuffle settings are read from the instance.
     """
     positives, negatives = feature.create_dataset(
         self.bindingResidueData,
         self.pssmData,
         window_size,
     )
     return dataset.FoldedDataset(
         positives,
         negatives,
         fold=self.fold,
         undersampling=self.undersampling,
         shuffle=self.shuffle,
     )
Esempio n. 2
0
def create_positive_and_negative_dataset(window_size, sequence_length):
    """Build a small synthetic positive/negative dataset pair.

    Writes a binding-residue record file and a PSSM record file for three
    fake UniProt entries (AAAAAA, BBBBBB, CCCCCC), parses them with
    ``feature.parse_record_files`` and feeds the result to
    ``feature.create_dataset``.

    Each PSSM has ``sequence_length`` rows of 40 tab-separated columns: in
    the first 20 columns the entry on the diagonal is ``score`` and every
    other entry is ``-score`` (score is 1, 2, 3 for the three entries); the
    last 20 columns are all ``5``.

    The record files live in a private temporary directory that is removed
    before returning, instead of fixed ``/tmp`` paths that collide between
    concurrent runs and are never cleaned up.

    Args:
        window_size: forwarded unchanged to ``feature.create_dataset``.
        sequence_length: number of rows written for each PSSM.

    Returns:
        The ``(positive_dataset, negative_dataset)`` tuple produced by
        ``feature.create_dataset``.
    """
    import os
    import shutil
    import tempfile

    uniprot_prefix = "http://purl.uniprot.org/uniprot/"
    # (accession, binding-record fields, PSSM diagonal score)
    # NOTE(review): the numeric fields look like binding residue positions
    # -- confirm against feature.parse_record_files.
    records = [
        ("AAAAAA", "0 1 2", 1),
        ("BBBBBB", "9", 2),
        ("CCCCCC", "7 2", 3),
    ]

    def _pssm_text(score):
        # One text line per sequence position; no trailing newline.
        hit, miss = str(score), str(-score)
        rows = [
            [hit if col == row else miss for col in range(20)] + ['5'] * 20
            for row in range(sequence_length)
        ]
        return '\n'.join('\t'.join(cells) for cells in rows)

    workdir = tempfile.mkdtemp()
    try:
        bindres_file = os.path.join(workdir, "bindingData.txt")
        pssms_file = os.path.join(workdir, "pssms.txt")
        with open(bindres_file, "w") as fp:
            for accession, fields, _score in records:
                fp.write("{}{} {}\n".format(uniprot_prefix, accession, fields))
        with open(pssms_file, "w") as fp:
            for accession, _fields, score in records:
                fp.write(">{}{}\n".format(uniprot_prefix, accession))
                fp.write(_pssm_text(score) + "\n")
        bindingResidueData, pssmData = feature.parse_record_files(
            bindres_file, pssms_file)
        # Build the datasets while the files still exist, in case the parsed
        # records read from disk lazily.
        positive_dataset, negative_dataset = feature.create_dataset(
            bindingResidueData, pssmData, window_size)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
    return positive_dataset, negative_dataset
Esempio n. 3
0
 def create_folded_dataset(self, window_size):
     """Return a ``dataset.FoldedDataset`` built for *window_size*.

     ``feature.create_dataset`` turns the instance's binding-residue and PSSM
     data into a (positive, negative) pair, which is then wrapped together
     with the instance's ``fold``, ``undersampling`` and ``shuffle`` settings.
     """
     dataset_pair = feature.create_dataset(
         self.bindingResidueData, self.pssmData, window_size)
     # Unpacking as a separate step still enforces the two-element result.
     positives, negatives = dataset_pair
     make_folded = dataset.FoldedDataset
     return make_folded(
         positives, negatives,
         fold=self.fold, undersampling=self.undersampling, shuffle=self.shuffle)
Esempio n. 4
0
    indim = 21 * (2 * window_size + 1)
    clf = svm.SVC(C=cost, gamma=gamma, class_weight='auto')
    clf.fit(train_dataset, train_labels)
    return clf
 

def create_classifier(method_and_genes, positive_dataset, negative_dataset):
    """Train the classifier named by the first element of *method_and_genes*.

    The first element selects the method ("neuralNetwork", "randomForest" or
    "SVM"); the remaining elements are passed on as the genes to the matching
    ``create_*_classifier`` function together with both datasets.

    Raises:
        ValueError: if the method name is not one of the three supported ones.
    """
    method = method_and_genes[0]
    genes = method_and_genes[1:]

    if method == "neuralNetwork":
        return create_NN_classifier(genes, positive_dataset, negative_dataset)
    if method == "randomForest":
        return create_RF_classifier(genes, positive_dataset, negative_dataset)
    if method == "SVM":
        return create_SVM_classifier(genes, positive_dataset, negative_dataset)

    message = "method must be neuralNetwork or randomForest or SVM [{}]".format(method)
    raise ValueError(message)


if __name__ == "__main__":
    # Script entry point: read the chosen method and genes, build the
    # datasets, train the matching classifier and pickle it to disk.
    arguments = docopt(__doc__)
    (best_chromosome_file,
     bindres_file,
     pssms_file,
     output_pickled_model_file) = (
        arguments['<best_chromosome_file>'],
        arguments['<binding_residue_file>'],
        arguments['<pssms_file>'],
        arguments['<output_pickled_model_file>'],
    )

    method_and_genes = common.get_method_and_genes(best_chromosome_file)
    bindingResidueData, pssmData = feature.parse_record_files(
        bindres_file,
        pssms_file,
    )
    # Element 3 of method_and_genes is converted to int and passed as the
    # third argument of feature.create_dataset.
    positive_dataset, negative_dataset = feature.create_dataset(
        bindingResidueData,
        pssmData,
        int(method_and_genes[3]),
    )
    clf_or_net = create_classifier(
        method_and_genes,
        positive_dataset,
        negative_dataset,
    )

    with open(output_pickled_model_file, 'wb') as fp:
        pickle.dump(clf_or_net, fp)
Esempio n. 5
0
def create_classifier(method_and_genes, positive_dataset, negative_dataset):
    """Dispatch to the classifier builder selected by *method_and_genes*.

    ``method_and_genes[0]`` is the method name and ``method_and_genes[1:]``
    are the genes handed to the selected builder along with the positive and
    negative datasets. Supported names are "neuralNetwork", "randomForest"
    and "SVM"; anything else raises ``ValueError``.
    """
    method, genes = method_and_genes[0], method_and_genes[1:]

    # Choose the builder first, then call it in exactly one place.
    if method == "neuralNetwork":
        build = create_NN_classifier
    elif method == "randomForest":
        build = create_RF_classifier
    elif method == "SVM":
        build = create_SVM_classifier
    else:
        raise ValueError(
            "method must be neuralNetwork or randomForest or SVM [{}]".format(method))
    return build(genes, positive_dataset, negative_dataset)


if __name__ == "__main__":
    # Command-line driver. Steps, in order:
    #   1. parse the command line with docopt against the module docstring;
    #   2. load the method name and genes from the best-chromosome file;
    #   3. parse the binding-residue and PSSM record files;
    #   4. build the positive/negative datasets and train the classifier;
    #   5. pickle the trained model to the requested output path.
    arguments = docopt(__doc__)
    best_chromosome_file = arguments['<best_chromosome_file>']
    bindres_file = arguments['<binding_residue_file>']
    pssms_file = arguments['<pssms_file>']
    output_pickled_model_file = arguments['<output_pickled_model_file>']

    method_and_genes = common.get_method_and_genes(best_chromosome_file)

    parsed_records = feature.parse_record_files(bindres_file, pssms_file)
    bindingResidueData, pssmData = parsed_records

    # method_and_genes[3] is cast to int for feature.create_dataset's third
    # argument.
    dataset_pair = feature.create_dataset(
        bindingResidueData, pssmData, int(method_and_genes[3]))
    positive_dataset, negative_dataset = dataset_pair

    clf_or_net = create_classifier(
        method_and_genes, positive_dataset, negative_dataset)

    with open(output_pickled_model_file, 'wb') as fp:
        pickle.dump(clf_or_net, fp)