def create_folded_dataset(self, window_size):
    """Build a ``dataset.FoldedDataset`` for the given window size.

    The positive and negative datasets come from ``feature.create_dataset``
    applied to ``self.bindingResidueData`` and ``self.pssmData``. The
    instance's ``fold``, ``undersampling`` and ``shuffle`` settings are
    forwarded unchanged as keyword arguments.

    Args:
        window_size: Passed through to ``feature.create_dataset``.

    Returns:
        The ``dataset.FoldedDataset`` built from both datasets.
    """
    positives, negatives = feature.create_dataset(
        self.bindingResidueData, self.pssmData, window_size)
    return dataset.FoldedDataset(
        positives,
        negatives,
        fold=self.fold,
        undersampling=self.undersampling,
        shuffle=self.shuffle)
def _diagonal_pssm_text(score, sequence_length):
    """Return the text of a synthetic PSSM with ``sequence_length`` rows.

    Each row has 20 profile columns followed by 20 columns fixed at '5',
    joined by tabs; rows are joined by newlines (no trailing newline).
    In row ``j`` the profile column whose index equals ``j`` holds
    ``score`` and every other profile column holds ``-score``, so rows
    with index 20 or above contain no ``score`` entry.
    """
    hit, miss = str(score), str(-score)
    trailing = ['5'] * 20
    rows = []
    for row_index in range(sequence_length):
        profile = [hit if column == row_index else miss
                   for column in range(20)]
        rows.append('\t'.join(profile + trailing))
    return '\n'.join(rows)


def create_positive_and_negative_dataset(window_size, sequence_length):
    """Write fixture record files and build datasets from them.

    Three synthetic records (AAAAAA, BBBBBB, CCCCCC) are written to
    ``/tmp/bindingData.txt`` and ``/tmp/pssms.txt``. Both files are then
    parsed with ``feature.parse_record_files`` and the result is handed to
    ``feature.create_dataset``.

    Args:
        window_size: Passed through to ``feature.create_dataset``.
        sequence_length: Number of PSSM rows written for every record.

    Returns:
        The ``(positive_dataset, negative_dataset)`` pair returned by
        ``feature.create_dataset``.
    """
    # NOTE(review): fixed paths under /tmp are shared by concurrent runs;
    # they are kept as-is because other code may rely on these locations.
    bindres_file = "/tmp/bindingData.txt"
    pssms_file = "/tmp/pssms.txt"
    uri_prefix = "http://purl.uniprot.org/uniprot/"
    # (accession, numbers written after the URI -- presumably binding
    # residue positions, PSSM diagonal score)
    records = [
        ("AAAAAA", "0 1 2", 1),
        ("BBBBBB", "9", 2),
        ("CCCCCC", "7 2", 3),
    ]

    with open(bindres_file, "w") as fp:
        for accession, residues, _ in records:
            fp.write("{}{} {}\n".format(uri_prefix, accession, residues))

    with open(pssms_file, "w") as fp:
        for accession, _, score in records:
            fp.write(">{}{}\n".format(uri_prefix, accession))
            # A zero-length sequence still yields a single empty line.
            fp.write(_diagonal_pssm_text(score, sequence_length) + "\n")

    bindingResidueData, pssmData = feature.parse_record_files(
        bindres_file, pssms_file)
    return feature.create_dataset(bindingResidueData, pssmData, window_size)
def create_folded_dataset(self, window_size):
    """Return a folded dataset built for ``window_size``.

    ``feature.create_dataset`` turns ``self.bindingResidueData`` and
    ``self.pssmData`` into a positive and a negative dataset. Both are then
    wrapped in ``dataset.FoldedDataset`` together with the ``fold``,
    ``undersampling`` and ``shuffle`` values stored on the instance.
    """
    datasets = feature.create_dataset(
        self.bindingResidueData, self.pssmData, window_size)
    positive, negative = datasets

    make_folds = dataset.FoldedDataset
    fold_options = {
        'fold': self.fold,
        'undersampling': self.undersampling,
        'shuffle': self.shuffle,
    }
    return make_folds(positive, negative, **fold_options)
# NOTE(review): this line is a whitespace-collapsed excerpt made of three parts.
#   1. The tail of a function whose `def` header is not visible here: it
#      computes `indim`, fits an `svm.SVC` (built from `cost` and `gamma`) on
#      `train_dataset` / `train_labels`, and returns the fitted classifier.
#   2. `create_classifier`, which splits `method_and_genes` into the method
#      name (first element) and the remaining genes, delegates to
#      `create_NN_classifier`, `create_RF_classifier` or `create_SVM_classifier`
#      for "neuralNetwork", "randomForest" or "SVM", and raises ValueError for
#      any other method name.
#   3. The script entry point, which reads four docopt arguments, loads the
#      method and genes via `common.get_method_and_genes`, builds the datasets
#      using `int(method_and_genes[3])` as the window-size argument, and
#      pickles the resulting model to the requested output file.
indim = 21 * (2 * window_size + 1) clf = svm.SVC(C=cost, gamma=gamma, class_weight='auto') clf.fit(train_dataset, train_labels) return clf def create_classifier(method_and_genes, positive_dataset, negative_dataset): method, genes = method_and_genes[0], method_and_genes[1:] if method == "neuralNetwork": return create_NN_classifier(genes, positive_dataset, negative_dataset) elif method == "randomForest": return create_RF_classifier(genes, positive_dataset, negative_dataset) elif method == "SVM": return create_SVM_classifier(genes, positive_dataset, negative_dataset) else: raise ValueError("method must be neuralNetwork or randomForest or SVM [{}]".format(method)) if __name__ == "__main__": arguments = docopt(__doc__) best_chromosome_file = arguments['<best_chromosome_file>'] bindres_file = arguments['<binding_residue_file>'] pssms_file = arguments['<pssms_file>'] output_pickled_model_file = arguments['<output_pickled_model_file>'] method_and_genes = common.get_method_and_genes(best_chromosome_file) bindingResidueData, pssmData = feature.parse_record_files(bindres_file, pssms_file) positive_dataset, negative_dataset = feature.create_dataset(bindingResidueData, pssmData, int(method_and_genes[3])) clf_or_net = create_classifier(method_and_genes, positive_dataset, negative_dataset) with open(output_pickled_model_file, 'wb') as fp: pickle.dump(clf_or_net, fp)
def create_classifier(method_and_genes, positive_dataset, negative_dataset):
    """Build the classifier selected by the first entry of ``method_and_genes``.

    Args:
        method_and_genes: Sequence whose first element is the method name
            ("neuralNetwork", "randomForest" or "SVM"); the remaining
            elements are forwarded as the genes.
        positive_dataset: Forwarded to the selected factory.
        negative_dataset: Forwarded to the selected factory.

    Returns:
        Whatever the selected ``create_*_classifier`` function returns.

    Raises:
        ValueError: If the method name is not one of the three supported.
    """
    method = method_and_genes[0]
    genes = method_and_genes[1:]

    if method == "neuralNetwork":
        return create_NN_classifier(genes, positive_dataset, negative_dataset)
    if method == "randomForest":
        return create_RF_classifier(genes, positive_dataset, negative_dataset)
    if method == "SVM":
        return create_SVM_classifier(genes, positive_dataset, negative_dataset)

    message = "method must be neuralNetwork or randomForest or SVM [{}]".format(method)
    raise ValueError(message)


if __name__ == "__main__":
    # Command-line arguments are parsed from the module docstring.
    arguments = docopt(__doc__)
    best_chromosome_file = arguments['<best_chromosome_file>']
    bindres_file = arguments['<binding_residue_file>']
    pssms_file = arguments['<pssms_file>']
    output_pickled_model_file = arguments['<output_pickled_model_file>']

    method_and_genes = common.get_method_and_genes(best_chromosome_file)
    bindingResidueData, pssmData = feature.parse_record_files(
        bindres_file,
        pssms_file)
    # Element 3 of method_and_genes is used as the window-size argument.
    positive_dataset, negative_dataset = feature.create_dataset(
        bindingResidueData,
        pssmData,
        int(method_and_genes[3]))

    clf_or_net = create_classifier(
        method_and_genes, positive_dataset, negative_dataset)

    # Persist the model in binary mode.
    with open(output_pickled_model_file, 'wb') as fp:
        pickle.dump(clf_or_net, fp)