コード例 #1
0
 def __init__(self,
              bindres_file,
              pssms_file,
              log_file,
              method,
              fold=5,
              undersampling=True,
              shuffle=True,
              maxEpochs_for_trainer=10,
              geneScale=(0, 10)):
     """Check the arguments, parse the record files and store the settings.

     Args:
         bindres_file: first argument handed to ``feature.parse_record_files``.
         pssms_file: second argument handed to ``feature.parse_record_files``.
         log_file: stored unchanged as ``self.log_file``.
         method: must be "neuralNetwork", "randomForest" or "SVM".
         fold: stored as ``self.fold``.
         undersampling: stored as ``self.undersampling``.
         shuffle: stored as ``self.shuffle``.
         maxEpochs_for_trainer: stored as ``self.maxEpochs_for_trainer``.
         geneScale: pair ``(0, n)`` where ``n`` is greater than 0.

     Raises:
         ValueError: if ``geneScale[0]`` is not 0, if ``geneScale[1]`` is
             less than or equal to ``geneScale[0]``, or if ``method`` is
             not one of the three accepted names.
     """
     # The upper bound is only inspected when the lower bound is valid.
     lower_is_zero = geneScale[0] == 0
     if not lower_is_zero or geneScale[1] <= geneScale[0]:
         raise ValueError("Gene Scale in GA has to be (0, n).  n is greater than 0.")
     if method not in ("neuralNetwork", "randomForest", "SVM"):
         raise ValueError("method must be neuralNetwork or randomForest or SVM [{}]".format(method))

     # Parse both input files before any other attribute is set.
     parsed_records = feature.parse_record_files(bindres_file, pssms_file)
     self.bindingResidueData, self.pssmData = parsed_records

     # Caller-supplied settings, stored as given.
     self.log_file = log_file
     self.method = method
     self.fold = fold
     self.undersampling = undersampling
     self.shuffle = shuffle
     self.maxEpochs_for_trainer = maxEpochs_for_trainer

     # Fixed (low, high) ranges, one dictionary per supported method.
     self.SVMParamScales = {"cost": (-10, 10), "gamma": (-10, 5)}
     self.NNParamScales = {"node_num": (5, 50), "learning_rate": (0.01, 0.1)}
     self.RFParamScales = {"n_estimators": (101, 1001), "max_features": (2, 30)}
     self.windowSizeScales = (1, 19)

     self.geneScale = geneScale
     self.log = {}
コード例 #2
0
def create_positive_and_negative_dataset(window_size, sequence_length):
    """Write small fixture files and build datasets from them.

    Two files are (re)written on every call: ``/tmp/bindingData.txt`` with
    one line of binding-residue indices per record, and ``/tmp/pssms.txt``
    with one ``>``-prefixed header per record followed by
    ``sequence_length`` tab-separated rows of 40 columns.  In row ``j`` the
    first 20 columns hold the record's score at column ``j`` and its
    negation elsewhere; the last 20 columns are all ``5``.

    Args:
        window_size: forwarded to ``feature.create_dataset``.
        sequence_length: number of matrix rows written for each record.

    Returns:
        The ``(positive_dataset, negative_dataset)`` pair produced by
        ``feature.create_dataset``.
    """
    bindres_file = "/tmp/bindingData.txt"
    pssms_file = "/tmp/pssms.txt"
    uri_prefix = "http://purl.uniprot.org/uniprot/"
    # (accession, binding residue indices, diagonal score) for each record.
    records = (
        ("AAAAAA", "0 1 2", "1"),
        ("BBBBBB", "9", "2"),
        ("CCCCCC", "7 2", "3"),
    )

    with open(bindres_file, "w") as fp:
        for accession, residues, _ in records:
            fp.write("{}{} {}\n".format(uri_prefix, accession, residues))

    with open(pssms_file, "w") as fp:
        for accession, _, score in records:
            fp.write(">{}{}\n".format(uri_prefix, accession))
            off_diagonal = "-" + score
            # ``range`` rather than ``xrange`` so this runs on Python 2 and 3.
            rows = [
                "\t".join(
                    [score if i == j else off_diagonal for i in range(20)]
                    + ["5"] * 20
                )
                for j in range(sequence_length)
            ]
            # A trailing newline is written even when there are no rows.
            fp.write("\n".join(rows) + "\n")

    bindingResidueData, pssmData = feature.parse_record_files(bindres_file, pssms_file)
    positive_dataset, negative_dataset = feature.create_dataset(bindingResidueData, pssmData, window_size)
    return positive_dataset, negative_dataset
コード例 #3
0
 def __init__(self, bindres_file, pssms_file, log_file, method,
              fold=5, undersampling=True, shuffle=True,
              maxEpochs_for_trainer=10, geneScale=(0, 10)):
     """Validate inputs, load the record files and remember the settings.

     ``geneScale`` must be ``(0, n)`` with ``n`` greater than 0, and
     ``method`` must be "neuralNetwork", "randomForest" or "SVM";
     otherwise ``ValueError`` is raised before any file is parsed.
     ``bindres_file`` and ``pssms_file`` are passed to
     ``feature.parse_record_files``; every other argument is stored on the
     instance under the same name.
     """
     gene_scale_error = "Gene Scale in GA has to be (0, n).  n is greater than 0."
     # Lower bound first; the upper bound is only read when the lower is 0.
     if geneScale[0] != 0:
         raise ValueError(gene_scale_error)
     if geneScale[1] <= geneScale[0]:
         raise ValueError(gene_scale_error)

     supported_methods = ("neuralNetwork", "randomForest", "SVM")
     if method not in supported_methods:
         raise ValueError(
             "method must be neuralNetwork or randomForest or SVM [{}]".format(method))

     self.bindingResidueData, self.pssmData = feature.parse_record_files(
         bindres_file,
         pssms_file,
     )

     # Settings supplied by the caller.
     self.log_file = log_file
     self.method = method
     self.fold = fold
     self.undersampling = undersampling
     self.shuffle = shuffle
     self.maxEpochs_for_trainer = maxEpochs_for_trainer

     # Constant (low, high) ranges keyed by parameter name.
     self.SVMParamScales = dict(cost=(-10, 10), gamma=(-10, 5))
     self.NNParamScales = dict(node_num=(5, 50), learning_rate=(0.01, 0.1))
     self.RFParamScales = dict(n_estimators=(101, 1001), max_features=(2, 30))
     self.windowSizeScales = (1, 19)

     self.geneScale = geneScale
     self.log = {}
コード例 #4
0
ファイル: create_model.py プロジェクト: clclcocoro/MLwithGA
    indim = 21 * (2 * window_size + 1)
    clf = svm.SVC(C=cost, gamma=gamma, class_weight='auto')
    clf.fit(train_dataset, train_labels)
    return clf
 

def create_classifier(method_and_genes, positive_dataset, negative_dataset):
    """Build a classifier with the builder named by the first element.

    Args:
        method_and_genes: sequence whose first item is the method name
            ("neuralNetwork", "randomForest" or "SVM"); the remaining
            items are passed to the builder as the genes.
        positive_dataset: forwarded unchanged to the builder.
        negative_dataset: forwarded unchanged to the builder.

    Returns:
        Whatever the selected ``create_*_classifier`` function returns.

    Raises:
        ValueError: if the method name is not one of the three above.
    """
    method = method_and_genes[0]
    genes = method_and_genes[1:]

    # Pick the builder first, then make a single call below.
    if method == "neuralNetwork":
        build = create_NN_classifier
    elif method == "randomForest":
        build = create_RF_classifier
    elif method == "SVM":
        build = create_SVM_classifier
    else:
        raise ValueError("method must be neuralNetwork or randomForest or SVM [{}]".format(method))

    return build(genes, positive_dataset, negative_dataset)


if __name__ == "__main__":
    # Script entry point: read the best chromosome, rebuild the dataset,
    # train the matching classifier and pickle it to the output path.
    arguments = docopt(__doc__)

    # Read every command-line argument before doing any work.
    chromosome_path = arguments['<best_chromosome_file>']
    binding_path = arguments['<binding_residue_file>']
    pssm_path = arguments['<pssms_file>']
    model_path = arguments['<output_pickled_model_file>']

    method_and_genes = common.get_method_and_genes(chromosome_path)
    # The element at index 3 is converted to int and used as the window size.
    window_size = int(method_and_genes[3])

    binding_records, pssm_records = feature.parse_record_files(binding_path, pssm_path)
    positives, negatives = feature.create_dataset(binding_records, pssm_records, window_size)
    trained_model = create_classifier(method_and_genes, positives, negatives)

    with open(model_path, 'wb') as model_fp:
        pickle.dump(trained_model, model_fp)
コード例 #5
0
def create_classifier(method_and_genes, positive_dataset, negative_dataset):
    """Dispatch to the classifier builder selected by ``method_and_genes[0]``.

    ``method_and_genes[1:]`` is handed to the builder as the genes, together
    with both datasets.  Accepted method names are "neuralNetwork",
    "randomForest" and "SVM"; any other value raises ``ValueError``.
    """
    method = method_and_genes[0]
    genes = method_and_genes[1:]
    builder_args = (genes, positive_dataset, negative_dataset)

    # Each recognised method returns immediately; falling through is an error.
    if method == "neuralNetwork":
        return create_NN_classifier(*builder_args)
    if method == "randomForest":
        return create_RF_classifier(*builder_args)
    if method == "SVM":
        return create_SVM_classifier(*builder_args)

    message = "method must be neuralNetwork or randomForest or SVM [{}]".format(method)
    raise ValueError(message)


if __name__ == "__main__":
    # Command-line driver: train the classifier described by the best
    # chromosome file and write the pickled result to the output path.
    arguments = docopt(__doc__)
    (best_chromosome_file,
     bindres_file,
     pssms_file,
     output_pickled_model_file) = (
        arguments['<best_chromosome_file>'],
        arguments['<binding_residue_file>'],
        arguments['<pssms_file>'],
        arguments['<output_pickled_model_file>'],
    )

    method_and_genes = common.get_method_and_genes(best_chromosome_file)

    parsed = feature.parse_record_files(bindres_file, pssms_file)
    bindingResidueData, pssmData = parsed

    # The element at index 3 is converted to int and passed as the window size.
    datasets = feature.create_dataset(
        bindingResidueData, pssmData, int(method_and_genes[3]))
    positive_dataset, negative_dataset = datasets

    clf_or_net = create_classifier(
        method_and_genes, positive_dataset, negative_dataset)

    with open(output_pickled_model_file, 'wb') as fp:
        pickle.dump(clf_or_net, fp)