Exemple #1
0
def _rf_build_classifier(y_tr, n_est, random_state):
    """Build the (unfitted) random-forest estimator for the training labels.

    When label 1 is at least twice as frequent as label 0, or at most half
    as frequent, the forest is wrapped in a pipeline that first applies
    ``RandomOverSampler``; otherwise the bare forest is returned.

    NOTE(review): only labels 0 and 1 are counted, also in 3-class mode --
    confirm that this is intended.
    """
    n_pos = sum(y_tr == 1)
    n_neg = sum(y_tr == 0)
    forest = RandomForestClassifier(n_estimators=n_est)
    # 0.5 instead of (1 / 2): the latter is 0 under Python 2 integer division.
    if n_pos >= 2 * n_neg or n_pos <= 0.5 * n_neg:
        sampler = RandomOverSampler(random_state=random_state)
        return make_pipeline(sampler, forest)
    return forest


def main(argv):
    """Command-line entry point for fitting, evaluating or applying a random forest.

    Options (parsed from *argv* with ``getopt``):
        -h  print the help text and exit
        -t  number of trees (required for ``fit`` and ``eval``)
        -c  number of classes, 2 (default) or 3
        -w  task: ``fit``, ``eval`` or ``pred``
        -o  pickled input object: (X_tr, y_tr[, X_te, y_te]) for fit/eval,
            a fitted ``*.machine`` model for pred
        -p  whitespace-delimited table to predict on (required for ``pred``)

    Outputs are written under ``OBJpath`` (fitted model) and ``REPpath``
    (metrics, summaries, predictions); both names, like ``mev`` and
    ``plot_confusion_matrix``, are defined outside this function.

    Exits with status 2 when the options cannot be parsed or a required
    option is missing. Raises ``KeyError`` if ``DeeprimerPATH`` is not set.

    SECURITY: ``-o`` is loaded with ``pickle``; only pass trusted files.
    """
    usage = ('Run_randomforest.py -t <n_estimator> -c <n_class> '
             '-w <fit/pred/eval> -o <preprocessed_object> -p <pred_input>')
    help_text = "\n\n".join([
        "[Command]: " + usage,
        "Example:",
        "1. Fit the model only:",
        "Run_randomforest.py -t 1000 -w fit -o <sample_pre.fit.pickled>",
        "2. Evaluate the model:",
        "Run_randomforest.py -t 1000 -w eval -o <sample_split.eval.pickled>",
        "3. Make prediction:",
        "Run_randomforest.py -t 1000 -w pred -o <sample_pre.fit_RF.machine> -p <pred_input>",
    ]) + "\n"

    # Not used below; kept so that a missing DeeprimerPATH still fails fast.
    dpath = os.environ["DeeprimerPATH"]
    try:
        opts, args = getopt.getopt(argv, "ht:w:o:p:c:")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    n_est = None
    task = None
    obj = None
    pred_path = None
    nclass = 2
    for opt, arg in opts:
        if opt == '-h':
            print(help_text)
            sys.exit()
        elif opt == "-t":
            n_est = int(arg)
        elif opt == "-w":
            task = arg
        elif opt == "-o":
            obj = arg
        elif opt == "-c":
            nclass = int(arg)
        elif opt == "-p":
            pred_path = arg

    # Report missing required options as a usage error instead of crashing
    # later with an UnboundLocalError.
    if task is None or obj is None:
        print(usage)
        sys.exit(2)
    if task not in ("fit", "eval", "pred"):
        print("Not supported task. Choose from fit, pred, eval" + "\n")
        sys.exit()
    if (task in ("fit", "eval") and n_est is None) or (task == "pred"
                                                       and pred_path is None):
        print(usage)
        sys.exit(2)
    # Validate the class count before any expensive work (training/prediction).
    if nclass not in (2, 3):
        if task == "fit":
            print("Not support class over 3. Try Regress." + "\n")
        else:
            print("Not support class over 3. Try Regression.")
        sys.exit()

    RANDOM_STATE = 42

    if task == "fit":
        tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            pre_obj = pickle.load(f)
        X_tr = pre_obj[0]
        y_tr = pre_obj[1]

        clf = _rf_build_classifier(y_tr, n_est, RANDOM_STATE)
        fitted = clf.fit(X_tr, y_tr)

        suffix = "_RF.machine" if nclass == 2 else "_3C_RF.machine"
        with open(OBJpath + "/" + tag + suffix, 'wb') as f:
            pickle.dump(fitted, f, pickle.HIGHEST_PROTOCOL)

    elif task == "eval":
        tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            pre_obj = pickle.load(f)
        X_tr = pre_obj[0]
        y_tr = pre_obj[1]
        X_te = pre_obj[2]
        y_te = pre_obj[3]

        clf = _rf_build_classifier(y_tr, n_est, RANDOM_STATE)
        fitted = clf.fit(X_tr, y_tr)

        if nclass == 2:
            fpr, tpr, precision, recall, metrics = mev.performance_stat(
                X_te, y_te, fitted)
            mev.plot_eval(fpr, tpr, precision, recall, metrics, tag)
            # Text mode: the report lines are str, not bytes.
            with open(REPpath + "/" + tag + "_RF.metrics", 'w') as f:
                for key, value in metrics.items():
                    f.write("%s %.3f\n" % (key, value))
        else:
            pred_c = fitted.predict(X_te)
            cnf_matrix = confusion_matrix(y_te, pred_c)
            np.set_printoptions(precision=2)
            class_names = ["0", "1", "2"]

            plot_confusion_matrix(cnf_matrix, class_names, True,
                                  'Normalized confusion matrix', tag)
            with open(REPpath + "/" + tag + "_3C_RF_eval.summay", 'w') as f:
                f.write(
                    classification_report(y_te,
                                          pred_c,
                                          target_names=class_names))

    else:  # task == "pred"
        # sep=r"\s+" is the documented equivalent of delim_whitespace=True.
        pred = pd.read_csv(pred_path, sep=r"\s+").values
        tag1 = re.sub(r'\.machine$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            fitted_obj = pickle.load(f)
        pred_p = fitted_obj.predict_proba(pred)
        pred_c = fitted_obj.predict(pred)

        # Columns are inserted highest class first, then the predicted label.
        columns = {}
        for k in reversed(range(nclass)):
            columns["Probability_%d" % k] = pred_p[:, k]
        columns["Predited_class"] = pred_c
        df = pd.DataFrame(columns)
        df.to_csv(REPpath + "/" + tag1 + ".%dclassification" % nclass,
                  index=False,
                  sep='\t')
Exemple #2
0
def _svm_build_classifier(y_tr, pty, random_state):
    """Build the (unfitted) RBF-kernel SVM estimator for the training labels.

    When label 1 is at least twice as frequent as label 0, or at most half
    as frequent, the SVM is wrapped in a pipeline that first applies
    ``RandomOverSampler``; otherwise the bare SVM is returned.
    """
    n_pos = sum(y_tr == 1)
    n_neg = sum(y_tr == 0)
    classifier = svm.SVC(kernel='rbf', probability=True, C=pty)
    # 0.5 instead of (1 / 2): the latter is 0 under Python 2 integer division.
    if n_pos >= 2 * n_neg or n_pos <= 0.5 * n_neg:
        sampler = RandomOverSampler(random_state=random_state)
        return make_pipeline(sampler, classifier)
    return classifier


def main(argv):
    """Command-line entry point for fitting, evaluating or applying an SVM.

    Options (parsed from *argv* with ``getopt``):
        -h  print the help text and exit
        -C  penalty passed to ``svm.SVC`` as ``C`` (required for ``fit``
            and ``eval``); fractional values are accepted
        -w  task: ``fit``, ``eval`` or ``pred``
        -o  pickled input object: (X_tr, y_tr[, X_te, y_te]) for fit/eval,
            a fitted ``*.machine`` model for pred
        -p  whitespace-delimited table to predict on (required for ``pred``)

    Outputs are written under ``OBJpath`` (fitted model) and ``REPpath``
    (metrics, predictions); both names, like ``mev``, are defined outside
    this function.

    Exits with status 2 when the options cannot be parsed or a required
    option is missing. Raises ``KeyError`` if ``DeeprimerPATH`` is not set.

    SECURITY: ``-o`` is loaded with ``pickle``; only pass trusted files.
    """
    usage = ('Run_SVM.py -C <penalty> -w <fit/pred/eval> '
             '-o <preprocessed_object> -p <pred_input>')
    help_text = "\n\n".join([
        "[Command]: " + usage,
        "Example:",
        "1. Fit the model only:",
        "Run_SVM.py -C 1000 -w fit -o <sample_pre.fit.pickled>",
        "2. Evaluate the model:",
        "Run_SVM.py -C 1000 -w eval -o <sample_split.eval.pickled>",
        "3. Make prediction:",
        "Run_SVM.py -C 1000 -w pred -o <sample_pre.fit_SVM.machine> -p <pred_input>",
    ]) + "\n"

    # Not used below; kept so that a missing DeeprimerPATH still fails fast.
    dpath = os.environ["DeeprimerPATH"]
    try:
        opts, args = getopt.getopt(argv, "hC:w:o:p:")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    pty = None
    task = None
    obj = None
    pred_path = None
    for opt, arg in opts:
        if opt == '-h':
            print(help_text)
            sys.exit()
        elif opt == "-C":
            # SVC's C is a float; float() accepts every value int() did.
            pty = float(arg)
        elif opt == "-w":
            task = arg
        elif opt == "-o":
            obj = arg
        elif opt == "-p":
            pred_path = arg

    # Report missing required options as a usage error instead of crashing
    # later with an UnboundLocalError.
    if task is None or obj is None:
        print(usage)
        sys.exit(2)
    if task not in ("fit", "eval", "pred"):
        print("Not supported task. Choose from fit, pred, eval" + "\n")
        sys.exit()
    if (task in ("fit", "eval") and pty is None) or (task == "pred"
                                                     and pred_path is None):
        print(usage)
        sys.exit(2)

    RANDOM_STATE = 42

    if task == "fit":
        tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            pre_obj = pickle.load(f)
        X_tr = pre_obj[0]
        y_tr = pre_obj[1]

        clf = _svm_build_classifier(y_tr, pty, RANDOM_STATE)
        fitted = clf.fit(X_tr, y_tr)
        with open(OBJpath + "/" + tag + "_SVM.machine", 'wb') as f:
            pickle.dump(fitted, f, pickle.HIGHEST_PROTOCOL)

    elif task == "eval":
        tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            pre_obj = pickle.load(f)
        X_tr = pre_obj[0]
        y_tr = pre_obj[1]
        X_te = pre_obj[2]
        y_te = pre_obj[3]

        clf = _svm_build_classifier(y_tr, pty, RANDOM_STATE)
        fitted = clf.fit(X_tr, y_tr)
        fpr, tpr, precision, recall, metrics = mev.performance_stat(
            X_te, y_te, fitted)
        mev.plot_eval(fpr, tpr, precision, recall, metrics, tag)
        # Text mode: the report lines are str, not bytes.
        with open(REPpath + "/" + tag + "_SVM.metrics", 'w') as f:
            for key, value in metrics.items():
                f.write("%s %.3f\n" % (key, value))

    else:  # task == "pred"
        # sep=r"\s+" is the documented equivalent of delim_whitespace=True.
        pred = pd.read_csv(pred_path, sep=r"\s+").values
        tag1 = re.sub(r'\.machine$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            fitted_obj = pickle.load(f)
        pred_p = fitted_obj.predict_proba(pred)
        pred_c = fitted_obj.predict(pred)
        df = pd.DataFrame({
            "Probability_1": pred_p[:, 1],
            "Probability_0": pred_p[:, 0],
            "Predited_class": pred_c
        })
        df.to_csv(REPpath + "/" + tag1 + ".classification",
                  index=False,
                  sep='\t')
Exemple #3
0
def main(argv):
    """Command-line entry point for fitting, evaluating or applying the CNN.

    Options (parsed from *argv* with ``getopt``):
        -h  print the help text and exit
        -f  number of features (required for ``fit`` and ``eval``)
        -d  drop probability (required for ``fit`` and ``eval``)
        -i  number of iterations (required for ``fit`` and ``eval``)
        -w  task: ``fit``, ``eval`` or ``pred``
        -o  pickled (X_tr, y_tr[, X_te, y_te]) object for fit/eval, or the
            ``*.machine.meta`` graph file for pred
        -p  whitespace-delimited table to predict on (required for ``pred``)

    ``fit_CNN``, ``mev``, ``TF_CNN`` and ``REPpath`` are defined outside this
    function. ``pred`` restores the latest checkpoint found in ``TF_CNN``;
    presumably ``fit_CNN`` is what saves it there -- verify against its
    definition.

    Exits with status 2 when the options cannot be parsed or a required
    option is missing. Raises ``KeyError`` if ``DeeprimerPATH`` is not set.

    SECURITY: for fit/eval, ``-o`` is loaded with ``pickle``; only pass
    trusted files.
    """
    usage = ('Run_CNN.py -f <n_feature> -d <drop_probability> -i <iterations> '
             '-w <fit/pred/eval> -o <preprocessed_object> -p <pred_input>')
    help_text = "\n\n".join([
        "[Command]: " + usage,
        "Example:",
        "1. Fit the model only:",
        "Run_CNN.py -f 415 -d 0.5 -i <iterations> -w fit -o <sample_pre.fit.pickled>",
        "2. Evaluate the model:",
        "Run_CNN.py -f 415 -d 0.5 -i <iterations> -w eval -o <sample_split.eval.pickled>",
        "3. Make prediction:",
        "Run_CNN.py -f 415 -w pred -o <sample_pre.fit_CNN.machine.meta> -p <pred_input>",
    ]) + "\n"

    # Not used below; kept so that a missing DeeprimerPATH still fails fast.
    dpath = os.environ["DeeprimerPATH"]
    try:
        opts, args = getopt.getopt(argv, "hf:d:i:w:o:p:")
    except getopt.GetoptError:
        print(usage)
        sys.exit(2)

    n_f = None
    dp = None
    n_iter = None
    task = None
    obj = None
    pred_path = None
    for opt, arg in opts:
        if opt == '-h':
            print(help_text)
            sys.exit()
        elif opt == "-f":
            n_f = int(arg)
        elif opt == "-d":
            dp = float(arg)
        elif opt == "-i":
            n_iter = int(arg)
        elif opt == "-w":
            task = arg
        elif opt == "-o":
            obj = arg
        elif opt == "-p":
            pred_path = arg

    # Report missing required options as a usage error instead of crashing
    # later with an UnboundLocalError.
    if task is None or obj is None:
        print(usage)
        sys.exit(2)
    if task not in ("fit", "eval", "pred"):
        print("Not supported task. Choose from fit, pred, eval" + "\n")
        sys.exit()
    training_opts_missing = n_f is None or dp is None or n_iter is None
    if (task in ("fit", "eval") and training_opts_missing) or (
            task == "pred" and pred_path is None):
        print(usage)
        sys.exit(2)

    if task == "fit":
        tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            pre_obj = pickle.load(f)
        X_tr = pre_obj[0]
        y_tr = pre_obj[1]

        fit_CNN(X_tr, y_tr, n_f, dp, n_iter, tag, TF_CNN)

    elif task == "eval":
        tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
        with open(obj, 'rb') as f:
            pre_obj = pickle.load(f)
        X_tr = pre_obj[0]
        y_tr = pre_obj[1]
        X_te = pre_obj[2]
        y_te = pre_obj[3]

        # The original passed the undefined name `tagi` here (NameError).
        fit_CNN(X_tr, y_tr, n_f, dp, n_iter, tag, TF_CNN)
        meta = tag + "_CNN.machine.meta"
        fpr, tpr, precision, recall, metrics = mev.performance_stat_nn(
            X_te, y_te, "CNN", meta)
        mev.plot_eval(fpr, tpr, precision, recall, metrics, tag)
        # Text mode: the report lines are str, not bytes.
        with open(REPpath + "/" + tag + "_CNN.metrics", 'w') as f:
            for key, value in metrics.items():
                f.write("%s %.3f\n" % (key, value))

    else:  # task == "pred"
        # sep=r"\s+" is the documented equivalent of delim_whitespace=True.
        Xpred = pd.read_csv(pred_path, sep=r"\s+").values
        tag1 = re.sub(r'\.machine\.meta$', '', obj).split("/")[-1]
        X_te32 = Xpred.astype(np.float32)

        # The context manager closes the session once predictions are read.
        with tf.Session() as sess:
            saver = tf.train.import_meta_graph(obj)
            saver.restore(sess, tf.train.latest_checkpoint(TF_CNN))

            graph = tf.get_default_graph()
            y_conv = graph.get_tensor_by_name("y_conv:0")
            x = graph.get_tensor_by_name("x:0")
            keep_prob = graph.get_tensor_by_name("keep_prob:0")
            # keep_prob is fed as 1.0 for prediction.
            feed_dict = {x: X_te32, keep_prob: 1.0}

            pred_p, pred_c = sess.run(
                [tf.nn.softmax(y_conv),
                 tf.argmax(y_conv, 1)], feed_dict)

        df = pd.DataFrame({
            "Probability_1": pred_p[:, 1],
            "Probability_0": pred_p[:, 0],
            "Predited_class": pred_c
        })
        df.to_csv(REPpath + "/" + tag1 + ".classification",
                  index=False,
                  sep='\t')