_RF_USAGE = ('Run_randomforest.py -t <n_estimator> -c <n_class> '
             '-w <fit/pred/eval> -o <preprocessed_object> -p <pred_input>')

_RF_HELP = "\n\n".join([
    "[Command]: Run_randomforest.py -t <n_estimator> -w <fit/pred/eval> "
    "-o <preprocessed_object> -p <pred_input>",
    "Example:",
    "1. Fit the model only:",
    "Run_randomforest.py -t 1000 -w fit -o <sample_pre.fit.pickled>",
    "2. Evaluate the model:",
    "Run_randomforest.py -t 1000 -w eval -o <sample_split.eval.pickled>",
    "3. Make prediction:",
    "Run_randomforest.py -t 1000 -w pred -o <sample_pre.fit_RF.machine> "
    "-p <pred_input>",
]) + "\n"

# Seed handed to the over-sampler only; the forest itself is left unseeded,
# exactly as before.
_RF_RANDOM_STATE = 42


def _rf_usage_exit():
    """Print the one-line usage string and exit with status 2."""
    print(_RF_USAGE)
    sys.exit(2)


def _rf_load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _rf_is_imbalanced(y_tr):
    """Return True when label 1 is >= 2x or <= 1/2x as frequent as label 0."""
    n_pos = sum(y_tr == 1)
    n_neg = sum(y_tr == 0)
    # Integer-only form of "n_pos <= n_neg / 2": the original "(1 / 2)" is 0
    # under Python 2 integer division, which disabled this half of the test.
    return n_pos >= 2 * n_neg or 2 * n_pos <= n_neg


def _rf_build_classifier(y_tr, n_est):
    """Return a random forest, preceded by a RandomOverSampler if imbalanced."""
    classifier = RandomForestClassifier(n_estimators=n_est)
    if _rf_is_imbalanced(y_tr):
        sampler = RandomOverSampler(random_state=_RF_RANDOM_STATE)
        return make_pipeline(sampler, classifier)
    return classifier


def _rf_fit(obj, tag, n_est, nclass):
    """Train on the pickled training data and pickle the fitted model."""
    if nclass == 2:
        suffix = "_RF.machine"
    elif nclass == 3:
        suffix = "_3C_RF.machine"
    else:
        # Checked before training so an unsupported request costs nothing.
        print("Not support class over 3. Try Regress." + "\n")
        sys.exit()
    pre_obj = _rf_load_pickle(obj)
    X_tr, y_tr = pre_obj[0], pre_obj[1]
    fitted = _rf_build_classifier(y_tr, n_est).fit(X_tr, y_tr)
    with open(os.path.join(OBJpath, tag + suffix), 'wb') as handle:
        pickle.dump(fitted, handle, pickle.HIGHEST_PROTOCOL)


def _rf_eval(obj, tag, n_est, nclass):
    """Train on the training split and write an evaluation report."""
    if nclass not in (2, 3):
        # Previously this case trained a model and then did nothing, silently.
        print("Not support class over 3. Try Regression.")
        sys.exit()
    pre_obj = _rf_load_pickle(obj)
    X_tr, y_tr, X_te, y_te = pre_obj[0], pre_obj[1], pre_obj[2], pre_obj[3]
    fitted = _rf_build_classifier(y_tr, n_est).fit(X_tr, y_tr)
    if nclass == 2:
        fpr, tpr, precision, recall, metrics = mev.performance_stat(
            X_te, y_te, fitted)
        mev.plot_eval(fpr, tpr, precision, recall, metrics, tag)
        # Text mode: the report lines are str, which a 'wb' handle rejects
        # on Python 3.
        with open(os.path.join(REPpath, tag + "_RF.metrics"), 'w') as report:
            for key, value in metrics.items():
                report.write("%s %.3f\n" % (key, value))
    else:
        pred_c = fitted.predict(X_te)
        cnf_matrix = confusion_matrix(y_te, pred_c)
        np.set_printoptions(precision=2)
        class_names = ["0", "1", "2"]
        plot_confusion_matrix(cnf_matrix, class_names, True,
                              'Normalized confusion matrix', tag)
        summary_path = os.path.join(REPpath, tag + "_3C_RF_eval.summay")
        with open(summary_path, 'w') as report:
            report.write(
                classification_report(y_te, pred_c,
                                      target_names=class_names))


def _rf_pred(obj, pred_file, nclass):
    """Apply a pickled model to the rows of *pred_file* and write a TSV."""
    if nclass not in (2, 3):
        print("Not support class over 3. Try Regression.")
        sys.exit()
    tag1 = re.sub(r'\.machine$', '', obj).split("/")[-1]
    # sep=r"\s+" is the documented equivalent of delim_whitespace=True.
    pred = pd.read_csv(pred_file, sep=r"\s+").values
    fitted_obj = _rf_load_pickle(obj)
    pred_p = fitted_obj.predict_proba(pred)
    pred_c = fitted_obj.predict(pred)
    # Highest class first, then the predicted label, as in the original
    # column layout.
    columns = {}
    for k in reversed(range(nclass)):
        columns["Probability_%d" % k] = pred_p[:, k]
    columns["Predited_class"] = pred_c
    out_path = os.path.join(REPpath, "%s.%dclassification" % (tag1, nclass))
    pd.DataFrame(columns).to_csv(out_path, index=False, sep='\t')


def main(argv):
    """Command-line entry point for the random-forest fit/eval/pred tool.

    Options read from *argv*:
        -h  print the help text and exit
        -t  number of trees (int); required for ``fit`` and ``eval``
        -c  number of classes (int, default 2); only 2 and 3 are handled
        -w  task: ``fit``, ``eval`` or ``pred``
        -o  pickled input: preprocessed data for ``fit``/``eval``, a fitted
            model for ``pred``
        -p  whitespace-delimited table to predict on; required for ``pred``

    Args:
        argv: Argument list without the program name.

    Raises:
        KeyError: If the ``DeeprimerPATH`` environment variable is not set.
        SystemExit: Status 2 on bad, missing or non-numeric options; default
            status after ``-h``, an unknown task, or an unsupported class
            count.
    """
    # The original read this variable first and failed with KeyError when it
    # was absent; keep that fail-fast behaviour without an unused local.
    if "DeeprimerPATH" not in os.environ:
        raise KeyError("DeeprimerPATH")

    try:
        opts, _ = getopt.getopt(argv, "ht:w:o:p:c:")
    except getopt.GetoptError:
        _rf_usage_exit()

    n_est = task = obj = pred_file = None
    nclass = 2
    try:
        for opt, arg in opts:
            if opt == '-h':
                print(_RF_HELP)
                sys.exit()
            elif opt == "-t":
                n_est = int(arg)
            elif opt == "-w":
                task = arg
            elif opt == "-o":
                obj = arg
            elif opt == "-c":
                nclass = int(arg)
            elif opt == "-p":
                # Only remembered here; the file is read when predicting.
                pred_file = arg
    except ValueError:
        # Non-integer -t / -c.
        _rf_usage_exit()

    # Missing options used to surface as NameError deep inside the task code.
    if task is None or obj is None:
        _rf_usage_exit()
    if task not in ("fit", "eval", "pred"):
        print("Not supported task. Choose from fit, pred, eval" + "\n")
        sys.exit()
    if task == "pred":
        if pred_file is None:
            _rf_usage_exit()
        _rf_pred(obj, pred_file, nclass)
        return
    if n_est is None:
        _rf_usage_exit()

    tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
    if task == "fit":
        _rf_fit(obj, tag, n_est, nclass)
    else:
        _rf_eval(obj, tag, n_est, nclass)
_SVM_USAGE = ('Run_SVM.py -C <penalty> -w <fit/pred/eval> '
              '-o <preprocessed_object> -p <pred_input>')

_SVM_HELP = "\n\n".join([
    "[Command]: Run_SVM.py -C <penalty> -w <fit/pred/eval> "
    "-o <preprocessed_object> -p <pred_input>",
    "Example:",
    "1. Fit the model only:",
    "Run_SVM.py -C 1000 -w fit -o <sample_pre.fit.pickled>",
    "2. Evaluate the model:",
    "Run_SVM.py -C 1000 -w eval -o <sample_split.eval.pickled>",
    "3. Make prediction:",
    "Run_SVM.py -C 1000 -w pred -o <sample_pre.fit_SVM.machine> "
    "-p <pred_input>",
]) + "\n"

# Seed handed to the over-sampler.
_SVM_RANDOM_STATE = 42


def _svm_usage_exit():
    """Print the one-line usage string and exit with status 2."""
    print(_SVM_USAGE)
    sys.exit(2)


def _svm_load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _svm_build_classifier(y_tr, pty):
    """Return an RBF SVC, preceded by a RandomOverSampler if imbalanced."""
    n_pos = sum(y_tr == 1)
    n_neg = sum(y_tr == 0)
    classifier = svm.SVC(kernel='rbf', probability=True, C=pty)
    # "2 * n_pos <= n_neg" is the integer-only form of "n_pos <= n_neg / 2";
    # the original "(1 / 2)" is 0 under Python 2 integer division.
    if n_pos >= 2 * n_neg or 2 * n_pos <= n_neg:
        sampler = RandomOverSampler(random_state=_SVM_RANDOM_STATE)
        return make_pipeline(sampler, classifier)
    return classifier


def _svm_fit(obj, tag, pty):
    """Train on the pickled training data and pickle the fitted model."""
    pre_obj = _svm_load_pickle(obj)
    X_tr, y_tr = pre_obj[0], pre_obj[1]
    fitted = _svm_build_classifier(y_tr, pty).fit(X_tr, y_tr)
    with open(os.path.join(OBJpath, tag + "_SVM.machine"), 'wb') as handle:
        pickle.dump(fitted, handle, pickle.HIGHEST_PROTOCOL)


def _svm_eval(obj, tag, pty):
    """Train on the training split, then plot and write the metrics."""
    pre_obj = _svm_load_pickle(obj)
    X_tr, y_tr, X_te, y_te = pre_obj[0], pre_obj[1], pre_obj[2], pre_obj[3]
    fitted = _svm_build_classifier(y_tr, pty).fit(X_tr, y_tr)
    fpr, tpr, precision, recall, metrics = mev.performance_stat(
        X_te, y_te, fitted)
    mev.plot_eval(fpr, tpr, precision, recall, metrics, tag)
    # Text mode: the report lines are str, which a 'wb' handle rejects on
    # Python 3.
    with open(os.path.join(REPpath, tag + "_SVM.metrics"), 'w') as report:
        for key, value in metrics.items():
            report.write("%s %.3f\n" % (key, value))


def _svm_pred(obj, pred_file):
    """Apply a pickled model to the rows of *pred_file* and write a TSV."""
    tag1 = re.sub(r'\.machine$', '', obj).split("/")[-1]
    # sep=r"\s+" is the documented equivalent of delim_whitespace=True.
    pred = pd.read_csv(pred_file, sep=r"\s+").values
    fitted_obj = _svm_load_pickle(obj)
    pred_p = fitted_obj.predict_proba(pred)
    pred_c = fitted_obj.predict(pred)
    df = pd.DataFrame({
        "Probability_1": pred_p[:, 1],
        "Probability_0": pred_p[:, 0],
        "Predited_class": pred_c
    })
    df.to_csv(os.path.join(REPpath, tag1 + ".classification"),
              index=False,
              sep='\t')


def main(argv):
    """Command-line entry point for the SVM fit/eval/pred tool.

    Options read from *argv*:
        -h  print the help text and exit
        -C  penalty passed to ``svm.SVC`` as ``C``; required for ``fit`` and
            ``eval``. Parsed as a float, so fractional penalties are accepted.
        -w  task: ``fit``, ``eval`` or ``pred``
        -o  pickled input: preprocessed data for ``fit``/``eval``, a fitted
            model for ``pred``
        -p  whitespace-delimited table to predict on; required for ``pred``

    Args:
        argv: Argument list without the program name.

    Raises:
        KeyError: If the ``DeeprimerPATH`` environment variable is not set.
        SystemExit: Status 2 on bad, missing or non-numeric options; default
            status after ``-h`` or an unknown task.
    """
    # The original read this variable first and failed with KeyError when it
    # was absent; keep that fail-fast behaviour without an unused local.
    if "DeeprimerPATH" not in os.environ:
        raise KeyError("DeeprimerPATH")

    try:
        opts, _ = getopt.getopt(argv, "hC:w:o:p:")
    except getopt.GetoptError:
        _svm_usage_exit()

    pty = task = obj = pred_file = None
    try:
        for opt, arg in opts:
            if opt == '-h':
                print(_SVM_HELP)
                sys.exit()
            elif opt == "-C":
                pty = float(arg)
            elif opt == "-w":
                task = arg
            elif opt == "-o":
                obj = arg
            elif opt == "-p":
                # Only remembered here; the file is read when predicting.
                pred_file = arg
    except ValueError:
        # Non-numeric -C.
        _svm_usage_exit()

    # Missing options used to surface as NameError deep inside the task code.
    if task is None or obj is None:
        _svm_usage_exit()
    if task not in ("fit", "eval", "pred"):
        print("Not supported task. Choose from fit, pred, eval" + "\n")
        sys.exit()
    if task == "pred":
        if pred_file is None:
            _svm_usage_exit()
        _svm_pred(obj, pred_file)
        return
    if pty is None:
        _svm_usage_exit()

    tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
    if task == "fit":
        _svm_fit(obj, tag, pty)
    else:
        _svm_eval(obj, tag, pty)
_CNN_USAGE = ('Run_CNN.py -f <n_feature> -d <drop_probability> '
              '-i <iterations> -w <fit/pred/eval> -o <preprocessed_object> '
              '-p <pred_input>')

_CNN_HELP = "\n\n".join([
    "[Command]: Run_CNN.py -f <n_feature> -d <drop_probability> "
    "-i <iterations> -w <fit/pred/eval> -o <preprocessed_object> "
    "-p <pred_input>",
    "Example:",
    "1. Fit the model only:",
    "Run_CNN.py -f 415 -d 0.5 -i <iterations> -w fit "
    "-o <sample_pre.fit.pickled>",
    "2. Evaluate the model:",
    "Run_CNN.py -f 415 -d 0.5 -i <iterations> -w eval "
    "-o <sample_split.eval.pickled>",
    "3. Make prediction:",
    "Run_CNN.py -f 415 -w pred -o <sample_pre.fit_CNN.machine.meta> "
    "-p <pred_input>",
]) + "\n"


def _cnn_usage_exit():
    """Print the one-line usage string and exit with status 2."""
    print(_CNN_USAGE)
    sys.exit(2)


def _cnn_load_pickle(path):
    """Unpickle and return the object stored at *path*."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _cnn_pred(obj, pred_file):
    """Restore the saved graph, score the rows of *pred_file*, write a TSV."""
    tag1 = re.sub(r'\.machine\.meta$', '', obj).split("/")[-1]
    # sep=r"\s+" is the documented equivalent of delim_whitespace=True.
    X_te32 = pd.read_csv(pred_file, sep=r"\s+").values.astype(np.float32)
    # The context manager closes the session, which the original leaked.
    with tf.Session() as sess:
        saver = tf.train.import_meta_graph(obj)
        saver.restore(sess, tf.train.latest_checkpoint(TF_CNN))
        graph = tf.get_default_graph()
        y_conv = graph.get_tensor_by_name("y_conv:0")
        x = graph.get_tensor_by_name("x:0")
        keep_prob = graph.get_tensor_by_name("keep_prob:0")
        feed_dict = {x: X_te32, keep_prob: 1.0}
        pred_p = sess.run(tf.nn.softmax(y_conv), feed_dict)
        pred_c = sess.run(tf.argmax(y_conv, 1), feed_dict)
    df = pd.DataFrame({
        "Probability_1": pred_p[:, 1],
        "Probability_0": pred_p[:, 0],
        "Predited_class": pred_c
    })
    df.to_csv(os.path.join(REPpath, tag1 + ".classification"),
              index=False,
              sep='\t')


def main(argv):
    """Command-line entry point for the CNN fit/eval/pred tool.

    Options read from *argv*:
        -h  print the help text and exit
        -f  number of features (int); required for ``fit`` and ``eval``
        -d  drop probability (float); required for ``fit`` and ``eval``
        -i  number of iterations (int); required for ``fit`` and ``eval``
        -w  task: ``fit``, ``eval`` or ``pred``
        -o  pickled preprocessed data for ``fit``/``eval``, or the saved
            ``.machine.meta`` graph for ``pred``
        -p  whitespace-delimited table to predict on; required for ``pred``

    Args:
        argv: Argument list without the program name.

    Raises:
        KeyError: If the ``DeeprimerPATH`` environment variable is not set.
        SystemExit: Status 2 on bad, missing or non-numeric options; default
            status after ``-h`` or an unknown task.
    """
    # The original read this variable first and failed with KeyError when it
    # was absent; keep that fail-fast behaviour without an unused local.
    if "DeeprimerPATH" not in os.environ:
        raise KeyError("DeeprimerPATH")

    try:
        opts, _ = getopt.getopt(argv, "hf:d:i:w:o:p:")
    except getopt.GetoptError:
        _cnn_usage_exit()

    n_f = dp = it = task = obj = pred_file = None
    try:
        for opt, arg in opts:
            if opt == '-h':
                print(_CNN_HELP)
                sys.exit()
            elif opt == "-f":
                n_f = int(arg)
            elif opt == "-d":
                dp = float(arg)
            elif opt == "-i":
                it = int(arg)
            elif opt == "-w":
                task = arg
            elif opt == "-o":
                obj = arg
            elif opt == "-p":
                # Only remembered here; the file is read when predicting.
                pred_file = arg
    except ValueError:
        # Non-numeric -f / -d / -i.
        _cnn_usage_exit()

    # Missing options used to surface as NameError deep inside the task code.
    if task is None or obj is None:
        _cnn_usage_exit()
    if task not in ("fit", "eval", "pred"):
        print("Not supported task. Choose from fit, pred, eval" + "\n")
        sys.exit()
    if task == "pred":
        if pred_file is None:
            _cnn_usage_exit()
        _cnn_pred(obj, pred_file)
        return
    if n_f is None or dp is None or it is None:
        _cnn_usage_exit()

    tag = re.sub(r'\.pickled$', '', obj).split("/")[-1]
    pre_obj = _cnn_load_pickle(obj)
    X_tr, y_tr = pre_obj[0], pre_obj[1]
    if task == "fit":
        # NOTE(review): the original comment says fit_CNN keeps its session
        # in the TF_sessions folder -- confirm against fit_CNN.
        fit_CNN(X_tr, y_tr, n_f, dp, it, tag, TF_CNN)
        return

    # task == "eval"
    X_te, y_te = pre_obj[2], pre_obj[3]
    # The original passed the undefined name "tagi" here (NameError).
    fit_CNN(X_tr, y_tr, n_f, dp, it, tag, TF_CNN)
    meta = tag + "_CNN.machine.meta"
    fpr, tpr, precision, recall, metrics = mev.performance_stat_nn(
        X_te, y_te, "CNN", meta)
    mev.plot_eval(fpr, tpr, precision, recall, metrics, tag)
    # Text mode: the report lines are str, which a 'wb' handle rejects on
    # Python 3.
    with open(os.path.join(REPpath, tag + "_CNN.metrics"), 'w') as report:
        for key, value in metrics.items():
            report.write("%s %.3f\n" % (key, value))