def model_logistic_regression(X_train, y_train, X_dev, y_dev, X_test, y_test,
                              file_out, flag_degree=True, degree=2,
                              C=1, class_weight=None, out=False):
    # Optionally expand the features with polynomial/interaction terms.
    if flag_degree:
        poly = PolynomialFeatures(degree=degree, interaction_only=False)
        X_poly = poly.fit_transform(X_train)
        X_test_poly = poly.transform(X_test)
    else:
        X_poly = X_train
        X_test_poly = X_test

    lr = LogisticRegression(C=C, n_jobs=-1, class_weight=class_weight,
                            penalty='l2', random_state=228)
    lr.fit(X_poly, y_train)

    predictions = lr.predict(X_poly)
    predictions_test = lr.predict(X_test_poly)

    if X_dev is None:
        predictions_dev = None
        dev = False
    else:
        # Apply the same feature expansion to the dev set before predicting.
        X_dev_poly = poly.transform(X_dev) if flag_degree else X_dev
        predictions_dev = lr.predict(X_dev_poly)
        dev = True

    print_prediction(predictions, y_train,
                     predictions_dev, y_dev,
                     predictions_test, y_test,
                     file_out, dev, out=out)
    return lr, predictions_test
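# Illustrative call (assumption: not part of the original module; the synthetic
# arrays and file name below are stand-ins used only to show the expected
# argument shapes and the returned (model, test predictions) pair).
#
# import numpy as np
# X_demo = np.random.rand(100, 8)
# y_demo = np.random.randint(0, 2, 100)
# lr_model, lr_test_preds = model_logistic_regression(
#     X_demo[:60], y_demo[:60], None, None, X_demo[60:], y_demo[60:],
#     file_out='lr_results.txt', flag_degree=True, degree=2, C=1)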
def model_tree_fit(X_train, y_train, X_dev, y_dev, X_test, y_test, file_out,
                   max_depth=None, out=True):
    dtc = tree.DecisionTreeClassifier(random_state=228, max_depth=max_depth)
    dtc = dtc.fit(X_train, y_train)

    predictions = dtc.predict(X_train)
    predictions_test = dtc.predict(X_test)

    if X_dev is None:
        predictions_dev = None
        dev = False
    else:
        predictions_dev = dtc.predict(X_dev)
        dev = True

    print_prediction(predictions, y_train,
                     predictions_dev, y_dev,
                     predictions_test, y_test,
                     file_out, dev, out=out)
    return dtc
def model_RandomForest_fit(file_out, X_train, y_train, X_dev, y_dev, X_test, y_test,
                           n_estimators=1000, max_depth=None, out=True):
    rf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                random_state=228)
    rf = rf.fit(X_train, y_train)

    predictions = rf.predict(X_train)
    predictions_test = rf.predict(X_test)

    if X_dev is None:
        predictions_dev = None
        dev = False
    else:
        predictions_dev = rf.predict(X_dev)
        dev = True

    print_prediction(predictions, y_train,
                     predictions_dev, y_dev,
                     predictions_test, y_test,
                     file_out, dev, out=out)
    return rf
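# Illustrative call (assumption: not part of the original module). Note that,
# unlike the other fit helpers, file_out is the first positional argument here.
#
# rf_model = model_RandomForest_fit('rf_results.txt',
#                                   X_train, y_train, X_dev, y_dev,
#                                   X_test, y_test,
#                                   n_estimators=500, max_depth=20)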
def main():
    args = init_arguments()
    set_logging(args.verbose - args.quiet)

    def file_exists(path):
        return path and os.path.exists(path)

    if args.subcmd == 'train':
        if not file_exists(args.datafile) and not (file_exists(args.vulfile)
                                                   and file_exists(args.opfile)):
            logging.error("Need to specify the data file or vul/op files to read.")
            logging.error("  For example: %s train -d %s [ -u %s -o %s ]"
                          % (sys.argv[0], 'data.csv',
                             '../run-anlyzers/vuls.csv.xz',
                             '../features/op-ft.csv.xz'))
            sys.exit(1)
        logging.info('training...')
        data = get_vul_op_data(args.vulfile, args.opfile, args.datafile)
        train(data)
    elif args.subcmd == 'predict':
        if not file_exists(args.sol):
            logging.error("No Solidity file to predict.")
            sys.exit(1)
        logging.info('predicting...')
        vuls = ALL_VULS
        data = sol_to_data(args.sol)
        preds = predict(data, vuls)
        print_prediction(vuls, preds)
    else:
        logging.error("unknown command '%s'" % args.subcmd)
        sys.exit(1)
def fit_nn_model(epochs, X_train, y_train, X_dev, y_dev, X_test, y_test,
                 file_out, batch_size=32, out=True):
    nn = get_nn_model(X_train.shape[1])

    # Fall back to the test set when no dev set is supplied.
    if X_dev is None:
        X_dev = X_test
        y_dev = y_test
        dev = False
    else:
        dev = True

    logs = nn.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                  shuffle=True, verbose=0)
    # Label the training-loss plot with the output file name.
    loss_graph(epochs, batch_size, logs.history.get('loss'), name=file_out)

    # Threshold the sigmoid outputs at 0.5 to obtain class labels.
    predictions = nn.predict_on_batch(X_train)
    predictions = np.where(predictions > 0.5, 1, 0)
    predictions_test = nn.predict_on_batch(X_test)
    predictions_test = np.where(predictions_test > 0.5, 1, 0)

    if not dev:
        predictions_dev = None
    else:
        predictions_dev = nn.predict_on_batch(X_dev)
        predictions_dev = np.where(predictions_dev > 0.5, 1, 0)

    common.print_prediction(predictions, y_train,
                            predictions_dev, y_dev,
                            predictions_test, y_test,
                            file_out, dev, out=out)
    return nn
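# Sketch of a get_nn_model that would satisfy the calls above (fit,
# predict_on_batch, sigmoid outputs thresholded at 0.5). This is an assumption
# for illustration only; the network actually used by this module may differ
# in depth, width, and compile settings.
#
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense
#
# def get_nn_model(input_dim):
#     model = Sequential([
#         Dense(64, activation='relu', input_shape=(input_dim,)),
#         Dense(1, activation='sigmoid'),
#     ])
#     model.compile(optimizer='adam', loss='binary_crossentropy',
#                   metrics=['accuracy'])
#     return model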