import numpy as np
from sklearn import tree
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures


def model_logistic_regression(X_train, y_train, X_dev, y_dev, X_test, y_test,
                              file_out, flag_degree=True, degree=2,
                              C=1, class_weight=None, out=False):
    # Optionally expand the features with polynomial terms before fitting.
    if flag_degree:
        poly = PolynomialFeatures(degree=degree, interaction_only=False)
        X_poly = poly.fit_transform(X_train)
        X_test_poly = poly.transform(X_test)
        X_dev_poly = None if X_dev is None else poly.transform(X_dev)
    else:
        X_poly = X_train
        X_test_poly = X_test
        X_dev_poly = X_dev

    lr = LogisticRegression(C=C, n_jobs=-1, class_weight=class_weight,
                            penalty='l2', random_state=228)
    lr.fit(X_poly, y_train)

    predictions = lr.predict(X_poly)
    predictions_test = lr.predict(X_test_poly)

    if X_dev is None:
        predictions_dev = None
        dev = False
    else:
        # Predict on the dev split with the fitted logistic regression.
        predictions_dev = lr.predict(X_dev_poly)
        dev = True

    print_prediction(predictions, y_train,
                     predictions_dev, y_dev,
                     predictions_test, y_test,
                     file_out, dev, out=out)

    return lr, predictions_test
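# A minimal usage sketch, not from the original project: _demo_logistic_regression,
# the synthetic data, and the 'lr_report.txt' path are made-up illustrations, and
# it assumes the project's print_prediction helper is defined in this module.
def _demo_logistic_regression():
    rng = np.random.RandomState(0)
    X = rng.rand(200, 5)
    y = (X[:, 0] + X[:, 1] > 1.0).astype(int)
    # No dev split, so only train/test metrics are reported.
    lr, preds_test = model_logistic_regression(X[:150], y[:150], None, None,
                                               X[150:], y[150:],
                                               'lr_report.txt',
                                               flag_degree=True, degree=2,
                                               C=1.0, out=False)
    return lr, preds_test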
def model_tree_fit(X_train, y_train, X_dev, y_dev, X_test, y_test, file_out,
                   max_depth=None, out=True):
    dtc = tree.DecisionTreeClassifier(random_state=228, max_depth=max_depth)
    dtc = dtc.fit(X_train, y_train)

    predictions = dtc.predict(X_train)
    predictions_test = dtc.predict(X_test)

    if X_dev is None:
        predictions_dev = None
        dev = False
    else:
        predictions_dev = dtc.predict(X_dev)
        dev = True

    print_prediction(predictions, y_train,
                     predictions_dev, y_dev,
                     predictions_test, y_test,
                     file_out, dev, out=out)
    return dtc
def model_RandomForest_fit(file_out, X_train, y_train, X_dev, y_dev, X_test, y_test,
                           n_estimators=1000, max_depth=None,
                           out=True):
    rf = RandomForestClassifier(n_estimators=n_estimators,
                                max_depth=max_depth,
                                random_state=228)
    rf = rf.fit(X_train, y_train)

    predictions = rf.predict(X_train)
    predictions_test = rf.predict(X_test)

    if X_dev is None:
        predictions_dev = None
        dev = False
    else:
        predictions_dev = rf.predict(X_dev)
        dev = True

    print_prediction(predictions, y_train,
                     predictions_dev, y_dev,
                     predictions_test, y_test,
                     file_out, dev, out=out)

    return rf
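# A minimal usage sketch, not from the original project (_demo_tree_and_forest,
# the data, and the report paths are hypothetical). Note the argument order:
# model_RandomForest_fit takes file_out first, while model_tree_fit takes it
# after the data splits.
def _demo_tree_and_forest():
    rng = np.random.RandomState(1)
    X = rng.rand(300, 4)
    y = (X[:, 0] > X[:, 1]).astype(int)
    X_tr, y_tr, X_te, y_te = X[:200], y[:200], X[200:], y[200:]
    dtc = model_tree_fit(X_tr, y_tr, None, None, X_te, y_te,
                         'tree_report.txt', max_depth=5)
    rf = model_RandomForest_fit('rf_report.txt', X_tr, y_tr, None, None,
                                X_te, y_te, n_estimators=200, max_depth=5)
    return dtc, rf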
Example #4
import logging
import os
import sys

import numpy as np


def main():
    args = init_arguments()
    set_logging(args.verbose - args.quiet)

    def file_exists(path):
        return path and os.path.exists(path)

    if args.subcmd == 'train':
        if not file_exists(args.datafile) and not (file_exists(
                args.vulfile) and file_exists(args.opfile)):
            logging.error(
                "Need to specify the data file or vul/op files to read.")
            logging.error(
                " For example: %s train -d %s [ -u %s -o %s ]" %
                (sys.argv[0], 'data.csv', '../run-anlyzers/vuls.csv.xz',
                 '../features/op-ft.csv.xz'))
            sys.exit(1)

        logging.info('training...')
        data = get_vul_op_data(args.vulfile, args.opfile, args.datafile)
        train(data)

    elif args.subcmd == 'predict':
        if not file_exists(args.sol):
            logging.error("No Solidity file given.")
            sys.exit(1)

        logging.info('predicting...')
        vuls = ALL_VULS
        data = sol_to_data(args.sol)

        preds = predict(data, vuls)
        print_prediction(vuls, preds)

    else:
        logging.error("unknown command '%s'" % args.subcmd)
        sys.exit(1)
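# Hedged invocation sketch: the script name ml.py is a placeholder, the
# -d/-u/-o flags are taken from the error message above, and predict is
# assumed to receive the Solidity file through the argument parsed into
# args.sol by init_arguments:
#     python ml.py train -d data.csv
#     python ml.py train -u ../run-anlyzers/vuls.csv.xz -o ../features/op-ft.csv.xz
#     python ml.py predict contract.sol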
def fit_nn_model(epochs, X_train, y_train, X_dev, y_dev, X_test, y_test,
                 file_out, batch_size=32, out=True, name=''):
    # name labels the loss graphs written by loss_graph.
    nn = get_nn_model(X_train.shape[1])

    if X_dev is None:
        # No dev split: train without validation data and skip dev metrics.
        dev = False
        logs = nn.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                      shuffle=True, verbose=0)
        loss_graph(epochs, batch_size, logs.history.get('loss'), name=name)
    else:
        dev = True
        logs = nn.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
                      shuffle=True, verbose=0,
                      validation_data=(X_dev, y_dev))
        loss_graph(epochs, batch_size, logs.history.get('loss'),
                   logs.history.get('val_loss'), name=name)
        loss_graph(epochs, batch_size, logs.history.get('loss'), name=name)

    # Threshold the sigmoid outputs at 0.5 to get hard 0/1 labels.
    predictions = nn.predict_on_batch(X_train)
    predictions = np.where(predictions > 0.5, 1, 0)

    predictions_test = nn.predict_on_batch(X_test)
    predictions_test = np.where(predictions_test > 0.5, 1, 0)

    if dev:
        predictions_dev = nn.predict_on_batch(X_dev)
        predictions_dev = np.where(predictions_dev > 0.5, 1, 0)
    else:
        predictions_dev = None

    common.print_prediction(predictions, y_train,
                            predictions_dev, y_dev,
                            predictions_test, y_test,
                            file_out, dev, out=out)

    return nn
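# A minimal usage sketch, not from the original project (_demo_nn and the data
# are hypothetical); it assumes get_nn_model returns a compiled Keras binary
# classifier and that common.print_prediction and loss_graph are importable.
def _demo_nn():
    rng = np.random.RandomState(2)
    X = rng.rand(400, 10).astype('float32')
    y = (X.sum(axis=1) > 5.0).astype('float32')
    X_tr, y_tr = X[:300], y[:300]
    X_de, y_de = X[300:350], y[300:350]
    X_te, y_te = X[350:], y[350:]
    return fit_nn_model(20, X_tr, y_tr, X_de, y_de, X_te, y_te,
                        'nn_report.txt', batch_size=32, out=True)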