Beispiel #1
0
def Doc2vecFeatureEngineering():
    """Embed the pickled train/verify inputs with saved Doc2Vec models.

    Reads 'trainData_X', 'trainData_Y', 'verifyData_X' and 'verifyData_Y'
    through ``pickleRead``, loads the program and hints Doc2Vec models from
    the ``models`` directory next to the current working directory's parent,
    passes the inputs to ``transformDatatoFeatures_doc2vec`` and finally
    writes the labels back unchanged as 'train_Y' and 'verify_Y' through
    ``pickleWrite``.
    """
    benchmark = 'trainData'
    parenDir = os.path.abspath(os.path.pardir)
    # The benchmark path is only echoed; the data itself comes from pickles.
    path = parenDir + '/' + benchmark + '/'
    print(path)

    train_X = pickleRead('trainData_X')
    train_Y = pickleRead('trainData_Y')
    verify_X = pickleRead('verifyData_X')
    verify_Y = pickleRead('verifyData_Y')

    # Load the saved Doc2Vec models.
    modelsDir = os.path.join(parenDir, 'models')
    programDoc2VecModel = gensim.models.doc2vec.Doc2Vec.load(
        os.path.join(modelsDir, 'programDoc2VecModel'))
    hintsDoc2VecModel = gensim.models.doc2vec.Doc2Vec.load(
        os.path.join(modelsDir, 'hintsDoc2VecModel'))

    transformDatatoFeatures_doc2vec(train_X, verify_X, programDoc2VecModel,
                                    hintsDoc2VecModel)

    # Labels are not transformed; they are re-saved under the output names.
    pickleWrite(train_Y, 'train_Y')
    pickleWrite(verify_Y, 'verify_Y')
Beispiel #2
0
def write_best_threshod_to_pickle(parameters, true_Y, predicted_Y_loaded_model,
                                  label, benchmark):
    """Compute the best threshold set, store it in ``parameters`` and pickle it.

    The threshold set comes from ``set_threshold_by_roundings(true_Y,
    predicted_Y_loaded_model)``. It is written into the caller's
    ``parameters`` mapping under the key "best_threshold_set" (the mapping is
    mutated in place), and the mapping is then pickled under the name
    "<benchmark>-<label>-parameters" in "../src/trained_model/".

    Returns:
        The computed threshold set.
    """
    threshold_set = set_threshold_by_roundings(true_Y,
                                               predicted_Y_loaded_model)
    parameters["best_threshold_set"] = threshold_set

    pickle_name = "-".join((benchmark, label, "parameters"))
    pickleWrite(parameters, pickle_name, "../src/trained_model/")
    return threshold_set
Beispiel #3
0
def Node2vecFeatureEngineering():
    """Build node2vec features from the pickled train/verify inputs.

    Reads 'trainData_X', 'trainData_Y', 'verifyData_X' and 'verifyData_Y'
    through ``pickleRead``, passes the inputs to
    ``transformDatatoFeatures_node2vec`` and writes the labels back unchanged
    as 'train_Y' and 'verify_Y' through ``pickleWrite``.
    """
    benchmark = 'trainData'
    parenDir = os.path.abspath(os.path.pardir)
    # The benchmark path is only echoed; the data itself comes from pickles.
    path = parenDir + '/' + benchmark + '/'
    print(path)

    train_X = pickleRead('trainData_X')
    train_Y = pickleRead('trainData_Y')
    verify_X = pickleRead('verifyData_X')
    verify_Y = pickleRead('verifyData_Y')

    transformDatatoFeatures_node2vec(train_X, verify_X)

    # Labels are not transformed; they are re-saved under the output names.
    pickleWrite(train_Y, 'train_Y')
    pickleWrite(verify_Y, 'verify_Y')
Beispiel #4
0
def main():
    """Split the programs and pickle the train/test training vectors.

    Steps, in order:
      1. Read all programs and split them 80/20 with ``shuffleData``.
      2. Read the "argumentTrainData" and "argumentTestData" pickles from
         "../" and convert each with ``transformDataToTrainingVector``.
      3. Pickle the resulting X/Y parts to "../" as "argumentTrainData_X",
         "argumentTrainData_Y", "argumentTestData_X" and "argumentTestData_Y".
    """
    programs = readMultiplePrograms()
    # The returned splits are not used directly; the pickles read below are
    # presumably the ones shuffleData writes -- confirm against its definition.
    _train_split, _test_split = shuffleData(programs, 0.8)

    train_X, train_Y = transformDataToTrainingVector(
        pickleRead("argumentTrainData", path="../"))
    test_X, test_Y = transformDataToTrainingVector(
        pickleRead("argumentTestData", path="../"))

    outputs = (
        ("argumentTrainData_X", train_X),
        ("argumentTrainData_Y", train_Y),
        ("argumentTestData_X", test_X),
        ("argumentTestData_Y", test_Y),
    )
    for pickle_name, payload in outputs:
        pickleWrite(payload, pickle_name, path="../")
Beispiel #5
0
def Graph2vecFeatureEngineering():
    """Embed the pickled train/verify inputs with saved graph2vec models.

    Reads 'trainData_X', 'trainData_Y', 'verifyData_X' and 'verifyData_Y'
    through ``pickleRead``, loads 'programGraph2VecModel' and
    'hintsGraph2VecModel' (stored in gensim Doc2Vec format) from the
    ``models`` directory next to the current working directory's parent,
    passes the inputs to ``transformDatatoFeatures_graph2vec`` and writes the
    labels back unchanged as 'train_Y' and 'verify_Y' through ``pickleWrite``.
    """
    benchmark = 'trainData'
    parenDir = os.path.abspath(os.path.pardir)
    # The benchmark path is only echoed; the data itself comes from pickles.
    path = parenDir + '/' + benchmark + '/'
    print(path)

    train_X = pickleRead('trainData_X')
    train_Y = pickleRead('trainData_Y')
    verify_X = pickleRead('verifyData_X')
    verify_Y = pickleRead('verifyData_Y')

    modelsDir = os.path.join(parenDir, 'models')
    programGraph2VecModel = gensim.models.doc2vec.Doc2Vec.load(
        os.path.join(modelsDir, 'programGraph2VecModel'))
    hintsGraph2VecModel = gensim.models.doc2vec.Doc2Vec.load(
        os.path.join(modelsDir, 'hintsGraph2VecModel'))

    transformDatatoFeatures_graph2vec(train_X, verify_X, programGraph2VecModel,
                                      hintsGraph2VecModel)

    # Labels are not transformed; they are re-saved under the output names.
    pickleWrite(train_Y, 'train_Y')
    pickleWrite(verify_Y, 'verify_Y')
Beispiel #6
0
def shuffleData(programList, trainDataSplitRate):
    """Shuffle the programs, split them and move the test files on disk.

    ``programList`` is shuffled in place, then cut at
    ``int(trainDataSplitRate * len(programList))``: the first part is the
    training split, the rest the test split. For every test program whose
    ".arguments" file exists in "../../trainData/", that file and its
    companion files (same base name, other suffixes) are moved to
    "../../testData/". Both splits are pickled to "../" as
    "argumentTrainData" and "argumentTestData".

    Args:
        programList: mutable sequence of objects with a ``programName``
            attribute.
        trainDataSplitRate: fraction of programs kept for training.

    Returns:
        Tuple ``(trainData, testData)``.

    Raises:
        Whatever ``shutil.move`` raises when one of the companion files of a
        program is missing from the training directory.
    """
    trainDir = "../../trainData/"
    testDir = "../../testData/"
    # All files that belong to one program, in the order they are moved.
    suffixes = (".arguments", ".gv", ".hints.graphs", ".horn", ".HornGraph",
                ".initialHints", ".negativeHints", ".positiveHints", ".smt2")

    # Split train and test data.
    random.shuffle(programList)
    splitPoint = int(trainDataSplitRate * len(programList))
    trainData = programList[:splitPoint]
    testData = programList[splitPoint:]

    # Start from an empty test directory. It has to be created on a first
    # run as well, otherwise the moves below have no destination directory.
    if os.path.exists("../../testData"):
        shutil.rmtree(testDir)
    os.mkdir("../../testData")

    for pi in testData:
        fileName = pi.programName
        # The ".arguments" file decides whether this program is moved at all.
        if os.path.exists(trainDir + fileName + ".arguments"):
            for suffix in suffixes:
                shutil.move(trainDir + fileName + suffix,
                            testDir + fileName + suffix)

    pickleWrite(trainData, "argumentTrainData", path="../")
    pickleWrite(testData, "argumentTestData", path="../")
    return trainData, testData
Beispiel #7
0
def transformDatatoFeatures_doc2vec(X_train, X_test, programDoc2VecModel,
                                    hintsDoc2VecModel):
    """Infer Doc2Vec encodings for both data sets and pickle them.

    ``doc2vecModelInferNewData`` is called once for ``X_train`` and once for
    ``X_test`` with the two given models; each call yields a
    (programs, hints) pair. The four results are pickled as
    'encodedPrograms_train', 'encodedHints_train', 'encodedPrograms_verify'
    and 'encodedHints_verify'.

    Returns:
        Tuple ``(programs_train, programs_verify, hints_train,
        hints_verify)``.
    """
    print("Doc2Vec (text) inferring begin")
    programs_train, hints_train = doc2vecModelInferNewData(
        X_train, programDoc2VecModel, hintsDoc2VecModel)
    programs_verify, hints_verify = doc2vecModelInferNewData(
        X_test, programDoc2VecModel, hintsDoc2VecModel)
    print("Doc2Vec (text) inferring end")

    print('write infered train and test data to files')
    artifacts = (
        ('encodedPrograms_train', programs_train),
        ('encodedHints_train', hints_train),
        ('encodedPrograms_verify', programs_verify),
        ('encodedHints_verify', hints_verify),
    )
    for file_name, encoded in artifacts:
        pickleWrite(content=encoded, name=file_name)

    return programs_train, programs_verify, hints_train, hints_verify
Beispiel #8
0
def transformDatatoFeatures_graph2vec(X_train, X_test, programGraph2VecModel,
                                      hintsGraph2VecModel):
    """Infer graph2vec encodings for both data sets and pickle them.

    ``graph2vecModelInferNewData`` is called once for ``X_train`` and once
    for ``X_test`` with the two given models; each call yields a
    (programs, hints) pair. The four results are pickled as
    'graphEncodedPrograms_train', 'graphEncodedHints_train',
    'graphEncodedPrograms_verify' and 'graphEncodedHints_verify'.

    Returns:
        Tuple ``(programs_train, programs_verify, hints_train,
        hints_verify)``.
    """
    print("Doc2Vec (graph) inferring begin")
    programs_train, hints_train = graph2vecModelInferNewData(
        X_train, programGraph2VecModel, hintsGraph2VecModel)
    programs_verify, hints_verify = graph2vecModelInferNewData(
        X_test, programGraph2VecModel, hintsGraph2VecModel)

    print('write infered train and test data to files')
    artifacts = (
        ('graphEncodedPrograms_train', programs_train),
        ('graphEncodedHints_train', hints_train),
        ('graphEncodedPrograms_verify', programs_verify),
        ('graphEncodedHints_verify', hints_verify),
    )
    for file_name, encoded in artifacts:
        pickleWrite(content=encoded, name=file_name)

    return programs_train, programs_verify, hints_train, hints_verify
Beispiel #9
0
def transformDatatoFeatures_node2vec(X_train, X_test):
    """Extract the stored graph embeddings from each sample and pickle them.

    Element 2 of every sample is collected as the program encoding and
    element 3 as the hints encoding. Each collection gets an extra axis at
    position 2 via ``np.expand_dims``. The results are pickled as
    'graphEncodedPrograms_train', 'graphEncodedPrograms_test',
    'graphEncodedHints_train' and 'graphEncodedHints_test'.

    Returns:
        Tuple ``(programs_train, programs_verify, hints_train,
        hints_verify)``.
    """

    def _collect(samples, position):
        # Gather one field from every sample and add an axis at position 2.
        return np.expand_dims([sample[position] for sample in samples],
                              axis=2)

    # Program encodings are extracted and written before the hints are read.
    programs_train = _collect(X_train, 2)
    programs_verify = _collect(X_test, 2)

    print('write  train and test  graph embedding data to files')
    pickleWrite(content=programs_train, name='graphEncodedPrograms_train')
    pickleWrite(content=programs_verify, name='graphEncodedPrograms_test')

    hints_train = _collect(X_train, 3)
    hints_verify = _collect(X_test, 3)

    pickleWrite(content=hints_train, name='graphEncodedHints_train')
    pickleWrite(content=hints_verify, name='graphEncodedHints_test')

    return programs_train, programs_verify, hints_train, hints_verify
Beispiel #10
0
def main():
    """Convert dot graphs to GNN input lists and pickle each list.

    Command-line arguments (``sys.argv``):
        1: path handed to ``DotToGraphInfo``
        2: data-set name; "<name>_data" is handed to ``DotToGraphInfo`` and
           the name prefixes the pickle names in "gnn_inputs" mode
        3: integer cursor, stored as ``_split_flag`` and appended to every
           pickle name
        4: file type, stored as ``_file_type``
        5: label, prefixes the pickle names in the non-"gnn_inputs" mode
        6: bucket count, stored as ``_buckets`` after conversion to int
        7: reading type; "gnn_inputs" selects
           ``getHornGraphSample_no_offset``, anything else
           ``getHornGraphSample_analysis``

    Every pickle is named "<prefix>-<list name>-<cursor>". The elapsed time
    of the conversion is printed at the end.
    """
    argv = sys.argv
    path = argv[1]
    df = argv[2]
    cursor = int(argv[3])
    file_type = argv[4]
    label = argv[5]
    buckets = argv[6]
    reading_type = argv[7]

    graphInfoList = DotToGraphInfo(f"{df}_data", path)
    graphInfoList._split_flag = cursor
    graphInfoList._file_type = file_type
    graphInfoList._buckets = int(buckets)

    started_at = time.time()
    if reading_type == "gnn_inputs":
        print("reading_type", reading_type)
        (node_label_ids, argument_indices, adjacency_lists, argument_scores,
         node_total, control_flow_nodes,
         graph_infos) = graphInfoList.getHornGraphSample_no_offset()
        prefix = df
        outputs = [
            ("graphs_node_label_ids", node_label_ids),
            ("graphs_argument_indices", argument_indices),
            ("graphs_adjacency_lists", adjacency_lists),
            ("graphs_argument_scores", argument_scores),
            ("total_number_of_node", node_total),
            ("total_control_flow_node_list", control_flow_nodes),
            ("graphs_graph_info_list", graph_infos),
        ]
    else:
        (node_label_ids, argument_indices, adjacency_lists, argument_scores,
         node_total,
         graph_infos) = graphInfoList.getHornGraphSample_analysis()
        prefix = label
        outputs = [
            ("graphs_node_label_ids", node_label_ids),
            ("graphs_argument_indices", argument_indices),
            ("graphs_adjacency_lists", adjacency_lists),
            ("graphs_argument_scores", argument_scores),
            ("total_number_of_node", node_total),
            ("graphs_graph_info_list", graph_infos),
        ]

    # One pickle per list, written in the order listed above.
    for list_name, payload in outputs:
        pickleWrite(payload, f"{prefix}-{list_name}-{cursor}")

    print("--time for transform dot to GNN input", time.time() - started_at, "--")