Example #1
def loading_data_all(FLAGS):
    # load all data from the FLAGS path
    # the data is split into training and testing; only the testing data is loaded here
    commits_ = extract_commit(path_file=FLAGS.path)
    filter_commits = filtering_commit(commits=commits_,
                                      num_file=FLAGS.code_file,
                                      num_hunk=FLAGS.code_hunk,
                                      num_loc=FLAGS.code_line,
                                      size_line=FLAGS.code_length)
    msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(
        commits=filter_commits)
    dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
    pad_msg = mapping_commit_msg(msgs=msgs_,
                                 max_length=FLAGS.msg_length,
                                 dict_msg=dict_msg_)
    pad_added_code = mapping_commit_code(type="added",
                                         commits=filter_commits,
                                         max_hunk=FLAGS.code_hunk,
                                         max_code_line=FLAGS.code_line,
                                         max_code_length=FLAGS.code_length,
                                         dict_code=dict_code_)
    pad_removed_code = mapping_commit_code(type="removed",
                                           commits=filter_commits,
                                           max_hunk=FLAGS.code_hunk,
                                           max_code_line=FLAGS.code_line,
                                           max_code_length=FLAGS.code_length,
                                           dict_code=dict_code_)
    labels = load_label_commits(commits=filter_commits)
    return pad_msg, pad_added_code, pad_removed_code, labels
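
loading_data_all expects a FLAGS object that carries path, msg_length, code_file, code_hunk, code_line, and code_length. A minimal usage sketch, faking FLAGS with argparse.Namespace (the msg_length value is an assumption; the path and size limits are reused from the other examples):

from argparse import Namespace

# Hedged sketch: Namespace stands in for the project's TensorFlow FLAGS object.
FLAGS = Namespace(path="./data/test_data/merging_markus_sasha.txt",  # path borrowed from the later examples
                  msg_length=512,  # assumed cap on commit-message tokens
                  code_file=1, code_hunk=8, code_line=10, code_length=120)

pad_msg, pad_added_code, pad_removed_code, labels = loading_data_all(FLAGS)
print(len(labels))  # one label per filtered commit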
Example #2
def loading_data(path_file):
    # load commits from the given file and return combined msg+code lines with their labels
    commits_ = extract_commit(path_file=path_file)
    nfile, nhunk, nline, nleng = 1, 8, 10, 120
    filter_commits = filtering_commit(commits=commits_,
                                      num_file=nfile,
                                      num_hunk=nhunk,
                                      num_loc=nline,
                                      size_line=nleng)
    msgs = extract_msg(commits=filter_commits)
    labels = extract_label(commits=filter_commits)
    codes = extract_code(commits=filter_commits)
    all_lines = add_two_list(list1=msgs, list2=codes)
    return all_lines, labels
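
loading_data returns one line per commit plus its label, assuming add_two_list joins each commit's message tokens and code tokens into a single string so that all_lines stays aligned one-to-one with labels. Under that assumption, a quick bag-of-words baseline takes a few lines of scikit-learn:

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression

# Hedged sketch: fit a simple baseline on the combined msg+code token lines.
all_lines, labels = loading_data("./data/test_data/merging_markus_sasha.txt")
X = CountVectorizer().fit_transform(all_lines)
clf = LogisticRegression().fit(X, labels)
print(clf.score(X, labels))  # training accuracy only, purely illustrative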
Example #3
def loading_testing_data(FLAGS, path_file, type):
    if type == "msg":
        commits_ = extract_commit(path_file=path_file)
        filter_commits = filtering_commit(commits=commits_,
                                          num_file=FLAGS.code_file,
                                          num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(
            commits=filter_commits)
    elif type == "all":
        commits_ = extract_commit(path_file=path_file)
        filter_commits = filtering_commit(commits=commits_,
                                          num_file=FLAGS.code_file,
                                          num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(
            commits=filter_commits)
        all_lines = add_two_list(list1=msgs_, list2=codes_)
        msgs_ = all_lines

    elif type == "code":
        commits_ = extract_commit(path_file=path_file)
        filter_commits = filtering_commit(commits=commits_,
                                          num_file=FLAGS.code_file,
                                          num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(
            commits=filter_commits)
        msgs_ = codes_
    else:
        print "You need to type correct model"
        exit()
    return msgs_, codes_, filter_commits
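
The type argument selects what comes back in msgs_: the message tokens ("msg"), the code tokens ("code"), or both joined together ("all"); codes_ and the filtered commits are returned in every case. A hedged dispatch sketch, with FLAGS and the path reused as assumptions from the surrounding examples:

# Hedged sketch: exercise the three supported input types and report sizes.
for t in ["msg", "all", "code"]:
    msgs_, codes_, filtered = loading_testing_data(
        FLAGS, "./data/test_data/merging_markus_sasha.txt", t)
    print("%s: %d inputs, %d commits" % (t, len(msgs_), len(filtered)))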
Example #4
def loading_data_lstm(FLAGS):
    print FLAGS.model
    if "msg" in FLAGS.model:
        commits_ = extract_commit(path_file=FLAGS.path)
        filter_commits = filtering_commit(commits=commits_, num_file=FLAGS.code_file, num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
        dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
        pad_msg = mapping_commit_msg(msgs=msgs_, max_length=FLAGS.msg_length, dict_msg=dict_msg_)
        pad_added_code = mapping_commit_code(type="added", commits=filter_commits, max_hunk=FLAGS.code_hunk,
                                             max_code_line=FLAGS.code_line,
                                             max_code_length=FLAGS.code_length, dict_code=dict_code_)
        pad_removed_code = mapping_commit_code(type="removed", commits=filter_commits, max_hunk=FLAGS.code_hunk,
                                               max_code_line=FLAGS.code_line,
                                               max_code_length=FLAGS.code_length, dict_code=dict_code_)
        labels = load_label_commits(commits=filter_commits)
    elif "all" in FLAGS.model:
        commits_ = extract_commit(path_file=FLAGS.path)
        filter_commits = filtering_commit(commits=commits_, num_file=FLAGS.code_file, num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
        all_lines = add_two_list(list1=msgs_, list2=codes_)
        msgs_ = all_lines
        dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
        pad_msg = mapping_commit_msg(msgs=msgs_, max_length=FLAGS.msg_length, dict_msg=dict_msg_)
        pad_added_code = mapping_commit_code(type="added", commits=filter_commits, max_hunk=FLAGS.code_hunk,
                                             max_code_line=FLAGS.code_line,
                                             max_code_length=FLAGS.code_length, dict_code=dict_code_)
        pad_removed_code = mapping_commit_code(type="removed", commits=filter_commits, max_hunk=FLAGS.code_hunk,
                                               max_code_line=FLAGS.code_line,
                                               max_code_length=FLAGS.code_length, dict_code=dict_code_)
        labels = load_label_commits(commits=filter_commits)
    elif "code" in FLAGS.model:
        commits_ = extract_commit(path_file=FLAGS.path)
        filter_commits = filtering_commit(commits=commits_, num_file=FLAGS.code_file, num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(commits=filter_commits)
        msgs_ = codes_
        dict_msg_, dict_code_ = dictionary(data=msgs_), dictionary(data=codes_)
        pad_msg = mapping_commit_msg(msgs=msgs_, max_length=FLAGS.msg_length, dict_msg=dict_msg_)
        pad_added_code = mapping_commit_code(type="added", commits=filter_commits, max_hunk=FLAGS.code_hunk,
                                             max_code_line=FLAGS.code_line,
                                             max_code_length=FLAGS.code_length, dict_code=dict_code_)
        pad_removed_code = mapping_commit_code(type="removed", commits=filter_commits, max_hunk=FLAGS.code_hunk,
                                               max_code_line=FLAGS.code_line,
                                               max_code_length=FLAGS.code_length, dict_code=dict_code_)
        labels = load_label_commits(commits=filter_commits)
    else:
        print "You need to type correct model"
        exit()

    # deterministic k-fold split of the filtered commits; only the first fold's test portion is returned below
    kf = KFold(n_splits=FLAGS.folds, random_state=FLAGS.seed)
    for train_index, test_index in kf.split(filter_commits):
        X_train_msg, X_test_msg = np.array(get_items(items=pad_msg, indexes=train_index)), \
                                  np.array(get_items(items=pad_msg, indexes=test_index))
        X_train_added_code, X_test_added_code = np.array(get_items(items=pad_added_code, indexes=train_index)), \
                                                np.array(get_items(items=pad_added_code, indexes=test_index))
        X_train_removed_code, X_test_removed_code = np.array(get_items(items=pad_removed_code, indexes=train_index)), \
                                                    np.array(get_items(items=pad_removed_code, indexes=test_index))
        y_train, y_test = np.array(get_items(items=labels, indexes=train_index)), \
                          np.array(get_items(items=labels, indexes=test_index))
        return X_test_msg, X_test_added_code, X_test_removed_code, y_test
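
Note that the loop above returns during its first iteration, so only the first fold's test split is ever used. A self-contained sketch of that split pattern on toy data (recent scikit-learn requires shuffle=True whenever random_state is set):

import numpy as np
from sklearn.model_selection import KFold

# Hedged sketch: take only the first fold, mirroring the early return above.
data = np.arange(20)
kf = KFold(n_splits=5, shuffle=True, random_state=0)
train_index, test_index = next(iter(kf.split(data)))
print(test_index)  # held-out indices of the first fold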
def filter_number_code_hunk(commits):
    # print, for each commit, the largest hunk index seen across its added and removed code
    commit_id = list()
    for c in commits:
        files = c["code"]
        cnt_hunk = list()
        for hunk in files:
            added_hunk, removed_hunk = hunk["added"].keys(), hunk["removed"].keys()
            cnt_hunk += added_hunk + removed_hunk
        # if max(cnt_hunk) <= num_hunk:
        #     commit_id.append(c["id"])
        print c["id"], max(cnt_hunk)


if __name__ == "__main__":
    path_data = "./data/test_data/merging_markus_sasha.txt"
    commits_ = extract_commit(path_file=path_data)
    nfile, nhunk, nline, nleng = 1, 8, 10, 120
    filter_commits = filtering_commit(commits=commits_, num_file=nfile, num_hunk=nhunk, num_loc=nline, size_line=nleng)
    ids_ = [c["id"] for c in filter_commits]
    labels_ = [c["stable"] for c in filter_commits]

    path_nonoverlap = "./qualitative_analysis_ver3/nonOverlap_PatchNet_all_LPU_SVM_all"
    id_overlap = load_file(path_file=path_nonoverlap)
    new_commits = list()
    for i in id_overlap:
        index_i = ids_.index(i)
        new_commits.append(filter_commits[index_i])
    filter_number_code_hunk(commits=new_commits)

Example #6
    path_write = "./data_test_data_pred_results/cnn_" + type + ".txt"
    write_file(path_file=path_write, data=y_pred)
    print "Accuracy: ", accuracy_score(y_true=y_test, y_pred=y_pred)
    print "Precision: ", precision_score(y_true=y_test, y_pred=y_pred)
    print "Recall: ", recall_score(y_true=y_test, y_pred=y_pred)
    print "F1: ", f1_score(y_true=y_test, y_pred=y_pred)
    print "AUC: ", auc_score(y_true=y_test, y_pred=y_pred)


if __name__ == "__main__":
    nfile, nhunk, nline, nleng = 1, 8, 10, 120

    path_data = "./data/3_mar7/typediff.out"
    commits_train = extract_commit(path_file=path_data)
    filter_commits_train = filtering_commit(commits=commits_train, num_file=nfile,
                                            num_hunk=nhunk, num_loc=nline,
                                            size_line=nleng)
    msgs_train = extract_msg(commits=filter_commits_train)
    labels_train = extract_label(commits=filter_commits_train)
    codes_train = extract_code(commits=filter_commits_train)
    all_lines_train = add_two_list(list1=msgs_train, list2=codes_train)

    # path_test = "./data/test_data/sasha_translated.out"
    path_test = "./data/test_data/merging_markus_sasha.txt"
    type = "all"
    # type = "msg"
    # type = "code"
    commits_test = extract_commit(path_file=path_test)
    filter_commits_test = filtering_commit(commits=commits_test,
                                           num_file=nfile, num_hunk=nhunk,
                                           num_loc=nline, size_line=nleng)
    code_length = 120  # max length of a code line in a commit
    code_line = 10  # max lines of code in one hunk
    code_hunk = 8  # max hunks of code in one file
    code_file = 1  # max code files in one commit

    path_train = "./data/3_mar7/typediff.out"
    # data_train = load_file(path_file=path_train)
    # train_pad_msg, train_pad_added_code, train_pad_removed_code, train_labels, dict_msg_, dict_code_ = \
    #     load_commit_train_data(commits=data_train, msg_length_=msg_length, code_length_=code_length,
    #                            code_line_=code_line, code_hunk_=code_hunk, code_file_=code_file)
    # print train_pad_msg.shape, train_pad_added_code.shape, train_pad_removed_code.shape, train_labels.shape

    commits_train = extract_commit(path_file=path_train)
    filter_commits_train = filtering_commit(commits=commits_train,
                                            num_file=code_file,
                                            num_hunk=code_hunk,
                                            num_loc=code_line,
                                            size_line=code_length)
    msgs_train, codes_train = extract_msg(
        commits=filter_commits_train), extract_code(
            commits=filter_commits_train)
    dict_msg_train, dict_code_train = dictionary(data=msgs_train), dictionary(
        data=codes_train)

    path_test = "./data/test_data/markus_translated.out"
    # path_test = "./data/test_data/sasha_translated.out"
    commits_test = extract_commit(path_file=path_test)
    filter_commits_test = filtering_commit(commits=commits_test,
                                           num_file=code_file,
                                           num_hunk=code_hunk,
                                           num_loc=code_line,
                                           size_line=code_length)
Example #8
if __name__ == "__main__":
    tf = model_parameter_evaluation_keras()
    FLAGS = tf.flags.FLAGS
    print_params(tf)

    path_file_model = "./keras_model/"
    model_name = FLAGS.model
    # model_name = "lstm_code"
    model_name = "lstm_all"
    model = load_model(path_file_model + model_name + ".h5")

    if "msg" in FLAGS.model:
        commits_ = extract_commit(path_file=FLAGS.path)
        filter_commits = filtering_commit(commits=commits_,
                                          num_file=FLAGS.code_file,
                                          num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(
            commits=filter_commits)
    elif "all" in FLAGS.model:
        commits_ = extract_commit(path_file=FLAGS.path)
        filter_commits = filtering_commit(commits=commits_,
                                          num_file=FLAGS.code_file,
                                          num_hunk=FLAGS.code_hunk,
                                          num_loc=FLAGS.code_line,
                                          size_line=FLAGS.code_length)
        msgs_, codes_ = extract_msg(commits=filter_commits), extract_code(
            commits=filter_commits)
        all_lines = add_two_list(list1=msgs_, list2=codes_)
        msgs_ = all_lines
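
Assuming the remaining branches mirror Example #3 and the inputs are then padded as in Example #1, inference with the loaded Keras model is a single predict call. A hedged sketch of that final step (pad_msg stands for a padded index matrix built by mapping_commit_msg; the 0.5 threshold is an assumption):

import numpy as np

# Hedged sketch: turn the model's probabilities into hard 0/1 predictions.
y_prob = model.predict(np.array(pad_msg))
y_pred = (y_prob.ravel() > 0.5).astype(int)
print(y_pred[:10])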