def padding_data(data, dictionary, params, type):
    """Pad commit data and map it through ``dictionary``.

    Dispatches on ``type`` to the message or code pipeline: the data is
    first padded, then passed to the matching ``mapping_dict_*`` helper.

    Args:
        data: Commit messages (``type == 'msg'``) or commit code
            (``type == 'code'``); forwarded unchanged to the padding helper.
        dictionary: Lookup forwarded to ``mapping_dict_msg`` or
            ``mapping_dict_code``.
        params: Settings object. ``msg_length`` is read for messages;
            ``code_line`` and ``code_length`` are read for code.
        type: Either ``'msg'`` or ``'code'``. The name shadows the builtin
            but is kept because callers may pass it by keyword.

    Returns:
        Whatever the selected ``mapping_dict_*`` helper returns.

    Raises:
        SystemExit: With exit status 1 when ``type`` is neither ``'msg'``
            nor ``'code'``.
    """
    if type == 'msg':
        padded = padding_message(data=data, max_length=params.msg_length)
        return mapping_dict_msg(pad_msg=padded, dict_msg=dictionary)

    if type == 'code':
        padded = padding_commit_code(data=data, max_line=params.code_line, max_length=params.code_length)
        return mapping_dict_code(pad_code=padded, dict_code=dictionary)

    print('Your type is incorrect -- please correct it')
    # The bare exit() helper is injected by the `site` module for interactive
    # use and terminates with status 0; raise SystemExit directly so the
    # failure is reported with a non-zero status and works without `site`.
    raise SystemExit(1)
コード例 #2
0
ファイル: split_train_test.py プロジェクト: lizj14/CC2Vec
    obj = pickle.load(f)
    f.close()

    messages, codes, labels, ids = obj
    labels = convert_label(labels)
    codes = clean_and_reformat_code(data=codes)

    info_label(data=labels)
    print('Number of instances in commit message %i and commit code %i ' % (len(messages), len(codes)))
    print('Labels: %i' % (len(labels)))

    dict_msg, dict_code = dictionary_commit(data=messages, type_data='msg'), dictionary_commit(data=codes,
                                                                                               type_data='code')
    pad_msg = padding_message(data=messages, max_length=256)
    added_code, removed_code = codes
    pad_added_code = padding_commit_code(data=added_code, max_file=3, max_line=10, max_length=256)
    pad_removed_code = padding_commit_code(data=removed_code, max_file=3, max_line=10, max_length=256)

    pad_msg = mapping_dict_msg(pad_msg=pad_msg, dict_msg=dict_msg)
    pad_added_code = mapping_dict_code(pad_code=pad_added_code, dict_code=dict_code)
    pad_removed_code = mapping_dict_code(pad_code=pad_removed_code, dict_code=dict_code)
    data = (pad_msg, pad_added_code, pad_removed_code, labels, dict_msg, dict_code, ids)
    print('Dictionary message: %i -- Dictionary code: %i' % (len(dict_msg), len(dict_code)))
    print('Shape of commit message:', pad_msg.shape)
    print('Shape of added code:', pad_added_code.shape)
    print('Shape of removed code:', pad_removed_code.shape)
    print('Shape of labels:', labels.shape)
    print('Ids of projects:', project, len(ids))
    write_data = open('../data/jit_' + project + '.pkl', 'wb')
    pickle.dump(data, write_data)