def loading_msg_code(data, path_file):
    """Load and clean the commit messages and code changes for a dataset.

    Args:
        data: A 3-item sequence ``(ids, labels, features)``. The third item
            is unpacked but not used by this function.
        path_file: Passed through to ``info_commit`` together with the ids.

    Returns:
        A tuple ``(ids, labels, messages, codes)`` where ``messages`` and
        ``codes`` are the outputs of ``clean_message`` and ``clean_code``.
    """
    ids, labels, _features = data
    raw_messages, raw_codes = info_commit(ids=ids, path_file=path_file)
    messages = clean_message(data=raw_messages)
    codes = clean_code(data=raw_codes)
    return ids, labels, messages, codes
Beispiel #2
0
    # Sort the commits by author date and return their ids and binary labels.
    df = pd.read_csv(path_data)
    new_df = list()
    # Collect the CSV rows matching each requested commit id, one lookup per
    # id. NOTE: the loop variable `id` shadows the builtin of the same name.
    for id in ids:
        new_df.append(df.loc[df['commit_id'] == id])
    new_df = pd.concat(new_df)
    # Order the selected rows by author date, then replace missing values
    # with 0.
    new_df = new_df.sort_values(by=['author_date']).fillna(0)
    ids, labels = list(new_df['commit_id'].values), list(
        new_df['bugcount'].values)
    # Binarize the labels: a positive bug count becomes 1, anything else 0.
    labels = [1 if (int(l) > 0) else 0 for l in labels]
    return ids, labels


if __name__ == '__main__':
    # Rebuild the cleaned message/code variables for one project, with all
    # saved lists ordered by commit author date.
    project = 'openstack'
    ids = loading_variable_path(
        pname='./variables_ver1/' + project + '_ids.pkl')

    # Re-order the commits by author date. The labels are taken from the same
    # call so they stay aligned with the re-ordered ids; previously they were
    # bound to a misspelled name (`lables`) and the stale, differently ordered
    # labels were saved instead.
    path_data = './labels/' + project + '.csv'
    ids, labels = sorted_authordate(ids=ids, path_data=path_data)

    path_file = './output/' + project
    messages, codes = info_commit(ids=ids, path_file=path_file)
    # Sanity check: all four lists are expected to have the same length.
    print(len(ids), len(labels), len(messages), len(codes))
    messages, codes = clean_message(data=messages), clean_code(data=codes)

    saving_variable(project + '_messages', messages)
    saving_variable(project + '_codes', codes)
    saving_variable(project + '_labels', labels)
    saving_variable(project + '_ids', ids)
def load_msg_code_feature(data, path_file):
    """Load cleaned commit messages and code changes plus scaled features.

    Args:
        data: A 3-item sequence ``(ids, labels, features)``.
        path_file: Passed through to ``info_commit`` together with the ids.

    Returns:
        A tuple ``(ids, labels, scaled_features, messages, codes)`` where
        ``scaled_features`` is ``preprocessing.scale(features)`` and
        ``messages`` / ``codes`` are the outputs of ``clean_message`` and
        ``clean_code``.
    """
    ids, labels, features = data
    raw_messages, raw_codes = info_commit(ids=ids, path_file=path_file)
    messages = clean_message(data=raw_messages)
    codes = clean_code(data=raw_codes)
    # `preprocessing` is presumably sklearn.preprocessing -- confirm against
    # the file's imports.
    scaled_features = preprocessing.scale(features)
    return ids, labels, scaled_features, messages, codes