def loading_data(project):
    """Load the stored train/test data and both dictionaries of a project.

    Every artifact is fetched through ``loading_variable`` using ``project``
    followed by a fixed suffix: ``_train``, ``_test``, ``_dict_msg`` and
    ``_dict_code`` (in that order).

    Returns a 3-tuple ``(train, test, dictionary)`` where ``dictionary`` is
    itself the pair ``(value stored under _dict_msg, value stored under
    _dict_code)``.
    """
    train_split = loading_variable(project + '_train')
    test_split = loading_variable(project + '_test')
    msg_dictionary = loading_variable(project + '_dict_msg')
    code_dictionary = loading_variable(project + '_dict_code')
    return train_split, test_split, (msg_dictionary, code_dictionary)
        plt.hist(data, bins=range(0, 40))
        plt.title(check)
        plt.xlabel("Length")
        plt.ylabel("Frequency")
        plt.show()
        return mean(data), stdev(data)
    elif check == 'Length':
        new_data = list()
        for d in data:
            for f in d:
                new_data.append(len(f.split()))
        plt.hist(new_data, bins=range(0, 750))
        plt.title(check)
        plt.xlabel("Length")
        plt.ylabel("Frequency")
        plt.show()
        return mean(new_data), stdev(new_data)


if __name__ == '__main__':
    # Script entry point: load one project's stored messages and codes and
    # print the pair of values returned by each statistic helper.
    project = 'openstack'
    messages = loading_variable(project + '_messages')
    codes = loading_variable(project + '_codes')
    print(type(messages), type(codes))

    # Report for the commit messages.
    mean_, std_ = statistic_msg(data=messages)
    print(mean_, std_)

    # Report for the code changes, first with check='File', then 'Length'.
    for check in ('File', 'Length'):
        mean_, std_ = statistic_code(data=codes, check=check)
        print(mean_, std_)
Beispiel #3
0
    info_label(data=labels_test)
    ids_train, ids_test = get_index(data=ids, index=train_index), get_index(
        data=ids, index=test_index)
    train = (ids_train, labels_train, pad_msg_train, pad_code_train)
    test = (ids_test, labels_test, pad_msg_test, pad_code_test)
    dict_msg, dict_code = dictionary_commit(
        data=pad_msg_train,
        type_data='msg'), dictionary_commit(data=pad_code_train,
                                            type_data='code')
    return train, test, dict_msg, dict_code


if __name__ == '__main__':
    # Script entry point: load a project's stored variables, split them with
    # folding_data and save the resulting pieces under project-prefixed names.
    # Set project to 'openstack' to process that dataset instead.
    project = 'qt'

    # Loads happen in the order: messages, codes, ids, labels.
    messages = loading_variable(project + '_messages')
    codes = loading_variable(project + '_codes')
    ids = loading_variable(project + '_ids')
    labels = convert_label(loading_variable(project + '_labels'))

    info_label(data=labels)
    print('Number of instances in commit message %i and commit code %i ' %
          (len(messages), len(codes)))
    print('Labels: %i' % (len(labels)))

    train, test, dict_msg, dict_code = folding_data(
        pad_msg=messages,
        pad_code=codes,
        labels=labels,
        ids=ids,
        n_folds=5,
    )

    saving_variable(project + '_train', train)
    saving_variable(project + '_test', test)
    saving_variable(project + '_dict_msg', dict_msg)