コード例 #1
0
ファイル: dnn_model.py プロジェクト: datnvhust/dnn
def dnn_model_kfold(k=10):
    """Run k-fold cross validation for the DNN model in parallel.

    Keyword Arguments:
        k {integer} -- the number of folds (default: {10})

    Returns:
        dict -- per-metric accuracy averaged over all folds, rounded to
            3 decimals.
    """
    samples = csv2dict(DATASET.features)

    # Precomputed lookup tables that speed up the top-k accuracy calculation.
    sample_dict, bug_reports, br2files_dict = helper_collections(samples)

    np.random.shuffle(samples)

    # Train one fold per job; n_jobs=-2 uses every core except one.
    acc_dicts = Parallel(n_jobs=-2)(
        delayed(train_dnn)(
            i, k, samples, start, step, sample_dict, bug_reports, br2files_dict
        )
        for i, (start, step) in enumerate(kfold_split_indexes(k, len(samples)))
    )

    # Average each metric across all folds.
    avg_acc_dict = {
        key: round(sum(d[key] for d in acc_dicts) / len(acc_dicts), 3)
        for key in acc_dicts[0]
    }

    return avg_acc_dict
コード例 #2
0
ファイル: rvsm_model.py プロジェクト: datnvhust/dnn
def rsvm_model():
    """Evaluate the plain rVSM ranking and report its top-k accuracy.

    Returns:
        dict -- top-k accuracy values as produced by topk_accuarcy.
    """
    samples = csv2dict(DATASET.features)

    # Precomputed lookup tables that speed up the top-k accuracy calculation.
    # NOTE(review): "topk_accuarcy" is the helper's actual (misspelled) name.
    sample_dict, bug_reports, br2files_dict = helper_collections(samples, True)

    return topk_accuarcy(bug_reports, sample_dict, br2files_dict)
コード例 #3
0
from util import csv2dict, tsv2dict
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, KFold

# Feature rows, one per (bug report, candidate source file) pair.
# NOTE(review): csv2dict() is a project helper; its source isn't visible here.
samples = csv2dict()

# Raw rVSM similarity score of every row, in file order.
rvsm_list = [float(sample['rVSM_similarity']) for sample in samples]

# Map each bug-report id to a list of per-file feature entries.
sample_dict = {}
for sample in samples:
    sample_dict[sample["report_id"]] = []

for sample in samples:
    temp_dict = {}
    # One-entry dict: candidate file name -> [rVSM similarity] vector.
    temp_dict[sample["file"]] = [float(sample['rVSM_similarity'])]

    sample_dict[sample["report_id"]].append(temp_dict)

# Bug reports keyed by id, mapped to their associated files
# (presumably the ground-truth fixed files -- TODO confirm upstream).
bug_reports = tsv2dict()
bug_reports_files_dict = {}

for bug_report in bug_reports:
    bug_reports_files_dict[bug_report["id"]] = bug_report["files"]

# Hit counters for top-1 .. top-20 evaluation.
# NOTE(review): the evaluation loop below is truncated in this excerpt;
# the remainder of the script is not shown.
topk_counters = [0] * 20
negative_total = 0
for bug_report in bug_reports:
    dnn_input = []
    corresponding_files = []
    bug_id = bug_report["id"]
コード例 #4
0
if __name__ == '__main__':
    # Command-line interface: one email is rendered and sent per CSV row.
    parser = argparse.ArgumentParser()

    # Positional arguments (order matters on the command line).
    parser.add_argument(
        "context",
        help="Path to a CSV file with the data context for the template. Each row in the CSV will result in one sent email. The template is filled with associated data from a given row. Must have a column named 'email' for To: address.",
    )
    parser.add_argument(
        "template",
        help="Path to a Jinja2 template file that can be filled by the context",
    )
    parser.add_argument(
        "subject",
        help="Quoted string with email's subject line, subject string is NOT filled as a template",
    )

    # Optional flags.
    parser.add_argument(
        "-q", "--quiet",
        action="store_true",
        help="Supress printing of all filled templates.",
    )
    parser.add_argument(
        "-d", "--dryrun",
        action="store_true",
        help="Don't actually send emails, just print them to screen.",
    )

    args = parser.parse_args()

    # main() takes no parameters -- it reads these module-level globals.
    context = util.csv2dict(args.context)
    template = util.readtemplate(args.template)
    subject = args.subject
    main()
コード例 #5
0
from util import csv2dict, tsv2dict
import numpy as np
from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import train_test_split, KFold

# Load the raw feature rows from the project CSV.
samples_ = csv2dict()
samples = []

# Oversampling: every 51st row (i % 51 == 0) ends up in the list 10 times
# instead of once, so that pattern carries more weight during training.
for i, sample in enumerate(samples_):
    copies = 10 if i % 51 == 0 else 1
    samples.extend([sample] * copies)

np.random.shuffle(samples)

# Dense feature matrix (5 features per row) and match-label column vector.
x_ = np.zeros((len(samples), 5))
y_ = np.zeros((len(samples), 1))

feature_names = ('rVSM_similarity', 'collab_filter',
                 'classname_similarity', 'bug_recency', 'bug_frequency')
for i, sample in enumerate(samples):
    for col, name in enumerate(feature_names):
        x_[i][col] = float(sample[name])
    y_[i] = float(sample['match'])

# Hold out 20% of the rows for testing; fixed seed for reproducibility.
data_train, data_test, labels_train, labels_test = train_test_split(
    x_, y_, test_size=0.20, random_state=42)