Example #1
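# Assumed imports for this excerpt; the project-specific helpers
# (Name_functions, Filefunctions, Parameters, CalculateDailyScores,
# generate_model, cv and the Di/DI data importer) are defined elsewhere
# in the repository.
import os

import numpy as np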
def parse_s(s):
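    """Train one classifier M^s_j for every subset D^s_j of dataset D^s.

    Subsets whose rarest class has fewer than cv * 2 samples, or that
    contain only a single class, are skipped; existing models are reused.
    Returns the number of usable subsets, the total number of subsets and
    the usable percentage.
    """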
    fn_data = Name_functions.DS_file(s)
    fn_subset = Name_functions.DS_reduced_ids_DSJ(s)
    x, y = Di(fn_data).split_data(int(s), fn_subset_ids=fn_subset)
    print('\tM^{}_j ... '.format(s), end='', flush=True)
    good_splits = 0

    for i in sorted(x):
        fn_model = Name_functions.model_SJ(s, i)
        c, cc = np.unique(y[i], return_counts=True)

        if min(cc) < cv * 2:
            # Too few samples of the rarest class for cv-fold cross-validation
            continue
        if len(c) <= 1:
            # A single-class subset cannot be used to train a classifier
            continue
        if not os.path.exists(fn_model):
            generate_model(x[i], y[i], s, i)
        good_splits += 1
    print('Done ({}/{} D^{}_j met requirements)'.format(
        good_splits, len(x), s))
    return good_splits, len(x), 100 * good_splits / len(x)


def parse_ms(s):
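    """Write the train and test case-id files for dataset D^s.

    Depending on Parameters.take_test_split_chronological, the post-warm-up
    cases are split either chronologically (the last fraction becomes the
    test set) or by a stratified random sample per class label.
    """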
    print('D^{} ... '.format(s), end='', flush=True)
    if (Filefunctions.exists(Name_functions.DS_train_ids(s))
            and Filefunctions.exists(Name_functions.DS_test_ids(s))):
        print('Already done')
        return

    np.random.seed(0)
    _, y, times, ids = DI(Name_functions.DS_file(s)).get_data(
        Name_functions.DS_reduced_ids_DSJ(s), True, True)

    if Parameters.take_test_split_chronological:
        test_case_ids = []
        train_case_ids = []
        times_post_warm_up = [
            t for t in times if t > Parameters.test_time_start
        ]
        times_post_warm_up.sort()
        # Index that separates the chronologically first (train) fraction
        # from the last (test) fraction of the post-warm-up cases
        split_index = int(
            (1 - Parameters.assessment_test_split) * len(times_post_warm_up))
        train_time_end = times_post_warm_up[split_index]
        for case_start_time, case_id in zip(times, ids):
            if case_start_time <= Parameters.test_time_start:
                # Warm-up cases are excluded from both train and test sets
                continue

            if case_start_time < train_time_end:
                train_case_ids.append(case_id)
            else:
                test_case_ids.append(case_id)
    else:
        # Stratified random split: for each class label, a fixed fraction
        # of the post-warm-up cases is sampled for the test set
        indices = [
            i for i in range(len(ids)) if times[i] > Parameters.test_time_start
        ]
        test_indices = []
        train_indices = []
        c, cc = np.unique(y[indices], return_counts=True)
        for label, label_count in zip(c, cc):
            num_test = int(label_count * Parameters.assessment_test_split)
            indices_c = [i for i in indices if y[i] == label]
            indices_c_test = np.random.choice(indices_c,
                                              num_test,
                                              replace=False)
            test_indices.extend(indices_c_test.tolist())
            train_indices.extend(
                [i for i in indices_c if i not in indices_c_test])
        test_case_ids = ids[test_indices]
        train_case_ids = ids[train_indices]

    with open(Name_functions.DS_train_ids(s), 'w+') as wf:
        for case_id in train_case_ids:
            wf.write('{}\n'.format(case_id))

    with open(Name_functions.DS_test_ids(s), 'w+') as wf:
        for case_id in test_case_ids:
            wf.write('{}\n'.format(case_id))

    print('Done')


def run():
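    """Run the daily score calculation for the best GRAEC parameter setting
    read from Name_functions.best_graec(), together with the configured
    Beta, Tau and S parameter grids."""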
    with open(Name_functions.best_graec(), 'r') as rf:
        S, B, T, P = rf.readline().strip().split(';')[0:4]
    CalculateDailyScores(single={'S': [int(S)],
                                 'Tau': [float(T)],
                                 'P': [float(P)],
                                 'Beta': [float(B)]},
                         multi={'Beta': Parameters.GRAEC_beta,
                                'Tau': Parameters.GRAEC_tau,
                                'S': Parameters.S_values},
                         test_ids_fn=Name_functions.DS_reduced_ids_DSJ(S)).run()
Example #4
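# Assumed imports for this excerpt; used_models, train_classifier and the
# project modules (Name_functions, Parameters, Model_Functions, Di) are
# defined elsewhere in the repository.
import os

from sklearn.model_selection import train_test_split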
def parse_naive(s):
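    """Train the naive baseline model M^s_naive for dataset D^s.

    Only cases observed before Parameters.train_time_naive_stop are used.
    The best-scoring classifier from used_models (evaluated on a held-out
    20% split) is saved to fn_model. Returns the constant tuple (1.0, 1.0, 100).
    """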
    print('\tM^{}_naive ... '.format(s), end='', flush=True)
    fn_model = Name_functions.model_S_naive(s)
    if os.path.exists(fn_model):
        print("Already done")
        return 1.0, 1.0, 100

    fn_data = Name_functions.DS_file(s)
    fn_subset = Name_functions.DS_reduced_ids_DSJ(s)

    x, y, t = Di(fn_data).get_data(fn_subset, True, False)

    y = y.ravel()

    # Only take data from the first year (before Parameters.train_time_naive_stop)
    x = [
        x[i] for i in range(len(t)) if t[i] < Parameters.train_time_naive_stop
    ]
    y = [
        y[i] for i in range(len(t)) if t[i] < Parameters.train_time_naive_stop
    ]

    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        random_state=0,
                                                        test_size=0.2)

    # Get the best model
    best_model = None
    best_score = -1
    for c in used_models:
        score, model = train_classifier(c, x_train, x_test, y_train, y_test)
        if score > best_score:
            best_score = score
            best_model = model

    # save the model
    Model_Functions.saveModel(best_model, fn_model)
    print("Done")
    return 1.0, 1.0, 100
Example #5
def parse_ms(s):
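    """Reduce every subset D^s_j of dataset D^s to its k-medoids and write
    the case ids of the kept medoids to the reduced-ids file."""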
    fn_target = Name_functions.DS_reduced_ids_DSJ(s)

    # Check existence
    print('\tD^S_j ... ', end='', flush=True)
    if Filefunctions.exists(fn_target):
        print('Already done')
        return

    fn_input = Name_functions.DS_file(s)
    x, y, ids = DataImporter(fn_input).split_data(int(s),
                                                  return_identifiers=True)
    ids_keep = []
    for i in sorted(x):
        # Only the medoid indices are needed to select which case ids to keep
        _, _, indices = KMedoids.reduce_to_medoids(
            x[i], y[i], return_indices=True)
        ids_keep.extend([ids[i][j] for j in indices])

    with open(fn_target, 'w+') as wf:
        for caseID in ids_keep:
            wf.write('{}\n'.format(caseID))

    print('Done')