def parse_s(s):
    """Train one model M^s_j for every split j of dataset D^s.

    A split is skipped when its rarest class has fewer than ``cv * 2``
    samples (not enough for cross-validation folds) or when it contains
    only a single class.  Splits whose model file already exists are
    counted as good without retraining.

    Returns:
        tuple: (good_splits, total_splits, percentage_of_good_splits).
    """
    fn_data = Name_functions.DS_file(s)
    fn_subset = Name_functions.DS_reduced_ids_DSJ(s)
    x, y = Di(fn_data).split_data(int(s), fn_subset_ids=fn_subset)
    print('\tM^{}_j ... '.format(s), end='', flush=True)
    good_splits = 0
    for i in sorted(x):
        c, cc = np.unique(y[i], return_counts=True)
        # Need at least cv * 2 samples of the rarest class for CV folds.
        # (The original also computed np.where(cc == np.min(cc)) here but
        # never used it — dead code, removed.)
        if min(cc) < cv * 2:
            continue
        # A single-class split cannot train a classifier.
        if len(c) <= 1:
            continue
        # Train only if the model file is missing; either way the split
        # met the requirements, so it counts as good.
        if not os.path.exists(Name_functions.model_SJ(s, i)):
            generate_model(x[i], y[i], s, i)
        good_splits += 1
    print('Done ({}/{} D^{}_j met requirements)'.format(
        good_splits, len(x), s))
    return good_splits, len(x), 100 * good_splits / len(x)
def parse_ms(s):
    """Write the train/test case-id files for dataset D^s.

    Skips work if both id files already exist.  Otherwise splits the
    cases that start after ``Parameters.test_time_start`` either
    chronologically (last fraction of start times becomes the test set)
    or randomly per class label, then writes one case id per line into
    the train and test id files.
    """
    print('D^{} ... '.format(s), end='', flush=True)
    # Both output files present -> nothing to do.
    if (Filefunctions.exists(Name_functions.DS_train_ids(s))
            and Filefunctions.exists(Name_functions.DS_test_ids(s))):
        print('Already done')
        return

    np.random.seed(0)
    X, y, times, ids = DI(Name_functions.DS_file(s)).get_data(
        Name_functions.DS_reduced_ids_DSJ(s), True, True)

    if Parameters.take_test_split_chronological:
        train_case_ids, test_case_ids = [], []
        # Start times after the warm-up boundary, oldest first.
        eligible_times = sorted(
            t for t in times if t > Parameters.test_time_start)
        cutoff_index = int(
            (1 - Parameters.assessment_test_split) * len(eligible_times))
        cutoff_time = eligible_times[cutoff_index]
        for start_time, case_id in zip(times, ids):
            if start_time <= Parameters.test_time_start:
                continue  # warm-up case: neither train nor test
            if start_time < cutoff_time:
                train_case_ids.append(case_id)
            else:
                test_case_ids.append(case_id)
    else:
        # Stratified random split: per label, draw the test fraction
        # without replacement from the eligible indices.
        eligible = [
            i for i in range(len(ids))
            if times[i] > Parameters.test_time_start
        ]
        train_indices, test_indices = [], []
        labels, counts = np.unique(y[eligible], return_counts=True)
        for label, label_count in zip(labels, counts):
            n_test = int(label_count * Parameters.assessment_test_split)
            label_indices = [i for i in eligible if y[i] == label]
            drawn = np.random.choice(label_indices, n_test, replace=False)
            test_indices.extend(drawn.tolist())
            train_indices.extend(i for i in label_indices if i not in drawn)
        test_case_ids = ids[test_indices]
        train_case_ids = ids[train_indices]

    with open(Name_functions.DS_train_ids(s), 'w+') as wf:
        wf.writelines('{}\n'.format(case_id) for case_id in train_case_ids)
    with open(Name_functions.DS_test_ids(s), 'w+') as wf:
        wf.writelines('{}\n'.format(case_id) for case_id in test_case_ids)
    print('Done')
def run():
    """Score days using the best stored GRAEC parameter combination.

    Reads the first line of the best-GRAEC file (semicolon-separated
    S;B;T;P), then runs CalculateDailyScores with that single best
    combination plus the configured multi-parameter sweep.
    """
    with open(Name_functions.best_graec(), 'r') as rf:
        # First line holds "S;B;T;P;..." — strip the trailing newline.
        S, B, T, P = rf.readline()[:-1].split(';')[0:4]
    best_combination = {
        'S': [int(S)],
        'Tau': [float(T)],
        'P': [float(P)],
        'Beta': [float(B)],
    }
    sweep = {
        'Beta': Parameters.GRAEC_beta,
        'Tau': Parameters.GRAEC_tau,
        'S': Parameters.S_values,
    }
    CalculateDailyScores(
        single=best_combination,
        multi=sweep,
        test_ids_fn=Name_functions.DS_reduced_ids_DSJ(S),
    ).run()
def parse_naive(s):
    """Train the naive baseline model M^s_naive on first-year data.

    If the model file already exists, nothing is trained.  Otherwise
    the data is restricted to samples before
    ``Parameters.train_time_naive_stop``, split 80/20, and the
    best-scoring classifier among ``used_models`` is saved.

    Returns:
        tuple: constant (1.0, 1.0, 100) — the naive model always
        "meets requirements" for reporting purposes.
    """
    print('\tM^{}_naive ... '.format(s), end='', flush=True)
    fn_model = Name_functions.model_S_naive(s)
    if os.path.exists(fn_model):
        print("Already done")
        return 1.0, 1.0, 100

    fn_data = Name_functions.DS_file(s)
    fn_subset = Name_functions.DS_reduced_ids_DSJ(s)
    x, y, t = Di(fn_data).get_data(fn_subset, True, False)
    y = y.ravel()

    # Only take data that is in the first year.  Compute the index set
    # once instead of filtering x and y with two separate passes over t.
    keep = [i for i in range(len(t))
            if t[i] < Parameters.train_time_naive_stop]
    x = [x[i] for i in keep]
    y = [y[i] for i in keep]

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=0, test_size=0.2)

    # Keep the classifier family with the best held-out score.
    best_model = None
    best_score = -1
    for c in used_models:
        score, model = train_classifier(c, x_train, x_test, y_train, y_test)
        if score > best_score:
            best_score = score
            best_model = model

    # save the model
    Model_Functions.saveModel(best_model, fn_model)
    print("Done")
    return 1.0, 1.0, 100
def parse_ms(s):
    """Reduce every split of D^s to its medoid representatives.

    For each split the k-medoid indices are computed and the matching
    case ids are collected, then written one per line to the reduced-id
    file.  Skips work if that file already exists.
    """
    fn_target = Name_functions.DS_reduced_ids_DSJ(s)

    # Check existence
    print('\tD^S_j ... ', end='', flush=True)
    if Filefunctions.exists(fn_target):
        print('Already done')
        return

    x, y, ids = DataImporter(Name_functions.DS_file(s)).split_data(
        int(s), return_identifiers=True)

    kept_ids = []
    for split in sorted(x):
        # Only the medoid indices are needed to look up the case ids.
        _, _, medoid_indices = KMedoids.reduce_to_medoids(
            x[split], y[split], return_indices=True)
        kept_ids.extend(ids[split][j] for j in medoid_indices)

    with open(fn_target, 'w+') as wf:
        wf.writelines('{}\n'.format(case_id) for case_id in kept_ids)
    print('Done')