def _chronological_split(times, ids, warm_up_end, test_fraction):
    """Return ``(train_ids, test_ids)`` holding out the latest cases for testing.

    Cases whose start time is not strictly after ``warm_up_end`` are dropped.
    Of the remaining cases, those starting before the cut-off time go to the
    training list, the others to the test list.
    """
    ordered_times = sorted(t for t in times if t > warm_up_end)
    boundary = int((1 - test_fraction) * len(ordered_times))
    # The boundary index falls outside the list when there are no cases after
    # the warm-up or when the test fraction is 0. Treat that as "no cut-off"
    # (everything is training data) instead of raising IndexError, which
    # matches what the random split produces for the same inputs.
    cutoff = ordered_times[boundary] if boundary < len(ordered_times) else None

    train_ids = []
    test_ids = []
    for case_start_time, case_id in zip(times, ids):
        if case_start_time <= warm_up_end:
            continue
        if cutoff is None or case_start_time < cutoff:
            train_ids.append(case_id)
        else:
            test_ids.append(case_id)
    return train_ids, test_ids


def _stratified_random_split(y, times, ids, warm_up_end, test_fraction):
    """Return ``(train_ids, test_ids)`` sampled per label without replacement.

    Uses NumPy's global random state, so the caller controls reproducibility
    through ``np.random.seed``. ``y`` and ``ids`` are indexed with lists of
    positions, i.e. they are expected to support NumPy-style fancy indexing.
    """
    eligible = [i for i in range(len(ids)) if times[i] > warm_up_end]
    train_indices = []
    test_indices = []
    labels, label_counts = np.unique(y[eligible], return_counts=True)
    for label, label_count in zip(labels, label_counts):
        num_test = int(label_count * test_fraction)
        members = [i for i in eligible if y[i] == label]
        held_out = np.random.choice(members, num_test, replace=False).tolist()
        # Set membership instead of scanning the sampled array per index.
        held_out_lookup = set(held_out)
        test_indices.extend(held_out)
        train_indices.extend(i for i in members if i not in held_out_lookup)
    return ids[train_indices], ids[test_indices]


def _write_case_ids(fn, case_ids):
    """Write one case id per line to ``fn``, replacing any existing content."""
    with open(fn, 'w+') as wf:
        wf.writelines('{}\n'.format(case_id) for case_id in case_ids)


def parse_ms(s):
    """Create the train and test case-id files for ``s`` unless both exist.

    Loads the data for ``s`` restricted to the reduced id subset, discards
    cases that start at or before ``Parameters.test_time_start`` and splits
    the rest into train and test ids. The test share is
    ``Parameters.assessment_test_split``. When
    ``Parameters.take_test_split_chronological`` is set the latest cases form
    the test set; otherwise cases are sampled at random per label.

    Side effects: prints progress to stdout, seeds NumPy's global random
    state with 0, and writes the two id files (one id per line).

    :param s: identifier passed through to ``Name_functions`` to resolve the
        data and id file names.
    """
    print('D^{} ... '.format(s), end='', flush=True)
    fn_train_ids = Name_functions.DS_train_ids(s)
    fn_test_ids = Name_functions.DS_test_ids(s)
    if Filefunctions.exists(fn_train_ids) and Filefunctions.exists(fn_test_ids):
        print('Already done')
        return

    # Seed before loading, as the original did, so the random split is
    # reproducible between runs.
    np.random.seed(0)

    # The first returned value (the feature data) is not needed for splitting.
    _, y, times, ids = DI(Name_functions.DS_file(s)).get_data(
        Name_functions.DS_reduced_ids_DSJ(s), True, True)

    if Parameters.take_test_split_chronological:
        train_case_ids, test_case_ids = _chronological_split(
            times, ids, Parameters.test_time_start,
            Parameters.assessment_test_split)
    else:
        train_case_ids, test_case_ids = _stratified_random_split(
            y, times, ids, Parameters.test_time_start,
            Parameters.assessment_test_split)

    _write_case_ids(fn_train_ids, train_case_ids)
    _write_case_ids(fn_test_ids, test_case_ids)
    print('Done')
def _write_graec_predictions(fn, header, predictor, ids, times, labels):
    """Write one ``case_id;time;true_label;predicted_label`` row per case.

    ``header`` is written verbatim as the first line. Only the first element
    of each entry in ``labels`` is written as the true label.
    """
    with open(fn, 'w+') as wf:
        wf.write(header)
        for case_id, t, true_label in zip(ids, times, labels):
            predicted_label = predictor.predict(case_id=case_id, time=t)
            wf.write('{};{};{};{}\n'.format(
                case_id, t, true_label[0], predicted_label))


def parse_ms(s):
    """Write GRAEC, naive and previous predictions for ``s``.

    For every combination of ``Parameters.GRAEC_beta``, ``GRAEC_tau`` and
    ``GRAEC_p`` a train and a test prediction file is written, and the
    combination is recorded under its running number in the enumeration
    dictionary. That dictionary is saved only after all prediction files are
    written, and its existence is what marks the GRAEC step as done.
    Afterwards the naive and previous classifiers are evaluated on the test
    cases; rows are written only when a prediction is not ``None``.

    NOTE(review): if the enumeration dictionary already exists the function
    returns immediately, so the naive/previous files are not (re)written in
    that case -- confirm this is intended.
    NOTE(review): another ``parse_ms`` appears earlier in the visible source;
    if both live in one module this definition replaces the earlier one.

    :param s: identifier passed through to ``Name_functions``, the
        classifiers and ``PeriodScoring``.
    """
    print('\tGRAEC ... ', end='', flush=True)
    fn_enumeration = Name_functions.S_GRAEC_enumeration_dictionary(s)
    if Filefunctions.exists(fn_enumeration):
        print('Already done')
        return

    fn_data = Name_functions.DS_file(s)
    # Only labels, times and ids are used; the feature data is discarded.
    _, labels_train, times_train, ids_train = DI(fn_data).get_data(
        fn_subset_ids=Name_functions.DS_train_ids(s),
        return_split_values=True,
        return_identifiers=True)
    # The test subset is loaded once and reused for the baselines below; the
    # original loaded it a second time with identical arguments.
    _, labels_test, times_test, ids_test = DI(fn_data).get_data(
        fn_subset_ids=Name_functions.DS_test_ids(s),
        return_split_values=True,
        return_identifiers=True)

    enumeration_encoder = dict()
    enumeration = 0
    predictor = Classifiers.BPTSClassifier(s=s, score_function=None)
    for B in Parameters.GRAEC_beta:
        for T in Parameters.GRAEC_tau:
            # P has no use for T == 0
            for P in ([0] if T == 0 else Parameters.GRAEC_p):
                enumeration_encoder[enumeration] = '{};{};{}'.format(B, T, P)
                predictor.set_scoring_function(
                    score_function=PeriodScoring(beta=B, p=P, tau=T, s=s))
                # NOTE(review): the train header says 'SOID' where the test
                # header says 'Case_id'. Left unchanged because readers of
                # these files may depend on the column name.
                _write_graec_predictions(
                    Name_functions.S_GRAEC_train_predictions(s, enumeration),
                    'SOID;time;True_label;Predicted_label\n',
                    predictor, ids_train, times_train, labels_train)
                _write_graec_predictions(
                    Name_functions.S_GRAEC_test_predictions(s, enumeration),
                    'Case_id;time;True_label;Predicted_label\n',
                    predictor, ids_test, times_test, labels_test)
                enumeration += 1

    Human_Functions.save_dict_to_csv(enumeration_encoder, fn_enumeration)
    print('Done')

    print('\tNaive and Previous ... ', end='', flush=True)
    naive_predictor = Classifiers.NaiveClassifier(s)
    previous_predictor = Classifiers.PreviousClassifier(s)
    header = '{};{};{};{}\n'.format(
        'Case_id', 'time', 'True_label', 'Predicted_label')
    with open(Name_functions.S_naive_test_predictions(s), 'w+') as wf_naive, \
            open(Name_functions.S_recent_test_predictions(s), 'w+') as wf_previous:
        wf_naive.write(header)
        wf_previous.write(header)
        for case_id, t, true_label in zip(ids_test, times_test, labels_test):
            predicted_label_naive = naive_predictor.predict(
                case_id=case_id, time=t)
            if predicted_label_naive is not None:
                wf_naive.write('{};{};{};{}\n'.format(
                    case_id, t, true_label[0], predicted_label_naive))
            predicted_label_previous = previous_predictor.predict(
                case_id=case_id, time=t)
            if predicted_label_previous is not None:
                wf_previous.write('{};{};{};{}\n'.format(
                    case_id, t, true_label[0], predicted_label_previous))
    print('Done')