def _write_probabilities(path, model, model_labels, model_end_time,
                         labels, x, times, case_ids):
    """Write one ';'-separated line of class probabilities per data point.

    Each line has the form ``<case id>;<time>;<p_0>;<p_1>;...`` with one
    probability per entry of ``labels``. A label the model does not know
    (not present in ``model_labels``) gets probability 0.

    Data points whose time is strictly smaller than ``model_end_time`` are
    skipped.

    :param path: output file; it is created/truncated.
    :param model: object exposing ``predict_proba``.
    :param model_labels: list of class labels, in the column order of
        ``model.predict_proba``.
    :param model_end_time: threshold compared against every entry of ``times``.
    :param labels: the labels to emit a column for, in output order.
    :param x: iterable of feature vectors (each must support ``reshape``).
    :param times: iterable of time values, parallel to ``x``.
    :param case_ids: iterable of identifiers, parallel to ``x``.
    """
    # Loop-invariant: for every output label, the column of predict_proba
    # that holds it, or None when the model never saw that label.
    columns = [
        model_labels.index(label) if label in model_labels else None
        for label in labels
    ]

    with open(path, 'w+') as wf:
        for dx, t, idn in zip(x, times, case_ids):
            if t < model_end_time:
                # Only test if the model existed before the data point
                continue
            probabilities = model.predict_proba(dx.reshape(1, -1))[0]
            row = [0 if col is None else probabilities[col] for col in columns]
            # NOTE(review): '{:4f}' is "minimum width 4, default 6 decimals";
            # '{:.4f}' may have been intended. Left unchanged so the output
            # file format stays exactly as before.
            wf.write('{};{};{}\n'.format(
                idn, t, ';'.join(['{:4f}'.format(p) for p in row])))


def parse_ms(s):
    """Write prediction-probability files for all M^s_j models and the naive model.

    For every value returned by ``Name_functions.S_J_values(s)`` (processed
    as integers in descending order) the corresponding model is loaded and
    its class probabilities for the data set of ``s`` are written to
    ``Name_functions.DSJ_probabilities(s, j)``. Files that already exist are
    skipped. Afterwards the same is done for the naive model, writing to
    ``Name_functions.DS_probabilities_naive(s)``.

    Progress is printed to stdout.

    NOTE(review): an output file that exists is treated as complete, so a
    file left behind by an interrupted run is never regenerated — confirm
    whether that is acceptable.

    :param s: identifier forwarded to the ``Name_functions`` helpers.
    :return: None
    """
    fn_data = Name_functions.DS_file(s)
    # The second returned value (the targets) is not needed for prediction.
    x, _, times, case_ids = Di(fn_data).get_data(
        return_identifiers=True, return_split_values=True)

    print('\tM^{}_j ... '.format(s), end='', flush=True)

    # S predictions
    for j in sorted((int(v) for v in Name_functions.S_J_values(s)), reverse=True):
        fn_probabilities = Name_functions.DSJ_probabilities(s, j)
        if Filefunctions.exists(fn_probabilities):
            continue

        model_j = Model_Functions.loadModel(Name_functions.model_SJ(s, j))
        model_labels = model_j.classes_.tolist()
        model_end_time = Name_functions.SJ_period_end_time(s, j)
        _write_probabilities(fn_probabilities, model_j, model_labels,
                             model_end_time, all_labels, x, times, case_ids)
    print('Done')

    # Naive predictions
    print('\tM^{}_naive ... '.format(s), end='', flush=True)
    fn_naive = Name_functions.DS_probabilities_naive(s)
    if Filefunctions.exists(fn_naive):
        print('Already done')
        return

    model_naive = Model_Functions.loadModel(Name_functions.model_S_naive(s))
    model_naive_labels = model_naive.classes_.tolist()
    model_naive_end_time = Parameters.train_time_naive_stop
    _write_probabilities(fn_naive, model_naive, model_naive_labels,
                         model_naive_end_time, all_labels, x, times, case_ids)
    print('Done')
def parse_naive(s):
    """Train and store the naive model for ``s`` unless it already exists.

    The data set of ``s`` (restricted to the subset named by
    ``Name_functions.DS_reduced_ids_DSJ(s)``) is filtered to the samples
    whose time is strictly before ``Parameters.train_time_naive_stop``,
    split 80/20 with a fixed random state, and every classifier in
    ``used_models`` is trained on it. The model with the highest score is
    saved to ``Name_functions.model_S_naive(s)``.

    Progress is printed to stdout.

    :param s: identifier forwarded to the ``Name_functions`` helpers.
    :return: always the constant tuple ``(1.0, 1.0, 100)``, both when the
        model already existed and after training.
    """
    print('\tM^{}_naive ... '.format(s), end='', flush=True)

    model_path = Name_functions.model_S_naive(s)
    if os.path.exists(model_path):
        print("Already done")
        return 1.0, 1.0, 100

    data_path = Name_functions.DS_file(s)
    subset_path = Name_functions.DS_reduced_ids_DSJ(s)
    x, y, t = Di(data_path).get_data(subset_path, True, False)
    y = y.ravel()

    # Only take data that is in the first year: determine the positions
    # to keep once, then apply them to both features and targets.
    keep = [
        pos for pos in range(len(t))
        if t[pos] < Parameters.train_time_naive_stop
    ]
    x = [x[pos] for pos in keep]
    y = [y[pos] for pos in keep]

    x_train, x_test, y_train, y_test = train_test_split(
        x, y, random_state=0, test_size=0.2)

    # Get the best model: the first candidate with the strictly highest
    # score wins; stays None if no candidate scores above -1.
    winner, winner_score = None, -1
    for candidate in used_models:
        candidate_score, candidate_model = train_classifier(
            candidate, x_train, x_test, y_train, y_test)
        if candidate_score > winner_score:
            winner, winner_score = candidate_model, candidate_score

    # save the model
    Model_Functions.saveModel(winner, model_path)
    print("Done")
    return 1.0, 1.0, 100