def parse_s(s):
    """Train one model per split of dataset D^s and report how many splits qualify.

    For each split index i of the reduced dataset, a model M^s_i is trained
    (unless one already exists on disk) provided the split's labels meet the
    minimum-support requirements for cross-validation.

    Parameters:
        s: dataset identifier (convertible to int), used to locate files.

    Returns:
        (good_splits, total_splits, percentage) where percentage is
        100 * good_splits / total_splits.
        NOTE(review): raises ZeroDivisionError if the dataset has no splits —
        presumably callers guarantee at least one; verify.
    """
    fn_data = Name_functions.DS_file(s)
    fn_subset = Name_functions.DS_reduced_ids_DSJ(s)
    x, y = Di(fn_data).split_data(int(s), fn_subset_ids=fn_subset)
    print('\tM^{}_j ... '.format(s), end='', flush=True)
    good_splits = 0
    for i in sorted(x):
        fn_model = Name_functions.model_SJ(s, i)
        c, cc = np.unique(y[i], return_counts=True)
        # Guard clauses: skip splits whose rarest class cannot support
        # cv-fold cross-validation, or that contain only one class.
        # (The original also computed np.where(cc == np.min(cc)) here,
        # but never used the result — removed as dead code.)
        if min(cc) < cv * 2:
            continue
        if len(c) <= 1:
            continue
        # Train only when no saved model exists; either way the split counts.
        if not os.path.exists(fn_model):
            generate_model(x[i], y[i], s, i)
        good_splits += 1
    print('Done ({}/{} D^{}_j met requirements)'.format(
        good_splits, len(x), s))
    return good_splits, len(x), 100 * good_splits / len(x)
def _write_prediction_probabilities(model, end_time, fn_out, x, time, case_id):
    """Write one ';'-separated line per data point observed at/after end_time.

    Each line is: case_id;timestamp;p(label_1);...;p(label_k) over all_labels,
    with probability 0 for any label the model was never trained on.

    NOTE(review): '{:4f}' is a width-4 fixed-point spec (6 decimal places),
    not '{:.4f}' — probably a typo, but preserved because downstream parsers
    may depend on the current file format; confirm before changing.
    """
    model_labels = model.classes_.tolist()
    with open(fn_out, 'w+') as wf:
        for dx, t, idn in zip(x, time, case_id):
            if t < end_time:
                # Only test if the model existed before the data point
                continue
            probs = model.predict_proba(dx.reshape(1, -1))[0]
            row = [
                (0 if (lbl not in model_labels)
                 else probs[model_labels.index(lbl)])
                for lbl in all_labels
            ]
            wf.write('{};{};{}\n'.format(
                idn, t, ';'.join(['{:4f}'.format(p) for p in row])))


def parse_ms(s):
    """Score dataset D^s with every per-split model M^s_j and the naive model.

    For each split j (newest first) a probability file is written unless it
    already exists; then the naive model's probability file is written the
    same way. All file locations come from Name_functions.

    Parameters:
        s: dataset identifier used to locate data, models and output files.
    """
    fn_data = Name_functions.DS_file(s)
    x, y, time, case_id = Di(fn_data).get_data(return_identifiers=True,
                                               return_split_values=True)
    print('\tM^{}_j ... '.format(s), end='', flush=True)
    # S predictions: one probability file per split model
    for i in sorted([int(i) for i in Name_functions.S_J_values(s)],
                    reverse=True):
        if Filefunctions.exists(Name_functions.DSJ_probabilities(s, i)):
            continue
        _write_prediction_probabilities(
            Model_Functions.loadModel(Name_functions.model_SJ(s, i)),
            Name_functions.SJ_period_end_time(s, i),
            Name_functions.DSJ_probabilities(s, i),
            x, time, case_id)
    print('Done')

    # Naive predictions: a single model trained up to a fixed cutoff time
    print('\tM^{}_naive ... '.format(s), end='', flush=True)
    if Filefunctions.exists(Name_functions.DS_probabilities_naive(s)):
        print('Already done')
        return
    _write_prediction_probabilities(
        Model_Functions.loadModel(Name_functions.model_S_naive(s)),
        Parameters.train_time_naive_stop,
        Name_functions.DS_probabilities_naive(s),
        x, time, case_id)
    print('Done')
def generate_model(x, y, s, i):
    """Fit every candidate classifier on split i of dataset s and persist the winner.

    The data is split 80/20 (fixed random_state=0 for reproducibility), each
    classifier type in used_models is trained and scored, and the model with
    the strictly highest score is saved to the path given by
    Name_functions.model_SJ(s, i).
    """
    # Hold out 20% of the samples for scoring the candidates.
    feats_train, feats_test, labels_train, labels_test = train_test_split(
        x, y.ravel(), random_state=0, test_size=0.2)

    # Keep the first candidate that strictly beats the running best score.
    top_model, top_score = None, -1
    for classifier_kind in used_models:
        candidate_score, candidate = train_classifier(
            classifier_kind, feats_train, feats_test, labels_train, labels_test)
        if candidate_score > top_score:
            top_model, top_score = candidate, candidate_score

    # Persist the winning model for later scoring runs.
    Model_Functions.saveModel(top_model, Name_functions.model_SJ(s, i))