def run(file, epochs, hidden_nodes):
    """Train one denoising autoencoder per (hidden_nodes, epochs) combination.

    Loads a pickled patient matrix from `file` (last column is the label),
    trains a DA for every grid point, and hands the results to `save_run`.
    """
    print(file, ' ', epochs, ' ', hidden_nodes)

    with open(file, 'rb') as handle:
        patients = pickle.load(handle)
    print(patients.shape)

    # Features are every column but the last; the last column is the label.
    X = patients[:, :-1]
    y = patients[:, -1].astype(int)

    # One trained DA per grid point, keyed "<hidden>_<epochs>".
    # Iteration order (hidden nodes outer, epochs inner) matches the original.
    dA = {
        f'{node_count}_{epoch_count}': trainer.train_da(
            X,
            learning_rate=0.1,
            coruption_rate=0.2,
            batch_size=10,
            training_epochs=epoch_count,
            n_hidden=node_count,
        )
        for node_count in hidden_nodes
        for epoch_count in epochs
    }

    save_run(file, dA)
def testDaRun(self):
    """Regression test: DA training cost and downstream RFC scores are stable.

    Simulates 100 patients with a fixed trial seed, trains a single
    2-hidden-node DA, and pins both the trained cost (string prefix)
    and the per-fold random-forest classification scores.

    NOTE(review): another method with this exact name appears later in the
    file; within one TestCase class the later definition shadows this one.
    """
    p = 100
    hn = 2
    patients = sim.create_patients(patient_count=p, observed_variables=100,
                                   systematic_bias=0.1, input_variance=0.1,
                                   effects=4, per_effect=5, effect_mag=5,
                                   trial=1, sim_model=1, missing_data=0)
    X = patients[:, :-1]
    y = patients[:, -1]

    dAs = {}
    dAs[p] = {}
    dAs[p][hn] = trainer.train_da(X, learning_rate=0.1, coruption_rate=0.2,
                                  batch_size=10, training_epochs=1000,
                                  n_hidden=hn, missing_data=None)

    # assertEqual (not assertTrue(a == b)) so failures show both values.
    # String-prefix compare sidesteps float representation noise in the cost.
    self.assertEqual(str(dAs[p][hn].trained_cost)[:7], str(3.78843))

    scores = classifier.classify(X, y, dAs)
    print(scores)

    # Expected per-fold scores; exact equality is intended because the
    # simulation and training are fully seeded (trial=1).
    da_rfc_scores = [1.0, 1.0, 1.0, 0.90000000000000002, 1.0, 1.0,
                     0.96000000000000008, 1.0, 0.78000000000000003, 1.0]
    rfc_score_name = 'da_' + str(p) + '_' + str(hn) + '_rfc'
    self.assertEqual(scores[rfc_score_name], da_rfc_scores)
def testDaRun(self):
    """Regression test: DA training cost and downstream RFC scores are stable.

    Simulates 100 patients with a fixed trial seed, trains a single
    2-hidden-node DA, and pins both the trained cost (string prefix)
    and the per-fold random-forest classification scores.

    NOTE(review): this is a byte-for-byte duplicate of an earlier method with
    the same name; within one TestCase class only this later definition runs.
    One of the two should be deleted or renamed.
    """
    p = 100
    hn = 2
    patients = sim.create_patients(patient_count=p, observed_variables=100,
                                   systematic_bias=0.1, input_variance=0.1,
                                   effects=4, per_effect=5, effect_mag=5,
                                   trial=1, sim_model=1, missing_data=0)
    X = patients[:, :-1]
    y = patients[:, -1]

    dAs = {}
    dAs[p] = {}
    dAs[p][hn] = trainer.train_da(X, learning_rate=0.1, coruption_rate=0.2,
                                  batch_size=10, training_epochs=1000,
                                  n_hidden=hn, missing_data=None)

    # assertEqual (not assertTrue(a == b)) so failures show both values.
    # String-prefix compare sidesteps float representation noise in the cost.
    self.assertEqual(str(dAs[p][hn].trained_cost)[:7], str(3.78843))

    scores = classifier.classify(X, y, dAs)
    print(scores)

    # Expected per-fold scores; exact equality is intended because the
    # simulation and training are fully seeded (trial=1).
    da_rfc_scores = [1.0, 1.0, 1.0, 0.90000000000000002, 1.0, 1.0,
                     0.96000000000000008, 1.0, 0.78000000000000003, 1.0]
    rfc_score_name = 'da_' + str(p) + '_' + str(hn) + '_rfc'
    self.assertEqual(scores[rfc_score_name], da_rfc_scores)
def run(run_name='test', patient_counts=None, da_patients=None,
        hidden_nodes=None, missing_data=None):
    """Run the full DA-training + classification pipeline over patient files.

    For every ``*.p`` file in ``./data/<run_name>/patients`` and every
    missing-data level ``d``, loads the patient matrix, optionally knocks out
    and mean-imputes values, trains one DA per (da_patients x hidden_nodes)
    grid point, classifies at each patient count, and pickles the score dict
    into ``./data/<run_name>/scores``.

    Fixed: mutable default arguments replaced with None sentinels (same
    effective defaults); file handles now closed via context managers.
    """
    # Resolve defaults here so no list object is shared across calls.
    patient_counts = [100, 200, 500] if patient_counts is None else patient_counts
    da_patients = [1000] if da_patients is None else da_patients
    hidden_nodes = [1, 2, 4, 8] if hidden_nodes is None else hidden_nodes
    missing_data = [0] if missing_data is None else missing_data

    # Fixed seeds so shuffling / training are reproducible across runs.
    np.random.seed(seed=123)
    random.seed(123)
    overall_time = time.time()
    i = 0
    path = './data/' + run_name + '/'
    patients_path = path + 'patients'

    for file in os.listdir(patients_path):
        if not file.endswith(".p"):
            continue
        for d in missing_data:
            run_start = time.time()
            i += 1
            scores = {}
            print(file, ' ', str(d))

            # Context manager closes the handle (original leaked it).
            with open(patients_path + '/' + file, 'rb') as fh:
                patients = pkl.load(fh)
            np.random.shuffle(patients)
            X = patients[:, :-1]  # features
            y = patients[:, -1]   # labels (last column)

            if d > 0:
                # 0 entries in missing_vector mark values to knock out.
                missing_vector = np.asarray(add_missing(patients, d))
                X = np.array(X)
                # np.nan instead of the string 'NaN' — equivalent for float
                # arrays, but explicit and safe for non-float dtypes too.
                X[np.where(missing_vector == 0)] = np.nan
                imp = Imputer(strategy='mean', axis=0)
                X = imp.fit_transform(X)
            else:
                missing_vector = None
            print(sum(y), len(y))

            # Train one DA per (training-set size, hidden-node count).
            dAs = {}
            for p in da_patients:
                dAs[p] = {}
                for n in hidden_nodes:
                    print(p, n)
                    dAs[p][n] = trainer.train_da(
                        X[:p], learning_rate=0.1, coruption_rate=0.2,
                        batch_size=100, training_epochs=1000, n_hidden=n,
                        missing_data=missing_vector)

            for count in patient_counts:
                scores[count] = classify(X[:count], y[:count], dAs)

            first_part = file.split('.p')[0]
            score_name = first_part + '_s.p'
            # NOTE(review): this condition is always True here — reaching this
            # point requires missing_data to be non-empty — so the else branch
            # is dead. Possibly `d > 0` was intended; left unchanged so output
            # filenames stay stable for downstream consumers. TODO confirm.
            if len(missing_data) > 0:
                with open(path + 'scores/m' + str(d) + '_' + score_name, "wb") as out:
                    pkl.dump(scores, out, protocol=2)
            else:
                with open(path + 'scores/' + score_name, "wb") as out:
                    pkl.dump(scores, out, protocol=2)
            print(scores)
            del patients  # release the large array before the next file

            run_end = time.time()
            print(i, ' run time:', str(run_end - run_start),
                  ' total: ', str(run_end - overall_time))
    print(i)