Example #1
import pickle

# `trainer.train_da` and `save_run` are project-specific helpers assumed to be
# imported/defined elsewhere in this module.


def run(file, epochs, hidden_nodes):
    """Train one denoising autoencoder per (hidden nodes, epochs) pair and save the results."""
    print(file, ' ', epochs, ' ', hidden_nodes)
    with open(file, 'rb') as f:
        patients = pickle.load(f)
    print(patients.shape)

    # The last column holds the outcome label; everything before it is a feature.
    X = patients[:, :-1]
    y = patients[:, -1].astype(int)

    dA = {}

    # One autoencoder per (hidden nodes, epochs) combination, keyed '<hidden>_<epochs>'.
    for hn in hidden_nodes:
        for e in epochs:
            name = str(hn) + '_' + str(e)
            dA[name] = trainer.train_da(X,
                                        learning_rate=0.1,
                                        coruption_rate=0.2,  # (sic) spelling matches train_da's signature
                                        batch_size=10,
                                        training_epochs=e,
                                        n_hidden=hn)
    save_run(file, dA)
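
A minimal invocation sketch; the file path and the epoch / hidden-node grids are hypothetical, chosen only to illustrate the expected argument types:

# Hypothetical call: path and grids are illustrative, not taken from the project.
run('./data/test/patients/patients_1.p', epochs=[100, 500], hidden_nodes=[2, 4, 8])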
Example #3
    def testDaRun(self):
        # Train a single denoising autoencoder on simulated patients and check that the
        # training cost and the downstream classifier scores are reproducible.
        p = 100  # number of simulated patients
        hn = 2   # hidden nodes in the autoencoder

        patients = sim.create_patients(patient_count=p,
                                       observed_variables=100,
                                       systematic_bias=0.1,
                                       input_variance=0.1,
                                       effects=4,
                                       per_effect=5,
                                       effect_mag=5,
                                       trial=1,
                                       sim_model=1,
                                       missing_data=0)
        X = patients[:, :-1]  # observed variables
        y = patients[:, -1]   # outcome label (last column)

        dAs = {}
        dAs[p] = {}
        dAs[p][hn] = trainer.train_da(X,
                                      learning_rate=0.1,
                                      coruption_rate=0.2,  # (sic) spelling matches train_da's signature
                                      batch_size=10,
                                      training_epochs=1000,
                                      n_hidden=hn,
                                      missing_data=None)

        # Compare only the leading digits of the cost to tolerate floating-point noise.
        self.assertEqual(str(dAs[p][hn].trained_cost)[:7], str(3.78843))

        scores = classifier.classify(X, y, dAs)
        print(scores)

        # Expected scores for the 'da_<patients>_<hidden nodes>_rfc' entry.
        da_rfc_scores = [
            1.0, 1.0, 1.0, 0.90000000000000002, 1.0, 1.0, 0.96000000000000008,
            1.0, 0.78000000000000003, 1.0
        ]
        rfc_score_name = 'da_' + str(p) + '_' + str(hn) + '_rfc'
        self.assertEqual(scores[rfc_score_name], da_rfc_scores)
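
classifier.classify is not shown in this example, but the ten values in da_rfc_scores and the '_rfc' suffix suggest per-fold cross-validation scores from a random-forest classifier. A minimal sketch of that general technique with scikit-learn (an assumption about the shape of the output, not the project's actual implementation):

# Sketch only: ten per-fold accuracies from a random forest, analogous in shape to the
# 'da_<patients>_<hidden nodes>_rfc' entry above; not the project's classifier.classify.
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

def rfc_fold_scores(X, y, folds=10):
    clf = RandomForestClassifier(n_estimators=100, random_state=0)
    return list(cross_val_score(clf, X, y, cv=folds))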
Example #5
import os
import pickle as pkl
import random
import time

import numpy as np
from sklearn.preprocessing import Imputer  # legacy scikit-learn API used by this code

# `trainer.train_da`, `classify` and `add_missing` are project-specific helpers
# assumed to be imported/defined elsewhere in this module.


def run(run_name='test', patient_counts=[100, 200, 500],
        da_patients=[1000], hidden_nodes=[1, 2, 4, 8],
        missing_data=[0]):
    # loop through patient files
    # Fix both seeds so repeated runs are reproducible.
    np.random.seed(seed=123)
    random.seed(123)

    overall_time = time.time()
    i = 0
    path = './data/' + run_name + '/'
    patients_path = path + 'patients'
    for file in os.listdir(patients_path):
        if file.endswith(".p"):
            for d in missing_data:
                run_start = time.time()
                i += 1
                scores = {}
                print(file, ' ', str(d))
                with open(patients_path + '/' + file, 'rb') as f:
                    patients = pkl.load(f)

                np.random.shuffle(patients)
                X = patients[:, :-1]
                y = patients[:, -1]

                if d > 0:
                    # Blank out the entries flagged by add_missing, then mean-impute each column.
                    missing_vector = np.asarray(add_missing(patients, d))
                    X = np.array(X)
                    X[np.where(missing_vector == 0)] = np.nan
                    imp = Imputer(strategy='mean', axis=0)
                    X = imp.fit_transform(X)
                else:
                    missing_vector = None

                print(sum(y), len(y))

                # One denoising autoencoder per (training-set size, hidden node count) pair.
                dAs = {}
                for p in da_patients:
                    dAs[p] = {}
                    for n in hidden_nodes:
                        print(p, n)
                        dAs[p][n] = trainer.train_da(X[:p],
                                                     learning_rate=0.1,
                                                     coruption_rate=0.2,  # (sic) spelling matches train_da's signature
                                                     batch_size=100,
                                                     training_epochs=1000,
                                                     n_hidden=n,
                                                     missing_data=missing_vector)

                # Score the classifiers on progressively larger patient subsets.
                for count in patient_counts:
                    scores[count] = classify(X[:count], y[:count], dAs)

                first_part = file.split('.p')[0]
                score_name = first_part + '_s.p'

                # Prefix the score file with the missing-data level whenever any were requested.
                if len(missing_data) > 0:
                    with open(path + 'scores/m' + str(d) + '_' + score_name, 'wb') as f:
                        pkl.dump(scores, f, protocol=2)
                else:
                    with open(path + 'scores/' + score_name, 'wb') as f:
                        pkl.dump(scores, f, protocol=2)
                print(scores)
                del patients
                run_end = time.time()
                print(i, ' run time:', str(run_end - run_start), ' total: ',
                      str(run_end - overall_time))
    print(i)
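
run() reads every '*.p' pickle under './data/<run_name>/patients/' and writes one score pickle per run into './data/<run_name>/scores/'. A hypothetical helper to create that layout up front (the layout is inferred from the path strings above; the helper itself is not part of the project):

import os

# Hypothetical helper: creates the directory layout inferred from run()'s path strings.
def prepare_run_dirs(run_name='test'):
    base = './data/' + run_name + '/'
    os.makedirs(base + 'patients', exist_ok=True)  # input: pickled patient matrices (*.p)
    os.makedirs(base + 'scores', exist_ok=True)    # output: pickled score dictionaries
    return base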