def first_run():
    """Build, train, evaluate and save the first network.

    Uses the ``model_builder``, ``model_trainer`` and ``model_tester``
    helpers together with the ``x_train``/``y_train`` and ``x_test``/``y_test``
    data found in the enclosing scope. The trained model is written to
    ``./nn_models/mnist_nn_v1.h5``.
    """
    untrained = model_builder.build_model()
    trained = model_trainer.train_model(untrained, x_train, y_train)

    # The evaluation result is not used any further here.
    model_tester.test_model(trained, x_test, y_test)

    trained.save("./nn_models/mnist_nn_v1.h5")
def train_winner(winning_design, filename, testfilename, columns, targets, testtargets, train_kwargs, separator=','):
    """Train the winning design, run it on its training file and print
    ready-to-use ``model_tester.py`` command lines.

    ``winning_design``, ``filename``, ``columns``, ``targets`` and
    ``separator`` are forwarded to ``train_model`` together with
    ``comsize_third=10``, ``epochs=300`` and the extra keyword arguments in
    ``train_kwargs``. The value it returns is handed to ``test_model`` against
    ``filename`` and the first two entries of ``targets``.

    ``testfilename`` and ``testtargets`` are only used for the second printed
    command; when ``testfilename`` is None that command stays empty.
    """
    model_file = train_model(winning_design, filename, columns, targets,
                             comsize_third=10, epochs=300,
                             separator=separator, **train_kwargs)

    print("\nProceeding with plotting on training data...")
    model_output_file = test_model(model_file, filename, targets[0], targets[1], separator, *columns)

    print("\nModel output stored in {0}".format(model_output_file))

    has_testfile = testfilename is not None

    # Generic command: the user substitutes their own test file.
    cmd = 'python model_tester.py "{model_file}" YOUR_TEST_FILE_HERE "{0}" "{1}" "{2}"'.format(
        separator, targets[0], targets[1], model_file=model_file)

    # Concrete command, only available when a test file was supplied.
    selfcmd = ''
    if has_testfile:
        selfcmd = 'python model_tester.py "{model_file}" "{testfile}" "{0}" "{1}" "{2}"'.format(
            separator, testtargets[0], testtargets[1],
            model_file=model_file, testfile=testfilename)

    # Both commands end with one quoted argument per input column.
    column_args = ''.join(' "{0}"'.format(col) for col in columns)
    cmd += column_args
    if has_testfile:
        selfcmd += column_args

    summary = '''
    Completed.
    If you wish to test your model against some test data, use this command:
        {0}
    To get scatter plots for test data, run this command:
        {1}'''
    print(summary.format(cmd, selfcmd))
예제 #3
0
def train_winner(winning_design,
                 filename,
                 testfilename,
                 columns,
                 targets,
                 testtargets,
                 train_kwargs,
                 separator=','):
    """Train ``winning_design`` and report how to test the resulting model.

    The design is trained through ``train_model`` (with ``comsize_third=10``,
    ``epochs=300`` and whatever ``train_kwargs`` adds), the returned model file
    is run through ``test_model`` on the training file, and two
    ``model_tester.py`` command lines are printed: a generic one with a
    ``YOUR_TEST_FILE_HERE`` placeholder and, if ``testfilename`` is not None,
    one that targets ``testfilename`` with ``testtargets``.
    """
    def quoted(value):
        # Wrap a single command-line argument in double quotes.
        return '"{0}"'.format(value)

    training_options = dict(comsize_third=10, epochs=300, separator=separator)
    model_file = train_model(winning_design, filename, columns, targets,
                             **dict(training_options, **train_kwargs)) \
        if not set(training_options) & set(train_kwargs) else \
        train_model(winning_design, filename, columns, targets,
                    comsize_third=10, epochs=300, separator=separator,
                    **train_kwargs)

    print("\nProceeding with plotting on training data...")
    time_target, event_target = targets[0], targets[1]
    model_output_file = test_model(model_file, filename, time_target,
                                   event_target, separator, *columns)

    print("\nModel output stored in {0}".format(model_output_file))

    cmd_parts = ['python model_tester.py', quoted(model_file),
                 'YOUR_TEST_FILE_HERE', quoted(separator),
                 quoted(time_target), quoted(event_target)]

    selfcmd_parts = None
    if testfilename is not None:
        selfcmd_parts = ['python model_tester.py', quoted(model_file),
                         quoted(testfilename), quoted(separator),
                         quoted(testtargets[0]), quoted(testtargets[1])]

    # Every input column is appended as its own quoted argument.
    column_parts = [quoted(col) for col in columns]

    cmd = ' '.join(cmd_parts + column_parts)
    if selfcmd_parts is None:
        selfcmd = ''
    else:
        selfcmd = ' '.join(selfcmd_parts + column_parts)

    print('''
    Completed.
    If you wish to test your model against some test data, use this command:
        {0}
    To get scatter plots for test data, run this command:
        {1}'''.format(cmd, selfcmd))
예제 #4
0
def check_keys(my_data):
    """React to a key press and return the updated drawing state.

    ``my_data`` is the 7-tuple ``(event, background, draw_color, line_width,
    keep_going, screen, mat)``. Handled keys:

    * ``q`` - set ``keep_going`` to False
    * ``c`` - reset the canvas
    * ``s`` - ask for the digit and teach the focused matrix to the three
      ``one_zero_*`` models
    * ``t`` - pass ``my_data`` to ``model_tester.test_model`` once per
      ``one_zero_*`` model name

    The canvas is reset after every key, whichever branch ran. The returned
    tuple is ``(event, background, draw_color, line_width, keep_going)``;
    ``screen`` and ``mat`` are not part of it.
    """
    event, background, draw_color, line_width, keep_going, screen, mat = my_data

    def reset_canvas():
        # White background plus an empty 28x28 pixel grid.
        background.fill((255, 255, 255))
        draw_pixelated(np.zeros((28, 28)), screen)

    pressed = event.key

    if pressed == pygame.K_q:
        keep_going = False

    elif pressed == pygame.K_c:
        reset_canvas()

    elif pressed == pygame.K_s:
        # Save a digit together with its pattern.
        digit = int(ask(screen, ""))
        focused = matrix_manipulate.focus_mat(mat)

        shrinking_model.learn_pattern(focused, digit, "one_zero_shrinking")
        extended_model.learn_pattern(focused, digit, "one_zero_extended")
        multiplication_model.learn_pattern(focused, digit, "one_zero_multiplication")

        # Alternative pattern names kept for reference:
        #   "shrinking_model", "extended_model", "multiplication_model"

    elif pressed == pygame.K_t:
        # Test the hand-written digit in real time.
        for pattern_name in ('one_zero_shrinking',
                             'one_zero_extended',
                             'one_zero_multiplication'):
            model_tester.test_model(my_data, pattern_name)

        # Alternative pattern names kept for reference:
        #   'multiplication_model', "shrinking_model", "extended_model"

    reset_canvas()

    return (event, background, draw_color, line_width, keep_going)
예제 #5
0
def main():
    """Assemble the enabled feature groups and run ``test_model`` on them.

    Optional positional command-line arguments:

    * ``argv[1]`` - data size, capped at 906 (default 25)
    * ``argv[2]`` - number of cross-validation splits (default 5)
    * ``argv[3]`` - method name (default ``'adaboost'``)
    * ``argv[4]`` - method argument: ``regularization`` for ``'lr'``/``'svm'``
      (default 0.) or ``n_estimators`` for ``'adaboost'`` (default 50)

    Numeric arguments that fail ``isnumeric()`` fall back to their defaults.
    """
    transformer_list = []

    # Feature-group switches; only the regex-derived group is enabled.
    regex_features = True
    icd9_features = False
    labs_features = False
    text_features = False

    if regex_features:
        # NOTE(review): 'EF' and 'QRS' are each used as a name more than once;
        # newer scikit-learn releases may reject duplicate FeatureUnion names -
        # confirm against the version this project pins.
        transformer_list += [
            ('EF', EFTransformer('all', 1, None)),
            ('EF', EFTransformer('mean', 5, None)),
            ('EF', EFTransformer('max', 5, None)),
            ('LBBB', LBBBTransformer(30 * 3)),
            ('SR', SinusRhythmTransformer(30 * 3)),
            ('NYHA', NYHATransformer(30 * 3)),
            ('NICM', NICMTransformer(30 * 3)),
            ('QRS', QRSTransformer('all', 1, None)),
            ('QRS', QRSTransformer('mean', 5, None)),
        ]
    if icd9_features:
        transformer_list += [('Dia', ICD9_Transformer())]
    if text_features:
        transformer_list += [
            ('Car',
             FeaturePipeline([('notes_transformer_car',
                               GetConcatenatedNotesTransformer('Car')),
                              ('tfidf', TfidfTransformer())])),
            ('Lno',
             FeaturePipeline([('notes_transformer_lno',
                               GetConcatenatedNotesTransformer('Lno')),
                              # A pipeline step needs an instance, not the
                              # TfidfTransformer class itself.
                              ('tfidf', TfidfTransformer())]))
        ]
    if labs_features:
        transformer_list += [
            ('Enc', GetEncountersFeaturesTransformer(5)),
            ('Labs_Counts',
             FeaturePipeline([('labs_counts_transformer',
                               GetLabsCountsDictTransformer()),
                              ('dict_vectorizer', DictVectorizer())])),
            ('Labs_Low_Counts',
             FeaturePipeline([('labs_low_counts_transformer',
                               GetLabsLowCountsDictTransformer()),
                              ('dict_vectorizer', DictVectorizer())])),
            ('Labs_High_Counts',
             FeaturePipeline([('labs_high_counts_transformer',
                               GetLabsHighCountsDictTransformer()),
                              ('dict_vectorizer', DictVectorizer())])),
            ('Labs_Latest_Low',
             FeaturePipeline([('labs_latest_low_transformer',
                               GetLabsLatestLowDictTransformer()),
                              ('dict_vectorizer', DictVectorizer())])),
            ('Labs_Latest_High',
             FeaturePipeline([('labs_latest_high_transformer',
                               GetLabsLatestHighDictTransformer()),
                              ('dict_vectorizer', DictVectorizer())])),
            ('Labs_History',
             FeaturePipeline([('labs_history_transformer',
                               GetLabsHistoryDictTransformer([1])),
                              ('dict_vectorizer', DictVectorizer())]))
        ]

    features = FeatureUnion(transformer_list)

    if len(sys.argv) > 1 and unicode(sys.argv[1]).isnumeric():
        data_size = min(int(sys.argv[1]), 906)
    else:
        data_size = 25

    if len(sys.argv) > 2 and unicode(sys.argv[2]).isnumeric():
        num_cv_splits = int(sys.argv[2])
    else:
        num_cv_splits = 5

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Data size: " + str(data_size))
    print("CV splits: " + str(num_cv_splits))

    # The method comes from the command line; it is no longer overwritten by
    # a hard-coded value afterwards. Known values: 'lr', 'svm', 'adaboost',
    # 'cdm'.
    if len(sys.argv) > 3:
        method = sys.argv[3]
    else:
        method = 'adaboost'

    model_args = dict()
    if method in ['lr', 'svm']:
        # NOTE(review): isnumeric() is False for strings containing '.', so a
        # decimal value such as "0.5" falls back to the default 0. here.
        if len(sys.argv) > 4 and unicode(sys.argv[4]).isnumeric():
            model_args['regularization'] = float(sys.argv[4])
        else:
            model_args['regularization'] = 0.
    if method == 'adaboost':
        if len(sys.argv) > 4 and unicode(sys.argv[4]).isnumeric():
            model_args['n_estimators'] = int(sys.argv[4])
        else:
            model_args['n_estimators'] = 50

    show_progress = True
    print('Method: ' + str(method))
    test_model(features, data_size, num_cv_splits, method, show_progress,
               model_args)
예제 #6
0
    print("\nTraining a cox committee...")

    comsize = 1
    if len(sys.argv) > 1:
        comsize = int(sys.argv[1])

    model_file = train_model(filename,
                             columns,
                             targets,
                             separator=separator,
                             comsize=comsize)

    print("\nProceeding with plotting on training data...")

    model_output_file = test_model(model_file, filename, targets[0],
                                   targets[1], separator, *columns)

    print("\nModel output stored in {0}".format(model_output_file))

    #Against the same file, just another column
    #scatterplot_files(model_output_file, 0, 2, model_output_file, 1)

    cmd = 'python model_tester.py "{model_file}" YOUR_TEST_FILE_HERE "{0}" "{1}" "{2}"'.format(separator, targets[0], targets[1], \
                                                                                model_file = model_file)
    selfcmd = ''
    if testfilename is not None:
        selfcmd = 'python model_tester.py "{model_file}" "{testfile}" "{0}" "{1}" "{2}"'.format(separator, testtargets[0], testtargets[1], \
                                                                        model_file = model_file, testfile = testfilename)
    for col in columns:
        cmd += ' "{0}"'.format(col)
        if testfilename is not None:
                            use_header=True)

    print("Retrieving test data...")
    unNormedTestP, T = parse_file(testdata,
                                  inputcols=columns,
                                  targetcols=targets,
                                  normalize=False,
                                  separator=',',
                                  use_header=True)

    print("Normalizing test data...")
    P = normalizeArrayLike(unNormedTestP, normP)

    #Scatter training data
    model_output_file = test_model(model,
                                   trainingdata,
                                   targets[0],
                                   targets[1],
                                   ',',
                                   time_step_size=2,
                                   *columns)
    scatterplot_files(model_output_file, 0, 2, model_output_file, 1)

    #Scatter test data
    model_output_file = test_model_arrays(model,
                                          testdata,
                                          P,
                                          T,
                                          time_step_size=2)
    scatterplot_files(model_output_file, 0, 2, model_output_file, 1)
예제 #8
0
def main():
    """Assemble the enabled feature groups and run ``test_model`` on them.

    Optional positional command-line arguments:

    * ``argv[1]`` - data size, capped at 906 (default 25)
    * ``argv[2]`` - number of cross-validation splits (default 5)
    * ``argv[3]`` - method argument: ``C`` for ``'lr'``/``'svm'`` (no default)
      or ``n_estimators`` for ``'adaboost'`` (default 50)

    The method itself is fixed to ``'adaboost'`` in the code.
    """
    # Feature-group switches; only the lab-count features are enabled.
    use_symptoms = False
    use_icd9 = False
    use_regex = False
    use_tfidf_notes = False
    use_bigram_notes = False
    use_labs = True

    def labs_pipeline(name, step_name, transformer):
        # Pair a labs transformer with a DictVectorizer step.
        return (name,
                FeaturePipeline([(step_name, transformer),
                                 ('dict_vectorizer', DictVectorizer())]))

    def notes_bigrams(name, step_name, note_type):
        # Concatenated notes of one type followed by a bigram count step.
        return (name,
                FeaturePipeline([(step_name,
                                  GetConcatenatedNotesTransformer(note_type)),
                                 ('bigram',
                                  CountVectorizer(ngram_range=(2, 2),
                                                  min_df=0.05))]))

    transformer_list = []
    if use_symptoms:
        transformer_list += SymptomsExtractorTransformerGenerator(
            ['Car', 'Lno'], 'found', None, 6 * 30).getSymptoms()
    if use_icd9:
        transformer_list += [('Dia', ICD9_Transformer())]
    if use_regex:
        transformer_list += [
            ('EF', EFTransformer('all', 1, None)),
            ('EF', EFTransformer('mean', 5, None)),
            ('EF', EFTransformer('max', 5, None)),
            ('LBBB', LBBBTransformer()),
            ('SR', SinusRhythmTransformer()),
            ('NYHA', NYHATransformer()),
            ('QRS', QRSTransformer('all', 1, None)),  #Bugs with QRS
        ]
    if use_tfidf_notes:
        transformer_list += [
            ('Car',
             FeaturePipeline([('notes_transformer_car',
                               GetConcatenatedNotesTransformer('Car')),
                              ('tfidf', TfidfTransformer())])),
            ('Lno',
             FeaturePipeline([('notes_transformer_lno',
                               GetConcatenatedNotesTransformer('Lno')),
                              # A pipeline step needs an instance, not the
                              # TfidfTransformer class itself.
                              ('tfidf', TfidfTransformer())]))
        ]

    if use_bigram_notes:
        transformer_list += [
            #('Car', FeaturePipeline([
            #    ('notes_transformer_car', GetLatestNotesTransformer('Car', 100)),
            #    ('notes_aggregator_car', DocumentConcatenatorTransformer()),
            #    ('trigram', CountVectorizer(ngram_range=(3,3), min_df=2))
            #])),
            notes_bigrams('Car', 'notes_transformer_car', 'Car'),
            notes_bigrams('Lno', 'notes_transformer_lno', 'Lno'),
        ]

    if use_labs:
        transformer_list += [
            #('Enc', GetEncountersFeaturesTransformer(100, True)),
            labs_pipeline('Labs_Counts', 'labs_counts_transformer',
                          GetLabsCountsDictTransformer()),
            labs_pipeline('Labs_Low_Counts', 'labs_low_counts_transformer',
                          GetLabsLowCountsDictTransformer()),
            labs_pipeline('Labs_High_Counts', 'labs_high_counts_transformer',
                          GetLabsHighCountsDictTransformer()),
            #labs_pipeline('Labs_Latest_Low', 'labs_latest_low_transformer',
            #              GetLabsLatestLowDictTransformer()),
            #labs_pipeline('Labs_Latest_High', 'labs_latest_high_transformer',
            #              GetLabsLatestHighDictTransformer()),
            #labs_pipeline('Labs_History', 'labs_history_transformer',
            #              GetLabsHistoryDictTransformer([1])),
        ]

    features = FeatureUnion(transformer_list)

    if len(sys.argv) > 1 and unicode(sys.argv[1]).isnumeric():
        data_size = min(int(sys.argv[1]), 906)
    else:
        data_size = 25

    if len(sys.argv) > 2 and unicode(sys.argv[2]).isnumeric():
        num_cv_splits = int(sys.argv[2])
    else:
        num_cv_splits = 5

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Data size: " + str(data_size))
    print("CV splits: " + str(num_cv_splits))

    #method = 'lr'
    #method = 'svm'
    method = 'adaboost'
    #method = 'cdm'

    model_args = dict()
    if method in ['lr', 'svm']:
        # No default for 'C': it is only set when a numeric argv[3] is given.
        if len(sys.argv) > 3 and unicode(sys.argv[3]).isnumeric():
            model_args['C'] = float(sys.argv[3])
    if method == 'adaboost':
        if len(sys.argv) > 3 and unicode(sys.argv[3]).isnumeric():
            model_args['n_estimators'] = int(sys.argv[3])
        else:
            model_args['n_estimators'] = 50

    show_progress = True
    print('Method: ' + method)
    test_model(features, data_size, num_cv_splits, method, show_progress,
               model_args)
예제 #9
0
def main():
    """Build the feature union from the enabled groups and call ``test_model``.

    Optional positional command-line arguments:

    * ``argv[1]`` - data size, capped at 906 (default 25)
    * ``argv[2]`` - number of cross-validation splits (default 5)
    * ``argv[3]`` - method argument: ``C`` for ``'lr'``/``'svm'`` (no default)
      or ``n_estimators`` for ``'adaboost'`` (default 50)

    The method itself is fixed to ``'adaboost'`` in the code.
    """
    # Feature-group switches; only the lab-count features are enabled.
    use_symptoms = False
    use_icd9 = False
    use_regex = False
    use_tfidf_notes = False
    use_bigram_notes = False
    use_labs = True

    def labs_pipeline(name, step_name, transformer):
        # Pair a labs transformer with a DictVectorizer step.
        return (name, FeaturePipeline([
            (step_name, transformer),
            ('dict_vectorizer', DictVectorizer())
        ]))

    def notes_bigrams(name, step_name, note_type):
        # Concatenated notes of one type followed by a bigram count step.
        return (name, FeaturePipeline([
            (step_name, GetConcatenatedNotesTransformer(note_type)),
            ('bigram', CountVectorizer(ngram_range=(2,2), min_df=0.05))
        ]))

    transformer_list = []
    if use_symptoms:
        transformer_list += SymptomsExtractorTransformerGenerator(['Car','Lno'], 'found', None, 6*30).getSymptoms()
    if use_icd9:
        transformer_list += [
            ('Dia', ICD9_Transformer())
        ]
    if use_regex:
        transformer_list += [
            ('EF', EFTransformer('all', 1, None)),
            ('EF', EFTransformer('mean', 5, None)),
            ('EF', EFTransformer('max', 5, None)),
            ('LBBB', LBBBTransformer()),
            ('SR', SinusRhythmTransformer()),
            ('NYHA', NYHATransformer()),
            ('QRS', QRSTransformer('all', 1, None)),#Bugs with QRS
        ]
    if use_tfidf_notes:
        transformer_list += [
            ('Car', FeaturePipeline([
                ('notes_transformer_car', GetConcatenatedNotesTransformer('Car')),
                ('tfidf', TfidfTransformer())
            ])),
            ('Lno', FeaturePipeline([
                ('notes_transformer_lno', GetConcatenatedNotesTransformer('Lno')),
                # A pipeline step needs an instance, not the
                # TfidfTransformer class itself.
                ('tfidf', TfidfTransformer())
            ]))
        ]

    if use_bigram_notes:
        transformer_list += [
            #('Car', FeaturePipeline([
            #    ('notes_transformer_car', GetLatestNotesTransformer('Car', 100)),
            #    ('notes_aggregator_car', DocumentConcatenatorTransformer()),
            #    ('trigram', CountVectorizer(ngram_range=(3,3), min_df=2))
            #])),
            notes_bigrams('Car', 'notes_transformer_car', 'Car'),
            notes_bigrams('Lno', 'notes_transformer_lno', 'Lno'),
        ]

    if use_labs:
        transformer_list += [
            #('Enc', GetEncountersFeaturesTransformer(100, True)),
            labs_pipeline('Labs_Counts', 'labs_counts_transformer', GetLabsCountsDictTransformer()),
            labs_pipeline('Labs_Low_Counts', 'labs_low_counts_transformer', GetLabsLowCountsDictTransformer()),
            labs_pipeline('Labs_High_Counts', 'labs_high_counts_transformer', GetLabsHighCountsDictTransformer()),
            #labs_pipeline('Labs_Latest_Low', 'labs_latest_low_transformer', GetLabsLatestLowDictTransformer()),
            #labs_pipeline('Labs_Latest_High', 'labs_latest_high_transformer', GetLabsLatestHighDictTransformer()),
            #labs_pipeline('Labs_History', 'labs_history_transformer', GetLabsHistoryDictTransformer([1])),
        ]

    features = FeatureUnion(transformer_list)

    if len(sys.argv) > 1 and unicode(sys.argv[1]).isnumeric():
        data_size = min(int(sys.argv[1]), 906)
    else:
        data_size = 25

    if len(sys.argv) > 2 and unicode(sys.argv[2]).isnumeric():
        num_cv_splits = int(sys.argv[2])
    else:
        num_cv_splits = 5

    # Single-argument print calls produce the same output on Python 2 and 3.
    print("Data size: " + str(data_size))
    print("CV splits: " + str(num_cv_splits))

    #method = 'lr'
    #method = 'svm'
    method = 'adaboost'
    #method = 'cdm'

    model_args = dict()
    if method in ['lr', 'svm']:
        # No default for 'C': it is only set when a numeric argv[3] is given.
        if len(sys.argv) > 3 and unicode(sys.argv[3]).isnumeric():
            model_args['C'] = float(sys.argv[3])
    if method == 'adaboost':
        if len(sys.argv) > 3 and unicode(sys.argv[3]).isnumeric():
            model_args['n_estimators'] = int(sys.argv[3])
        else:
            model_args['n_estimators'] = 50

    show_progress = True
    print('Method: ' + method)
    test_model(features, data_size, num_cv_splits, method, show_progress, model_args)
예제 #10
0
def main():
    """Run ``test_model`` with the ``'lr'`` method on a fixed feature union.

    The union combines the ``icd9`` transformer from the enclosing scope,
    three ``EFTransformer`` variants, ``LBBBTransformer`` and a bigram count
    pipeline over concatenated 'Car' notes (12 months look-back).

    Optional positional command-line arguments:

    * ``argv[1]`` - data size, capped at 906 (default 25)
    * ``argv[2]`` - number of cross-validation splits (default 2)
    """
    def has_numeric_arg(position):
        # True when argv[position] exists and consists of numeric characters.
        return len(sys.argv) > position and unicode(
            sys.argv[position]).isnumeric()

    # NOTE(review): 'EF' is used as a name three times; newer scikit-learn
    # releases may reject duplicate FeatureUnion names - confirm.
    #
    # Disabled alternatives, kept for reference:
    #   ('SR', SinusRhythmTransformer())
    #   ('Car_Doc2Vec', Doc2Vec_Note_Transformer('Car', 'doc2vec_models/car_1.model', 10, dbow_file='doc2vec_models/car_dbow.model'))
    #   ('QRS', QRSTransformer('all', 1, None))  - Bugs with QRS
    #   ('Car' / 'Lno', FeaturePipeline([GetConcatenatedNotesTransformer(...), ('tfidf', car_tfidf / lno_tfidf)]))
    #   ('Enc', enc)
    #   'Labs_Counts', 'Labs_Low_Counts', 'Labs_High_Counts', 'Labs_Latest_Low',
    #   'Labs_Latest_High', 'Labs_History' - each a FeaturePipeline of the
    #   matching GetLabs*DictTransformer followed by DictVectorizer()
    enabled_transformers = [
        ('Dia', icd9),
        ('EF', EFTransformer('all', 1, None)),
        ('EF', EFTransformer('mean', 5, None)),
        ('EF', EFTransformer('max', 5, None)),
        ('LBBB', LBBBTransformer()),
        ('car_ngram',
         FeaturePipeline([
             ('notes_car',
              GetConcatenatedNotesTransformer(note_type='Car',
                                              look_back_months=12)),
             ('ngram_car', CountVectorizer(ngram_range=(2, 2), min_df=.05)),
         ])),
    ]
    features = FeatureUnion(enabled_transformers)

    data_size = min(906, int(sys.argv[1])) if has_numeric_arg(1) else 25
    num_cv_splits = int(sys.argv[2]) if has_numeric_arg(2) else 2

    # 'svm' is the other supported choice.
    method = 'lr'
    show_progress = True

    test_model(features, data_size, num_cv_splits, method, show_progress)
예제 #11
0
def main():
    """Evaluate the ``'lr'`` method on a fixed set of features via ``test_model``.

    Features: the ``icd9`` transformer from the enclosing scope, three
    ``EFTransformer`` variants, ``LBBBTransformer`` and a bigram count
    pipeline over concatenated 'Car' notes (12 months look-back).

    Optional positional command-line arguments:

    * ``argv[1]`` - data size, capped at 906 (default 25)
    * ``argv[2]`` - number of cross-validation splits (default 2)
    """
    # NOTE(review): 'EF' is used as a name three times; newer scikit-learn
    # releases may reject duplicate FeatureUnion names - confirm.
    #
    # Disabled alternatives, kept for reference:
    #   ('SR', SinusRhythmTransformer())
    #   ('Car_Doc2Vec', Doc2Vec_Note_Transformer('Car', 'doc2vec_models/car_1.model', 10, dbow_file='doc2vec_models/car_dbow.model'))
    #   ('QRS', QRSTransformer('all', 1, None))  - Bugs with QRS
    #   ('Car' / 'Lno', FeaturePipeline([GetConcatenatedNotesTransformer(...), ('tfidf', car_tfidf / lno_tfidf)]))
    #   ('Enc', enc)
    #   'Labs_Counts', 'Labs_Low_Counts', 'Labs_High_Counts', 'Labs_Latest_Low',
    #   'Labs_Latest_High', 'Labs_History' - each a FeaturePipeline of the
    #   matching GetLabs*DictTransformer followed by DictVectorizer()
    transformers = []
    transformers.append(('Dia', icd9))
    for aggregation, count in (('all', 1), ('mean', 5), ('max', 5)):
        transformers.append(('EF', EFTransformer(aggregation, count, None)))
    transformers.append(('LBBB', LBBBTransformer()))
    transformers.append(('car_ngram', FeaturePipeline([
        ('notes_car', GetConcatenatedNotesTransformer(note_type='Car',look_back_months=12)),
        ('ngram_car', CountVectorizer(ngram_range=(2, 2), min_df=.05))
    ])))
    features = FeatureUnion(transformers)

    argc = len(sys.argv)

    data_size = 25
    if argc > 1 and unicode(sys.argv[1]).isnumeric():
        data_size = min(906, int(sys.argv[1]))

    num_cv_splits = 2
    if argc > 2 and unicode(sys.argv[2]).isnumeric():
        num_cv_splits = int(sys.argv[2])

    # 'svm' is the other supported choice.
    method = 'lr'
    show_progress = True

    test_model(features, data_size, num_cv_splits, method, show_progress)
    print("Model: {}".format(model))

    #Define the data
    testdata = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_targetthird.csv'
    columns = ['age', 'log(1+lymfmet)', 'n_pos', 'tumsize', 'log(1+er_cyt)', 'log(1+pgr_cyt)', 'pgr_cyt_pos',
               'er_cyt_pos', 'size_gt_20', 'er_cyt', 'pgr_cyt']

    targets = ['time_10y', 'event_10y']
    trainingdata = '/home/gibson/jonask/DataSets/breast_cancer_1/n4369_trainingtwothirds.csv'

    print("Retrieving training data...")
    # Normalize the test data as we normalized the training data
    normP, bah = parse_file(trainingdata, inputcols = columns, targetcols = targets, normalize = False, separator = ',',
                      use_header = True)

    print("Retrieving test data...")
    unNormedTestP, T = parse_file(testdata, inputcols = columns, targetcols = targets, normalize = False, separator = ',',
                      use_header = True)

    print("Normalizing test data...")
    P = normalizeArrayLike(unNormedTestP, normP)

    #Scatter training data
    model_output_file = test_model(model, trainingdata, targets[0], targets[1], ',', time_step_size = 2, *columns)
    scatterplot_files(model_output_file, 0, 2, model_output_file, 1)

    #Scatter test data
    model_output_file = test_model_arrays(model, testdata, P, T, time_step_size=2)
    scatterplot_files(model_output_file, 0, 2, model_output_file, 1)
예제 #13
0
    #            'BSI_A7', 'BSI_A8', 'BSI_A9', 'BSI_A10', 'BSI_A11', 'BSI_A12', 'N_A1', 'N_A2', 'N_A3',
    #             'N_A4', 'N_A5', 'N_A6', 'N_A7', 'N_A8', 'N_A9', 'N_A10', 'N_A11', 'N_A12')
    # targets = ['Stid', 'Event']
    # testtargets = []

    print("\nTraining a cox committee...")

    comsize = 1
    if len(sys.argv) > 1:
        comsize = int(sys.argv[1])

    model_file = train_model(filename, columns, targets, separator=separator, comsize=comsize)

    print("\nProceeding with plotting on training data...")

    model_output_file = test_model(model_file, filename, targets[0], targets[1], separator, *columns)

    print("\nModel output stored in {0}".format(model_output_file))

    # Against the same file, just another column
    # scatterplot_files(model_output_file, 0, 2, model_output_file, 1)

    cmd = 'python model_tester.py "{model_file}" YOUR_TEST_FILE_HERE "{0}" "{1}" "{2}"'.format(
        separator, targets[0], targets[1], model_file=model_file
    )
    selfcmd = ""
    if testfilename is not None:
        selfcmd = 'python model_tester.py "{model_file}" "{testfile}" "{0}" "{1}" "{2}"'.format(
            separator, testtargets[0], testtargets[1], model_file=model_file, testfile=testfilename
        )
    for col in columns:
예제 #14
0
    while runs < max_runs:
        episodes = 0
        agent = Agent(stateCnt, actionCnt)
        print("training run ", runs + 1)
        while episodes < MAX_EPISODES:
            env.run(agent)
            episodes = episodes + 1
        #ss = 0  # blah blah
        if ENV_LEARN:
            agent.brain.env_model.env_model.model.save("models/env_model_" +
                                                       MODEL_VER + ".h5")
            agent.brain.env_model.r_model.save("models/r_model_" + MODEL_VER +
                                               ".h5")
        agent.brain.controller.save('models/controller_' + MODEL_VER + ".h5")
        print("testing run ", runs + 1)
        d_cnt, R_cnt = test_model(VAE_VER, MODEL_VER, use_all=False, val=True)
        done_counts.append(d_cnt)
        R_counts.append(R_cnt)
        runs += 1
        #plt.plot(r_history)
        #plt.show()
finally:
    if ENV_LEARN:
        agent.brain.env_model.env_model.model.save("models/env_model_" +
                                                   MODEL_VER + ".h5")
        agent.brain.env_model.r_model.save("models/r_model_" + MODEL_VER +
                                           ".h5")
    agent.brain.controller.save('models/controller_' + MODEL_VER + ".h5")
    done_counts = np.asarray(done_counts)
    R_counts = np.asarray(R_counts)
    print("Done counts: average = ", done_counts.mean(), "max = ",