Ejemplo n.º 1
0
def main(opt):
    """Train a model, or predict with one saved model or a two-model ensemble.

    The branch taken is selected by ``opt.mode``:

    * ``"train"``: load the data, build and fit a model, then save it to
      ``opt.saved_model``.
    * ``"ensemble"``: load ``opt.saved_model1`` and ``opt.saved_model2`` and
      predict with both through ``predict_final_word_models``.
    * any other value: load ``opt.saved_model`` and predict with it through
      ``predict_final_word``.

    Both prediction branches write a submission file and, when ``opt.score``
    is truthy, score it.

    Args:
        opt: Options object. The attributes read are ``gpu`` and ``mode``
            plus, depending on the branch, ``debug``, ``embedding_dim``,
            ``hidden_size``, ``drop``, ``batch_size``, ``epochs``,
            ``saved_model``, ``saved_model1``, ``saved_model2``, ``input``,
            ``student_id`` and ``score``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu

    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop,
                            sequence_length, vocabulary_size)
        print("Traning Model...")
        # The value returned by fit() was never used, so it is not kept.
        model.fit(
            x_train,
            y_train,
            batch_size=opt.batch_size,
            epochs=opt.epochs,
            verbose=1,
            callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)
        return

    vocab_path = os.path.join("data", "vocab.json")

    if opt.mode == "ensemble":
        # Each model and each of its layers gets a per-model suffix so that
        # names differ between the two loaded models.
        models = []
        for suffix, model_path in (("1", opt.saved_model1),
                                   ("2", opt.saved_model2)):
            member = load_model(model_path)
            member.name = 'model' + suffix
            for layer in member.layers:
                layer.name = layer.name + "_" + suffix
            models.append(member)

        # Context manager closes the vocabulary file deterministically.
        with open(vocab_path) as vocab_file:
            vocabulary = json.load(vocab_file)
        predict_dict = predict_final_word_models(models, vocabulary, opt.input)
        # NOTE: an earlier, commented-out variant built one merged model with
        # ensemble(models, model_inputs) and saved it to
        # opt.model_to_be_saved; that dead code has been removed.
    else:
        model = load_model(opt.saved_model)
        with open(vocab_path) as vocab_file:
            vocabulary = json.load(vocab_file)
        predict_dict = predict_final_word(model, vocabulary, opt.input)

    sub_file = make_submission(predict_dict, opt.student_id, opt.input)
    if opt.score:
        scoring(sub_file, os.path.join("data"), type="valid")
Ejemplo n.º 2
0
def main(opt):
    """Train and save a model, or predict with a previously saved one.

    When ``opt.mode`` is ``"train"`` the data is loaded, a model is built and
    fitted, and the result is saved to ``opt.saved_model``. For any other
    mode the saved model is loaded, predictions are made for ``opt.input``,
    a submission file is written and, when ``opt.score`` is truthy, scored.

    Args:
        opt: Options object. The attributes read are ``gpu`` and ``mode``
            plus, depending on the branch, ``debug``, ``embedding_dim``,
            ``hidden_size``, ``drop``, ``optimizer``, ``batch_size``,
            ``epochs``, ``saved_model``, ``input``, ``student_id`` and
            ``score``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu

    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop,
                            sequence_length, vocabulary_size, opt.optimizer)
        print("Traning Model...")
        # The value returned by fit() was never used, so it is not kept.
        model.fit(
            x_train,
            y_train,
            batch_size=opt.batch_size,
            epochs=opt.epochs,
            verbose=1,
            callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)
        return

    model = load_model(opt.saved_model)
    # Context manager closes the vocabulary file deterministically instead of
    # leaving the handle to the garbage collector.
    with open(os.path.join("data", "vocab.json")) as vocab_file:
        vocabulary = json.load(vocab_file)
    predict_dict = predict_final_word(model, vocabulary, opt.input)
    sub_file = make_submission(predict_dict, opt.student_id, opt.input)
    if opt.score:
        scoring(sub_file, os.path.join("data"), type="valid")
Ejemplo n.º 3
0
def main(opt):
    """Train and save a model, or score predictions on the validation set.

    The branch taken is selected by ``opt.mode``:

    * ``"train"``: load the data, build and fit a model, then save it to
      ``opt.saved_model``.
    * ``"score_valid"``: predict with the single model in
      ``opt.saved_model``.
    * any other value: predict with the ten models stored as
      ``models/model0.h5`` through ``models/model9.h5``.

    Both prediction branches pass a list of models to
    ``predict_final_word``, write a submission file and always score it.

    Args:
        opt: Options object. The attributes read are ``gpu`` and ``mode``
            plus, depending on the branch, ``debug``, ``embedding_dim``,
            ``hidden_size``, ``drop1``, ``drop2``, ``batch_size``,
            ``epochs``, ``saved_model``, ``input`` and ``student_id``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu

    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop1,
                            opt.drop2, sequence_length, vocabulary_size)
        print("Training Model...")
        model.fit(x_train,
                  y_train,
                  batch_size=opt.batch_size,
                  epochs=opt.epochs,
                  verbose=2,
                  callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)
        return

    if opt.mode == "score_valid":
        model_list = [load_model(opt.saved_model)]
    else:
        # Same ten files, loaded in the same order, as the former
        # model0 ... model9 assignments.
        model_list = [
            load_model('models/model{}.h5'.format(index))
            for index in range(10)
        ]

    # Context manager closes the vocabulary file deterministically.
    with open(os.path.join("data", "vocab.json")) as vocab_file:
        vocabulary = json.load(vocab_file)
    predict_dict = predict_final_word(model_list, vocabulary, opt.input)
    sub_file = make_submission(predict_dict, opt.student_id, opt.input)
    scoring(sub_file, os.path.join("data"), type="valid")
Ejemplo n.º 4
0
def main(opt):
    """Train and save a model, or predict with a previously saved one.

    When ``opt.mode`` is ``"train"`` the data is loaded, a model is built and
    fitted, and the result is saved to ``opt.saved_model``. For any other
    mode the saved model is loaded with ``LayerNormalization`` registered as
    a custom object, predictions are made for ``opt.input``, a submission
    file is written and, when ``opt.score`` is truthy, scored.

    Args:
        opt: Options object. The attributes read are ``gpu`` and ``mode``
            plus, depending on the branch, ``debug``, ``embedding_dim``,
            ``hidden_size``, ``drop``, ``batch_size``, ``epochs``,
            ``saved_model``, ``input``, ``student_id`` and ``score``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu

    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.embedding_dim, opt.hidden_size, opt.drop,
                            sequence_length, vocabulary_size)
        print("Traning Model...")
        # The value returned by fit() was never used, so it is not kept.
        model.fit(
            x_train,
            y_train,
            batch_size=opt.batch_size,
            epochs=opt.epochs,
            verbose=1,
            callbacks=[TestCallback((x_valid, y_valid), model=model)])
        model.save(opt.saved_model)

        # Disabled: save the model architecture separately.
        # NOTE(review): this writes to_json() output into a file named
        # *.yaml, while the disabled loader below reads it with
        # model_from_yaml -- reconcile the two before re-enabling.
        #with open('model_architecture.yaml', 'w') as f:
        #    f.write(model.to_json())

        print("Training cost time: ", time.time() - st)
        return

    # Disabled: model reconstruction from the architecture file.
    #with open('model_architecture.yaml', 'r') as f:
    #    model = model_from_yaml(f.read())
    model = load_model(
        opt.saved_model,
        custom_objects={'LayerNormalization': LayerNormalization})
    # Context manager closes the vocabulary file deterministically instead of
    # leaving the handle to the garbage collector.
    with open(os.path.join("data", "vocab.json")) as vocab_file:
        vocabulary = json.load(vocab_file)
    predict_dict = predict_final_word(model, vocabulary, opt.input)
    sub_file = make_submission(predict_dict, opt.student_id, opt.input)
    if opt.score:
        scoring(sub_file, os.path.join("data"), type="valid")
Ejemplo n.º 5
0
 def testScoringCase(self):
     """Check the 'earned' value of the first entry returned by scoring().

     Stores an unresolved prediction, a profile whose ledger holds 10.00 of
     contract one for that prediction, and a matching BUY trade, then
     expects ``scoring()[0]['earned']`` to equal 10.
     """
     prediction = Prediction(contract_one=0.00,
                             contract_two=0.00,
                             liquidity=100,
                             resolved=False,
                             outcome='CONTRACT_ONE',
                             statement='Test',
                             end_time=datetime.datetime.now())
     prediction_key = prediction.put()

     ledger_entry = LedgerRecords(prediction_id=prediction_key.urlsafe(),
                                  contract_one=10.00,
                                  contract_two=0.00)
     profile = Profile(balance=100, user_ledger=[ledger_entry])
     user_key = profile.put()

     # The trade's key is not needed afterwards; only the stored entity is.
     Trade(prediction_id=prediction_key,
           user_id=user_key,
           direction='BUY',
           contract='CONTRACT_ONE',
           quantity=10).put()

     # The fetched profile is not inspected; the read is kept as-is.
     user_key.get()

     results = scoring()
     self.assertEqual(10, results[0]['earned'])
Ejemplo n.º 6
0
def main(opt):
    """Train a model, build an ensemble from saved models, or predict.

    NumPy's random generator is seeded with ``opt.seed`` first. The branch
    taken is then selected by ``opt.mode``:

    * ``"train"``: load the data, build and compile a model, fit it for up
      to 100 epochs with checkpointing and early stopping on ``val_loss``,
      then save it to ``opt.saved_model``.
    * ``"ensemble"``: load every file directly inside ``models/ensemble/``
      as a model and hand the list to ``build_save_ensemble_model``.
    * any other value: load ``opt.saved_model``, predict for ``opt.input``,
      write a submission file and, when ``opt.score`` is truthy, score it.

    Args:
        opt: Options object. The attributes read are ``gpu``, ``seed`` and
            ``mode`` plus, depending on the branch, ``debug``, ``model``,
            ``embedding_dim``, ``hidden_size``, ``drop``, ``filter``,
            ``batch_size``, ``saved_model``, ``input``, ``student_id`` and
            ``score``.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = opt.gpu
    np.random.seed(opt.seed)  # set a seed for reproducibility

    if opt.mode == "train":
        st = time.time()
        print('Loading data')
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        model = build_model(opt.model, opt.embedding_dim, opt.hidden_size,
                            opt.drop, opt.filter, sequence_length,
                            vocabulary_size)
        adam = Adam()
        model.compile(loss='sparse_categorical_crossentropy', optimizer=adam)
        print("Traning Model...")
        checkpoint = ModelCheckpoint(opt.saved_model,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='min')
        early = EarlyStopping(monitor="val_loss", mode="min", patience=5)
        # The value returned by fit() was never used, so it is not kept.
        model.fit(x_train,
                  y_train,
                  batch_size=opt.batch_size,
                  epochs=100,
                  verbose=1,
                  validation_data=(x_valid, y_valid),
                  callbacks=[
                      TestCallback((x_valid, y_valid), model=model),
                      checkpoint, early
                  ])
        # NOTE(review): the checkpoint callback and this save both write to
        # opt.saved_model, so the file ends up holding the model as it is
        # after fit() returns -- confirm that replacing the best checkpoint
        # is intended.
        model.save(opt.saved_model)
        print("Training cost time: ", time.time() - st)
    elif opt.mode == "ensemble":
        x_train, y_train, x_valid, y_valid, vocabulary_size = load_data(
            "data", opt.debug)

        num_training_data = x_train.shape[0]
        sequence_length = x_train.shape[1]
        print(num_training_data)

        print('Vocab Size', vocabulary_size)

        ensemble_dir = "models/ensemble/"
        # Only the first os.walk() entry is used, i.e. the files directly
        # inside the directory; a missing directory yields no files.
        _, _, model_files = next(os.walk(ensemble_dir), (None, [], []))

        models = []
        for model_count, filename in enumerate(model_files):
            member = load_model(ensemble_dir + filename)
            # Give each loaded model its own name: model0, model1, ...
            member.name = "model" + str(model_count)
            models.append(member)

        build_save_ensemble_model(opt.saved_model, models, sequence_length)
    else:
        model = load_model(opt.saved_model)
        # Context manager closes the vocabulary file deterministically
        # instead of leaving the handle to the garbage collector.
        with open(os.path.join("data", "vocab.json")) as vocab_file:
            vocabulary = json.load(vocab_file)
        predict_dict = predict_final_word(model, vocabulary, opt.input)
        sub_file = make_submission(predict_dict, opt.student_id, opt.input)
        if opt.score:
            scoring(sub_file, os.path.join("data"), type="valid")
Ejemplo n.º 7
0
 def on_epoch_end(self, epoch, logs=None):
     """Predict, write a submission file and score it.

     Predictions are computed with ``self.model`` and ``self.vocabulary``
     for the file named by ``self.filename``.

     Args:
         epoch: Epoch value supplied by the caller (not used here).
         logs: Optional value supplied by the caller (not used here).
             Defaults to ``None`` instead of a shared mutable ``{}``.
     """
     # NOTE(review): ``opt`` is neither a parameter nor an attribute, so it
     # must be resolvable from an enclosing or module scope at call time.
     # NOTE(review): prediction reads self.filename while the submission is
     # built with opt.input -- confirm both refer to the same input.
     predict_dict = predict_final_word(self.model, self.vocabulary,
                                       self.filename)
     sub_file = make_submission(predict_dict, opt.student_id, opt.input)
     scoring(sub_file, os.path.join("data"), type="valid")