Example #1
def main():
    args = parse_arg()
    with open(args.config_path, "r") as f:
        param = json.load(f)
    model, train_processed, valid_processed = prepare_model_and_data(param)
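
    # NOTE: embedding_matrix is not defined in this truncated snippet; presumably
    # it is built earlier (e.g. via a helper like prepare_embedding(), as in Example #8).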

    hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix,
                                                          bin_size=30,
                                                          hist_mode='LCH')
    tprint("defining generator")
    train_generator = mz.DataGenerator(train_processed,
                                       batch_size=param['batch_size'],
                                       shuffle=True,
                                       callbacks=[hist_callback])

    valid_x, valid_y = valid_processed.unpack()
    evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                               x=valid_x,
                                               y=valid_y,
                                               batch_size=len(valid_x))
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                  patience=1,
                                                  verbose=1,
                                                  mode='min')
    tprint("fitting")
    callbacks = [evaluate]

    if param['early_stop']:
        callbacks.append(early_stop)
    history = model.fit_generator(train_generator,
                                  epochs=100,
                                  callbacks=callbacks,
                                  workers=5,
                                  use_multiprocessing=False)
Example #2
    def build(self, data_pack, **kwargs) -> DataGenerator:
        """
        Build a DataGenerator.

        :param data_pack: DataPack to build upon.
        :param kwargs: Additional keyword arguments to override the keyword
            arguments passed in `__init__`.
        """
        return mz.DataGenerator(data_pack, **{**self._kwargs, **kwargs})
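
A minimal usage sketch (assumptions: a `builder` instance whose `__init__` stored `batch_size=32` in `self._kwargs`, plus an existing `data_pack`): keyword arguments passed to `build` override the stored ones, since the right-hand dict wins in `{**self._kwargs, **kwargs}`.

gen = builder.build(data_pack, batch_size=128)  # the batch_size=128 override wins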
Example #3
def train(topic_number, embedding, model_type='drmm'):

    task = mz.tasks.Ranking()
    train_raw = train_data(topic_number)

    if model_type == 'dense':
        train_processed, model = dense_preprocess(train_raw, task)
        if model.params.completed():
            model.build()
            model.compile()
            x, y = train_processed.unpack()
            model.fit(x, y, batch_size=32, epochs=5)
            os.makedirs(os.path.join(MODEL_DUMP, MODEL_TYPE), exist_ok=True)
            model.save(os.path.join(MODEL_DUMP, MODEL_TYPE, str(topic_number)))

    elif model_type == 'drmm':
        # glove_embedding = mz.datasets.embeddings.load_glove_embedding(dimension=300)
        train_processed, preprocessor, model = drmm_preprocess(train_raw, task, embed_out_dim=embedding.output_dim)

        if model.params.completed():
            model.build()
            model.compile()
            embedding_matrix = embedding.build_matrix(preprocessor.context['vocab_unit'].state['term_index'])
            # normalize the word embeddings for fast histogram generation.
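            # (note: an all-zero embedding row, e.g. a padding index, has zero norm
            # and would divide by zero below; guard such rows if present)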
            l2_norm = np.sqrt((embedding_matrix * embedding_matrix).sum(axis=1))
            embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]
            model.load_embedding_matrix(embedding_matrix)
            hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix,
                                                                  bin_size=30,
                                                                  hist_mode='LCH')
            train_generator = mz.DataGenerator(train_processed,
                                               mode='point',
                                               num_dup=5,
                                               num_neg=10,
                                               batch_size=20,
                                               callbacks=[hist_callback])
            history = model.fit_generator(train_generator,
                                          epochs=30,
                                          workers=30,
                                          use_multiprocessing=True)

            os.makedirs(os.path.join(MODEL_DUMP, MODEL_TYPE), exist_ok=True)
            model.save(os.path.join(MODEL_DUMP, MODEL_TYPE, str(topic_number)))
Example #4
def get_model_and_data(topic_number, d_pack_test, model_type, embedding):

    if model_type == 'dense':
        # load model
        model = mz.load_model(os.path.join(MODEL_DUMP, MODEL_TYPE, str(topic_number)))

        # prepare preprocessor
        train_raw = train_data(topic_number)
        preprocessor = mz.preprocessors.BasicPreprocessor()
        preprocessor.fit(train_raw)

        # transform document data
        test_processed = preprocessor.transform(d_pack_test)
        test_x, test_y = test_processed.unpack()

    elif model_type == 'drmm':
        # load model
        model = mz.load_model(os.path.join(MODEL_DUMP, MODEL_TYPE, str(topic_number)))
        task = mz.tasks.Ranking()
        train_raw = train_data(topic_number)
        preprocessor = mz.preprocessors.BasicPreprocessor(fixed_length_left=10,
                                                          fixed_length_right=100,
                                                          remove_stop_words=False)
        preprocessor.fit(train_raw)

        test_processed = preprocessor.transform(d_pack_test)
        embedding_matrix = embedding.build_matrix(preprocessor.context['vocab_unit'].state['term_index'])
        # normalize the word embeddings for fast histogram generation.
        l2_norm = np.sqrt((embedding_matrix * embedding_matrix).sum(axis=1))
        embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]
        model.load_embedding_matrix(embedding_matrix)
        hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix,
                                                              bin_size=30,
                                                              hist_mode='LCH')
        test_generator = mz.DataGenerator(data_pack=test_processed, mode='point',
                                          callbacks=[hist_callback])
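        # slicing the generator with [:] materializes all batches into a single (x, y) pair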
        test_x, test_y = test_generator[:]

    else:
        raise ValueError('unsupported model_type: {}'.format(model_type))

    return model, test_x
Example #5
def test_resample():
    model = mz.models.Naive()
    prpr = model.get_default_preprocessor()
    data_raw = mz.datasets.toy.load_data()
    data = prpr.fit_transform(data_raw)
    model.params.update(prpr.context)
    model.params['task'] = mz.tasks.Ranking()
    model.build()
    model.compile()

    data_gen = mz.DataGenerator(
        data_pack=data,
        mode='pair',
        resample=True,
        batch_size=4
    )

    class CheckResample(keras.callbacks.Callback):
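        # Verifies that resample=True regenerates batch_indices between epochs:
        # after the first epoch, each on_epoch_end records whether the indices
        # changed since the previous epoch.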
        def __init__(self, data_gen):
            super().__init__()
            self._data_gen = data_gen
            self._orig_indices = None
            self._flags = []

        def on_epoch_end(self, epoch, logs=None):
            curr_indices = self._data_gen.batch_indices
            if self._orig_indices is None:
                self._orig_indices = copy.deepcopy(curr_indices)
            else:
                self._flags.append(self._orig_indices != curr_indices)
                self._orig_indices = curr_indices

    check_resample = CheckResample(data_gen)
    model.fit_generator(data_gen, epochs=5, callbacks=[check_resample])
    assert check_resample._flags
    assert all(check_resample._flags)
Example #6
def test_generator(request, test_data_processed):
    return mz.DataGenerator(test_data_processed)
Example #7
    mz.metrics.NormalizedDiscountedCumulativeGain(k=3),
    mz.metrics.MeanAveragePrecision()
]

# Initialize the model, fine-tune the hyper-parameters.
model = mz.models.DSSM()
model.params['input_shapes'] = preprocessor.context['input_shapes']
model.params['task'] = ranking_task
model.guess_and_fill_missing_params()
model.build()
model.compile()

# Generate pair-wise training data on-the-fly, evaluate model performance using customized callbacks on validation data.
# WARNING: PairDataGenerator will be deprecated in MatchZoo v2.2. Use `DataGenerator` with callbacks instead.
# train_generator = mz.PairDataGenerator(train_processed, num_dup=1, num_neg=4, batch_size=64, shuffle=True)
train_generator = mz.DataGenerator(train_processed,
                                   mode='pair',  # needed for pair-wise sampling; num_dup/num_neg are ignored in the default 'point' mode
                                   num_dup=1,
                                   num_neg=4,
                                   batch_size=64,
                                   shuffle=True)
valid_x, valid_y = valid_processed.unpack()
evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                           x=valid_x,
                                           y=valid_y,
                                           batch_size=len(valid_x))
history = model.fit_generator(train_generator,
                              epochs=20,
                              callbacks=[evaluate],
                              workers=5,
                              use_multiprocessing=False)
Example #8
def main2():
    args = parse_arg()
    with open(args.config_path, "r") as f:
        param = json.load(f)
    tprint("Loading data")
    preprocessor, train_processed, valid_processed = drmm_processed()
    print(train_processed)
    tprint("Defining task")
    classification_task = mz.tasks.classification.Classification()
    classification_task.metrics = ['accuracy']
    output_dim = 300
    tprint('output_dim : {}'.format(output_dim))
    # Initialize the model, fine-tune the hyper-parameters.
    tprint("building model")
    model = get_drmm_model(preprocessor, classification_task, output_dim)
    # for key, v in param.items():
    #     if key in model.params:
    #         model.params[key] = v
    #
    #model.guess_and_fill_missing_params(verbose=1)

    step_per_epoch = 423 * 128
    num_max_steps = 100 * step_per_epoch
    #
    # if 'lr_decay' in param and param['lr_decay']:
    #     lr = tf.keras.optimizers.schedules.ExponentialDecay(
    #         initial_learning_rate=param['lr'],
    #         decay_steps=num_max_steps / 20,
    #         decay_rate=0.9)
    # else:
    #     lr = param['lr']
    # model.params['optimizer'] = tf.keras.optimizers.Adam(learning_rate=lr)

    model.build()
    model.compile()
    tprint("processing embedding")
    term_index = preprocessor.context['vocab_unit'].state['term_index']
    embedding_matrix = prepare_embedding(output_dim, term_index)
    model.load_embedding_matrix(embedding_matrix)

    hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix,
                                                          bin_size=30,
                                                          hist_mode='LCH')
    tprint("defining generator")
    train_generator = mz.DataGenerator(train_processed,
                                       batch_size=param['batch_size'],
                                       shuffle=True,
                                       callbacks=[hist_callback])

    valid_x, valid_y = valid_processed.unpack()
    evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                               x=valid_x,
                                               y=valid_y,
                                               batch_size=len(valid_x))
    early_stop = tf.keras.callbacks.EarlyStopping(monitor='loss',
                                                  patience=1,
                                                  verbose=1,
                                                  mode='min')
    tprint("fitting")
    callbacks = [evaluate]

    # if param['early_stop']:
    #     callbacks.append(early_stop)
    history = model.fit_generator(train_generator,
                                  epochs=100,
                                  callbacks=callbacks,
                                  workers=30,
                                  use_multiprocessing=True)
Example #9
def drmm_api(qpool, logdir, dataset_path, train_id, parameter):
    keras.backend.clear_session()
    # load the data and create the preprocessor object
    train_pack = load_train_data(train_id, parameter['existing_dataset'], parameter['task'])
    predict_pack = load_test_data(train_id, parameter['existing_dataset'], parameter['task'])
    preprocessor = mz.preprocessors.BasicPreprocessor(fixed_length_left=10, fixed_length_right=100,
                                                      remove_stop_words=False)
    # redirect stderr to the log file
    logdir.set_preprocess_id(train_id)
    err_old = sys.stderr
    sys.stderr = logdir
    # write the preprocessor.fit output to the log; once done, restore stderr and save the preprocessor
    train_pack_processed = preprocessor.fit_transform(train_pack)
    sys.stderr = err_old
    preprocessor.save(ROOT_PATH + 'matchzoo_temp_files/preprocessors/' + train_id + '.drmm_preprocessor')
    predict_pack_processed = preprocessor.transform(predict_pack)
    with open(ROOT_PATH + 'matchzoo_temp_files/logger/' + train_id + '.preprocess_log', 'a') as f:
        f.write('Preprocess finished!')
    ranking_task = mz.tasks.Ranking(loss=mz.losses.RankHingeLoss())
    ranking_task.metrics = [
        mz.metrics.NormalizedDiscountedCumulativeGain(k=3),
        mz.metrics.NormalizedDiscountedCumulativeGain(k=5),
        mz.metrics.MeanAveragePrecision()
    ]
    model = mz.models.DRMMTKS()
    bin_size = 30
    #model.params['input_shapes'] = [[10, ], [10, bin_size, ]]
    model.params.update(preprocessor.context)
    model.params['task'] = ranking_task
    model.params['mask_value'] = parameter['mask_value']
    model.params['embedding_input_dim'] = preprocessor.context['vocab_size']
    model.params['embedding_output_dim'] = parameter['embedding_output_dim']
    model.params['mlp_num_layers'] = parameter['mlp_num_layers']
    model.params['mlp_num_units'] = parameter['mlp_num_units']
    model.params['mlp_num_fan_out'] = parameter['mlp_num_fan_out']
    model.params['top_k'] = parameter['top_k']
    model.params['mlp_activation_func'] = 'relu'
    model.params['optimizer'] = 'adadelta'
    model.guess_and_fill_missing_params()
    model.build()
    model.compile()
    model.backend.summary()
    glove_embedding = mz.datasets.embeddings.load_glove_embedding(dimension=300)
    embedding_matrix = glove_embedding.build_matrix(preprocessor.context['vocab_unit'].state['term_index'])
    l2_norm = np.sqrt((embedding_matrix * embedding_matrix).sum(axis=1))
    embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]
    model.load_embedding_matrix(embedding_matrix)
    '''
    pred_generator = mz.HistogramDataGenerator(data_pack=predict_pack_processed,
                                               embedding_matrix=embedding_matrix,
                                               bin_size=bin_size,
                                               hist_mode='LCH')
    pred_x, pred_y = pred_generator[:]
    '''
    pred_x, pred_y = predict_pack_processed.unpack()
    evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                               x=pred_x,
                                               y=pred_y,
                                               once_every=1,
                                               batch_size=len(pred_y)
                                               )
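    # mode='pair' draws pair-wise training batches: each positive document is
    # duplicated num_dup times and paired with num_neg sampled negatives.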
    train_generator = mz.DataGenerator(train_pack_processed, mode='pair',
                                       num_dup=2, num_neg=1, batch_size=20)
    # redirect stdout into the log
    qpool.set_trainid(train_id)
    old = sys.stdout
    sys.stdout = qpool
    model.fit_generator(train_generator, epochs=parameter['epochs'], callbacks=[evaluate], workers=5, use_multiprocessing=False)
    sys.stdout = old
    model.save(ROOT_PATH + 'matchzoo_temp_files/models/' + train_id + '.drmm_model')
Example #10
embedding_matrix = glove_embedding.build_matrix(
    preprocessor.context['vocab_unit'].state['term_index'])
# normalize the word embeddings for fast histogram generation.
l2_norm = np.sqrt((embedding_matrix * embedding_matrix).sum(axis=1))
embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]

model.load_embedding_matrix(embedding_matrix)

print("embedding matrix loaded")

hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix,
                                                      bin_size=30,
                                                      hist_mode='LCH')

pred_generator = mz.DataGenerator(dev_pack_processed,
                                  mode='point',
                                  callbacks=[hist_callback])

print("pred generator")

pred_x, pred_y = pred_generator[:]
evaluate = mz.callbacks.EvaluateAllMetrics(
    model,
    x=pred_x,
    y=pred_y,
    once_every=2,
    batch_size=len(pred_y),
    model_save_path='./pretrained_models/drmm_pretrained_model_fold' + fold +
    '/')

train_generator = mz.DataGenerator(train_pack_processed,
Example #11
    model.load_embedding_matrix(embeddingMatrix)

    print('Unpacking test data')

    xTest, yTest = dataTestProc.unpack()
    evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                               x=xTest,
                                               y=yTest,
                                               batch_size=len(xTest))

    print('Generating training data!')

    # This needs to use the processed data!
    trainGenerator = mz.DataGenerator(dataTrainProc,
                                      mode='pair',
                                      num_dup=5,
                                      num_neg=1,
                                      batch_size=BATCH_SIZE)
    print('num batches:', len(trainGenerator))
    history = model.fit_generator(trainGenerator,
                                  epochs=epochQty,
                                  callbacks=[evaluate],
                                  workers=WORKERS_QTY,
                                  use_multiprocessing=USE_MULTI_PROC)

    model.save(modelFile)
    print(model.evaluate(xTest, yTest, batch_size=128))

#except:
# tb is traceback
#type, value, tb = sys.exc_info()
Example #12
model.backend.summary()

glove_embedding = mz.datasets.embeddings.load_glove_embedding(dimension=300)
embedding_matrix = glove_embedding.build_matrix(
    preprocessor.context['vocab_unit'].state['term_index'])
# normalize the word embeddings for fast histogram generation.
l2_norm = np.sqrt((embedding_matrix * embedding_matrix).sum(axis=1))
embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]
model.load_embedding_matrix(embedding_matrix)

hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix,
                                                      bin_size=30,
                                                      hist_mode='LCH')

test_generator = mz.DataGenerator(test_processed,
                                  mode='point',
                                  callbacks=[hist_callback])
test_x, test_y = test_generator[:]

eval_generator = mz.DataGenerator(validation_processed,
                                  mode='point',
                                  callbacks=[hist_callback])
val_x, val_y = eval_generator[:]

evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                           x=val_x,
                                           y=val_y,
                                           batch_size=len(val_y))
callback_earlystopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       mode='min',
                                                       verbose=0,
Example #13
model.backend.summary()

embedding_matrix = embedding.build_matrix(
    preprocessor.context['vocab_unit'].state['term_index'])
model.load_embedding_matrix(embedding_matrix)

test_x, test_y = test_pack_processed.unpack()
evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                           x=test_x,
                                           y=test_y,
                                           batch_size=len(test_x))

train_generator = mz.DataGenerator(train_pack_processed,
                                   mode='pair',
                                   num_dup=2,
                                   num_neg=1,
                                   batch_size=4)
# X, y = train_pack_processed.unpack()
# print('num batches:', len(train_generator))

# early_stop = EarlyStopping(monitor=mz.metrics.NormalizedDiscountedCumulativeGain(k=1), mode="max", patience=1)
# check_point = ModelCheckpoint(model_path + "best_model.h5",
#                               monitor=mz.metrics.NormalizedDiscountedCumulativeGain(k=1),
#                               verbose=1, save_best_only=True, mode="min")
history = model.fit_generator(train_generator,
                              epochs=10,
                              callbacks=[evaluate],
                              workers=1)
# model.fit(x=X, y=y, batch_size=32, epochs=100, callbacks=[evaluate, early_stop])
Example #14
def tutorial_drmm():
    # data = get_processed_data()
    # data = knrm_processed()
    print("Loading data")
    data = get_processed_data_from_cache()
    preprocessor, train_processed, valid_processed = data
    # save_to_pickle(data, "matchzoo_prac1")
    print("Defining task")
    ranking_task = mz.tasks.Ranking(loss=mz.losses.RankCrossEntropyLoss(
        num_neg=4))
    ranking_task.metrics = [
        mz.metrics.NormalizedDiscountedCumulativeGain(k=3),
        mz.metrics.MeanAveragePrecision()
    ]
    glove_embedding = mz.datasets.embeddings.load_glove_embedding(
        dimension=300)
    print('output_dim', glove_embedding.output_dim)

    # Initialize the model, fine-tune the hyper-parameters.
    print("building model")
    bin_size = 30
    model = mz.models.DRMM()
    model.params.update(preprocessor.context)
    model.params['input_shapes'] = [[10], [10, bin_size]]
    model.params['task'] = ranking_task
    model.params['mask_value'] = 0
    model.params['embedding_output_dim'] = glove_embedding.output_dim
    model.params['mlp_num_layers'] = 1
    model.params['mlp_num_units'] = 10
    model.params['mlp_num_fan_out'] = 1
    model.params['mlp_activation_func'] = 'tanh'
    model.params['optimizer'] = 'adadelta'
    model.build()
    model.compile()
    model.backend.summary()
    embedding_matrix = glove_embedding.build_matrix(
        preprocessor.context['vocab_unit'].state['term_index'])
    print(embedding_matrix.shape)
    # normalize the word embeddings for fast histogram generation.
    l2_norm = np.sqrt((embedding_matrix * embedding_matrix).sum(axis=1))
    print(l2_norm.shape)
    embedding_matrix = embedding_matrix / l2_norm[:, np.newaxis]
    print(embedding_matrix.shape)
    model.load_embedding_matrix(embedding_matrix)
    hist_callback = mz.data_generator.callbacks.Histogram(embedding_matrix,
                                                          bin_size=30,
                                                          hist_mode='LCH')
    pred_generator = mz.DataGenerator(valid_processed,
                                      mode='point',
                                      callbacks=[hist_callback])
    pred_x, pred_y = pred_generator[:]
    print("defining generator")
    train_generator = mz.DataGenerator(train_processed,
                                       mode='pair',
                                       num_dup=5,
                                       num_neg=10,
                                       batch_size=20,
                                       callbacks=[hist_callback])

    valid_x, valid_y = valid_processed.unpack()
    evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                               x=valid_x,
                                               y=valid_y,
                                               batch_size=len(valid_x))
    print("fitting")
    history = model.fit_generator(train_generator,
                                  epochs=20,
                                  callbacks=[evaluate],
                                  workers=5,
                                  use_multiprocessing=False)
Example #15
def data_gen():
    return mz.DataGenerator(mz.datasets.toy.load_data())
Example #16

if __name__ == '__main__':
    nltk.download('punkt')
    train_data_path = 'my_train_data.csv'
    test_data_path = 'my_test_data.csv'
    train_data = load_data(train_data_path)  # paths to the train/test sets produced by the format conversion above
    test_data = load_data(test_data_path)
    
    train_dev_split = int(len(train_data) * 0.9)  # hold out 10% of the training data as the dev set
    train = train_data[:train_dev_split]
    dev = train_data[train_dev_split:]
    train_pack_processed = preprocessor.fit_transform(train)  # essentially a char-to-id mapping, so Chinese text needs no word segmentation
    dev_pack_processed = preprocessor.transform(dev)
    test_pack_processed = preprocessor.transform(test_data)
    train_data_generator = mz.DataGenerator(train_pack_processed, batch_size=32, shuffle=True)  # training-data generator

    test_x, test_y = test_pack_processed.unpack()
    dev_x, dev_y = dev_pack_processed.unpack()


    model = build()
    batch_size = 32

    evaluate = mz.callbacks.EvaluateAllMetrics(model, x=dev_x, y=dev_y, batch_size=batch_size)
    model.fit_generator(train_data_generator, epochs=5, callbacks=[evaluate], workers=5, use_multiprocessing=False)
    y_pred = model.predict(test_x)

    left_id = test_x['id_left']
    right_id = test_x['id_right']
    assert len(left_id) == len(right_id)
Example #17
def train_generator(request, train_data_processed):
    return mz.DataGenerator(train_data_processed)
Example #18
model.params['dropout_rate'] = 0.33

print(model.params)
model.build()
model.compile()
model.backend.summary()

glove_embedding = mz.datasets.embeddings.load_glove_embedding(dimension=300)
embedding_matrix = glove_embedding.build_matrix(
    preprocessor.context['vocab_unit'].state['term_index'])
model.load_embedding_matrix(embedding_matrix)

dpool_callback = mz.data_generator.callbacks.DynamicPooling(
    fixed_length_left=20, fixed_length_right=20)
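# DynamicPooling attaches the dynamic pooling index that MatchPyramid-style
# models expect, computed for the fixed left/right lengths set above.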

val_generator = mz.DataGenerator(validation_processed,
                                 callbacks=[dpool_callback])
val_x, val_y = val_generator[:]

test_generator = mz.DataGenerator(test_processed, callbacks=[dpool_callback])
test_x, test_y = test_generator[:]

evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                           x=val_x,
                                           y=val_y,
                                           batch_size=len(val_y))
callback_earlystopping = keras.callbacks.EarlyStopping(monitor='val_loss',
                                                       mode='min',
                                                       verbose=0,
                                                       patience=100,
                                                       min_delta=0.001)
mcp_save = keras.callbacks.ModelCheckpoint('best_one_matchpyramid',
Example #19
model.params.update(preprocessor.context)
model.params['task'] = ranking_task
model.params['filters'] = 64
model.params['conv_activation_func'] = 'relu'
model.params['optimizer'] = 'adam'
model.guess_and_fill_missing_params(verbose=0)
model.build()
model.compile()

pred_x, pred_y = valid_processed.unpack()
evaluate = mz.callbacks.EvaluateAllMetrics(model,
                                           x=pred_x,
                                           y=pred_y,
                                           batch_size=len(pred_y))
x = int(input('Enter batch size:'))
data_generator = mz.DataGenerator(train_processed, batch_size=x)

x = int(input('Enter number of epochs:'))
history = model.fit_generator(data_generator,
                              epochs=x,
                              callbacks=[evaluate],
                              use_multiprocessing=True,
                              workers=max(1,
                                          multiprocessing.cpu_count() - 1))

pred_x, pred_y = test_processed.unpack()
result = model.predict(pred_x)
final_res = [row[1] for row in result]
Example #20
model.params['mlp_num_fan_out'] = 100
model.params['mlp_activation_func'] = 'relu'
model.params['dropout_rate'] = 0.5
model.params['optimizer'] = 'adam'
model.guess_and_fill_missing_params()
model.build()
model.compile()

print("hyper-parameters: ")
hparams = dict(model.params, **hparams)
pprint(hparams)

print("start to train...")
# train & save model
train_generator = mz.DataGenerator(train_pack_processed,
                                   batch_size=hparams['batch_size'],
                                   shuffle=True)
model.fit_generator(train_generator,
                    epochs=hparams['n_epochs'],
                    use_multiprocessing=False,
                    workers=1,
                    verbose=0)
model.save(models_dir)

# evaluate on test dataset
valid_x, valid_y = valid_pack_processed.unpack()
preds = model.predict(valid_x)
y_trues, y_preds = [], []
for pred, y in zip(preds, valid_y):
    y_true = y[1]
    y_trues.append(int(y_true))