Code example #1
from flask import Flask, request, jsonify

app = Flask(__name__)
model = None  # shared with any prediction endpoint


@app.route('/train', methods=['POST'])  # route path assumed; not given in the original
def train():
    global model
    data_reset = request.get_json()
    # Inputs are parallel user/item id histories; targets are the observed ratings.
    X = [data_reset['user_history'], data_reset['item_history']]
    y = data_reset['rating_history']
    nb_users, nb_items = data_reset['nb_users'], data_reset['nb_items']

    # Rebuild the model from scratch so each call retrains on the posted data.
    # EmbeddingModel is defined elsewhere in the project.
    model = EmbeddingModel(nb_users, nb_items, embedding_size=30)
    model.fit(X, y, verbose=True)

    return jsonify({'info': 'successful'})
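For context, here is a minimal client call against the endpoint above. This is a hedged sketch: only the field names come from the handler; the host, port, and sample values are assumptions.

import requests

payload = {
    'user_history': [0, 0, 1, 2],        # hypothetical sample data
    'item_history': [1, 3, 0, 2],
    'rating_history': [5.0, 3.0, 4.0, 1.0],
    'nb_users': 3,
    'nb_items': 4,
}
resp = requests.post('http://localhost:5000/train', json=payload)
print(resp.json())  # expected: {'info': 'successful'}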
Code example #2
import numpy as np
import tensorflow as tf

# ETLDataPipeline and EmbeddingModel are project-local modules (not shown here).


def main():
    obj_etl = ETLDataPipeline("data/train.csv", "data/test.csv")
    train, test = obj_etl.read_data()
    # Keep only the high-cardinality nominal columns; drop everything else.
    train = obj_etl.drop_cols([
        'id', 'bin_0', 'bin_1', 'bin_2', 'bin_3', 'bin_4', 'nom_0', 'nom_1',
        'nom_2', 'nom_3', 'nom_4', 'ord_0', 'ord_1', 'ord_2', 'ord_3', 'ord_4',
        'ord_5', 'day', 'month'
    ])
    nom_cols = ['nom_5', 'nom_6', 'nom_7', 'nom_8', 'nom_9']
    train = obj_etl.convert_dtypes(nom_cols)
    train = obj_etl.encoder(nom_cols)
    target = obj_etl.get_target('target')
    X_train, X_valid, y_train, y_valid = obj_etl.get_train_test(train, target)

    # The model takes one (n, 1) integer array per nominal column.
    train_inputs = tuple(
        np.reshape(X_train[c].values, (-1, 1)) for c in nom_cols)
    train_target = np.reshape(y_train.values, (-1, 1))
    valid_inputs = tuple(
        np.reshape(X_valid[c].values, (-1, 1)) for c in nom_cols)
    valid_target = np.reshape(y_valid.values, (-1, 1))

    tf.random.set_seed(0)

    # Decay the learning rate by a factor of 0.1 over 100 epochs' worth of
    # steps at batch size 32. Note that fit() below runs only 10 epochs, so
    # only part of the decay schedule is traversed during training.
    decay_steps = 100 * len(X_train) // 32
    learning_rate = tf.keras.optimizers.schedules.ExponentialDecay(
        0.01, decay_steps, 0.1)
    opt = tf.keras.optimizers.Adam(learning_rate)

    model = EmbeddingModel(
        hidden_units=3,
        output_units=1,
        embeddings_initializer=tf.keras.initializers.RandomNormal(),
        kernel_initializer=tf.keras.initializers.he_uniform(seed=0),
        dropout_rate=0.4,
        activation="sigmoid",
        trainable=True)
    model.compile(loss=tf.keras.losses.binary_crossentropy,
                  metrics=['accuracy'],
                  optimizer=opt)
    baseline_history = model.fit(
        train_inputs,
        train_target,
        epochs=10,
        batch_size=32,
        validation_data=(valid_inputs, valid_target),
        class_weight={0: 0.5, 1: 0.5})
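EmbeddingModel itself is not shown in this example. As one plausible reading, a minimal functional-API sketch of an entity-embedding classifier over the five nominal inputs could look like the following; the function name, embedding width, and layer wiring are assumptions, not the original class.

import tensorflow as tf


def build_entity_embedding_model(cardinalities, hidden_units=3, output_units=1,
                                 dropout_rate=0.4, activation="sigmoid"):
    # One (batch, 1) integer input and one embedding table per nominal column.
    inputs, embedded = [], []
    for i, n_categories in enumerate(cardinalities):
        inp = tf.keras.Input(shape=(1,), dtype="int32", name=f"nom_{i + 5}")
        emb = tf.keras.layers.Embedding(n_categories, 8)(inp)  # width 8 is arbitrary
        inputs.append(inp)
        embedded.append(tf.keras.layers.Flatten()(emb))
    x = tf.keras.layers.Concatenate()(embedded)
    x = tf.keras.layers.Dense(hidden_units, activation="relu",
                              kernel_initializer="he_uniform")(x)
    x = tf.keras.layers.Dropout(dropout_rate)(x)
    outputs = tf.keras.layers.Dense(output_units, activation=activation)(x)
    return tf.keras.Model(inputs, outputs)

Built with per-column category counts (for example, X_train[c].nunique() + 1 for each column in nom_cols), such a model accepts the same tuple-of-arrays inputs used in the fit() call above.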
Code example #3
from pprint import pprint

from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.models import load_model

# load_data, build_vocablary, create_dataset, EmbeddingModel, and InferenceAPI
# are project-local modules (not shown here).


def main():
    # hyperparameter settings
    emb_dim = 50
    epochs = 2
    model_path = 'model.h5'
    negative_samples = 1
    num_words = 10000
    window_size = 1

    # corpus
    text = load_data(filepath='../chap04/data/ja.text8')

    # vocabulary
    vocab = build_vocablary(text, num_words)

    # dataset of (target, context) pairs with negative sampling
    x, y = create_dataset(text, vocab, num_words, window_size, negative_samples)

    # build and compile the model
    model = EmbeddingModel(num_words, emb_dim)
    model = model.build()
    model.compile(optimizer='adam', loss='binary_crossentropy')

    # callbacks: stop early and keep only the best checkpoint
    callbacks = [
        EarlyStopping(patience=1),
        ModelCheckpoint(model_path, save_best_only=True)
    ]

    # training
    model.fit(x=x, y=y,
              batch_size=128,
              epochs=epochs,
              validation_split=0.2,
              callbacks=callbacks)

    # prediction: reload the best checkpoint and query similar words
    model = load_model(model_path)
    api = InferenceAPI(model, vocab)
    pprint(api.most_similar(word='日本'))
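InferenceAPI comes from the accompanying project code and is not shown. A common way to implement most_similar for a model like this is cosine similarity over the learned embedding matrix; the class below is a self-contained sketch of that idea, and its name and constructor arguments are hypothetical, not the original API.

import numpy as np


class SimilarityIndex:
    """Hypothetical stand-in for InferenceAPI.most_similar: nearest
    neighbours by cosine similarity over a word-embedding matrix."""

    def __init__(self, embeddings, word_to_id, id_to_word):
        # embeddings: (vocab_size, emb_dim), e.g. an Embedding layer's weights.
        norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
        self.vectors = embeddings / np.maximum(norms, 1e-8)
        self.word_to_id = word_to_id
        self.id_to_word = id_to_word

    def most_similar(self, word, topn=10):
        query_id = self.word_to_id[word]
        # Rows are unit-norm, so a dot product is the cosine similarity.
        scores = self.vectors @ self.vectors[query_id]
        ranked = np.argsort(-scores)
        return [(self.id_to_word[i], float(scores[i]))
                for i in ranked if i != query_id][:topn]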