Example #1
def model1(maxlen, batch_size, num_epochs, w2v, traindf, cvdf):
    train_gen = batch_generator(
        df=traindf,
        encoder=lambda b: data_init.encode_w2v(df=b, w2v=w2v, maxlen=maxlen),
        batch_size=batch_size,
        force_batch_size=True)
    cv_gen = batch_generator(
        df=cvdf,
        encoder=lambda b: data_init.encode_w2v(df=b, w2v=w2v, maxlen=maxlen),
        batch_size=batch_size)

    # creates the neural network
    model = Sequential()
    model.add(LSTM(60, input_dim=300, return_sequences=True))
    model.add(Dropout(0.5))
    model.add(LSTM(60))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    # compiles the model
    model.compile('rmsprop',
                  'binary_crossentropy',
                  metrics=['accuracy', 'mse'])

    nb_val_samples = len(cvdf)
    return model, train_gen, cv_gen, nb_val_samples
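For context on how model1's return values are consumed: the script in Example #6 passes the model, the two generators and nb_val_samples to a project-specific train helper. The sketch below only illustrates the underlying Keras 1.x call that such a helper would typically make; the hyperparameter values are the ones from Example #6, while samples_per_epoch=len(traindf) is an assumption, not taken from the original code.

# Hypothetical usage sketch (Keras 1.x fit_generator API); w2v, traindf and
# cvdf are assumed to be loaded as in Example #6.
num_epochs = 70
model, train_gen, cv_gen, nb_val_samples = model1(
    maxlen=400, batch_size=50, num_epochs=num_epochs,
    w2v=w2v, traindf=traindf, cvdf=cvdf)

model.fit_generator(train_gen,
                    samples_per_epoch=len(traindf),  # assumption: one full pass per epoch
                    nb_epoch=num_epochs,
                    validation_data=cv_gen,
                    nb_val_samples=nb_val_samples)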
Example #2
def test_encoder(b):
    # encode the batch without labels (categorical flag passed through)
    # and return three copies of the encoding
    encoded = data_init.encode_w2v(df=b,
                                   w2v=w2v,
                                   maxlen=maxlen,
                                   labeled=False,
                                   categorical=categorical)
    return [encoded, encoded, encoded]
Example #3
def encoder(b):
    # encode the batch without labels and return three copies of the encoding
    encoded = data_init.encode_w2v(df=b,
                                   w2v=w2v,
                                   maxlen=maxlen,
                                   labeled=False)
    return [encoded, encoded, encoded]
Example #4
def encoder(b):
    # encode the batch with labels; the inputs are returned three times, the labels once
    encoded_x, encoded_y = data_init.encode_w2v(df=b,
                                                w2v=w2v,
                                                maxlen=maxlen)
    return [[encoded_x, encoded_x, encoded_x], encoded_y]
Example #5
def encoder(b):
    # encode the batch with (optionally categorical) labels;
    # the inputs are returned three times, the labels once
    encoded_x, encoded_y = data_init.encode_w2v(df=b,
                                                w2v=w2v,
                                                maxlen=maxlen,
                                                categorical=categorical)
    return [[encoded_x, encoded_x, encoded_x], encoded_y]
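Examples #2 through #5 all return the encoded batch three times (with the labels appended once in #4 and #5), which is the shape expected by a model with three parallel input branches, consistent with the 'w2v-convX3-lstmX2-regression' name in Example #6. The actual architecture is not shown here; the sketch below is only an illustration of a three-input model that such an encoder could feed, assuming the Keras 1.x functional API and made-up layer sizes.

from keras.layers import Input, Convolution1D, MaxPooling1D, LSTM, Dense, merge
from keras.models import Model

maxlen = 400  # sequence length, as in Example #6

# Hypothetical three-branch model: each branch receives the same encoded batch.
inputs = [Input(shape=(maxlen, 300)) for _ in range(3)]
branches = []
for inp in inputs:
    x = Convolution1D(64, 3, activation='relu')(inp)  # illustrative filter count/length
    x = MaxPooling1D(pool_length=2)(x)
    x = LSTM(60)(x)
    branches.append(x)

merged = merge(branches, mode='concat')  # Keras 1.x merge helper
output = Dense(1, activation='sigmoid')(merged)
model = Model(input=inputs, output=output)
model.compile('rmsprop', 'binary_crossentropy', metrics=['accuracy'])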
Example #6
            batch_size=batch_size,
            num_epochs=num_epochs,
            w2v=w2v,
            traindf=traindf,
            cvdf=cvdf)

        # trains the model
        print('Training model...')
        train(model, nn_name, samples_per_epoch, num_epochs, train_gen, cv_gen,
              nb_val_samples)

        # generates the output file
        print('Getting predictions...')
        testdf = pd.read_csv('../data/test.csv')
        testdf = data_init.clean_df(testdf, labeled=False)
        test_encoder = lambda b: data_init.encode_w2v(
            df=b, w2v=w2v, maxlen=maxlen, labeled=False)
        data_init.output_results(model, testdf, test_encoder, batch_size)

    elif '--w2v-2' in sys.argv:
        # loads the train and Cross-Validation DataFrames
        print('Loading data...')
        traindf = pd.read_csv('data/train.csv')
        cvdf = pd.read_csv('data/cv.csv')

        print(len(traindf), 'train sequences')
        print(len(cvdf), 'cv sequences')

        maxlen = 400  # all texts are padded or truncated to this length
        batch_size = 50  # training batch size
        nn_name = 'w2v-convX3-lstmX2-regression'  # name of the NN (used for saving the model and logs)
        num_epochs = 70  # number of epochs to train