def model1(maxlen, batch_size, num_epochs, w2v, traindf, cvdf):
    """Build the stacked-LSTM binary classifier and its batch generators.

    Args:
        maxlen: fixed sequence length the encoder pads/truncates to.
        batch_size: rows per generated batch.
        num_epochs: accepted for a uniform model-factory signature
            (not used here; the caller drives the training loop).
        w2v: word2vec embedding used by ``data_init.encode_w2v``.
        traindf, cvdf: training / cross-validation DataFrames.

    Returns:
        (model, train_gen, cv_gen, nb_val_samples) where nb_val_samples
        is the number of cross-validation rows.
    """
    def encode(batch):
        # Word2vec-encode one batch of rows at the fixed sequence length.
        return data_init.encode_w2v(df=batch, w2v=w2v, maxlen=maxlen)

    # Training batches must be full-size; CV batches may be ragged.
    train_gen = batch_generator(df=traindf, encoder=encode,
                                batch_size=batch_size,
                                force_batch_size=True)
    cv_gen = batch_generator(df=cvdf, encoder=encode,
                             batch_size=batch_size)

    # 300-d word2vec inputs -> LSTM(60) -> LSTM(60) -> sigmoid score,
    # with dropout after each recurrent layer.
    net = Sequential()
    net.add(LSTM(60, input_dim=300, return_sequences=True))
    net.add(Dropout(0.5))
    net.add(LSTM(60))
    net.add(Dropout(0.5))
    net.add(Dense(1, activation='sigmoid'))
    net.compile('rmsprop', 'binary_crossentropy',
                metrics=['accuracy', 'mse'])

    return net, train_gen, cv_gen, len(cvdf)
def test_encoder(b):
    """Encode an unlabeled batch and triplicate it for a 3-input model.

    Uses ``w2v``, ``maxlen`` and ``categorical`` from the enclosing scope.
    Test-time rows carry no labels, hence ``labeled=False``.
    """
    x = data_init.encode_w2v(df=b, w2v=w2v, maxlen=maxlen,
                             labeled=False, categorical=categorical)
    # Same encoded array fed to all three model inputs.
    return [x] * 3
def encoder(b):
    """Encode an unlabeled batch and triplicate it for a 3-input model.

    Uses ``w2v`` and ``maxlen`` from the enclosing scope; rows carry no
    labels (``labeled=False``).
    """
    x = data_init.encode_w2v(df=b, w2v=w2v, maxlen=maxlen, labeled=False)
    # Same encoded array fed to all three model inputs.
    return [x] * 3
def encoder(b):
    """Encode a labeled batch for a 3-input model.

    Uses ``w2v`` and ``maxlen`` from the enclosing scope. Returns
    ``[[x, x, x], y]``: the feature encoding replicated for each of the
    three model inputs, plus the label array.
    """
    x, y = data_init.encode_w2v(df=b, w2v=w2v, maxlen=maxlen)
    return [[x] * 3, y]
def encoder(b):
    """Encode a labeled batch for a 3-input model, with categorical labels.

    Uses ``w2v``, ``maxlen`` and ``categorical`` from the enclosing scope.
    Returns ``[[x, x, x], y]``: the feature encoding replicated for each
    of the three model inputs, plus the label array.
    """
    x, y = data_init.encode_w2v(df=b, w2v=w2v, maxlen=maxlen,
                                categorical=categorical)
    return [[x] * 3, y]
batch_size=batch_size, num_epochs=num_epochs, w2v=w2v,
        traindf=traindf, cvdf=cvdf)

    # trains the model
    print('Training model...')
    train(model, nn_name, samples_per_epoch, num_epochs,
          train_gen, cv_gen, nb_val_samples)

    # generates the output file
    print('Getting predictions...')
    # NOTE(review): train/cv above load from 'data/...' while this reads
    # '../data/test.csv' — confirm the relative path difference is intended.
    testdf = pd.read_csv('../data/test.csv')
    testdf = data_init.clean_df(testdf, labeled=False)
    # test rows have no labels, so encode with labeled=False
    test_encoder = lambda b: data_init.encode_w2v(
        df=b, w2v=w2v, maxlen=maxlen, labeled=False)
    data_init.output_results(model, testdf, test_encoder, batch_size)
elif '--w2v-2' in sys.argv:
    # loads the train and Cross-Validation DataFrames
    print('Loading data...')
    traindf = pd.read_csv('data/train.csv')
    cvdf = pd.read_csv('data/cv.csv')
    print(len(traindf), 'train sequences')
    print(len(cvdf), 'cv sequences')

    # hyper-parameters for this CLI branch
    maxlen = 400  # all texts are set to this length (either padding or truncating them)
    batch_size = 50  # training batch size
    nn_name = 'w2v-convX3-lstmX2-regression'  # name of the NN (used for saving the model and logs)
    num_epochs = 70  # number of epochs to train