#sys.argv[1] = data file whose vocabulary size is computed (see getVocabSize below)
#sys.argv[2] = integer passed to getVocabSize along with sys.argv[1]
#sys.argv[3] = path to pretrained word2vec vectors (text format), or any string containing "embed" to train an embedding layer instead
#sys.argv[4] = max length of sentence to consider (currently using 30); needed to handle fixed size of input matrix
#sys.argv[5] = number of layers
#sys.argv[6] = c_size (number of LSTM units per layer)
#sys.argv[7] = # of epochs
#sys.argv[8] = loss function
#sys.argv[9] = optimizer
#sys.argv[10] = pickle file for training data
#sys.argv[11] = pickle file for testing data
#sys.argv[12] = OPTIONAL location of .h5 file to save weights
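
#Example invocation (the script and file names here are illustrative only, not from this repo):
#  python train_lstm.py data.txt 1 vectors.txt 30 2 128 10 binary_crossentropy adam train.pkl test.pkl weights.h5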


#use pretrained vectors unless sys.argv[3] asks for a trainable embedding layer
if "embed" not in sys.argv[3]:
    print("loading embeddings")
    #NOTE: in gensim >= 1.0 this loader moved to g.KeyedVectors.load_word2vec_format
    w2v = g.Word2Vec.load_word2vec_format(sys.argv[3], binary=False)

    model = m.LSTM_keras(
        num_layers=int(sys.argv[5]), embeddingClass=w2v,
        w2vDimension=len(w2v["the"]),  #embedding dimensionality (assumes "the" is in the vocabulary)
        max_seq_length=int(sys.argv[4]), cSize=int(sys.argv[6]),
        num_epochs=int(sys.argv[7]), loss_function=sys.argv[8],
        optimizer=sys.argv[9])

else:
    #no pretrained vectors: train an embedding layer from scratch
    embedding = e.Embedding_keras()

    print("calculating vocab size")
    embedding.getVocabSize(sys.argv[1], int(sys.argv[2]))
    print("vocab size", embedding.vocSize)

    print("building embedding layer")
    embedding.build()

    model = m.LSTM_keras(
        num_layers=int(sys.argv[5]), embeddingLayerClass=embedding,
        max_seq_length=int(sys.argv[4]), cSize=int(sys.argv[6]),
        num_epochs=int(sys.argv[7]), loss_function=sys.argv[8],
        optimizer=sys.argv[9])

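#For reference, a minimal sketch of the kind of stacked-LSTM model these
#arguments describe. This is hypothetical and not the architecture inside
#m.LSTM_keras; the output layer in particular is illustrative:
#
#    from keras.models import Sequential
#    from keras.layers import LSTM, Dense
#
#    num_layers, c_size = int(sys.argv[5]), int(sys.argv[6])
#    max_seq_length, w2v_dim = int(sys.argv[4]), 300  #300 is a typical w2v dimension
#
#    sketch = Sequential()
#    #first layer fixes the input shape: (timesteps, embedding dimension)
#    sketch.add(LSTM(c_size, return_sequences=(num_layers > 1),
#                    input_shape=(max_seq_length, w2v_dim)))
#    #intermediate layers return full sequences so the next LSTM sees every timestep
#    for i in range(1, num_layers):
#        sketch.add(LSTM(c_size, return_sequences=(i < num_layers - 1)))
#    sketch.add(Dense(1, activation="sigmoid"))
#    sketch.compile(loss=sys.argv[8], optimizer=sys.argv[9])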

print("preparing data file")
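
#A minimal sketch of how the pickled train/test data (sys.argv[10], sys.argv[11])
#could be loaded; the repo's own preparation code presumably follows this print:
#
#    import pickle
#    with open(sys.argv[10], "rb") as f:
#        training_data = pickle.load(f)
#    with open(sys.argv[11], "rb") as f:
#        testing_data = pickle.load(f)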