logger = create_logger(__name__) environment.reproducible() theano.config.floatX = 'float32' if not os.path.exists(WORD_EMBEDDING_NN): raise EnvironmentError("Can't find NN model. File {} doesn't exist {}." "Probably you haven't train it yet. " "Run `train_word_embedding_nn.py` script.") logger.info("Reading data") data = pd.read_csv(REVIEWS_FILE, sep='\t') logger.info("Loading word embedding NN") word2vec = WordEmbeddingNN.load(WORD_EMBEDDING_NN) prepare_data_pipeline = Pipeline([ ('tokenize_texts', TokenizeText(ignore_stopwords=False)), ('ignore_unknown_words', IgnoreUnknownWords(dictionary=word2vec.vocab)), ('word_embedding', word2vec), ]) classifier = algorithms.RPROP( [ layers.Relu(100), layers.Relu(200), layers.Sigmoid(50), layers.RoundedOutput(1), ], error='binary_crossentropy',