def __init__(self, batch_size, embedding_layer: layers.Embedding = None, activation=None, **kwargs):
    """Initialize a frozen wrapper around a pre-built Keras Embedding layer.

    Args:
        batch_size: Number of samples per batch (stored for later use).
        embedding_layer: A pre-built ``layers.Embedding``. Effectively
            required: although it defaults to ``None`` (kept for
            backward-compatible signatures), a clear error is raised
            when it is missing.
        activation: Activation identifier resolved via ``activations.get``
            (name, callable, or ``None``).
        **kwargs: Forwarded to the parent layer constructor.

    Raises:
        ValueError: If ``embedding_layer`` is None.
    """
    super().__init__(**kwargs)
    if embedding_layer is None:
        # The original code dereferenced the None default immediately,
        # raising an opaque AttributeError; fail fast with a clear message.
        raise ValueError("embedding_layer must be provided")
    self.embedding_layer = embedding_layer
    # Vocabulary size is taken from the embedding's configured input_dim.
    self.vocab_size = embedding_layer.get_config()['input_dim']
    self.activation = activations.get(activation)
    # The wrapped embedding is frozen: its weights are not trained here.
    self.trainable = False
    self.batch_size = batch_size
# In[15]:
# Tokenize every speech into word tokens (r'\w+' keeps alphanumeric runs,
# dropping punctuation).
tokens = []
tokenizer = RegexpTokenizer(r'\w+')
for i, s in enumerate(speeches['speech']):
    tokens.append(tokenizer.tokenize(s))
    # NOTE(review): the original had `if i % 10000 == 0:` with its only body
    # line commented out (`# print(i)`), which is a syntax error; the dead
    # progress-print branch has been removed.

# In[17]:
# Embedding dimensionality, read from the pre-built embedding layer.
n_fact = embedding_layer.get_config()['output_dim']
# Map each tokenized speech to vocabulary indices, silently dropping
# out-of-vocabulary words.
idx_tokens = [[vocab[word] for word in doc if word in vocab]
              for doc in tokens]

# In[21]:
# sklearn.cross_validation was removed in scikit-learn 0.20;
# model_selection is the current home of train_test_split.
from sklearn.model_selection import train_test_split

# Binary label: 1 for Republican ('R'), 0 otherwise. 20% held out for test.
train_idxs, test_idxs, y_train, y_test = train_test_split(
    speeches.index,
    np.where(speeches['party'] == 'R', 1, 0),
    test_size=0.2,
)

# In[22]: