Example no. 1
 def __init__(self,
              batch_size,
              embedding_layer: layers.Embedding = None,
              activation=None,
              **kwargs):
     """Initialize the layer around a (pre-trained) embedding.

     Args:
         batch_size: Batch size the layer will be run with.
         embedding_layer: Optional Keras ``Embedding`` whose config supplies
             the vocabulary size (``input_dim``).
         activation: Activation identifier resolved via ``activations.get``
             (name, callable, or ``None`` for linear).
         **kwargs: Forwarded to the parent layer's constructor.
     """
     super().__init__(**kwargs)
     self.embedding_layer = embedding_layer
     # BUG FIX: the default for `embedding_layer` is None, but the original
     # code called .get_config() on it unconditionally, raising
     # AttributeError whenever the default was used. Guard against None.
     self.vocab_size = (embedding_layer.get_config()['input_dim']
                        if embedding_layer is not None else None)
     self.activation = activations.get(activation)
     # Frozen by design: the wrapped embedding weights are not updated.
     self.trainable = False
     self.batch_size = batch_size

# In[15]:

# Tokenize every speech into word tokens; r'\w+' keeps word characters only,
# dropping punctuation.
tokens = []
tokenizer = RegexpTokenizer(r'\w+')
for i, s in enumerate(speeches['speech']):
    tokens.append(tokenizer.tokenize(s))

    # BUG FIX: the loop's `if` body was entirely commented out, leaving an
    # empty block — a SyntaxError/IndentationError that prevented the file
    # from even parsing. Restore the periodic progress report.
    if i % 10000 == 0:
        print(i)

# In[17]:

# Embedding width (number of factors) from the pre-built embedding layer.
n_fact = embedding_layer.get_config()['output_dim']
# Map each tokenized document to vocabulary indices, silently dropping
# out-of-vocabulary words.
# IDIOM FIX: `word in vocab` replaces `word in vocab.keys()` — same result,
# standard membership test on the dict itself; the backslash continuation
# was redundant inside brackets.
idx_tokens = [[vocab[word]
               for word in doc if word in vocab]
              for doc in tokens]


# In[21]:

# BUG FIX: `sklearn.cross_validation` was deprecated in scikit-learn 0.18
# and removed in 0.20; `train_test_split` now lives in
# `sklearn.model_selection` (same signature).
from sklearn.model_selection import train_test_split

# 80/20 split over speech indices; labels are 1 for Republican ('R'),
# 0 otherwise.
train_idxs, test_idxs, y_train, y_test = train_test_split(speeches.index,
                                                          np.where(speeches['party'] == 'R', 1, 0),
                                                          test_size=0.2)


# In[22]: