##################### Example #2 ####################

import nltk
import numpy as np
from pickle import load

# get_word_index_array is assumed to be defined elsewhere in this repo.
def pad_sents(sents, padding_token_index):
    # pad each sentence (a list of word indices) with the padding token
    # up to the length of the longest sentence in the batch
    max_len = max(len(sent) for sent in sents)
    get_padding = lambda sent: [padding_token_index] * (max_len - len(sent))
    padded_sents = [sent + get_padding(sent) for sent in sents]
    return padded_sents
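
# Quick sanity check: shorter sentences are right-padded with the
# padding token up to the batch maximum.
assert pad_sents([[1, 2], [3]], padding_token_index=0) == [[1, 2], [3, 0]]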


WORD2INDEX = load(open("data/twitter.pkl", "rb"))[3]  # element 3 is the word -> index map
PADDING_INDEX = WORD2INDEX[u"<PADDING>"]

from param_util import load_dcnn_model_params
from dcnn import DCNN

params = load_dcnn_model_params(
    "models/filter_widths=8,6,,batch_size=10,,ks=20,8,,fold=1,1,,conv_layer_n=2,,ebd_dm=48,,l2_regs=1e-06,1e-06,1e-06,0.0001,,dr=0.5,0.5,,nkerns=7,12.pkl"
)

MODEL = DCNN(params)


def sentiment_scores_of_sents(sents):
    """
    Predict the sentiment positive scores for a bunch of sentences
    
    >>> sentiment_scores_of_sents([u'simultaneously heart breaking and very funny , the last kiss is really all about performances .', u'( u ) stupid .'])
    array([ 0.78528505,  0.0455901 ])
    """
    word_indices = [
        get_word_index_array(nltk.word_tokenize(sent), WORD2INDEX)
        for sent in sents
    ]

    x = np.asarray(pad_sents(word_indices, PADDING_INDEX), dtype=np.int32)
##################### Example #3 ####################

import numpy as np
from dcnn import DCNN

# embeddings, W, b, W_logreg, b_logreg, vocab_size and the compiled Theano
# function f_el are assumed to be defined earlier in this test script.
class Params(object):
    pass


p = Params()
p.embeddings = embeddings
p.conv_layer_n = 1
p.ks = [3]
p.fold = [1]
p.W = [W]
p.b = [b]
p.W_logreg = W_logreg
p.b_logreg = b_logreg

dcnn = DCNN(p)

##################### Testing ####################

from test_util import (assert_matrix_eq, assert_about_eq)

x = np.asarray(np.random.randint(vocab_size, size=(3, 6)), dtype=np.int32)

y = np.asarray(np.random.randint(5, size=3), dtype=np.int32)

########### Embedding layer ##############
actual = f_el(x)
expected = dcnn.e_layer.output(x)
assert_matrix_eq(actual, expected, "Embedding")

########## Conv layer ###################
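# Hypothetical continuation mirroring the embedding check above; f_cl and
# dcnn.conv_layers are guessed names, not verified against this repo's API:
#
# actual = f_cl(x)
# expected = dcnn.conv_layers[0].output(dcnn.e_layer.output(x))
# assert_matrix_eq(actual, expected, "Convolution")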
##################### Example #4 ####################
from dcnn import DCNN

from util import load_data
from param_util import load_dcnn_model_params

params = load_dcnn_model_params(
    "models/filter_widths=8,6,,batch_size=10,,ks=20,8,,fold=1,1,,conv_layer_n=2,,ebd_dm=48,,l2_regs=1e-06,1e-06,1e-06,0.0001,,dr=0.5,0.5,,nkerns=7,12.pkl"
)

model = DCNN(params)

datasets = load_data("data/twitter.pkl")

dev_set_x, dev_set_y = datasets[1]
test_set_x, test_set_y = datasets[2]

# load_data returns Theano shared variables; pull out the numpy arrays
dev_set_x, dev_set_y = dev_set_x.get_value(), dev_set_y.get_value()
test_set_x, test_set_y = test_set_x.get_value(), test_set_y.get_value()

print "dev error:", model._errors(dev_set_x, dev_set_y)
print "test error:", model._errors(test_set_x, test_set_y)
##################### Example #5 ####################

# tokenizer and the Keras DCNN subclass are assumed to be defined
# earlier in this notebook.

# How many distinct words the model needs to recognize
VOCAB_SIZE = tokenizer.vocab_size
EMB_DIM = 200  # every word is mapped into a 200-dimensional embedding space
NB_FILTERS = 100
FFN_UNITS = 256
NB_CLASSES = 2  # len(set(train_labels))
DROPOUT_RATE = 0.2
BATCH_SIZE = 32
NB_EPOCHS = 5

# TRAINING PHASE:
print('Training')

Dcnn = DCNN(vocab_size=VOCAB_SIZE,
            emb_dim=EMB_DIM,
            nb_filters=NB_FILTERS,
            FFN_units=FFN_UNITS,
            nb_classes=NB_CLASSES,
            dropout_rate=DROPOUT_RATE)

if NB_CLASSES == 2:
    Dcnn.compile(loss="binary_crossentropy",
                 optimizer="adam",
                 metrics=["accuracy"])
else:
    Dcnn.compile(loss="sparse_categorical_crossentropy",
                 optimizer="adam",
                 metrics=["sparse_categorical_accuracy"])

# Checkpointing: save the training state every so often so that it can
# be restored later; a minimal sketch follows.
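
# Sketch under assumed names: the "./ckpt" directory and the train_dataset
# variable are not shown in the snippet above.
import tensorflow as tf

ckpt = tf.train.Checkpoint(Dcnn=Dcnn)
ckpt_manager = tf.train.CheckpointManager(ckpt, "./ckpt", max_to_keep=1)
if ckpt_manager.latest_checkpoint:
    ckpt.restore(ckpt_manager.latest_checkpoint)
    print("Latest checkpoint restored")

# Train, then persist the weights:
# Dcnn.fit(train_dataset, epochs=NB_EPOCHS)
# ckpt_manager.save()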