#########################
# Assemble a one-conv-layer DCNN from the parameters built above, then
# start checking each layer's compiled function against the model.

class Params(object):
    """Bare attribute container handed to the DCNN constructor."""
    pass

p = Params()
p.embeddings = embeddings
p.conv_layer_n = 1   # a single convolutional layer
p.ks = [3]           # presumably the k of k-max pooling per layer -- verify against DCNN
p.fold = [1]         # folding flag per layer
p.W = [W]
p.b = [b]
p.W_logreg = W_logreg
p.b_logreg = b_logreg

dcnn = DCNN(p)

##################### Testing ####################
from test_util import (assert_matrix_eq, assert_about_eq)

# Random batch: 3 "sentences" of 6 token indices each, labels in [0, 5).
x = np.asarray(np.random.randint(vocab_size, size=(3, 6)), dtype=np.int32)
y = np.asarray(np.random.randint(5, size=3), dtype=np.int32)

########### Embedding layer ##############
actual = f_el(x)
get_padding = lambda sent: [padding_token_index] * (max_len - len(sent)) padded_sents = [(sent + get_padding(sent)) for sent in sents] return padded_sents WORD2INDEX = load(open("data/twitter.pkl"))[3] PADDING_INDEX = WORD2INDEX[u"<PADDING>"] from param_util import load_dcnn_model_params from dcnn import DCNN params = load_dcnn_model_params( "models/filter_widths=8,6,,batch_size=10,,ks=20,8,,fold=1,1,,conv_layer_n=2,,ebd_dm=48,,l2_regs=1e-06,1e-06,1e-06,0.0001,,dr=0.5,0.5,,nkerns=7,12.pkl" ) MODEL = DCNN(params) def sentiment_scores_of_sents(sents): """ Predict the sentiment positive scores for a bunch of sentences >>> sentiment_scores_of_sents([u'simultaneously heart breaking and very funny , the last kiss is really all about performances .', u'( u ) stupid .']) array([ 0.78528505, 0.0455901 ]) """ word_indices = [ get_word_index_array(nltk.word_tokenize(sent), WORD2INDEX) for sent in sents ] x = np.asarray(pad_sents(word_indices, PADDING_INDEX), dtype=np.int32)
# Build a minimal one-conv-layer DCNN and verify its embedding layer
# against the standalone compiled function f_el.

class Params(object):
    """Plain namespace object carrying DCNN construction parameters."""
    pass

p = Params()
p.embeddings = embeddings
p.conv_layer_n = 1   # single convolutional layer
p.ks = [3]           # presumably k-max pooling k per layer -- verify against DCNN
p.fold = [1]         # folding flag per layer
p.W = [W]
p.b = [b]
p.W_logreg = W_logreg
p.b_logreg = b_logreg

dcnn = DCNN(p)

##################### Testing ####################
from test_util import (assert_matrix_eq, assert_about_eq)

# Random inputs: a (3, 6) batch of token indices and 3 labels in [0, 5).
x = np.asarray(np.random.randint(vocab_size, size=(3, 6)), dtype=np.int32)
y = np.asarray(np.random.randint(5, size=3), dtype=np.int32)

########### Embedding layer ##############
actual = f_el(x)
expected = dcnn.e_layer.output(x)
assert_matrix_eq(actual, expected, "Embedding")

########## Conv layer ###################
from dcnn import DCNN from util import load_data from param_util import load_dcnn_model_params params = load_dcnn_model_params( "models/filter_widths=8,6,,batch_size=10,,ks=20,8,,fold=1,1,,conv_layer_n=2,,ebd_dm=48,,l2_regs=1e-06,1e-06,1e-06,0.0001,,dr=0.5,0.5,,nkerns=7,12.pkl" ) model = DCNN(params) datasets = load_data("data/twitter.pkl") dev_set_x, dev_set_y = datasets[1] test_set_x, test_set_y = datasets[2] dev_set_x, dev_set_y = dev_set_x.get_value(), dev_set_y.get_value() test_set_x, test_set_y = test_set_x.get_value(), test_set_y.get_value() print "dev error:", model._errors(dev_set_x, dev_set_y) print "test error:", model._errors(test_set_x, test_set_y)
# How many distinct words the model has to recognize.
VOCAB_SIZE = tokenizer.vocab_size

EMB_DIM = 200       # every word is mapped into a 200-dimensional space
NB_FILTERS = 100
FFN_UNITS = 256
NB_CLASSES = 2      # len(set(train_labels))

DROPOUT_RATE = 0.2

BATCH_SIZE = 32
NB_EPOCHS = 5

# TRAINING PHASE:
print('Entrenamiento')

Dcnn = DCNN(vocab_size=VOCAB_SIZE,
            emb_dim=EMB_DIM,
            nb_filters=NB_FILTERS,
            FFN_units=FFN_UNITS,
            nb_classes=NB_CLASSES,
            dropout_rate=DROPOUT_RATE)

# Binary task -> binary cross-entropy; otherwise sparse categorical
# cross-entropy with its matching accuracy metric.
if NB_CLASSES == 2:
    Dcnn.compile(loss="binary_crossentropy",
                 optimizer="adam",
                 metrics=["accuracy"])
else:
    Dcnn.compile(loss="sparse_categorical_crossentropy",
                 optimizer="adam",
                 metrics=["sparse_categorical_accuracy"])

# Mechanism for marking checkpoints during training so that the state
# is saved periodically and we can ... (comment truncated in original)
from dcnn import DCNN from util import load_data from param_util import load_dcnn_model_params params = load_dcnn_model_params("models/filter_widths=8,6,,batch_size=10,,ks=20,8,,fold=1,1,,conv_layer_n=2,,ebd_dm=48,,l2_regs=1e-06,1e-06,1e-06,0.0001,,dr=0.5,0.5,,nkerns=7,12.pkl") model = DCNN(params) datasets = load_data("data/twitter.pkl") dev_set_x, dev_set_y = datasets[1] test_set_x, test_set_y = datasets[2] dev_set_x, dev_set_y = dev_set_x.get_value(), dev_set_y.get_value() test_set_x, test_set_y = test_set_x.get_value(), test_set_y.get_value() print "dev error:", model._errors(dev_set_x, dev_set_y) print "test error:", model._errors(test_set_x, test_set_y)