Example #1
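These snippets are shown without their imports or helper definitions. A minimal sketch of the imports they appear to rely on, assuming standalone Keras 2 (the lr/decay arguments to SGD below suggest that vintage; under TensorFlow 2 the same names live in tensorflow.keras). word2ix, encodeSample, index2rating, and the preprocess module are project-local helpers, sketched after Example #1:

import numpy as np
from keras import callbacks
from keras.models import Sequential
from keras.layers import (Embedding, Conv1D, MaxPooling1D,
                          GlobalAveragePooling1D, Dense, Dropout, Flatten, LSTM)
from keras.optimizers import SGD
from keras.preprocessing.sequence import pad_sequences
from sklearn import metrics

import preprocess  # project-local module providing get_max_len()
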
def tern_convNN(trainSamples, trainTags, testSamples, testTags, embed_size,
                epoc):
    # prep data
    # First set up word to index encoding
    word_to_index = word2ix(trainSamples)
    # Create encoded list of samples
    encoded_samples = [encodeSample(s, word_to_index) for s in trainSamples]
    # pad all samples
    maximum_length = max(preprocess.get_max_len(trainSamples),
                         preprocess.get_max_len(testSamples))
    padded_s = pad_sequences(encoded_samples,
                             maxlen=maximum_length,
                             padding='post')
    # Encode the test samples with the training vocabulary so the indices line
    # up with the embedding layer (encodeSample is assumed to map tokens that
    # are missing from the training vocabulary to the padding index)
    encoded_test = [encodeSample(s, word_to_index) for s in testSamples]
    padded_t = pad_sequences(encoded_test,
                             maxlen=maximum_length,
                             padding='post')
    # build model
    model = Sequential()
    # +1 leaves room for the padding index 0 used by pad_sequences
    model.add(
        Embedding(len(word_to_index) + 1, embed_size,
                  input_length=maximum_length))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))

    # compile and train the model
    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    earlystop = callbacks.EarlyStopping(monitor='accuracy',
                                        min_delta=0,
                                        patience=3)
    model.fit(padded_s,
              trainTags,
              batch_size=16,
              epochs=epoc,
              callbacks=[earlystop])
    #score = model.evaluate(padded_t, testTags, batch_size=16)
    predictionProbs = model.predict(padded_t)
    # Map softmax outputs and one-hot test tags back to rating labels
    predictions = [index2rating[int(np.argmax(p))] for p in predictionProbs]
    trueTags = [index2rating[int(np.argmax(t))] for t in testTags]
    confusion = metrics.confusion_matrix(trueTags, predictions)
    return confusion
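
All three examples also lean on helpers that are not part of the snippets: word2ix, encodeSample, index2rating, and preprocess.get_max_len. A rough sketch of what they are assumed to do, treating each sample as a list of tokens (these are stand-ins, not the original implementations):

def word2ix(samples):
    # Assign each distinct token an integer index, reserving 0 for padding.
    vocab = {}
    for sample in samples:
        for token in sample:
            if token not in vocab:
                vocab[token] = len(vocab) + 1
    return vocab

def encodeSample(sample, word_to_index):
    # Replace tokens with their indices; unknown tokens fall back to the padding index.
    return [word_to_index.get(token, 0) for token in sample]

# index2rating is assumed to map a class index back to a rating label,
# e.g. for the three-class ("tern") models:
index2rating = {0: -1, 1: 0, 2: 1}

# preprocess.get_max_len(samples) is assumed to return the length of the longest sample.
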
Example #2
def tern_basic(trainSamples, trainTags, testSamples, testTags, embed_size,
               epoc):
    # First set up word to index encoding
    word_to_index = word2ix(trainSamples)
    # Create encoded list of samples
    encoded_samples = [encodeSample(s, word_to_index) for s in trainSamples]
    # pad all samples
    maximum_length = max(preprocess.get_max_len(trainSamples),
                         preprocess.get_max_len(testSamples))
    padded = pad_sequences(encoded_samples,
                           maxlen=maximum_length,
                           padding='post')
    # define the model
    model = Sequential()
    # +1 leaves room for the padding index 0 used by pad_sequences
    model.add(
        Embedding(len(word_to_index) + 1, embed_size,
                  input_length=maximum_length))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))
    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    # compile
    model.compile(optimizer=sgd,
                  loss='categorical_crossentropy',
                  metrics=['acc'])
    # fit model
    trainTags = np.asarray(trainTags)
    earlystop = callbacks.EarlyStopping(monitor='acc',
                                        min_delta=0,
                                        patience=10)
    model.fit(padded, trainTags, epochs=epoc, verbose=0, callbacks=[earlystop])
    # prepare test data
    # Reuse the training vocabulary so the test indices match the embedding layer
    encoded_test = [encodeSample(s, word_to_index) for s in testSamples]
    padded_test = pad_sequences(encoded_test,
                                maxlen=maximum_length,
                                padding='post')
    # test
    predictionProbs = model.predict(padded_test)
    # Map softmax outputs and one-hot test tags back to rating labels
    predictions = [index2rating[int(np.argmax(p))] for p in predictionProbs]
    trueTags = [index2rating[int(np.argmax(t))] for t in testTags]
    confusion = metrics.confusion_matrix(trueTags, predictions)
    #score = model.evaluate(padded_test, testTags, verbose=1)
    return confusion
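
A hypothetical call, assuming the samples are pre-tokenised word lists and the tags are one-hot vectors over the three rating classes (toy data, only to show the expected shapes):

train_samples = [["great", "movie"], ["terrible", "plot"], ["it", "was", "fine"]]
train_tags = [[0, 0, 1], [1, 0, 0], [0, 1, 0]]
test_samples = [["fine", "movie"]]
test_tags = [[0, 0, 1]]

confusion = tern_basic(train_samples, train_tags, test_samples, test_tags,
                       embed_size=50, epoc=5)
print(confusion)
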
Example #3
def lstm(trainSamples, trainTags, testSamples, testTags, embed_size, epoc):
    # prep data
    # First set up word to index encoding
    word_to_index = word2ix(trainSamples)
    # Create encoded list of samples
    encoded_samples = [encodeSample(s, word_to_index) for s in trainSamples]
    # pad all samples
    maximum_length = max(preprocess.get_max_len(trainSamples),
                         preprocess.get_max_len(testSamples))
    padded_s = pad_sequences(encoded_samples,
                             maxlen=maximum_length,
                             padding='post')
    # Reuse the training vocabulary so the test indices match the embedding layer
    encoded_test = [encodeSample(s, word_to_index) for s in testSamples]
    padded_t = pad_sequences(encoded_test,
                             maxlen=maximum_length,
                             padding='post')
    # Make model
    model = Sequential()
    # +1 leaves room for the padding index 0; note that this variant hard-codes
    # the embedding width (256) instead of using the embed_size argument
    model.add(Embedding(len(word_to_index) + 1, output_dim=256))
    model.add(LSTM(128))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])

    earlystop = callbacks.EarlyStopping(monitor='accuracy',
                                        min_delta=0,
                                        patience=3)
    model.fit(padded_s,
              trainTags,
              batch_size=16,
              epochs=epoc,
              callbacks=[earlystop])
    #score = model.evaluate(padded_t, testTags, batch_size=16)
    # Threshold the sigmoid outputs; the -1/+1 mapping assumes testTags use
    # -1/+1 labels, while the training tags must be 0/1 for binary_crossentropy
    predictions = model.predict(padded_t).ravel()
    predictions[predictions >= 0.5] = 1
    predictions[predictions < 0.5] = -1
    confusion = metrics.confusion_matrix(testTags, predictions)
    return confusion
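
A similar hypothetical call for the binary LSTM variant, assuming training tags are 0/1 (as the sigmoid/binary_crossentropy pairing requires) and test tags use the -1/+1 convention that the prediction threshold maps onto:

train_samples = [["great", "movie"], ["terrible", "plot"]]
train_tags = np.asarray([1, 0])
test_samples = [["great", "plot"]]
test_tags = [1]

confusion = lstm(train_samples, train_tags, test_samples, test_tags,
                 embed_size=50, epoc=3)
print(confusion)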