def tern_convNN(trainSamples, trainTags, testSamples, testTags, embed_size, epoc):
    """Train a 1-D CNN ternary classifier and return its test confusion matrix.

    Args:
        trainSamples: iterable of tokenized training samples (consumed by
            ``word2ix`` / ``encodeSample``).
        trainTags: one-hot (length-3) training labels.
        testSamples: iterable of tokenized test samples.
        testTags: one-hot (length-3) test labels, as plain lists.
        embed_size: dimensionality of the learned word embeddings.
        epoc: maximum number of training epochs.

    Returns:
        sklearn confusion matrix of true vs. predicted ratings.
    """
    # Build the vocabulary from the TRAINING data only.
    word_to_index = word2ix(trainSamples)
    encoded_samples = [encodeSample(s, word_to_index) for s in trainSamples]

    # Pad train and test to a common length so predict-time shapes match.
    maximum_length = max(preprocess.get_max_len(trainSamples),
                         preprocess.get_max_len(testSamples))
    padded_s = pad_sequences(encoded_samples, maxlen=maximum_length, padding='post')

    # BUG FIX: test samples must be encoded with the TRAINING vocabulary.
    # The original built a fresh index from the test set, so the integer ids
    # fed to the trained Embedding layer referred to different words entirely.
    # NOTE(review): assumes encodeSample tolerates out-of-vocabulary words
    # (maps or drops them) — confirm against its implementation.
    encoded_test = [encodeSample(s, word_to_index) for s in testSamples]
    padded_t = pad_sequences(encoded_test, maxlen=maximum_length, padding='post')

    # Two stacked conv blocks, then global pooling into a 3-way softmax.
    # NOTE(review): Embedding input_dim is len(word_to_index); if indices are
    # 1-based this should be len + 1 — verify how word2ix numbers entries.
    model = Sequential()
    model.add(Embedding(len(word_to_index), embed_size, input_length=maximum_length))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(Conv1D(64, 3, activation='relu'))
    model.add(MaxPooling1D(3))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(Conv1D(128, 3, activation='relu'))
    model.add(GlobalAveragePooling1D())
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))

    model.compile(loss='categorical_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    # Stop early once training accuracy plateaus for 3 epochs.
    earlystop = callbacks.EarlyStopping(monitor='accuracy', min_delta=0, patience=3)
    model.fit(padded_s, trainTags, batch_size=16, epochs=epoc, callbacks=[earlystop])

    # Decode softmax rows / one-hot tags back to rating labels via argmax.
    predictionProbs = model.predict(padded_t)
    predictions = [index2rating[int(np.argmax(p))] for p in predictionProbs]
    trueTags = [index2rating[int(np.argmax(t))] for t in testTags]

    return metrics.confusion_matrix(trueTags, predictions)
def tern_basic(trainSamples, trainTags, testSamples, testTags, embed_size, epoc):
    """Train a dense (MLP-over-embeddings) ternary classifier; return confusion matrix.

    Args:
        trainSamples: iterable of tokenized training samples.
        trainTags: one-hot (length-3) training labels.
        testSamples: iterable of tokenized test samples.
        testTags: one-hot (length-3) test labels, as plain lists.
        embed_size: dimensionality of the learned word embeddings.
        epoc: maximum number of training epochs.

    Returns:
        sklearn confusion matrix of true vs. predicted ratings.
    """
    # Build the vocabulary from the TRAINING data only.
    word_to_index = word2ix(trainSamples)
    encoded_samples = [encodeSample(s, word_to_index) for s in trainSamples]

    # Pad train and test to a common length so predict-time shapes match.
    maximum_length = max(preprocess.get_max_len(trainSamples),
                         preprocess.get_max_len(testSamples))
    padded = pad_sequences(encoded_samples, maxlen=maximum_length, padding='post')

    # Embedding -> Flatten -> two dropout-regularized dense layers -> softmax.
    # NOTE(review): Embedding input_dim is len(word_to_index); if indices are
    # 1-based this should be len + 1 — verify how word2ix numbers entries.
    model = Sequential()
    model.add(Embedding(len(word_to_index), embed_size, input_length=maximum_length))
    model.add(Flatten())
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(3, activation='softmax'))

    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['acc'])

    # Stop early once training accuracy plateaus for 10 epochs.
    trainTags = np.asarray(trainTags)
    earlystop = callbacks.EarlyStopping(monitor='acc', min_delta=0, patience=10)
    model.fit(padded, trainTags, epochs=epoc, verbose=0, callbacks=[earlystop])

    # BUG FIX: test samples must be encoded with the TRAINING vocabulary.
    # The original built a fresh index from the test set, so the integer ids
    # fed to the trained Embedding layer referred to different words entirely.
    # NOTE(review): assumes encodeSample tolerates out-of-vocabulary words
    # (maps or drops them) — confirm against its implementation.
    encoded_test = [encodeSample(s, word_to_index) for s in testSamples]
    padded_test = pad_sequences(encoded_test, maxlen=maximum_length, padding='post')

    # Decode softmax rows / one-hot tags back to rating labels via argmax.
    predictionProbs = model.predict(padded_test)
    predictions = [index2rating[int(np.argmax(p))] for p in predictionProbs]
    trueTags = [index2rating[int(np.argmax(t))] for t in testTags]

    return metrics.confusion_matrix(trueTags, predictions)
def lstm(trainSamples, trainTags, testSamples, testTags, embed_size, epoc):
    """Train a binary LSTM classifier and return its test confusion matrix.

    Args:
        trainSamples: iterable of tokenized training samples.
        trainTags: binary training labels.
        testSamples: iterable of tokenized test samples.
        testTags: binary test labels (apparently in {-1, 1}; see note below).
        embed_size: unused by this model (the embedding dim is fixed at 256);
            kept for signature parity with the other model builders.
        epoc: maximum number of training epochs.

    Returns:
        sklearn confusion matrix of true vs. predicted labels.
    """
    # Build the vocabulary from the TRAINING data only.
    word_to_index = word2ix(trainSamples)
    encoded_samples = [encodeSample(s, word_to_index) for s in trainSamples]

    # Pad train and test to a common length so predict-time shapes match.
    maximum_length = max(preprocess.get_max_len(trainSamples),
                         preprocess.get_max_len(testSamples))
    padded_s = pad_sequences(encoded_samples, maxlen=maximum_length, padding='post')

    # BUG FIX: test samples must be encoded with the TRAINING vocabulary.
    # The original built a fresh index from the test set, so the integer ids
    # fed to the trained Embedding layer referred to different words entirely.
    # NOTE(review): assumes encodeSample tolerates out-of-vocabulary words
    # (maps or drops them) — confirm against its implementation.
    encoded_test = [encodeSample(s, word_to_index) for s in testSamples]
    padded_t = pad_sequences(encoded_test, maxlen=maximum_length, padding='post')

    # Embedding -> LSTM -> sigmoid for binary classification.
    # NOTE(review): binary_crossentropy with a sigmoid expects 0/1 labels,
    # yet predictions below are mapped to {-1, 1} — confirm how trainTags
    # and testTags are encoded; a mismatch here would corrupt training.
    model = Sequential()
    model.add(Embedding(len(word_to_index), output_dim=256))
    model.add(LSTM(128))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(loss='binary_crossentropy',
                  optimizer='rmsprop',
                  metrics=['accuracy'])
    # Stop early once training accuracy plateaus for 3 epochs.
    earlystop = callbacks.EarlyStopping(monitor='accuracy', min_delta=0, patience=3)
    model.fit(padded_s, trainTags, batch_size=16, epochs=epoc, callbacks=[earlystop])

    # Threshold sigmoid outputs at 0.5 into {-1, 1}; ravel the (n, 1)
    # prediction column into a flat label vector for the confusion matrix.
    predictions = model.predict(padded_t).ravel()
    predictions[predictions >= 0.5] = 1
    predictions[predictions < 0.5] = -1

    return metrics.confusion_matrix(testTags, predictions)