Example #1
import re

from flask import Flask
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from math import ceil

import config
from model import SentimentModel  # assumption: project-local module defining the model class
#from tqdm import tqdm

# create and configure the app
app = Flask(__name__, instance_relative_config=True)
app.config.from_object("config.Config")

filename = app.config['MODEL']
max_length = app.config['SENTENCE_MAX_LENGTH']
embedding_dim = app.config['EMBEDDING_DIM']
vocab_size = app.config['VOCABULARY_SIZE']
model = SentimentModel(embedding_dim, vocab_size, max_length)
model.load_weights(filename)


def preprocess(text, stem=False):
    """Lowercase, strip mentions/URLs/punctuation, and drop English stopwords."""
    stemmer = SnowballStemmer('english')
    text_cleaning_re = r"@\S+|https?:\S+|http?:\S|[^A-Za-z0-9]+"  # mentions, URLs, non-alphanumerics
    text = re.sub(text_cleaning_re, ' ', str(text).lower()).strip()
    tokens = []
    stop_words = stopwords.words('english')

    for token in text.split():
        if token not in stop_words:
            if stem:
                tokens.append(stemmer.stem(token))
            else:
                tokens.append(token)
    return " ".join(tokens)
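
A quick sanity check of what preprocess() returns; the sample tweet below is made up, and the exact output depends on NLTK's stopword list:

# hypothetical input, for illustration only
sample = "@user check https://example.com it's a GREAT movie!!!"
print(preprocess(sample))             # -> "check great movie"
print(preprocess(sample, stem=True))  # -> "check great movi"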
Example #2
import tensorflow as tf

# NOTE: the opening of this call was truncated in the source. The class name
# and the first two keyword arguments are assumptions; the remaining
# arguments are from the original.
model = TEXT_MODEL(vocabulary_size=VOCAB_LENGTH,
                   embedding_dimensions=EMB_DIM,
                   cnn_filters=CNN_FILTERS,
                   dnn_units=DNN_UNITS,
                   model_output_classes=OUTPUT_CLASSES,
                   dropout_rate=DROPOUT_RATE)

if OUTPUT_CLASSES == 2:
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
else:
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer="adam",
                  metrics=["sparse_categorical_accuracy"])

latest = tf.train.latest_checkpoint('./new_weights')
model.load_weights(latest)

# model.load_weights('./weights/base_model_weights')
model.build(input_shape=(None, None))  # (batch_size, sequence_length)

# print(model.summary())


def encode_sentence(sent):
    # `tokenizer` is a BERT WordPiece tokenizer assumed to be created earlier.
    return tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sent))
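
For completeness, a minimal sketch of the tokenizer this helper relies on; the source never shows its construction, so using the Hugging Face transformers tokenizer here is an assumption:

# assumed setup; any WordPiece tokenizer exposing tokenize() and
# convert_tokens_to_ids() would work the same way
from transformers import BertTokenizer
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

print(encode_sentence("the movie was great"))
# e.g. [1996, 3185, 2001, 2307]; exact ids depend on the vocabulary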


def get_prediction(sentence):
    tokens = encode_sentence(sentence)
    inputs = tf.expand_dims(tokens, 0)  # add a batch dimension