import re

from flask import Flask
from nltk.corpus import stopwords
from nltk.stem import SnowballStemmer
from math import ceil
import config
#from tqdm import tqdm

# Assumed import path: SentimentModel is this project's Keras model class,
# defined elsewhere in the repository.
from model import SentimentModel

# create and configure the app
app = Flask(__name__, instance_relative_config=True)
app.config.from_object("config.Config")

filename = app.config['MODEL']
max_length = app.config['SENTENCE_MAX_LENGTH']
embedding_dim = app.config['EMBEDDING_DIM']
vocab_size = app.config['VOCABULARY_SIZE']

# Rebuild the model architecture, then restore its trained weights.
model = SentimentModel(embedding_dim, vocab_size, max_length)
model.load_weights(filename)

def preprocess(text, stem=False):
    """Lowercase the text, strip @-mentions, URLs, and punctuation,
    drop English stopwords, and optionally stem the remaining tokens."""
    stemmer = SnowballStemmer('english')
    text_cleaning_re = r"@\S+|https?:\S+|[^A-Za-z0-9]+"
    text = re.sub(text_cleaning_re, ' ', str(text).lower()).strip()
    tokens = []
    stop_words = stopwords.words('english')
    for token in text.split():
        if token not in stop_words:
            if stem:
                tokens.append(stemmer.stem(token))
            else:
                tokens.append(token)
    return " ".join(tokens)
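A minimal sketch of how a prediction route could wire preprocess into the restored model. The /predict path, the tokenizer and pad_sequences step, and the response fields are assumptions, since the snippet above does not show how requests are served:

from flask import request, jsonify
from tensorflow.keras.preprocessing.sequence import pad_sequences

@app.route('/predict', methods=['POST'])
def predict():
    # Assumption: `tokenizer` is a fitted keras Tokenizer loaded alongside
    # the model; its construction is not shown in the snippet above.
    text = preprocess(request.json['text'])
    seq = pad_sequences(tokenizer.texts_to_sequences([text]),
                        maxlen=max_length)
    score = float(model.predict(seq)[0][0])
    return jsonify({'text': text, 'score': score})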
                  cnn_filters=CNN_FILTERS,
                  dnn_units=DNN_UNITS,
                  model_output_classes=OUTPUT_CLASSES,
                  dropout_rate=DROPOUT_RATE)

# Binary classification uses binary cross-entropy; with more classes,
# sparse categorical cross-entropy is applied to integer labels.
if OUTPUT_CLASSES == 2:
    model.compile(loss="binary_crossentropy",
                  optimizer="adam",
                  metrics=["accuracy"])
else:
    model.compile(loss="sparse_categorical_crossentropy",
                  optimizer="adam",
                  metrics=["sparse_categorical_accuracy"])

# Restore the most recent training checkpoint.
latest = tf.train.latest_checkpoint('./new_weights')
model.load_weights(latest)
# model.load_weights('./weights/base_model_weights')
model.build(input_shape=(None, None))  # batch size and sequence length stay dynamic
# print(model.summary())

def encode_sentence(sent):
    # Map a raw sentence to its wordpiece token ids.
    return tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sent))

def get_prediction(sentence):
    tokens = encode_sentence(sentence)
    inputs = tf.expand_dims(tokens, 0)  # add a batch dimension
    # Forward pass; how the output is interpreted depends on OUTPUT_CLASSES.
    return model(inputs, training=False)
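For completeness, a sketch of how `tokenizer` could be provided and how get_prediction is called. The original snippet does not show the tokenizer's construction; the Hugging Face BertTokenizer below is an assumption that matches the tokenize / convert_tokens_to_ids calls used above:

from transformers import BertTokenizer

# Assumption: any tokenizer exposing tokenize() and convert_tokens_to_ids()
# works here; the vocabulary the checkpoint was trained with may differ.
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased")

output = get_prediction("this film was a pleasant surprise")
print(output.numpy())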