def sendData():
    """Tokenize user-submitted content into sentences and render them.

    Reads either 'input-url' or 'input-text' from the submitted form
    ('input-url' takes precedence), tokenizes it into a sentence list,
    stores that list in the session, and renders the ask.html template.
    """
    if request.form.get('input-url'):
        value = request.form.get('input-url')
        session['sentence_list'] = tokenize('url', value)
    elif request.form.get('input-text'):
        value = request.form.get('input-text')
        session['sentence_list'] = tokenize('text', value)
    # BUG FIX: if neither form field was submitted and the session has no
    # prior value, session['sentence_list'] raised KeyError. Fall back to
    # an empty list so the template always renders.
    return render_template('ask.html', sentence_list=session.get('sentence_list', []))
def updatefactalways(self, fact, reading):
    """Rewrite the fact's reading field from the user's freeform input.

    Skips facts that have no reading field at all, since the update
    would fail on them anyway.
    """
    if 'reading' not in fact:
        return
    # Identify probable pinyin in the freeform input, reformat it per the
    # current configuration rules, and write the result back to the field.
    tokens = model.tokenize(reading)
    fact['reading'] = preparetokens(self.config, [model.Word(*tokens)])
def reformataudio(self, audio): output = u"" for recognised, match in utils.regexparse( re.compile(ur"\[sound:([^\]]*)\]"), audio): if recognised: # Must be a sound tag - leave it well alone output += match.group(0) else: # Process as if this non-sound tag were a reading, in order to turn it into some tags output += generateaudio(self.notifier, self.mediamanager, self.config, [model.Word(*model.tokenize(match))])
def reformataudio(self, audio): output = u"" for recognised, match in utils.regexparse(re.compile(ur"\[sound:([^\]]*)\]"), audio): if recognised: # Must be a sound tag - leave it well alone output += match.group(0) else: # Process as if this non-sound tag were a reading, in order to turn it into some tags output += generateaudio(self.notifier, self.mediamanager, self.config, [model.Word(*model.tokenize(match))])
def reformatreading(self, reading):
    """Return the reading reformatted according to the current config rules."""
    tokens = model.tokenize(reading)
    word = model.Word(*tokens)
    return preparetokens(self.config, [word])
def unpreparetokens(flat):
    """Strip HTML from flat text and re-tokenize it into a one-Word list."""
    plain = striphtml(flat)
    tokens = model.tokenize(plain)
    return [model.Word(*tokens)]
# Training driver: load the news dataset, tokenize it, train the
# classifier from the local `model` module, then save and plot results.
plt.switch_backend('agg')  # headless backend so plotting works without a display
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers
#%matplotlib inline
import pickle
import model

texts = []
labels = []

# Load and clean the dataset (drop missing rows, reset the index).
df = pd.read_csv('../dataset/dataset.csv')
df = df.dropna()
df = df.reset_index(drop=True)

print("Information on the dataset")
print('Shape of dataset ', df.shape)
print(df.columns)
print('No. of unique news types: ', len(set(df['Type'])))
print(df.head())

texts, labels, sorted_type, indexed_type = model.df_to_list(df, texts, labels)

# Persist the label index; a context manager guarantees the handle is closed
# (the original left the file object from open() unclosed).
with open('indexed_type.sav', 'wb') as fh:
    pickle.dump(indexed_type, fh)

word_index, embedding_matrix, data, labels, sequences = model.tokenize(texts, labels)

# BUG FIX: the original wrote `model, history = model.model(...)`, rebinding
# the name `model` from the imported module to the trained network. The
# subsequent `model.save_model(model)` and `model.plot(history)` then looked
# up those functions on the Keras model object instead of the module.
trained_model, history = model.model(word_index, embedding_matrix, sorted_type, data, labels)
model.save_model(trained_model)
model.plot(history)
def predict(text: str):
    """Run the classifier over *text* and wrap the result for the API response."""
    # Tokenize once, then feed both tensors to the model.
    input_id, attention_mask = model.tokenize(text)
    label = model.predict(input_id, attention_mask)
    return {"prediction": label}