Example #1
def sendData():
    if (request.form.get('input-url')):
        value = request.form.get('input-url')
        session['sentence_list'] = tokenize('url', value)
    elif (request.form.get('input-text')):
        value = request.form.get('input-text')
        session['sentence_list'] = tokenize('text', value)
    return render_template('ask.html', sentence_list=session['sentence_list'])
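Example #1 shows only the view function body; below is a minimal sketch of the Flask wiring it appears to assume. The route path, HTTP methods, secret key, and the stand-in tokenize() helper are illustrative assumptions, not taken from the original project:

from flask import Flask, request, session, render_template

app = Flask(__name__)
app.secret_key = 'change-me'  # sessions need a secret key; placeholder value

def tokenize(kind, value):
    # Hypothetical stand-in: split plain text into sentences. A real helper
    # would fetch and parse the page first when kind == 'url'.
    return [s.strip() for s in value.split('.') if s.strip()]

@app.route('/ask', methods=['GET', 'POST'])  # assumed route and methods
def sendData():
    if request.form.get('input-url'):
        session['sentence_list'] = tokenize('url', request.form.get('input-url'))
    elif request.form.get('input-text'):
        session['sentence_list'] = tokenize('text', request.form.get('input-text'))
    # .get with a default avoids a KeyError when neither field was submitted
    return render_template('ask.html', sentence_list=session.get('sentence_list', []))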
Example #2
 def updatefactalways(self, fact, reading):
     # We better still give it a miss if the update will fail
     if 'reading' not in fact:
         return
 
     # Identify probable pinyin in the user's freeform input, reformat them according to the
     # current rules, and pop the result back into the field
     fact['reading'] = preparetokens(self.config, [model.Word(*model.tokenize(reading))])
Example #3
    def updatefactalways(self, fact, reading):
        # We better still give it a miss if the update will fail
        if 'reading' not in fact:
            return

        # Identify probable pinyin in the user's freeform input, reformat them according to the
        # current rules, and pop the result back into the field
        fact['reading'] = preparetokens(self.config,
                                        [model.Word(*model.tokenize(reading))])
Example #4
 def reformataudio(self, audio):
     output = u""
     for recognised, match in utils.regexparse(
             re.compile(ur"\[sound:([^\]]*)\]"), audio):
         if recognised:
             # Must be a sound tag - leave it well alone
             output += match.group(0)
         else:
             # Process as if this non-sound tag were a reading, in order to turn it into some tags
             output += generateaudio(self.notifier, self.mediamanager,
                                     self.config,
                                     [model.Word(*model.tokenize(match))])
Example #5
 def reformataudio(self, audio):
     output = u""
     for recognised, match in utils.regexparse(re.compile(ur"\[sound:([^\]]*)\]"), audio):
         if recognised:
             # Must be a sound tag - leave it well alone
             output += match.group(0)
         else:
             # Process as if this non-sound tag were a reading, in order to turn it into some tags
             output += generateaudio(self.notifier, self.mediamanager, self.config, [model.Word(*model.tokenize(match))])
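Examples #4 and #5 consume utils.regexparse as an iterator of (recognised, match) pairs: a regex match object when the pattern matched, otherwise the plain text between matches. Here is a minimal sketch of a helper with that behaviour, inferred from how the loop uses it; the Pinyin Toolkit's actual implementation may differ:

import re

def regexparse_sketch(pattern, text):
    # Walk the input in order, yielding (True, match_object) for every match
    # and (False, plain_text) for the unmatched text in between.
    pos = 0
    for match in pattern.finditer(text):
        if match.start() > pos:
            yield False, text[pos:match.start()]
        yield True, match
        pos = match.end()
    if pos < len(text):
        yield False, text[pos:]

# Sound tags pass through untouched; the surrounding text would be processed.
for recognised, chunk in regexparse_sketch(re.compile(r"\[sound:([^\]]*)\]"),
                                           "ni3 hao3 [sound:hello.mp3]"):
    print(recognised, chunk.group(0) if recognised else chunk)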
Example #6
 def reformatreading(self, reading):
     return preparetokens(self.config, [model.Word(*model.tokenize(reading))])
Example #7
def unpreparetokens(flat):
    return [model.Word(*model.tokenize(striphtml(flat)))]
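unpreparetokens in Example #7 strips HTML from the flat field text before re-tokenizing it. A minimal regex-based sketch of what a striphtml helper of this kind typically does (the real implementation may use a proper HTML parser):

import re

def striphtml_sketch(flat):
    # Drop tags and unescape the entities most likely to appear in Anki fields.
    text = re.sub(r"<[^>]+>", "", flat)
    return text.replace("&nbsp;", " ").replace("&amp;", "&")

print(striphtml_sketch("<b>ni3</b>&nbsp;hao3"))  # -> ni3 hao3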
Example #8
import matplotlib.pyplot as plt
plt.switch_backend('agg')
import pandas as pd
from keras import backend as K
from keras.engine.topology import Layer
from keras import initializers
#%matplotlib inline
import pickle
import model


texts = []
labels = []



df = pd.read_csv('../dataset/dataset.csv')
df = df.dropna()
df = df.reset_index(drop=True)
print("Information on the dataset")
print('Shape of dataset ', df.shape)
print(df.columns)
print('No. of unique news types: ', len(set(df['Type'])))
print(df.head())


texts, labels, sorted_type, indexed_type = model.df_to_list(df, texts, labels)
with open('indexed_type.sav', 'wb') as f:
    pickle.dump(indexed_type, f)
word_index, embedding_matrix, data, labels, sequences = model.tokenize(texts, labels)
# Bind the trained network to a new name so the `model` module is not shadowed
trained_model, history = model.model(word_index, embedding_matrix, sorted_type, data, labels)
model.save_model(trained_model)
model.plot(history)
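Example #8 relies on a project-specific model.tokenize(texts, labels) that returns a word index, an embedding matrix, padded data, categorical labels, and the raw sequences. Below is a plausible sketch of such a helper using the classic Keras preprocessing API; the constants, the zero-filled embedding matrix, and the assumption that labels are already integer class indices are illustrative, not the project's actual code:

import numpy as np
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.utils import to_categorical

MAX_WORDS = 20000      # assumed vocabulary cap
MAX_LEN = 200          # assumed padded sequence length
EMBEDDING_DIM = 100    # assumed embedding size

def tokenize_sketch(texts, labels):
    tokenizer = Tokenizer(num_words=MAX_WORDS)
    tokenizer.fit_on_texts(texts)
    sequences = tokenizer.texts_to_sequences(texts)
    word_index = tokenizer.word_index
    data = pad_sequences(sequences, maxlen=MAX_LEN)
    labels = to_categorical(np.asarray(labels))
    # Placeholder embedding matrix; a real pipeline would fill each row from
    # pretrained vectors (e.g. GloVe) keyed by word_index.
    embedding_matrix = np.zeros((len(word_index) + 1, EMBEDDING_DIM))
    return word_index, embedding_matrix, data, labels, sequences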
Example #9
def predict(text: str):
    input_id, attention_mask = model.tokenize(text)
    prediction = model.predict(input_id, attention_mask)
    return {"prediction": prediction}