Example no. 1
def mergeddictmwdictreading2mwaudio(self, mergeddictmws, noundictreading):
    dictreading = []
    for _, mwpinyinwords in mergeddictmws:
        # The audio field will contain <random number> <mw> <noun> for every possible MW
        # NB: we explicitly encode the tokens rather than doing a lookup because e.g. 几 has
        # several readings, but we know precisely the one we want here and can avoid ambiguity
        dictreading.append(model.Word(random.choice(numbers.hanziquantitypinyin)))
        dictreading.extend(mwpinyinwords)
        dictreading.extend(noundictreading)
        # This comma doesn't currently do anything, but it might come in useful if we
        # add delay generation in the audio code later on
        dictreading.append(model.Word(model.Text(", ")))

    # Only apply the sandhi generator at this point: we have carefully avoided doing it for the
    # input up to now (especially for the noundictreading). Probably doesn't make a difference
    # with the current implementation, but better safe than sorry.
    return generateaudio(self.notifier, self.mediamanager, self.config,
                         transformations.tonesandhi(dictreading))
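For context, here is a minimal sketch of the assembly above using plain strings in place of the plugin's model.Word/model.Text tokens (the pinyin values are illustrative assumptions, not the real data):

import random

def merged_mw_reading(mw_readings, noun_reading):
    reading = []
    for mw_reading in mw_readings:
        reading.append(random.choice(["yi1", "liang3", "san1"]))  # <random number>
        reading.extend(mw_reading)                                # <mw>
        reading.extend(noun_reading)                              # <noun>
        reading.append(", ")                                      # possible future pause
    return reading

# e.g. merged_mw_reading([["ge4"], ["wei4"]], ["ren2"]) might yield
# ['san1', 'ge4', 'ren2', ', ', 'liang3', 'wei4', 'ren2', ', ']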
Example no. 2
    def updatefactalways(self, fact, reading):
        # We'd better still give it a miss if the update would fail
        if 'reading' not in fact:
            return

        # Identify probable pinyin in the user's freeform input, reformat them according to the
        # current rules, and pop the result back into the field
        fact['reading'] = preparetokens(self.config,
                                        [model.Word(*model.tokenize(reading))])
Example no. 3
def set_word_status():
    word = request.json["word"]
    state = request.json["state"]
    wordrow = model.Word.query.filter_by(word=word).first()
    if not wordrow:
        # New words are created in the "learning" state; note the requested
        # `state` is only applied to words that already exist
        wordrow = model.Word(word=word, state="learning", language=language())
        db.session.add(wordrow)
    else:
        wordrow.state = state
    db.session.commit()
    return jsonify(result=True)
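A usage sketch for this endpoint (the route path and port are assumptions; the snippet does not show the @app.route decorator):

import requests

# Hypothetical client call against a local development server
resp = requests.post("http://localhost:5000/set_word_status",
                     json={"word": "casa", "state": "known"})
assert resp.json() == {"result": True}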
Example no. 4
def finish_page(id, page_num):
    page = model.Article.page(id, language(), page_num)
    # Any word on the page that isn't tracked yet gets recorded as known
    for word in page.words_in_page:
        wordrow = model.Word.query.filter_by(word=word).first()
        if not wordrow:
            new_wordrow = model.Word(word=word,
                                     state="known",
                                     language=language())
            db.session.add(new_wordrow)
    db.session.commit()
    return redirect(url_for("read_article", id=id, page_num=page_num + 1))
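Examples 3 and 4 hand-roll the same look-up-then-create pattern; a small helper along these lines (an illustrative refactor, not code from either project) would remove the duplication:

def get_or_create_word(word, **defaults):
    # Return the tracked word, creating it with the given defaults if absent
    wordrow = model.Word.query.filter_by(word=word).first()
    if wordrow is None:
        wordrow = model.Word(word=word, **defaults)
        db.session.add(wordrow)
    return wordrow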
Example no. 5
def process_word_name(message):
    word_name = message.text
    word_name = word_name.capitalize()
    post = post_dict[message.chat.id]
    new_word = model.Word(word_name)
    post.words.append(new_word)
    post_dict[message.chat.id] = post
    msg = bot.send_message(
        message.chat.id,
        'If you do not send your username, the post will be named "@DextyOficialBot"',
        reply_markup=skip_markup())
    bot.register_next_step_handler(msg, process_part_of_speech)
Example no. 6
def process_word_name(message):
    word_name = message.text
    word_name = word_name.capitalize()
    post = post_dict[message.chat.id]
    new_word = model.Word(word_name)
    post.words.append(new_word)
    post_dict[message.chat.id] = post
    msg = bot.send_message(
        message.chat.id,
        'Send phonetic transcription. You can skip by pressing button "Skip"',
        reply_markup=skip_markup())
    bot.register_next_step_handler(msg, process_phonetic_transcription)
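The snippet leans on a skip_markup() helper it does not define; a minimal sketch of what it might look like with pyTelegramBotAPI (an assumption, not the project's actual code):

from telebot import types

def skip_markup():
    # One-button reply keyboard that lets the user skip the current step
    markup = types.ReplyKeyboardMarkup(resize_keyboard=True, one_time_keyboard=True)
    markup.add('Skip')
    return markup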
Example no. 7
def reformataudio(self, audio):
    output = u""
    for recognised, match in utils.regexparse(
            re.compile(ur"\[sound:([^\]]*)\]"), audio):
        if recognised:
            # Must be a sound tag - leave it well alone
            output += match.group(0)
        else:
            # Process this non-sound text as if it were a reading, in order to turn it into some tags
            output += generateaudio(self.notifier, self.mediamanager,
                                    self.config,
                                    [model.Word(*model.tokenize(match))])
    return output
Example no. 8
def process_word_name(message):
    word_name = message.text
    word_name = word_name.capitalize()
    post = post_dict[message.chat.id]
    new_word = model.Word(word_name)
    post.words.append(new_word)
    post_dict[message.chat.id] = post
    msg = bot.send_message(
        message.chat.id,
        'Send your username, e.g. @DextyOficial [ after sending, click [OK] ]',
        reply_markup=skip_markup())
    bot.register_next_step_handler(msg, process_part_of_speech)
Example no. 9
def meaningfromnumberlike(expression, dictionary):
    stringify = lambda digits: u"".join([unicode(digit) for digit in digits])
    handlers = [
        lambda digits: stringify(digits),
        lambda leadingdigits, trailingdigits: stringify(leadingdigits) + "." + stringify(trailingdigits),
        lambda digits: stringify(digits) + "AD",
        lambda digits: stringify(digits) + "%",
        lambda numdigits, denomdigits: stringify(numdigits) + "/" + stringify(denomdigits)
    ]
    
    # Generate a meaning from approximately Western expressions (almost useless, but does handle the nian suffix)
    text = parsewesternnumberlike(expression, *handlers)

    if not text:
        # Generate a meaning from approximately Chinese expressions
        text = parsechinesenumberlike(expression, *handlers)
        
    # Wrap the result in the appropriate gumpf
    return utils.bind_none(text, lambda nonnulltext: [[model.Word(model.Text(nonnulltext))]])
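To make the handler table concrete, this is what each handler produces when called directly (the digit lists are illustrative; the code is Python 2, hence unicode and the u"" literals):

# handlers[0]([1, 0, 0])      -> u"100"     plain number
# handlers[1]([3], [1, 4])    -> u"3.14"    decimal
# handlers[2]([1, 9, 9, 0])   -> u"1990AD"  year (the nian suffix)
# handlers[3]([5, 0])         -> u"50%"     percentage
# handlers[4]([1], [2])       -> u"1/2"     fraction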
Example no. 10
import model

word = model.Word()

word.make("MORSEISFUN228")
# Repeat the word's Morse rendition indefinitely
while True:
    word.pronounce()
Example no. 11
def reformatreading(self, reading):
    return preparetokens(self.config, [model.Word(*model.tokenize(reading))])
Example no. 12
def unpreparetokens(flat):
    return [model.Word(*model.tokenize(striphtml(flat)))]
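Read together with Examples 2 and 11, this is the inverse direction of preparetokens: assuming striphtml removes markup and model.tokenize re-parses the flat text, the two form an approximate round trip:

# flat  = preparetokens(config, words)   # tokens -> formatted field text
# words = unpreparetokens(flat)          # field text -> [model.Word(...)]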
Example no. 13
# (Preamble inferred: the snippet begins mid-way through the argument parser
# setup, so the imports and parser construction below are reconstructed)
import argparse

from pattern.en import parsetree
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker

import model

parser = argparse.ArgumentParser()
parser.add_argument('db_url', help='an SQLAlchemy database URL')
parser.add_argument('text', type=argparse.FileType('r'),
                    help='a plain text file')

args = parser.parse_args()

####################
# database connect #
####################
engine = create_engine(args.db_url)
Session = sessionmaker(bind=engine)
session = Session()

model.Base.metadata.create_all(engine)
model.session = session

tree = parsetree(args.text.read())

wid = 1  # sequential id assigned across all words in the document
for n in range(len(tree.sentences)):
    s = tree[n]
    sentence = session.query(model.Sentence).get(s.id)
    if sentence is None:
        sentence = model.Sentence(id=s.id, sentence=s.string)
        session.add(sentence)

    for w in s.words:
        session.add(
            model.Word(id=wid, word=w.string, pos=w.pos, sentence=sentence))
        wid += 1

session.commit()
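The script assumes a model module with SQLAlchemy mappings roughly like the following (a sketch inferred from the constructor calls above, not the project's actual definitions):

from sqlalchemy import Column, ForeignKey, Integer, String
from sqlalchemy.orm import declarative_base, relationship

Base = declarative_base()

class Sentence(Base):
    __tablename__ = 'sentences'
    id = Column(Integer, primary_key=True)
    sentence = Column(String)

class Word(Base):
    __tablename__ = 'words'
    id = Column(Integer, primary_key=True)
    word = Column(String)
    pos = Column(String)  # part-of-speech tag from the parse tree
    sentence_id = Column(Integer, ForeignKey('sentences.id'))
    sentence = relationship(Sentence)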
Example no. 14
from datetime import datetime

import model

if __name__ == '__main__':

    min_word_length = 4

    print "Loading words..."
    tstart = datetime.now()

    # Clear out any previously loaded words
    model.Word.objects.delete()

    with open('words.txt', 'r') as f:
        words = []
        for line in f:
            line = line.strip().lower()
            if len(line) >= min_word_length:
                words.append(line)

    for word in words:
        model.Word(word=word).save()

    tend = datetime.now()
    print "Loaded words: Total time: " + str((tend - tstart).seconds)
Example no. 15

# (Imports inferred from usage; the snippet begins mid-file)
import string
from datetime import datetime

from nltk.corpus import stopwords

import model


def delete_all_chars(word, chars):
    # Remove every character in `chars` from `word`
    for char in chars:
        word = word.replace(char, '')
    return word


if __name__ == '__main__':

    min_word_length = 4

    print "Loading words..."
    tstart = datetime.now()

    model.Word.objects.delete()

    stop_words = set(stopwords.words('english'))
    # Strip punctuation from each stop word, keeping them in a set so the
    # membership test below stays cheap
    stop_words = set(delete_all_chars(x, string.punctuation)
                     for x in stop_words)

    with open('words.txt', 'r') as f:
        for line in f:
            line = line.strip().lower()
            if len(line) >= min_word_length and line not in stop_words:
                model.Word(word=line).save()

    tend = datetime.now()
    print "Loaded words: Total time: " + str((tend - tstart).seconds)