Example #1

The function below reads uploaded token files into a collection: g2p files are parsed as tab-separated lines of text, source, score, and pronunciations, while plain files contribute one token per line.
# flash comes from Flask; Token, Collection, and db are assumed to be the
# application's own model classes and SQLAlchemy session.
from flask import flash

def create_tokens(collection_id, files, is_g2p):
    tokens = []
    num_errors = 0
    error_line, error_file = None, None
    for file in files:
        if is_g2p:
            lines = file.stream.read().decode("utf-8").split('\n')
            for idx, line in enumerate(lines):
                try:
                    # each line holds: text, source, score, then one or more
                    # tab-separated pronunciations
                    text, src, score, *pron = line.split('\t')
                    pron = '\t'.join(pron)
                    token = Token(text,
                                  file.filename,
                                  collection_id,
                                  score=score,
                                  pron=pron,
                                  source=src)
                    tokens.append(token)
                    db.session.add(token)
                except ValueError:
                    # malformed line: count it, but only remember the first
                    # error so the flash message below stays accurate
                    num_errors += 1
                    if error_line is None:
                        error_line = idx + 1
                        error_file = file.filename
                    continue
        else:
            # one token per line
            content = file.stream.read().decode("utf-8").strip().split('\n')
            for line in content:
                # drop a trailing comma so it does not end up in the token
                # text when the row is inserted into the database
                if line.endswith(','):
                    line = line[:-1]
                token = Token(line, file.filename, collection_id)
                tokens.append(token)
                db.session.add(token)

    if num_errors > 0:
        # Icelandic: "{num_errors} errors occurred, the first one in
        # {error_file} at line {error_line}"
        flash(
            f'{num_errors} villur komu upp, fyrsta villan í {error_file} í línu {error_line}',
            category='danger')

    db.session.commit()

    # save token text to file
    for token in tokens:
        token.save_to_disk()
    db.session.commit()

    # update the cached token counts on the collection
    collection = Collection.query.get(collection_id)
    collection.update_numbers()
    db.session.commit()
    return tokens
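
A minimal usage sketch of how a Flask route might call create_tokens(). The route path, the form field names ('files', 'is_g2p'), the app object, and the 'collection_detail' endpoint are assumptions for illustration, not part of the example above.

from flask import flash, redirect, request, url_for

@app.route('/collections/<int:collection_id>/upload', methods=['POST'])
def upload_tokens(collection_id):
    # uploaded FileStorage objects; each g2p line is expected to look like:
    #   text<TAB>source<TAB>score<TAB>pron1<TAB>pron2...
    files = request.files.getlist('files')            # hypothetical field name
    is_g2p = request.form.get('is_g2p') == 'on'       # hypothetical checkbox
    tokens = create_tokens(collection_id, files, is_g2p)
    flash(f'{len(tokens)} tokens were created.', category='success')
    return redirect(url_for('collection_detail', collection_id=collection_id))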