def create_tokens(collection_id, files, is_g2p):
    """Parse uploaded token files, persist Token rows and write them to disk.

    Args:
        collection_id: ID of the Collection the tokens belong to.
        files: iterable of uploaded file objects (Werkzeug-style, with
            ``.stream`` and ``.filename``).
        is_g2p: if True each line is tab-separated
            ``text<TAB>source<TAB>score<TAB>pron...``; otherwise each line is
            a bare token text.

    Returns:
        The list of created Token instances.
    """
    tokens = []
    num_errors = 0
    error_line, error_file = None, None
    for file in files:
        if is_g2p:
            lines = file.stream.read().decode("utf-8").split('\n')
            for idx, line in enumerate(lines):
                try:
                    # Expected columns: text, source, score, then any number
                    # of pronunciation columns (rejoined with tabs below).
                    text, source, score, *pron = line.split('\t')
                    pron = '\t'.join(pron)
                    token = Token(text, file.filename, collection_id,
                                  score=score, pron=pron, source=source)
                    tokens.append(token)
                    db.session.add(token)
                except ValueError:
                    # Malformed line (too few columns) — remember the first
                    # failure location so we can report it to the user.
                    num_errors += 1
                    error_line = idx + 1
                    error_file = file.filename
        else:
            content = file.stream.read().decode("utf-8").strip().split('\n')
            for c in content:  # each line of the file is one token
                if not c:
                    # Skip blank interior lines; previously c[-1] raised
                    # IndexError on an empty string.
                    continue
                if c[-1] == ',':  # this is a hack for the SQL stuff.
                    c = c[:-1]
                token = Token(c, file.filename, collection_id)
                tokens.append(token)
                db.session.add(token)
    if num_errors > 0:
        flash(
            f'{num_errors} villur komu upp, fyrsta villan í {error_file} í línu {error_line}',
            category='danger')
    db.session.commit()

    # Save each token's text to disk (Token.save_to_disk may update
    # path-related columns, hence the second commit).
    for token in tokens:
        token.save_to_disk()
    db.session.commit()

    # Refresh the collection's cached token counts.
    collection = Collection.query.get(collection_id)
    collection.update_numbers()
    db.session.commit()
    return tokens