def lines(self, human=False, abbr=False):
    """Return the line count of the first file of this corpus.

    Only ``self.corpus_files[0]`` is consulted; the remaining files are
    not looked at.

    Args:
        human: When true, the count is passed through
            ``utils.format_number`` instead of being returned as-is.
        abbr: Forwarded to ``utils.format_number`` as ``abbr``; only used
            when ``human`` is true.

    Returns:
        ``0`` when the corpus has no files (even if ``human`` is true),
        otherwise the raw or formatted line count of the first file.
    """
    # Guard clause: nothing to report for a corpus without files.
    if len(self.corpus_files) == 0:
        return 0

    line_count = self.corpus_files[0].file.lines
    if human:
        return utils.format_number(line_count, abbr=abbr)
    return line_count
def train_console(id):
    """Render the training console page for a single engine.

    Looks the engine up by ``id``, tries to read ``config.yaml`` from the
    engine's folder under ``app.config['PRELOADED_ENGINES_FOLDER']``,
    collects the ``Corpus_Engine`` rows flagged ``is_info`` grouped by
    phase, and hands everything to the ``train_console.html.jinja2``
    template.

    Args:
        id: Primary key of the engine to display.

    Returns:
        The value of ``render_template`` for the console page.

    Notes:
        There is no handling for an unknown ``id``: if the query returns
        no row, the attribute access on the result raises.
    """
    # Function-scope import so this block does not depend on the file's
    # import section.
    import logging

    engine = Engine.query.filter_by(id=id).first()

    engine_dir = os.path.realpath(
        os.path.join(app.config['PRELOADED_ENGINES_FOLDER'], engine.path))
    config_file_path = os.path.join(engine_dir, 'config.yaml')

    # Loading the config is best-effort: the page is still rendered with
    # config=None when the file cannot be read or parsed. Only Exception
    # is caught (not SystemExit / KeyboardInterrupt) and the failure is
    # logged instead of vanishing silently.
    config = None
    try:
        with open(config_file_path, 'r') as config_file:
            # NOTE(review): FullLoader is not meant for untrusted input;
            # confirm these config files are always produced by the
            # application itself, otherwise switch to yaml.safe_load.
            config = yaml.load(config_file, Loader=yaml.FullLoader)
    except Exception as err:
        logging.getLogger(__name__).warning(
            "Could not load engine config %s: %s", config_file_path, err)

    launched = datetime.datetime.timestamp(engine.launched)
    finished = (datetime.datetime.timestamp(engine.finished)
                if engine.finished else None)

    corpora_raw = Corpus_Engine.query.filter_by(engine_id=engine.id,
                                                is_info=True).all()

    # phase -> list of (corpus, abbreviated selected size) tuples, in
    # query order.
    corpora = {}
    for corpus_entry in corpora_raw:
        corpora.setdefault(corpus_entry.phase, []).append(
            (corpus_entry.corpus,
             utils.format_number(corpus_entry.selected_size, abbr=True)))

    elapsed_format = (utils.seconds_to_timestring(engine.runtime)
                      if engine.runtime else None)

    return render_template("train_console.html.jinja2",
                           page_name="train",
                           engine=engine,
                           config=config,
                           launched=launched,
                           finished=finished,
                           elapsed=engine.runtime,
                           corpora=corpora,
                           elapsed_format=elapsed_format)
def chars(self, human=False, abbr=False):
    """Return the total character count over every file of this corpus.

    Args:
        human: When true, the total is passed through
            ``utils.format_number`` instead of being returned as-is.
        abbr: Forwarded to ``utils.format_number`` as ``abbr``; only used
            when ``human`` is true.

    Returns:
        The sum of ``file.chars`` over ``self.corpus_files`` (``0`` when
        there are none), raw or formatted.
    """
    total_chars = sum(entry.file.chars for entry in self.corpus_files)
    if human:
        return utils.format_number(total_chars, abbr=abbr)
    return total_chars
def words(self, human=False, abbr=False):
    """Return the total word count over every file of this corpus.

    Args:
        human: When true, the total is passed through
            ``utils.format_number`` instead of being returned as-is.
        abbr: Forwarded to ``utils.format_number`` as ``abbr``; only used
            when ``human`` is true.

    Returns:
        The sum of ``file.words`` over ``self.corpus_files`` (``0`` when
        there are none), raw or formatted.
    """
    total_words = sum(entry.file.words for entry in self.corpus_files)
    if human:
        return utils.format_number(total_words, abbr=abbr)
    return total_words
def library_corpora_feed():
    """Serve the corpora library as a DataTables server-side JSON feed.

    Reads the DataTables paging/sorting/search parameters from
    ``request.form``, builds one summary row per corpus for sorting and
    paging, and then emits one output row per *file* of every corpus on
    the requested page, so files of the same corpus stay grouped.

    Form fields read: ``public``, ``draw``, ``search[value]``, ``start``,
    ``length``, ``order[0][column]``, ``order[0][dir]``.

    Returns:
        A ``jsonify`` response with the keys ``draw``, ``recordsTotal``,
        ``recordsFiltered`` and ``data``.

    Raises:
        TypeError / ValueError: if ``draw``, ``start``, ``length`` or
            ``order[0][column]`` is missing or not an integer.

    Notes:
        Paging is applied *before* the search filter, so a search only
        narrows the rows of the current page and ``recordsFiltered``
        counts the matching corpora on that page.
    """
    public = request.form.get('public') == "true"

    if public:
        library_objects = user_utils.get_user_corpora(public=True).all()
    else:
        library_objects = user_utils.get_user_corpora().all()

    user_library = [lc.corpus for lc in library_objects]

    # We are not using the datatables helper since this is a specific case
    # and we need more control to group corpora
    draw = int(request.form.get('draw'))
    search = request.form.get('search[value]')
    start = int(request.form.get('start'))
    length = int(request.form.get('length'))
    order = int(request.form.get('order[0][column]'))
    direction = request.form.get('order[0][dir]')

    # One summary row per corpus; used for sorting, paging and searching.
    corpus_rows = [
        [
            corpus.id,
            corpus.name,
            corpus.source.name + (corpus.target.name if corpus.target else ""),
            corpus.lines(),
            corpus.words(),
            corpus.chars(),
            corpus.uploaded()
        ]
        for corpus in user_library
    ]

    # The corpora are already loaded; keep them addressable by id instead
    # of issuing one extra query per row further down.
    corpora_by_id = {corpus.id: corpus for corpus in user_library}

    records_total = len(corpus_rows)
    records_filtered = 0

    if order:
        # NOTE(review): column 0 is never sorted on because 0 is falsy,
        # and the list is reversed when the client asks for 'asc', which
        # looks inverted. Both are kept as-is because the frontend may
        # rely on them; confirm against the table definition.
        corpus_rows.sort(key=lambda c: c[order],
                         reverse=(direction == 'asc'))

    # DataTables sends length == -1 to request every row; a plain
    # [start:start + length] slice would silently drop the last row.
    if length < 0:
        corpus_rows = corpus_rows[start:]
    else:
        corpus_rows = corpus_rows[start:start + length]

    needle = search.lower() if search else None

    corpus_data = []
    for row in corpus_rows:
        corpus = corpora_by_id[row[0]]

        # Sorted in place on purpose: "corpus_size" below reads
        # corpus.corpus_files[0] and therefore sees this ordering.
        file_entries = corpus.corpus_files
        file_entries.sort(key=lambda f: f.role)

        file_data = []
        for file_entry in file_entries:
            file = file_entry.file
            uploaded_date = datetime.fromtimestamp(
                datetime.timestamp(file.uploaded)).strftime("%d/%m/%Y")
            file_data.append([
                file.id,
                file.name,
                file.language.name,
                utils.format_number(file.lines),
                utils.format_number(file.words),
                corpus.topic.name if corpus.topic else "",
                uploaded_date,
                {
                    "corpus_owner":
                        file.uploader.id == user_utils.get_uid()
                        if file.uploader else False,
                    "corpus_uploader":
                        file.uploader.username if file.uploader else "MutNMT",
                    "corpus_id": corpus.id,
                    "corpus_name": corpus.name,
                    "corpus_description": corpus.description,
                    "corpus_source": corpus.source.name,
                    "corpus_target":
                        corpus.target.name if corpus.target else "",
                    "corpus_public": corpus.public,
                    "corpus_size": corpus.corpus_files[0].file.lines,
                    "corpus_preview":
                        url_for('library.corpora_preview', id=corpus.id),
                    "corpus_share":
                        url_for('library.library_share_toggle',
                                type='library_corpora', id=corpus.id),
                    "corpus_delete":
                        url_for('library.library_delete',
                                id=corpus.id, type='library_corpora'),
                    "corpus_grab":
                        url_for('library.library_grab',
                                id=corpus.id, type='library_corpora'),
                    "corpus_ungrab":
                        url_for('library.library_ungrab',
                                id=corpus.id, type='library_corpora'),
                    "corpus_export":
                        url_for('library.library_export',
                                id=corpus.id, type="library_corpora"),
                    "file_preview":
                        url_for('data.data_preview', file_id=file.id)
                }
            ])

        if search:
            # A corpus matches when the search text occurs in the string
            # form of any summary column or of any of its file rows.
            matched = any(needle in str(col).lower()
                          for col in row + file_data)
            if not matched:
                continue
            records_filtered += 1

        corpus_data.extend(file_data)

    return jsonify({
        "draw": draw + 1,
        "recordsTotal": records_total,
        "recordsFiltered": records_filtered if search else records_total,
        "data": corpus_data
    })