Beispiel #1
0
def align(username, lang_from, lang_to, id_from, id_to):
    """Start a background alignment of two previously split documents.

    The documents are picked by index from the user's "splitted" folders for
    ``lang_from`` and ``lang_to``. Their lines are read into memory, the
    processing state is initialised, and ``aligner.serialize_docs`` is started
    in a separate daemon process so that this call returns immediately.

    Args:
        username: Name of the user folder under ``con.UPLOAD_FOLDER``.
        lang_from: Language code of the source document.
        lang_to: Language code of the target document.
        id_from: Index of the source document in the file listing.
        id_to: Index of the target document in the file listing.

    Returns:
        ``con.EMPTY_SIMS`` when either index is past the end of its file
        listing, otherwise ``con.EMPTY_LINES`` once the worker has started.
    """
    splitted_folder_from = os.path.join(con.UPLOAD_FOLDER, username,
                                        con.SPLITTED_FOLDER, lang_from)
    splitted_folder_to = os.path.join(con.UPLOAD_FOLDER, username,
                                      con.SPLITTED_FOLDER, lang_to)
    files_from = helper.get_files_list(splitted_folder_from)
    files_to = helper.get_files_list(splitted_folder_to)

    # Validate the indices BEFORE indexing the listings: the original logged
    # files_from[id_from] first and raised IndexError for an unknown document
    # instead of reaching this "not found" branch.
    if len(files_from) < id_from + 1 or len(files_to) < id_to + 1:
        logging.info(f"[{username}]. Documents not found.")
        return con.EMPTY_SIMS

    file_from = files_from[id_from]
    file_to = files_to[id_to]
    logging.info(
        f"[{username}]. Aligning documents. {file_from}, {file_to}.")

    processing_folder_from_to = os.path.join(con.UPLOAD_FOLDER, username,
                                             con.PROCESSING_FOLDER, lang_from,
                                             lang_to)
    helper.check_folder(processing_folder_from_to)
    # The processing file is named after the source document.
    processing_from_to = os.path.join(processing_folder_from_to, file_from)

    res_img = os.path.join(con.STATIC_FOLDER, con.IMG_FOLDER, username,
                           f"{file_from}.png")
    res_img_best = os.path.join(con.STATIC_FOLDER, con.IMG_FOLDER, username,
                                f"{file_from}.best.png")
    splitted_from = os.path.join(splitted_folder_from, file_from)
    splitted_to = os.path.join(splitted_folder_to, file_to)

    logging.info(f"[{username}]. Cleaning images.")
    helper.clean_img_user_foler(username, file_from)

    logging.debug(
        f"[{username}]. Preparing for alignment. {splitted_from}, {splitted_to}."
    )
    with open(splitted_from, mode="r", encoding="utf-8") as input_from, \
         open(splitted_to, mode="r", encoding="utf-8") as input_to:
        lines_from = input_from.readlines()
        lines_to = input_to.readlines()

    #TODO refactor to queues (!)
    # NOTE(review): the meaning of the state tuple fields is defined by
    # state.init_processing, which is not visible here -- confirm there.
    state.init_processing(
        processing_from_to,
        (con.PROC_INIT, config.TEST_RESTRICTION_MAX_BATCHES, 0))
    # Run the alignment in a daemon process so the caller is not blocked.
    alignment = Process(target=aligner.serialize_docs,
                        args=(lines_from, lines_to, processing_from_to,
                              res_img, res_img_best, lang_from, lang_to),
                        daemon=True)
    alignment.start()

    return con.EMPTY_LINES
Beispiel #2
0
def items(username, lang):
    """Upload a raw document (POST) or list the user's raw documents.

    On POST, the file sent under the form field named ``lang`` (if any) is
    stored in the user's raw folder for that language and then split into
    sentences via ``splitter.split_by_sentences``. On any other method the
    listing of the raw folder is returned.

    Args:
        username: Name of the user folder under ``con.UPLOAD_FOLDER``.
        lang: Language code; also the name of the upload form field.

    Returns:
        ``('', 200)`` after a POST, ``('', 400)`` when the uploaded file has
        no usable name, otherwise ``{"items": {lang: [...]}}`` with the raw
        file listing.
    """
    #TODO add language code validation

    helper.create_folders(username, lang)

    # Load a document.
    if request.method == "POST":
        if lang in request.files:
            file = request.files[lang]
            # The file name is supplied by the client. Keep only its last
            # path component (treating "\" as a separator too) so that a name
            # such as "../../x" cannot escape the user's raw folder.
            safe_name = os.path.basename(
                (file.filename or "").replace("\\", "/"))
            if safe_name in ("", ".", ".."):
                logging.debug(
                    f"[{username}]. Rejected upload with invalid file name.")
                return ('', 400)
            logging.debug(
                f"[{username}]. Loading lang document {safe_name}.")
            raw_path = os.path.join(con.UPLOAD_FOLDER, username,
                                    con.RAW_FOLDER, lang, safe_name)
            file.save(raw_path)
            splitter.split_by_sentences(safe_name, lang, username)
            logging.debug(f"[{username}]. Success. {safe_name} is loaded.")
        return ('', 200)

    # Return the documents list.
    raw_folder = os.path.join(con.UPLOAD_FOLDER, username, con.RAW_FOLDER,
                              lang)
    return {"items": {lang: helper.get_files_list(raw_folder)}}
Beispiel #3
0
def download_processsing(username, lang_from, lang_to, file_id, lang,
                         file_format, threshold):
    """Export an alignment result to a file and send it as an attachment.

    Args:
        username: Name of the user folder under ``con.UPLOAD_FOLDER``.
        lang_from: Source language code of the alignment.
        lang_to: Target language code of the alignment.
        file_id: Index of the processing file in the folder listing.
        lang: Language whose text is exported for the plain-text format; also
            part of the download file name.
        file_format: ``con.FORMAT_TMX`` or ``con.FORMAT_PLAIN``; also used as
            the download file extension.
        threshold: Passed through to the ``output`` writers.

    Returns:
        The response produced by ``send_file`` for the generated file.

    Aborts with 404 when ``helper.check_file`` rejects the file and with 400
    when ``file_format`` is not one of the two supported formats.
    """
    logging.debug(
        f"[{username}]. Downloading {lang_from}-{lang_to} {file_id} {lang} result document."
    )
    processing_folder = os.path.join(con.UPLOAD_FOLDER, username,
                                     con.PROCESSING_FOLDER, lang_from, lang_to)
    files = helper.get_files_list(processing_folder)
    # Validate before indexing: the original evaluated files[file_id] first,
    # so an unknown id could raise IndexError before the 404 was reached.
    if not helper.check_file(processing_folder, files, file_id):
        abort(404)
    # Without this guard an unknown format writes nothing and send_file then
    # fails on a file that does not exist.
    if file_format not in (con.FORMAT_TMX, con.FORMAT_PLAIN):
        abort(400)
    processing_file = os.path.join(processing_folder, files[file_id])

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    download_folder = os.path.join(con.UPLOAD_FOLDER, username,
                                   con.DOWNLOAD_FOLDER)
    helper.check_folder(download_folder)
    # File name pattern: <processing file stem>_<lang>_<timestamp>.<format>
    download_file = os.path.join(
        download_folder, "{0}_{1}_{2}.{3}".format(
            os.path.splitext(files[file_id])[0], lang, timestamp, file_format))

    logging.debug(
        f"[{username}]. Preparing file for downloading {download_file}.")

    if file_format == con.FORMAT_TMX:
        output.save_tmx(processing_file, download_file, lang_from, lang_to,
                        threshold)
    elif file_format == con.FORMAT_PLAIN:
        output.save_plain_text(processing_file,
                               download_file,
                               first_lang=lang == lang_from,
                               threshold=threshold)

    logging.debug(
        f"[{username}]. File {download_file} prepared. Sent to user.")
    return send_file(download_file, as_attachment=True)
Beispiel #4
0
def stop_alignment(username, lang_from, lang_to, file_id):
    """Stop the alignment of a processing file by destroying its state.

    Args:
        username: Name of the user folder under ``con.UPLOAD_FOLDER``.
        lang_from: Source language code of the alignment.
        lang_to: Target language code of the alignment.
        file_id: Index of the processing file in the folder listing.

    Returns:
        ``('', 200)`` once ``state.destroy_processing_state`` has been called.

    Aborts with 404 when ``helper.check_file`` rejects the file.
    """
    logging.debug(
        f"[{username}]. Stopping alignment for {lang_from}-{lang_to} {file_id}."
    )
    processing_folder = os.path.join(con.UPLOAD_FOLDER, username,
                                     con.PROCESSING_FOLDER, lang_from, lang_to)
    files = helper.get_files_list(processing_folder)
    # Validate before indexing: the original evaluated files[file_id] first,
    # so an unknown id could raise IndexError before the 404 was reached.
    if not helper.check_file(processing_folder, files, file_id):
        abort(404)
    processing_file = os.path.join(processing_folder, files[file_id])
    state.destroy_processing_state(processing_file)
    return ('', 200)
Beispiel #5
0
def download_splitted(username, lang, id):
    """Send one of the user's split documents as an attachment.

    The document is chosen by its index ``id`` in the listing of the user's
    "splitted" folder for ``lang``. Aborts with 404 when the index is past
    the end of the listing or the resulting path is not a regular file.
    """
    logging.debug(f"[{username}]. Downloading {lang} {id} splitted document.")

    splitted_folder = os.path.join(con.UPLOAD_FOLDER, username,
                                   con.SPLITTED_FOLDER, lang)
    available = helper.get_files_list(splitted_folder)
    if id >= len(available):
        abort(404)

    document_path = os.path.join(splitted_folder, available[id])
    if not os.path.isfile(document_path):
        logging.debug(f"[{username}]. Document not found.")
        abort(404)

    logging.debug(
        f"[{username}]. Document found. Path: {document_path}. Sent to user.")
    return send_file(document_path, as_attachment=True)
Beispiel #6
0
def main():
    """Copy new, correctly sized images from the source to the target folder.

    Reads the source and target locations and the device dimensions from the
    application config, keeps the source files that ``helper.get_valid_images``
    accepts for those dimensions, maps each one to a ``.jpg`` path under
    ``<target>/<image type>/`` and hands the pairs returned by
    ``helper.get_valid_target_files`` to ``helper.transfer_files``.
    Progress is reported on stdout.
    """
    app_config = AppConfig()
    app_config.setup_dirs(app_config.template_dir, app_config.data_dir,
                          app_config.log_dir)
    settings = app_config.cfg

    source_loc = os.path.expanduser(settings.get("ws_location"))
    print("Windows Spotlight Source Location:", source_loc)

    target_loc = os.path.expanduser(settings.get("target_location"))
    print("Target Location:", target_loc)

    source_files = helper.get_files_list(source_loc)
    print("Total {} files found in source location.".format(len(source_files)))

    # One (width, height) pair per configured device.
    valid_dims = [
        (props.get("image_width"), props.get("image_height"))
        for props in settings.get("device_properties").values()
    ]
    print("List of valid dimensions:", valid_dims)

    # List[Tuple[str, str]]: (image file, image type)
    valid_source_set = helper.get_valid_images(source_files, valid_dims)
    print("Valid source files:", len(valid_source_set))

    # Pair every accepted source file with its destination path.
    source_target_pairs = [
        (img_file,
         os.path.join(
             target_loc, img_type,
             helper.add_file_extension(os.path.basename(img_file),
                                       ext='.jpg')))
        for img_file, img_type in valid_source_set
    ]
    valid_source_files = [source for source, _ in source_target_pairs]
    target_files = [target for _, target in source_target_pairs]

    # List[Tuple[str, str]]
    valid_file_set = helper.get_valid_target_files(valid_source_files,
                                                   target_files)
    print("Files to extract:", len(valid_file_set))

    if len(valid_file_set) == 0:
        print("No new files to transfer!")
    else:
        helper.transfer_files(valid_file_set, mode='copy')
0
def edit_processing(username, lang_from, lang_to, file_id):
    """Replace the text of one line in a processing (alignment) file.

    The edit is described by the request form fields ``line_id`` (a
    non-negative integer), ``text`` (defaults to ``''``) and ``text_type``
    (defaults to ``con.TYPE_TO``).

    Args:
        username: Name of the user folder under ``con.UPLOAD_FOLDER``.
        lang_from: Source language code of the alignment.
        lang_to: Target language code of the alignment.
        file_id: Index of the processing file in the folder listing.

    Returns:
        ``('', 200)`` after ``editor.edit_doc`` has been called.

    Aborts with 404 when the processing file is rejected by
    ``helper.check_file`` or is not a regular file, and with 400 when
    ``line_id`` is missing, not an integer or negative.
    """
    processing_folder = os.path.join(con.UPLOAD_FOLDER, username,
                                     con.PROCESSING_FOLDER, lang_from, lang_to)
    files = helper.get_files_list(processing_folder)
    # Validate before indexing: the original evaluated files[file_id] first,
    # so an unknown id could raise IndexError before the 404 was reached.
    if not helper.check_file(processing_folder, files, file_id):
        abort(404)
    processing_file = os.path.join(processing_folder, files[file_id])
    logging.debug(f"[{username}]. Editing file. {processing_file}.")
    if not os.path.isfile(processing_file):
        abort(404)

    # A missing line_id falls back to -1, which is rejected below.
    line_id, line_id_is_int = helper.tryParseInt(
        request.form.get("line_id", -1))
    text = request.form.get("text", '')
    text_type = request.form.get("text_type", con.TYPE_TO)
    if line_id_is_int and line_id >= 0:
        editor.edit_doc(processing_file, line_id, text, text_type)
    else:
        abort(400)
    return ('', 200)
Beispiel #8
0
def get_processing(username, lang_from, lang_to, file_id, count, page):
    """Return one page of aligned lines from a processing file.

    Args:
        username: Name of the user folder under ``con.UPLOAD_FOLDER``.
        lang_from: Source language code of the alignment.
        lang_to: Target language code of the alignment.
        file_id: Index of the processing file in the folder listing.
        count: Page size. A value of 0 or less disables pagination and
            returns every line.
        page: 1-based page number.

    Returns:
        ``{"items": [...], "meta": {"page": ..., "total_pages": ...}}``.
        Each item holds the source text, its line id, the original source
        text, the candidate translations (``"trans"``) and the selected
        translation (``"selected"``).

    Aborts with 404 when ``helper.check_file`` rejects the file.
    """
    processing_folder = os.path.join(con.UPLOAD_FOLDER, username,
                                     con.PROCESSING_FOLDER, lang_from, lang_to)
    files = helper.get_files_list(processing_folder)
    # Validate before indexing: the original evaluated files[file_id] first,
    # so an unknown id could raise IndexError before the 404 was reached.
    if not helper.check_file(processing_folder, files, file_id):
        abort(404)
    processing_file = os.path.join(processing_folder, files[file_id])

    res = []
    lines_count = 0
    shift = (page - 1) * count
    for line_from_orig, line_from, line_to, candidates in helper.read_processing(
            processing_file):
        lines_count += 1
        # Skip lines outside the requested page (only when paginating).
        if count > 0 and (lines_count <= shift or lines_count > shift + count):
            continue
        res.append({
            "text":
            line_from[0].text.strip(),
            "line_id":
            line_from[0].line_id,
            "text_orig":
            line_from_orig.text.strip(),
            "trans": [{
                "text": t[0].text.strip(),
                "line_id": t[0].line_id,
                "sim": t[1]
            } for t in candidates],
            "selected": {
                "text": line_to[0].text.strip(),
                "line_id": line_to[0].line_id,
                "sim": line_to[1]
            }
        })

    if count > 0:
        # Ceiling division: a partially filled last page still counts.
        total_pages = (lines_count + count - 1) // count
    else:
        # Pagination is off, so everything is on a single page. The original
        # divided by ``count`` here and raised ZeroDivisionError for 0.
        total_pages = 1 if lines_count else 0
    meta = {"page": page, "total_pages": total_pages}
    return {"items": res, "meta": meta}
Beispiel #9
0
def splitted(username, lang, id, count, page):
    """Return one page of lines from a split document, plus statistics.

    Args:
        username: Name of the user folder under ``con.UPLOAD_FOLDER``.
        lang: Language code of the document.
        id: Index of the document in the "splitted" folder listing.
        count: Page size. A value of 0 or less disables pagination and
            returns every line.
        page: 1-based page number.

    Returns:
        ``con.EMPTY_LINES`` when ``id`` is past the end of the listing,
        ``{"items": {lang: []}}`` when the path is not a regular file,
        otherwise ``{"items": {lang: [(line, line_number), ...]},
        "meta": {lang: {...}}}`` where the meta block carries
        ``lines_count``, ``symbols_count``, ``page`` and ``total_pages``.
        Line numbers are 1-based; both counters cover the whole file, not
        just the returned page.
    """
    splitted_folder = os.path.join(con.UPLOAD_FOLDER, username,
                                   con.SPLITTED_FOLDER, lang)
    files = helper.get_files_list(splitted_folder)
    if len(files) < id + 1:
        return con.EMPTY_LINES
    path = os.path.join(splitted_folder, files[id])
    if not os.path.isfile(path):
        return {"items": {lang: []}}

    lines = []
    lines_count = 0
    symbols_count = 0
    shift = (page - 1) * count

    with open(path, mode='r', encoding='utf-8') as input_file:
        for line in input_file:
            lines_count += 1
            symbols_count += len(line)
            # Skip lines outside the requested page (only when paginating).
            if count > 0 and (lines_count <= shift
                              or lines_count > shift + count):
                continue
            lines.append((line, lines_count))

    if count > 0:
        # Ceiling division: a partially filled last page still counts.
        total_pages = (lines_count + count - 1) // count
    else:
        # Pagination is off, so everything is on a single page. The original
        # divided by ``count`` here and raised ZeroDivisionError for 0.
        total_pages = 1 if lines_count else 0
    meta = {
        "lines_count": lines_count,
        "symbols_count": symbols_count,
        "page": page,
        "total_pages": total_pages
    }
    return {"items": {lang: lines}, "meta": {lang: meta}}