parser.add_argument("-a", action="store_true", help="all editions")
    parser.add_argument("-c",
                        default="config.ini",
                        help="path to config.ini file")
    parser.add_argument(
        "-f",
        default="",
        help="editions file, calculate for editions in this file")

    args = parser.parse_args()

    cparser = configparser.ConfigParser()
    cparser.read(args.c)
    elastic_search_index_files_path = cparser['section']['elastic_dir']

    utils.setup(args.c)
    align_reader = GeneralAlignReader()

    if args.b:
        for edition in align_reader.bert_files:
            create_json_file_for_edition(edition,
                                         elastic_search_index_files_path)
    elif args.n:
        for lang in align_reader.all_langs:
            for edition in align_reader.lang_files[lang]:
                if edition not in align_reader.bert_files:
                    create_json_file_for_edition(
                        edition, elastic_search_index_files_path)
    elif args.e != "":
        edition = args.e
        if edition in align_reader.bert_files:
Example #2
0
    #os.remove(intersect_file)
    log_state(src_lang_name, trg_lang_name, "end")


if __name__ == "__main__":
    # Command-line entry point. Arguments are parsed and validated first so
    # that `--help` and usage errors work without CONFIG_PATH being set and
    # without paying for utils.setup() / GeneralAlignReader().
    parser = argparse.ArgumentParser(
        description="extract the alignments for languages mentioned in lang_files.txt file."
        "-s start number, -e end number, -o alignment output dir, -i alignments index dir, -a aligner (sim_align, other)",
        epilog="example: python eflomal_align_maker.py -s 0 -e 200, -o /output -i /index -a other")
    # type=int keeps command-line values the same type as the integer
    # defaults; without it argparse hands back strings for "-s 0 -e 200".
    parser.add_argument("-s", type=int, default=0, help="start number")
    parser.add_argument("-e", type=int, default=sys.maxsize, help="end number")
    parser.add_argument("-o", default="", help="alignment output dir")
    parser.add_argument("-i", default="", help="alignments index dir")
    parser.add_argument("-a", default="sim_align",
                        help="aligner (sim_align, other)")
    # NOTE(review): the meaning of -w is not visible in this part of the
    # file, so its parsing is left exactly as it was (string when given on
    # the command line, int 1 by default).
    parser.add_argument("-w", default=1)

    args = parser.parse_args()
    if args.o == "" or args.i == "":
        # parser.error() prints the usage line plus this message to stderr
        # and exits with status 2, so callers can detect the failure.
        parser.error("Please specify index and output dirs")

    # Project setup reads its config path from the CONFIG_PATH environment
    # variable (raises KeyError if it is unset).
    utils.setup(os.environ['CONFIG_PATH'])
    align_reader = GeneralAlignReader()

    format = "%(asctime)s: %(message)s"
    logging.basicConfig(format=format, level=logging.INFO, datefmt="%H:%M:%S")

    aligner = args.a
    output_path = args.o
    index_path = args.i

    lang_files = align_reader.lang_files
Example #3
0
from flask import Flask
from flask_compress import Compress
from config import Config
from flask_cors import CORS
from app import utils

# Build the Flask application object and attach its extensions.
app = Flask(__name__)
CORS(app)  # required for Cross-Origin Resource Sharing
# Overrides Flask's static folder by writing the private attribute directly;
# the path is the literal "../static".
app._static_folder = "../static"
# Load settings from the Config class imported from the config module.
app.config.from_object(Config)
Compress(app)

# Initialise the project utilities with the path stored under CONFIG_PATH in
# the Flask config.
utils.setup(app.config['CONFIG_PATH'])
# Imported here rather than at the top of the file, i.e. only after `app`
# exists and utils.setup() has run.
# NOTE(review): presumably app.align depends on one or both — confirm before
# moving this import.
from app import align
Example #4
0
# Cache object constructed around read_CES_senteces_file. Both names are
# defined outside this excerpt.
# NOTE(review): presumably this memoises the reader's results — confirm
# against the Cache class.
ces_cache = Cache(read_CES_senteces_file)     
# Module-level state, all starting empty. Nothing in this excerpt fills these
# in; presumably the conversion functions elsewhere in the file populate them
# — verify against their users before relying on any particular shape.
lang_files = {}
langs_order = []
file_edition_mapping = {}
bert_100 = []
prefixes = {}
numversesplit = {}

if __name__ == "__main__":
    # Command-line entry point: read the config named by -c, prepare the
    # output directories, convert every CES alignment file, then write the
    # resulting config files.
    parser = argparse.ArgumentParser(
        description="Create ParCourE format corpora from CES format.",
        epilog="example: python -m convert_corpus_from_CES_format -c config.ini")

    parser.add_argument("-c", default="", help="path to the config file")

    args = parser.parse_args()
    if args.c == "":
        # parser.error() prints the usage line plus this message to stderr
        # and exits with status 2, instead of reporting success (status 0)
        # through the site-provided exit() helper.
        parser.error("please specify config file")

    # The call order below is kept exactly as in the original script.
    read_config(args.c)
    create_dirs()
    save_config(args.c)
    setup(args.c)

    for file in CES_alignment_files:
        process_alignment_file(file)

    save_PC_config_files()