def pack_article_xml(file_xml_path):
    original_filename, ign = files.extract_filename_ext_by_path(file_xml_path)

    obj_xml = xml.file2objXML(file_xml_path)

    sps_package = SPS_Package(obj_xml, original_filename)

    SPS_PKG_PATH = config.get("SPS_PKG_PATH")
    INCOMPLETE_SPS_PKG_PATH = config.get("INCOMPLETE_SPS_PKG_PATH")

    pkg_path = os.path.join(SPS_PKG_PATH, original_filename)
    bad_pkg_path = os.path.join(INCOMPLETE_SPS_PKG_PATH, original_filename)

    asset_replacements = list(set(sps_package.replace_assets_names()))
    logger.info("%s possui %s ativos digitais", file_xml_path, len(asset_replacements))

    renditions, renditions_metadata = sps_package.get_renditions_metadata()
    logger.info("%s possui %s renditions", file_xml_path, len(renditions))

    package_path = packing_assets(
        asset_replacements + renditions,
        pkg_path,
        bad_pkg_path,
        sps_package.package_name,
    )

    files.write_file(
        os.path.join(package_path, "manifest.json"),
        json.dumps(renditions_metadata),
    )

    xml.objXML2file(
        os.path.join(package_path, "%s.xml" % (sps_package.package_name)), obj_xml
    )
def convert_article_ALLxml(spy=False):
    """Convert every HTML/XML file found in the source folder."""

    logger.debug("Starting XML conversion, it may take some time.")
    logger.warning(
        "If you are facing problems with Python crashing during "
        "conversion try to export this environment "
        "variable: `OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES`"
    )

    xmls = [
        os.path.join(config.get("SOURCE_PATH"), xml)
        for xml in files.xml_files_list(config.get("SOURCE_PATH"))
    ]

    jobs = [{"file_xml_path": xml, "spy": spy} for xml in xmls]

    with tqdm(total=len(xmls)) as pbar:

        def update_bar(pbar=pbar):
            pbar.update(1)

        def log_exceptions(exception, job, logger=logger):
            logger.error(
                "Could not convert file '%s'. The exception '%s' was raised.",
                job["file_xml_path"],
                exception,
            )

        DoJobsConcurrently(
            convert_article_xml,
            jobs=jobs,
            executor=concurrent.futures.ProcessPoolExecutor,
            max_workers=int(config.get("PROCESSPOOL_MAX_WORKERS")),
            exception_callback=log_exceptions,
            update_bar=update_bar,
        )
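# Minimal sketch of a DoJobsConcurrently-style helper, inferred from how it is
# called in this module (jobs as kwargs dicts, a pluggable executor, an
# exception callback and a progress callback). The real helper in this project
# may differ; this is only an assumption for illustration.
import concurrent.futures


def do_jobs_concurrently_sketch(
    func,
    jobs,
    executor=concurrent.futures.ThreadPoolExecutor,
    max_workers=4,
    exception_callback=None,
    update_bar=None,
):
    with executor(max_workers=max_workers) as pool:
        future_to_job = {pool.submit(func, **job): job for job in jobs}
        for future in concurrent.futures.as_completed(future_to_job):
            try:
                future.result()
            except Exception as exc:  # report the failure, keep processing
                if exception_callback:
                    exception_callback(exc, future_to_job[future])
            finally:
                if update_bar:
                    update_bar()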
def pack_article_xml(file_xml_path):
    original_filename, ign = files.extract_filename_ext_by_path(file_xml_path)

    obj_xml = xml.file2objXML(file_xml_path)

    sps_package = SPS_Package(obj_xml, original_filename)

    SPS_PKG_PATH = config.get("SPS_PKG_PATH")
    INCOMPLETE_SPS_PKG_PATH = config.get("INCOMPLETE_SPS_PKG_PATH")

    pkg_path = os.path.join(SPS_PKG_PATH, original_filename)
    bad_pkg_path = os.path.join(INCOMPLETE_SPS_PKG_PATH, original_filename)

    files.make_empty_dir(pkg_path)

    asset_replacements = list(set(sps_package.replace_assets_names()))
    logger.info("%s possui %s ativos digitais", file_xml_path, len(asset_replacements))

    package_path = packing_assets(
        asset_replacements, pkg_path, bad_pkg_path, sps_package.package_name
    )

    xml.objXML2file(
        os.path.join(package_path, "%s.xml" % (sps_package.package_name)), obj_xml
    )
def get_and_write(pid, stage_path, poison_pill):
    def save_file(stage_path, file_path, documents_pid, article_content):
        logger.debug("\t Salvando arquivo '%s'", file_path)
        files.write_file(file_path, article_content)
        files.register_latest_stage(stage_path, documents_pid)

    if poison_pill.poisoned:
        return

    documents_pid = pid.strip()

    logger.debug("\t coletando dados do Documento '%s'", documents_pid)

    xml_article = article.ext_article_txt(documents_pid)
    if xml_article:
        save_file(
            stage_path,
            os.path.join(config.get("SOURCE_PATH"), "%s.xml" % documents_pid),
            documents_pid,
            xml_article,
        )

    json_article = article.ext_article_json(documents_pid)
    if json_article:
        save_file(
            stage_path,
            os.path.join(config.get("SOURCE_PATH"), "%s.json" % documents_pid),
            documents_pid,
            json_article,
        )
def download_asset(old_path, new_fname, dest_path):
    """Returns msg, if error"""
    if old_path.startswith("http"):
        location = old_path
    else:
        try:
            location = urljoin(config.get("STATIC_URL_FILE"), old_path.strip())
        except ValueError as exc:
            return 'cannot join URL parts "%s" and "%s": %s' % (
                config.get("STATIC_URL_FILE"),
                old_path,
                exc,
            )

    # Check whether the file has already been downloaded
    filename_m, ext_m = files.extract_filename_ext_by_path(old_path)
    dest_path_file = os.path.join(dest_path, "%s%s" % (new_fname.strip(), ext_m))
    if os.path.exists(dest_path_file):
        logger.info("Arquivo ja baixado: %s", dest_path_file)
        return

    try:
        request_file = request.get(location, timeout=int(config.get("TIMEOUT") or 10))
    except request.HTTPGetError as e:
        try:
            msg = str(e)
        except TypeError:
            msg = "Unknown error"
        logger.error(e)
        return msg
    else:
        files.write_file_binary(dest_path_file, request_file.content)
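# Call-site sketch (paths and names are hypothetical): download_asset returns
# None when the file is written or already present, and an error message
# string on failure, so callers can collect the failures for a package.
#
#     error = download_asset(
#         "/img/revistas/aiss/v45n4/a01fig01.gif", "a01fig01", pkg_path
#     )
#     if error:
#         errors.append(error)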
def ext_identifiers():
    journals_id = request.get(
        "%s/journal/identifiers/" % config.get("AM_URL_API"),
        params={"collection": config.get("SCIELO_COLLECTION")},
    ).json()

    return journals_id
def list_converted_xml_view(request):
    list_files_xmls = files.xml_files_list(config.get("CONVERSION_PATH"))
    list_files_xmls += files.xml_files_list(config.get("VALID_XML_PATH"))

    xmls = Page(
        list_files_xmls,
        page=int(request.params.get("page", 1)),
        items_per_page=20,
        item_count=len(list_files_xmls),
    )

    return {"xmls": xmls, "page_title": "Lista de XMLS Convertidos"}
def ext_journal(issn):
    journal = request.get(
        "%s/journal" % config.get("AM_URL_API"),
        params={"collection": config.get("SCIELO_COLLECTION"), "issn": issn},
    ).json()

    return Journal(journal[0])
def ext_identifiers(issn_journal):
    articles_id = request.get(
        "%s/article/identifiers/" % config.get("AM_URL_API"),
        params={
            "collection": config.get("SCIELO_COLLECTION"),
            "issn": issn_journal,
        },
    )

    if articles_id:
        return articles_id.json()
def convert_article_ALLxml():
    logger.info("Iniciando Conversão do xmls")
    list_files_xmls = files.xml_files_list(config.get("SOURCE_PATH"))
    for file_xml in tqdm(list_files_xmls):
        try:
            convert_article_xml(os.path.join(config.get("SOURCE_PATH"), file_xml))
        except Exception as ex:
            logger.error(file_xml)
            logger.exception(ex)
def get_asset(old_path, new_fname, dest_path):
    """Fetch a digital asset from the file system and persist it in
    ``dest_path``.

    Args:
        old_path: original path of the asset
        new_fname: new file name for the asset
        dest_path: destination folder

    Returns:
        None. The asset is persisted in ``dest_path``.

    Raises:
        AssetNotFoundError: if the asset cannot be found in any of the
        source folders.
    """
    if old_path.startswith("http"):
        asset_path = urlparse(old_path).path
    else:
        asset_path = old_path
    asset_path = asset_path.strip('/')

    # Check whether the file has already been downloaded
    filename_m, ext_m = files.extract_filename_ext_by_path(old_path)
    dest_path_file = os.path.join(dest_path, "%s%s" % (new_fname.strip(), ext_m))
    if os.path.exists(dest_path_file):
        logger.debug("Arquivo já armazenado na pasta de destino: %s", dest_path_file)
        return

    paths = [
        os.path.join(config.get('SOURCE_IMG_FILE'), asset_path),
        os.path.join(config.get('SOURCE_PDF_FILE'), asset_path),
    ]
    if (filename_m, ext_m) == ("seta", ".gif"):
        seta_path = os.path.join(config.get('SOURCE_IMG_FILE'), "img", "seta.gif")
        paths.insert(0, seta_path)

    try:
        for path in paths:
            path = find_file(path)
            if path:
                break
        content = files.read_file_binary(path)
    except (TypeError, FileNotFoundError, IOError):
        raise AssetNotFoundError(f"Not found {old_path}")
    else:
        files.write_file_binary(dest_path_file, content)
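# Call-site sketch (paths are hypothetical): unlike download_asset, get_asset
# resolves the asset on the local file system (SOURCE_IMG_FILE, then
# SOURCE_PDF_FILE) and raises AssetNotFoundError instead of returning an
# error message.
#
#     try:
#         get_asset("/img/revistas/aiss/v45n4/a01fig01.gif", "a01fig01", pkg_path)
#     except AssetNotFoundError as exc:
#         logger.error(str(exc))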
def pack_article_ALLxml():
    logger.info("Empacotando os documentos XML")
    list_files_xmls = files.xml_files_list(config.get("VALID_XML_PATH"))
    for file_xml in tqdm(list_files_xmls):
        try:
            pack_article_xml(os.path.join(config.get("VALID_XML_PATH"), file_xml))
        except (PermissionError, OSError, etree.Error) as ex:
            logger.error("Falha no empacotamento de %s" % file_xml)
            logger.exception(ex)
def ext_article(code, **ext_params):
    params = ext_params
    params.update({"collection": config.get("SCIELO_COLLECTION"), "code": code})

    try:
        article = request.get("%s/article" % config.get("AM_URL_API"), params=params)
    except request.HTTPGetError:
        logger.error("Erro coletando dados do artigo PID %s" % code)
    else:
        return article
def ext_journal(issn):
    try:
        journal = request.get(
            "%s/journal" % config.get("AM_URL_API"),
            params={"collection": config.get("SCIELO_COLLECTION"), "issn": issn},
        )
    except request.HTTPGetError:
        logger.error(
            "Journal nao encontrado: %s: %s" % (config.get("SCIELO_COLLECTION"), issn)
        )
    else:
        return Journal(journal.json()[0])
def reading_article_ALLxml():
    logger.info("Iniciando Leituras do xmls")
    list_files_xmls = files.list_dir(config.get("CONVERSION_PATH"))
    for file_xml in list_files_xmls:
        try:
            reading_article_xml(
                os.path.join(config.get("CONVERSION_PATH"), file_xml),
                move_success=False,
            )
        except Exception as ex:
            logger.error(file_xml)
            logger.exception(ex)
def ext_issue(code, **ext_params):
    issue = request.get(
        "%s/issue" % config.get("AM_URL_API"),
        params={"collection": config.get("SCIELO_COLLECTION"), "code": code},
    ).json()

    obj_issue = Issue(issue)
    return obj_issue
def register_documents(session_db, storage, documents_sorter, folder) -> None:
    """Import the SPS packages found under the given directory.

    The import process has the following stages: registration of the
    assets/renditions in the given object storage, registration of the
    manifest in the given Kernel database, and insertion of the documents
    into a ``documents_sorter`` so they can later be associated with their
    respective issues."""

    err_filename = os.path.join(config.get("ERRORS_PATH"), "insert_documents.err")

    for path, _, sps_files in os.walk(folder):
        if not sps_files:
            continue

        try:
            xml = list(filter(lambda f: f.endswith(".xml"), sps_files))[0]
            xml_path = os.path.join(path, xml)
            constructor.article_xml_constructor(xml_path, path, False)
            registration_result = register_document(path, session_db, storage)

            if registration_result:
                document_xml, document_id = registration_result
                documents_sorter.insert_document(document_id, document_xml)

        except (IndexError, ValueError, TypeError, exceptions.XMLError) as ex:
            msg = "Falha ao registrar documento %s: %s" % (path, ex)
            logger.error(msg)
            files.write_file(err_filename, msg, "a")
def test_register_documents_in_documents_bundle_no_issn_in_document(
    self, mk_read_json_file, mk_open
):
    documents = [
        {
            "pid_v3": "JwqGdMDrdcV3Z7MFHgtKvVk",
            "acron": "aiss",
            "eissn": None,
            "issn": None,
            "number": "4",
            "order": "00349",
            "pid": "S0021-25712009000400001",
            "supplement": None,
            "volume": "45",
            "year": "2009",
        },
    ]
    journals = [SAMPLES_JOURNAL]

    mk_read_json_file.return_value = journals
    mock_file = MagicMock()
    mock_file.readlines.return_value = [
        json.dumps(document) for document in documents
    ]
    mk_open.return_value.__enter__.return_value = mock_file
    mk_open.return_value.__exit__.return_value = Mock(return_value=False)

    inserting.register_documents_in_documents_bundle(
        Session(), "/tmp/documents.json", "/tmp/journals.json"
    )

    err_filename = os.path.join(
        config.get("ERRORS_PATH"), "insert_documents_in_bundle.err"
    )
    self.assertEqual(os.path.isfile(err_filename), True)
    with open(err_filename) as fp:
        content = fp.read()
    self.assertEqual(content, "JwqGdMDrdcV3Z7MFHgtKvVk\n")
def extract_all_data(list_documents_pids: List[str]):
    """Extract XML documents from a list of input PIDs."""

    pids_to_extract, pids_extracteds, stage_path = files.fetch_stages_info(
        list_documents_pids, __name__
    )

    logger.info("Iniciando extração dos Documentos")
    count = 0
    try:
        for documents_pid in tqdm(
            iterable=pids_to_extract,
            initial=len(pids_extracteds),
            total=len(list_documents_pids),
        ):
            documents_pid = documents_pid.strip()

            logger.debug("\t coletando dados do Documento '%s'", documents_pid)
            xml_article = article.ext_article_txt(documents_pid)
            if xml_article:
                count += 1
                file_path = os.path.join(
                    config.get("SOURCE_PATH"), "%s.xml" % documents_pid
                )
                logger.debug("\t Salvando arquivo '%s'", file_path)
                files.write_file(file_path, xml_article)
                files.register_latest_stage(stage_path, documents_pid)
    except KeyboardInterrupt:
        ...

    logger.info("\t Total de %s artigos", count)
async def fetch_articles(session, pid, cut_off_mark, output_filepath):
    """
    Fetch the article from each site instance and build a dictionary with the
    information needed for the JSON output.

    The variable ``comp_data`` has the following structure:

        {
         'classic': 'Introduction One of the major current public health problems remains sepsis, which persists with hig',
         'new': 'One of the major current public health problems remains sepsis, which persists with high hospital mo',
         'url_classic': 'http://www.scielo.br/scielo.php?script=sci_arttext&pid=S0102-86502017000300175',
         'url_new': 'http://new.scielo.br/article/S0102-86502017000300175',
         'similarity': '72.22%',
         'pid_v2': 'S0102-86502017000300175',
         'similarity_technique': 'jaccard',
         'cut_off_mark': 90,
         'found_text_classic': true,
         'found_text_new': false
        }

    Args:
        session: aiohttp HTTP session object
        pid: article PID
        cut_off_mark: similarity threshold
        output_filepath: output file path

    Returns:
        None

    Raises:
        Does not raise exceptions.
    """
    comp_data = {}

    for inst in config.get("SITE_INSTANCES"):
        html = await fetch_article(session, pid, inst.get("url"))
        comp_data["%s" % inst.get("name")] = extract(
            html,
            inst.get("html"),
            inst.get("remove_tags"),
            inst.get("remove_texts"),
            inst.get("compare_tags"),
        )
        comp_data["url_%s" % inst.get("name")] = inst.get("url").format(pid)

    sim, percent = sim_jaccard(
        normalize(comp_data["classic"]), normalize(comp_data["new"])
    )

    comp_data["similarity"] = percent
    comp_data["found_text_classic"] = bool(comp_data["classic"])
    comp_data["found_text_new"] = bool(comp_data["new"])
    if (sim * 100) > cut_off_mark:
        del comp_data["classic"]
        del comp_data["new"]
    comp_data["pid_v2"] = pid
    comp_data["similarity_technique"] = "jaccard"
    comp_data["cut_off_mark"] = cut_off_mark

    dump_jsonl(output_filepath, [json.dumps(comp_data)])
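# Illustrative sketch only: the real sim_jaccard/normalize helpers live
# elsewhere in this project. This is an assumption of how a word-level
# Jaccard similarity returning both a ratio and a percentage string (as used
# in the 'similarity' field above) could look.
def _sim_jaccard_sketch(text_a: str, text_b: str):
    tokens_a, tokens_b = set(text_a.split()), set(text_b.split())
    if not tokens_a and not tokens_b:
        return 1.0, "100.00%"
    ratio = len(tokens_a & tokens_b) / len(tokens_a | tokens_b)
    return ratio, "%.2f%%" % (ratio * 100)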
def validate_article_xml(file_xml_path, print_error=True):
    result = {}
    logger.debug(file_xml_path)

    try:
        xmlvalidator = XMLValidator.parse(file_xml_path)
        if config.get("VALIDATE_ALL") == "TRUE":
            is_valid, errors = xmlvalidator.validate_all()
        else:
            is_valid, errors = xmlvalidator.validate()
    except (exceptions.XMLSPSVersionError, etree.LxmlError) as e:
        result[str(e)] = {
            "count": 1,
            "lineno": [1],
            "message": [str(e)],
            "filename": {file_xml_path},
        }
        return result

    if not is_valid:
        for error in errors:
            if print_error:
                logger.error("%s - %s - %s", error.level, error.line, error.message)

            message = error.message[:80]
            data = {
                "count": 1,
                "lineno": [error.line],
                "message": [error.message],
                "filename": {file_xml_path},
            }
            dicts.merge(result, message, data)

    return result
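# Illustrative shape of the dict returned by validate_article_xml, derived
# from the code above (values are made up; dicts.merge is assumed to
# accumulate counts and line numbers for errors sharing the same message):
#
#     {
#         "Element 'article-meta': Missing child element(s)": {
#             "count": 2,
#             "lineno": [57, 113],
#             "message": ["Element 'article-meta': Missing child element(s)", ...],
#             "filename": {"/data/valid_xml/S0102-8650.xml"},
#         },
#     }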
def configure_logger():
    l_config.dictConfig({
        "version": 1,
        "formatters": {
            "default": {
                "format": "%(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s",
                "datefmt": "%Y-%m-%d %H:%M:%S",
            }
        },
        "handlers": {
            "console": {
                "level": "DEBUG",
                "class": "logging.StreamHandler",
                "formatter": "default",
                "stream": "ext://sys.stdout",
            },
            "file": {
                "level": "ERROR",
                "class": "logging.handlers.RotatingFileHandler",
                "formatter": "default",
                "filename": os.path.join(config.get("LOGGER_PATH"), "migracao.log"),
                "maxBytes": 10 * 1024,
                "backupCount": 3,
            },
        },
        "loggers": {
            "": {"level": "DEBUG", "handlers": ["console", "file"]}
        },
        "disable_existing_loggers": False,
    })
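# Usage sketch: configure_logger assumes LOGGER_PATH points to an existing,
# writable directory. DEBUG and above go to stdout; ERROR and above are also
# appended to migracao.log, rotated at roughly 10 KB with 3 backups kept.
#
#     configure_logger()
#     logging.getLogger(__name__).error("something went wrong")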
def test_register_documents_in_documents_bundle(
    self, mk_link_documents_bundle_with_documents, mk_read_json_file
):
    documents = {
        "JwqGdMDrdcV3Z7MFHgtKvVk": {
            "acron": "aiss",
            "eissn": None,
            "issn": "0036-3634",
            "number": "04",
            "order": "00349",
            "pid": "S0021-25712009000400001",
            "pissn": "0036-3634",
            "supplement": None,
            "volume": "45",
            "year": "2009",
        }
    }
    journals = [SAMPLES_JOURNAL]
    mk_read_json_file.side_effect = [journals, documents]
    err_filename = os.path.join(
        config.get("ERRORS_PATH"), "insert_documents_in_bundle.err"
    )

    session_db = Session()
    manifest = inserting.ManifestDomainAdapter(SAMPLE_ISSUES_KERNEL[0])
    session_db.documents_bundles.add(manifest)

    inserting.register_documents_in_documents_bundle(
        session_db, "/tmp/documents.json", "/tmp/journals.json"
    )
    self.assertEqual(os.path.isfile(err_filename), True)
    with open(err_filename) as fp:
        content = fp.read()
    self.assertEqual(content, "0036-3634-2009-v45-n4\n")
def convert_article_xml(file_xml_path):
    obj_xmltree = xml.loadToXML(file_xml_path)
    obj_xml = obj_xmltree.getroot()

    obj_xml.set("specific-use", "sps-1.9")
    obj_xml.set("dtd-version", "1.1")

    xml_sps = SPS_Package(obj_xmltree)

    # Convert the AM body to SPS
    xml_sps.transform_body()

    # Convert pub-date to SPS 1.9
    xml_sps.transform_pubdate()

    # Build the scielo-id in the converted XML
    xml_sps.create_scielo_id()

    # Remove the <counts> tag from the XML
    xml_sps.transform_article_meta_count()

    languages = "-".join(xml_sps.languages)
    _, fname = os.path.split(file_xml_path)
    fname, fext = fname.rsplit(".", 1)

    new_file_xml_path = os.path.join(
        config.get("CONVERSION_PATH"), "%s.%s.%s" % (fname, languages, fext)
    )

    xml.objXML2file(new_file_xml_path, xml_sps.xmltree, pretty=True)
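# Naming example for the conversion output above: an input file named
# "S0102-86502017000300175.xml" whose SPS package declares the languages
# ["pt", "en"] is written to CONVERSION_PATH as
# "S0102-86502017000300175.pt-en.xml".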
def run(path: str, output_file: str):
    """Run the isis2json utility as a subprocess to extract data from an
    ISIS database in MST format. The extraction result is written as JSON
    to the file given by the ``output_file`` parameter.
    """
    command = "java -cp %s org.python.util.jython %s -t 3 -p 'v' -o %s %s" % (
        config.get("CLASSPATH"),
        ISIS2JSON_PATH,
        output_file,
        path,
    )

    try:
        logger.debug("Extracting database file: %s" % path)
        subprocess.run(
            shlex.split(command),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=True,
        )
        logger.debug("Writing extracted result as JSON file in: %s" % output_file)
    except Exception as exc:
        raise exceptions.ExtractError(str(exc)) from None
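# For illustration only (the CLASSPATH and ISIS2JSON_PATH values below are
# hypothetical), run("/bases/title/title.mst", "/tmp/title.json") builds
# roughly this command line:
#
#     java -cp /app/jars/* org.python.util.jython /app/isis2json.py \
#         -t 3 -p 'v' -o /tmp/title.json /bases/title/title.mst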
def setUp(self):
    self.data = dict(
        [
            ("eissn", "1234-5678"),
            ("pissn", "0001-3714"),
            ("issn", "0987-0987"),
            ("year", "1998"),
            ("volume", "29"),
            ("number", "3"),
            ("supplement", None),
        ]
    )
    self.aop_data = dict(
        [("eissn", "0001-3714"), ("issn", "0001-3714"), ("year", "2019")]
    )
    self.bundle_id = "0001-3714-1998-v29-n3"
    self.issn = "0987-0987"

    if not os.path.isdir(config.get("ERRORS_PATH")):
        os.makedirs(config.get("ERRORS_PATH"))
def download_asset(old_path, new_fname, dest_path):
    """Returns msg, if error"""
    location = urljoin(config.get("STATIC_URL_FILE"), old_path)
    try:
        request_file = request.get(location, timeout=int(config.get("TIMEOUT") or 10))
    except request.HTTPGetError as e:
        try:
            msg = str(e)
        except TypeError:
            msg = "Unknown error"
        logger.error(e)
        return msg
    else:
        filename_m, ext_m = files.extract_filename_ext_by_path(old_path)
        files.write_file_binary(
            os.path.join(dest_path, "%s%s" % (new_fname, ext_m)),
            request_file.content,
        )
def pack_article_ALLxml():
    """Build the SPS packages from a list of valid XML files.

    Args:
        No arguments.

    Returns:
        None. Persists the XML in ``package_path``.

    Example:
        pack_article_ALLxml()

    Raises:
        Does not raise exceptions.
    """
    xmls = [
        os.path.join(config.get("VALID_XML_PATH"), xml)
        for xml in files.xml_files_list(config.get("VALID_XML_PATH"))
    ]

    jobs = [{"file_xml_path": xml} for xml in xmls]

    with tqdm(total=len(xmls), initial=0) as pbar:

        def update_bar(pbar=pbar):
            pbar.update(1)

        def log_exceptions(exception, job, logger=logger):
            logger.error(
                "Could not pack file '%s'. The exception '%s' was raised.",
                job["file_xml_path"],
                exception,
            )

        DoJobsConcurrently(
            pack_article_xml,
            jobs=jobs,
            max_workers=int(config.get("THREADPOOL_MAX_WORKERS")),
            exception_callback=log_exceptions,
            update_bar=update_bar,
        )
def migrate_logos_to_website(session, website_img_dir):
    """Read all Journals from Website MongoDB collection and, for each one,
    get journal logo from current website, save to website media directory,
    create an image record in SQLite Image Table and update journal document
    with logo URL.

    session: SQLite DB session created in `connect_to_databases`
    website_img_dir: Website media directory
    """
    journals = Journal.objects.all()
    if len(journals) == 0:
        raise exceptions.NoJournalInWebsiteError(
            "No journals in Website Database. Migrate Isis Journals first."
        )

    for journal in journals:
        logger.debug("Journal acronym %s", journal.acronym)

        logo_old_filename = "glogo.gif"
        logo_url = "{}img/revistas/{}/glogo.gif".format(
            config.get("STATIC_URL_FILE"), journal.acronym
        )
        try:
            logger.debug("Getting Journal logo in %s", logo_url)
            request_file = request.get(
                logo_url, timeout=int(config.get("TIMEOUT") or 10)
            )
        except request.HTTPGetError as e:
            try:
                msg = str(e)
            except TypeError:
                msg = "Unknown error"
            logger.error(msg)
        else:
            logo_filename = "_".join([journal.acronym, logo_old_filename])
            dest_path_file = os.path.join(website_img_dir, logo_filename)
            logger.debug("Saving Journal logo in %s", dest_path_file)
            files.write_file_binary(dest_path_file, request_file.content)

            image_path = "images/%s" % logo_filename
            logger.debug("Saving logo as image in %s", image_path)
            session.add(Image(name=logo_filename, path=image_path))
            session.commit()

            journal.logo_url = "/media/%s" % image_path
            logger.debug("Updating Journal with logo_url %s", journal.logo_url)
            journal.save()
def register_documents(session_db, storage, documents_sorter) -> None:
    logger.info("Iniciando Envio dos xmls")
    list_folders = files.list_files(config.get("SPS_PKG_PATH"))
    err_filename = os.path.join(config.get("ERRORS_PATH"), "insert_documents.err")

    for folder in list_folders:
        try:
            document_path = os.path.join(config.get("SPS_PKG_PATH"), folder)
            registration_result = register_document(document_path, session_db, storage)

            if registration_result:
                document_xml, document_id = registration_result
                documents_sorter.insert_document(document_id, document_xml)

        except Exception as ex:
            msg = "Falha ao registrar documento %s: %s" % (document_path, ex)
            logger.error(msg)
            files.write_file(err_filename, msg, "a")