예제 #1
0
def task_clean_etl_models_by_stage(stage='all', model_name='all'):
    """
    Enqueue the jobs that remove the models of the given stage(s).

    Params:
    - `stage`: stage to clean; 'all' expands to every entry of STAGE_LIST.
    - `model_name`: model to remove ("collection", "journal", etc.);
      'all' expands to every key of ID_MODEL_CLASS.

    For each (model, stage) pair one `delete_etl_models` job is enqueued,
    with a sync event record created right before and right after it.
    """
    queues = RQueues()
    target_stages = STAGE_LIST if stage == 'all' else [stage]
    target_models = ID_MODEL_CLASS.keys() if model_name == 'all' else [model_name]

    for current_model in target_models:
        for current_stage in target_stages:
            names = (current_model, current_stage)

            start_msg = u'Enfilerando task para remover o modelo: %s da stage: %s' % names
            logger.info(start_msg)
            create_sync_event_record(
                'sync_ids', current_model, 'delete_identifiers', start_msg)

            queues.enqueue(
                'sync_ids', current_model,
                delete_etl_models, current_stage, current_model)

            end_msg = u'Fim: Enfilerando task para remover o modelo: %s da stage: %s' % names
            logger.info(end_msg)
            create_sync_event_record(
                'sync_ids', current_model, 'delete_identifiers', end_msg)
예제 #2
0
def task_consume_diff_update(stage, model_name):
    """
    Consume the unapplied UPDATE diff records for one stage and model.

    - @param stage: ETL stage; must be a member of ETL_STAGE_LIST
    - @param model_name: ETL model name; must be a member of
      ETL_MODEL_NAME_LIST

    The unapplied UUIDs are split into slices of SLICE_SIZE and one
    `task_differ_apply_for_selected_uuids` job is enqueued per slice.

    Raises ValueError when `stage` or `model_name` is not accepted.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)

    SLICE_SIZE = 1000
    action = 'update'

    queues = RQueues()
    get_db_connection()
    differ = ETL_DIFFERS_BY_MODEL[model_name]()
    pending_uuids = differ.get_uuids_unapplied(stage, action)

    # All slices are materialized before the first job is enqueued.
    for batch in list(chunks(pending_uuids, SLICE_SIZE)):
        batch_as_str = [str(item) for item in batch]
        logger.info(u'enfilerando: consumo de UUUIDs selecionados (stage:%s, model: %s, action: %s)' % (stage, model_name, action))
        queues.enqueue(
            'sync_ids', model_name,
            task_differ_apply_for_selected_uuids,
            stage, model_name, action, batch_as_str)
예제 #3
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_delete_selected_collections_identifiers(selected_ids):
    """
        Delete Collection identifier documents.
        @param:
        - selected_ids: list of pks of the documents to remove

        If `selected_ids` holds more than SLICE_SIZE items, it is split
        into slices of SLICE_SIZE and this same task is enqueued once
        per slice.
        Otherwise (SLICE_SIZE items or fewer) the documents are deleted
        directly through the queryset.
    """
    SLICE_SIZE = 1000
    id_model = CollectionIdModel
    get_db_connection()
    queues = RQueues()

    if len(selected_ids) <= SLICE_SIZE:
        id_model.objects.filter(pk__in=selected_ids).delete()
        return

    for batch in list(chunks(selected_ids, SLICE_SIZE)):
        batch_as_str = [str(item) for item in batch]
        queues.enqueue(
            'sync_ids', 'collection',
            task_delete_selected_collections_identifiers,
            batch_as_str)
예제 #4
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_journals(selected_uuids):
    """
        Enqueue one `task_load_one_journal` job for every UUID in
        `selected_uuids` (Load stage, model: Journal).
    """
    stage, model = 'load', 'journal'
    queues = RQueues()
    for journal_uuid in selected_uuids:
        queues.enqueue(stage, model, task_load_one_journal, journal_uuid)
예제 #5
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_articles(selected_uuids):
    """
        Enqueue one `task_load_one_article` job for every UUID in
        `selected_uuids` (Load stage, model: Article).
    """
    stage, model = 'load', 'article'
    queues = RQueues()
    for article_uuid in selected_uuids:
        queues.enqueue(stage, model, task_load_one_article, article_uuid)
예제 #6
0
def task_clean_etl_models_by_stage(stage='all', model_name='all'):
    """
    Enqueue the jobs that remove the models of the given stage(s).

    Params:
    - `stage`: stage to clean; 'all' expands to every entry of STAGE_LIST.
    - `model_name`: model to remove ("collection", "journal", etc.);
      'all' expands to every key of ID_MODEL_CLASS.

    For each (model, stage) pair one `delete_etl_models` job is enqueued,
    with a sync event record created right before and right after it.
    """
    queues = RQueues()
    target_stages = STAGE_LIST if stage == 'all' else [stage]
    target_models = ID_MODEL_CLASS.keys() if model_name == 'all' else [model_name]

    for current_model in target_models:
        for current_stage in target_stages:
            names = (current_model, current_stage)

            start_msg = u'Enfilerando task para remover o modelo: %s da stage: %s' % names
            logger.info(start_msg)
            create_sync_event_record(
                'sync_ids', current_model, 'delete_identifiers', start_msg)

            queues.enqueue(
                'sync_ids', current_model,
                delete_etl_models, current_stage, current_model)

            end_msg = u'Fim: Enfilerando task para remover o modelo: %s da stage: %s' % names
            logger.info(end_msg)
            create_sync_event_record(
                'sync_ids', current_model, 'delete_identifiers', end_msg)
예제 #7
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_articles(selected_uuids):
    """
        Enqueue one `task_load_one_article` job for every UUID in
        `selected_uuids` (Load stage, model: Article).
    """
    stage, model = 'load', 'article'
    queues = RQueues()
    for article_uuid in selected_uuids:
        queues.enqueue(stage, model, task_load_one_article, article_uuid)
예제 #8
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_press_releases(selected_uuids):
    """
        Enqueue one `task_load_one_press_release` job for every UUID in
        `selected_uuids` (Load stage, model: Press Release).
    """
    stage, model = 'load', 'press_release'
    queues = RQueues()
    for press_release_uuid in selected_uuids:
        queues.enqueue(
            stage, model, task_load_one_press_release, press_release_uuid)
예제 #9
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_delete_selected_collections(selected_uuids):
    """
        Delete loaded Collections.
        @param:
        - selected_uuids: list of UUIDs of the documents to remove

        If `selected_uuids` holds more than SLICE_SIZE items, it is split
        into slices of SLICE_SIZE and this same task is enqueued once
        per slice.
        Otherwise (SLICE_SIZE items or fewer) the LoadCollection documents
        are deleted directly through the queryset, followed by the matching
        Collection records in the OPAC webapp database.
    """
    SLICE_SIZE = 1000
    load_model = LoadCollection
    get_db_connection()
    queues = RQueues()

    if len(selected_uuids) > SLICE_SIZE:
        for batch in list(chunks(selected_uuids, SLICE_SIZE)):
            batch_as_str = [str(item) for item in batch]
            queues.enqueue(
                'load', 'collection',
                task_delete_selected_collections, batch_as_str)
        return

    # Remove the LoadCollection documents selected by the given uuids.
    load_model.objects.filter(uuid__in=selected_uuids).delete()

    # The OPAC records are looked up by pk: the uuid with its dashes removed.
    register_connections()
    opac_ids = [str(item).replace('-', '') for item in selected_uuids]
    with switch_db(opac_models.Collection, OPAC_WEBAPP_DB_NAME) as opac_collection:
        opac_collection.objects.filter(pk__in=opac_ids).delete()
예제 #10
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_news(selected_uuids):
    """
        Enqueue one `task_load_one_news` job for every UUID in
        `selected_uuids` (Load stage, model: News).
    """
    stage, model = 'load', 'news'
    queues = RQueues()
    for news_uuid in selected_uuids:
        queues.enqueue(stage, model, task_load_one_news, news_uuid)
예제 #11
0
def task_delete_selected_diff_etl_model(stage, model_name, action, selected_uuids):
    """
    Delete the selected diff documents of one ETL model.

    - stage: must be a member of ETL_STAGE_LIST
    - model_name: must be a member of ETL_MODEL_NAME_LIST
    - action: must be a member of ACTION_LIST
    - selected_uuids: UUIDs of the diff documents to remove

    More than SLICE_SIZE uuids are split into slices of SLICE_SIZE and this
    same task is enqueued once per slice; otherwise the documents are
    deleted directly through the queryset.

    Raises ValueError when `stage`, `model_name` or `action` is not accepted.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)
    if action not in ACTION_LIST:
        raise ValueError(u'param action: %s é inválido' % action)

    SLICE_SIZE = 1000
    get_db_connection()
    diff_model = DIFF_MODEL_CLASS_BY_NAME[model_name]
    queues = RQueues()

    if len(selected_uuids) <= SLICE_SIZE:
        diff_model.objects.filter(uuid__in=selected_uuids).delete()
        return

    for batch in list(chunks(selected_uuids, SLICE_SIZE)):
        batch_as_str = [str(item) for item in batch]
        queues.enqueue(
            'sync_ids', model_name,
            task_delete_selected_diff_etl_model,
            stage, model_name, action, batch_as_str)  # task args
예제 #12
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_delete_selected_collections(selected_uuids):
    """
        Delete transformed Collections.
        @param:
        - selected_uuids: list of UUIDs of the documents to remove

        If `selected_uuids` holds more than SLICE_SIZE items, it is split
        into slices of SLICE_SIZE and this same task is enqueued once
        per slice.
        Otherwise (SLICE_SIZE items or fewer) the documents are deleted
        directly through the queryset.
    """
    SLICE_SIZE = 1000
    transform_model = TransformCollection
    get_db_connection()
    queues = RQueues()

    if len(selected_uuids) <= SLICE_SIZE:
        transform_model.objects.filter(uuid__in=selected_uuids).delete()
        return

    for batch in list(chunks(selected_uuids, SLICE_SIZE)):
        batch_as_str = [str(item) for item in batch]
        queues.enqueue(
            'transform', 'collection',
            task_delete_selected_collections, batch_as_str)
예제 #13
0
def task_delete_selected_diff_etl_model(stage, model_name, action,
                                        selected_uuids):
    """
    Delete the selected diff documents of one ETL model.

    - stage: must be a member of ETL_STAGE_LIST
    - model_name: must be a member of ETL_MODEL_NAME_LIST
    - action: must be a member of ACTION_LIST
    - selected_uuids: UUIDs of the diff documents to remove

    More than SLICE_SIZE uuids are split into slices of SLICE_SIZE and this
    same task is enqueued once per slice; otherwise the documents are
    deleted directly through the queryset.

    Raises ValueError when `stage`, `model_name` or `action` is not accepted.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)
    if action not in ACTION_LIST:
        raise ValueError(u'param action: %s é inválido' % action)

    SLICE_SIZE = 1000
    get_db_connection()
    diff_model = DIFF_MODEL_CLASS_BY_NAME[model_name]
    queues = RQueues()

    if len(selected_uuids) <= SLICE_SIZE:
        diff_model.objects.filter(uuid__in=selected_uuids).delete()
        return

    for batch in list(chunks(selected_uuids, SLICE_SIZE)):
        batch_as_str = [str(item) for item in batch]
        queues.enqueue(
            'sync_ids', model_name,
            task_delete_selected_diff_etl_model,
            stage, model_name, action, batch_as_str)  # task args
예제 #14
0
파일: cleaner.py 프로젝트: jfunez/opac_proc
def task_clean_diff_models(stage='all', model_name='all', action='all'):
    """
    Enqueue the jobs that remove Diff models.

    Params:
    - `stage`: filter on the stage field: "extract", "transform" or "load";
      'all' expands to every entry of STAGE_LIST.
    - `model_name`: model to remove ("collection", "journal", etc.);
      'all' expands to every key of DIFF_MODEL_CLASS.
    - `action`: filter on the action field: "add" | "update" | "delete";
      'all' expands to every entry of ACTION_LIST.

    For each (model, stage, action) combination one `delete_diff_models`
    job is enqueued, with a sync event record created before and after it.
    """
    queues = RQueues()

    target_models = DIFF_MODEL_CLASS.keys() if model_name == 'all' else [model_name]
    target_stages = STAGE_LIST if stage == 'all' else [stage]
    target_actions = ACTION_LIST if action == 'all' else [action]

    for current_model in target_models:
        for current_stage in target_stages:
            for current_action in target_actions:
                names = (current_model, current_stage, current_action)

                start_msg = u'Enfilerando task para remover o diff model, modelo: %s, stage: %s, action: %s' % names
                logger.info(start_msg)
                create_sync_event_record(
                    'sync_ids', current_model, 'delete_diff_models', start_msg)

                queues.enqueue(
                    'sync_ids', current_model, delete_diff_models,
                    current_stage, current_model, current_action)

                end_msg = u'Fim: Enfilerando task para remover o diff model, modelo: %s, stage: %s, action: %s' % names
                logger.info(end_msg)
                create_sync_event_record(
                    'sync_ids', current_model, 'delete_diff_models', end_msg)
예제 #15
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_news(selected_uuids):
    """
        Enqueue one `task_load_one_news` job for every UUID in
        `selected_uuids` (Load stage, model: News).
    """
    stage, model = 'load', 'news'
    queues = RQueues()
    for news_uuid in selected_uuids:
        queues.enqueue(stage, model, task_load_one_news, news_uuid)
예제 #16
0
파일: jobs.py 프로젝트: jfunez/opac_proc
def task_delete_selected_collections_identifiers(selected_ids):
    """
        Delete Collection identifier documents.
        @param:
        - selected_ids: list of pks of the documents to remove

        If `selected_ids` holds more than SLICE_SIZE items, it is split
        into slices of SLICE_SIZE and this same task is enqueued once
        per slice.
        Otherwise (SLICE_SIZE items or fewer) the documents are deleted
        directly through the queryset.
    """
    SLICE_SIZE = 1000
    id_model = CollectionIdModel
    get_db_connection()
    queues = RQueues()

    if len(selected_ids) <= SLICE_SIZE:
        id_model.objects.filter(pk__in=selected_ids).delete()
        return

    for batch in list(chunks(selected_ids, SLICE_SIZE)):
        batch_as_str = [str(item) for item in batch]
        queues.enqueue(
            'sync_ids', 'collection',
            task_delete_selected_collections_identifiers,
            batch_as_str)
예제 #17
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_journals(selected_uuids):
    """
        Enqueue one `task_load_one_journal` job for every UUID in
        `selected_uuids` (Load stage, model: Journal).
    """
    stage, model = 'load', 'journal'
    queues = RQueues()
    for journal_uuid in selected_uuids:
        queues.enqueue(stage, model, task_load_one_journal, journal_uuid)
예제 #18
0
def enqueue_full_populate_task_by_model(model_name='all'):
    """
    Enqueue the populate task of one model, or of every known model.

    - model_name: 'collection', 'journal', 'issue', 'article', 'news',
      'press_release', or 'all' to enqueue the populate task of each one.

    A sync event record is created at the start and at the end of the
    enqueueing. Raises ValueError for an unknown `model_name`; in that case
    the start record has already been created and the end record is not.
    """
    logger.info("Inicinado: enqueue_full_populate_task_by_model para modelo: %s", model_name)
    create_sync_event_record(
        'sync_ids', model_name, 'enqueue_full_populate_task_by_model',
        u'Inciando enfileramento para preencher datas dos registros diff model: %s' % model_name)

    # setup
    get_db_connection()
    stage = 'sync_ids'
    queues = RQueues()

    # model name -> extract model class (only logged) and task to enqueue
    options = {
        'collection': {
            'model_class': models.ExtractCollection,
            'task_fn': task_populate_collections
        },
        'journal': {
            'model_class': models.ExtractJournal,
            'task_fn': task_populate_journals
        },
        'issue': {
            'model_class': models.ExtractIssue,
            'task_fn': task_populate_issues
        },
        'article': {
            'model_class': models.ExtractArticle,
            'task_fn': task_populate_articles
        },
        'news': {
            'model_class': models.ExtractNews,
            'task_fn': task_populate_news
        },
        'press_release': {
            'model_class': models.ExtractPressRelease,
            'task_fn': task_populate_press_release
        }
    }

    if model_name == 'all':
        selected = options.items()
    elif model_name in options.keys():
        selected = [(model_name, options[model_name])]
    else:
        raise ValueError('Param: model_name: %s inesperado' % model_name)

    for name, entry in selected:
        extract_class = entry['model_class']
        populate_task = entry['task_fn']
        logger.info("enfilerando stage: %s model_name: %s model_class: %s" % (stage, name, extract_class))
        queues.enqueue(stage, name, populate_task)

    create_sync_event_record(
        'sync_ids', model_name, 'enqueue_full_populate_task_by_model',
        u'Fim do enfileramento para preencher datas dos registros diff model: %s' % model_name)

    logger.info("Fim: enqueue_full_populate_task_by_model para modelo: %s", model_name)
예제 #19
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_press_releases(selected_uuids):
    """
        Enqueue one `task_load_one_press_release` job for every UUID in
        `selected_uuids` (Load stage, model: Press Release).
    """
    stage, model = 'load', 'press_release'
    queues = RQueues()
    for press_release_uuid in selected_uuids:
        queues.enqueue(
            stage, model, task_load_one_press_release, press_release_uuid)
예제 #20
0
파일: jobs.py 프로젝트: jamilatta/opac_proc
def task_collection_create():
    """
    Enqueue one `task_load_collection` job (Load stage) for the uuid of
    every TransformCollection document.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformCollection.objects.all():
        queues.enqueue(
            'load', 'collection', task_load_collection, transformed.uuid)
예제 #21
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_collections(selected_uuids):
    """
        Enqueue one `task_load_one_collection` job for every UUID in
        `selected_uuids` (Load stage, model: Collection).
    """
    stage, model = 'load', 'collection'
    queues = RQueues()
    for collection_uuid in selected_uuids:
        queues.enqueue(
            stage, model, task_load_one_collection, collection_uuid)
예제 #22
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_load_selected_collections(selected_uuids):
    """
        Enqueue one `task_load_one_collection` job for every UUID in
        `selected_uuids` (Load stage, model: Collection).
    """
    stage, model = 'load', 'collection'
    queues = RQueues()
    for collection_uuid in selected_uuids:
        queues.enqueue(
            stage, model, task_load_one_collection, collection_uuid)
예제 #23
0
파일: jobs.py 프로젝트: jamilatta/opac_proc
def task_collection_create():
    """
    Create the queues of the 'transform' stage and enqueue a single
    `task_transform_collection` job (no job arguments).
    """
    get_db_connection()
    target_stage = 'transform'
    queues = RQueues()
    queues.create_queues_for_stage(target_stage)
    queues.enqueue(target_stage, 'collection', task_transform_collection)
예제 #24
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_retrieve_selected_news_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_news_identifier` job for every id
        in `selected_ids` (model: NewsIdModel).
    """
    stage, model = 'sync_ids', 'news'
    queues = RQueues()
    for news_id in selected_ids:
        queues.enqueue(
            stage, model, task_retrieve_one_news_identifier, news_id)
예제 #25
0
파일: jobs.py 프로젝트: jamilatta/opac_proc
def task_journal_create():
    """
    Enqueue one `task_load_journal` job (Load stage) for every
    TransformJournal document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformJournal.objects.all():
        queues.enqueue(
            'load', 'journal', task_load_journal, uuid=transformed.uuid)
예제 #26
0
파일: jobs.py 프로젝트: jfunez/opac_proc
def task_extract_selected_issues(selected_uuids):
    """
        Enqueue the extraction of the Issues selected by `selected_uuids`.

        The `issue_pid` values of the matching IssueIdModel documents are
        read, and one `task_extract_one_issue` job is enqueued per value.
    """
    get_db_connection()
    queues = RQueues()
    id_model = identifiers_models.IssueIdModel
    matching_ids = id_model.objects.filter(uuid__in=selected_uuids)
    for pid in matching_ids.values_list('issue_pid'):
        queues.enqueue('extract', 'issue', task_extract_one_issue, pid)
예제 #27
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_retrieve_selected_journals_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_journal_identifier` job for every
        id in `selected_ids` (model: JournalIdModel).
    """
    stage, model = 'sync_ids', 'journal'
    queues = RQueues()
    for journal_id in selected_ids:
        queues.enqueue(
            stage, model, task_retrieve_one_journal_identifier, journal_id)
예제 #28
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_retrieve_selected_articles_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_article_identifier` job for every
        id in `selected_ids`, on the 'sync_ids' / 'article' queue.
    """
    stage, model = 'sync_ids', 'article'
    queues = RQueues()
    for article_id in selected_ids:
        queues.enqueue(
            stage, model, task_retrieve_one_article_identifier, article_id)
예제 #29
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_transform_selected_news(selected_uuids):
    """
        Enqueue one `task_transform_one_news` job for every UUID in
        `selected_uuids` (Transform stage, model: News).
    """
    stage, model = 'transform', 'news'
    queues = RQueues()
    for news_uuid in selected_uuids:
        queues.enqueue(stage, model, task_transform_one_news, news_uuid)
예제 #30
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_transform_selected_press_releases(selected_uuids):
    """
        Enqueue one `task_transform_one_press_release` job for every UUID
        in `selected_uuids` (Transform stage, model: Press Release).
    """
    stage, model = 'transform', 'press_release'
    queues = RQueues()
    for press_release_uuid in selected_uuids:
        queues.enqueue(
            stage, model,
            task_transform_one_press_release, press_release_uuid)
예제 #31
0
파일: jobs.py 프로젝트: jamilatta/opac_proc
def task_issue_create():
    """
    Enqueue one `task_load_issue` job (Load stage) for every
    TransformIssue document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformIssue.objects.all():
        queues.enqueue(
            'load', 'issue', task_load_issue, uuid=transformed.uuid)
예제 #32
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_transform_selected_journals(selected_uuids):
    """
        Enqueue the transformation of the Journals selected by
        `selected_uuids`.

        The `journal_issn` values of the matching JournalIdModel documents
        are read, and one `task_transform_one_journal` job is enqueued per
        value.
    """
    get_db_connection()
    queues = RQueues()
    id_model = identifiers_models.JournalIdModel
    matching_ids = id_model.objects.filter(uuid__in=selected_uuids)
    for journal_issn in matching_ids.values_list('journal_issn'):
        queues.enqueue(
            'transform', 'journal', task_transform_one_journal, journal_issn)
예제 #33
0
파일: jobs.py 프로젝트: jamilatta/opac_proc
def task_press_release_create():
    """
    Enqueue one `task_load_press_release` job (Load stage) for every
    TransformPressRelease document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformPressRelease.objects.all():
        queues.enqueue(
            'load', 'press_release',
            task_load_press_release, uuid=transformed.uuid)
예제 #34
0
파일: jobs.py 프로젝트: jamilatta/opac_proc
def task_article_create():
    """
    Enqueue one `task_load_article` job (Load stage) for every
    TransformArticle document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformArticle.objects.all():
        queues.enqueue(
            'load', 'article', task_load_article, uuid=transformed.uuid)
예제 #35
0
파일: jobs.py 프로젝트: jamilatta/opac_proc
def task_news_create():
    """
    Enqueue one `task_load_news` job (Load stage) for every
    TransformNews document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformNews.objects.all():
        queues.enqueue(
            'load', 'news', task_load_news, uuid=transformed.uuid)
예제 #36
0
파일: jobs.py 프로젝트: jfunez/opac_proc
def task_retrieve_selected_journals_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_journal_identifier` job for every
        id in `selected_ids` (model: JournalIdModel).
    """
    stage, model = 'sync_ids', 'journal'
    queues = RQueues()
    for journal_id in selected_ids:
        queues.enqueue(
            stage, model, task_retrieve_one_journal_identifier, journal_id)
예제 #37
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_extract_selected_issues(selected_uuids):
    """
        Enqueue the extraction of the Issues selected by `selected_uuids`.

        The `issue_pid` values of the matching IssueIdModel documents are
        read, and one `task_extract_one_issue` job is enqueued per value.
    """
    get_db_connection()
    queues = RQueues()
    id_model = identifiers_models.IssueIdModel
    matching_ids = id_model.objects.filter(uuid__in=selected_uuids)
    for pid in matching_ids.values_list('issue_pid'):
        queues.enqueue('extract', 'issue', task_extract_one_issue, pid)
예제 #38
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_transform_selected_collections(selected_uuids):
    """
        Enqueue one `task_transform_one_collection` job per entry of
        `selected_uuids` (Transform stage, model: Collection).
    """
    stage, model = 'transform', 'collection'
    queues = RQueues()
    for _unused_uuid in selected_uuids:
        # The collection task takes no parameter, so the uuid itself is not
        # passed on; one job is simply enqueued per selected uuid (there
        # should be only one in the database).
        queues.enqueue(stage, model, task_transform_one_collection)
예제 #39
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_retrieve_selected_press_releases_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_press_release_identifier` job for
        every id in `selected_ids`, on the 'sync_ids' / 'press_release'
        queue.
    """
    stage, model = 'sync_ids', 'press_release'
    queues = RQueues()
    for press_release_id in selected_ids:
        queues.enqueue(
            stage, model,
            task_retrieve_one_press_release_identifier, press_release_id)
예제 #40
0
파일: jobs.py 프로젝트: jfunez/opac_proc
def task_retrieve_selected_news_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_news_identifier` job for every id
        in `selected_ids` (model: NewsIdModel).
    """
    stage, model = 'sync_ids', 'news'
    queues = RQueues()
    for news_id in selected_ids:
        queues.enqueue(
            stage, model, task_retrieve_one_news_identifier, news_id)
예제 #41
0
파일: jobs.py 프로젝트: jfunez/opac_proc
def task_retrieve_selected_articles_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_article_identifier` job for every
        id in `selected_ids`, on the 'sync_ids' / 'article' queue.
    """
    stage, model = 'sync_ids', 'article'
    queues = RQueues()
    for article_id in selected_ids:
        queues.enqueue(
            stage, model, task_retrieve_one_article_identifier, article_id)
예제 #42
0
파일: jobs.py 프로젝트: jfunez/opac_proc
def task_retrieve_selected_press_releases_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_press_release_identifier` job for
        every id in `selected_ids`, on the 'sync_ids' / 'press_release'
        queue.
    """
    stage, model = 'sync_ids', 'press_release'
    queues = RQueues()
    for press_release_id in selected_ids:
        queues.enqueue(
            stage, model,
            task_retrieve_one_press_release_identifier, press_release_id)
예제 #43
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_transform_selected_articles(selected_uuids):
    """
        Enqueue the transformation of the Articles selected by
        `selected_uuids`.

        The `article_pid` values of the matching ArticleIdModel documents
        are read, and one `task_transform_one_article` job is enqueued per
        value.
    """
    get_db_connection()
    queues = RQueues()
    id_model = identifiers_models.ArticleIdModel

    matching_ids = id_model.objects.filter(uuid__in=selected_uuids)
    for pid in matching_ids.values_list('article_pid'):
        queues.enqueue('transform', 'article', task_transform_one_article, pid)
예제 #44
0
def task_retrive_all_articles_ids():
    """
    Fetch every article identifier from the retriever registered under
    'article' in RETRIEVERS_BY_MODEL and enqueue one
    `task_retrive_articles_ids_by_chunks` job per slice of 1000 identifiers.
    """
    retriever = RETRIEVERS_BY_MODEL['article']()
    queues = RQueues()

    # Materialize the identifiers first, then slice them.
    all_ids = list(retriever.get_data_source_identifiers())

    for id_batch in list(chunks(all_ids, 1000)):
        queues.enqueue(
            'sync_ids', 'article',
            task_retrive_articles_ids_by_chunks, id_batch)
예제 #45
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_extract_all_news():
    """
        Enqueue the extraction of News for every feed configured in
        `config.RSS_NEWS_FEEDS`.

        For each (lang, feed) entry, the feed's 'url' template is formatted
        with the language and one `task_extract_one_news` job is enqueued
        with that url and language.
    """
    queues = RQueues()

    for feed_lang, feed_conf in config.RSS_NEWS_FEEDS.items():
        feed_url = feed_conf['url'].format(feed_lang)
        queues.enqueue(
            'extract', 'news', task_extract_one_news, feed_url, feed_lang)
예제 #46
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_extract_selected_news(selected_uuids):
    """
        Enqueue the extraction of the News selected by `selected_uuids`.

        For every matching ExtractNews document, one
        `task_extract_one_news` job is enqueued with the document's
        `feed_url_used` and `feed_lang`.
    """
    get_db_connection()
    queues = RQueues()

    for extracted in ExtractNews.objects.filter(uuid__in=selected_uuids):
        queues.enqueue(
            'extract', 'news', task_extract_one_news,
            extracted.feed_url_used, extracted.feed_lang)
예제 #47
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_extract_selected_press_releases(selected_uuids):
    """
        Enqueue the extraction of the Press Releases selected by
        `selected_uuids`.

        For every matching ExtractPressRelease document, one
        `task_extract_one_press_release` job is enqueued with the
        document's `journal_acronym`, `feed_url_used` and `feed_lang`.
    """
    get_db_connection()
    queues = RQueues()

    selected_docs = ExtractPressRelease.objects.filter(uuid__in=selected_uuids)
    for extracted in selected_docs:
        queues.enqueue(
            'extract', 'press_release', task_extract_one_press_release,
            extracted.journal_acronym,
            extracted.feed_url_used,
            extracted.feed_lang)
예제 #48
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_retrieve_selected_collections_identifiers(selected_ids):
    """
        Enqueue one `task_retrieve_one_collection_identifier` job for
        every id in `selected_ids` (model: CollectionIdModel).
    """
    stage, model = 'sync_ids', 'collection'
    queues = RQueues()
    for collection_id in selected_ids:
        # One job per selected id; the id is passed on as the job argument.
        queues.enqueue(
            stage, model,
            task_retrieve_one_collection_identifier, collection_id)
예제 #49
0
파일: jobs.py 프로젝트: scieloorg/opac_proc
def task_retrieve_all_news_identifiers():
    """
        Fetch every identifier exposed by NewsIdDataRetriever and enqueue
        one `task_retrieve_selected_news_identifiers` job per slice of
        1000 identifiers.
    """
    queues = RQueues()
    retriever = NewsIdDataRetriever()

    # Materialize the identifiers first, then slice them.
    all_ids = list(retriever.get_data_source_identifiers())

    for id_batch in list(chunks(all_ids, 1000)):
        queues.enqueue(
            'sync_ids', 'news',
            task_retrieve_selected_news_identifiers, id_batch)
예제 #50
0
파일: cli.py 프로젝트: scieloorg/opac_proc
def produce(stage, model, action):
    """
    Enqueue the tasks that GENERATE Differ records for the given stage,
    model and action.

    Each parameter accepts a specific value or 'all', which expands to every
    entry of ETL_STAGE_LIST, ETL_MODEL_NAME_LIST or ACTION_LIST respectively.

    Raises ValueError when a parameter is neither 'all' nor a member of its
    corresponding list.
    """
    if stage != 'all' and stage not in ETL_STAGE_LIST:
        raise ValueError('Param: stage: %s com valor inesperado!' % stage)
    target_stages = ETL_STAGE_LIST if stage == 'all' else [stage]

    if model != 'all' and model not in ETL_MODEL_NAME_LIST:
        raise ValueError('Param: model: %s com valor inesperado!' % model)
    target_models = ETL_MODEL_NAME_LIST if model == 'all' else [model]

    if action != 'all' and action not in ACTION_LIST:
        raise ValueError('Param: action: %s com valor inesperado!' % action)
    target_actions = ACTION_LIST if action == 'all' else [action]

    queues = RQueues()
    # action name -> task that produces the diff records for that action
    producers = {
        'add': task_produce_diff_add,
        'update': task_produce_diff_update,
        'delete': task_produce_diff_delete,
    }

    for current_stage in target_stages:
        for current_model in target_models:
            for current_action in target_actions:
                producer = producers[current_action]
                logger.info(
                    "[%s][%s][%s] enfilerando para producir registros diff",
                    current_stage, current_model, current_action)
                queues.enqueue(
                    'sync_ids', current_model,
                    producer, current_stage, current_model)
예제 #51
0
파일: cli.py 프로젝트: scieloorg/opac_proc
def remove(stage, model, action):
    """
    Enqueue the tasks that REMOVE Differ records for the given stage, model
    and action.

    Each parameter accepts either the literal 'all' (expanded to
    ETL_STAGE_LIST, ETL_MODEL_NAME_LIST or ACTION_LIST respectively) or a
    single value that must be a member of the corresponding list.

    One `task_delete_all_diff_etl_model` job is enqueued on the 'sync_ids'
    queue for every (stage, model, action) combination.

    Raises:
        ValueError: when stage, model or action is neither 'all' nor a member
            of its list. The message names the offending parameter and value.
    """

    if stage == 'all':
        stages_list = ETL_STAGE_LIST
    elif stage not in ETL_STAGE_LIST:
        raise ValueError('Param: stage: %s com valor inesperado!' % stage)
    else:
        stages_list = [
            stage,
        ]

    if model == 'all':
        models_list = ETL_MODEL_NAME_LIST
    elif model not in ETL_MODEL_NAME_LIST:
        # Report the invalid model value itself (not the stage).
        raise ValueError('Param: model: %s com valor inesperado!' % model)
    else:
        models_list = [model]

    if action == 'all':
        actions_list = ACTION_LIST
    elif action not in ACTION_LIST:
        # Validate the action (not the model) against ACTION_LIST; checking
        # the model here would reject every call with a specific action.
        raise ValueError('Param: action: %s com valor inesperado!' % action)
    else:
        actions_list = [
            action,
        ]

    r_queues = RQueues()
    for stage_ in stages_list:
        for model_ in models_list:
            for action_ in actions_list:
                logger.info(
                    "[%s][%s][%s] enfilerando para remover registros diff",
                    stage_, model_, action_)
                r_queues.enqueue('sync_ids', model_,
                                 task_delete_all_diff_etl_model, stage_,
                                 model_, action_)
예제 #52
0
class ProcessDiffersBase:
    """
    Helper that enqueues Differ jobs (produce, consume, delete) for one model.

    Every job goes to the 'sync_ids' queue through `r_queues.enqueue`, keyed
    by `model_name`. Tasks are referenced by the dotted-path strings stored in
    the `task_*` class attributes.

    NOTE(review): `model_name` is empty here, so concrete subclasses are
    presumably expected to override it -- confirm against the subclasses.
    """
    model_name = ''
    collection_acronym = None
    # Both objects are created once, when the class body is executed, and are
    # shared through the class attributes.
    r_queues = RQueues()
    db = get_db_connection()

    # Dotted paths of the jobs that produce Differ records.
    task_produce_add = 'opac_proc.differs.produce_jobs.task_produce_diff_add'
    task_produce_update = 'opac_proc.differs.produce_jobs.task_produce_diff_update'
    task_produce_delete = 'opac_proc.differs.produce_jobs.task_produce_diff_delete'
    # Dotted paths of the jobs that delete Differ records.
    task_delete_selected = 'opac_proc.differs.produce_jobs.task_delete_selected_diff_etl_model'
    task_delete_all = 'opac_proc.differs.produce_jobs.task_delete_all_diff_etl_model'
    # Dotted paths of the jobs that consume Differ records.
    task_consume_add = 'opac_proc.differs.consumer_jobs.task_consume_diff_add'
    task_consume_update = 'opac_proc.differs.consumer_jobs.task_consume_diff_update'
    task_consume_delete = 'opac_proc.differs.consumer_jobs.task_consume_diff_delete'

    def produce(self, stage, action):
        """
        Enqueue the produce task matching `action` for this model and `stage`.

        Raises:
            ValueError: if `action` is not 'add', 'update' or 'delete'.
        """
        if action not in ('add', 'update', 'delete'):
            raise ValueError(u'Param: action %s é inválido' % action)

        # Built from `self` so that attributes overridden on a subclass or an
        # instance are honoured.
        task_path = {
            'add': self.task_produce_add,
            'update': self.task_produce_update,
            'delete': self.task_produce_delete,
        }[action]
        self.r_queues.enqueue(
            'sync_ids', self.model_name, task_path,
            stage, self.model_name)

    def consume(self, stage, action):
        """
        Enqueue the consume task matching `action` for this model and `stage`.

        Raises:
            ValueError: if `action` is not 'add', 'update' or 'delete'.
        """
        if action not in ('add', 'update', 'delete'):
            raise ValueError(u'Param: action %s é inválido' % action)

        # Built from `self` so that attributes overridden on a subclass or an
        # instance are honoured.
        task_path = {
            'add': self.task_consume_add,
            'update': self.task_consume_update,
            'delete': self.task_consume_delete,
        }[action]
        self.r_queues.enqueue(
            'sync_ids', self.model_name, task_path,
            stage, self.model_name)

    def delete_selected(self, stage, action, selected_uuids):
        """
        Enqueue the delete-selected task, passing it `stage`, the model name,
        `action` and `selected_uuids`.
        """
        job_args = (stage, self.model_name, action, selected_uuids)
        self.r_queues.enqueue(
            'sync_ids', self.model_name, self.task_delete_selected, *job_args)

    def delete_all(self, stage, action):
        """
        Enqueue the delete-all task, passing it `stage`, the model name and
        `action`.
        """
        job_args = (stage, self.model_name, action)
        self.r_queues.enqueue(
            'sync_ids', self.model_name, self.task_delete_all, *job_args)
예제 #53
0
def enqueue_ids_data_retriever(model_name='all'):
    """
    Enqueue, for each selected model, the task that calls its data retriever.

    Param:
    - `model_name`: a single model name, or 'all' (the default) to enqueue one
      task for every entry of MODEL_NAME_LIST.

    For each model a start and an end sync-event record are written around the
    enqueue call, and both moments are also logged. The messages always name
    the specific model being enqueued, even when `model_name` is 'all'.
    """
    models_list = MODEL_NAME_LIST if model_name == 'all' else [model_name]

    r_queues = RQueues()
    # The same task function serves every model; the model name is its argument.
    task_fn = task_call_data_retriver_by_model
    for model_ in models_list:
        logger.info('Enfilerando task: %s para o model: %s.', task_fn, model_)
        # Use the loop variable, not the `model_name` parameter: with the
        # default 'all' the parameter would label every record "model: all".
        create_sync_event_record(
            'sync_ids', model_, 'enqueue_ids_data_retriever',
            u'Inciando enfileramento para recuperar dados do IdModel model: %s'
            % model_)
        r_queues.enqueue('sync_ids', model_, task_fn, model_)
        logger.info('Fim: Enfilerando task: %s para o model: %s.',
                    task_fn, model_)
        create_sync_event_record(
            'sync_ids', model_, 'enqueue_ids_data_retriever',
            u'Fim do enfileramento para recuperar dados do IdModel model: %s' %
            model_)