Ejemplo n.º 1
0
def task_delete_selected_collections_identifiers(selected_ids):
    """
    Task that deletes Collection identifier documents.

    @param:
    - selected_ids: list of pks of the documents to be removed

    If `selected_ids` has more than SLICE_SIZE items, it is split with
    `chunks` into slices of size SLICE_SIZE and this same task is enqueued
    once per slice (queue arguments: 'sync_ids', 'collection').
    Otherwise the documents are deleted directly through the queryset.
    """
    SLICE_SIZE = 1000
    get_db_connection()
    queues = RQueues()

    if len(selected_ids) <= SLICE_SIZE:
        # Small batch: a single queryset delete is enough.
        CollectionIdModel.objects.filter(pk__in=selected_ids).delete()
        return

    for id_slice in list(chunks(selected_ids, SLICE_SIZE)):
        # The ids travel as plain strings in the task arguments.
        ids_as_strings = [str(pk) for pk in id_slice]
        queues.enqueue(
            'sync_ids', 'collection',
            task_delete_selected_collections_identifiers, ids_as_strings)
Ejemplo n.º 2
0
def task_journal_update(ids=None):
    """
    Flag transformed journals for reprocessing and enqueue their transform.

    @param:
    - ids: iterable of TransformJournal pks; when None, every
      TransformJournal document is processed.

    The ISSN handed to the task is the first truthy value among the fields
    'scielo_issn', 'print_issn' and 'eletronic_issn'.

    Raises ValueError when `ids` is None and a journal has no ISSN. When
    `ids` is given, any error raised for a pk is logged and the loop moves
    on to the next pk.
    """
    def _first_issn(journal_doc):
        # Same precedence in both branches: scielo, then print, then electronic.
        return (journal_doc.get('scielo_issn', False) or
                journal_doc.get('print_issn', False) or
                journal_doc.get('eletronic_issn', False))

    get_db_connection()
    stage, model = 'transform', 'journal'
    queues = RQueues()
    queues.create_queues_for_stage(stage)
    collection = models.TransformCollection.objects.all().first()

    if ids is not None:
        for oid in ids:
            try:
                journal = models.TransformJournal.objects.get(pk=oid)
                journal.update(must_reprocess=True)
                journal.reload()
                issn = _first_issn(journal)
                if not issn:
                    raise ValueError(u'Journal sem issn')
                queues.enqueue(
                    stage, model,
                    task_transform_journal, collection.acronym, issn)
            except Exception as e:
                logger.error('models.TransformJournal %s. pk: %s', str(e), oid)
        return

    # No pks given: update every journal.
    models.TransformJournal.objects.all().update(must_reprocess=True)
    for journal in models.TransformJournal.objects.all():
        issn = _first_issn(journal)
        if not issn:
            raise ValueError(u'Journal sem issn')
        queues.enqueue(
            stage, model,
            task_transform_journal, collection.acronym, issn)
Ejemplo n.º 3
0
def serial_retriever_article_ids(filepath):
    """
    Sync ArticleIdModel documents from a file with one JSON object per line.

    Each JSON object must provide the keys 'code' and 'processing_date'.
    For every non-blank line:
    - if no ArticleIdModel with `article_pid == code` exists, a new document
      is created (journal ISSN and issue PID are derived from the code);
    - if one exists and its processing date differs from the parsed date of
      the line, the date is updated and the document saved;
    - otherwise the document is left untouched.

    @param:
    - filepath: path of the file to read

    Raises KeyError when a line lacks 'code' or 'processing_date', and
    whatever `json.loads` raises for a malformed line.
    """
    get_db_connection()
    with open(filepath) as fp:
        logger.info('lendo arquivo: %s', filepath)
        for line in fp:
            if not line.strip():
                # Blank lines carry no record and would make json.loads raise.
                continue

            aid_data = json.loads(line)
            code = aid_data['code']
            new_processing_date = parse_date_str_to_datetime_obj(aid_data['processing_date'])

            # Only the lookup can raise DoesNotExist, so only it is guarded.
            try:
                art = ArticleIdModel.objects.get(article_pid=code)
            except ArticleIdModel.DoesNotExist:
                new_article_data = {
                    'journal_issn': parse_journal_issn_from_article_code(code),
                    'issue_pid': parse_issue_pid_from_article_code(code),
                    'article_pid': code,
                    # Store the parsed date, the same value the update branch
                    # assigns and compares against, instead of the raw string.
                    'processing_date': new_processing_date,
                }
                new_art = ArticleIdModel(**new_article_data)
                logger.info('cadastrando novo artigo: %s', aid_data)
                new_art.save()
            else:
                if art.processing_date != new_processing_date:
                    # update
                    logger.info('atualizando aid: %s', code)
                    art.processing_date = new_processing_date
                    art.save()
                else:
                    logger.info(u'artigo aid: %s sem mudança de data', code)
Ejemplo n.º 4
0
def task_delete_selected_collections(selected_uuids):
    """
    Task that deletes loaded Collections.

    @param:
    - selected_uuids: list of UUIDs of the documents to be removed

    If `selected_uuids` has more than SLICE_SIZE items, it is split with
    `chunks` into slices of size SLICE_SIZE and this same task is enqueued
    once per slice.
    Otherwise the LoadCollection documents are deleted directly through the
    queryset, followed by the matching Collection documents in the OPAC
    database.
    """
    SLICE_SIZE = 1000
    stage, model = 'load', 'collection'
    get_db_connection()
    queues = RQueues()

    if len(selected_uuids) > SLICE_SIZE:
        for uuid_slice in list(chunks(selected_uuids, SLICE_SIZE)):
            uuids_as_strings = [str(item) for item in uuid_slice]
            queues.enqueue(stage, model, task_delete_selected_collections, uuids_as_strings)
        return

    # Remove the LoadCollection documents identified by the uuids.
    LoadCollection.objects.filter(uuid__in=selected_uuids).delete()

    # Convert each uuid to the OPAC _id form (dashes stripped) and delete
    # those documents from the OPAC database.
    register_connections()
    opac_ids = [str(item).replace('-', '') for item in selected_uuids]
    with switch_db(opac_models.Collection, OPAC_WEBAPP_DB_NAME) as opac_collection:
        opac_collection.objects.filter(pk__in=opac_ids).delete()
Ejemplo n.º 5
0
def task_delete_selected_collections(selected_uuids):
    """
    Task that deletes transformed Collections.

    @param:
    - selected_uuids: list of UUIDs of the documents to be removed

    If `selected_uuids` has more than SLICE_SIZE items, it is split with
    `chunks` into slices of size SLICE_SIZE and this same task is enqueued
    once per slice.
    Otherwise the TransformCollection documents are deleted directly
    through the queryset.
    """
    SLICE_SIZE = 1000
    stage, model = 'transform', 'collection'
    get_db_connection()
    queues = RQueues()

    if len(selected_uuids) <= SLICE_SIZE:
        TransformCollection.objects.filter(uuid__in=selected_uuids).delete()
        return

    for uuid_slice in list(chunks(selected_uuids, SLICE_SIZE)):
        uuids_as_strings = [str(item) for item in uuid_slice]
        queues.enqueue(stage, model, task_delete_selected_collections, uuids_as_strings)
Ejemplo n.º 6
0
def task_delete_selected_diff_etl_model(stage, model_name, action, selected_uuids):
    """
    Task that deletes selected diff documents of one ETL model.

    @param:
    - stage: must be a member of ETL_STAGE_LIST
    - model_name: must be a member of ETL_MODEL_NAME_LIST; selects the diff
      model class via DIFF_MODEL_CLASS_BY_NAME
    - action: must be a member of ACTION_LIST
    - selected_uuids: list of UUIDs of the documents to be removed

    Raises ValueError when `stage`, `model_name` or `action` is invalid.

    Lists longer than SLICE_SIZE are split with `chunks` and this same task
    is enqueued once per slice; shorter lists are deleted directly.
    Note that the delete filters by uuid only: `stage` and `action` are
    validated and forwarded to the re-enqueued task but are not part of the
    query.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)
    if action not in ACTION_LIST:
        raise ValueError(u'param action: %s é inválido' % action)

    SLICE_SIZE = 1000
    get_db_connection()
    diff_model = DIFF_MODEL_CLASS_BY_NAME[model_name]
    queues = RQueues()

    if len(selected_uuids) <= SLICE_SIZE:
        diff_model.objects.filter(uuid__in=selected_uuids).delete()
        return

    for uuid_slice in list(chunks(selected_uuids, SLICE_SIZE)):
        uuids_as_strings = [str(item) for item in uuid_slice]
        # The last four positional values are the arguments of the task.
        queues.enqueue(
            'sync_ids',
            model_name,
            task_delete_selected_diff_etl_model,
            stage, model_name, action, uuids_as_strings)
Ejemplo n.º 7
0
def task_article_update(ids=None):
    """
    Flag transformed articles for reprocessing and enqueue their transform.

    @param:
    - ids: iterable of TransformArticle pks; when None, every
      TransformArticle document is processed.

    When `ids` is given, any error raised for a pk is logged and the loop
    moves on to the next pk.
    """
    get_db_connection()
    stage, model = 'transform', 'article'
    queues = RQueues()
    queues.create_queues_for_stage(stage)

    collection = models.TransformCollection.objects.all().first()

    if ids is not None:
        for oid in ids:
            try:
                article = models.TransformArticle.objects.get(pk=oid)
                article.update(must_reprocess=True)
                article.reload()
                queues.enqueue(
                    stage, model,
                    task_transform_article, collection.acronym, article.pid)
            except Exception as e:
                logger.error('models.TransformArticle %s. pk: %s', str(e), oid)
        return

    # No pks given: update every article.
    models.TransformArticle.objects.all().update(must_reprocess=True)
    for article in models.TransformArticle.objects.all():
        queues.enqueue(
            stage, model,
            task_transform_article, collection.acronym, article.pid)
Ejemplo n.º 8
0
def task_consume_diff_update(stage, model_name):
    """
    Task that consumes the UPDATE diff records selected by the parameters.

    - @param stage: ETL stage; must be a member of ETL_STAGE_LIST
    - @param model_name: ETL model name; must be a member of
      ETL_MODEL_NAME_LIST

    Raises ValueError when `stage` or `model_name` is invalid.

    The unapplied uuids reported by the model's differ are split with
    `chunks` into slices of size SLICE_SIZE, and one
    `task_differ_apply_for_selected_uuids` task is enqueued per slice.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)

    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)

    action = 'update'
    SLICE_SIZE = 1000
    queues = RQueues()
    get_db_connection()

    differ = ETL_DIFFERS_BY_MODEL[model_name]()
    pending_uuids = differ.get_uuids_unapplied(stage, action)

    for uuid_slice in list(chunks(pending_uuids, SLICE_SIZE)):
        uuids_as_strings = [str(item) for item in uuid_slice]
        logger.info(u'enfilerando: consumo de UUUIDs selecionados (stage:%s, model: %s, action: %s)' % (stage, model_name, action))
        queues.enqueue(
            'sync_ids', model_name,
            task_differ_apply_for_selected_uuids,
            stage, model_name, action, uuids_as_strings)
Ejemplo n.º 9
0
def enqueue_full_populate_task_by_model(model_name='all'):
    """
    Enqueue the populate task of one model, or of every known model.

    @param:
    - model_name: 'all' (default) to enqueue the populate task of every
      model listed below, or one of 'collection', 'journal', 'issue',
      'article', 'news', 'press_release'.

    Raises ValueError for any other `model_name`. The check happens after
    the starting sync event record has been created.

    A sync event record and a log line are written at the start and at the
    end of the enqueueing.
    """
    logger.info("Inicinado: enqueue_full_populate_task_by_model para modelo: %s", model_name)
    create_sync_event_record(
        'sync_ids', model_name, 'enqueue_full_populate_task_by_model',
        u'Inciando enfileramento para preencher datas dos registros diff model: %s' % model_name)

    # setup
    get_db_connection()
    stage = 'sync_ids'
    queues = RQueues()

    # Extract model class and populate task for each supported model name.
    options = {
        'collection': {
            'model_class': models.ExtractCollection,
            'task_fn': task_populate_collections
        },
        'journal': {
            'model_class': models.ExtractJournal,
            'task_fn': task_populate_journals
        },
        'issue': {
            'model_class': models.ExtractIssue,
            'task_fn': task_populate_issues
        },
        'article': {
            'model_class': models.ExtractArticle,
            'task_fn': task_populate_articles
        },
        'news': {
            'model_class': models.ExtractNews,
            'task_fn': task_populate_news
        },
        'press_release': {
            'model_class': models.ExtractPressRelease,
            'task_fn': task_populate_press_release
        }
    }

    if model_name == 'all':
        targets = options.items()
    elif model_name in options.keys():
        targets = [(model_name, options[model_name])]
    else:
        raise ValueError('Param: model_name: %s inesperado' % model_name)

    for target_name, entry in targets:
        model_class = entry['model_class']
        task_fn = entry['task_fn']
        logger.info("enfilerando stage: %s model_name: %s model_class: %s" % (stage, target_name, model_class))
        queues.enqueue(stage, target_name, task_fn)

    create_sync_event_record(
        'sync_ids', model_name, 'enqueue_full_populate_task_by_model',
        u'Fim do enfileramento para preencher datas dos registros diff model: %s' % model_name)

    logger.info("Fim: enqueue_full_populate_task_by_model para modelo: %s", model_name)
Ejemplo n.º 10
0
def task_populate_news(ids=None):
    """
    Delegate to `generic_task_enqueue_from_uuid_iterable` for the 'news'
    model of the 'sync_ids' stage, using ExtractNews as the model class and
    `task_populate_one_news` as the target task.

    @param:
    - ids: forwarded unchanged as the last argument of the generic helper
    """
    get_db_connection()
    queues = RQueues()
    generic_task_enqueue_from_uuid_iterable(
        'sync_ids', 'news', models.ExtractNews,
        queues, task_populate_one_news, ids)
Ejemplo n.º 11
0
def task_collection_create():
    """
    Enqueue one `task_load_collection` task (stage 'load', model
    'collection') per TransformCollection document, passing its uuid.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformCollection.objects.all():
        queues.enqueue('load', 'collection', task_load_collection, transformed.uuid)
Ejemplo n.º 12
0
def task_populate_press_release(ids=None):
    """
    Delegate to `generic_task_enqueue_from_uuid_iterable` for the
    'press_release' model of the 'sync_ids' stage, using ExtractPressRelease
    as the model class and `task_populate_one_press_release` as the target
    task.

    @param:
    - ids: forwarded unchanged as the last argument of the generic helper
    """
    get_db_connection()
    queues = RQueues()
    generic_task_enqueue_from_uuid_iterable(
        'sync_ids', 'press_release', models.ExtractPressRelease,
        queues, task_populate_one_press_release, ids)
Ejemplo n.º 13
0
def task_collection_create():
    """
    Create the queues of the 'transform' stage and enqueue a single
    `task_transform_collection` task for the 'collection' model.
    """
    get_db_connection()
    stage = 'transform'
    queues = RQueues()
    queues.create_queues_for_stage(stage)
    queues.enqueue(stage, 'collection', task_transform_collection)
Ejemplo n.º 14
0
def delete_identifiers(model_name):
    """
    Remove every identifier document of the model named `model_name`.

    @param:
    - model_name: key of ID_MODEL_CLASS selecting the identifier model

    Raises ValueError when `model_name` is not a key of ID_MODEL_CLASS.
    """
    if model_name not in ID_MODEL_CLASS.keys():
        raise ValueError(u'parametro: model_name: %s não é válido!' % model_name)

    get_db_connection()
    id_documents = ID_MODEL_CLASS[model_name].objects()
    # The count is logged before the delete so the message reports how many
    # documents are about to be removed.
    logger.info(u"Removendo: %s objetos do modelo: %s" % (id_documents.count(), model_name))
    id_documents.delete()
    logger.info(u"Objetos removidos com sucesso!")
Ejemplo n.º 15
0
def task_issue_create():
    """
    Enqueue one `task_load_issue` task (stage 'load', model 'issue') per
    TransformIssue document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformIssue.objects.all():
        queues.enqueue('load', 'issue', task_load_issue, uuid=transformed.uuid)
Ejemplo n.º 16
0
def task_press_release_create():
    """
    Enqueue one `task_load_press_release` task (stage 'load', model
    'press_release') per TransformPressRelease document, passing its uuid
    as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformPressRelease.objects.all():
        queues.enqueue(
            'load', 'press_release',
            task_load_press_release, uuid=transformed.uuid)
Ejemplo n.º 17
0
def task_transform_selected_journals(selected_uuids):
    """
    Task that runs the Transform step for a LIST of Journal UUIDs.

    @param:
    - selected_uuids: UUIDs used to filter JournalIdModel documents

    One `task_transform_one_journal` task is enqueued for each value
    produced by `values_list('journal_issn')` on the filtered queryset.
    """
    get_db_connection()
    queues = RQueues()
    matching_ids = identifiers_models.JournalIdModel.objects.filter(uuid__in=selected_uuids)
    for journal_issn in matching_ids.values_list('journal_issn'):
        queues.enqueue('transform', 'journal', task_transform_one_journal, journal_issn)
Ejemplo n.º 18
0
def task_journal_create():
    """
    Enqueue one `task_load_journal` task (stage 'load', model 'journal')
    per TransformJournal document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformJournal.objects.all():
        queues.enqueue('load', 'journal', task_load_journal, uuid=transformed.uuid)
Ejemplo n.º 19
0
def task_article_create():
    """
    Enqueue one `task_load_article` task (stage 'load', model 'article')
    per TransformArticle document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformArticle.objects.all():
        queues.enqueue('load', 'article', task_load_article, uuid=transformed.uuid)
Ejemplo n.º 20
0
def task_news_create():
    """
    Enqueue one `task_load_news` task (stage 'load', model 'news') per
    TransformNews document, passing its uuid as the `uuid` keyword.
    """
    get_db_connection()
    queues = RQueues()

    for transformed in models.TransformNews.objects.all():
        queues.enqueue('load', 'news', task_load_news, uuid=transformed.uuid)
Ejemplo n.º 21
0
def task_extract_selected_issues(selected_uuids):
    """
    Task that runs the Extract step for a LIST of Issue UUIDs.

    @param:
    - selected_uuids: UUIDs used to filter IssueIdModel documents

    One `task_extract_one_issue` task is enqueued for each value produced
    by `values_list('issue_pid')` on the filtered queryset.
    """
    get_db_connection()
    queues = RQueues()
    matching_ids = identifiers_models.IssueIdModel.objects.filter(uuid__in=selected_uuids)
    for pid in matching_ids.values_list('issue_pid'):
        queues.enqueue('extract', 'issue', task_extract_one_issue, pid)
Ejemplo n.º 22
0
def task_extract_selected_issues(selected_uuids):
    """
    Task that runs the Extract step for a LIST of Issue UUIDs.

    @param:
    - selected_uuids: UUIDs used to filter IssueIdModel documents

    One `task_extract_one_issue` task is enqueued for each value produced
    by `values_list('issue_pid')` on the filtered queryset.
    """
    get_db_connection()
    stage, model = 'extract', 'issue'
    queues = RQueues()
    id_model = identifiers_models.IssueIdModel
    selected_pids = id_model.objects.filter(uuid__in=selected_uuids).values_list('issue_pid')
    for pid in selected_pids:
        queues.enqueue(stage, model, task_extract_one_issue, pid)
Ejemplo n.º 23
0
def task_news_create():
    """
    Create the queues of the 'transform' stage and enqueue one
    `task_transform_news` task per ExtractNews document, passing its uuid
    as the `news_uuid` keyword.
    """
    get_db_connection()
    stage = 'transform'
    queues = RQueues()
    queues.create_queues_for_stage(stage)

    for extracted in models.ExtractNews.objects.all():
        queues.enqueue(stage, 'news', task_transform_news, news_uuid=extracted.uuid)
Ejemplo n.º 24
0
def task_press_release_create():
    """
    Create the queues of the 'transform' stage and enqueue one
    `task_transform_press_release` task per ExtractPressRelease document,
    passing its uuid as the `press_release_uuid` keyword.
    """
    get_db_connection()
    stage = 'transform'
    queues = RQueues()
    queues.create_queues_for_stage(stage)

    for extracted in models.ExtractPressRelease.objects.all():
        queues.enqueue(
            stage, 'press_release',
            task_transform_press_release, press_release_uuid=extracted.uuid)
Ejemplo n.º 25
0
def task_delete_all_articles():
    """
    Delete every LoadArticle document and then every Article document of
    the OPAC database.
    """
    # Remove all documents of the Load Article model (opac-proc).
    get_db_connection()
    LoadArticle.objects.all().delete()

    # Remove all documents of the Article model (opac).
    register_connections()
    with switch_db(opac_models.Article, OPAC_WEBAPP_DB_NAME) as opac_article:
        opac_article.objects.all().delete()
Ejemplo n.º 26
0
def task_delete_all_journals():
    """
    Delete every LoadJournal document and then every Journal document of
    the OPAC database.
    """
    # Remove all documents of the Load Journal model (opac-proc).
    get_db_connection()
    LoadJournal.objects.all().delete()

    # Remove all documents of the Journal model (opac).
    register_connections()
    with switch_db(opac_models.Journal, OPAC_WEBAPP_DB_NAME) as opac_journal:
        opac_journal.objects.all().delete()
Ejemplo n.º 27
0
def task_delete_all_news():
    """
    Delete every LoadNews document and then every News document of the
    OPAC database.
    """
    get_db_connection()
    # Remove all documents of the Load News model (opac-proc).
    LoadNews.objects.all().delete()

    # Remove all documents of the News model (opac).
    register_connections()
    with switch_db(opac_models.News, OPAC_WEBAPP_DB_NAME) as opac_news:
        opac_news.objects.all().delete()
Ejemplo n.º 28
0
def task_transform_selected_articles(selected_uuids):
    """
    Task that runs the Transform step for a LIST of Article UUIDs.

    @param:
    - selected_uuids: UUIDs used to filter ArticleIdModel documents

    One `task_transform_one_article` task is enqueued for each value
    produced by `values_list('article_pid')` on the filtered queryset.
    """
    get_db_connection()
    queues = RQueues()
    matching_ids = identifiers_models.ArticleIdModel.objects.filter(uuid__in=selected_uuids)

    for pid in matching_ids.values_list('article_pid'):
        queues.enqueue('transform', 'article', task_transform_one_article, pid)
Ejemplo n.º 29
0
def task_delete_all_news():
    """
    Delete every LoadNews document and then every News document of the
    OPAC database.
    """
    get_db_connection()
    # Remove all documents of the Load News model (opac-proc).
    load_news_docs = LoadNews.objects.all()
    load_news_docs.delete()

    # Remove all documents of the News model (opac).
    register_connections()
    with switch_db(opac_models.News, OPAC_WEBAPP_DB_NAME) as opac_news_model:
        opac_news_model.objects.all().delete()
Ejemplo n.º 30
0
def task_delete_all_press_releases():
    """
    Delete every LoadPressRelease document and then every PressRelease
    document of the OPAC database.
    """
    # Remove all documents of the Load PressRelease model (opac-proc).
    get_db_connection()
    LoadPressRelease.objects.all().delete()

    # Remove all documents of the PressRelease model (opac).
    register_connections()
    with switch_db(opac_models.PressRelease, OPAC_WEBAPP_DB_NAME) as opac_press_release:
        opac_press_release.objects.all().delete()
Ejemplo n.º 31
0
def task_delete_all_diff_etl_model(stage, model_name, action):
    """
    Delete every diff document of one ETL model for a stage and action.

    @param:
    - stage: must be a member of ETL_STAGE_LIST
    - model_name: must be a member of ETL_MODEL_NAME_LIST; selects the diff
      model class via DIFF_MODEL_CLASS_BY_NAME
    - action: must be a member of ACTION_LIST

    Raises ValueError when any parameter is invalid. The database
    connection is opened before the parameters are validated.
    """
    get_db_connection()

    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)
    if action not in ACTION_LIST:
        raise ValueError(u'param action: %s é inválido' % action)

    diff_model = DIFF_MODEL_CLASS_BY_NAME[model_name]
    diff_model.objects.filter(stage=stage, action=action).delete()
Ejemplo n.º 32
0
def task_extract_selected_news(selected_uuids):
    """
    Task that runs the Extract step for a LIST of News UUIDs.

    @param:
    - selected_uuids: UUIDs used to filter ExtractNews documents

    One `task_extract_one_news` task is enqueued per matching document,
    with its `feed_url_used` and `feed_lang` as arguments.
    """
    get_db_connection()
    queues = RQueues()

    for extracted in ExtractNews.objects.filter(uuid__in=selected_uuids):
        queues.enqueue(
            'extract', 'news', task_extract_one_news,
            extracted.feed_url_used, extracted.feed_lang)
Ejemplo n.º 33
0
def task_extract_selected_news(selected_uuids):
    """
    Task that runs the Extract step for a LIST of News UUIDs.

    @param:
    - selected_uuids: UUIDs used to filter ExtractNews documents

    One `task_extract_one_news` task is enqueued per matching document,
    with its `feed_url_used` and `feed_lang` as arguments.
    """
    get_db_connection()
    stage, model = 'extract', 'news'
    queues = RQueues()

    selected_news = ExtractNews.objects.filter(uuid__in=selected_uuids)
    for news_doc in selected_news:
        queues.enqueue(
            stage, model, task_extract_one_news,
            news_doc.feed_url_used, news_doc.feed_lang)
Ejemplo n.º 34
0
def delete_identifiers(model_name):
    """
    Remove every identifier document of the model named `model_name`.

    @param:
    - model_name: key of ID_MODEL_CLASS selecting the identifier model

    Raises ValueError when `model_name` is not a key of ID_MODEL_CLASS.
    """
    valid_names = ID_MODEL_CLASS.keys()
    if model_name not in valid_names:
        raise ValueError(u'parametro: model_name: %s não é válido!' % model_name)

    get_db_connection()
    id_model = ID_MODEL_CLASS[model_name]
    to_remove = id_model.objects()
    # Log the count before deleting so the message reports how many
    # documents are about to be removed.
    logger.info(u"Removendo: %s objetos do modelo: %s" % (to_remove.count(), model_name))
    to_remove.delete()
    logger.info(u"Objetos removidos com sucesso!")
Ejemplo n.º 35
0
def task_extract_selected_press_releases(selected_uuids):
    """
    Task that runs the Extract step for a LIST of Press Release UUIDs.

    @param:
    - selected_uuids: UUIDs used to filter ExtractPressRelease documents

    One `task_extract_one_press_release` task is enqueued per matching
    document, with its `journal_acronym`, `feed_url_used` and `feed_lang`
    as arguments.
    """
    get_db_connection()
    queues = RQueues()

    for extracted in ExtractPressRelease.objects.filter(uuid__in=selected_uuids):
        queues.enqueue(
            'extract', 'press_release', task_extract_one_press_release,
            extracted.journal_acronym, extracted.feed_url_used, extracted.feed_lang)
Ejemplo n.º 36
0
def task_journal_create():
    """
    Create the queues of the 'transform' stage and enqueue one
    `task_transform_journal` task per entry of `children_ids` of the first
    TransformCollection document, passing the collection acronym and the
    entry's 'issn'.
    """
    get_db_connection()
    stage, model = 'transform', 'journal'
    queues = RQueues()
    queues.create_queues_for_stage(stage)

    first_collection = models.TransformCollection.objects.all().first()

    for child in first_collection.children_ids:
        queues.enqueue(
            stage, model,
            task_transform_journal, first_collection.acronym, child['issn'])
Ejemplo n.º 37
0
def task_delete_selected_collections(selected_uuids):
    """
    Task that deletes loaded Collections.

    @param:
    - selected_uuids: list of UUIDs of the documents to be removed

    If `selected_uuids` has more than SLICE_SIZE items, it is split with
    `chunks` into slices of size SLICE_SIZE and this same task is enqueued
    once per slice.
    Otherwise the LoadCollection documents are deleted directly through the
    queryset, followed by the matching Collection documents in the OPAC
    database.
    """
    SLICE_SIZE = 1000
    get_db_connection()
    queues = RQueues()

    if len(selected_uuids) <= SLICE_SIZE:
        # Remove the LoadCollection documents identified by the uuids.
        LoadCollection.objects.filter(uuid__in=selected_uuids).delete()

        # Convert each uuid to the OPAC _id form (dashes stripped) and
        # delete those documents from the OPAC database.
        register_connections()
        opac_ids = [str(item).replace('-', '') for item in selected_uuids]
        with switch_db(opac_models.Collection, OPAC_WEBAPP_DB_NAME) as opac_collection:
            opac_collection.objects.filter(pk__in=opac_ids).delete()
        return

    for uuid_slice in list(chunks(selected_uuids, SLICE_SIZE)):
        uuids_as_strings = [str(item) for item in uuid_slice]
        queues.enqueue(
            'load', 'collection',
            task_delete_selected_collections, uuids_as_strings)
Ejemplo n.º 38
0
def task_extract_all_articles(uuids=None):
    """
    Task that runs the Extract step for ALL Article records.

    @param:
    - uuids: accepted but not used by this function

    The uuids of every ArticleIdModel document are handed to
    `task_extract_selected_articles` as lists of strings: a single task when
    there are at most SLICE_SIZE uuids, otherwise one task per slice
    produced by `chunks`.
    """
    get_db_connection()
    stage, model = 'extract', 'article'
    queues = RQueues()
    SLICE_SIZE = 1000

    all_uuids = identifiers_models.ArticleIdModel.objects.all().values_list('uuid')
    if len(all_uuids) > SLICE_SIZE:
        batches = list(chunks(all_uuids, SLICE_SIZE))
    else:
        # Everything fits in one task (this also covers an empty result).
        batches = [all_uuids]

    for batch in batches:
        uuids_as_strings = [str(item) for item in batch]
        queues.enqueue(stage, model, task_extract_selected_articles, uuids_as_strings)
Ejemplo n.º 39
0
def task_load_all_news():
    """
    Task that runs the Load step for ALL News records.

    The uuids of every NewsIdModel document are handed to
    `task_load_selected_news` as lists of strings: a single task when there
    are at most SLICE_SIZE uuids, otherwise one task per slice produced by
    `chunks`.
    """
    get_db_connection()
    stage, model = 'load', 'news'
    queues = RQueues()
    SLICE_SIZE = 1000

    all_uuids = identifiers_models.NewsIdModel.objects.all().values_list('uuid')
    if len(all_uuids) > SLICE_SIZE:
        batches = list(chunks(all_uuids, SLICE_SIZE))
    else:
        # Everything fits in one task (this also covers an empty result).
        batches = [all_uuids]

    for batch in batches:
        uuids_as_strings = [str(item) for item in batch]
        queues.enqueue(stage, model, task_load_selected_news, uuids_as_strings)
Ejemplo n.º 40
0
    def __init__(self):
        """
        Check that the required attributes are defined and make sure a
        database connection is stored in `self._db`.

        Raises AttributeError when `model_name`, `id_model_class`,
        `ex_model_class` or `tr_model_class` is None (checked in that order).
        """
        required = (
            ('model_name', u'Falta definir atributo: model_name'),
            ('id_model_class', u'Falta definir atributo: id_model_class'),
            ('ex_model_class', u'Falta definir atributo: ex_model_class'),
            ('tr_model_class', u'Falta definir atributo: tr_model_class'),
        )
        for attr_name, message in required:
            if getattr(self, attr_name) is None:
                raise AttributeError(message)

        # Connect only when no connection has been set yet.
        if self._db is None:
            self._db = get_db_connection()
Ejemplo n.º 41
0
    def __init__(self):
        """
        Check the required attributes and fill in the collaborators that are
        still None: the database connection, the ArticleMeta client and the
        AMDBAPI instance. Finally calls the parent initializer.

        Raises AttributeError when `model_name` or `idmodel_class` is None
        (checked in that order).
        """
        required = (
            ('model_name', u'Falta definir atributo: model_name'),
            ('idmodel_class', u'Falta definir atributo: idmodel_class'),
        )
        for attr_name, message in required:
            if getattr(self, attr_name) is None:
                raise AttributeError(message)

        if self._db is None:
            self._db = get_db_connection()

        if self.api_client is None:
            # Client settings come from the project configuration.
            domain = config.ARTICLE_META_THRIFT_DOMAIN
            port = config.ARTICLE_META_THRIFT_PORT
            timeout = config.ARTICLE_META_THRIFT_TIMEOUT
            self.api_client = custom_amapi_client.ArticleMeta(domain, port, timeout)

        if self.am_db_api is None:
            self.am_db_api = AMDBAPI()

        super(BaseIdDataRetriever, self).__init__()
Ejemplo n.º 42
0
class Process(object):
    """
    Base for objects that enqueue tasks for one stage and model.

    The methods read `model_name`, `task_for_selected`, `task_for_all`,
    `task_delete_selected` and `task_delete_all` from the instance; none of
    them is defined in this class.
    """
    stage = 'default'
    collection_acronym = None
    # Both are created once, when the class body is executed, and are
    # shared by every instance.
    r_queues = RQueues()
    db = get_db_connection()

    def _enqueue_task(self, task, *task_args):
        """Enqueue `task` with `task_args` for this stage and model."""
        self.r_queues.enqueue(self.stage, self.model_name, task, *task_args)

    def selected(self, selected_uuids):
        """Enqueue `task_for_selected` with `selected_uuids`."""
        self._enqueue_task(self.task_for_selected, selected_uuids)

    def all(self):
        """Enqueue `task_for_all` without arguments."""
        self._enqueue_task(self.task_for_all)

    def delete_selected(self, selected_uuids):
        """Enqueue `task_delete_selected` with `selected_uuids`."""
        self._enqueue_task(self.task_delete_selected, selected_uuids)

    def delete_all(self):
        """Enqueue `task_delete_all` without arguments."""
        self._enqueue_task(self.task_delete_all)