def task_clean_etl_models_by_stage(stage='all', model_name='all'):
    """
    Enqueue tasks that remove ETL models for the indicated stage(s).

    Params:
    - `stage`: ETL stage to clean ("extract", "transform", "load") or 'all'
    - `model_name`: model to remove: "collection", "journal", etc., or 'all'
    """
    r_queues = RQueues()
    stages_to_clean = STAGE_LIST if stage == 'all' else [stage]
    if model_name == 'all':
        models_to_clean = ID_MODEL_CLASS.keys()
    else:
        models_to_clean = [model_name]

    for model_name_ in models_to_clean:
        for stage_ in stages_to_clean:
            msg = u'Enfilerando task para remover o modelo: %s da stage: %s' % (model_name_, stage_)
            logger.info(msg)
            create_sync_event_record('sync_ids', model_name_, 'delete_identifiers', msg)
            r_queues.enqueue('sync_ids', model_name_, delete_etl_models, stage_, model_name_)
            msg = u'Fim: Enfilerando task para remover o modelo: %s da stage: %s' % (model_name_, stage_)
            logger.info(msg)
            create_sync_event_record('sync_ids', model_name_, 'delete_identifiers', msg)
def task_consume_diff_update(stage, model_name):
    """
    Consume the UPDATE diff records filtered by the params:
    - @param stage: ETL stage
    - @param model_name: ETL model name

    Raises ValueError when a param is not in the valid lists.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)

    action = 'update'
    SLICE_SIZE = 1000
    r_queues = RQueues()
    get_db_connection()

    differ = ETL_DIFFERS_BY_MODEL[model_name]()
    pending_uuids = differ.get_uuids_unapplied(stage, action)

    # split the pending uuids into SLICE_SIZE-sized batches and enqueue one
    # apply-task per batch
    for uuid_batch in chunks(pending_uuids, SLICE_SIZE):
        batch_as_strings = [str(uuid) for uuid in uuid_batch]
        logger.info(u'enfilerando: consumo de UUUIDs selecionados (stage:%s, model: %s, action: %s)' % (stage, model_name, action))
        r_queues.enqueue(
            'sync_ids', model_name,
            task_differ_apply_for_selected_uuids,
            stage, model_name, action, batch_as_strings)
def task_delete_selected_collections_identifiers(selected_ids):
    """
    Delete Collection identifier records.
    @param:
    - selected_ids: list of pks of the documents to remove

    When `selected_ids` is longer than SLICE_SIZE it is split into
    SLICE_SIZE-sized batches that are re-enqueued; otherwise the queryset
    is deleted directly.
    """
    get_db_connection()
    r_queues = RQueues()
    SLICE_SIZE = 1000
    if len(selected_ids) <= SLICE_SIZE:
        CollectionIdModel.objects.filter(pk__in=selected_ids).delete()
    else:
        for id_batch in chunks(selected_ids, SLICE_SIZE):
            batch_as_strings = [str(pk) for pk in id_batch]
            r_queues.enqueue('sync_ids', 'collection',
                             task_delete_selected_collections_identifiers,
                             batch_as_strings)
def task_load_selected_journals(selected_uuids):
    """Enqueue Load processing for a LIST of Journal UUIDs."""
    queues = RQueues()
    for journal_uuid in selected_uuids:
        queues.enqueue('load', 'journal', task_load_one_journal, journal_uuid)
def task_load_selected_articles(selected_uuids):
    """Enqueue Load processing for a LIST of Article UUIDs."""
    queues = RQueues()
    for article_uuid in selected_uuids:
        queues.enqueue('load', 'article', task_load_one_article, article_uuid)
def task_clean_etl_models_by_stage(stage='all', model_name='all'):
    """
    Enqueue tasks that remove ETL models for the indicated stage(s).

    Params:
    - `stage`: ETL stage to clean, or 'all'
    - `model_name`: model to remove: "collection", "journal", etc., or 'all'
    """
    r_queues = RQueues()
    if stage == 'all':
        stages_list = STAGE_LIST
    else:
        stages_list = [stage]
    model_name_list = ID_MODEL_CLASS.keys() if model_name == 'all' else [model_name]

    for model_name_ in model_name_list:
        for stage_ in stages_list:
            # record + log the start of each enqueue, enqueue the removal,
            # then record + log its end
            msg = u'Enfilerando task para remover o modelo: %s da stage: %s' % (model_name_, stage_)
            logger.info(msg)
            create_sync_event_record('sync_ids', model_name_, 'delete_identifiers', msg)
            r_queues.enqueue('sync_ids', model_name_, delete_etl_models, stage_, model_name_)
            msg = u'Fim: Enfilerando task para remover o modelo: %s da stage: %s' % (model_name_, stage_)
            logger.info(msg)
            create_sync_event_record('sync_ids', model_name_, 'delete_identifiers', msg)
def task_load_selected_press_releases(selected_uuids):
    """Enqueue Load processing for a LIST of Press Release UUIDs."""
    queues = RQueues()
    for pr_uuid in selected_uuids:
        queues.enqueue('load', 'press_release', task_load_one_press_release, pr_uuid)
def task_delete_selected_collections(selected_uuids):
    """
    Delete loaded Collections.
    @param:
    - selected_uuids: list of UUIDs of the documents to remove

    When `selected_uuids` is longer than SLICE_SIZE it is split into
    SLICE_SIZE-sized batches that are re-enqueued; otherwise the documents
    are deleted directly, both from LoadCollection and from the OPAC db.
    """
    stage = 'load'
    model = 'collection'
    get_db_connection()
    r_queues = RQueues()
    SLICE_SIZE = 1000
    if len(selected_uuids) > SLICE_SIZE:
        for uuid_batch in chunks(selected_uuids, SLICE_SIZE):
            batch_as_strings = [str(uuid) for uuid in uuid_batch]
            r_queues.enqueue(stage, model, task_delete_selected_collections,
                             batch_as_strings)
    else:
        # remove the LoadCollection documents indicated by the uuids
        LoadCollection.objects.filter(uuid__in=selected_uuids).delete()
        # convert the uuids to _id and remove the matching OPAC documents
        register_connections()
        opac_pks = [str(uuid).replace('-', '') for uuid in selected_uuids]
        with switch_db(opac_models.Collection, OPAC_WEBAPP_DB_NAME) as opac_model:
            opac_model.objects.filter(pk__in=opac_pks).delete()
def task_load_selected_news(selected_uuids):
    """Enqueue Load processing for a LIST of News UUIDs."""
    queues = RQueues()
    for news_uuid in selected_uuids:
        queues.enqueue('load', 'news', task_load_one_news, news_uuid)
def task_delete_selected_diff_etl_model(stage, model_name, action, selected_uuids):
    """
    Delete the selected Diff records for (stage, model_name, action).

    Batches larger than SLICE_SIZE are split and re-enqueued; smaller ones
    are deleted directly. Raises ValueError on invalid params.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)
    if action not in ACTION_LIST:
        raise ValueError(u'param action: %s é inválido' % action)

    get_db_connection()
    diff_model_class = DIFF_MODEL_CLASS_BY_NAME[model_name]
    r_queues = RQueues()
    SLICE_SIZE = 1000
    if len(selected_uuids) <= SLICE_SIZE:
        diff_model_class.objects.filter(uuid__in=selected_uuids).delete()
    else:
        for uuid_batch in chunks(selected_uuids, SLICE_SIZE):
            batch_as_strings = [str(uuid) for uuid in uuid_batch]
            r_queues.enqueue(
                'sync_ids', model_name,
                task_delete_selected_diff_etl_model,
                stage, model_name, action, batch_as_strings)  # task args
def task_delete_selected_collections(selected_uuids):
    """
    Delete transformed Collections.
    @param:
    - selected_uuids: list of UUIDs of the documents to remove

    When `selected_uuids` is longer than SLICE_SIZE it is split into
    SLICE_SIZE-sized batches that are re-enqueued; otherwise the queryset
    is deleted directly.
    """
    stage = 'transform'
    model = 'collection'
    get_db_connection()
    r_queues = RQueues()
    SLICE_SIZE = 1000
    if len(selected_uuids) <= SLICE_SIZE:
        TransformCollection.objects.filter(uuid__in=selected_uuids).delete()
    else:
        for uuid_batch in chunks(selected_uuids, SLICE_SIZE):
            batch_as_strings = [str(uuid) for uuid in uuid_batch]
            r_queues.enqueue(stage, model, task_delete_selected_collections,
                             batch_as_strings)
def task_delete_selected_diff_etl_model(stage, model_name, action, selected_uuids):
    """
    Delete the selected Diff records for (stage, model_name, action).

    Large uuid lists are split into batches of SLICE_SIZE and re-enqueued;
    small ones are deleted directly. Raises ValueError on invalid params.
    """
    if stage not in ETL_STAGE_LIST:
        raise ValueError(u'param stage: %s é inválido' % stage)
    if model_name not in ETL_MODEL_NAME_LIST:
        raise ValueError(u'param model_name: %s é inválido' % model_name)
    if action not in ACTION_LIST:
        raise ValueError(u'param action: %s é inválido' % action)

    get_db_connection()
    model_class = DIFF_MODEL_CLASS_BY_NAME[model_name]
    r_queues = RQueues()
    SLICE_SIZE = 1000
    if len(selected_uuids) > SLICE_SIZE:
        for batch in chunks(selected_uuids, SLICE_SIZE):
            uuid_as_string_list = [str(uuid) for uuid in batch]
            r_queues.enqueue('sync_ids', model_name,
                             task_delete_selected_diff_etl_model,
                             stage, model_name, action,
                             uuid_as_string_list)  # task args
    else:
        model_class.objects.filter(uuid__in=selected_uuids).delete()
def task_clean_diff_models(stage='all', model_name='all', action='all'):
    """
    Enqueue tasks that remove Diff models.
    Params:
    - `stage`: stage field filter: "extract", "transform" or "load"
    - `model_name`: model to remove: "collection", "journal", etc.
    - `action`: action field filter: "add" | "update" | "delete"

    Each param accepts 'all' to expand to every valid value.
    """
    r_queues = RQueues()
    model_name_list = DIFF_MODEL_CLASS.keys() if model_name == 'all' else [model_name]
    stages_list = STAGE_LIST if stage == 'all' else [stage]
    actions_list = ACTION_LIST if action == 'all' else [action]

    for model_ in model_name_list:
        for stage_ in stages_list:
            for action_ in actions_list:
                msg = u'Enfilerando task para remover o diff model, modelo: %s, stage: %s, action: %s' % (model_, stage_, action_)
                logger.info(msg)
                create_sync_event_record('sync_ids', model_, 'delete_diff_models', msg)
                r_queues.enqueue('sync_ids', model_, delete_diff_models,
                                 stage_, model_, action_)
                msg = u'Fim: Enfilerando task para remover o diff model, modelo: %s, stage: %s, action: %s' % (model_, stage_, action_)
                logger.info(msg)
                create_sync_event_record('sync_ids', model_, 'delete_diff_models', msg)
def task_delete_selected_collections_identifiers(selected_ids):
    """
    Delete Collection identifier records.
    @param:
    - selected_ids: list of pks of the documents to remove

    Lists longer than SLICE_SIZE are split into SLICE_SIZE-sized batches
    and re-enqueued; shorter lists are deleted directly on the queryset.
    """
    model_class = CollectionIdModel
    get_db_connection()
    r_queues = RQueues()
    SLICE_SIZE = 1000
    if len(selected_ids) > SLICE_SIZE:
        for id_batch in chunks(selected_ids, SLICE_SIZE):
            r_queues.enqueue(
                'sync_ids', 'collection',
                task_delete_selected_collections_identifiers,
                [str(pk) for pk in id_batch])
    else:
        model_class.objects.filter(pk__in=selected_ids).delete()
def enqueue_full_populate_task_by_model(model_name='all'):
    """
    Enqueue the populate task(s) that fill in the dates of diff records.

    @param model_name: one of 'collection', 'journal', 'issue', 'article',
                       'news', 'press_release' or 'all' (default).
    Raises ValueError when `model_name` is not one of the expected names.
    """
    # bugfix: messages previously misspelled "Inicinado"/"Inciando"
    logger.info("Iniciando: enqueue_full_populate_task_by_model para modelo: %s", model_name)
    create_sync_event_record(
        'sync_ids', model_name, 'enqueue_full_populate_task_by_model',
        u'Iniciando enfileramento para preencher datas dos registros diff model: %s' % model_name)
    # setup
    get_db_connection()
    stage = 'sync_ids'
    r_queues = RQueues()
    # map each model name to its extract model class (for logging) and the
    # populate task to enqueue
    options = {
        'collection': {
            'model_class': models.ExtractCollection,
            'task_fn': task_populate_collections
        },
        'journal': {
            'model_class': models.ExtractJournal,
            'task_fn': task_populate_journals
        },
        'issue': {
            'model_class': models.ExtractIssue,
            'task_fn': task_populate_issues
        },
        'article': {
            'model_class': models.ExtractArticle,
            'task_fn': task_populate_articles
        },
        'news': {
            'model_class': models.ExtractNews,
            'task_fn': task_populate_news
        },
        'press_release': {
            'model_class': models.ExtractPressRelease,
            'task_fn': task_populate_press_release
        }
    }
    if model_name == 'all':
        for k, v in options.items():
            model_class = v['model_class']
            task_fn = v['task_fn']
            logger.info("enfilerando stage: %s model_name: %s model_class: %s" % (stage, k, model_class))
            r_queues.enqueue(stage, k, task_fn)
    elif model_name not in options.keys():
        raise ValueError('Param: model_name: %s inesperado' % model_name)
    else:
        model_class = options[model_name]['model_class']
        task_fn = options[model_name]['task_fn']
        logger.info("enfilerando stage: %s model_name: %s model_class: %s" % (stage, model_name, model_class))
        r_queues.enqueue(stage, model_name, task_fn)

    create_sync_event_record(
        'sync_ids', model_name, 'enqueue_full_populate_task_by_model',
        u'Fim do enfileramento para preencher datas dos registros diff model: %s' % model_name)
    logger.info("Fim: enqueue_full_populate_task_by_model para modelo: %s", model_name)
def task_collection_create():
    """Enqueue one load task per transformed Collection."""
    get_db_connection()
    r_queues = RQueues()
    for collection in models.TransformCollection.objects.all():
        r_queues.enqueue('load', 'collection', task_load_collection,
                         collection.uuid)
def task_load_selected_collections(selected_uuids):
    """Enqueue Load processing for a LIST of Collection UUIDs."""
    queues = RQueues()
    for collection_uuid in selected_uuids:
        queues.enqueue('load', 'collection', task_load_one_collection,
                       collection_uuid)
def task_collection_create():
    """Create the transform-stage queues and enqueue the Collection transform."""
    get_db_connection()
    stage = 'transform'
    model = 'collection'
    r_queues = RQueues()
    r_queues.create_queues_for_stage(stage)
    r_queues.enqueue(stage, model, task_transform_collection)
def task_retrieve_selected_news_identifiers(selected_ids):
    """Enqueue NewsIdDataRetriever processing for a LIST of NewsIdModel ids."""
    queues = RQueues()
    for news_id in selected_ids:
        queues.enqueue('sync_ids', 'news',
                       task_retrieve_one_news_identifier, news_id)
def task_journal_create():
    """Enqueue one load task per transformed Journal."""
    get_db_connection()
    r_queues = RQueues()
    for journal in models.TransformJournal.objects.all():
        r_queues.enqueue('load', 'journal', task_load_journal,
                         uuid=journal.uuid)
def task_extract_selected_issues(selected_uuids):
    """Enqueue Extract processing for a LIST of Issue UUIDs."""
    get_db_connection()
    r_queues = RQueues()
    # resolve the uuids to issue PIDs via the identifiers model
    pids = identifiers_models.IssueIdModel.objects.filter(
        uuid__in=selected_uuids).values_list('issue_pid')
    for issue_pid in pids:
        r_queues.enqueue('extract', 'issue', task_extract_one_issue, issue_pid)
def task_retrieve_selected_journals_identifiers(selected_ids):
    """Enqueue JournalIdDataRetriever processing for a LIST of JournalIdModel ids."""
    queues = RQueues()
    for journal_id in selected_ids:
        queues.enqueue('sync_ids', 'journal',
                       task_retrieve_one_journal_identifier, journal_id)
def task_retrieve_selected_articles_identifiers(selected_ids):
    """Enqueue ArticleIdDataRetriever processing for a LIST of identifier ids."""
    queues = RQueues()
    for article_id in selected_ids:
        queues.enqueue('sync_ids', 'article',
                       task_retrieve_one_article_identifier, article_id)
def task_transform_selected_news(selected_uuids):
    """Enqueue Transform processing for a LIST of News UUIDs."""
    queues = RQueues()
    for news_uuid in selected_uuids:
        queues.enqueue('transform', 'news', task_transform_one_news, news_uuid)
def task_transform_selected_press_releases(selected_uuids):
    """Enqueue Transform processing for a LIST of Press Release UUIDs."""
    queues = RQueues()
    for pr_uuid in selected_uuids:
        queues.enqueue('transform', 'press_release',
                       task_transform_one_press_release, pr_uuid)
def task_issue_create():
    """Enqueue one load task per transformed Issue."""
    get_db_connection()
    r_queues = RQueues()
    for issue in models.TransformIssue.objects.all():
        r_queues.enqueue('load', 'issue', task_load_issue, uuid=issue.uuid)
def task_transform_selected_journals(selected_uuids):
    """Enqueue Transform processing for a LIST of Journal UUIDs."""
    get_db_connection()
    r_queues = RQueues()
    # resolve the uuids to journal ISSNs via the identifiers model
    issns = identifiers_models.JournalIdModel.objects.filter(
        uuid__in=selected_uuids).values_list('journal_issn')
    for issn in issns:
        r_queues.enqueue('transform', 'journal', task_transform_one_journal, issn)
def task_press_release_create():
    """Enqueue one load task per transformed Press Release."""
    get_db_connection()
    r_queues = RQueues()
    for press_release in models.TransformPressRelease.objects.all():
        r_queues.enqueue('load', 'press_release', task_load_press_release,
                         uuid=press_release.uuid)
def task_article_create():
    """Enqueue one load task per transformed Article."""
    get_db_connection()
    r_queues = RQueues()
    for article in models.TransformArticle.objects.all():
        r_queues.enqueue('load', 'article', task_load_article,
                         uuid=article.uuid)
def task_news_create():
    """Enqueue one load task per transformed News."""
    get_db_connection()
    r_queues = RQueues()
    for news in models.TransformNews.objects.all():
        r_queues.enqueue('load', 'news', task_load_news, uuid=news.uuid)
def task_retrieve_selected_journals_identifiers(selected_ids):
    """Enqueue JournalIdDataRetriever processing for a LIST of JournalIdModel ids."""
    r_queues = RQueues()
    for identifier in selected_ids:
        r_queues.enqueue(
            'sync_ids', 'journal',
            task_retrieve_one_journal_identifier, identifier)
def task_extract_selected_issues(selected_uuids):
    """Enqueue Extract processing for a LIST of Issue UUIDs."""
    get_db_connection()
    r_queues = RQueues()
    source_ids_model_class = identifiers_models.IssueIdModel
    # map the selected uuids to their issue PIDs before enqueueing
    selected_pids = source_ids_model_class.objects.filter(
        uuid__in=selected_uuids).values_list('issue_pid')
    for pid in selected_pids:
        r_queues.enqueue('extract', 'issue', task_extract_one_issue, pid)
def task_transform_selected_collections(selected_uuids):
    """Enqueue Transform processing for a LIST of Collection UUIDs."""
    queues = RQueues()
    for _ in selected_uuids:
        # the collection transform needs no parameter; we only make sure we
        # run once per uuid in the db (which should be exactly one)
        queues.enqueue('transform', 'collection', task_transform_one_collection)
def task_retrieve_selected_press_releases_identifiers(selected_ids):
    """Enqueue PressReleaseDataRetriever processing for a LIST of PressRelaseIdModel ids."""
    queues = RQueues()
    for pr_id in selected_ids:
        queues.enqueue('sync_ids', 'press_release',
                       task_retrieve_one_press_release_identifier, pr_id)
def task_retrieve_selected_news_identifiers(selected_ids):
    """Enqueue NewsIdDataRetriever processing for a LIST of NewsIdModel ids."""
    r_queues = RQueues()
    for identifier in selected_ids:
        r_queues.enqueue(
            'sync_ids', 'news',
            task_retrieve_one_news_identifier, identifier)
def task_retrieve_selected_articles_identifiers(selected_ids):
    """Enqueue ArticleIdDataRetriever processing for a LIST of identifier ids."""
    r_queues = RQueues()
    for identifier in selected_ids:
        r_queues.enqueue(
            'sync_ids', 'article',
            task_retrieve_one_article_identifier, identifier)
def task_retrieve_selected_press_releases_identifiers(selected_ids):
    """Enqueue PressReleaseDataRetriever processing for a LIST of PressRelaseIdModel ids."""
    r_queues = RQueues()
    for identifier in selected_ids:
        r_queues.enqueue(
            'sync_ids', 'press_release',
            task_retrieve_one_press_release_identifier, identifier)
def task_transform_selected_articles(selected_uuids):
    """Enqueue Transform processing for a LIST of Article UUIDs."""
    get_db_connection()
    r_queues = RQueues()
    # resolve the uuids to article PIDs via the identifiers model
    pids = identifiers_models.ArticleIdModel.objects.filter(
        uuid__in=selected_uuids).values_list('article_pid')
    for article_pid in pids:
        r_queues.enqueue('transform', 'article',
                         task_transform_one_article, article_pid)
def task_retrive_all_articles_ids():
    """Enqueue retrieval of ALL Article identifiers, in chunks of 1000."""
    retriever = RETRIEVERS_BY_MODEL['article']()
    r_queues = RQueues()
    all_ids = list(retriever.get_data_source_identifiers())
    for ids_chunk in chunks(all_ids, 1000):
        r_queues.enqueue('sync_ids', 'article',
                         task_retrive_articles_ids_by_chunks, ids_chunk)
def task_extract_all_news():
    """Enqueue Extract processing for ALL News records (one task per RSS feed)."""
    r_queues = RQueues()
    for lang, feed in config.RSS_NEWS_FEEDS.items():
        feed_url = feed['url'].format(lang)
        r_queues.enqueue('extract', 'news', task_extract_one_news,
                         feed_url, lang)
def task_extract_selected_news(selected_uuids):
    """Enqueue Extract processing for a LIST of News UUIDs."""
    get_db_connection()
    r_queues = RQueues()
    selected_news = ExtractNews.objects.filter(uuid__in=selected_uuids)
    for news in selected_news:
        # re-extract using the same feed url/lang recorded on the document
        r_queues.enqueue('extract', 'news', task_extract_one_news,
                         news.feed_url_used, news.feed_lang)
def task_extract_selected_press_releases(selected_uuids):
    """Enqueue Extract processing for a LIST of Press Release UUIDs."""
    get_db_connection()
    r_queues = RQueues()
    selected_prs = ExtractPressRelease.objects.filter(uuid__in=selected_uuids)
    for pr in selected_prs:
        # re-extract using the acronym and feed url/lang recorded on the document
        r_queues.enqueue('extract', 'press_release',
                         task_extract_one_press_release,
                         pr.journal_acronym, pr.feed_url_used, pr.feed_lang)
def task_retrieve_selected_collections_identifiers(selected_ids):
    """Enqueue CollectionIdDataRetriever processing for a LIST of CollectionIdModel ids."""
    queues = RQueues()
    for collection_id in selected_ids:
        # the collection retriever needs no extra parameter beyond the id;
        # we only make sure we run once per id in the db (which should be
        # exactly one)
        queues.enqueue('sync_ids', 'collection',
                       task_retrieve_one_collection_identifier, collection_id)
def task_retrieve_all_news_identifiers():
    """Enqueue retrieval of ALL NewsIdModel records, in chunks of 1000."""
    r_queues = RQueues()
    retriever = NewsIdDataRetriever()
    all_ids = list(retriever.get_data_source_identifiers())
    for ids_chunk in chunks(all_ids, 1000):
        r_queues.enqueue('sync_ids', 'news',
                         task_retrieve_selected_news_identifiers, ids_chunk)
def produce(stage, model, action):
    """
    Enqueue the tasks that PRODUCE Differ records for the indicated
    stage, model and action ('all' expands to every valid value).

    Raises ValueError when a param is neither 'all' nor a valid value.
    """
    def _expand(value, allowed, param):
        # expand 'all' into the full list; validate anything else
        if value == 'all':
            return allowed
        if value not in allowed:
            raise ValueError('Param: %s: %s com valor inesperado!' % (param, value))
        return [value]

    stages_list = _expand(stage, ETL_STAGE_LIST, 'stage')
    models_list = _expand(model, ETL_MODEL_NAME_LIST, 'model')
    actions_list = _expand(action, ACTION_LIST, 'action')

    r_queues = RQueues()
    task_fn_by_action = {
        'add': task_produce_diff_add,
        'update': task_produce_diff_update,
        'delete': task_produce_diff_delete,
    }
    for stage_ in stages_list:
        for model_ in models_list:
            for action_ in actions_list:
                logger.info(
                    "[%s][%s][%s] enfilerando para producir registros diff",
                    stage_, model_, action_)
                r_queues.enqueue('sync_ids', model_,
                                 task_fn_by_action[action_], stage_, model_)
def remove(stage, model, action):
    """
    Enqueue the tasks that REMOVE (consume) Differ records for the
    indicated stage, model and action ('all' expands to every valid value).

    Raises ValueError when a param is neither 'all' nor a valid value.
    """
    if stage == 'all':
        stages_list = ETL_STAGE_LIST
    elif stage not in ETL_STAGE_LIST:
        raise ValueError('Param: stage: %s com valor inesperado!' % stage)
    else:
        stages_list = [stage, ]

    if model == 'all':
        models_list = ETL_MODEL_NAME_LIST
    elif model not in ETL_MODEL_NAME_LIST:
        # bugfix: message previously interpolated `stage` instead of `model`
        raise ValueError('Param: model: %s com valor inesperado!' % model)
    else:
        models_list = [model]

    if action == 'all':
        actions_list = ACTION_LIST
    elif action not in ACTION_LIST:
        # bugfix: previously tested `model not in ACTION_LIST` and reported
        # "model"/`stage`, so invalid actions slipped through validation
        raise ValueError('Param: action: %s com valor inesperado!' % action)
    else:
        actions_list = [action, ]

    r_queues = RQueues()
    for stage_ in stages_list:
        for model_ in models_list:
            for action_ in actions_list:
                logger.info(
                    "[%s][%s][%s] enfilerando para remover registros diff",
                    stage_, model_, action_)
                r_queues.enqueue('sync_ids', model_,
                                 task_delete_all_diff_etl_model,
                                 stage_, model_, action_)
class ProcessDiffersBase:
    """
    Base helper that enqueues Differ produce/consume/delete jobs for one
    ETL model. Subclasses set `model_name`; tasks are referenced by their
    dotted-path strings so RQ can import them lazily.
    """
    model_name = ''
    collection_acronym = None
    r_queues = RQueues()
    db = get_db_connection()

    task_produce_add = 'opac_proc.differs.produce_jobs.task_produce_diff_add'
    task_produce_update = 'opac_proc.differs.produce_jobs.task_produce_diff_update'
    task_produce_delete = 'opac_proc.differs.produce_jobs.task_produce_diff_delete'
    task_delete_selected = 'opac_proc.differs.produce_jobs.task_delete_selected_diff_etl_model'
    task_delete_all = 'opac_proc.differs.produce_jobs.task_delete_all_diff_etl_model'
    task_consume_add = 'opac_proc.differs.consumer_jobs.task_consume_diff_add'
    task_consume_update = 'opac_proc.differs.consumer_jobs.task_consume_diff_update'
    task_consume_delete = 'opac_proc.differs.consumer_jobs.task_consume_diff_delete'

    def produce(self, stage, action):
        """Enqueue the produce task for `action` ('add'|'update'|'delete')."""
        tasks = {
            'add': self.task_produce_add,
            'update': self.task_produce_update,
            'delete': self.task_produce_delete,
        }
        if action not in tasks:
            raise ValueError(u'Param: action %s é inválido' % action)
        self.r_queues.enqueue('sync_ids', self.model_name,
                              tasks[action],
                              stage, self.model_name)  # task args

    def consume(self, stage, action):
        """Enqueue the consume task for `action` ('add'|'update'|'delete')."""
        tasks = {
            'add': self.task_consume_add,
            'update': self.task_consume_update,
            'delete': self.task_consume_delete,
        }
        if action not in tasks:
            raise ValueError(u'Param: action %s é inválido' % action)
        self.r_queues.enqueue('sync_ids', self.model_name,
                              tasks[action],
                              stage, self.model_name)  # task args

    def delete_selected(self, stage, action, selected_uuids):
        """Enqueue deletion of the diff records with the given uuids."""
        self.r_queues.enqueue('sync_ids', self.model_name,
                              self.task_delete_selected,
                              stage, self.model_name,
                              action, selected_uuids)  # task args

    def delete_all(self, stage, action):
        """Enqueue deletion of all diff records for (stage, model, action)."""
        self.r_queues.enqueue('sync_ids', self.model_name,
                              self.task_delete_all,
                              stage, self.model_name, action)  # task args
def enqueue_ids_data_retriever(model_name='all'):
    """
    Enqueue the IdModel data-retriever task for the given model name,
    or for every model when `model_name` is 'all'.
    """
    models_list = MODEL_NAME_LIST if model_name == 'all' else [model_name]
    r_queues = RQueues()
    task_fn = task_call_data_retriver_by_model
    for model_ in models_list:
        logger.info('Enfilerando task: %s para o model: %s.' % (task_fn, model_))
        create_sync_event_record(
            'sync_ids', model_, 'enqueue_ids_data_retriever',
            u'Inciando enfileramento para recuperar dados do IdModel model: %s' % model_name)
        r_queues.enqueue('sync_ids', model_, task_fn, model_)
        logger.info('Fim: Enfilerando task: %s para o model: %s.' % (task_fn, model_))
        create_sync_event_record(
            'sync_ids', model_, 'enqueue_ids_data_retriever',
            u'Fim do enfileramento para recuperar dados do IdModel model: %s' % model_name)