def extract_source(source, entities, config, output_dir):
    """
    Full JSON BrAPI source extraction process.

    Fetches every configured entity from the BrAPI endpoint `source`,
    links the entities together, then saves the merged JSON stores into
    `output_dir`.  On any extraction error the partial data is saved to
    `output_dir + '-failed'` instead so it can be inspected.

    :param source: BrAPI source description (must contain 'schema:identifier')
    :param entities: dict of entity-name -> entity description; each entity
        gets a 'store' MergeStore attached by this function
    :param config: global config (reads 'log-dir' and 'options.verbose')
    :param output_dir: directory the merged JSON stores are saved to
    """
    source_name = source['schema:identifier']
    action = 'extract-' + source_name
    log_file = get_file_path([config['log-dir'], action], ext='.log', recreate=True)
    logger = create_logger(action, log_file, config['options']['verbose'])
    pool = ThreadPool(10)

    logger.info("Extracting BrAPI {}...".format(source_name))
    try:
        # Initialize JSON merge stores
        for (entity_name, entity) in entities.items():
            entity['store'] = MergeStore(source['schema:identifier'], entity['name'])

        # Fetch server implemented calls (unless already provided in config)
        if 'implemented-calls' not in source:
            source['implemented-calls'] = get_implemented_calls(source, logger)

        # Fetch entities lists
        fetch_all_list(source, logger, entities, pool)

        # Detail entities
        fetch_all_details(source, logger, entities, pool)

        # Link entities (internal links, internal object links and external object links)
        fetch_all_links(source, logger, entities)

        # Detail entities again (for objects that might have been discovered by links)
        fetch_all_details(source, logger, entities, pool)

        remove_internal_objects(entities)

        logger.info("SUCCEEDED Extracting BrAPI {}.".format(source_name))
    except Exception:
        # Was a bare `except:`, which also swallowed SystemExit and
        # KeyboardInterrupt; narrowed so interpreter-level signals propagate.
        logger.debug(traceback.format_exc())
        # ignore_errors: if extraction failed before anything was written,
        # output_dir may not exist — don't let rmtree mask the real error.
        shutil.rmtree(output_dir, ignore_errors=True)
        output_dir = output_dir + '-failed'
        logger.info(
            "FAILED Extracting BrAPI {}.\n"
            "=> Check the logs ({}) and data ({}) for more details.".format(
                source_name, log_file, output_dir))
    pool.close()

    # Save to file (into the '-failed' directory if extraction failed)
    logger.info("Saving BrAPI {} to '{}'...".format(source_name, output_dir))
    for (entity_name, entity) in entities.items():
        entity['store'].save(output_dir)
        entity['store'].clear()
def load_source(source, config, source_bulk_dir, log_dir):
    """
    Full Elasticsearch documents indexing.

    Bulk-loads the JSON document files produced for `source` into a fresh
    timestamped Elasticsearch index per document type, then moves the alias
    onto the new index and deletes obsolete indices.

    :param source: BrAPI source description (must contain 'schema:identifier')
    :param config: global config (reads 'verbose' and 'load-elasticsearch')
    :param source_bulk_dir: directory holding the bulk files to index
    :param log_dir: directory the action log file is written to
    :raises: nothing — all errors are caught, logged, and reported via the log
    """
    source_name = source['schema:identifier']
    action = 'load-elasticsearch-' + source_name
    log_file = get_file_path([log_dir, action], ext='.log', recreate=True)
    logger = create_logger(source_name, log_file, config['verbose'])

    load_config = config['load-elasticsearch']
    es_client = init_es_client(load_config['url'], logger)

    logger.info("Loading '{}' into elasticsearch '{}'...".format(source_bulk_dir, load_config['url']))
    try:
        if not os.path.exists(source_bulk_dir):
            raise FileNotFoundError(
                'No such file or directory: \'{}\'.\n'
                'Please make sure you have run the BrAPI extraction and Elasticsearch document transformation'
                ' before trying to launch the transformation process.'
                .format(source_bulk_dir))

        bulk_files = list(list_entity_files(source_bulk_dir))
        all_document_types = set(map(first, bulk_files))
        # Coerce to set: the configured 'document-types' may be a plain list,
        # which has no `.intersection` method (previously an AttributeError).
        document_types = set(load_config.get('document-types') or all_document_types)
        document_types = document_types.intersection(all_document_types)

        index_by_document = dict()

        logger.info("Preparing index with template mapping...")
        # One shared timestamp so all indices of this run share a suffix.
        timestamp = int(time.time())
        for document_type in document_types:
            base_index_name = replace_template(
                load_config['index-template'],
                {'source': source['schema:identifier'], 'documentType': document_type}
            ).lower()

            create_template(es_client, load_config, document_type, base_index_name, logger)

            index_name = base_index_name + '-d' + str(timestamp)
            create_index(es_client, index_name, logger)
            index_by_document[document_type] = base_index_name, index_name

        logger.info("Bulk indexing...")
        for document_type, file_path in bulk_files:
            # Skip files whose document type was filtered out above.
            if document_type in index_by_document:
                base_index_name, index_name = index_by_document[document_type]
                bulk_index(es_client, index_name, file_path, logger)

        logger.info("Creating index aliases and deleting old indices...")
        for document_type, (base_index_name, index_name) in index_by_document.items():
            create_alias(es_client, index_name, base_index_name, logger)
            new_index, *old_indices = get_indices(es_client, base_index_name)
            # NOTE(review): `old_indices` already excludes `new_index`, so the
            # extra `[1:]` also keeps the most recent old index alive —
            # presumably as a backup. Confirm this double-skip is intentional.
            for old_index in old_indices[1:]:
                delete_index(es_client, old_index, logger)

        logger.info("SUCCEEDED Loading {}.".format(source_name))
    except Exception as e:
        logger.debug(traceback.format_exc())
        # Some Elasticsearch client errors carry extra detail in `long_message`.
        logger.debug(getattr(e, 'long_message', ''))
        logger.info("FAILED Loading {} Elasticsearch documents.\n"
                    "=> Check the logs ({}) for more details."
                    .format(source_name, log_file))