def processing_redis_content(self, request_id: str, key_prefix: str,
                             redis_db: RedisDB):
    with self.lock:
        # Collect all redis keys written under this prefix.
        keys_found = redis_db.get_key_by_pattern(
            key_pattern=f'{key_prefix}{self.config.key_separator}*')
        task = self.all_tasks[request_id]
        # Record each key's cardinality on the task for progress tracking.
        for key in keys_found:
            cardinality = redis_db.get_cardinality(key=key)
            task.map_redis_key_to_cardinality[key] = cardinality
def delete_redis_keys_prefix(prefix: str, redis_db: RedisDB,
                             log: Logger) -> int:
    redis_driver = redis_db.get_driver()
    num_deleted = 0
    log.debug(f'Deleting keys with a prefix of: {prefix}')
    # Append a wildcard so scan_iter matches every key starting with the
    # prefix, not only a key exactly equal to it.
    for key in redis_driver.scan_iter(f'{prefix}*'):
        redis_driver.delete(key)
        num_deleted += 1
    log.debug(f'Deleted {num_deleted} keys from Redis.')
    return num_deleted
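A minimal sketch of wiring this helper up, assuming a RedisDB wrapper whose get_driver() returns a redis-py client; the env, event_dispatcher and prefix values here are placeholders, not part of the original:

import logging

log = logging.getLogger(__name__)
# Hypothetical construction; the other examples pass env and event_dispatcher.
redis_db = RedisDB(env=env, event_dispatcher=event_dispatcher)

# Remove every key written under this request's prefix.
removed = delete_redis_keys_prefix(prefix='my_request_id',
                                   redis_db=redis_db,
                                   log=log)
log.info(f'Cleaned up {removed} leftover keys.')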
Example #3
def __init__(self, env: EnvProvider, progress_monitor: ProgressMonitor,
             event_dispatcher: EventDispatcher):
    self.event_dispatcher = event_dispatcher
    self.progress_monitor = progress_monitor
    self.is_ready = False
    self.log = log_helper.get_logger(logger_name=__name__)
    # noinspection PyBroadException
    try:
        self.redis_db = RedisDB(event_dispatcher=self.event_dispatcher,
                                env=env)
        self.data_and_model_provider = env.data_and_model_provider
        self.thread_pool = ThreadPool()
        self.data_to_graph_translator = env.data_to_graph_entities_provider
        self.multi_helper = MultiHelper(config=env.config)
        self.im = IngestionManager(env=env,
                                   multi_helper=self.multi_helper,
                                   event_dispatcher=self.event_dispatcher)
        self.is_ready = True
    except Exception as the_exception:
        self.log.error(the_exception, exc_info=True)
        self.is_ready = False
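Example #4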
def writing_from_redis_into_neo(self, result, redis_db: RedisDB):
    with self.lock:
        source_name = result['source_name']
        processed_keys = result['processed_keys']
        request_id = result['request_id']

        self.log.info(f'Writing to neo4j: {source_name}')
        self.log.admin({
            Field.writing_to_neo: source_name,
            Field.request_id: request_id
        })

        redis_sizes = {}
        for key in processed_keys:
            redis_sizes[key] = redis_db.get_cardinality(key=key)
            self.log.debug(f'{key} size in redis = {redis_sizes[key]}')
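The result argument is the dictionary produced by the parallel redis-writing step; the three keys read at the top are the only ones this method requires. A minimal sketch of a call, with placeholder values (the owning object is called coordinator here purely for illustration):

# Placeholder dict; in the pipeline it comes from write_translator_result_to_redis.
result = {
    'source_name': 'my_source',
    'processed_keys': ['my_request_id_my_source_nodes'],
    'request_id': 'my_request_id',
}
coordinator.writing_from_redis_into_neo(result=result,
                                        redis_db=coordinator.redis_db)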
Example #5
def __init__(self, env: EnvProvider, multi_helper: MultiHelper,
             event_dispatcher: EventDispatcher):
    self.is_ready = False
    self.event_dispatcher = event_dispatcher
    self.log = log_helper.get_logger(logger_name=self.__class__.__name__)
    self.config = env.config
    try:
        self.neo_db: NeoDB = NeoDB(config=self.config,
                                   event_dispatcher=self.event_dispatcher)
        self.redis_db: RedisDB = RedisDB(
            env=env, event_dispatcher=self.event_dispatcher)
        self.multi_helper: MultiHelper = multi_helper
        self.is_ready = True
    except Exception as the_exception:
        self.log.error(the_exception, exc_info=True)
        self.is_ready = False
    IngestionManager.supported_operations = (
        self.config.nodes_ingestion_operation,
        self.config.edges_ingestion_operation)
Example #6
def all_redis_keys():
    r = RedisDB(event_dispatcher=event_dispatcher, env=env)
    all_keys = r.get_key_by_pattern(key_pattern='*', return_list=True)
    return f'All redis keys: {all_keys}'
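Given the free variables event_dispatcher and env and the string return value, this reads like a small debugging endpoint. A sketch of exposing it through Flask; the route path and all Flask wiring are assumptions, not part of the original:

from flask import Flask

app = Flask(__name__)

# Hypothetical route; the original snippet only defines the handler body.
@app.route('/debug/redis-keys')
def all_redis_keys():
    r = RedisDB(event_dispatcher=event_dispatcher, env=env)
    all_keys = r.get_key_by_pattern(key_pattern='*', return_list=True)
    return f'All redis keys: {all_keys}'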
Example #7
class Coordinator:
    def __init__(self, env: EnvProvider, progress_monitor: ProgressMonitor,
                 event_dispatcher: EventDispatcher):
        self.event_dispatcher = event_dispatcher
        self.progress_monitor = progress_monitor
        self.is_ready = False
        self.log = log_helper.get_logger(logger_name=__name__)
        # noinspection PyBroadException
        try:
            self.redis_db = RedisDB(event_dispatcher=self.event_dispatcher,
                                    env=env)
            self.data_and_model_provider = env.data_and_model_provider
            self.thread_pool = ThreadPool()
            self.data_to_graph_translator = env.data_to_graph_entities_provider
            self.multi_helper = MultiHelper(config=env.config)
            self.im = IngestionManager(env=env,
                                       multi_helper=self.multi_helper,
                                       event_dispatcher=self.event_dispatcher)
            self.is_ready = True
        except Exception as the_exception:
            self.log.error(the_exception, exc_info=True)
            self.is_ready = False

    def process_request(self, request: dict):

        now = datetime.datetime.now()
        string_timestamp = f'{now.year}_{now.month}_{now.day}_{now.hour}_{now.minute}_{now.second}'

        request_type = request['request_type']
        request_id = f"{request['request_id']}_{string_timestamp}"

        self.log.info(f'Progress-Report-ID: {request_id}')

        # Progress-0: Incoming request.
        self.event_dispatcher.dispatch_event(event=GiraffeEvent(
            request_id=request_id,
            event_type=GiraffeEventType.STARTED_PROCESSING_REQUEST,
            message=f'Starting processing request id: {request_id}',
            arguments={
                'request_type': request_type,
                'request_content': str(request)
            }))

        if request_type == 'white_list':
            file_path = request['file_path']
            source_descriptions = file_path
            validate_is_file(file_path=file_path)
            with open(file_path, 'r') as white_list:
                content = white_list.readlines()
                self.log.admin({Field.white_list_content: content})

        else:
            error_message = (f'Unsupported request_type: {request_type}. '
                             "Currently, only 'white_list' is supported.")
            not_implemented = NotImplementedError(error_message)
            self.event_dispatcher.dispatch_event(
                event=GiraffeEvent(request_id=request_id,
                                   event_type=GiraffeEventType.ERROR,
                                   message=error_message,
                                   arguments={
                                       'message': error_message,
                                       'exception': not_implemented
                                   }))
            raise not_implemented

        # Progress-1: Fetching pairs of data/model for requested data-sources.
        self.event_dispatcher.dispatch_event(event=GiraffeEvent(
            request_id=request_id,
            event_type=GiraffeEventType.FETCHING_DATA_AND_MODELS,
            message=
            f'Fetching pairs of data/model for requested data-sources [{request_id}]',
            arguments={
                'request_id': request_id,
                'source_description': source_descriptions
            },
        ))

        try:
            data_and_models = self.data_and_model_provider.get_data_and_model_for(
                source_descriptions=source_descriptions)
        except GiraffeException as giraffe_exception:
            message = f'Failed loading data and models for request: {request_id}'
            self.event_dispatcher.dispatch_event(event=GiraffeEvent(
                request_id=request_id,
                event_type=GiraffeEventType.ERROR,
                message=message,
                arguments={
                    'request_id': request_id,
                    'message': message,
                    'exception': giraffe_exception
                },
            ))
            raise giraffe_exception  # Rethrowing

        # Progress-2: Finished fetching pairs of data/model for requested data-sources.
        self.event_dispatcher.dispatch_event(event=GiraffeEvent(
            request_id=request_id,
            event_type=GiraffeEventType.FINISHED_FETCHING_DATA_AND_MODELS,
            message=
            f'Finished fetching pairs of data/model for requested data-sources [{request_id}]',
            arguments={
                'request_id': request_id,
                'data_models': data_and_models
            }))

        all_futures = []
        for data_and_model in data_and_models:
            # Progress-3: Writing all graph-elements into redis (concurrently).
            self.event_dispatcher.dispatch_event(event=GiraffeEvent(
                request_id=request_id,
                event_type=GiraffeEventType.WRITING_GRAPH_ELEMENTS_INTO_REDIS,
                message=
                f'Writing graph-elements into redis (concurrently) [{data_and_model.source_name}]',
                arguments={
                    'request_id': request_id,
                    'source_name': data_and_model.source_name
                }))

            translation_id = f'{request_id}_{data_and_model.source_name}'
            translated_graph_entities = self.data_to_graph_translator.translate(
                request=translation_request({
                    'src_df': data_and_model.data,
                    'model': data_and_model.graph_model,
                    'streaming_id': translation_id
                }))

            future = self.multi_helper.run_in_separate_thread(
                function=self.redis_db.write_translator_result_to_redis,
                entry_dict=translated_graph_entities,
                source_name=data_and_model.source_name,
                request_id=request_id,
            )
            all_futures.append(future)

        parallel_results = MultiHelper.wait_on_futures(iterable=all_futures)

        # Progress-4: Graph elements are ready (in redis) to be pushed into neo4j.
        self.event_dispatcher.dispatch_event(event=GiraffeEvent(
            request_id=request_id,
            event_type=GiraffeEventType.REDIS_IS_READY_FOR_CONSUMPTION,
            message=
            f'Graph elements are ready (in redis) to be pushed into neo4j [{request_id}]',
            arguments={
                'request_id': request_id,
                'parallel_results': parallel_results
            }))

        for exception in parallel_results.exceptions:
            self.event_dispatcher.dispatch_event(event=GiraffeEvent(
                request_id=request_id,
                event_type=GiraffeEventType.ERROR,
                message='Failure on writing to redis.',
                arguments={
                    'request_id': request_id,
                    'message': 'Failure on writing to redis.',
                    'exception': exception
                }))

        if not parallel_results.all_ok:
            raise parallel_results.exceptions[0]

        for details in parallel_results.results:
            source_name = details['source_name']
            processed_keys = details['processed_keys']
            request_id: str = details['request_id']

            self.event_dispatcher.dispatch_event(event=GiraffeEvent(
                request_id=request_id,
                event_type=GiraffeEventType.WRITING_FROM_REDIS_TO_NEO,
                message=
                f'Writing graph elements from redis into neo4j [{request_id}]',
                arguments={
                    'request_id': request_id,
                    'source_name': source_name,
                    'processed_keys': processed_keys,
                    'details': details,
                    'redis_db': self.redis_db
                }))

            translation_id = f'{request_id}_{source_name}'
            self.im.process_redis_content(request_id=request_id,
                                          translation_id=translation_id)

            self.event_dispatcher.dispatch_event(event=GiraffeEvent(
                request_id=request_id,
                event_type=GiraffeEventType.DELETING_REDIS_KEYS,
                message=
                f'Deleting processed keys from redis [{processed_keys}]',
                arguments={
                    'request_id': request_id,
                    'keys': processed_keys
                }))
            self.redis_db.delete_keys(keys=processed_keys)
            self.log.info(f'{source_name} is ready.')
            self.log.admin({
                Field.request_id: request_id,
                Field.ready: source_name
            })

        # Progress-5: Finished writing all redis content into neo4j.
        self.event_dispatcher.dispatch_event(event=GiraffeEvent(
            request_id=request_id,
            event_type=GiraffeEventType.DONE_PROCESSING_REQUEST,
            message=f'Done processing request: {request_id}',
            arguments={'request_id': request_id}))
        return request_id

    def processing_success_callback(self, finished_request_id: str):
        self.log.info(f'Finished processing request: {finished_request_id}')
        self.log.admin(
            {Field.finished_processing_request: finished_request_id})
        self.progress_monitor.dump_to_hard_drive_and_fluent()

    def processing_error_callback(self, exception: BaseException):
        import traceback
        self.log.error(
            f'Unhandled exception while handling client request: {exception}')
        self.log.error(''.join(traceback.format_tb(exception.__traceback__)))
        self.progress_monitor.dump_to_hard_drive_and_fluent()
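A minimal sketch of driving a request through Coordinator end to end on its own thread pool, reusing the two callbacks defined above. The request keys match the ones process_request actually reads; env, progress_monitor and event_dispatcher are assumed to be constructed elsewhere:

coordinator = Coordinator(env=env,
                          progress_monitor=progress_monitor,
                          event_dispatcher=event_dispatcher)

request = {
    'request_type': 'white_list',
    'request_id': 'my_request',
    'file_path': '/path/to/white_list.txt',  # Placeholder path.
}

# process_request returns the timestamped request id on success, which the
# success callback receives; unhandled exceptions go to the error callback.
coordinator.thread_pool.apply_async(
    func=coordinator.process_request,
    kwds={'request': request},
    callback=coordinator.processing_success_callback,
    error_callback=coordinator.processing_error_callback)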
Example #8
def redis_db() -> RedisDB:
    return RedisDB()
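This looks like a factory for tests; note that the other examples construct RedisDB with env and event_dispatcher arguments, so the no-argument call presumably relies on defaults. A sketch of using it as a pytest fixture, under that assumption:

import pytest

@pytest.fixture
def redis_db() -> RedisDB:
    # Assumes RedisDB() can fall back to default configuration.
    return RedisDB()

def test_no_leftover_keys(redis_db: RedisDB):
    # Hypothetical check: a clean database holds no keys.
    assert redis_db.get_key_by_pattern(key_pattern='*', return_list=True) == []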