Code example #1
 def get_from_change_name(cls, change_name, **kwargs):
     driver = kwargs.get('driver', None)
     if not driver:
         from toll_booth.alg_obj.aws.sapper.leech_driver import LeechDriver
         driver = LeechDriver(
             table_name=kwargs.get('table_name', 'VdGraphObjects'))
     results = driver.get_changelog_types(category=change_name)
     return cls.get_from_change_identifiers(results)
Code example #2
 def __init__(self, monitor_order, **kwargs):
     identifier_stem = IdentifierStem.from_raw(
         monitor_order.identifier_stem)
     self._identifier_stem = identifier_stem
     self._id_source = monitor_order.id_source
     self._leech_driver = LeechDriver(**kwargs)
     self._local_setup = self._leech_driver.get_field_value_setup(
         self._identifier_stem)
     self._sample_size = kwargs.get('sample_size', 1000)
Code example #3
File: fungi.py Project: AlgernonSolutions/leech
 def __init__(self, propagation_id, id_source, **kwargs):
     self._propagation_id = propagation_id
     self._id_source = id_source
     self._leech_driver = LeechDriver(table_name='VdGraphObjects')
     self._driving_identifier_stem = None
     self._extracted_identifier_stem = None
     self._propagation_identifier_stem = None
     self._context = kwargs['context']
     self._change_types = ChangeTypes.get(leech_driver=self._leech_driver)
Code example #4
File: lizards.py Project: AlgernonSolutions/leech
 def __init__(self, *, identifier_stem, **kwargs):
     identifier_stem = IdentifierStem.from_raw(identifier_stem)
     self._identifier_stem = identifier_stem
     self._object_type = identifier_stem.object_type
     self._schema_entry = SchemaVertexEntry.retrieve(self._object_type)
     self._leech_driver = LeechDriver()
     self._extractor_setup = self._leech_driver.get_extractor_setup(
         identifier_stem)
     self._extraction_profile = self._generate_extraction_profile()
     self._extraction_queue = kwargs.get(
         'extraction_queue', ForgeQueue.get_for_extraction_queue(**kwargs))
     self._sample_size = kwargs.get('sample_size', 1000)
Code example #5
File: fungi.py Project: AlgernonSolutions/leech
 def __init__(self, identifier_stem, driving_identifier_stem, **kwargs):
     identifier_stem = IdentifierStem.from_raw(identifier_stem)
     driving_identifier_stem = IdentifierStem.from_raw(
         driving_identifier_stem)
     self._spore_id = uuid.uuid4().hex
     self._identifier_stem = identifier_stem
     self._driving_identifier_stem = driving_identifier_stem
     self._leech_driver = LeechDriver(table_name='VdGraphObjects')
     self._extractor_setup = self._leech_driver.get_extractor_setup(
         driving_identifier_stem)
     self._schema_entry = SchemaVertexEntry.retrieve(
         driving_identifier_stem.object_type)
     self._sample_size = kwargs.get('sample_size', 1000)
     self._extraction_profile = self._generate_extraction_profile()
     self._driving_vertex_regulator = VertexRegulator.get_for_object_type(
         driving_identifier_stem.object_type)
Code example #6
File: fungi.py Project: AlgernonSolutions/leech
 def __init__(self, propagation_id, id_source, **kwargs):
     self._propagation_id = propagation_id
     self._id_source = id_source
     self._leech_driver = LeechDriver(table_name='VdGraphObjects')
     self._leech_scanner = DynamoScanner(table_name='VdGraphObjects')
     self._transform_queue = kwargs.get(
         'transform_queue',
         ForgeQueue.get_for_transform_queue(swarm=False,
                                            auto_send_threshold=1,
                                            **kwargs))
     self._change_types = ChangeTypes.get(leech_driver=self._leech_driver)
     self._driving_identifier_stem = None
     self._extracted_identifier_stem = None
     self._schema_entry = None
     self._mapping = None
     self._local_max_value = None
     self._enriched_data = None
     self._context = kwargs['context']
     self._results = {}
     self._checked_emp_ids = {}
Code example #7
def _set_changed_ids(change_type, **kwargs):
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem
    from toll_booth.alg_obj.graph.ogm.regulators import VertexRegulator
    from toll_booth.alg_obj.aws.sapper.leech_driver import LeechDriver
    from botocore.exceptions import ClientError

    id_values = kwargs['id_values']
    driving_identifier_stem = IdentifierStem.from_raw(
        kwargs['driving_identifier_stem'])
    driving_vertex_regulator = VertexRegulator.get_for_object_type(
        driving_identifier_stem.object_type)
    leech_driver = LeechDriver(
        table_name=kwargs.get('table_name', 'VdGraphObjects'))
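    # each changed id value becomes a potential vertex and is written according to its change type ('new', 'link', or 'unlink')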
    for id_value in id_values:
        object_data = driving_identifier_stem.for_extractor
        object_data['id_value'] = id_value
        potential_vertex = driving_vertex_regulator.create_potential_vertex(
            object_data)
        try:
            if change_type == 'new':
                leech_driver.set_assimilated_vertex(
                    potential_vertex,
                    False,
                    identifier_stem=driving_identifier_stem,
                    id_value=id_value)
                continue
            if change_type == 'link':
                leech_driver.set_link_object(
                    potential_vertex.internal_id,
                    driving_identifier_stem.get('id_source'),
                    False,
                    identifier_stem=driving_identifier_stem,
                    id_value=id_value)
                continue
            if change_type == 'unlink':
                leech_driver.set_link_object(
                    potential_vertex.internal_id,
                    driving_identifier_stem.get('id_source'),
                    True,
                    identifier_stem=driving_identifier_stem,
                    id_value=id_value)
                continue
            raise NotImplementedError(
                'could not find operation to perform for changed_ids type: %s'
                % change_type)
        except ClientError as e:
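            # a failed conditional check means this id value was already written; anything else is re-raised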
            if e.response['Error']['Code'] != 'ConditionalCheckFailedException':
                raise e
Code example #8
 def test_get_changelog_types(self):
     driver = LeechDriver()
     changelog_types = driver.get_changelog_types(categories_only=True)
     print(changelog_types)
Code example #9
 def test_get_changelog_types(self):
     driver = LeechDriver()
     changelog_types = driver.get_changelog_types(category='Changes')
     print(changelog_types)
Code example #10
 def test_get_changelog_types(self):
     driver = LeechDriver()
     changelog_types = driver.get_changelog_types()
     print(changelog_types)
Code example #11
class MonitorMouse:
    def __init__(self, monitor_order, **kwargs):
        identifier_stem = IdentifierStem.from_raw(
            monitor_order.identifier_stem)
        self._identifier_stem = identifier_stem
        self._id_source = monitor_order.id_source
        self._leech_driver = LeechDriver(**kwargs)
        self._local_setup = self._leech_driver.get_field_value_setup(
            self._identifier_stem)
        self._sample_size = kwargs.get('sample_size', 1000)

    def monitor(self):
        field_identifier_stems = self._derive_value_field_stems()
        remote_value_field_max_min = self._get_remote_value_field_max_mins(
            field_identifier_stems)
        extraction_orders = []
        for identifier_stem in field_identifier_stems:
            remote_value_max = remote_value_field_max_min[identifier_stem][
                'max']
            try:
                local_value_field_max = self._get_local_value_field_max(
                    identifier_stem)
            except MissingFieldValuesException:
                local_value_field_max = remote_value_field_max_min[
                    identifier_stem]['min']
            extraction_orders.extend(
                self._generate_extraction_order(identifier_stem,
                                                remote_value_max,
                                                local_value_field_max))

    def _generate_extraction_order(self, identifier_stem, remote_value_max,
                                   local_value_max):
        extraction_orders = []
        if remote_value_max > local_value_max:
            missing_range = range(local_value_max + 1, remote_value_max + 1)
            missing_range = missing_range[:self._sample_size]
            for entry in missing_range:
                extraction_orders.append(
                    ExtractObjectOrder(
                        identifier_stem,
                        entry,
                    ))
        return extraction_orders

    def _get_local_value_field_max(self, identifier_stem):
        return self._leech_driver.query_index_value_max(identifier_stem)

    def _get_remote_value_field_max_mins(self, identifier_stems):
        function_name = self._local_setup['extractor_names']['field_values']
        step_args = {
            'id_source': self._id_source,
            'identifier_stems': identifier_stems
        }
        return StageManager.run_field_value_query(function_name, step_args)

    def _derive_value_field_stems(self):
        stems = []
        paired_identifiers = self._identifier_stem.paired_identifiers
        field_values = self._local_setup['field_values']
        for field_value in field_values:
            field_names = ['id_source', 'id_type', 'id_name']
            named_fields = OrderedDict()
            for field_name in field_names:
                named_fields[field_name] = paired_identifiers[field_name]
            named_fields['data_dict_id'] = field_value
            field_identifier_stem = IdentifierStem('vertex', 'FieldValue',
                                                   named_fields)
            stems.append(field_identifier_stem)
        return stems
Code example #12
File: lizards.py Project: AlgernonSolutions/leech
class MonitorLizard:
    @xray_recorder.capture('lizard_init')
    def __init__(self, *, identifier_stem, **kwargs):
        identifier_stem = IdentifierStem.from_raw(identifier_stem)
        self._identifier_stem = identifier_stem
        self._object_type = identifier_stem.object_type
        self._schema_entry = SchemaVertexEntry.retrieve(self._object_type)
        self._leech_driver = LeechDriver()
        self._extractor_setup = self._leech_driver.get_extractor_setup(
            identifier_stem)
        self._extraction_profile = self._generate_extraction_profile()
        self._extraction_queue = kwargs.get(
            'extraction_queue', ForgeQueue.get_for_extraction_queue(**kwargs))
        self._sample_size = kwargs.get('sample_size', 1000)

    @xray_recorder.capture('lizard_monitor')
    def monitor(self):
        remote_ids = self._get_current_remote_max_min_id()
        try:
            max_local_id = self._get_current_local_max_id()
            max_remote_id = remote_ids['max']
        except EmptyIndexException:
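            # no local records for this identifier stem yet, so fall back to the remote minimum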
            max_local_id = remote_ids['min']
            max_remote_id = remote_ids['max']
        extraction_orders = []
        if max_remote_id > max_local_id:
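            # only id values above the local high-water mark are candidates for extraction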
            id_range = range(max_local_id + 1, max_remote_id + 1)
            id_range = id_range[:self._sample_size]
            already_working, not_working = self._leech_driver.mark_ids_as_working(
                id_range, identifier_stem=self._identifier_stem)
            for id_value in not_working:
                extraction_orders.append(
                    self._generate_extraction_order(id_value))
            orders_to_be_sent = len(self._extraction_queue)
            self._send_extraction_orders(extraction_orders)
            logging.info(
                f'completed monitoring for {self._schema_entry.entry_name}, '
                f'remote: {max_remote_id}, local: {max_local_id}, '
                f'{orders_to_be_sent} extraction orders to be sent, '
                f'{len(already_working)} values are already being processed')
            return
        logging.info(
            f'completed monitoring for {self._schema_entry.entry_name}, '
            f'remote: {max_remote_id}, local: {max_local_id}, '
            f'no new objects found')

    @xray_recorder.capture('lizard_send_extraction_order')
    def _send_extraction_orders(self, extraction_orders):
        self._extraction_queue.add_orders(extraction_orders)
        self._extraction_queue.push_orders()

    @xray_recorder.capture('lizard_generate_extraction_order')
    def _generate_extraction_order(self, missing_id_value):
        extractor_name = self._extractor_setup['extraction']
        extraction_profile = self._extraction_profile
        return ExtractObjectOrder(self._identifier_stem, missing_id_value,
                                  extractor_name, extraction_profile,
                                  self._schema_entry)

    @xray_recorder.capture('lizard_generate_extraction_properties')
    def _generate_extraction_profile(self):
        extraction_properties = self._identifier_stem.for_extractor
        schema_extraction_properties = self._schema_entry.extract[
            self._extractor_setup['type']]
        extraction_properties.update(
            schema_extraction_properties.extraction_properties)
        return extraction_properties

    @xray_recorder.capture('lizard_get_remote_max')
    def _get_current_remote_max_min_id(self):
        id_values = StageManager.run_index_query(
            self._extractor_setup['index_query'], self._extraction_profile)
        return id_values

    @xray_recorder.capture('lizard_get_local_max')
    def _get_current_local_max_id(self):
        return self._leech_driver.query_index_value_max(self._identifier_stem)
Code example #13
File: fungi.py Project: AlgernonSolutions/leech
class Mushroom:
    def __init__(self, propagation_id, id_source, **kwargs):
        self._propagation_id = propagation_id
        self._id_source = id_source
        self._leech_driver = LeechDriver(table_name='VdGraphObjects')
        self._leech_scanner = DynamoScanner(table_name='VdGraphObjects')
        self._transform_queue = kwargs.get(
            'transform_queue',
            ForgeQueue.get_for_transform_queue(swarm=False,
                                               auto_send_threshold=1,
                                               **kwargs))
        self._change_types = ChangeTypes.get(leech_driver=self._leech_driver)
        self._driving_identifier_stem = None
        self._extracted_identifier_stem = None
        self._schema_entry = None
        self._mapping = None
        self._local_max_value = None
        self._enriched_data = None
        self._context = kwargs['context']
        self._results = {}
        self._checked_emp_ids = {}

    def fruit(self):
        cascade_args = {'propagation_id': self._propagation_id}
        logging.info(
            'beginning a vertex driven extraction for propagation_id: %s' %
            self._propagation_id)
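        # walk the creep hierarchy for this propagation: id_value -> change category -> change action -> change entries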
        for id_value in self._leech_driver.get_creep_id_values(**cascade_args):
            logging.info('beginning the extraction for id_value: %s' %
                         id_value)
            cascade_args['id_value'] = id_value
            for change_category in self._leech_driver.get_creep_categories(
                    change_types=self._change_types, **cascade_args):
                logging.info(
                    'beginning the extraction for id_value: %s, category: %s' %
                    (id_value, str(change_category)))
                cascade_args['change_category'] = change_category
                for change_type in self._leech_driver.get_creep_actions(
                        **cascade_args):
                    logging.info(
                        'beginning the extraction for id_value: %s, category: %s, action: %s'
                        % (id_value, str(change_category), str(change_type)))
                    cascade_args['change_action'] = change_type
                    for entry in self._leech_driver.get_creep_changes(
                            **cascade_args):
                        logging.debug(
                            'beginning the extraction for id_value: %s, category: %s, action: %s, entry: %s'
                            % (id_value, str(change_category),
                               str(change_type), str(entry)))
                        self._populate_common_fields(entry)
                        self._check_enriched_data(id_value, change_category,
                                                  change_type)
                        try:
                            fruit = self._fruit(
                                entry['remote_change'],
                                id_value=id_value,
                                change_category=change_category,
                                change_type=change_type,
                                context=self._context)
                            self._store_fruit(change_category, change_type,
                                              fruit)
                            logging.debug(
                                'completed the extraction for id_value: %s, '
                                'category: %s, action: %s, entry: %s' %
                                (id_value, str(change_category),
                                 str(change_type), str(entry)))
                            self._set_fruit(fruit, entry['identifier_stem'])
                        except InsufficientOperationTimeException:
                            self._transform_queue.push_orders()
                            return False
                    logging.info(
                        'completed the extraction for id_value: %s, category: %s, action: %s'
                        % (id_value, str(change_category), str(change_type)))
                logging.info(
                    'completed the extraction for id_value: %s, category: %s' %
                    (id_value, str(change_category)))
            logging.info('completed the extraction for id_value: %s' %
                         id_value)
        logging.info(
            'completed a vertex driven extraction for propagation_id: %s' %
            self._propagation_id)
        return True

    @metered
    def _fruit(self, remote_change, **kwargs):
        change_log_data = self._generate_change_log(remote_change, **kwargs)
        return change_log_data

    def _generate_change_log(self, remote_change, **kwargs):
        change_category = kwargs['change_category']
        change_type = kwargs['change_type']
        change_date_utc = remote_change['UTCDate']
        extracted_data = self._build_change_log_extracted_data(
            remote_change, self._mapping)
        id_source = self._driving_identifier_stem.get('id_source')
        source_data = {
            'change_date_utc': extracted_data['change_date_utc'],
            'change_description': extracted_data['change_description'],
            'change_date': extracted_data['change_date'],
            'action': extracted_data['action'],
            'action_id':
            change_category.get_action_id(extracted_data['action']),
            'id_source': id_source,
            'id_type': 'ChangeLog',
            'id_name': 'change_date_utc',
            'by_emp_id': self._enriched_data.get_by_emp_id(change_date_utc)
        }
        returned_data = {'source': source_data}
        changed_target = self._build_changed_targets(id_source, extracted_data,
                                                     change_type)
        if changed_target:
            returned_data['changed_target'] = changed_target
        change_target = self._enriched_data.get_change_target(change_date_utc)
        if change_target is not None:
            returned_data['change_target'] = change_target
        return returned_data

    def _set_fruit(self, change_object, creep_identifier_stem):
        id_value = change_object['source']['change_date_utc']
        identifier_stem = self._calculate_change_log_identifier_stem(
            change_object)
        mark_args = (id_value, creep_identifier_stem, identifier_stem,
                     change_object)
        is_working_already = self._mark_objects_as_working(*mark_args)
        if not is_working_already:
            order_args = (identifier_stem, id_value, change_object,
                          self._schema_entry)
            transform_order = TransformObjectOrder(*order_args)
            self._transform_queue.add_order(transform_order)

    def _mark_objects_as_working(self, id_value, creep_identifier_stem,
                                 identifier_stem, extracted_data):
        put_args = (self._propagation_id, creep_identifier_stem,
                    extracted_data)
        put_kwargs = {'identifier_stem': identifier_stem, 'id_value': id_value}
        return self._leech_driver.mark_fruited_vertex(*put_args, **put_kwargs)

    @classmethod
    def _calculate_change_log_identifier_stem(cls, extracted_data):
        pairs = {
            'id_source': extracted_data['source']['id_source'],
            'id_type': extracted_data['source']['id_type'],
            'id_name': extracted_data['source']['id_name']
        }
        identifier_stem = IdentifierStem('vertex', 'ChangeLog', pairs)
        return identifier_stem

    def perform_enrichment(self, driving_id_value, change_category,
                           change_action, **kwargs):
        mule_team = CredibleMuleTeam(self._id_source)
        enrichment_args = {
            'driving_id_type': self._driving_identifier_stem.get('id_type'),
            'driving_id_name': self._driving_identifier_stem.get('id_name'),
            'driving_id_value': driving_id_value,
            'local_max_value': self._local_max_value,
            'category_id': change_category.category_id,
            'action_id': int(change_action.action_id),
            'get_details': kwargs['get_details'],
            'get_emp_ids': kwargs['get_emp_ids'],
            'checked_emp_ids': self._checked_emp_ids
        }
        enriched_data = mule_team.enrich_data(**enrichment_args)
        return enriched_data

    @classmethod
    def _split_record_id(cls, field_value, **kwargs):
        record_id, record_type = cls._split_entry(field_value)
        return {'record_id': record_id, 'record_type': record_type}

    @classmethod
    def _split_entry(cls, field_value):
        # text followed by a parenthesized non-numeric label, e.g. "1234 (Record Type)"
        non_numeric_inside = re.compile(
            r'(?P<outside>[\w\s]+?)\s*\((?P<inside>(?=[a-zA-Z\s])[\w\s\d]+)\)')
        # text followed by a parenthesized numeric id, e.g. "Record Type (1234)"
        numeric_inside = re.compile(
            r'(?P<outside>[\w\s]+?)\s*\((?P<inside>[\d]+)\)')
        # a bare number with no parentheses at all
        no_parenthesis_number = re.compile(r'^((?![()])\d)*$')
        has_numeric_inside = numeric_inside.search(field_value)
        has_non_numeric_inside = non_numeric_inside.search(field_value)
        is_just_number = no_parenthesis_number.search(field_value) is not None
        if has_numeric_inside:
            id_type = has_numeric_inside.group('outside')
            id_value = has_numeric_inside.group('inside')
            return Decimal(id_value), id_type
        if has_non_numeric_inside:
            id_value = has_non_numeric_inside.group('outside')
            id_type = has_non_numeric_inside.group('inside')
            try:
                return Decimal(id_value), id_type
            except InvalidOperation:
                return id_type, id_value
        if is_just_number:
            return Decimal(field_value), None
        return field_value, None

    @classmethod
    def _convert_datetime_utc(cls, field_value):
        from toll_booth.alg_obj.utils import convert_credible_fe_datetime_to_python
        return convert_credible_fe_datetime_to_python(field_value, True)

    @classmethod
    def _convert_datetime(cls, field_value):
        from toll_booth.alg_obj.utils import convert_credible_fe_datetime_to_python
        return convert_credible_fe_datetime_to_python(field_value, False)

    def _generate_mapping(self):
        for extractor in self._schema_entry.extract.values():
            extraction_properties = extractor.extraction_properties
            mapping = extraction_properties['mapping']
            id_source_mapping = mapping.get(self._id_source,
                                            mapping['default'])
            object_mapping = id_source_mapping[
                self._driving_identifier_stem.get('id_type')]
            return object_mapping

    def _populate_common_fields(self, entry):
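        # lazily cache the identifier stems, schema entry, mapping, and local max value from the first entry seen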
        if not self._extracted_identifier_stem:
            self._extracted_identifier_stem = IdentifierStem.from_raw(
                entry['extracted_identifier_stem'])
        if not self._driving_identifier_stem:
            self._driving_identifier_stem = IdentifierStem.from_raw(
                entry['driving_identifier_stem'])
        if not self._schema_entry:
            self._schema_entry = SchemaVertexEntry.retrieve(
                self._extracted_identifier_stem.object_type)
        if not self._mapping:
            self._mapping = self._generate_mapping()
        if not self._local_max_value:
            self._local_max_value = entry['local_max_value']
        return

    def _check_enriched_data(self, id_value, change_category, change_type):
        if self._enriched_data is None:
            enriched_data = EnrichedData(self, id_value, change_category,
                                         change_type)
            enriched_data.update_enriched_data(id_value, change_category,
                                               change_type)
            self._enriched_data = enriched_data
        if not self._enriched_data.is_current(id_value, change_category,
                                              change_type):
            self._enriched_data.update_enriched_data(id_value, change_category,
                                                     change_type)

    def _store_fruit(self, change_category, change_type, fruit):
        if str(change_category) not in self._results:
            self._results[str(change_category)] = {}
        if str(change_type) not in self._results[str(change_category)]:
            self._results[str(change_category)][str(change_type)] = []
        self._results[str(change_category)][str(change_type)].append(fruit)

    @classmethod
    def _build_changed_targets(cls, id_source, extracted_data, change_type):
        changed_target = []
        client_id = extracted_data.get('client_id', None)
        clientvisit_id = extracted_data.get('clientvisit_id', None)
        if client_id and client_id != 0:
            changed_target.append({
                'id_source': id_source,
                'id_type': 'Clients',
                'id_name': 'client_id',
                'id_value': Decimal(client_id)
            })
        if clientvisit_id and clientvisit_id != '0':
            changed_target.append({
                'id_source': id_source,
                'id_type': 'ClientVisit',
                'id_name': 'clientvisit_id',
                'id_value': Decimal(clientvisit_id)
            })
        if extracted_data.get('record', None):
            record = extracted_data.get('record')
            id_type = record['record_type']
            if id_type is None:
                id_type = change_type.id_type
            if id_type not in ['ClientVisit', 'Clients', 'unspecified']:
                id_name = change_type.id_name
                changed_target.append({
                    'id_source': id_source,
                    'id_type': id_type,
                    'id_name': id_name,
                    'id_value': Decimal(record['record_id'])
                })
            if id_type == 'unspecified':
                static_values = change_type.change_target
                if static_values == 'dynamic':
                    raise RuntimeError(
                        'could not determine the id_type and id_name for: %s' %
                        extracted_data)
                static_values['id_value'] = Decimal(record['record_id'])
                static_values['id_source'] = id_source
                changed_target.append(static_values)
        return changed_target

    def _build_change_log_extracted_data(self, remote_change, mapping):
        extracted_data = {}
        for field_name, field_value in remote_change.items():
            if field_name in mapping:
                row_mapping = mapping[field_name]
                field_name = row_mapping['name']
                mutation = row_mapping['mutation']
                if mutation and field_value:
                    field_value = getattr(self, '_' + mutation)(field_value)
                extracted_data[field_name] = field_value
        return extracted_data
Code example #14
File: fungi.py Project: AlgernonSolutions/leech
class Spore:
    def __init__(self, identifier_stem, driving_identifier_stem, **kwargs):
        identifier_stem = IdentifierStem.from_raw(identifier_stem)
        driving_identifier_stem = IdentifierStem.from_raw(
            driving_identifier_stem)
        self._spore_id = uuid.uuid4().hex
        self._identifier_stem = identifier_stem
        self._driving_identifier_stem = driving_identifier_stem
        self._leech_driver = LeechDriver(table_name='VdGraphObjects')
        self._extractor_setup = self._leech_driver.get_extractor_setup(
            driving_identifier_stem)
        self._schema_entry = SchemaVertexEntry.retrieve(
            driving_identifier_stem.object_type)
        self._sample_size = kwargs.get('sample_size', 1000)
        self._extraction_profile = self._generate_extraction_profile()
        self._driving_vertex_regulator = VertexRegulator.get_for_object_type(
            driving_identifier_stem.object_type)

    def propagate(self):
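        # diff the remote id values against local state, reconcile links, then record the propagation for the downstream creep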
        remote_id_values = self._perform_remote_monitor_extraction()
        local_id_values = self._perform_local_monitor_extraction()
        self._manage_driven_ids(remote_id_values, local_id_values)
        self._mark_propagation(remote_id_values)
        return {
            'propagation_id': self._spore_id,
            'id_source': self._driving_identifier_stem.get('id_source')
        }

    def _perform_remote_monitor_extraction(self):
        step_args = self._extractor_setup.copy()
        step_args.update(self._driving_identifier_stem.for_extractor)
        step_args.update(self._schema_entry.extract[
            self._extractor_setup['type']].extraction_properties)
        manager_args = (self._extractor_setup['monitor_extraction'], step_args)
        remote_id_values = StageManager.run_monitoring_extraction(
            *manager_args)
        return set(remote_id_values)

    def _perform_local_monitor_extraction(self):
        identifier_stem = self._driving_identifier_stem
        vertex_regulator = self._driving_vertex_regulator
        local_id_values = self._leech_driver.get_local_id_values(
            identifier_stem, vertex_regulator=vertex_regulator)
        return local_id_values

    def _generate_extraction_profile(self):
        extraction_properties = self._driving_identifier_stem.for_extractor
        schema_extraction_properties = self._schema_entry.extract[
            self._extractor_setup['type']]
        extraction_properties.update(
            schema_extraction_properties.extraction_properties)
        return extraction_properties

    def _mark_propagation(self, remote_id_values, **kwargs):
        change_types = ChangeTypes.get(leech_driver=self._leech_driver)
        kwargs['change_types'] = change_types
        self._leech_driver.mark_propagated_vertexes(
            self._spore_id, self._identifier_stem,
            self._driving_identifier_stem, remote_id_values, **kwargs)

    def _manage_driven_ids(self, remote_id_values, local_id_values):
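        # set arithmetic against the remote snapshot: ids present remotely but not linked locally get linked, ids linked locally but absent remotely get unlinked, ids never stored locally get created as new vertexes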
        local_linked_values = local_id_values['linked']
        newly_linked_id_values = remote_id_values - local_linked_values
        unlinked_id_values = local_linked_values - remote_id_values
        new_id_values = remote_id_values - local_id_values['all']
        self._put_new_ids(new_id_values)
        self._unlink_old_ids(unlinked_id_values)
        self._link_new_ids(newly_linked_id_values)

    def _unlink_old_ids(self, id_values):
        for id_value in id_values:
            object_data = self._identifier_stem.for_extractor
            object_data['id_value'] = id_value
            potential_vertex = self._driving_vertex_regulator.create_potential_vertex(
                object_data)
            try:
                self._leech_driver.set_link_object(
                    potential_vertex.internal_id,
                    self._identifier_stem.get('id_source'),
                    True,
                    identifier_stem=self._identifier_stem,
                    id_value=id_value)
            except ClientError as e:
                if e.response['Error'][
                        'Code'] != 'ConditionalCheckFailedException':
                    raise e

    def _link_new_ids(self, id_values):
        for id_value in id_values:
            object_data = self._identifier_stem.for_extractor
            object_data['id_value'] = id_value
            potential_vertex = self._driving_vertex_regulator.create_potential_vertex(
                object_data)
            try:
                self._leech_driver.set_link_object(
                    potential_vertex.internal_id,
                    self._identifier_stem.get('id_source'),
                    False,
                    identifier_stem=self._identifier_stem,
                    id_value=id_value)
            except ClientError as e:
                if e.response['Error'][
                        'Code'] != 'ConditionalCheckFailedException':
                    raise e

    def _put_new_ids(self, id_values):
        for id_value in id_values:
            object_data = self._identifier_stem.for_extractor
            object_data['id_value'] = id_value
            potential_vertex = self._driving_vertex_regulator.create_potential_vertex(
                object_data)
            try:
                self._leech_driver.set_assimilated_vertex(
                    potential_vertex,
                    False,
                    identifier_stem=self._identifier_stem,
                    id_value=id_value)
            except ClientError as e:
                if e.response['Error'][
                        'Code'] != 'ConditionalCheckFailedException':
                    raise e
Code example #15
File: fungi.py Project: AlgernonSolutions/leech
class Mycelium:
    def __init__(self, propagation_id, id_source, **kwargs):
        self._propagation_id = propagation_id
        self._id_source = id_source
        self._leech_driver = LeechDriver(table_name='VdGraphObjects')
        self._driving_identifier_stem = None
        self._extracted_identifier_stem = None
        self._propagation_identifier_stem = None
        self._context = kwargs['context']
        self._change_types = ChangeTypes.get(leech_driver=self._leech_driver)

    def creep(self):
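        # replay every propagated vertex through the Credible front end driver and extract its change logs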
        with CredibleFrontEndDriver(self._id_source) as driver:
            for entry in self._leech_driver.get_propagated_vertexes(
                    self._propagation_id):
                if not self._driving_identifier_stem:
                    self._driving_identifier_stem = IdentifierStem.from_raw(
                        entry['driving_identifier_stem'])
                if not self._extracted_identifier_stem:
                    self._extracted_identifier_stem = IdentifierStem.from_raw(
                        entry['extracted_identifier_stem'])
                if not self._propagation_identifier_stem:
                    self._propagation_identifier_stem = IdentifierStem.from_raw(
                        entry['identifier_stem'])
                try:
                    self._creep(
                        entry,
                        identifier_stem=self._extracted_identifier_stem,
                        driving_identifier_stem=self._driving_identifier_stem,
                        context=self._context,
                        driver=driver)
                except InsufficientOperationTimeException:
                    return False
        return {
            'propagation_id': self._propagation_id,
            'id_source': self._id_source
        }

    @metered
    def _creep(self, entry, **kwargs):
        driving_identifier_stem = kwargs['driving_identifier_stem']
        driver = kwargs['driver']
        identifier_stem = IdentifierStem.from_raw(entry['identifier_stem'])
        id_value = entry['driving_id_value']
        category = identifier_stem.get('category')
        change_category = self._change_types.get_category_by_name(category)
        logging.info(
            f'started the extraction for id_value: {id_value}, change_category_id: {change_category.category_id}'
        )
        local_max_value = self._get_local_max_value(id_value, change_category)
        extraction_args = {
            'driving_id_type': driving_identifier_stem.get('id_type'),
            'driving_id_name': driving_identifier_stem.get('id_name'),
            'driving_id_value': id_value,
            'local_max_value': local_max_value,
            'category_id': change_category.category_id,
            'driving_identifier_stem': driving_identifier_stem,
            'identifier_stem': identifier_stem,
            'category': change_category
        }
        remote_changes = driver.get_change_logs(**extraction_args)
        logging.info(
            f'completed the extraction for id_value: {id_value}, change_category_id: {change_category.category_id}'
        )
        self._mark_creep_vertexes(remote_changes, **extraction_args)

    def _get_local_max_value(self, driving_id_value, change_type):
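        # local high-water mark for this driving id value and change type; None when the index holds no entries yet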
        id_source = self._driving_identifier_stem.get('id_source')
        id_type = self._driving_identifier_stem.get('id_type')
        change_stem = f'#{id_source}#{id_type}#{driving_id_value}#{change_type}'
        try:
            local_max_value = self._leech_driver.scan_index_value_max(
                change_stem)
        except EmptyIndexException:
            local_max_value = None
        return local_max_value

    def _mark_creep_vertexes(self, remote_changes, category, **kwargs):
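        # queue one pending write per remote change through the clerk swarm, then delete the propagated vertex record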
        clerks = ClerkSwarm(self._leech_driver.table_name, 'mark_creep')
        driving_id_value = kwargs['driving_id_value']
        propagation_identifier_stem = kwargs['identifier_stem']
        for remote_change in remote_changes:
            action = remote_change['Action']
            action_id = category.get_action_id(action)
            change_date_utc = remote_change['UTCDate']
            record = remote_change['Record']
            pairs = OrderedDict()
            pairs['category'] = str(category)
            pairs['id_value'] = driving_id_value
            pairs['action'] = str(action)
            pairs['record'] = record
            pairs['done_by'] = remote_change['Done By']
            pairs['change_date_utc'] = str(change_date_utc.timestamp())
            pairs.update(self._driving_identifier_stem.paired_identifiers)
            pending_write = kwargs.copy()
            pending_write['identifier_stem'] = str(
                IdentifierStem('creep', 'ChangeLog', pairs))
            pending_write['sid_value'] = self._propagation_id
            pending_write['propagation_id'] = self._propagation_id
            pending_write['propagation_identifier_stem'] = str(
                propagation_identifier_stem)
            pending_write[
                'creep_identifier'] = f'#{str(driving_id_value)}#{str(category)}#{str(action)}#'
            pending_write['remote_change'] = json.dumps(remote_change,
                                                        cls=AlgEncoder)
            pending_write['driving_identifier_stem'] = str(
                self._driving_identifier_stem)
            pending_write['extracted_identifier_stem'] = str(
                self._extracted_identifier_stem)
            pending_write['category'] = str(category)
            pending_write['action'] = str(action)
            pending_write['action_id'] = action_id
            try:
                clerks.add_pending_write(pending_write)
            except RuntimeError:
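                # a rejected pending write is skipped rather than failing the whole batch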
                pass
        clerks.send()
        self._leech_driver.delete_propagated_vertex(
            self._propagation_id, self._propagation_identifier_stem)