예제 #1
0
 def creep(self):
     """Run ``_creep`` for every propagated vertex of this propagation.

     Each identifier stem cached on the instance is populated from the
     current entry only while it is still falsy.

     Returns:
         ``False`` as soon as ``_creep`` raises
         ``InsufficientOperationTimeException``; otherwise a dict holding
         ``propagation_id`` and ``id_source``.
     """
     # (instance attribute, key of the entry holding its raw value)
     stem_sources = (
         ('_driving_identifier_stem', 'driving_identifier_stem'),
         ('_extracted_identifier_stem', 'extracted_identifier_stem'),
         ('_propagation_identifier_stem', 'identifier_stem'),
     )
     with CredibleFrontEndDriver(self._id_source) as driver:
         propagated = self._leech_driver.get_propagated_vertexes(
             self._propagation_id)
         for entry in propagated:
             for attribute, entry_key in stem_sources:
                 if not getattr(self, attribute):
                     setattr(self, attribute,
                             IdentifierStem.from_raw(entry[entry_key]))
             try:
                 self._creep(
                     entry,
                     identifier_stem=self._extracted_identifier_stem,
                     driving_identifier_stem=self._driving_identifier_stem,
                     context=self._context,
                     driver=driver)
             except InsufficientOperationTimeException:
                 # Out of operating time: report the run as unfinished.
                 return False
     return {
         'propagation_id': self._propagation_id,
         'id_source': self._id_source
     }
예제 #2
0
def work_remote_id_change_type(**kwargs):
    """Fetch the remote change logs for one category, grouped by action.

    Reads ``driving_identifier_stem``, ``identifier_stem``,
    ``changelog_types``, ``category_id``, ``id_value`` and
    ``local_max_value`` from ``kwargs``.

    Returns:
        dict: ``{'change_actions': {action: [remote_change, ...]}}`` where
        the action is each remote change's ``'Action'`` value.
    """
    from toll_booth.alg_obj.forge.extractors.credible_fe import CredibleFrontEndDriver
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    extracted_stem = IdentifierStem.from_raw(kwargs['identifier_stem'])
    category = kwargs['changelog_types'].categories[kwargs['category_id']]
    id_source = driving_stem.get('id_source')
    with CredibleFrontEndDriver(id_source) as driver:
        remote_changes = driver.get_change_logs(
            driving_id_type=driving_stem.get('id_type'),
            driving_id_name=driving_stem.get('id_name'),
            driving_id_value=kwargs['id_value'],
            local_max_value=kwargs['local_max_value'],
            category_id=category.category_id,
            driving_identifier_stem=driving_stem,
            identifier_stem=extracted_stem,
            category=category)
        changes_by_action = {}
        for change in remote_changes:
            changes_by_action.setdefault(change['Action'], []).append(change)
        return {'change_actions': changes_by_action}
예제 #3
0
 def _populate_common_fields(self, entry):
     """Fill in the cached fields that have not been set yet.

     A field is assigned only while its current value is falsy. Order
     matters: the schema entry is retrieved using the extracted identifier
     stem populated just before it.

     Args:
         entry: mapping read for ``extracted_identifier_stem``,
             ``driving_identifier_stem`` and ``local_max_value``.
     """
     if not self._extracted_identifier_stem:
         raw_extracted = entry['extracted_identifier_stem']
         self._extracted_identifier_stem = IdentifierStem.from_raw(
             raw_extracted)
     if not self._driving_identifier_stem:
         raw_driving = entry['driving_identifier_stem']
         self._driving_identifier_stem = IdentifierStem.from_raw(raw_driving)
     if not self._schema_entry:
         object_type = self._extracted_identifier_stem.object_type
         self._schema_entry = SchemaVertexEntry.retrieve(object_type)
     if not self._mapping:
         self._mapping = self._generate_mapping()
     if not self._local_max_value:
         self._local_max_value = entry['local_max_value']
def correct():
    """Print the change-type schema entries with corrected action ids.

    Loads ``starters/change_types.json`` (resolved two directory levels
    above this file), matches each ``<option>`` found in ``test_html`` to
    the schema entry with the same ``change_action`` text, and rewrites
    that entry's ``action_id``, ``identifier_stem`` and ``sid_value``.
    Options without a value or text, or without a schema entry, are
    skipped. The result is printed as a JSON string.
    """
    parent_directory = os.path.dirname(os.path.dirname(__file__))
    schema_file_path = os.path.join(parent_directory, 'starters',
                                    'change_types.json')
    with open(schema_file_path) as schema_file:
        schema = {
            entry['change_action']: entry
            for entry in json.load(schema_file)
        }
    soup = bs4.BeautifulSoup(test_html, features='html.parser')

    corrected = []
    for option in soup.find_all('option'):
        change_id = option.attrs.get('value')
        change_action = option.text
        if not (change_id and change_action):
            continue
        action_id = int(change_id)
        schema_entry = schema.get(change_action)
        if not schema_entry:
            continue
        schema_entry['action_id'] = action_id
        original_stem = IdentifierStem.from_raw(
            schema_entry['identifier_stem'])
        pairs = original_stem.paired_identifiers
        pairs['action_id'] = action_id
        corrected_stem = IdentifierStem('vertex', 'ChangeLogType', pairs)
        # for_dynamo is read before the stem is stringified, as before.
        sid_value = corrected_stem.for_dynamo
        schema_entry['identifier_stem'] = str(corrected_stem)
        schema_entry['sid_value'] = sid_value
        corrected.append(schema_entry)
    print(json.dumps(corrected))
예제 #5
0
 def _extract_change_logs(cls, driver, id_value, local_max_values,
                          **kwargs):
     """Collect the formatted change logs for one driving id value.

     Args:
         driver: object exposing ``get_change_logs`` and ``get_emp_ids``.
         id_value: sent to the driver as ``driving_id_value``.
         local_max_values: mapping of category id to local max value; one
             driver extraction is run per item.
         **kwargs: must contain ``identifier_stem``, ``id_source`` and
             ``mapping`` (which must have a ``default`` key).

     Returns:
         list: the results of ``_format_change_log_data`` for every
         category, concatenated in iteration order.
     """
     change_logs = []
     identifier_stem = kwargs['identifier_stem']
     driving_stem = IdentifierStem.from_raw(
         identifier_stem.get('identifier_stem'))
     driving_id_type = driving_stem.get('id_type')
     driving_id_name = driving_stem.get('id_name')
     id_source = kwargs['id_source']
     mapping = kwargs['mapping']
     id_source_mapping = mapping.get(id_source, mapping['default'])
     object_mapping = id_source_mapping[driving_id_type]
     for category_id, local_max_value in local_max_values.items():
         extraction_args = {
             'driving_id_type': driving_id_type,
             'driving_id_name': driving_id_name,
             'driving_id_value': id_value,
             'category_id': category_id,
             'local_max_value': local_max_value
         }
         source_extraction = driver.get_change_logs(**extraction_args)
         # NOTE(review): the employee ids are fetched but not yet merged
         # into the extraction; the call is kept so the driver interaction
         # stays unchanged.
         driver.get_emp_ids(**extraction_args)
         formatted_extraction = cls._format_change_log_data(
             identifier_stem,
             source_extraction,
             object_mapping=object_mapping,
             driver=driver)
         change_logs.extend(formatted_extraction)
     # Previously the accumulated logs were dropped and None was returned.
     return change_logs
예제 #6
0
def graph_links(**kwargs):
    """Graph the id-source vertex together with link and unlink edges.

    Reads ``identifier_stem`` from ``kwargs`` plus the optional
    ``new_link_histories``, ``new_links`` and ``new_unlinks`` collections
    (each defaults to an empty list). Every link history contributes its
    potential vertex and an edge for its most recent link; every new link
    or unlink pair contributes ``pair[0].generate_edge(pair[1])``. The
    objects are handed to ``Ogm.graph_objects``.
    """
    from toll_booth.alg_obj.graph.ogm.ogm import Ogm
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem
    from toll_booth.alg_obj.graph import InternalId
    from toll_booth.alg_obj.graph.ogm.regulators import PotentialVertex

    driving_stem = IdentifierStem.from_raw(kwargs['identifier_stem'])
    id_source = driving_stem.get('id_source')
    source_properties = {'id_source': id_source}
    internal_id = InternalId(''.join(['IdSource', id_source])).id_value
    source_stem = IdentifierStem('vertex', 'IdSource',
                                 {'id_source': id_source})
    source_vertex = PotentialVertex('IdSource', internal_id,
                                    source_properties, source_stem,
                                    id_source, 'id_source')

    vertexes = [source_vertex]
    edges = []
    for history in kwargs.get('new_link_histories', []):
        vertexes.append(history.potential_vertex)
        edges.append(history.generate_edge(history.most_recent_link))
    # Links are processed before unlinks.
    link_pairs = kwargs.get('new_links', [])
    unlink_pairs = kwargs.get('new_unlinks', [])
    for pairs in (link_pairs, unlink_pairs):
        for pair in pairs:
            edges.append(pair[0].generate_edge(pair[1]))
    Ogm(**kwargs).graph_objects(vertexes, edges)
예제 #7
0
def get_enrichment_for_change_action(**kwargs):
    """Return the enrichment data for one change action.

    Reads ``driving_identifier_stem``, ``changelog_types``, ``action_id``,
    ``id_value`` and ``local_max_value`` from ``kwargs``. When the action
    is static, has no details and no entity type, an empty payload is
    returned without contacting the mule team.

    Returns:
        dict: ``{'enriched_data': ...}``.
    """
    from toll_booth.alg_obj.forge.extractors.credible_fe.mule_team import CredibleMuleTeam
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    id_source = driving_stem.get('id_source')
    changelog_types = kwargs['changelog_types']
    action_id = kwargs['action_id']
    action_key = str(action_id)
    change_action = changelog_types[action_key]
    category_id = changelog_types.get_category_id_from_action_id(action_key)

    nothing_to_enrich = (
        change_action.is_static
        and change_action.has_details is False
        and not change_action.entity_type
    )
    if nothing_to_enrich:
        return {
            'enriched_data': {
                'change_detail': {},
                'by_emp_ids': {},
                'entity_ids': {}
            }
        }

    mule_team = CredibleMuleTeam(id_source)
    enriched_data = mule_team.enrich_data(
        driving_id_type=driving_stem.get('id_type'),
        driving_id_name=driving_stem.get('id_name'),
        driving_id_value=kwargs['id_value'],
        local_max_value=kwargs['local_max_value'],
        category_id=category_id,
        action_id=int(action_id),
        get_details=change_action.has_details is True,
        get_by_emp_ids=change_action.is_static is False,
        get_entity_ids=change_action.entity_type,
        checked_emp_ids=None)
    return {'enriched_data': enriched_data}
예제 #8
0
 def __init__(self, identifier_stem, driving_identifier_stem, **kwargs):
     """Set up the stems, driver, schema entry and extraction profile.

     Args:
         identifier_stem: raw value passed to ``IdentifierStem.from_raw``.
         driving_identifier_stem: raw value passed to
             ``IdentifierStem.from_raw``; also drives the extractor setup,
             schema entry and vertex regulator lookups.
         **kwargs: optional ``sample_size`` (defaults to 1000).
     """
     extracted_stem = IdentifierStem.from_raw(identifier_stem)
     driving_stem = IdentifierStem.from_raw(driving_identifier_stem)
     self._spore_id = uuid.uuid4().hex
     self._identifier_stem = extracted_stem
     self._driving_identifier_stem = driving_stem
     leech_driver = LeechDriver(table_name='VdGraphObjects')
     self._leech_driver = leech_driver
     self._extractor_setup = leech_driver.get_extractor_setup(driving_stem)
     self._schema_entry = SchemaVertexEntry.retrieve(
         driving_stem.object_type)
     self._sample_size = kwargs.get('sample_size', 1000)
     # Generated only after the attributes above have been assigned.
     self._extraction_profile = self._generate_extraction_profile()
     self._driving_vertex_regulator = VertexRegulator.get_for_object_type(
         driving_stem.object_type)
예제 #9
0
 def _creep(self, entry, **kwargs):
     """Extract the remote change logs for one entry and mark them.

     Args:
         entry: mapping read for ``identifier_stem`` and
             ``driving_id_value``.
         **kwargs: must contain ``driving_identifier_stem`` and ``driver``
             (an object exposing ``get_change_logs``).
     """
     driving_stem = kwargs['driving_identifier_stem']
     driver = kwargs['driver']
     entry_stem = IdentifierStem.from_raw(entry['identifier_stem'])
     id_value = entry['driving_id_value']
     change_category = self._change_types.get_category_by_name(
         entry_stem.get('category'))
     logging.info(
         f'started the extraction for id_value: {id_value}, change_category_id: {change_category.category_id}'
     )
     extraction_args = dict(
         driving_id_type=driving_stem.get('id_type'),
         driving_id_name=driving_stem.get('id_name'),
         driving_id_value=id_value,
         local_max_value=self._get_local_max_value(id_value,
                                                   change_category),
         category_id=change_category.category_id,
         driving_identifier_stem=driving_stem,
         identifier_stem=entry_stem,
         category=change_category,
     )
     remote_changes = driver.get_change_logs(**extraction_args)
     logging.info(
         f'completed the extraction for id_value: {id_value}, change_category_id: {change_category.category_id}'
     )
     self._mark_creep_vertexes(remote_changes, **extraction_args)
예제 #10
0
def pull_schema_entry(**kwargs):
    """Retrieve the schema and the entry matching the identifier stem.

    Reads ``identifier_stem`` from ``kwargs``; all of ``kwargs`` is
    forwarded to ``Schema.retrieve``.

    Returns:
        dict: ``{'schema_entry': ..., 'schema': ...}``.
    """
    from toll_booth.alg_obj.graph.schemata.schema import Schema
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    stem = IdentifierStem.from_raw(kwargs['identifier_stem'])
    schema = Schema.retrieve(**kwargs)
    return {'schema_entry': schema[stem.object_type], 'schema': schema}
예제 #11
0
 def __init__(self, monitor_order, **kwargs):
     """Set up the monitor from a monitor order.

     Args:
         monitor_order: object providing ``identifier_stem`` and
             ``id_source``.
         **kwargs: forwarded in full to ``LeechDriver``; ``sample_size``
             is also read here (defaults to 1000).
     """
     self._identifier_stem = IdentifierStem.from_raw(
         monitor_order.identifier_stem)
     self._id_source = monitor_order.id_source
     leech_driver = LeechDriver(**kwargs)
     self._leech_driver = leech_driver
     self._local_setup = leech_driver.get_field_value_setup(
         self._identifier_stem)
     self._sample_size = kwargs.get('sample_size', 1000)
예제 #12
0
def get_local_ids(**kwargs):
    """Look up the local id values for the driving identifier stem.

    Reads ``driving_identifier_stem`` and ``schema`` from ``kwargs``; all
    of ``kwargs`` is also forwarded to ``IndexManager.from_graph_schema``.

    Returns:
        dict: ``{'local_id_values': ...}``.
    """
    from toll_booth.alg_obj.graph.index_manager.index_manager import IndexManager
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    index_manager = IndexManager.from_graph_schema(kwargs['schema'], **kwargs)
    return {
        'local_id_values': index_manager.get_local_id_values(driving_stem)
    }
예제 #13
0
 def __init__(self, identifier_stem, id_value, **kwargs):
     """Store the identifying values of the object.

     Args:
         identifier_stem: when truthy, it is parsed with
             ``IdentifierStem.from_raw`` and supplies the object type;
             when falsy, it is stored unchanged and the object type comes
             from ``kwargs``.
         id_value: stored and passed to ``DynamoParameters``.
         **kwargs: optional ``object_type`` (defaults to ``None``) and
             ``object_properties`` (defaults to an empty dict).
     """
     if identifier_stem:
         identifier_stem = IdentifierStem.from_raw(identifier_stem)
         resolved_type = identifier_stem.object_type
     else:
         resolved_type = kwargs.get('object_type', None)
     self._identifier_stem = identifier_stem
     self._id_value = id_value
     self._dynamo_parameters = DynamoParameters(identifier_stem, id_value)
     self._object_properties = kwargs.get('object_properties', {})
     self._object_type = resolved_type
예제 #14
0
 def test_monitor_extraction(self, specified_identifier_stem):
     """Run the Credible front-end extractor for a specified stem.

     The extraction profile is taken from the schema entry, then updated
     with the driving stem's extractor values and the stem under test.
     """
     schema_entry = SchemaVertexEntry.retrieve(
         specified_identifier_stem.object_type)
     driving_stem = IdentifierStem.from_raw(
         specified_identifier_stem.retrieve('identifier_stem'))
     extractor_entry = schema_entry.extract['CredibleFrontEndExtractor']
     extraction_profile = extractor_entry.extraction_properties
     extraction_profile.update(driving_stem.for_extractor)
     extraction_profile.update(
         identifier_stems=[{
             'identifier_stem': specified_identifier_stem,
             'id_value': None
         }],
         id_source=specified_identifier_stem.retrieve('id_source'))
     # The result is not asserted on; only an empty line is printed.
     results = CredibleFrontEndExtractor.extract(**extraction_profile)
     print()
예제 #15
0
 def __init__(self, *, identifier_stem, **kwargs):
     """Set up the stem, schema entry, extractor setup and queue.

     Args:
         identifier_stem: raw value passed to ``IdentifierStem.from_raw``.
         **kwargs: optional ``extraction_queue`` and ``sample_size``
             (defaults to 1000). When no queue is supplied, ``kwargs`` is
             forwarded to ``ForgeQueue.get_for_extraction_queue``.
     """
     identifier_stem = IdentifierStem.from_raw(identifier_stem)
     self._identifier_stem = identifier_stem
     self._object_type = identifier_stem.object_type
     self._schema_entry = SchemaVertexEntry.retrieve(self._object_type)
     self._leech_driver = LeechDriver()
     self._extractor_setup = self._leech_driver.get_extractor_setup(
         identifier_stem)
     self._extraction_profile = self._generate_extraction_profile()
     # A dict.get() default is evaluated eagerly, so the default queue used
     # to be built even when the caller supplied one. Build it on demand.
     if 'extraction_queue' in kwargs:
         self._extraction_queue = kwargs['extraction_queue']
     else:
         self._extraction_queue = ForgeQueue.get_for_extraction_queue(
             **kwargs)
     self._sample_size = kwargs.get('sample_size', 1000)
예제 #16
0
 def get_extractor_function_names(self, identifier_stem):
     """Return the extractor function names stored for an identifier stem.

     Args:
         identifier_stem: raw value passed to ``IdentifierStem.from_raw``.

     Raises:
         MissingExtractionInformation: when the fetched result has no
             ``Item`` or the item has no ``extractor_function_names``.
     """
     stem = IdentifierStem.from_raw(identifier_stem)
     key = DynamoParameters(stem.for_dynamo, stem).as_key
     response = self._table.get_item(Key=key)
     try:
         return response['Item']['extractor_function_names']
     except KeyError:
         raise MissingExtractionInformation(
             'could not find extractor information for identifier stem %s' %
             stem)
예제 #17
0
def generate_remote_id_change_data(**kwargs):
    """Assemble the source and target data for one remote change.

    Reads ``driving_identifier_stem``, ``remote_change``,
    ``changelog_types``, ``action_id``, ``enriched_data``, ``mapping`` and
    ``id_value`` from ``kwargs``.

    The employee-id map is read from ``enriched_data['emp_ids']`` and, if
    that key is absent, from ``enriched_data['by_emp_ids']`` (the key used
    by the empty payload of ``get_enrichment_for_change_action``). When
    neither exists, or the change date has no entry, ``id_value`` is used
    as ``by_emp_id``.

    Returns:
        dict: ``{'change_data': {...}}`` with the keys ``source``,
        ``by_emp_id_target``, ``change_target`` and ``changed_target``.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_identifier_stem = IdentifierStem.from_raw(
        kwargs['driving_identifier_stem'])
    remote_change = kwargs['remote_change']
    changelog_types = kwargs['changelog_types']
    action_id = kwargs['action_id']
    change_action = changelog_types[str(action_id)]
    enriched_data = kwargs['enriched_data']
    change_date_utc = remote_change['UTCDate']
    extracted_data = _build_change_log_extracted_data(remote_change,
                                                      kwargs['mapping'])
    id_source = driving_identifier_stem.get('id_source')
    # Accept both spellings of the key instead of raising KeyError on the
    # payload that only carries 'by_emp_ids'.
    emp_ids = enriched_data.get('emp_ids')
    if emp_ids is None:
        emp_ids = enriched_data.get('by_emp_ids', {})
    by_emp_id = emp_ids.get(change_date_utc, kwargs['id_value'])
    fungal_stem = FungalStem.from_identifier_stem(driving_identifier_stem,
                                                  kwargs['id_value'],
                                                  change_action.category)
    source_data = {
        'change_date_utc': extracted_data['change_date_utc'],
        'change_description': extracted_data['change_description'],
        'change_date': extracted_data['change_date'],
        'fungal_stem': str(fungal_stem),
        'action': extracted_data['action'],
        'action_id': str(action_id),
        'id_source': id_source,
        'id_type': 'ChangeLog',
        'id_name': 'change_date_utc',
        'by_emp_id': by_emp_id
    }
    returned_data = {
        'source': source_data,
        'by_emp_id_target': [{
            'id_source': id_source,
            'id_type': 'Employees',
            'id_value': by_emp_id
        }],
        'change_target': [],
        'changed_target': []
    }
    changed_targets = _build_changed_targets(id_source, extracted_data,
                                             change_action)
    if changed_targets:
        returned_data['changed_target'].extend(changed_targets)
    change_details = enriched_data.get('change_detail', {})
    change_detail_target = change_details.get(change_date_utc, None)
    if change_detail_target is not None:
        returned_data['change_target'].extend(change_detail_target)
    return {'change_data': returned_data}
예제 #18
0
def fungus(execution_id, **kwargs):
    """Run the ``command_fungi`` subtask and complete the work on success.

    The two hard-coded identifier stems are registered as the subtask's
    arguments on ``kwargs['task_args']``. When the subtask signature
    returns a truthy result, a ``CompleteWork`` decision is appended to
    ``kwargs['decisions']``.

    Args:
        execution_id: used to build the subtask identifier.
        **kwargs: must contain ``decisions`` and ``task_args``; also
            forwarded in full to ``SubtaskSignature`` and to its call.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    subtask_name = 'command_fungi'
    decisions = kwargs['decisions']
    subtask_identifier = f'f-{execution_id}'
    task_args = kwargs['task_args']
    stems = {
        'identifier_stem': IdentifierStem.from_raw(
            "#vertex#ChangeLog#{\"id_source\": \"MBI\"}#"),
        'driving_identifier_stem': IdentifierStem.from_raw(
            "#vertex#ExternalId#{\"id_source\": \"MBI\", \"id_type\": \"Employees\", \"id_name\": \"emp_id\"}#"
        )
    }
    task_args.add_argument_value(subtask_name, stems)
    signature = SubtaskSignature(subtask_identifier, subtask_name, **kwargs)
    if signature(**kwargs):
        decisions.append(CompleteWork())
예제 #19
0
def _build_remote_id_extractor(**kwargs):
    """Build the extractor setup for the driving identifier stem.

    Reads ``driving_identifier_stem`` and ``schema`` from ``kwargs``. The
    setup starts with ``id_type`` and ``type``, is updated with the stem's
    extractor values, and then with the extraction properties of the
    schema extractor named by the setup's ``type`` at that point.

    Returns:
        dict: the merged extractor setup.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    raw_stem = kwargs['driving_identifier_stem']
    schema = kwargs['schema']
    driving_stem = IdentifierStem.from_raw(raw_stem)
    schema_entry = schema[driving_stem.object_type]
    setup = {
        'id_type': driving_stem.get('id_type'),
        'type': 'CredibleFrontEndExtractor'
    }
    setup.update(driving_stem.for_extractor)
    # 'type' is read after the update above, which may have replaced it.
    extractor_entry = schema_entry.extract[setup['type']]
    setup.update(extractor_entry.extraction_properties)
    return setup
예제 #20
0
 def _run_single_extract(cls, identifier, **kwargs):
     """Run the extraction for a single identifier.

     Args:
         identifier: mapping read for ``id_value`` and
             ``identifier_stem``, plus ``local_max_values`` for
             ``ChangeLog`` objects.
         **kwargs: must contain ``id_source``; the parsed identifier stem
             is stored back under ``identifier_stem``.

     Returns:
         The result of ``_extract_change_logs`` for ``ChangeLog`` objects;
         ``None`` for every other object type.
     """
     id_source = kwargs['id_source']
     id_value = identifier['id_value']
     stem = IdentifierStem.from_raw(identifier['identifier_stem'])
     kwargs['identifier_stem'] = stem
     object_type = stem.object_type
     with CredibleFrontEndDriver(id_source) as driver:
         if object_type == 'ExternalId':
             # NOTE(review): the external id extraction result is
             # discarded; confirm whether it should be returned.
             driver.get_ext_id(stem)
         elif object_type == 'ChangeLog':
             return cls._extract_change_logs(
                 driver, id_value, identifier['local_max_values'], **kwargs)
예제 #21
0
 def test_get_full_change_logs(self, monitored_object_identifier_stem):
     """Run the Credible front-end extractor for a monitored object.

     ``monitored_object_identifier_stem`` is indexed as
     ``(raw identifier stem, id value)``. The extraction result is printed.
     """
     stem = IdentifierStem.from_raw(monitored_object_identifier_stem[0])
     id_value = monitored_object_identifier_stem[1]
     schema_entry = SchemaVertexEntry.retrieve(stem.object_type)
     extraction_kwargs = {
         'identifier_stems': [{
             'identifier_stem': stem,
             'id_value': id_value
         }],
         'id_source': stem.retrieve('id_source')
     }
     extractor_entry = schema_entry.extract['CredibleFrontEndExtractor']
     extraction_kwargs.update(extractor_entry.extraction_properties)
     extraction_kwargs.update(stem.for_extractor)
     print(CredibleFrontEndExtractor.extract(**extraction_kwargs))
예제 #22
0
def build_mapping(**kwargs):
    """Select the extraction mapping for the driving identifier stem.

    Reads ``driving_identifier_stem`` and ``schema`` from ``kwargs``. The
    mapping of the ``CredibleFrontEndExtractor`` is narrowed by id source
    (falling back to its ``default`` entry, which must exist) and then by
    the stem's ``id_type``.

    Returns:
        dict: ``{'mapping': ...}``.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    id_source = driving_stem.get('id_source')
    schema_entry = kwargs['schema'][driving_stem.object_type]
    extractor_entry = schema_entry.extract['CredibleFrontEndExtractor']
    all_mappings = extractor_entry.extraction_properties['mapping']
    source_mapping = all_mappings.get(id_source, all_mappings['default'])
    return {'mapping': source_mapping[driving_stem.get('id_type')]}
예제 #23
0
def _set_changed_ids(change_type, **kwargs):
    """Record every changed id value through the leech driver.

    Args:
        change_type: ``'new'``, ``'link'`` or ``'unlink'``.
        **kwargs: must contain ``id_values`` and
            ``driving_identifier_stem``; optional ``table_name`` (defaults
            to ``'VdGraphObjects'``).

    Raises:
        NotImplementedError: for any other ``change_type``, raised while
            processing the first id value.
        ClientError: re-raised unless its error code is
            ``ConditionalCheckFailedException``, which is ignored.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem
    from toll_booth.alg_obj.graph.ogm.regulators import VertexRegulator
    from toll_booth.alg_obj.aws.sapper.leech_driver import LeechDriver
    from botocore.exceptions import ClientError

    id_values = kwargs['id_values']
    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    regulator = VertexRegulator.get_for_object_type(driving_stem.object_type)
    table_name = kwargs.get('table_name', 'VdGraphObjects')
    leech_driver = LeechDriver(table_name=table_name)
    for id_value in id_values:
        object_data = driving_stem.for_extractor
        object_data['id_value'] = id_value
        potential_vertex = regulator.create_potential_vertex(object_data)
        try:
            if change_type == 'new':
                leech_driver.set_assimilated_vertex(
                    potential_vertex,
                    False,
                    identifier_stem=driving_stem,
                    id_value=id_value)
            elif change_type in ('link', 'unlink'):
                # The third argument is True only for an unlink.
                leech_driver.set_link_object(
                    potential_vertex.internal_id,
                    driving_stem.get('id_source'),
                    change_type == 'unlink',
                    identifier_stem=driving_stem,
                    id_value=id_value)
            else:
                raise NotImplementedError(
                    'could not find operation to perform for changed_ids type: %s'
                    % change_type)
        except ClientError as e:
            error_code = e.response['Error']['Code']
            if error_code != 'ConditionalCheckFailedException':
                raise
예제 #24
0
def post_process_get_encounters(**kwargs):
    """Retrieve a client encounter through the Credible front-end driver.

    Reads ``id_value``, ``id_type`` and ``driving_identifier_stem`` from
    ``kwargs``.

    Returns:
        dict: ``{'encounter_results': ...}``.

    Raises:
        NotImplementedError: when ``id_type`` is not ``'ClientVisit'``.
    """
    from toll_booth.alg_obj.forge.extractors.credible_fe import CredibleFrontEndDriver
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    encounter_id = kwargs['id_value']
    id_type = kwargs['id_type']
    if id_type != 'ClientVisit':
        raise NotImplementedError(
            f'cannot post process an encounter off id_type: {id_type}')
    driving_stem = IdentifierStem.from_raw(kwargs['driving_identifier_stem'])
    with CredibleFrontEndDriver(driving_stem.get('id_source')) as driver:
        return {
            'encounter_results': driver.retrieve_client_encounter(encounter_id)
        }
예제 #25
0
 def mark_fruited_vertex(self, propagation_id, creep_identifier_stem, extracted_data, leech_record):
     """Store the extracted data and delete the propagation record.

     The table is updated with ``leech_record.for_vertex_driven_seed``;
     the item keyed by the propagation id and creep stem is then deleted
     whether or not the update was applied.

     Returns:
         bool: ``True`` when the update was rejected with a
         ``ConditionalCheckFailedException``, ``False`` when it succeeded.

     Raises:
         ClientError: for any other error code.
     """
     creep_stem = IdentifierStem.from_raw(creep_identifier_stem)
     update_args = leech_record.for_vertex_driven_seed(extracted_data)
     try:
         self._table.update_item(**update_args)
     except ClientError as e:
         if e.response['Error']['Code'] != 'ConditionalCheckFailedException':
             raise
         working = True
     else:
         working = False
     record_key = {
         'sid_value': str(propagation_id),
         'identifier_stem': str(creep_stem)
     }
     self._table.delete_item(Key=record_key)
     return working
예제 #26
0
def extraction_order(request):
    """Build an ``ExtractObjectOrder`` from the request parameters.

    ``request.param`` is indexed as: 0 the value passed to
    ``SchemaEntry.retrieve``, 1 the raw identifier stem, 2 the id value,
    3 the key into the schema entry's ``extract`` mapping, 4 the extractor
    function name.
    """
    from toll_booth.alg_obj.graph.schemata.schema_entry import SchemaEntry
    from toll_booth.alg_obj.forge.comms.orders import ExtractObjectOrder
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem

    params = request.param
    stem = IdentifierStem.from_raw(params[1])
    schema_entry = SchemaEntry.retrieve(params[0])
    properties = stem.for_extractor
    properties.update(schema_entry.extract[params[3]].extraction_properties)
    id_value, extractor_function_name = params[2], params[4]
    return ExtractObjectOrder(stem, id_value, extractor_function_name,
                              properties, schema_entry)
예제 #27
0
 def get_field_value_setup(self, identifier_stem):
     """Return the field values and extractor names stored for a stem.

     Args:
         identifier_stem: raw value passed to ``IdentifierStem.from_raw``.

     Returns:
         dict: ``{'field_values': ..., 'extractor_names': ...}``.

     Raises:
         MissingFieldValuesException: when the fetched result has no
             ``Item`` or the item has no ``field_values``.
         MissingExtractionInformation: when the item has no
             ``extractor_function_names``.
     """
     stem = IdentifierStem.from_raw(identifier_stem)
     key = DynamoParameters(stem.for_dynamo, stem).as_key
     response = self._table.get_item(Key=key)
     try:
         item = response['Item']
         field_values = item['field_values']
     except KeyError:
         raise MissingFieldValuesException(
             'could not find field values for identifier stem %s' % stem)
     try:
         extractor_names = item['extractor_function_names']
     except KeyError:
         raise MissingExtractionInformation(
             'could not find extractor names for identifier stem %s' % stem
         )
     return {'field_values': field_values, 'extractor_names': extractor_names}
예제 #28
0
 def _extract(self, stalled_object):
     """Queue an extraction order for a stalled object.

     Args:
         stalled_object: mapping read for ``identifier_stem``,
             ``object_type`` and ``id_value``.
     """
     stem = IdentifierStem.from_raw(stalled_object['identifier_stem'])
     extractor_names = self._driver.get_extractor_function_names(stem)
     schema_entry = SchemaVertexEntry.retrieve(
         stalled_object['object_type'])
     extractor_entry = schema_entry.extract[extractor_names['type']]
     properties = stem.for_extractor
     properties.update(extractor_entry.extraction_properties)
     order = ExtractObjectOrder(stem,
                                stalled_object['id_value'],
                                extractor_names['extraction'],
                                properties,
                                schema_entry)
     self._extraction_queue.add_order(order)
예제 #29
0
def get_local_max_change_type_value(**kwargs):
    """Query the index for the local max value of one change category.

    Reads ``driving_identifier_stem``, ``id_value``, ``category_id``,
    ``changelog_types`` and ``schema`` from ``kwargs``; all of ``kwargs``
    is also forwarded to ``IndexManager.from_graph_schema``.

    Returns:
        dict: ``{'local_max_value': ...}``; the value is ``None`` when the
        query raises ``EmptyIndexException``.
    """
    from toll_booth.alg_obj.graph.ogm.regulators import IdentifierStem
    from toll_booth.alg_obj.graph.index_manager.index_manager import IndexManager

    raw_stem = kwargs['driving_identifier_stem']
    id_value = kwargs['id_value']
    category_id = kwargs['category_id']
    changelog_types = kwargs['changelog_types']
    driving_stem = IdentifierStem.from_raw(raw_stem)
    id_source = driving_stem.get('id_source')
    id_type = driving_stem.get('id_type')
    change_category = changelog_types.categories[category_id]
    change_stem = f'#{id_source}#{id_type}#{id_value}#{change_category}'
    index_manager = IndexManager.from_graph_schema(kwargs['schema'], **kwargs)
    try:
        return {'local_max_value': index_manager.query_object_max(change_stem)}
    except EmptyIndexException:
        return {'local_max_value': None}
예제 #30
0
 def mark_propagated_vertexes(self, propagation_id, identifier_stem, driving_identifier_stem, driving_id_values, **kwargs):
     """Write one propagation record per driving id value and category.

     Args:
         propagation_id: stored, stringified, as ``sid_value`` and
             ``propagation_id`` of every record.
         identifier_stem: stem of the extracted object, raw or already
             parsed; its object type names the propagation stems.
         driving_identifier_stem: stem of the driving object, raw or
             already parsed; its paired identifiers seed each record stem.
         driving_id_values: iterable of id values to record.
         **kwargs: must contain ``change_types``, whose ``categories``
             mapping supplies the change categories.
     """
     # Both stems are normalised: previously only the driving stem was, so
     # a raw identifier_stem failed on ``.object_type``.
     identifier_stem = IdentifierStem.from_raw(identifier_stem)
     driving_identifier_stem = IdentifierStem.from_raw(driving_identifier_stem)
     driving_pairs = driving_identifier_stem.paired_identifiers
     change_types = kwargs['change_types']
     # Values shared by every record, computed once outside the loops.
     shared_fields = {
         'sid_value': str(propagation_id),
         'propagation_id': str(propagation_id),
         'driving_identifier_stem': str(driving_identifier_stem),
         'extracted_identifier_stem': str(identifier_stem),
     }
     with self._table.batch_writer() as writer:
         for id_value in driving_id_values:
             for change_category in change_types.categories.values():
                 change_pairs = driving_pairs.copy()
                 change_pairs['id_value'] = id_value
                 change_pairs['category'] = str(change_category)
                 change_identifier_stem = IdentifierStem('propagation', identifier_stem.object_type, change_pairs)
                 change = {
                     'identifier_stem': str(change_identifier_stem),
                     'driving_id_value': id_value,
                     **shared_fields
                 }
                 writer.put_item(Item=change)