def test_set_transform_results(self, test_transform_results, dynamo_test_environment):
     """Check the update issued by ``LeechDriver.set_transform_results``.

     ``test_transform_results`` is indexed as ``(source_vertex, potentials)``.
     The expected disposition is ``'graphing'`` when the potentials are
     falsy and ``'working'`` otherwise.
     """
     driver = LeechDriver(table_name=blank_table_name)
     source_vertex = test_transform_results[0]
     potentials = test_transform_results[1]
     id_value = source_vertex.id_value
     identifier_stem = source_vertex.identifier_stem
     internal_id = source_vertex.internal_id
     driver.set_transform_results(
         source_vertex, potentials, identifier_stem=identifier_stem, id_value=id_value)
     expected_disposition = 'working' if potentials else 'graphing'
     self._assert_dynamo_call(
         'set_transform_results', id_value, identifier_stem, dynamo_test_environment,
         stage_name='transformation',
         disposition=expected_disposition,
         internal_id=internal_id)
     # First recorded call -> positional arguments -> second positional (request parameters).
     first_call_params = dynamo_test_environment.call_args_list[0][0][1]
     attribute_values = first_call_params['ExpressionAttributeValues']
     self._assert_object_properties_creation(source_vertex.object_type, attribute_values[':v'])
     self._assert_potentials_creation(attribute_values[':ps'])
Esempio n. 2
0
 def __init__(self, metal_order, **kwargs):
     """Unpack the fields of an order and create a ``LeechDriver``.

     :param metal_order: order exposing ``source_vertex``, ``potential_vertex``,
         ``rule_entry`` and ``extracted_data`` attributes
     :param kwargs: accepted but not read by this constructor
     """
     order = metal_order
     self._assimilate_order = order
     # Copy the order's fields onto the instance for direct access.
     self._source_vertex = order.source_vertex
     self._potential_vertex = order.potential_vertex
     self._rule_entry = order.rule_entry
     self._extracted_data = order.extracted_data
     self._dynamo_driver = LeechDriver()
Esempio n. 3
0
 def __init__(self, metal_order, **kwargs):
     """Unpack an extraction order and wire up its collaborators.

     :param metal_order: order exposing ``extraction_function_name``,
         ``extraction_properties`` and ``schema_entry`` attributes
     :param kwargs: may carry ``transform_queue`` to inject a queue; when that
         key is absent, all of ``kwargs`` is forwarded to
         ``ForgeQueue.get_for_transform_queue`` to build one
     """
     self._extraction_order = metal_order
     self._extraction_function_name = metal_order.extraction_function_name
     self._extraction_properties = metal_order.extraction_properties
     self._schema_entry = metal_order.schema_entry
     self._dynamo_driver = LeechDriver()
     # Build the default queue only when none was injected: passing the
     # factory call as the ``dict.get`` default would run it on every
     # construction, even when the caller supplied ``transform_queue``.
     if 'transform_queue' in kwargs:
         self._transform_queue = kwargs['transform_queue']
     else:
         self._transform_queue = ForgeQueue.get_for_transform_queue(**kwargs)
 def test_mark_object_as_graphed(self, test_id, dynamo_test_environment):
     """Check the update issued by ``LeechDriver.mark_object_as_graphed``.

     ``test_id`` is indexed as ``(identifier_stem, id_value)``.
     """
     identifier_stem = test_id[0]
     id_value = test_id[1]
     driver = LeechDriver(table_name=blank_table_name)
     driver.mark_object_as_graphed(id_value=id_value, identifier_stem=identifier_stem)
     expected_call = ('mark_object_as_graphed', id_value, identifier_stem, dynamo_test_environment)
     self._assert_dynamo_call(*expected_call, stage_name='graphing')
 def __init__(self, metal_order, **kwargs):
     """Unpack a transform order and wire up its collaborators.

     :param metal_order: order exposing ``extracted_data`` (subscriptable,
         with a ``'source'`` entry) and ``schema_entry`` attributes
     :param kwargs: may carry ``assimilate_queue`` to inject a queue; when
         that key is absent, all of ``kwargs`` is forwarded to
         ``ForgeQueue.get_for_assimilation_queue`` to build one
     """
     self._transform_order = metal_order
     # Build the default queue only when none was injected: passing the
     # factory call as the ``dict.get`` default would run it on every
     # construction, even when the caller supplied ``assimilate_queue``.
     if 'assimilate_queue' in kwargs:
         self._assimilation_queue = kwargs['assimilate_queue']
     else:
         self._assimilation_queue = ForgeQueue.get_for_assimilation_queue(**kwargs)
     self._extracted_data = metal_order.extracted_data
     self._schema_entry = metal_order.schema_entry
     self._source_vertex_data = metal_order.extracted_data['source']
     self._dynamo_driver = LeechDriver()
Esempio n. 6
0
 def __init__(self, **kwargs):
     """Create the scanner, driver, queue handles and bookkeeping state.

     :param kwargs: ``index_name`` selects the index handed to
         ``DynamoScanner`` (default ``'stalled'``)

     Queue URLs are read from the ``LOAD_URL`` and ``EXTRACT_URL`` environment
     variables, falling back to the hard-coded URLs below.
     """
     index_name = kwargs.get('index_name', 'stalled')
     self._scanner = DynamoScanner(index_name)
     self._driver = LeechDriver()
     self._load_graph_orders = []
     self._graph_counter = 0
     default_load_url = 'https://sqs.us-east-1.amazonaws.com/803040539655/load'
     default_extract_url = 'https://sqs.us-east-1.amazonaws.com/803040539655/extract'
     self._load_queue_url = os.getenv('LOAD_URL', default_load_url)
     self._extract_queue_url = os.getenv('EXTRACT_URL', default_extract_url)
     sqs = boto3.resource('sqs')
     self._load_queue = sqs.Queue(self._load_queue_url)
     self._extraction_queue = ForgeQueue.get_for_extraction_queue()
 def test_set_stub_assimilated_vertex(self, stub_potential_vertex, dynamo_test_environment):
     """Check the update issued by ``LeechDriver.set_assimilated_vertex`` for a stub.

     The expected ``internal_id`` / ``id_value`` are taken from the stub only
     when its ``is_internal_id_set`` / ``is_id_value_set`` flags are truthy,
     and are ``None`` otherwise.
     """
     function_name = 'set_assimilated_vertex'
     dynamo_driver = LeechDriver(table_name=blank_table_name)
     # The return value is not inspected; only the recorded call is checked.
     dynamo_driver.set_assimilated_vertex(stub_potential_vertex, True)
     identifier_stem = IdentifierStem.for_stub(stub_potential_vertex)
     internal_id = stub_potential_vertex.internal_id if stub_potential_vertex.is_internal_id_set else None
     id_value = stub_potential_vertex.id_value if stub_potential_vertex.is_id_value_set else None
     # NOTE(review): id_value is passed both positionally and as a keyword;
     # this only works if the helper's second parameter is not itself named
     # ``id_value`` -- confirm against ``_assert_dynamo_call``.
     self._assert_dynamo_call(
         function_name, id_value, identifier_stem, dynamo_test_environment, stage_name='assimilation',
         internal_id=internal_id, id_value=id_value, object_type=stub_potential_vertex.object_type
     )
 def set_assimilation_result(self, test_assimilation_results, dynamo_test_environment, counter):
     """Run ``LeechDriver.set_assimilation_results`` and check the recorded call.

     ``test_assimilation_results`` is indexed as
     ``(source_vertex, edge_type, assimilation_result)``.
     """
     driver = LeechDriver(table_name=blank_table_name)
     assimilation_result = test_assimilation_results[2]
     edge_type = test_assimilation_results[1]
     source_vertex = test_assimilation_results[0]
     identifier_stem = source_vertex.identifier_stem
     id_value = source_vertex.id_value
     driver.set_assimilation_results(
         edge_type, assimilation_result, identifier_stem=identifier_stem, id_value=id_value)
     self._assert_dynamo_call(
         'set_assimilation_results', id_value, identifier_stem, dynamo_test_environment,
         stage_name='assimilation', edge_type=edge_type, counter=counter)
     # Most recent call -> positional arguments -> second positional (request parameters).
     last_call_params = dynamo_test_environment.call_args[0][1]
     identified_vertexes = last_call_params['ExpressionAttributeValues'][':iv']
     self._assert_identified_vertexes_creation(identified_vertexes, test_assimilation_results)
Esempio n. 9
0
def load(*args, **kwargs):
    """Graph one stored object and mark it as graphed.

    Reads ``kwargs['task_args']``, which must contain a ``keys`` mapping with
    ``identifier_stem`` and ``sid_value`` entries. The whole ``task_args``
    mapping is also passed as keyword arguments to ``LeechDriver`` and ``Ogm``.

    :return: the value returned by ``Ogm.graph_object`` for the fetched object
    :raises ClientError: any error from ``mark_object_as_graphed`` whose code
        is not ``ConditionalCheckFailedException``
    """
    logging.info('starting a load task with args/kwargs: %s/%s', args, kwargs)
    task_args = kwargs['task_args']
    dynamo_driver = LeechDriver(**task_args)
    key_fields = task_args['keys']
    keys = {
        'identifier_stem': key_fields['identifier_stem'],
        'id_value': key_fields['sid_value']
    }
    potential_object = dynamo_driver.get_object(**keys)
    ogm = Ogm(**task_args)
    graph_results = ogm.graph_object(potential_object)
    try:
        dynamo_driver.mark_object_as_graphed(
            identifier_stem=potential_object['source'].identifier_stem,
            id_value=potential_object['source'].id_value)
    except ClientError as e:
        if e.response['Error']['Code'] != 'ConditionalCheckFailedException':
            raise
        # A failed condition is logged and tolerated rather than treated as
        # an error. The message now has a placeholder, so the object is
        # actually included in the log line.
        logging.warning(
            'attempted to mark a vertex as graphing, '
            'but it appears this step has already happened, no changes to be made: %s',
            potential_object)
    return graph_results
Esempio n. 10
0
 def test_mark_ids_as_working(self, test_working_ids, dynamo_test_environment):
     """Check the per-id updates issued by ``LeechDriver.mark_ids_as_working``.

     ``test_working_ids`` is indexed as ``(identifier_stem, id_range)``.
     Expects one recorded ``UpdateItem`` call per id in the range and a
     return value of ``([], list(id_range))``.
     """
     function_name = 'mark_ids_as_working'
     driver = LeechDriver(table_name=blank_table_name)
     id_range = test_working_ids[1]
     identifier_stem = test_working_ids[0]
     outcome = driver.mark_ids_as_working(id_range, identifier_stem=identifier_stem)
     assert outcome == ([], list(id_range))
     assert dynamo_test_environment.called is True
     assert dynamo_test_environment.call_count == len(id_range)
     for recorded_call in dynamo_test_environment.call_args_list:
         # Each recorded call's positional arguments are (operation name, request parameters).
         positional = recorded_call[0]
         operation = positional[0]
         request = positional[1]
         assert operation == 'UpdateItem'
         assert request['Key']['identifier_stem'] == str(identifier_stem)
         assert int(request['Key']['sid_value']) in id_range
         expression = request['UpdateExpression']
         attribute_names = request['ExpressionAttributeNames']
         attribute_values = request['ExpressionAttributeValues']
         self._assert_update_expression_creation(function_name, expression)
         self._assert_attribute_names_creation(function_name, attribute_names)
         self._assert_attribute_values_creation(
             function_name, attribute_values,
             id_value_range=id_range,
             object_type=identifier_stem.object_type,
             stage_name='assimilation')