def test_format_item_no_special_formatting(self):
    """format_item without expand_history yields only the flattened edge.

    ``_flatten_edge`` is replaced by a mock; the test checks it is called
    with the item and that its return value is the single yielded row.
    """
    flatten_mock = MagicMock()
    formatter = GraphQLResultFormatter(expand_history=False)
    formatter._flatten_edge = flatten_mock

    rows = [row for row in formatter.format_item('item')]

    flatten_mock.assert_called_with('item')
    self.assertEqual([flatten_mock.return_value], rows)
def test_format_item_expand_history(self):
    """format_item with expand_history=True yields what ``_expand_history`` returns.

    ``_expand_history`` is mocked to return an iterator over two rows; both
    rows must come out of ``format_item`` and the mock must have received
    the original item.
    """
    expand_mock = MagicMock(return_value=iter(['a', 'b']))
    formatter = GraphQLResultFormatter(expand_history=True)
    formatter._expand_history = expand_mock

    result = list(formatter.format_item('item'))

    self.assertEqual(['a', 'b'], result)
    expand_mock.assert_called_with('item')
def __init__(self, host, query, catalogue, collection, expand_history=False, sort=None, unfold=False,
             row_formatter=None, cross_relations=False, secure_user=None):
    """Register the endpoint and query; no request is issued here.

    The query is passed through ``_update_query`` once (with NUM_RECORDS and
    an empty cursor) so that it is ready for the first page when the object
    is iterated.

    :param host: base URL; the secure or public GraphQL endpoint is appended
    :param query: GraphQL query string
    :param catalogue: catalogue name, first part of the schema collection name
    :param collection: collection name, title-cased into the schema collection name
    :param expand_history: passed to the result formatter
    :param sort: passed to the result formatter
    :param unfold: passed to the result formatter
    :param row_formatter: passed to the result formatter
    :param cross_relations: passed to the result formatter
    :param secure_user: when truthy, the secure endpoint is used
    """
    self.host = host
    self.secure_user = secure_user

    # The endpoint depends on whether a secure user was supplied
    endpoint = GRAPHQL_SECURE_ENDPOINT if self.secure_user else GRAPHQL_PUBLIC_ENDPOINT
    self.url = self.host + endpoint

    self.catalogue = catalogue
    self.collection = collection
    self.schema_collection_name = f'{self.catalogue}{self.collection.title()}'

    # Pagination state; _update_query reads end_cursor and schema_collection_name
    self.end_cursor = ""
    self.query = self._update_query(query, NUM_RECORDS)
    self.has_next_page = True

    self.gob_model = GOBModel().get_collection(self.catalogue, self.collection)

    formatter_options = {
        'sort': sort,
        'unfold': unfold,
        'row_formatter': row_formatter,
        'cross_relations': cross_relations,
    }
    self.formatter = GraphQLResultFormatter(expand_history, **formatter_options)
def test_undouble(self):
    """_undouble returns the same distinct values as ``set`` for random integer lists."""
    # 20 random lists of 0..50 integers in the range 0..10, plus the empty list
    testcases = []
    for _ in range(20):
        length = random.randint(0, 50)
        testcases.append([random.randint(0, 10) for _ in range(length)])
    testcases.append([])

    formatter = GraphQLResultFormatter()

    for numbers in testcases:
        expected = sorted(set(numbers))
        self.assertEqual(expected, sorted(formatter._undouble(numbers)))
def test_format_item_with_unfold(self):
    """With unfold=True every boxed item is flattened and yielded separately."""
    def flatten(x):
        return 'flattened_' + x

    formatter = GraphQLResultFormatter(unfold=True)
    formatter._box_item = MagicMock(return_value=['a', 'b', 'c'])
    formatter._flatten_edge = flatten

    result = list(formatter.format_item('item'))

    self.assertEqual(['flattened_a', 'flattened_b', 'flattened_c'], result)
def test_expand_history(self, mock_convert_to_history_rows):
    """_expand_history flattens the edge and yields the converted history rows.

    :param mock_convert_to_history_rows: injected mock for the history row conversion
    """
    expected_result = list(range(5))
    mock_convert_to_history_rows.return_value = expected_result

    flatten_mock = MagicMock()
    formatter = GraphQLResultFormatter()
    formatter._flatten_edge = flatten_mock

    result = list(formatter._expand_history('edge'))

    # The edge is flattened first; the flattened edge is what gets converted
    flatten_mock.assert_called_with('edge')
    mock_convert_to_history_rows.assert_called_with(flatten_mock.return_value)
    self.assertEqual(expected_result, result)
def test_set_value_for_all(self):
    """_set_value_for_all overwrites the given key in every dict of the list, in place."""
    formatter = GraphQLResultFormatter()
    lst = [
        {'item': 'A', 'value': 'someval'},
        {'item': 'B', 'value': 'some other val'},
    ]

    formatter._set_value_for_all(lst, 'value', 'new value')

    expected = [{'item': name, 'value': 'new value'} for name in ('A', 'B')]
    self.assertEqual(expected, lst)
def __init__(self, host, query, unfold=False, sort=None, row_formatter=None, cross_relations=False,
             batch_size=None, secure_user=None):
    """Store the query settings and build the result formatter.

    :param host: base URL; the secure or public streaming endpoint is appended
    :param query: GraphQL query string
    :param unfold: passed to the result formatter
    :param sort: passed to the result formatter
    :param row_formatter: passed to the result formatter
    :param cross_relations: passed to the result formatter
    :param batch_size: stored on the instance as given
    :param secure_user: when truthy, the secure streaming endpoint is used
    """
    self.host = host
    self.query = query
    self.secure_user = secure_user

    # The endpoint depends on whether a secure user was supplied
    if self.secure_user:
        endpoint = STREAMING_GRAPHQL_SECURE_ENDPOINT
    else:
        endpoint = STREAMING_GRAPHQL_PUBLIC_ENDPOINT
    self.url = self.host + endpoint

    self.batch_size = batch_size

    formatter_options = {
        'sort': sort,
        'unfold': unfold,
        'row_formatter': row_formatter,
        'cross_relations': cross_relations,
    }
    self.formatter = GraphQLResultFormatter(**formatter_options)
    self.current_page = None
def test_format_item_with_sorter(self):
    """With sort enabled, the item selected by the sorter is the one that gets flattened."""
    def pick_second(x):
        return x[1]

    def flatten(x):
        return 'flattened_' + x

    formatter = GraphQLResultFormatter(sort=True)
    formatter._box_item = MagicMock(return_value=['a', 'b', 'c'])
    formatter.sorter = MagicMock()
    formatter.sorter.sort_items = pick_second
    formatter._flatten_edge = flatten

    first_row = next(formatter.format_item('item'))

    self.assertEqual('flattened_b', first_row)
class GraphQL:
    """Iterable over the formatted results of a cursor-paginated GraphQL query.

    The query is rewritten before every request so that it carries the
    ``first`` (page size) and ``after`` (cursor) arguments and a ``pageInfo``
    node. The page size is adapted after every request, based on how long the
    request took compared to TARGET_DURATION.
    """

    sorter = None

    def __init__(self, host, query, catalogue, collection, expand_history=False, sort=None, unfold=False,
                 row_formatter=None, cross_relations=False, secure_user=None):
        """Constructor

        Lazy loading: only register the host and query; data is requested when
        the object is iterated.

        :param host: base URL; the secure or public GraphQL endpoint is appended
        :param query: GraphQL query string
        :param catalogue: catalogue name, first part of the schema collection name
        :param collection: collection name, title-cased into the schema collection name
        :param expand_history: passed to the result formatter
        :param sort: passed to the result formatter
        :param unfold: passed to the result formatter
        :param row_formatter: passed to the result formatter
        :param cross_relations: passed to the result formatter
        :param secure_user: when truthy, the secure endpoint is used and the
            value is passed along with every request
        """
        self.host = host
        self.secure_user = secure_user
        self.url = self.host + (GRAPHQL_SECURE_ENDPOINT if self.secure_user else GRAPHQL_PUBLIC_ENDPOINT)
        self.catalogue = catalogue
        self.collection = collection
        self.schema_collection_name = f'{self.catalogue}{self.collection.title()}'
        # _update_query reads end_cursor and schema_collection_name, so set them first
        self.end_cursor = ""
        self.query = self._update_query(query, NUM_RECORDS)
        self.has_next_page = True
        self.gob_model = GOBModel().get_collection(self.catalogue, self.collection)
        self.formatter = GraphQLResultFormatter(expand_history, sort=sort, unfold=unfold,
                                                row_formatter=row_formatter, cross_relations=cross_relations)

    def __repr__(self):
        """Representation

        Provide for a readable representation
        """
        return f'GraphQL {self.schema_collection_name}'

    def __iter__(self):
        """Iteration method

        Reads pages and yields the formatted entities of each page until the
        response reports that there is no next page.

        Raises:
            AssertionError: if the endpoint does not return an OK response

        :return: generator of formatted items
        """
        num_records = NUM_RECORDS
        while self.has_next_page:
            start = time.time()
            print(f"Request {num_records} rows...")
            response = requests.post(self.url, json={'query': self.query}, secure_user=self.secure_user)
            end = time.time()
            duration = round(end - start, 2)

            # Adjust number of records to get to the target duration.
            # The duration is rounded to two decimals and can therefore be 0.0
            # for a very fast response; clamp it to the smallest non-zero
            # rounded value to avoid a ZeroDivisionError.
            correction = TARGET_DURATION / max(duration, 0.01)
            num_records = max(int(num_records * correction), 1)
            print(f"Request data end ({duration} secs), records set to {num_records}")

            # Kept as an assert: AssertionError is the documented failure mode
            assert response.ok, f"API Response not OK for query {self.query}"
            data = response.json()
            page = data['data'][self.schema_collection_name]

            # Update the cursor and has_next_page
            self.end_cursor = page['pageInfo']['endCursor']
            self.has_next_page = page['pageInfo']['hasNextPage']

            if self.has_next_page:
                self.query = self._update_query(self.query, num_records)

            for edge in page['edges']:
                yield from self.formatter.format_item(edge)

    def _update_query(self, query, num_records):
        """Updates a graphql query for pagination

        Adds or updates the ``first`` and ``after`` arguments and adds the
        pageInfo node when the query does not contain one.

        The argument values are replaced at the exact position where they were
        matched, so identical digits or strings elsewhere in the query are
        left untouched.

        :param query: the query to update
        :param num_records: the page size to request
        :return: updated query
        """
        filter_pattern = f'{self.schema_collection_name}\\s*\\((.+)?\\)'

        # First check if the query has a filter
        filters = re.search(filter_pattern, query)
        if filters:
            has_first = 'first' in filters[0]

            # check if the query has a filter on 'first'
            match = re.search('first:\\s?(([\\d]+)?)', query)
            if match:
                # adjust number of records to request, in place
                value_start, value_end = match.span(1)
                query = query[:value_start] + f"{num_records}" + query[value_end:]

            # Try to find the after parameter, or else add it
            match = re.search('after:\\s?("([a-zA-Z\\d=]+)?")', query)
            if match:
                value_start, value_end = match.span(1)
                query = query[:value_start] + f'"{self.end_cursor}"' + query[value_end:]
            else:
                append_string = f', after: "{self.end_cursor}")' if has_first \
                    else f', first: {num_records}, after: "{self.end_cursor}")'
                # The 'first' value may have changed length, so the offsets of
                # the earlier match are stale; locate the filter again.
                filters_end = re.search(filter_pattern, query).span()[1]
                query = query[:filters_end - 1] + append_string + query[filters_end:]
        else:
            # Add first and after parameter after the main collection
            query = query.replace(self.schema_collection_name,
                                  f'{self.schema_collection_name}'
                                  f'(first: {num_records}, after: "{self.end_cursor}")')

        # Add pageInfo if it doesn't exist
        if not re.search('pageInfo', query):
            closing_braces = list(re.finditer('}', query))
            # Add pageInfo at the correct level of the query, i.e. just before
            # the second to last closing brace:
            # {
            #     collection {
            #         edges {
            #             node {
            #             }
            #         }
            #         pageInfo {}
            #     }
            # }
            page_info = 'pageInfo { endCursor, hasNextPage }'
            insert_at = closing_braces[-2].span()[0]
            query = query[:insert_at] + page_info + query[insert_at:]

        return query
class GraphQLStreaming:
    """Iterable over the formatted results of a streaming GraphQL query.

    Without a batch size the query is sent as is. With a batch size the query
    is sent repeatedly, each time asking for ``batch_size`` items after the
    cursor of the last item received, until a page comes back empty.
    """

    def __init__(self, host, query, unfold=False, sort=None, row_formatter=None, cross_relations=False,
                 batch_size=None, secure_user=None):
        """Store the query settings and build the result formatter.

        :param host: base URL; the secure or public streaming endpoint is appended
        :param query: GraphQL query string
        :param unfold: passed to the result formatter
        :param sort: passed to the result formatter
        :param row_formatter: passed to the result formatter
        :param cross_relations: passed to the result formatter
        :param batch_size: page size; None means the query is not paginated
        :param secure_user: when truthy, the secure endpoint is used and the
            value is passed along with every request
        """
        self.host = host
        self.query = query
        self.secure_user = secure_user
        self.url = self.host + (STREAMING_GRAPHQL_SECURE_ENDPOINT
                                if self.secure_user else STREAMING_GRAPHQL_PUBLIC_ENDPOINT)
        self.batch_size = batch_size
        self.formatter = GraphQLResultFormatter(sort=sort, unfold=unfold, row_formatter=row_formatter,
                                                cross_relations=cross_relations)
        self.current_page = None

    def _execute_query(self, query):
        """Post the query to the streaming endpoint and yield what it streams back."""
        yield from post_stream(self.url, {'query': query}, secure_user=self.secure_user)

    def _query_all(self):
        """Query on the input query as is. Don't add pagination.

        :return: generator of formatted items
        """
        for item in self._execute_query(self.query):
            # Formatter may return multiple rows for one item, for example when 'unfold' is set to True. Hence the
            # double yield from (the first yield from being in _execute_query)
            yield from self.formatter.format_item(json_loads(item))

    def _add_pagination_to_query(self, query: str, after: str, batch_size: int):
        """Return the query with ``first``/``after`` arguments and a cursor field.

        Existing arguments of the root collection are kept; ``first`` and
        ``after`` are overwritten when already present. A ``cursor`` field is
        added to the root node when the query does not ask for it yet.

        :param query: the query to paginate
        :param after: cursor to continue after, or None for the first page
        :param batch_size: number of items to request
        :return: the paginated query
        """
        existing_arguments_pattern = re.compile(r'^(\s*{\s*)(\w+)\s*\((.*)\)')
        existing_arguments = existing_arguments_pattern.search(query)

        if existing_arguments is not None:
            # Transform arguments to dictionary
            arguments = {}
            for arg in existing_arguments[3].split(','):
                if not arg.strip():
                    # Empty argument list '()' or a stray comma
                    continue
                # Split on the first colon only; the value may contain colons
                key, value = arg.split(':', 1)
                arguments[key.strip()] = value.strip()

            # Set first and after, overwrite if already exist
            arguments['first'] = batch_size

            if after is not None:
                arguments['after'] = after

            args_string = ", ".join([f'{k}: {v}' for k, v in arguments.items()])
            # Function replacement: argument values are data and must not be
            # interpreted as regex replacement escapes
            paginated_query = existing_arguments_pattern.sub(
                lambda m: f'{m[1]}{m[2]}({args_string})', query, count=1)
        else:
            after_str = f", after: {after}" if after is not None else ""
            # Query does not have any arguments. Add arguments after first word
            paginated_query = re.sub(
                r'(\w+)', lambda m: f'{m[0]}(first: {batch_size}{after_str})', query, count=1)

        # Add cursor to query if not yet exists on root level.
        if not re.search(r'^\s*{\s*\w+\s*\(?[^\n]*\)?\s*{\s*edges\s*{\s*node\s*{[^{]*cursor', paginated_query):
            return re.sub(r'(node\s*{)(\s*)(\w*)', '\\g<1>\\g<2>cursor\\g<2>\\g<3>', paginated_query, count=1)

        return paginated_query

    def _query_page(self, after: str):
        """Yield the raw items of the page that follows the given cursor."""
        page_query = self._add_pagination_to_query(self.query, after, self.batch_size)
        yield from self._execute_query(page_query)

    def _query_paginated(self):
        """Yield formatted items page by page until a page returns no items."""
        last_item = None

        while True:
            items = self._query_page(last_item)

            result_cnt = 0
            for item in items:
                result_cnt += 1
                for formatted_item in self.formatter.format_item(json_loads(item)):
                    # The cursor of the last formatted item is where the next page starts
                    last_item = formatted_item['cursor']
                    yield formatted_item

            if result_cnt == 0:
                break

    def __iter__(self):
        """Iterate over all formatted items, paginated only when a batch size is set."""
        if self.batch_size is None:
            yield from self._query_all()
        else:
            yield from self._query_paginated()
def test_sort_item_nested(self):
    """sort_items picks one boxed item using sorters on two nesting levels.

    The item has three references, each with three nested references. The
    expected result holds the third reference (sortkey 'A') combined with
    its nested reference 'I'.
    """
    def edges(*nodes):
        # Wrap plain node dicts in the GraphQL edges/node structure
        return {'edges': [{'node': node} for node in nodes]}

    def nested(*sort_values):
        return edges(*[{'sort2key': value} for value in sort_values])

    item = {
        'node': {
            'k1': 'v1',
            'k2': 'v2',
            'reference': edges(
                {'rk1': 'rv1', 'rk2': 'rv2', 'sortkey': 'B', 'nestedreference': nested('C', 'A', 'B')},
                {'rk11': 'rv1', 'rk22': 'rv2', 'sortkey': 'A', 'nestedreference': nested('F', 'D', 'E')},
                {'rk111': 'rv1', 'rk222': 'rv2', 'sortkey': 'A', 'nestedreference': nested('H', 'I', 'G')},
            ),
        }
    }

    # Use formatter to box item; otherwise the argument would be very long
    formatter = GraphQLResultFormatter()
    items = formatter._box_item(item)

    expected_result = {
        'node': {
            'k1': 'v1',
            'k2': 'v2',
            'reference': edges(
                {'rk111': 'rv1', 'rk222': 'rv2', 'sortkey': 'A', 'nestedreference': nested('I')},
            ),
        }
    }

    sorters = {
        'reference.sortkey': lambda x, y: x < y,
        'reference.nestedreference.sort2key': lambda x, y: x > y,
    }
    sorter = GraphQlResultSorter(sorters)

    self.assertEqual(expected_result, sorter.sort_items(items))
def test_flatten_edge(self):
    """_flatten_edge turns the edges/node structure into plain dicts and lists.

    Two cases are checked: a single reference, and an edge with nested and
    empty references. For the nested case the expected result also contains
    the nested references on the top level.
    """
    formatter = GraphQLResultFormatter()
    edge = {
        'node': {
            'reference': {
                'edges': [
                    {
                        'node': {
                            'value': 'value'
                        }
                    }
                ]
            }
        }
    }
    nested_edge = {
        "node": {
            "value": "value",
            "reference": {
                "edges": [
                    {
                        "node": {
                            "value": "value",
                            "nested_reference": {
                                "edges": [
                                    {
                                        "node": {
                                            "nested_value": "value"
                                        }
                                    }
                                ]
                            }
                        }
                    }
                ]
            },
            "empty_reference": {
                "edges": [
                    {
                        "node": {
                            "empty_nested_reference": {
                                "edges": []
                            }
                        }
                    }
                ]
            }
        }
    }

    expected_result = {'reference': [{'value': 'value'}]}
    result = formatter._flatten_edge(edge)
    # assertEqual instead of a bare assert: consistent with the other tests,
    # not stripped under -O, and it reports a diff on failure
    self.assertEqual(expected_result, result)

    expected_result = {
        'value': 'value',
        'reference': [{
            'value': 'value',
            'nested_reference': [{'nested_value': 'value'}]
        }],
        'nested_reference': [{'nested_value': 'value'}],
        'empty_reference': [{'empty_nested_reference': []}],
        'empty_nested_reference': [],
    }
    result = formatter._flatten_edge(nested_edge)
    self.assertEqual(expected_result, result)
def test_box_item(self):
    """_box_item returns one item per reference edge, in the original edge order."""
    sortkeys = ('B', 'C', 'A')

    def reference_edge(sortkey):
        return {'node': {'rk1': 'rv1', 'rk2': 'rv2', 'sortkey': sortkey}}

    def boxed(reference_edges):
        return {
            'node': {
                'k1': 'v1',
                'k2': 'v2',
                'reference': {'edges': reference_edges},
            }
        }

    # Input: a single item holding all three reference edges
    item = boxed([reference_edge(key) for key in sortkeys])
    # Expected: three items, each holding exactly one of the reference edges
    expected_result = [boxed([reference_edge(key)]) for key in sortkeys]

    formatter = GraphQLResultFormatter()
    result = formatter._box_item(item)

    self.assertEqual(expected_result, result)
def test_format_item_with_row_formatter(self):
    """The row formatter is applied to the item before the edge is flattened."""
    def row_formatter(x):
        return 'formatted_row(' + x + ')'

    def flatten(x):
        return 'flattened(' + x + ')'

    formatter = GraphQLResultFormatter(row_formatter=row_formatter)
    formatter._flatten_edge = flatten

    rows = list(formatter.format_item('a'))

    self.assertEqual(['flattened(formatted_row(a))'], rows)
def test_box_item_nested_references_same_level_cross_relations(self):
    """With cross_relations=True, references on the same level are combined.

    The single reference holds two nested 'reference' edges and two nested
    'reference2' edges; the expected result is all four combinations.
    ``_undouble`` is mocked to return its input unchanged.
    """
    def edges(*nodes):
        # Wrap plain node dicts in the GraphQL edges/node structure
        return {'edges': [{'node': node} for node in nodes]}

    def nested_node(sortkey):
        return {'rrk1': 'rrv1', 'rrk2': 'rrv2', 'sortkey': sortkey}

    def boxed(references, references2):
        return {
            'node': {
                'k1': 'v1',
                'k2': 'v2',
                'reference': edges({
                    'rk1': 'rv1',
                    'rk2': 'rv2',
                    'sortkey': 'B',
                    'reference': edges(*references),
                    'reference2': edges(*references2),
                }),
            }
        }

    item = boxed(
        [nested_node('rrB'), nested_node('rrA')],
        [{'r2k1': 'r2v1'}, {'r2k2': 'r2v2'}],
    )

    expected_result = [
        boxed([nested_node('rrB')], [{'r2k1': 'r2v1'}]),
        boxed([nested_node('rrA')], [{'r2k1': 'r2v1'}]),
        boxed([nested_node('rrB')], [{'r2k2': 'r2v2'}]),
        boxed([nested_node('rrA')], [{'r2k2': 'r2v2'}]),
    ]

    formatter = GraphQLResultFormatter(cross_relations=True)
    formatter._undouble = MagicMock(side_effect=lambda x: x)

    result = formatter._box_item(item)

    self.assertEqual(expected_result, result)
def test_box_item_specific(self):
    """An item whose references each hold a single edge is boxed into one identical item.

    ``_undouble`` is mocked to return its input unchanged.
    """
    def named(naam):
        return {'edges': [{'node': {'naam': naam}}]}

    def address():
        # A fresh dict on every call, so input and expectation share no objects
        return {
            'node': {
                'identificatie': '0363200000403263',
                'huisnummer': 17,
                'huisletter': None,
                'huisnummertoevoeging': None,
                'postcode': '1015NR',
                'ligtAanOpenbareruimte': named('Eerste Anjeliersdwarsstraat'),
                'ligtInWoonplaats': named('Amsterdam'),
            }
        }

    item = address()
    expected_result = [address()]

    formatter = GraphQLResultFormatter()
    formatter._undouble = MagicMock(side_effect=lambda x: x)

    result = formatter._box_item(item)

    self.assertEqual(len(expected_result), len(result))
    self.assertEqual(expected_result, result)
def test_box_item_nested_references(self):
    """Nested references are boxed too: one item per nested edge, then the other edges.

    The first reference holds two nested references and is expected to
    result in two items; the other two references result in one item each.
    ``_undouble`` is mocked to return its input unchanged.
    """
    def edges(*nodes):
        # Wrap plain node dicts in the GraphQL edges/node structure
        return {'edges': [{'node': node} for node in nodes]}

    def nested_node(sortkey):
        return {'rrk1': 'rrv1', 'rrk2': 'rrv2', 'sortkey': sortkey}

    def reference_node(sortkey, **extra):
        return {'rk1': 'rv1', 'rk2': 'rv2', 'sortkey': sortkey, **extra}

    def boxed(*reference_nodes):
        return {
            'node': {
                'k1': 'v1',
                'k2': 'v2',
                'reference': edges(*reference_nodes),
            }
        }

    item = boxed(
        reference_node('B', reference=edges(nested_node('rrB'), nested_node('rrA'))),
        reference_node('C'),
        reference_node('A'),
    )

    expected_result = [
        boxed(reference_node('B', reference=edges(nested_node('rrB')))),
        boxed(reference_node('B', reference=edges(nested_node('rrA')))),
        boxed(reference_node('C')),
        boxed(reference_node('A')),
    ]

    formatter = GraphQLResultFormatter()
    formatter._undouble = MagicMock(side_effect=lambda x: x)

    result = formatter._box_item(item)

    self.assertEqual(expected_result, result)