def test_format_item_expand_history(self):
    """With expand_history=True, format_item yields what _expand_history returns for the item."""
    subject = GraphQLResultFormatter(expand_history=True)
    expand_mock = MagicMock(return_value=iter(['a', 'b']))
    subject._expand_history = expand_mock

    produced = list(subject.format_item('item'))

    self.assertEqual(['a', 'b'], produced)
    expand_mock.assert_called_with('item')
def test_format_item_no_special_formatting(self):
    """Without special options, format_item yields exactly the flattened edge of the item."""
    subject = GraphQLResultFormatter(expand_history=False)
    flatten_mock = MagicMock()
    subject._flatten_edge = flatten_mock

    produced = list(subject.format_item('item'))

    flatten_mock.assert_called_with('item')
    self.assertEqual([flatten_mock.return_value], produced)
def test_format_item_with_unfold(self):
    """With unfold=True, every boxed item is flattened and yielded separately."""
    def fake_flatten(x):
        return 'flattened_' + x

    subject = GraphQLResultFormatter(unfold=True)
    subject._box_item = MagicMock(return_value=['a', 'b', 'c'])
    subject._flatten_edge = fake_flatten

    produced = list(subject.format_item('item'))

    expected = ['flattened_a', 'flattened_b', 'flattened_c']
    self.assertEqual(expected, produced)
def test_format_item_with_sorter(self):
    """With sort=True, the item picked by the sorter is the one that gets flattened and yielded."""
    def pick_second(x):
        return x[1]

    def fake_flatten(x):
        return 'flattened_' + x

    subject = GraphQLResultFormatter(sort=True)
    subject._box_item = MagicMock(return_value=['a', 'b', 'c'])
    subject.sorter = MagicMock()
    subject.sorter.sort_items = pick_second
    subject._flatten_edge = fake_flatten

    first_row = next(subject.format_item('item'))

    self.assertEqual('flattened_b', first_row)
class GraphQLStreaming:
    """Iterable over the formatted rows of a GraphQL query sent to the streaming endpoint.

    When ``batch_size`` is None the query is posted once, as is. Otherwise the
    query is rewritten to request ``batch_size`` items per page and pages are
    fetched until a page yields no items.
    """

    def __init__(self, host, query, unfold=False, sort=None, row_formatter=None, cross_relations=False,
                 batch_size=None, secure_user=None):
        """Register the endpoint, query and formatting options; no request is made here.

        :param host: base url; the streaming endpoint path is appended to it
        :param query: the GraphQL query string
        :param unfold: passed on to GraphQLResultFormatter
        :param sort: passed on to GraphQLResultFormatter
        :param row_formatter: passed on to GraphQLResultFormatter
        :param cross_relations: passed on to GraphQLResultFormatter
        :param batch_size: page size; None disables pagination
        :param secure_user: when truthy the secure endpoint is used and the
            value is forwarded with every request
        """
        self.host = host
        self.query = query
        self.secure_user = secure_user
        self.url = self.host + (STREAMING_GRAPHQL_SECURE_ENDPOINT if self.secure_user
                                else STREAMING_GRAPHQL_PUBLIC_ENDPOINT)
        self.batch_size = batch_size
        self.formatter = GraphQLResultFormatter(
            sort=sort, unfold=unfold, row_formatter=row_formatter, cross_relations=cross_relations)
        self.current_page = None

    def _execute_query(self, query):
        """Post the query to the endpoint and yield the items of the streamed response."""
        yield from post_stream(self.url, {'query': query}, secure_user=self.secure_user)

    def _query_all(self):
        """Run the input query as is, without adding pagination.

        :return: generator of formatted rows
        """
        for item in self._execute_query(self.query):
            # Formatter may return multiple rows for one item, for example when 'unfold' is set to True. Hence the
            # double yield from (the first yield from being in _execute_query)
            yield from self.formatter.format_item(json_loads(item))

    def _add_pagination_to_query(self, query: str, after: str, batch_size: int):
        """Return the query with ``first``/``after`` arguments and a root-level ``cursor`` field.

        :param query: the GraphQL query to paginate
        :param after: cursor to continue after, or None for the first page
        :param batch_size: value for the ``first`` argument
        :return: the paginated query string
        :raises ValueError: if an existing argument has no ``name: value`` form
        """
        existing_arguments_pattern = re.compile(r'^(\s*{\s*)(\w+)\s*\((.*)\)')
        existing_arguments = existing_arguments_pattern.search(query)

        if existing_arguments is not None:
            # Transform arguments to dictionary
            arguments = {}
            for arg in existing_arguments[3].split(','):
                if not arg.strip():
                    # Blank segment (empty argument list or trailing comma)
                    continue
                # Split on the first colon only: the value itself may contain colons
                key, value = arg.split(':', 1)
                arguments[key.strip()] = value.strip()

            # Set first and after, overwrite if already exist
            arguments['first'] = batch_size
            if after is not None:
                arguments['after'] = after

            args_string = ", ".join(f'{k}: {v}' for k, v in arguments.items())
            # A callable replacement keeps backslashes in argument values literal
            paginated_query = existing_arguments_pattern.sub(
                lambda m: f'{m[1]}{m[2]}({args_string})', query, count=1)
        else:
            after_str = f", after: {after}" if after is not None else ""
            # Query does not have any arguments. Add arguments after first word
            paginated_query = re.sub(
                r'(\w+)', lambda m: f'{m[0]}(first: {batch_size}{after_str})', query, count=1)

        # Add cursor to query if not yet exists on root level.
        if not re.search(
                r'^\s*{\s*\w+\s*\(?[^\n]*\)?\s*{\s*edges\s*{\s*node\s*{[^{]*cursor', paginated_query):
            return re.sub(r'(node\s*{)(\s*)(\w*)', '\\g<1>\\g<2>cursor\\g<2>\\g<3>', paginated_query, count=1)

        return paginated_query

    def _query_page(self, after: str):
        """Yield the raw items of the page that follows cursor ``after``."""
        page_query = self._add_pagination_to_query(self.query, after, self.batch_size)
        yield from self._execute_query(page_query)

    def _query_paginated(self):
        """Yield formatted rows page by page until a page returns no items."""
        last_item = None

        while True:
            items = self._query_page(last_item)

            result_cnt = 0
            for item in items:
                result_cnt += 1
                for formatted_item in self.formatter.format_item(json_loads(item)):
                    # The cursor of the last yielded row is where the next page starts
                    last_item = formatted_item['cursor']
                    yield formatted_item

            if result_cnt == 0:
                break

    def __iter__(self):
        """Iterate over all formatted rows, paginated only when batch_size is set."""
        if self.batch_size is None:
            yield from self._query_all()
        else:
            yield from self._query_paginated()
def test_format_item_with_row_formatter(self):
    """The row formatter is applied to the item before the edge is flattened."""
    def row_formatter(x):
        return 'formatted_row(' + x + ')'

    def fake_flatten(x):
        return 'flattened(' + x + ')'

    subject = GraphQLResultFormatter(row_formatter=row_formatter)
    subject._flatten_edge = fake_flatten

    produced = list(subject.format_item('a'))

    self.assertEqual(['flattened(formatted_row(a))'], produced)
class GraphQL:
    """Iterable over the formatted edges of a paginated GraphQL collection query.

    The query is rewritten to carry ``first``/``after`` arguments and a
    ``pageInfo`` node; pages are requested until ``hasNextPage`` is false.
    """

    sorter = None

    def __init__(self, host, query, catalogue, collection, expand_history=False, sort=None, unfold=False,
                 row_formatter=None, cross_relations=False, secure_user=None):
        """Constructor

        Lazy loading, just register host and query and wait for the iterator to be called
        to load the data

        :param host: base url; the GraphQL endpoint path is appended to it
        :param query: the GraphQL query string
        :param catalogue: catalogue name, first part of the schema collection name
        :param collection: collection name, title-cased into the schema collection name
        :param expand_history: passed on to GraphQLResultFormatter
        :param sort: passed on to GraphQLResultFormatter
        :param unfold: passed on to GraphQLResultFormatter
        :param row_formatter: passed on to GraphQLResultFormatter
        :param cross_relations: passed on to GraphQLResultFormatter
        :param secure_user: when truthy the secure endpoint is used and the
            value is forwarded with every request
        """
        self.host = host
        self.secure_user = secure_user
        self.url = self.host + (GRAPHQL_SECURE_ENDPOINT if self.secure_user else GRAPHQL_PUBLIC_ENDPOINT)
        self.catalogue = catalogue
        self.collection = collection
        self.schema_collection_name = f'{self.catalogue}{self.collection.title()}'
        self.end_cursor = ""
        self.query = self._update_query(query, NUM_RECORDS)
        self.has_next_page = True
        self.gob_model = GOBModel().get_collection(self.catalogue, self.collection)
        self.formatter = GraphQLResultFormatter(expand_history, sort=sort, unfold=unfold,
                                                row_formatter=row_formatter, cross_relations=cross_relations)

    def __repr__(self):
        """Representation

        Provide for a readable representation
        """
        return f'GraphQL {self.schema_collection_name}'

    def __iter__(self):
        """Iteration method

        Reads pages and returns the entities in each page until no pages are left
        (hasNextPage is false). The page size is adjusted after every request so
        that a request takes roughly TARGET_DURATION.

        Raises:
            AssertionError: if endpoint cannot be read

        :return: generator of formatted rows
        """
        num_records = NUM_RECORDS
        while self.has_next_page:
            start = time.time()
            print(f"Request {num_records} rows...")
            response = requests.post(self.url, json={'query': self.query}, secure_user=self.secure_user)
            end = time.time()
            duration = round(end - start, 2)
            # Adjust number of records to get to the target duration.
            # A very fast response rounds to 0.0; keep the current page size
            # in that case instead of dividing by zero.
            if duration > 0:
                correction = TARGET_DURATION / duration
                num_records = max(int(num_records * correction), 1)
            print(f"Request data end ({duration} secs), records set to {num_records}")
            assert response.ok, f"API Response not OK for query {self.query}"
            data = response.json()

            # Update the cursor and has_next_page
            page_info = data['data'][self.schema_collection_name]['pageInfo']
            self.end_cursor = page_info['endCursor']
            self.has_next_page = page_info['hasNextPage']

            if self.has_next_page:
                self.query = self._update_query(self.query, num_records)

            for edge in data['data'][self.schema_collection_name]['edges']:
                yield from self.formatter.format_item(edge)

    def _update_query(self, query, num_records):
        """Updates a graphql query for pagination

        Adds (or updates) the first and after parameters and adds the pageInfo node

        :param query: the GraphQL query to update
        :param num_records: value for the ``first`` parameter
        :return: updated query
        """
        filters_pattern = f'{self.schema_collection_name}\\s*\\((.+)?\\)'

        # First check if the query has a filter
        filters = re.search(filters_pattern, query)
        if filters:
            # check if the query has a filter on 'first'
            match = re.search('first:\\s?(([\\d]+)?)', query)
            if match:
                # adjust number of records to request; splice at the matched
                # position so equal digits elsewhere in the query stay untouched
                value_start, value_end = match.span(1)
                query = query[:value_start] + f"{num_records}" + query[value_end:]

            # Try to find the after parameter, or else add it
            match = re.search('after:\\s?("([a-zA-Z\\d=]+)?")', query)
            if match:
                value_start, value_end = match.span(1)
                query = query[:value_start] + f'"{self.end_cursor}"' + query[value_end:]
            else:
                # Locate the filter again: updating 'first' may have changed
                # the length of the query and thereby the end of the filter
                filters = re.search(filters_pattern, query)
                append_string = f', after: "{self.end_cursor}")' if 'first' in filters[0] \
                    else f', first: {num_records}, after: "{self.end_cursor}")'
                filters_end = filters.span()[1]
                query = query[:filters_end-1] + append_string + query[filters_end:]
        else:
            # Add first and after parameter after the main collection
            query = query.replace(self.schema_collection_name,
                                  f'{self.schema_collection_name}(first: {num_records}, after: "{self.end_cursor}")')

        # Add pageInfo if it doesn't exist
        if not re.search('pageInfo', query):
            match = list(re.finditer('}', query))
            # Add pageInfo at the correct level of the query, just before the
            # second to last closing brace:
            # {
            #     collection {
            #         edges {
            #             node {
            #             }
            #         }
            #         pageInfo {}
            #     }
            # }
            pageInfo = 'pageInfo { endCursor, hasNextPage }'
            query = query[:match[-2].span()[0]] + pageInfo + query[match[-2].span()[0]:]

        return query