예제 #1
0
    def test_format_item_expand_history(self):
        """format_item yields whatever _expand_history produces when expand_history is set."""
        expanded_rows = ['a', 'b']
        formatter = GraphQLResultFormatter(expand_history=True)
        formatter._expand_history = MagicMock(return_value=iter(expanded_rows))

        formatted = list(formatter.format_item('item'))

        # The rows come straight from the (mocked) history expansion ...
        self.assertEqual(['a', 'b'], formatted)
        # ... which must have been handed the original item.
        formatter._expand_history.assert_called_with('item')
예제 #2
0
    def test_format_item_no_special_formatting(self):
        """Without history expansion, format_item yields one row: the flattened item."""
        flatten_mock = MagicMock()
        formatter = GraphQLResultFormatter(expand_history=False)
        formatter._flatten_edge = flatten_mock

        formatted = list(formatter.format_item('item'))

        # The item is passed to _flatten_edge and its result is the only row.
        flatten_mock.assert_called_with('item')
        self.assertEqual([flatten_mock.return_value], formatted)
예제 #3
0
    def test_format_item_with_unfold(self):
        """With unfold enabled, every element returned by _box_item is flattened and yielded."""

        def flatten(x):
            return 'flattened_' + x

        formatter = GraphQLResultFormatter(unfold=True)
        formatter._box_item = MagicMock(return_value=['a', 'b', 'c'])
        formatter._flatten_edge = flatten

        formatted = list(formatter.format_item('item'))

        expected = ['flattened_a', 'flattened_b', 'flattened_c']
        self.assertEqual(expected, formatted)
예제 #4
0
    def test_format_item_with_sorter(self):
        """With sort enabled, format_item yields the flattened item picked by sorter.sort_items."""

        def pick_second(x):
            return x[1]

        def flatten(x):
            return 'flattened_' + x

        sorter_mock = MagicMock()
        sorter_mock.sort_items = pick_second

        formatter = GraphQLResultFormatter(sort=True)
        formatter._box_item = MagicMock(return_value=['a', 'b', 'c'])
        formatter.sorter = sorter_mock
        formatter._flatten_edge = flatten

        first_row = next(formatter.format_item('item'))

        self.assertEqual('flattened_b', first_row)
예제 #5
0
class GraphQLStreaming:
    """Iterable over the formatted rows of a streaming GraphQL endpoint.

    When ``batch_size`` is None the query is sent once, unchanged. Otherwise
    the query is rewritten to request ``batch_size`` rows at a time and is
    re-sent with an ``after`` cursor until a page comes back empty.
    """

    def __init__(self,
                 host,
                 query,
                 unfold=False,
                 sort=None,
                 row_formatter=None,
                 cross_relations=False,
                 batch_size=None,
                 secure_user=None):
        """Register the endpoint and query; nothing is requested until iteration.

        :param host: base URL; the streaming endpoint path is appended to it
        :param query: GraphQL query string
        :param unfold: forwarded to GraphQLResultFormatter
        :param sort: forwarded to GraphQLResultFormatter
        :param row_formatter: forwarded to GraphQLResultFormatter
        :param cross_relations: forwarded to GraphQLResultFormatter
        :param batch_size: page size; None disables pagination
        :param secure_user: when truthy the secure endpoint is used and the
            value is passed along with every request
        """
        self.host = host
        self.query = query
        self.secure_user = secure_user
        self.url = self.host + (STREAMING_GRAPHQL_SECURE_ENDPOINT
                                if self.secure_user else
                                STREAMING_GRAPHQL_PUBLIC_ENDPOINT)
        self.batch_size = batch_size

        self.formatter = GraphQLResultFormatter(
            sort=sort,
            unfold=unfold,
            row_formatter=row_formatter,
            cross_relations=cross_relations)

        self.current_page = None

    def _execute_query(self, query):
        """Post ``query`` to the endpoint and yield the streamed response items."""
        yield from post_stream(self.url, {'query': query},
                               secure_user=self.secure_user)

    def _query_all(self):
        """Query on the input query as is. Don't add pagination.

        :return: generator of formatted rows
        """
        for item in self._execute_query(self.query):
            # Formatter may return multiple rows for one item, for example when 'unfold' is set to True. Hence the
            # double yield from (the first yield from being in _execute_query)
            yield from self.formatter.format_item(json_loads(item))

    def _add_pagination_to_query(self, query: str, after: str,
                                 batch_size: int):
        """Return ``query`` with ``first``/``after`` arguments and a root-level cursor field.

        :param query: GraphQL query string
        :param after: cursor value to continue after, inserted verbatim; None
            leaves out the ``after`` argument
        :param batch_size: value for the ``first`` argument
        :return: the paginated query string
        :raises ValueError: if an existing argument has no ``key: value`` form
        """
        # NOTE: the greedy '.*' runs up to the last ')' on the line that holds
        # the root field, so the root arguments are expected to be the only
        # parenthesised group on that line.
        existing_arguments_pattern = re.compile(r'^(\s*{\s*)(\w+)\s*\((.*)\)')
        existing_arguments = existing_arguments_pattern.search(query)

        if existing_arguments is not None:
            # Transform arguments to dictionary. Values that themselves contain
            # a comma are not supported by this simple split.
            arguments = {}
            for arg in existing_arguments[3].split(','):
                if not arg.strip():
                    # Blank segment, e.g. from a trailing comma.
                    continue
                # Split on the first colon only, so that values containing a
                # colon (such as "a:b" or a timestamp) stay intact.
                key, value = arg.split(':', 1)
                arguments[key.strip()] = value.strip()

            # Set first and after, overwrite if already exist
            arguments['first'] = batch_size

            if after is not None:
                arguments['after'] = after

            args_string = ", ".join(
                f'{k}: {v}' for k, v in arguments.items())
            # A callable replacement inserts the text literally; a template
            # string would interpret backslashes in the arguments as escapes.
            paginated_query = existing_arguments_pattern.sub(
                lambda m: f'{m[1]}{m[2]}({args_string})', query, count=1)

        else:
            after_str = f", after: {after}" if after is not None else ""
            # Query does not have any arguments. Add arguments after first word
            paginated_query = re.sub(
                r'(\w+)',
                lambda m: f'{m[0]}(first: {batch_size}{after_str})',
                query,
                count=1)

        # Add cursor to query if not yet exists on root level.
        if not re.search(
                r'^\s*{\s*\w+\s*\(?[^\n]*\)?\s*{\s*edges\s*{\s*node\s*{[^{]*cursor',
                paginated_query):
            return re.sub(r'(node\s*{)(\s*)(\w*)',
                          '\\g<1>\\g<2>cursor\\g<2>\\g<3>',
                          paginated_query,
                          count=1)

        return paginated_query

    def _query_page(self, after: str):
        """Yield the raw response items of the page following cursor ``after``."""
        page_query = self._add_pagination_to_query(self.query, after,
                                                   self.batch_size)
        yield from self._execute_query(page_query)

    def _query_paginated(self):
        """Yield formatted rows page by page until a page returns no items."""
        last_cursor = None
        while True:
            result_cnt = 0

            for item in self._query_page(last_cursor):
                result_cnt += 1

                for formatted_item in self.formatter.format_item(
                        json_loads(item)):
                    # The cursor of the last yielded row is where the next
                    # page continues.
                    last_cursor = formatted_item['cursor']
                    yield formatted_item

            if result_cnt == 0:
                break

    def __iter__(self):
        """Iterate over all formatted rows, paginated only when batch_size is set."""
        if self.batch_size is None:
            yield from self._query_all()
        else:
            yield from self._query_paginated()
예제 #6
0
    def test_format_item_with_row_formatter(self):
        """The row formatter is applied to the item before the result is flattened."""

        def row_formatter(x):
            return 'formatted_row(' + x + ')'

        def flatten(x):
            return 'flattened(' + x + ')'

        formatter = GraphQLResultFormatter(row_formatter=row_formatter)
        formatter._flatten_edge = flatten

        formatted = list(formatter.format_item('a'))

        self.assertEqual(['flattened(formatted_row(a))'], formatted)
예제 #7
0
class GraphQL:
    """Iterable over the formatted edges of a paginated GraphQL collection.

    The query is rewritten to carry ``first``/``after`` arguments and a
    ``pageInfo`` selection; pages are requested one by one during iteration
    and the page size is adapted to the measured request duration.
    """
    sorter = None

    def __init__(self, host, query, catalogue, collection, expand_history=False, sort=None, unfold=False,
                 row_formatter=None, cross_relations=False, secure_user=None):
        """Constructor

        Lazy loading, Just register host and query and wait for the iterator to be called
        to load the data

        :param host: base URL; the GraphQL endpoint path is appended to it
        :param query: GraphQL query string, rewritten here for pagination
        :param catalogue: catalogue name, first part of the schema collection name
        :param collection: collection name, title-cased into the schema collection name
        :param expand_history: forwarded to GraphQLResultFormatter
        :param sort: forwarded to GraphQLResultFormatter
        :param unfold: forwarded to GraphQLResultFormatter
        :param row_formatter: forwarded to GraphQLResultFormatter
        :param cross_relations: forwarded to GraphQLResultFormatter
        :param secure_user: when truthy the secure endpoint is used and the
            value is passed along with every request
        """
        self.host = host
        self.secure_user = secure_user
        self.url = self.host + (GRAPHQL_SECURE_ENDPOINT if self.secure_user else GRAPHQL_PUBLIC_ENDPOINT)
        self.catalogue = catalogue
        self.collection = collection
        self.schema_collection_name = f'{self.catalogue}{self.collection.title()}'
        self.end_cursor = ""
        self.query = self._update_query(query, NUM_RECORDS)
        self.has_next_page = True
        self.gob_model = GOBModel().get_collection(self.catalogue, self.collection)

        self.formatter = GraphQLResultFormatter(expand_history, sort=sort, unfold=unfold, row_formatter=row_formatter,
                                                cross_relations=cross_relations)

    def __repr__(self):
        """Representation

        Provide for a readable representation
        """
        return f'GraphQL {self.schema_collection_name}'

    def __iter__(self):
        """Iteration method

        Reads pages and returns the entities in each page until no pages are left
        (the response reports hasNextPage as false)

        Raises:
            AssertionError: if endpoint cannot be read

        :return: generator of formatted rows
        """
        num_records = NUM_RECORDS
        while self.has_next_page:
            start = time.time()
            print(f"Request {num_records} rows...")
            # NOTE(review): `secure_user` is not a keyword of the public `requests`
            # library; presumably `requests` is a project wrapper here - confirm.
            response = requests.post(self.url, json={'query': self.query}, secure_user=self.secure_user)
            end = time.time()
            duration = round(end - start, 2)
            # Adjust number of records to get to the target duration. A request
            # that rounds to 0.0 seconds gives no usable ratio (and would divide
            # by zero), so the page size is left unchanged in that case.
            if duration > 0:
                correction = TARGET_DURATION / duration
                num_records = max(int(num_records * correction), 1)
            print(f"Request data end ({duration} secs), records set to {num_records}")
            # Raised explicitly (instead of `assert`) so the check also runs under -O.
            if not response.ok:
                raise AssertionError(f"API Response not OK for query {self.query}")
            data = response.json()
            page = data['data'][self.schema_collection_name]

            # Update the cursor and has_next_page
            self.end_cursor = page['pageInfo']['endCursor']
            self.has_next_page = page['pageInfo']['hasNextPage']

            if self.has_next_page:
                self.query = self._update_query(self.query, num_records)

            for edge in page['edges']:
                yield from self.formatter.format_item(edge)

    def _update_query(self, query, num_records):
        """Updates a graphql query for pagination

        Adds the first and after parameters and the pageInfo node

        :param query: GraphQL query string
        :param num_records: number of records to request with ``first``
        :return: updated query
        """
        filters_pattern = f'{self.schema_collection_name}\\s*\\((.+)?\\)'

        # First check if the query has a filter
        filters = re.search(filters_pattern, query)
        if filters:

            # check if the query has a filter on 'first'
            first_match = re.search('first:\\s?(([\\d]+)?)', query)
            if first_match:
                # adjust number of records to request; substitute at the matched
                # position so identical digits elsewhere in the query are untouched
                query = query[:first_match.start(1)] + f"{num_records}" + query[first_match.end(1):]
                # The length of the query may have changed, so locate the
                # filter span again before using its offsets below.
                filters = re.search(filters_pattern, query)

            # Try to find the after parameter, or else add it
            after_match = re.search('after:\\s?("([a-zA-Z\\d=]+)?")', query)
            if after_match:
                # Replace only the matched cursor literal, not every equal substring
                query = query[:after_match.start(1)] + f'"{self.end_cursor}"' + query[after_match.end(1):]
            else:
                append_string = f', after: "{self.end_cursor}")' if 'first' in filters[0] \
                    else f', first: {num_records}, after: "{self.end_cursor}")'
                filters_end = filters.span()[1]
                # Replace the closing ')' of the filter with the appended arguments
                query = query[:filters_end-1] + append_string + query[filters_end:]
        else:
            # Add first and after parameter after the main collection
            query = query.replace(self.schema_collection_name,
                                  f'{self.schema_collection_name}(first: {num_records}, after: "{self.end_cursor}")')

        # Add pageInfo if it doesn't exist
        if not re.search('pageInfo', query):
            closing_braces = list(re.finditer('}', query))
            # Add pageInfo at the correct level of the query, i.e. just before
            # the second to last closing brace:
            # {
            #     collection {
            #         edges {
            #             node {
            #
            #             }
            #         }
            #         pageInfo {}
            #     }
            # }
            page_info = 'pageInfo { endCursor, hasNextPage }'
            insert_at = closing_braces[-2].start()
            query = query[:insert_at] + page_info + query[insert_at:]

        return query