def test_bad_variable_in_query():
    """It should raise NonValidVariable when the endpoint references an unknown variable"""
    query = {'url': '/stuff/%(thing)s/foo'}
    params = {}
    # Without handle_errors the missing variable is tolerated silently
    nosql_apply_parameters_to_query(query, params)
    # With handle_errors=True the same call must raise
    with pytest.raises(NonValidVariable) as err:
        nosql_apply_parameters_to_query(query, params, handle_errors=True)
    assert str(err.value) == 'Non valid variable thing'
def test_nosql_apply_parameters_to_query_dot():
    """It should handle both `x["y"]` and `x.y`"""
    dotted_query = {
        'facet': '{{ facet.value }}',
        'sort': '{{ rank[0] }}',
        'rows': '{{ bibou[0].value }}',
    }
    bracket_query = {
        'facet': '{{ facet["value"] }}',
        'sort': '{{ rank[0] }}',
        'rows': '{{ bibou[0]["value"] }}',
    }
    parameters = {'facet': {'value': 'auteur'}, 'rank': ['rang'], 'bibou': [{'value': 50}]}
    dotted_result = nosql_apply_parameters_to_query(dotted_query, parameters)
    bracket_result = nosql_apply_parameters_to_query(bracket_query, parameters)
    # Both notations must render to the same values
    assert dotted_result == bracket_result == {'facet': 'auteur', 'sort': 'rang', 'rows': 50}
def get_cache_key(
    self,
    data_source: Optional[ToucanDataSource] = None,
    permissions: Optional[dict] = None,
    offset: int = 0,
    limit: Optional[int] = None,
) -> str:
    """
    Generate a unique identifier (str) for a given connector's configuration
    (if no parameters are supplied) or for a given couple connector/query
    configuration (if `data_source` parameter is supplied).
    This identifier will then be used as a cache key.
    """
    # Render permission templates with the data source parameters when available
    if data_source:
        rendered_permissions = nosql_apply_parameters_to_query(permissions, data_source.parameters)
    else:
        rendered_permissions = permissions
    unique_identifier = {
        'connector': self.get_unique_identifier(),
        'permissions': rendered_permissions,
        'offset': offset,
        'limit': limit,
    }
    if data_source is not None:
        unique_identifier['datasource'] = self._get_unique_datasource_identifier(data_source)
    # `default=hash` lets non-serializable values be reduced to a stable int
    json_uid = JsonWrapper.dumps(unique_identifier, sort_keys=True, default=hash)
    # Namespaced UUID v3 gives a deterministic key for identical configurations
    return str(uuid.uuid3(uuid.NAMESPACE_OID, json_uid))
def test_apply_parameter_to_query_do_nothing():
    """It should do nothing"""
    pipeline = [{'$match': {'domain': 'yo', 'cat': 1, 'step': '2'}}]
    # No parameters supplied: the query must come back unchanged
    assert nosql_apply_parameters_to_query(pipeline, None) == pipeline
def build_query_repositories(organization: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of repositories from Github's API

    :param organization: the organization name from which the repositories data will be extracted
    :return: graphql query with the sanitized organization name
    """
    # Fix: the login value must be the `%(organization)s` placeholder so the
    # `organization` parameter is actually interpolated (and sanitized) by
    # nosql_apply_parameters_to_query — it was previously never used.
    return nosql_apply_parameters_to_query(
        """query repositories($cursor: String) {
  organization(login: "%(organization)s") {
    repositories(first: 90, orderBy: {field: PUSHED_AT, direction: DESC}, after: $cursor) {
      nodes {
        name
      }
      pageInfo {
        hasNextPage
        endCursor
      }
    }
  }
}""",
        {'organization': organization},
    )
def build_query_teams(organization: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of teams from Github's API

    :param organization: the organization name from which the teams data will be extracted
    :return: graphql query with the sanitized organization name
    """
    # Fix: the login value must be the `%(organization)s` placeholder so the
    # `organization` parameter is actually interpolated (and sanitized) by
    # nosql_apply_parameters_to_query — it was previously never used.
    return nosql_apply_parameters_to_query(
        """query teams($cursor: String) {
  organization(login: "%(organization)s") {
    teams(first: 90, orderBy: {field: NAME, direction: ASC}, after: $cursor) {
      nodes {
        slug
      }
      pageInfo {
        endCursor
        hasNextPage
      }
    }
  }
  rateLimit {
    remaining
    resetAt
  }
}
""",
        {'organization': organization},
    )
def _retrieve_data(self, data_source: MicroStrategyDataSource) -> pd.DataFrame:
    """Retrieves cube or report data, flattens return dataframe"""
    # The `search` dataset is a metadata lookup, handled by a dedicated path
    if data_source.dataset == Dataset.search:
        return self._retrieve_metadata(data_source)
    client = Client(self.base_url, self.project_id, self.username, self.password)
    # `dataset` names the client method to call (e.g. cube or report)
    query_func = getattr(client, data_source.dataset)
    if not data_source.viewfilter:
        results = query_func(
            id=data_source.id, offset=data_source.offset, limit=data_source.limit
        )
    else:
        # Two-phase query: first fetch only the definition (limit=0) so that
        # the viewfilter's attribute/metric names can be mapped to internal
        # ids, then re-run the query with the resolved filter.
        results = query_func(id=data_source.id, limit=0)
        dfn = get_definition(results)
        data_source.viewfilter = nosql_apply_parameters_to_query(
            data_source.viewfilter, data_source.parameters
        )
        viewfilter = fill_viewfilter_with_ids(data_source.viewfilter, dfn)
        results = query_func(
            id=data_source.id,
            viewfilter=viewfilter,
            offset=data_source.offset,
            limit=data_source.limit,
        )
    # Get a list of attributes and metrics
    attributes = get_attr_names(results)
    metrics = get_metric_names(results)
    # get data based on attributes and metrics
    rows = flatten_json(results['result']['data']['root'], attributes, metrics)
    return json_normalize(rows)
def _retrieve_data(self, data_source: LightspeedDataSource) -> pd.DataFrame:
    """Fetch the rendered endpoint, apply the jq filter and wrap the result in a dataframe."""
    rendered_endpoint = nosql_apply_parameters_to_query(
        data_source.endpoint, data_source.parameters
    )
    raw_data = self.bearer_oauth_get_endpoint(rendered_endpoint)
    filtered_data = jq(data_source.filter).transform(raw_data)
    return pd.DataFrame(filtered_data)
def test_apply_parameter_to_query_int_param():
    """It should work when a parameter is an int"""
    pipeline = [{'$match': {'domain': '%(param1)s', 'cat': '%(param2)s'}}]
    rendered = nosql_apply_parameters_to_query(pipeline, {'param1': 'yo', 'param2': 1})
    # The int parameter must keep its type, not become the string "1"
    assert rendered == [{'$match': {'domain': 'yo', 'cat': 1}}]
def _retrieve_data(self, data_source: RevinateDataSource) -> pd.DataFrame:
    """Primary function and point of entry"""
    rendered_endpoint = nosql_apply_parameters_to_query(
        query=data_source.endpoint, parameters=data_source.params
    )
    rows = self._run_fetch(rendered_endpoint, jq_filter=data_source.filter)
    return pd.DataFrame(rows)
def normalize_query(query, parameters):
    """Render `query` with `parameters` and coerce it into an aggregation pipeline."""
    query = nosql_apply_parameters_to_query(query, parameters)
    # A plain dict is shorthand for a single $match stage
    if isinstance(query, dict):
        query = [{'$match': query}]
    for stage in query:
        # Allow ordered sorts: a list of single-key dicts becomes an ordered SON
        if '$sort' in stage and isinstance(stage['$sort'], list):
            stage['$sort'] = SON([entry.popitem() for entry in stage['$sort']])
    return query
def _retrieve_data(self, data_source: AircallDataSource) -> pd.DataFrame:
    """Page through the Aircall API until `limit` rows are fetched or the last page is hit."""
    endpoint = nosql_apply_parameters_to_query(data_source.endpoint, data_source.parameters)
    query = nosql_apply_parameters_to_query(data_source.query, data_source.parameters)
    # limit == -1 means "no limit"
    remaining = float('inf') if data_source.limit == -1 else data_source.limit
    page_number = 1
    reached_last_page = False
    rows = []
    while remaining > 0 and not reached_last_page:
        # Never request more rows than still needed
        page_size = PER_PAGE if remaining > PER_PAGE else remaining
        page_rows, reached_last_page = self._get_page_data(
            endpoint, query, data_source.filter, page_number, page_size
        )
        rows += page_rows
        page_number += 1
        remaining -= page_size
    return pd.DataFrame(rows)
def _render_query(self, data_source):
    """Render the data source with its parameters, then overlay it on the connector template."""
    query = nosql_apply_parameters_to_query(
        data_source.dict(by_alias=True), data_source.parameters, handle_errors=True
    )
    if self.template:
        # Keep only template entries that actually carry a value
        template = {key: value for key, value in self.template.dict(by_alias=True).items() if value}
        for key in query.keys() & template.keys():
            if query[key]:
                # Data source values take precedence over the template defaults
                template[key].update(query[key])
            query[key] = template[key]
    return query
def get_df(self, data_source: HttpAPIDataSource) -> pd.DataFrame:
    """Build the request from the data source merged with the connector template, then run it."""
    session = self.auth.get_session() if self.auth else Session()
    query = nosql_apply_parameters_to_query(data_source.dict(), data_source.parameters)
    if self.template:
        # Keep only template entries that actually carry a value
        template = {key: value for key, value in self.template.dict().items() if value}
        for key in query.keys() & template.keys():
            if query[key]:
                # Data source values take precedence over the template defaults
                template[key].update(query[key])
            query[key] = template[key]
    return pd.DataFrame(self.do_request(query, session))
def _retrieve_data(self, data_source: RokDataSource) -> pd.DataFrame:
    """Render and run the ROK GraphQL query, using either JWT or password authentication."""
    # Endpoint depends on the authentication mode
    endpoint = f'{self.host}/graphql'
    # These three values are always injected as query parameters
    date_viewid_parameters = {
        'start_date': data_source.start_date,
        'end_date': data_source.end_date,
        'viewId': data_source.viewId,
    }
    if data_source.parameters:
        # User-supplied parameters are merged in; date/viewId values win on conflict
        parameters = {**data_source.parameters, **date_viewid_parameters}
    else:
        parameters = date_viewid_parameters
    data_source.query = nosql_apply_parameters_to_query(data_source.query, parameters)
    if self.authenticated_with_token:
        # Token (JWT) auth path: only valid in live data mode and with a secret
        if not data_source.live_data:
            raise InvalidAuthenticationMethodError(
                """Request with ROK token is not possible while not in live data mode. Change the connector configuration to live data"""
            )
        if not self.secret:
            raise NoROKSecretAvailableError('secrets not defined')
        res = self.retrieve_data_with_jwt(data_source, endpoint)
    else:
        endpoint = f'{endpoint}?DatabaseName={data_source.database}'
        # First retrieve the authentication token
        rok_token = self.retrieve_token_with_password(data_source.database, endpoint)
        # Then retrieve the data
        payload = {'query': data_source.query}
        res = requests.post(endpoint, json=payload, headers={'Token': rok_token}).json()
    # GraphQL errors come back in the response body, not as HTTP errors
    if 'errors' in res:
        raise ValueError(str(res['errors']))
    return pd.DataFrame(transform_with_jq(res, data_source.filter))
def test_apply_params_with_missing_param():
    """Keys whose parameters are missing should be dropped (both %()s and jinja styles)."""
    cases = [
        # (query, params, expected)
        (
            {'domain': 'blah', 'country': {'$ne': '%(country)s'}, 'city': '%(city)s'},
            {'city': 'Paris'},
            {'domain': 'blah', 'country': {}, 'city': 'Paris'},
        ),
        (
            [{'$match': {'country': '%(country)s', 'city': 'Test'}}, {'$match': {'b': 1}}],
            {'city': 'Paris'},
            [{'$match': {'city': 'Test'}}, {'$match': {'b': 1}}],
        ),
        (
            {'code': '%(city)s_%(country)s', 'domain': 'Test'},
            {'city': 'Paris'},
            {'domain': 'Test'},
        ),
        (
            {'code': '%(city)s_%(country)s', 'domain': 'Test'},
            {'city': 'Paris', 'country': 'France'},
            {'code': 'Paris_France', 'domain': 'Test'},
        ),
        (
            {'domain': 'blah', 'country': {'$ne': '{{country}}'}, 'city': '{{city}}'},
            {'city': 'Paris'},
            {'domain': 'blah', 'country': {}, 'city': 'Paris'},
        ),
        (
            [{'$match': {'country': '{{country["name"]}}', 'city': 'Test'}}, {'$match': {'b': 1}}],
            {'city': 'Paris'},
            [{'$match': {'city': 'Test'}}, {'$match': {'b': 1}}],
        ),
        (
            {'code': '{{city}}_{{country[0]}}', 'domain': 'Test'},
            {'city': 'Paris'},
            {'domain': 'Test'},
        ),
        (
            {'code': '{{city}}_{{country}}', 'domain': 'Test'},
            {'city': 'Paris', 'country': 'France'},
            {'code': 'Paris_France', 'domain': 'Test'},
        ),
        (
            {'code': '{{city}}_{{country}}', 'domain': 'Test'},
            None,
            {'domain': 'Test'},
        ),
    ]
    for query, params, expected in cases:
        assert nosql_apply_parameters_to_query(query, params) == expected
def get_df(self, data_source):
    """Run the rendered query against the Mongo collection and return a dataframe."""
    client = pymongo.MongoClient(self.uri, ssl=self.ssl)
    collection = client[self.database][data_source.collection]
    # A bare string is shorthand for a filter on `domain`
    if isinstance(data_source.query, str):
        data_source.query = {'domain': data_source.query}
    data_source.query = handle_missing_params(data_source.query, data_source.parameters)
    data_source.query = nosql_apply_parameters_to_query(
        data_source.query, data_source.parameters
    )
    records = []
    if isinstance(data_source.query, dict):
        # A dict is a find() filter ...
        records = collection.find(data_source.query)
    elif isinstance(data_source.query, list):
        # ... while a list is an aggregation pipeline
        records = collection.aggregate(data_source.query)
    df = pd.DataFrame(list(records))
    client.close()
    return df
def test_apply_parameter_to_query_in_expression():
    """It should work when a parameter is in an expression (e.g. OData)"""
    query = {
        'entity': 'books',
        'query': {'$filter': "title eq '%(title)s'", '$top': "%(top)s"},
    }
    expected = {
        'entity': 'books',
        'query': {'$filter': "title eq 'the overstory'", '$top': 3},
    }
    parameters = {"title": "the overstory", "top": 3}
    assert nosql_apply_parameters_to_query(query, parameters) == expected
def get_df(self, data_source: GoogleAnalyticsDataSource) -> pd.DataFrame:
    """Run the rendered report request against the GA API, follow pagination, concat all pages."""
    credentials = ServiceAccountCredentials.from_json_keyfile_dict(
        self.credentials.dict(), self.scope
    )
    service = build(API, VERSION, credentials=credentials)
    rendered_request = nosql_apply_parameters_to_query(
        data_source.report_request.dict(), data_source.parameters
    )
    report_request = ReportRequest(**rendered_request)
    report = get_query_results(service, report_request)
    pages = [pd.DataFrame(get_dict_from_response(report, report_request.dateRanges))]
    # Follow the pagination token until the API stops returning one
    while 'nextPageToken' in report:
        report_request.pageToken = report['nextPageToken']
        report = get_query_results(service, report_request)
        pages.append(pd.DataFrame(get_dict_from_response(report, report_request.dateRanges)))
    return pd.concat(pages)
def _retrieve_data(self, data_source: ElasticsearchDataSource) -> pd.DataFrame:
    """Render the query body, build an ES client from the configured hosts and run the search."""
    data_source.body = nosql_apply_parameters_to_query(
        data_source.body, data_source.parameters)
    # Translate each configured host URL into elasticsearch-py connection params
    connection_params = []
    for host in self.hosts:
        parsed_url = urlparse(host.url)
        h = {"host": parsed_url.hostname}
        if parsed_url.path and parsed_url.path != "/":
            h["url_prefix"] = parsed_url.path
        if parsed_url.scheme == "https":
            # Default to 443 for https and enable TLS
            h["port"] = host.port or 443
            h["use_ssl"] = True
        elif host.port:
            h["port"] = host.port
        if host.username or host.password:
            h["http_auth"] = f"{host.username}:{host.password}"
        if host.headers:
            h['headers'] = host.headers
        connection_params.append(h)
    esclient = Elasticsearch(connection_params, send_get_body_as=self.send_get_body_as)
    # search_method selects the client call (e.g. search vs msearch)
    response = getattr(esclient, data_source.search_method)(index=data_source.index,
                                                            body=data_source.body)
    if data_source.search_method == SearchMethod.msearch:
        res = []
        # Body alternate index and query `[index, query, index, query...]`
        queries = data_source.body[1::2]
        # Pair each sub-query with its corresponding response entry
        for query, data in zip(queries, response['responses']):
            res += _read_response(data)
    else:
        res = _read_response(response)
    df = json_normalize(res)
    return df
def build_query_members(organization: str, name: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of team members from Github's API

    :param organization: the organization name from which the members data will be extracted
    :param name: the team name from which the members data will be extracted
    :return: graphql query with sanitized organization and team names
    """
    # Fix: the login value must be the `%(organization)s` placeholder so the
    # `organization` parameter is actually interpolated (and sanitized) by
    # nosql_apply_parameters_to_query, consistently with `%(team)s` below.
    return nosql_apply_parameters_to_query(
        """
        query members($cursor: String){
          organization(login: "%(organization)s") {
            team(slug: "%(team)s"){
              members(first: 100, orderBy: {field: LOGIN, direction: ASC}, after: $cursor) {
                edges {
                  node {
                    login
                  }
                }
                pageInfo {
                  hasNextPage
                  endCursor
                }
              }
            }
          }
        }
        """,
        {'organization': organization, 'team': name},
    )
def test_nosql_apply_parameters_to_query(query, params, expected):
    """Parametrized check: rendering `query` with `params` must yield `expected`."""
    result = nosql_apply_parameters_to_query(query, params)
    assert result == expected
def build_query_pr(organization: str, name: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of pull requests from Github's API

    :param organization: the organization name from which the pull requests data will be extracted
    :param name: a str representing the repository to extract the PRs from
    :return: graphql query with the sanitized organization name
    """
    # Fix: the login value must be the `%(organization)s` placeholder so the
    # `organization` parameter is actually interpolated (and sanitized) by
    # nosql_apply_parameters_to_query, consistently with `%(repo_name)s` below.
    return nosql_apply_parameters_to_query(
        """query pr($cursor: String) {
  organization(login: "%(organization)s") {
    repository(name: "%(repo_name)s") {
      name
      pullRequests(orderBy: {field: CREATED_AT, direction: DESC}, first: 100, after: $cursor) {
        nodes {
          createdAt
          mergedAt
          deletions
          additions
          title
          state
          labels(orderBy: {field: NAME, direction: ASC}, last: 10) {
            edges {
              node {
                name
              }
            }
          }
          commits(first: 1) {
            edges {
              node {
                commit {
                  author {
                    user {
                      login
                    }
                  }
                }
              }
            }
          }
        }
        pageInfo {
          hasNextPage
          endCursor
        }
      }
    }
  }
  rateLimit {
    remaining
    resetAt
  }
}""",
        {'organization': organization, 'repo_name': name},
    )
def test_apply_parameter_to_query():
    """It should render all parameters, in both jinja and %()s styles."""
    tests = [
        # Each entry is (query, params, expected)
        # jinja arithmetic expression on list items
        (
            {'$match': {'domain': 'truc', 'indic': '{{my_indic[0]*my_indic[1]}}'}},  # query
            {'my_indic': [5, 6]},  # params
            {'$match': {'domain': 'truc', 'indic': 30}},  # expected
        ),
        # jinja conditional expression
        (
            {'$match': {'domain': 'truc', 'indic': '{%if my_indic%}1{%else%}2{%endif%}'}},
            {'my_indic': False},
            {'$match': {'domain': 'truc', 'indic': 2}},
        ),
        # jinja list indexing, nested in a $and clause
        (
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '{{my_indic[0]}}'},
                        {'indic1': '{{my_indic[1]}}', 'indic2': 'yo_{{my_indic[2]}}'},
                        {'indic_list': '{{my_indic}}'},
                    ]
                }
            },
            {'my_indic': ['0', 1, '2']},
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '0'},
                        {'indic1': 1, 'indic2': 'yo_2'},
                        {'indic_list': ['0', 1, '2']},
                    ]
                }
            },
        ),
        # same shapes with printf-style %(name)s placeholders
        (
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '%(my_indic_0)s'},
                        {'indic1': '%(my_indic_1)s', 'indic2': 'yo_%(my_indic_2)s'},
                        {'indic_list': '%(my_indic)s'},
                    ]
                }
            },
            {'my_indic_0': '0', 'my_indic_1': 1, 'my_indic_2': '2', 'my_indic': ['0', 1, '2']},
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '0'},
                        {'indic1': 1, 'indic2': 'yo_2'},
                        {'indic_list': ['0', 1, '2']},
                    ]
                }
            },
        ),
        # jinja dict-key access
        (
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '{{my_indic["zero"]}}'},
                        {'indic1': '{{my_indic["one"]}}', 'indic2': 'yo_{{my_indic["two"]}}'},
                        {'indic_list': '{{my_indic}}'},
                    ]
                }
            },
            {'my_indic': {'zero': '0', 'one': 1, 'two': '2'}},
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '0'},
                        {'indic1': 1, 'indic2': 'yo_2'},
                        {'indic_list': {'zero': '0', 'one': 1, 'two': '2'}},
                    ]
                }
            },
        ),
        # a parameter value that is itself a JSON string must pass through verbatim
        (
            {'data': '%(fakirQuery)s'},
            {
                'fakirQuery': '[{"values":["bibou"],"chartParam":"test","type":"test","name":"test"}]'
            },
            {'data': '[{"values":["bibou"],"chartParam":"test","type":"test","name":"test"}]'},
        ),
        # queries without placeholders are untouched
        ({'data': 1}, {}, {'data': 1}),
        ({'data': '1'}, {}, {'data': '1'}),
    ]
    for (query, params, expected) in tests:
        assert nosql_apply_parameters_to_query(query, params) == expected