Esempio n. 1
0
def test_bad_variable_in_query():
    """It should raise a NonValidVariable exception for an unknown variable when `handle_errors` is set"""
    endpoint_query = {'url': '/stuff/%(thing)s/foo'}
    no_params = {}
    # Without `handle_errors`, the unknown variable must not raise
    nosql_apply_parameters_to_query(endpoint_query, no_params)
    with pytest.raises(NonValidVariable) as exc_info:
        nosql_apply_parameters_to_query(endpoint_query, no_params, handle_errors=True)
    assert str(exc_info.value) == 'Non valid variable thing'
Esempio n. 2
0
def test_nosql_apply_parameters_to_query_dot():
    """It should render `x.y` and `x["y"]` accesses to the same result"""
    dotted_query = {
        'facet': '{{ facet.value }}',
        'sort': '{{ rank[0] }}',
        'rows': '{{ bibou[0].value }}',
    }
    bracket_query = {
        'facet': '{{ facet["value"] }}',
        'sort': '{{ rank[0] }}',
        'rows': '{{ bibou[0]["value"] }}',
    }
    parameters = {'facet': {'value': 'auteur'}, 'rank': ['rang'], 'bibou': [{'value': 50}]}
    expected = {'facet': 'auteur', 'sort': 'rang', 'rows': 50}

    rendered_dotted = nosql_apply_parameters_to_query(dotted_query, parameters)
    rendered_bracket = nosql_apply_parameters_to_query(bracket_query, parameters)

    assert rendered_dotted == expected
    assert rendered_bracket == expected
Esempio n. 3
0
    def get_cache_key(
        self,
        data_source: Optional[ToucanDataSource] = None,
        permissions: Optional[dict] = None,
        offset: int = 0,
        limit: Optional[int] = None,
    ) -> str:
        """
        Build the string used as cache key for this connector.

        The key is derived from the connector's unique identifier, the
        permissions, `offset` and `limit`. When `data_source` is supplied,
        the permissions are first rendered with the data source's parameters
        and the data source's own identifier is added, so that the key
        identifies the connector/query couple.

        :return: a UUID (version 3) string computed from the JSON dump of
            those parts
        """
        key_parts = {'connector': self.get_unique_identifier()}

        if data_source:
            key_parts['permissions'] = nosql_apply_parameters_to_query(
                permissions, data_source.parameters
            )
        else:
            key_parts['permissions'] = permissions

        key_parts['offset'] = offset
        key_parts['limit'] = limit

        if data_source is not None:
            key_parts['datasource'] = self._get_unique_datasource_identifier(data_source)

        # `sort_keys=True` keeps the dump independent from key insertion order
        serialized = JsonWrapper.dumps(key_parts, sort_keys=True, default=hash)
        return str(uuid.uuid3(uuid.NAMESPACE_OID, serialized))
Esempio n. 4
0
def test_apply_parameter_to_query_do_nothing():
    """
    It should return a query equal to the input when no parameters are given
    """
    pipeline = [{'$match': {'domain': 'yo', 'cat': 1, 'step': '2'}}]
    assert nosql_apply_parameters_to_query(pipeline, None) == pipeline
Esempio n. 5
0
def build_query_repositories(organization: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of repositories
    from Github's API

    The organization name is injected through the `%(organization)s`
    placeholder, in the same way the sibling query builders inject
    `%(team)s` and `%(repo_name)s`.

    :param organization: the organization name from which the
    repositories data will be extracted
    :return: graphql query with the organization name substituted
    """
    # The template previously hard-coded `login: "******"`, so the
    # `organization` parameter was never interpolated into the query.
    return nosql_apply_parameters_to_query(
        """query repositories($cursor: String) {
          organization(login: "%(organization)s") {
            repositories(first: 90, orderBy: {field: PUSHED_AT, direction: DESC},
             after: $cursor) {
              nodes {
                name
              }
              pageInfo {
                hasNextPage
                endCursor
              }
            }
        }
    }""",
        {'organization': organization},
    )
Esempio n. 6
0
def build_query_teams(organization: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of teams
    from Github's API

    The organization name is injected through the `%(organization)s`
    placeholder, in the same way the sibling query builders inject
    `%(team)s` and `%(repo_name)s`.

    :param organization: the organization name from which the
    teams data will be extracted
    :return: graphql query with the organization name substituted
    """
    # The template previously hard-coded `login: "******"`, so the
    # `organization` parameter was never interpolated into the query.
    return nosql_apply_parameters_to_query(
        """query teams($cursor: String) {
              organization(login: "%(organization)s") {
                teams(first: 90, orderBy: {field: NAME, direction: ASC},
                 after: $cursor) {
                  nodes {
                    slug
                  }
                  pageInfo {
                    endCursor
                    hasNextPage
                  }
                }
              }
              rateLimit {
                remaining
                resetAt
              }
            }
            """,
        {'organization': organization},
    )
Esempio n. 7
0
    def _retrieve_data(self, data_source: MicroStrategyDataSource) -> pd.DataFrame:
        """
        Retrieve cube or report data and return it as a flattened dataframe.

        When `data_source.dataset` is `Dataset.search`, the work is delegated
        to `self._retrieve_metadata`. Otherwise the client method named after
        `data_source.dataset` is called, with the view filter applied when
        one is set on the data source.

        NOTE: when a view filter is set, `data_source.viewfilter` is
        overwritten with its parameter-rendered version.
        """
        if data_source.dataset == Dataset.search:
            return self._retrieve_metadata(data_source)

        client = Client(self.base_url, self.project_id, self.username, self.password)

        # The dataset value doubles as the name of the client method to call
        query_func = getattr(client, data_source.dataset)
        if not data_source.viewfilter:
            results = query_func(
                id=data_source.id, offset=data_source.offset, limit=data_source.limit
            )
        else:
            # Preliminary request with `limit=0`: its result is only used to
            # extract the definition needed to fill the view filter with ids
            results = query_func(id=data_source.id, limit=0)
            dfn = get_definition(results)
            data_source.viewfilter = nosql_apply_parameters_to_query(
                data_source.viewfilter, data_source.parameters
            )
            viewfilter = fill_viewfilter_with_ids(data_source.viewfilter, dfn)
            # Actual data request, this time with the resolved view filter
            results = query_func(
                id=data_source.id,
                viewfilter=viewfilter,
                offset=data_source.offset,
                limit=data_source.limit,
            )

        # Get a list of attributes and metrics
        attributes = get_attr_names(results)
        metrics = get_metric_names(results)

        # get data based on attributes and metrics
        rows = flatten_json(results['result']['data']['root'], attributes, metrics)
        return json_normalize(rows)
Esempio n. 8
0
 def _retrieve_data(self,
                    data_source: LightspeedDataSource) -> pd.DataFrame:
     """
     Fetch the parameter-rendered endpoint, apply the data source's jq
     filter to the response and return the result as a dataframe.
     """
     rendered_endpoint = nosql_apply_parameters_to_query(
         data_source.endpoint, data_source.parameters)
     raw_response = self.bearer_oauth_get_endpoint(rendered_endpoint)
     filtered = jq(data_source.filter).transform(raw_response)
     return pd.DataFrame(filtered)
Esempio n. 9
0
def test_apply_parameter_to_query_int_param():
    """
    It should work when a parameter is an int
    """
    parameters = {'param1': 'yo', 'param2': 1}
    pipeline = [{'$match': {'domain': '%(param1)s', 'cat': '%(param2)s'}}]
    rendered = nosql_apply_parameters_to_query(pipeline, parameters)
    # The int parameter must be rendered as an int, not as its string form
    assert rendered == [{'$match': {'domain': 'yo', 'cat': 1}}]
Esempio n. 10
0
    def _retrieve_data(self, data_source: RevinateDataSource) -> pd.DataFrame:
        """
        Primary function and point of entry: render the endpoint with the
        data source's params, fetch it with the data source's jq filter and
        return the result as a dataframe.
        """
        rendered_endpoint = nosql_apply_parameters_to_query(
            query=data_source.endpoint, parameters=data_source.params
        )
        fetched = self._run_fetch(rendered_endpoint, jq_filter=data_source.filter)
        return pd.DataFrame(fetched)
def normalize_query(query, parameters):
    """
    Render `query` with `parameters` and return it as a list of stages.

    A dict query is wrapped into a single `$match` stage. Any `$sort` stage
    given as a list of dicts is converted to a `SON`, so that the order of
    the sort keys is kept.
    """
    rendered = nosql_apply_parameters_to_query(query, parameters)
    pipeline = [{'$match': rendered}] if isinstance(rendered, dict) else rendered

    for step in pipeline:
        if '$sort' not in step:
            continue
        sort_spec = step['$sort']
        if isinstance(sort_spec, list):
            # Allow ordered sorts (note: `popitem` empties the original dicts)
            step['$sort'] = SON([entry.popitem() for entry in sort_spec])

    return pipeline
    def _retrieve_data(self, data_source: AircallDataSource) -> pd.DataFrame:
        """
        Fetch the data page by page and return all the rows as a dataframe.

        Pages are requested until the data source's limit is exhausted or
        `_get_page_data` reports the last page. A limit of -1 means that no
        limit is applied.
        """
        endpoint = nosql_apply_parameters_to_query(data_source.endpoint,
                                                   data_source.parameters)
        query = nosql_apply_parameters_to_query(data_source.query,
                                                data_source.parameters)

        if data_source.limit == -1:
            remaining = float('inf')
        else:
            remaining = data_source.limit

        rows = []
        page_number = 1
        reached_end = False

        while remaining > 0 and not reached_end:
            # Never ask for more than a full page, nor more than what is left
            if remaining > PER_PAGE:
                page_size = PER_PAGE
            else:
                page_size = remaining

            page_rows, reached_end = self._get_page_data(
                endpoint, query, data_source.filter, page_number, page_size)

            rows.extend(page_rows)
            page_number += 1
            remaining -= page_size

        return pd.DataFrame(rows)
Esempio n. 13
0
 def _render_query(self, data_source):
     """
     Render the data source (dumped by alias) with its parameters and,
     when the connector has a template, merge the template into it.

     For every key present in both, the non-empty template value is used
     as a base and updated with the data source value when the latter is
     truthy.
     """
     rendered = nosql_apply_parameters_to_query(
         data_source.dict(by_alias=True),
         data_source.parameters,
         handle_errors=True)
     if not self.template:
         return rendered

     # Only the non-empty template entries take part in the merge
     defaults = {}
     for key, value in self.template.dict(by_alias=True).items():
         if value:
             defaults[key] = value

     for key in rendered.keys() & defaults.keys():
         merged = defaults[key]
         if rendered[key]:
             merged.update(rendered[key])
         rendered[key] = merged
     return rendered
Esempio n. 14
0
    def get_df(self, data_source: HttpAPIDataSource) -> pd.DataFrame:
        """
        Render the data source with its parameters, merge the connector's
        template into it (when there is one), perform the request and return
        the result as a dataframe.

        The session comes from `self.auth` when authentication is configured,
        otherwise a plain `Session` is used.
        """
        session = self.auth.get_session() if self.auth else Session()

        rendered = nosql_apply_parameters_to_query(data_source.dict(),
                                                   data_source.parameters)

        if self.template:
            # Only the non-empty template entries take part in the merge
            defaults = {}
            for key, value in self.template.dict().items():
                if value:
                    defaults[key] = value

            for key in rendered.keys() & defaults.keys():
                merged = defaults[key]
                if rendered[key]:
                    merged.update(rendered[key])
                rendered[key] = merged

        return pd.DataFrame(self.do_request(rendered, session))
Esempio n. 15
0
    def _retrieve_data(self, data_source: RokDataSource) -> pd.DataFrame:
        """
        Run the data source's query against the `/graphql` endpoint of the
        host and return the jq-filtered response as a dataframe.

        `start_date`, `end_date` and `viewId` from the data source are made
        available as query parameters; they take precedence over entries of
        the same name in `data_source.parameters`.

        NOTE: `data_source.query` is overwritten with its rendered version.

        Raises `InvalidAuthenticationMethodError` when token authentication
        is used outside of live data mode, `NoROKSecretAvailableError` when
        token authentication is used without a secret, and `ValueError` when
        the response contains an `errors` key.
        """
        # Endpoint depends on the authentication mode
        endpoint = f'{self.host}/graphql'
        date_viewid_parameters = {
            'start_date': data_source.start_date,
            'end_date': data_source.end_date,
            'viewId': data_source.viewId,
        }

        if data_source.parameters:
            # Later entries win: the date/viewId values override user parameters
            parameters = {**data_source.parameters, **date_viewid_parameters}
        else:
            parameters = date_viewid_parameters
        data_source.query = nosql_apply_parameters_to_query(
            data_source.query, parameters)

        if self.authenticated_with_token:
            if not data_source.live_data:
                raise InvalidAuthenticationMethodError(
                    """Request with ROK token is not possible while not
                     in live data mode. Change the connector configuration to live data"""
                )
            if not self.secret:
                raise NoROKSecretAvailableError('secrets not defined')
            res = self.retrieve_data_with_jwt(data_source, endpoint)

        else:
            # Password mode: the database name is passed in the query string
            endpoint = f'{endpoint}?DatabaseName={data_source.database}'
            # First retrieve the authentication token
            rok_token = self.retrieve_token_with_password(
                data_source.database, endpoint)
            # Then retrieve the data
            payload = {'query': data_source.query}
            res = requests.post(endpoint,
                                json=payload,
                                headers={
                                    'Token': rok_token
                                }).json()

        # Errors reported in the response body are surfaced as exceptions
        if 'errors' in res:
            raise ValueError(str(res['errors']))

        return pd.DataFrame(transform_with_jq(res, data_source.filter))
Esempio n. 16
0
def test_apply_params_with_missing_param():
    """
    It should drop the entries of the query that reference a missing parameter

    Each case is a `(query, params, expected)` tuple; both the `%(name)s`
    and the `{{ name }}` placeholder syntaxes are exercised.
    """
    tests = [
        (
            {'domain': 'blah', 'country': {'$ne': '%(country)s'}, 'city': '%(city)s'},  # query
            {'city': 'Paris'},  # params
            {'domain': 'blah', 'country': {}, 'city': 'Paris'},  # expected
        ),
        (
            [{'$match': {'country': '%(country)s', 'city': 'Test'}}, {'$match': {'b': 1}}],
            {'city': 'Paris'},
            [{'$match': {'city': 'Test'}}, {'$match': {'b': 1}}],
        ),
        # A single missing parameter drops the whole entry
        ({'code': '%(city)s_%(country)s', 'domain': 'Test'}, {'city': 'Paris'}, {'domain': 'Test'}),
        (
            {'code': '%(city)s_%(country)s', 'domain': 'Test'},
            {'city': 'Paris', 'country': 'France'},
            {'code': 'Paris_France', 'domain': 'Test'},
        ),
        # Same cases with the `{{ ... }}` syntax
        (
            {'domain': 'blah', 'country': {'$ne': '{{country}}'}, 'city': '{{city}}'},
            {'city': 'Paris'},
            {'domain': 'blah', 'country': {}, 'city': 'Paris'},
        ),
        (
            [{'$match': {'country': '{{country["name"]}}', 'city': 'Test'}}, {'$match': {'b': 1}}],
            {'city': 'Paris'},
            [{'$match': {'city': 'Test'}}, {'$match': {'b': 1}}],
        ),
        (
            {'code': '{{city}}_{{country[0]}}', 'domain': 'Test'},
            {'city': 'Paris'},
            {'domain': 'Test'},
        ),
        (
            {'code': '{{city}}_{{country}}', 'domain': 'Test'},
            {'city': 'Paris', 'country': 'France'},
            {'code': 'Paris_France', 'domain': 'Test'},
        ),
        # No parameters at all
        ({'code': '{{city}}_{{country}}', 'domain': 'Test'}, None, {'domain': 'Test'}),
    ]
    for (query, params, expected) in tests:
        assert nosql_apply_parameters_to_query(query, params) == expected
Esempio n. 17
0
    def get_df(self, data_source):
        """
        Run the data source's query against its MongoDB collection and
        return the documents as a dataframe.

        A string query is turned into a `{'domain': <query>}` filter. After
        rendering, a dict query is run with `find` and a list query with
        `aggregate`; any other type yields an empty dataframe.

        NOTE: `data_source.query` is overwritten with its rendered version.

        :param data_source: the data source holding the collection name,
        the query and its parameters
        :return: a dataframe built from the returned documents
        """
        client = pymongo.MongoClient(self.uri, ssl=self.ssl)
        # try/finally so that the client is closed even when rendering the
        # query or running it raises
        try:
            col = client[self.database][data_source.collection]

            if isinstance(data_source.query, str):
                data_source.query = {'domain': data_source.query}
            data_source.query = handle_missing_params(data_source.query,
                                                      data_source.parameters)
            data_source.query = nosql_apply_parameters_to_query(
                data_source.query, data_source.parameters)
            data = []
            if isinstance(data_source.query, dict):
                data = col.find(data_source.query)
            elif isinstance(data_source.query, list):
                data = col.aggregate(data_source.query)
            # The cursor must be consumed before the client is closed
            return pd.DataFrame(list(data))
        finally:
            client.close()
Esempio n. 18
0
def test_apply_parameter_to_query_in_expression():
    """
    It should work when a parameter is in an expression (e.g. OData)
    """
    odata_query = {
        'entity': 'books',
        'query': {'$filter': "title eq '%(title)s'", '$top': "%(top)s"},
    }
    rendered = nosql_apply_parameters_to_query(
        odata_query, {"title": "the overstory", "top": 3}
    )
    assert rendered == {
        'entity': 'books',
        'query': {'$filter': "title eq 'the overstory'", '$top': 3},
    }
Esempio n. 19
0
    def get_df(self, data_source: GoogleAnalyticsDataSource) -> pd.DataFrame:
        """
        Run the data source's report request (rendered with its parameters)
        and return all the result pages concatenated in a single dataframe.

        Pages are fetched as long as the response carries a `nextPageToken`.
        """
        credentials = ServiceAccountCredentials.from_json_keyfile_dict(
            self.credentials.dict(), self.scope
        )
        service = build(API, VERSION, credentials=credentials)

        rendered_request = nosql_apply_parameters_to_query(
            data_source.report_request.dict(), data_source.parameters
        )
        report_request = ReportRequest(**rendered_request)

        frames = []
        while True:
            report = get_query_results(service, report_request)
            page = get_dict_from_response(report, report_request.dateRanges)
            frames.append(pd.DataFrame(page))
            if 'nextPageToken' not in report:
                break
            # Ask for the following page on the next iteration
            report_request.pageToken = report['nextPageToken']

        return pd.concat(frames)
Esempio n. 20
0
    def _retrieve_data(self,
                       data_source: ElasticsearchDataSource) -> pd.DataFrame:
        """
        Render the request body with the data source's parameters, run the
        data source's search method on an Elasticsearch client built from
        `self.hosts` and return the normalized result as a dataframe.

        NOTE: `data_source.body` is overwritten with its rendered version.
        """
        data_source.body = nosql_apply_parameters_to_query(
            data_source.body, data_source.parameters)
        # One connection description (dict) per configured host
        connection_params = []
        for host in self.hosts:
            parsed_url = urlparse(host.url)
            h = {"host": parsed_url.hostname}

            if parsed_url.path and parsed_url.path != "/":
                h["url_prefix"] = parsed_url.path
            if parsed_url.scheme == "https":
                # https: fall back to port 443 when none is configured
                h["port"] = host.port or 443
                h["use_ssl"] = True
            elif host.port:
                h["port"] = host.port

            if host.username or host.password:
                h["http_auth"] = f"{host.username}:{host.password}"
            if host.headers:
                h['headers'] = host.headers
            connection_params.append(h)

        esclient = Elasticsearch(connection_params,
                                 send_get_body_as=self.send_get_body_as)
        # The search method to call is looked up by name on the client
        response = getattr(esclient,
                           data_source.search_method)(index=data_source.index,
                                                      body=data_source.body)

        if data_source.search_method == SearchMethod.msearch:
            res = []
            # Body alternate index and query `[index, query, index, query...]`
            queries = data_source.body[1::2]
            # NOTE(review): `query` is unused; the zip only bounds the loop
            # to the number of queries found in the body
            for query, data in zip(queries, response['responses']):
                res += _read_response(data)
        else:
            res = _read_response(response)

        df = json_normalize(res)
        return df
Esempio n. 21
0
def build_query_members(organization: str, name: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of team members
    from Github's API

    The organization and team names are injected through the
    `%(organization)s` and `%(team)s` placeholders.

    :param organization: the organization name from which the
    members data will be extracted
    :param name: the team name (slug) from which the
    members data will be extracted
    :return: graphql query with the organization and team names substituted
    """
    # The template previously hard-coded `login: "******"`, so the
    # `organization` parameter was never interpolated into the query.
    return nosql_apply_parameters_to_query(
        """
    query members($cursor: String){
      organization(login: "%(organization)s") {
        team(slug: "%(team)s"){
            members(first: 100, orderBy: {field: LOGIN, direction: ASC},
             after: $cursor) {
              edges {
                node {
                  login
                }
              }
              pageInfo {
                hasNextPage
                endCursor
              }
            }
        }
      }
}
""",
        {
            'organization': organization,
            'team': name
        },
    )
Esempio n. 22
0
def test_nosql_apply_parameters_to_query(query, params, expected):
    """
    It should render `query` with `params` into `expected`

    NOTE(review): the three arguments are presumably supplied by a pytest
    parametrization that is not visible here - confirm.
    """
    rendered = nosql_apply_parameters_to_query(query, params)
    assert rendered == expected
Esempio n. 23
0
def build_query_pr(organization: str, name: str) -> str:
    """
    Builds the GraphQL query to retrieve a list of pull requests
    from Github's API

    The organization and repository names are injected through the
    `%(organization)s` and `%(repo_name)s` placeholders.

    :param organization: the organization name from which the
    pull requests data will be extracted
    :param name: a str representing the repository to extract the PRs from
    :return: graphql query with the organization and repository names
    substituted
    """
    # The template previously hard-coded `login: "******"`, so the
    # `organization` parameter was never interpolated into the query.
    return nosql_apply_parameters_to_query(
        """query pr($cursor: String) {
          organization(login: "%(organization)s") {
            repository(name: "%(repo_name)s") {
                name
                pullRequests(orderBy: {field: CREATED_AT, direction: DESC},
                 first: 100, after: $cursor) {
                  nodes {
                    createdAt
                    mergedAt
                    deletions
                    additions
                    title
                    state
                    labels(orderBy: {field: NAME, direction: ASC}, last: 10) {
                      edges {
                        node {
                          name
                        }
                      }
                    }
                    commits(first: 1) {
                      edges {
                        node {
                          commit {
                            author {
                              user {
                                login
                              }
                            }
                          }
                        }
                      }
                    }
                  }
                    pageInfo {
                      hasNextPage
                      endCursor
                    }
                  }
                }
              }
              rateLimit {
                remaining
                resetAt
              }
            }""",
        {
            'organization': organization,
            'repo_name': name
        },
    )
Esempio n. 24
0
def test_apply_parameter_to_query():
    """
    It should render all parameters

    Each case is a `(query, params, expected)` tuple; both the `%(name)s`
    and the `{{ name }}` placeholder syntaxes are exercised.
    """
    tests = [
        # Expression evaluated inside `{{ ... }}`
        (
            {'$match': {'domain': 'truc', 'indic': '{{my_indic[0]*my_indic[1]}}'}},  # query
            {'my_indic': [5, 6]},  # params
            {'$match': {'domain': 'truc', 'indic': 30}},  # expected
        ),
        # `{% if %}` block
        (
            {'$match': {'domain': 'truc', 'indic': '{%if my_indic%}1{%else%}2{%endif%}'}},
            {'my_indic': False},
            {'$match': {'domain': 'truc', 'indic': 2}},
        ),
        # List parameter accessed by index, `{{ ... }}` syntax
        (
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '{{my_indic[0]}}'},
                        {'indic1': '{{my_indic[1]}}', 'indic2': 'yo_{{my_indic[2]}}'},
                        {'indic_list': '{{my_indic}}'},
                    ]
                }
            },
            {'my_indic': ['0', 1, '2']},
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '0'},
                        {'indic1': 1, 'indic2': 'yo_2'},
                        {'indic_list': ['0', 1, '2']},
                    ]
                }
            },
        ),
        # Same expectations with the `%(name)s` syntax
        (
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '%(my_indic_0)s'},
                        {'indic1': '%(my_indic_1)s', 'indic2': 'yo_%(my_indic_2)s'},
                        {'indic_list': '%(my_indic)s'},
                    ]
                }
            },
            {'my_indic_0': '0', 'my_indic_1': 1, 'my_indic_2': '2', 'my_indic': ['0', 1, '2']},
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '0'},
                        {'indic1': 1, 'indic2': 'yo_2'},
                        {'indic_list': ['0', 1, '2']},
                    ]
                }
            },
        ),
        # Dict parameter accessed by key
        (
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '{{my_indic["zero"]}}'},
                        {'indic1': '{{my_indic["one"]}}', 'indic2': 'yo_{{my_indic["two"]}}'},
                        {'indic_list': '{{my_indic}}'},
                    ]
                }
            },
            {'my_indic': {'zero': '0', 'one': 1, 'two': '2'}},
            {
                '$match': {
                    '$and': [
                        {'domain': 'truc', 'indic0': '0'},
                        {'indic1': 1, 'indic2': 'yo_2'},
                        {'indic_list': {'zero': '0', 'one': 1, 'two': '2'}},
                    ]
                }
            },
        ),
        # A string parameter holding JSON must stay a string
        (
            {'data': '%(fakirQuery)s'},
            {
                'fakirQuery': '[{"values":["bibou"],"chartParam":"test","type":"test","name":"test"}]'
            },
            {'data': '[{"values":["bibou"],"chartParam":"test","type":"test","name":"test"}]'},
        ),
        # Values without placeholders keep their type
        ({'data': 1}, {}, {'data': 1}),
        ({'data': '1'}, {}, {'data': '1'}),
    ]
    for (query, params, expected) in tests:
        assert nosql_apply_parameters_to_query(query, params) == expected