コード例 #1
0
ファイル: utils.py プロジェクト: uktrade/data-hub-korben
def cdms_list(client, entity_name, offset):
    '''
    Fetch one page of `entity_name` from CDMS, starting at `offset`.

    A page that is already cached is returned straight from redis. Otherwise
    `client.list` is called (a new `CDMSRestApi` is built when `client` is
    None), the response is handed to `raise_on_cdms_resp_errors`, and then
    the request duration in whole seconds and the decoded response body are
    both written to redis before the raw `resp.content` is returned.
    '''
    hit, redis_key = is_cached(entity_name, offset)
    if hit:
        # downloaded on an earlier run; skip the network entirely
        return services.redis.get(redis_key)

    # the timer deliberately starts before the client is (maybe) constructed
    began = datetime.datetime.now()
    api = CDMSRestApi() if client is None else client
    resp = api.list(entity_name, skip=offset)
    elapsed = (datetime.datetime.now() - began).seconds

    # raises for a bad response, otherwise does nothing
    raise_on_cdms_resp_errors(entity_name, offset, resp)

    # remember how long the request took, then cache the decoded body
    services.redis.set(duration_record(entity_name, offset), str(elapsed))
    body_text = resp.content.decode(resp.encoding or 'utf-8')
    services.redis.set(redis_key, body_text)
    LOGGER.info("{0} ({1}) {2}s".format(entity_name, offset, elapsed))
    return resp.content
コード例 #2
0
    def test_setup_session_if_cookie_expired(self):
        """
        An expired cookie triggers one transparent re-login and retry.

        The mocked endpoint answers 401 to the first request and 200 to every
        later one, so `make_request` only returns the body if the client
        re-authenticates and repeats the call.
        """
        url = 'https://test/'
        body_response = 'success'

        def endpoint_callback():
            seen = 0  # number of requests the mocked endpoint has received

            def wrapper(request):
                nonlocal seen
                first_call = seen == 0
                seen += 1
                status_code = 401 if first_call else 200
                return (status_code, [], json.dumps({'d': body_response}))

            return wrapper

        responses.add_callback(
            responses.GET,
            url,
            callback=endpoint_callback(),
            match_querystring=True,
        )
        self.mock_initial_login()
        for step in (1, 2, 3):
            self.mock_login_step(step)

        client = CDMSRestApi()
        result = client.make_request('get', url)
        self.assertEqual(result, body_response)
        self.assertTrue(client.auth.session)
コード例 #3
0
    def test_create(self):
        """Create issues a single request carrying `self.data` as JSON."""
        client = CDMSRestApi()
        result = client.create(self.service, data=self.data)

        self.assertEqual(len(responses.calls), 1)
        self.assertEqual(result, 'something')
        sent_body = json.loads(responses.calls[0].request.body)
        self.assertDictEqual(sent_body, self.data)
コード例 #4
0
    def test_defaults(self):
        """
        Listing without extra arguments sends the default paging parameters.
        """
        CDMSRestApi().list(self.service)

        self.assertEqual(len(responses.calls), 1)
        sent_query = urlparse(responses.calls[0].request.url).query
        self.assertEqual(sent_query, '$top=50&$skip=0&')
コード例 #5
0
    def test_order_by_as_string(self):
        """
        Listing accepts `order_by` given as a plain string rather than a list.
        """
        CDMSRestApi().list(self.service, order_by='something')

        self.assertEqual(len(responses.calls), 1)
        sent_query = urlparse(responses.calls[0].request.url).query
        self.assertEqual(sent_query, '$top=50&$skip=0&$orderby=something')
コード例 #6
0
ファイル: huey_tasks.py プロジェクト: uktrade/data-hub-korben
def delete_odata(odata_tablename, ident):
    '''
    Delete the entity `ident` from `odata_tablename` through CDMS.

    Returns whether the final response had status 204. If the first
    response body cannot be parsed as JSON, the session is set up again and
    the delete is sent a second time; a JSON error on that second response
    is not caught and propagates to the caller.
    '''
    cdms_client = CDMSRestApi()
    entity_key = "guid'{0}'".format(ident)

    first = cdms_client.delete(odata_tablename, entity_key)
    print(first)
    try:
        first.json()  # TODO: handle deauth (could raise json.JSONDecodeError)
        return first.status_code == 204
    except json.JSONDecodeError:
        # unparseable body: force a fresh session before the one retry
        cdms_client.auth.setup_session(True)

    second = cdms_client.delete(odata_tablename, entity_key)
    print(second)
    second.json()
    return second.status_code == 204
コード例 #7
0
    def test_complete(self):
        """
        Listing with every parameter set builds the complete query string.
        """
        list_kwargs = dict(
            top=10,
            skip=1,
            select=['a', 'b'],
            filters='c,d',
            order_by=['e', 'f'],
        )
        CDMSRestApi().list(self.service, **list_kwargs)

        self.assertEqual(len(responses.calls), 1)
        sent_query = urlparse(responses.calls[0].request.url).query
        self.assertEqual(
            sent_query,
            '$top=10&$skip=1&$filter=c,d&$orderby=e,f&$select=a,b')
コード例 #8
0
def fetch_missing(metadata, missing, attempts=0):
    '''
    Backfill rows that an earlier load reported as missing.

    `missing` is indexed by Django table name and yields the guids to fetch
    for that table. Tables are visited in the order produced by
    `django_tables_dep_order`. Each guid is fetched with `utils.get_django`
    and the truthy results are loaded with
    `etl.load.to_sqla_table_idempotent`. As soon as one load reports rows
    that are still missing, the function recurses with that report and an
    incremented `attempts`; nothing is done once `attempts` reaches
    `constants.DJANGO_INITIAL_MISSING_ATTEMPTS`. Always returns None.
    '''
    if not (attempts < constants.DJANGO_INITIAL_MISSING_ATTEMPTS):
        return  # retry budget used up

    client = CDMSRestApi()
    for _, django_name in django_tables_dep_order(metadata):
        guids = missing[django_name]
        if not guids:
            continue
        LOGGER.info('Backfilling %s entries for %s after %s attempts',
                    len(guids), django_name, attempts)
        table = metadata.tables[django_name]
        # each fetch result is a pair; only its second item is used here
        fetched = [
            utils.get_django(client, table.name, guid) for guid in guids
        ]
        loadable = [row for _, row in fetched if row]
        _results, still_missing = etl.load.to_sqla_table_idempotent(
            table, loadable)
        # a second item that is exactly False is counted as non-existent
        absent = sum(1 for _, row in fetched if row is False)
        if absent:
            LOGGER.info('%s has %s non-existant entries', django_name,
                        absent)
        if still_missing:
            return fetch_missing(
                metadata, still_missing, attempts=attempts + 1)
コード例 #9
0
    def test_reuse_existing_cookie(self):
        """
        With a cookie already stored, the client starts without auth calls.
        """
        self.mock_cookie()

        client = CDMSRestApi()
        request_count = len(responses.calls)
        self.assertEqual(request_count, 0)
        self.assertTrue(client.session)
コード例 #10
0
    def test_500(self):
        """
        A 500 from the endpoint surfaces as ErrorResponseException.
        """
        url = 'https://test/'
        responses.add(responses.GET, url, status=500, match_querystring=True)

        client = CDMSRestApi()
        with self.assertRaises(ErrorResponseException):
            client.make_request('get', url)
コード例 #11
0
    def test_404(self):
        """
        A 404 from the endpoint surfaces as CDMSNotFoundException.
        """
        url = 'https://test/'
        responses.add(responses.GET, url, status=404, match_querystring=True)

        client = CDMSRestApi()
        with self.assertRaises(CDMSNotFoundException):
            client.make_request('get', url)
コード例 #12
0
    def test_invalid_credentials(self):
        """
        Constructing CDMSRestApi raises LoginErrorException when the first
        login step is mocked to return errors.
        """
        self.mock_initial_login()
        self.mock_login_step(1, errors=True)

        self.assertRaises(LoginErrorException, CDMSRestApi)
コード例 #13
0
    def test_exception_with_initial_form(self):
        """
        Constructing CDMSRestApi raises UnexpectedResponseException when the
        initial login URL is mocked to answer with status 500.
        """
        self.mock_initial_login(status_code=500)

        self.assertRaises(UnexpectedResponseException, CDMSRestApi)
コード例 #14
0
    def test_first_successful_login(self):
        """
        Constructing CDMSRestApi with no stored cookie performs the login.

        After the three mocked login steps succeed, the cookie storage must
        exist and the client's auth object must hold a session.
        """
        self.mock_initial_login()
        for step in (1, 2, 3):
            self.mock_login_step(step)

        client = CDMSRestApi()

        self.assertTrue(self.cookie_storage.exists())
        self.assertTrue(client.auth.session)
コード例 #15
0
    def test_setup_session_tries_only_once_if_cookie_expired(self):
        """
        An endpoint that keeps answering 401 is retried once, then fails.

        The request must end in CDMSUnauthorizedException with six mocked
        calls recorded in total.
        """
        url = 'https://test/'
        responses.add(responses.GET, url, status=401, match_querystring=True)

        self.mock_initial_login()
        for step in (1, 2, 3):
            self.mock_login_step(step)

        client = CDMSRestApi()
        with self.assertRaises(CDMSUnauthorizedException):
            client.make_request('get', url)
        self.assertEqual(len(responses.calls), 6)
コード例 #16
0
ファイル: views.py プロジェクト: uktrade/data-hub-korben
def validate_credentials(request):
    """
    Report whether the CDMS credentials in the JSON request body can log in.

    Returns False after notifying Sentry when the username or password is
    missing or empty, or when reading the body or logging in raises
    ValueError or RequestException. Returns True otherwise.
    """
    cdms_cookie_path = uuid.uuid4().hex  # fresh random cookie path per call
    try:
        payload = request.json_body
        username = payload.get('username')
        password = payload.get('password')

        if not username or not password:
            SENTRY_CLIENT.captureMessage(
                'Missing credentials from validate-credentials request body')
            return False

        credentials = ActiveDirectoryAuth(
            username, password, cdms_cookie_path)
        CDMSRestApi(credentials).auth.login()
    except (ValueError, RequestException):
        SENTRY_CLIENT.captureException()
        return False
    return True
コード例 #17
0
 def cdms_client_fn(username, password):
     """
     Build a CDMSRestApi that authenticates with the given credentials.

     `cookie_path` is not a parameter; it is resolved from the surrounding
     scope.
     """
     return CDMSRestApi(auth=ActiveDirectoryAuth(
         username=username, password=password, cookie_path=cookie_path))
コード例 #18
0
ファイル: conftest.py プロジェクト: uktrade/data-hub-korben
def cdms_client():
    """
    Return a freshly constructed CDMSRestApi.

    Currently a thin placeholder with no setup or teardown around the
    client.
    """
    return CDMSRestApi()
コード例 #19
0
        )


def main(client, traversal_spec):
    '''
    Walk every row of the root OData table and traverse from each one.

    `traversal_spec` is a pair: `(root_table, root_pkey)` followed by the
    child specification. Rows of `root_table` are read in chunks through
    `select_chunks`; for each row, the value of its `root_pkey` attribute is
    passed to `traverse` together with the untouched child specification.
    '''
    root, children = traversal_spec
    root_table, root_pkey = root
    odata_metadata = services.db.get_odata_metadata()
    odata_table = odata_metadata.tables[root_table]
    select_all = sqla.select([odata_table])
    run_query = odata_metadata.bind.execute
    for chunk in select_chunks(run_query, odata_table, select_all):
        for row in chunk:
            traverse(
                client, odata_metadata, getattr(row, root_pkey), children)


if __name__ == '__main__':
    # Script entry point: build a client and traverse from AccountSet.
    client = CDMSRestApi()
    # First item: (root table, attribute read from each root row).
    # Second item: the child entries, handed to `traverse` unchanged.
    traversal_spec = (
        ('AccountSet', 'AccountId'),
        (
            ('ContactSet', 'ParentCustomerId/Id'),
            ('detica_interactionSet', 'optevia_Organisation/Id'),
        ),
    )
    main(client=client, traversal_spec=traversal_spec)
コード例 #20
0
 def test_delete(self):
     """Deleting an entity issues exactly one request."""
     CDMSRestApi().delete(self.service, self.guid)
     request_count = len(responses.calls)
     self.assertEqual(request_count, 1)
コード例 #21
0
 def test_exception_if_credentials_configured(self):
     """
     Constructing CDMSRestApi raises ImproperlyConfigured when the CDMS
     username / password settings are left blank.
     """
     self.assertRaises(ImproperlyConfigured, CDMSRestApi)
コード例 #22
0
    def test_get(self):
        """Fetching one entity issues one request and returns its payload."""
        client = CDMSRestApi()
        result = client.get(self.service, self.guid)

        request_count = len(responses.calls)
        self.assertEqual(request_count, 1)
        self.assertEqual(result, 'something')
コード例 #23
0
ファイル: main.py プロジェクト: uktrade/data-hub-korben
def main(names=None, client=None):
    '''
    Scrape CDMS entities in chunks and load each finished page via ETL.

    names: optional comma-separated string of entity names; when None every
        key of `etl.spec.MAPPINGS` is scraped.
    client: optional client passed to every `EntityChunk`. When falsy (the
        non-testing case) a login is forced up front and the process calls
        `exit(1)` once all chunks are done instead of returning.

    Entity names stored under `SPENT_KEY` in redis are skipped. For each
    remaining entity the cached offsets are inspected to decide where to
    resume, then a polling loop starts chunks on a multiprocessing pool,
    loads completed pages, marks spent entities in redis and re-authenticates
    at most once per tick when a page reports it was de-authenticated.
    '''
    if not client:  # assume this is not a testing case
        # force login to setup cookie to be used by subsequent client instances
        CDMSRestApi().auth.setup_session(True)
    if names is None:
        names = etl.spec.MAPPINGS.keys()
    else:
        names = set(names.split(','))
    pool = multiprocessing.Pool(processes=scrape_constants.PROCESSES)
    entity_chunks = []
    metadata = services.db.get_odata_metadata()
    spent = set(json.loads(services.redis.get(SPENT_KEY) or '[]'))
    len_spent = len(spent)
    if len_spent:
        # backslash is escaped explicitly; the logged text is unchanged
        LOGGER.info("Skipping {0} entity types \\o/".format(len_spent))
    to_scrape = names - spent
    LOGGER.info('Scraping the following entities:')
    for name in names:
        LOGGER.info('    %s %s', name, '✔' if name in to_scrape else '✘')
    for entity_name in to_scrape:
        try:
            # validate cache is in good shape (ie. no missing requests)
            cache_names = map(
                lambda path: path.split('/')[-1],
                services.redis.keys(
                    os.path.join('cache', 'json', entity_name, '*')))
            caches = sorted(map(int, cache_names))
            # Compare each cached offset with its real predecessor. Pairing
            # adjacent items avoids index -1 wrapping round to the last
            # offset, which used to report a gap on the very first item.
            for previous, offset in zip(caches, caches[1:]):
                if previous != offset - 50:
                    start = previous + 50
                    LOGGER.info('In a previous run %s broke at %s',
                                entity_name, start)
                    break
            else:
                # no gap found; max() raises ValueError for an empty cache,
                # which is handled below by starting from zero
                start = max(caches) + 50
        except (FileNotFoundError, ValueError):
            start = 0
        end = start + (scrape_constants.CHUNKSIZE * scrape_constants.PAGESIZE)
        entity_chunks.append(
            classes.EntityChunk(client, entity_name, start, end))
    last_report = 0
    final_tick = False

    while True:  # take a deep breath

        # use the magic of modulo
        now = datetime.datetime.now()
        report_conditions = (
            now.second,
            now.second % scrape_constants.INTERVAL == 0,
            last_report != now.second,
        )
        # NOTE(review): this spins without sleeping until the conditions
        # hold, and second 0 never qualifies because `now.second` is falsy
        if not all(report_conditions):
            continue  # this isn’t a report loop

        LOGGER.info("Tick at {0}".format(now.strftime("%Y-%m-%d %H:%M:%S")))

        last_report = now.second
        reauthd_this_tick = False

        # visit the chunks in a fresh random order on every tick
        for entity_chunk in random.sample(entity_chunks, len(entity_chunks)):

            if entity_chunk.state in (types.EntityChunkState.complete,
                                      types.EntityChunkState.spent):
                continue  # NOQA

            # how many tasks pending in total
            pending = sum(entity_chunk.pending()
                          for entity_chunk in entity_chunks)
            if pending <= scrape_constants.PROCESSES:  # throttling
                if entity_chunk.state == types.EntityChunkState.incomplete:
                    entity_chunk.start(pool)
            else:
                fmt_str = "Throttling {0.entity_name} ({0.offset_start}-{0.offset_end})"  # noqa: E501
                LOGGER.info(fmt_str.format(entity_chunk))

            for entity_page in entity_chunk.entity_pages:
                entity_page.poll()  # updates the state of the EntityPage
                if entity_page.state == types.EntityPageState.complete:
                    # make cheeky call to etl.load
                    results, _ = etl.main.from_odata_json(
                        metadata.tables[entity_page.entity_name],
                        utils.json_cache_key(entity_page.entity_name,
                                             entity_page.offset))
                    LOGGER.info("Records {0}-{1} went into {2}".format(
                        entity_page.offset,
                        entity_page.offset + sum(result.rowcount
                                                 for result in results),
                        entity_page.entity_name))
                    entity_page.state = types.EntityPageState.inserted
                if entity_page.state == types.EntityPageState.spent:
                    # make cheeky call to etl.load
                    try:
                        results, _ = etl.main.from_odata_json(
                            metadata.tables[entity_page.entity_name],
                            utils.json_cache_key(entity_page.entity_name,
                                                 entity_page.offset))
                        LOGGER.info("Records {0}-{1} went into {2}".format(
                            entity_page.offset,
                            entity_page.offset + sum(result.rowcount
                                                     for result in results),
                            entity_page.entity_name))
                    except TypeError:
                        # happens when spent EntityPage doesn’t have any data
                        pass
                    # if there is no pending requests, stop requesting this
                    # entity (it’s spent)
                    entitypage_states = set(x.state
                                            for x in entity_chunk.entity_pages)
                    if types.EntityPageState.pending not in entitypage_states:
                        entity_chunk.state = types.EntityChunkState.spent
                        # re-read the stored set so the update is based on
                        # the current redis value
                        spent = set(
                            json.loads(services.redis.get(SPENT_KEY) or '[]'))
                        spent.add(entity_chunk.entity_name)
                        services.redis.set(SPENT_KEY, json.dumps(tuple(spent)))
                        LOGGER.error("{0} ({1}) spent".format(
                            entity_page.entity_name, entity_page.offset))
                if entity_page.state == types.EntityPageState.deauthd:
                    # one forced login per tick is enough for every page
                    if not reauthd_this_tick:
                        CDMSRestApi().auth.setup_session(True)
                        reauthd_this_tick = True
                    entity_page.reset()
            entity_chunk.poll()  # update state of EntityChunk

        # ask if all the EntityChunks are done; a list (not a generator) so
        # it can be read by both all() and the progress count below
        done = [
            (entity_chunk.state == types.EntityChunkState.complete
             or entity_chunk.state == types.EntityChunkState.spent)
            for entity_chunk in entity_chunks]
        if all(done):
            if not final_tick:  # make sure last page is processed
                final_tick = True
                continue
            LOGGER.info('Waiting for Pool.close ...')
            pool.close()
            LOGGER.info('Waiting for Pool.join ...')
            pool.join()
            if not client:  # assume this is not a testing case
                exit(1)
            return
        LOGGER.info("{0}/{1} entity chunks report complete".format(
            sum(done), len(entity_chunks)))
        time.sleep(1)  # don’t spam
コード例 #24
0
 def test_exception_if_urls_not_configured(self):
     """
     Constructing CDMSRestApi raises ImproperlyConfigured when the CDMS URL
     settings are left blank.
     """
     self.assertRaises(ImproperlyConfigured, CDMSRestApi)