Beispiel #1
0
def cdms_list(client, entity_name, offset):
    '''
    Return one page of `entity_name` from CDMS, starting at `offset`.

    On a cache hit the value stored in redis under the cache path is returned
    and no request is made. Otherwise `client.list` is called (a fresh
    `CDMSRestApi` is built when `client` is None), the response is handed to
    `raise_on_cdms_resp_errors`, and both the elapsed whole seconds
    (`timedelta.seconds`) and the decoded body are written to redis before
    the raw `resp.content` is returned.
    '''
    hit, cache_path = is_cached(entity_name, offset)
    if hit:
        # cache hit: hand back the stored copy without touching the network
        return services.redis.get(cache_path)

    began = datetime.datetime.now()
    api = CDMSRestApi() if client is None else client
    resp = api.list(entity_name, skip=offset)  # the actual request
    elapsed = (datetime.datetime.now() - began).seconds

    # raises for a bad response, otherwise returns quietly
    raise_on_cdms_resp_errors(entity_name, offset, resp)

    # remember how long the request took, then cache the decoded body
    services.redis.set(duration_record(entity_name, offset), str(elapsed))
    encoding = resp.encoding or 'utf-8'
    services.redis.set(cache_path, resp.content.decode(encoding))
    LOGGER.info("{0} ({1}) {2}s".format(entity_name, offset, elapsed))
    return resp.content
    def test_setup_session_if_cookie_expired(self):
        """
        An expired cookie leads to one transparent re-login and retry.

        The mocked endpoint answers 401 to its first request and 200 to every
        later one, so `make_request` is expected to return the payload.
        """
        url = 'https://test/'
        body_response = 'success'

        def endpoint_callback():
            seen = []  # requests handled so far; empty only for the first hit

            def wrapper(request):
                status_code = 401 if not seen else 200
                seen.append(request)
                return (status_code, [], json.dumps({'d': body_response}))

            return wrapper

        responses.add_callback(
            responses.GET,
            url,
            match_querystring=True,
            callback=endpoint_callback(),
        )
        self.mock_initial_login()
        for step in (1, 2, 3):
            self.mock_login_step(step)

        client = CDMSRestApi()
        payload = client.make_request('get', url)
        self.assertEqual(payload, body_response)
        self.assertTrue(client.auth.session)
    def test_create(self):
        """
        `create` makes a single request whose JSON body equals `self.data`
        and returns the mocked payload.
        """
        client = CDMSRestApi()
        result = client.create(self.service, data=self.data)

        self.assertEqual(len(responses.calls), 1)
        self.assertEqual(result, 'something')
        sent_body = json.loads(responses.calls[0].request.body)
        self.assertDictEqual(sent_body, self.data)
    def test_defaults(self):
        """
        Listing without extra arguments sends only the default paging params.
        """
        client = CDMSRestApi()
        client.list(self.service)

        self.assertEqual(len(responses.calls), 1)
        query = urlparse(responses.calls[0].request.url).query
        self.assertEqual(query, '$top=50&$skip=0&')
    def test_order_by_as_string(self):
        """
        `order_by` may be given as a plain string rather than a list.
        """
        client = CDMSRestApi()
        client.list(self.service, order_by='something')

        self.assertEqual(len(responses.calls), 1)
        query = urlparse(responses.calls[0].request.url).query
        self.assertEqual(query, '$top=50&$skip=0&$orderby=something')
Beispiel #6
0
def delete_odata(odata_tablename, ident):
    '''
    Delete the entry `ident` from `odata_tablename` through the CDMS client.

    The response is printed and parsed as JSON. If parsing raises
    `json.JSONDecodeError` the session is set up again and the delete is
    repeated once; a decode error on that second attempt propagates.

    Returns True when the response status code is 204, False otherwise.
    '''
    cdms_client = CDMSRestApi()
    guid = "guid'{0}'".format(ident)

    resp = cdms_client.delete(odata_tablename, guid)
    print(resp)
    try:
        resp.json()  # TODO: handle deauth (could raise json.JSONDecodeError)
    except json.JSONDecodeError:
        # body was not JSON: force a fresh session before the single retry
        cdms_client.auth.setup_session(True)
    else:
        return resp.status_code == 204

    resp = cdms_client.delete(odata_tablename, guid)
    print(resp)
    resp.json()
    return resp.status_code == 204
    def test_complete(self):
        """
        Listing with every supported parameter builds the full query string.
        """
        params = {
            'top': 10,
            'skip': 1,
            'select': ['a', 'b'],
            'filters': 'c,d',
            'order_by': ['e', 'f'],
        }
        client = CDMSRestApi()
        client.list(self.service, **params)

        self.assertEqual(len(responses.calls), 1)
        query = urlparse(responses.calls[0].request.url).query
        self.assertEqual(
            query, '$top=10&$skip=1&$filter=c,d&$orderby=e,f&$select=a,b')
def fetch_missing(metadata, missing, attempts=0):
    '''
    Backfill the entries listed in `missing`, table by table.

    `missing` is indexed by Django table name and yields the guids to fetch.
    Tables are visited in the order given by `django_tables_dep_order`. Every
    guid is fetched with `utils.get_django` and the truthy results are loaded
    with `etl.load.to_sqla_table_idempotent`. As soon as a load reports
    entries that are still missing, the function recurses with those and
    `attempts + 1`, returning the result of that call.

    Does nothing once `attempts` has reached
    `constants.DJANGO_INITIAL_MISSING_ATTEMPTS`.
    '''
    if not (attempts < constants.DJANGO_INITIAL_MISSING_ATTEMPTS):
        return

    client = CDMSRestApi()
    for _, django_name in django_tables_dep_order(metadata):
        guids = missing[django_name]
        if not guids:
            continue
        LOGGER.info('Backfilling %s entries for %s after %s attempts',
                    len(guids), django_name, attempts)
        table = metadata.tables[django_name]
        fetch_one = functools.partial(utils.get_django, client, table.name)
        fetched = [fetch_one(guid) for guid in guids]
        # second element of each fetched pair is the row; skip falsy ones
        rows = [row for _, row in fetched if row]
        _results, still_missing = etl.load.to_sqla_table_idempotent(
            table, rows)
        # a row that is exactly False is counted as non-existent
        absent = sum(1 for _, row in fetched if row is False)
        if absent:
            LOGGER.info('%s has %s non-existant entries', django_name,
                        absent)
        if still_missing:
            return fetch_missing(
                metadata, still_missing, attempts=attempts + 1)
    def test_reuse_existing_cookie(self):
        """
        With a cookie already in place, constructing the client makes no
        HTTP calls and still yields a session.
        """
        self.mock_cookie()

        client = CDMSRestApi()
        recorded = len(responses.calls)
        self.assertEqual(recorded, 0)
        self.assertTrue(client.session)
    def test_500(self):
        """
        A 500 from the endpoint surfaces as ErrorResponseException.
        """
        url = 'https://test/'
        responses.add(responses.GET, url, match_querystring=True, status=500)

        client = CDMSRestApi()
        with self.assertRaises(ErrorResponseException):
            client.make_request('get', url)
    def test_404(self):
        """
        A 404 from the endpoint surfaces as CDMSNotFoundException.
        """
        url = 'https://test/'
        responses.add(responses.GET, url, match_querystring=True, status=404)

        client = CDMSRestApi()
        with self.assertRaises(CDMSNotFoundException):
            client.make_request('get', url)
    def test_invalid_credentials(self):
        """
        Constructing CDMSRestApi raises LoginErrorException when the first
        login step is mocked with errors.
        """
        self.mock_initial_login()
        self.mock_login_step(1, errors=True)

        self.assertRaises(LoginErrorException, CDMSRestApi)
    def test_exception_with_initial_form(self):
        """
        A 500 on the initial login url makes the constructor raise.

        The initial login is mocked with status 500; building the client is
        then expected to raise UnexpectedResponseException.
        """
        self.mock_initial_login(status_code=500)

        self.assertRaises(UnexpectedResponseException, CDMSRestApi)
    def test_first_successful_login(self):
        """
        The constructor logs in and stores the cookie on first use.

        With the initial login and all three login steps mocked, building
        the client should leave the cookie storage populated and a session
        available on `api.auth`.
        """
        self.mock_initial_login()
        for step in (1, 2, 3):
            self.mock_login_step(step)

        client = CDMSRestApi()

        self.assertTrue(self.cookie_storage.exists())
        self.assertTrue(client.auth.session)
    def test_setup_session_tries_only_once_if_cookie_expired(self):
        """
        An endpoint that keeps answering 401 is retried once, then fails.

        The request must end in CDMSUnauthorizedException, with six HTTP
        calls recorded in total.
        """
        url = 'https://test/'
        responses.add(responses.GET, url, match_querystring=True, status=401)

        self.mock_initial_login()
        for step in (1, 2, 3):
            self.mock_login_step(step)

        client = CDMSRestApi()
        with self.assertRaises(CDMSUnauthorizedException):
            client.make_request('get', url)
        self.assertEqual(len(responses.calls), 6)
Beispiel #16
0
def validate_credentials(request):
    '''
    Validate a set of CDMS credentials.

    Reads `username` and `password` from the JSON body of `request` and
    attempts a login through `ActiveDirectoryAuth`, using a freshly generated
    random cookie path.

    Returns True when the login call completes without raising. Returns
    False, after reporting to Sentry, when either credential is missing or
    empty, when the body is valid JSON but not an object, or when a
    `ValueError` / `RequestException` is raised along the way.
    '''
    cdms_cookie_path = uuid.uuid4().hex
    try:
        json_data = request.json_body
        # A body such as `[]` or `"x"` parses fine but has no `.get`; treat
        # it like a body without credentials instead of letting an
        # AttributeError escape.
        if not isinstance(json_data, dict):
            json_data = {}
        username = json_data.get('username')
        password = json_data.get('password')

        if not (username and password):
            SENTRY_CLIENT.captureMessage(
                'Missing credentials from validate-credentials request body')
            return False
        auth = ActiveDirectoryAuth(username, password, cdms_cookie_path)
        api_client = CDMSRestApi(auth)
        api_client.auth.login()
    except (ValueError, RequestException):
        SENTRY_CLIENT.captureException()
        return False
    return True
Beispiel #17
0
 def cdms_client_fn(username, password):
     """
     Build a `CDMSRestApi` authenticated with the given credentials.

     `cookie_path` is taken from the enclosing scope.
     """
     return CDMSRestApi(auth=ActiveDirectoryAuth(
         username=username,
         password=password,
         cookie_path=cookie_path,
     ))
Beispiel #18
0
def cdms_client():
    '''
    Return a newly constructed `CDMSRestApi`.

    Placeholder hook: disconnect management could be added here later.
    '''
    return CDMSRestApi()
Beispiel #19
0
        )


def main(client, traversal_spec):
    '''
    Walk every row of the root OData table and traverse from it.

    `traversal_spec` is a pair: the root as `(table name, primary-key
    attribute)` and the child entries that are passed on to `traverse`. Rows
    of the root table are read in chunks via `select_chunks`; for each row
    the primary-key value is looked up and `traverse` is called with it.

    Per the original description this downloads everything from company to
    contact to interaction and tees the data to the OData database and the
    Leeloo web API; that work happens inside `traverse`, not here.
    '''
    root, children = traversal_spec
    root_table, root_pkey = root

    metadata = services.db.get_odata_metadata()
    table = metadata.tables[root_table]
    query = sqla.select([table])
    chunks = select_chunks(metadata.bind.execute, table, query)

    for chunk in chunks:
        for row in chunk:
            traverse(client, metadata, getattr(row, root_pkey), children)


if __name__ == '__main__':
    # (table, primary-key attribute) of the root entity, as unpacked by main()
    root = ('AccountSet', 'AccountId')
    # child entries handed on to traverse(); presumably (table, path to the
    # parent guid) pairs -- confirm against traverse()
    children = (
        ('ContactSet', 'ParentCustomerId/Id'),
        ('detica_interactionSet', 'optevia_Organisation/Id'),
    )
    traversal_spec = (root, children)
    client = CDMSRestApi()
    main(client, traversal_spec)
 def test_delete(self):
     """
     Deleting by guid results in exactly one recorded request.
     """
     client = CDMSRestApi()
     client.delete(self.service, self.guid)
     recorded = len(responses.calls)
     self.assertEqual(recorded, 1)
 def test_exception_if_credentials_configured(self):
     """
     Constructing CDMSRestApi raises ImproperlyConfigured when the CDMS
     username / password settings are left blank.
     """
     self.assertRaises(ImproperlyConfigured, CDMSRestApi)
    def test_get(self):
        """
        `get` makes a single request and returns the mocked payload.
        """
        client = CDMSRestApi()
        result = client.get(self.service, self.guid)

        recorded = len(responses.calls)
        self.assertEqual(recorded, 1)
        self.assertEqual(result, 'something')
Beispiel #23
0
def _resume_offset(cached_offsets, page_size=50):
    '''
    Return the offset at which scraping of an entity should resume.

    `cached_offsets` are the offsets of the pages already cached. Pages are
    expected at 0, `page_size`, 2 * `page_size`, ...; the first expected
    offset that is absent is returned, so a hole left behind by an earlier
    run is requested again instead of being skipped. With nothing cached the
    result is 0.
    '''
    expected = 0
    for offset in sorted(cached_offsets):
        if offset > expected:
            break  # hole found: `expected` was never cached
        if offset == expected:
            expected += page_size
    return expected


def _load_entity_page(metadata, entity_page):
    '''
    Load the cached JSON of `entity_page` through `etl.main.from_odata_json`
    and log the range of records that went in.
    '''
    results, _ = etl.main.from_odata_json(
        metadata.tables[entity_page.entity_name],
        utils.json_cache_key(entity_page.entity_name, entity_page.offset))
    LOGGER.info("Records {0}-{1} went into {2}".format(
        entity_page.offset,
        entity_page.offset + sum(result.rowcount for result in results),
        entity_page.entity_name))


def main(names=None, client=None):
    '''
    Scrape entity types in chunks and load every finished page.

    names:  comma-separated entity names, or None for every key of
            `etl.spec.MAPPINGS`. Names already recorded under `SPENT_KEY`
            in redis are skipped.
    client: passed on to each `classes.EntityChunk`. When falsy, a login is
            forced up front so later client instances can reuse the cookie,
            and the process calls `exit(1)` once everything is done.

    For each entity the redis cache keys decide where to resume. A polling
    loop then starts chunks on a multiprocessing pool (throttled by the
    number of pending tasks), loads pages as they complete, re-authenticates
    at most once per tick when pages report being deauthenticated, and
    finishes when every chunk is complete or spent.
    '''
    if not client:  # assume this is not a testing case
        # force login to setup cookie to be used by subsequent client instances
        CDMSRestApi().auth.setup_session(True)
    if names is None:
        names = etl.spec.MAPPINGS.keys()
    else:
        names = set(names.split(','))
    pool = multiprocessing.Pool(processes=scrape_constants.PROCESSES)
    entity_chunks = []
    metadata = services.db.get_odata_metadata()
    spent = set(json.loads(services.redis.get(SPENT_KEY) or '[]'))
    len_spent = len(spent)
    if len_spent:
        # backslash escaped explicitly; "\o" is not a valid escape sequence
        LOGGER.info("Skipping {0} entity types \\o/".format(len_spent))
    to_scrape = names - spent
    LOGGER.info('Scraping the following entities:')
    for name in names:
        LOGGER.info('    %s %s', name, '✔' if name in to_scrape else '✘')
    for entity_name in to_scrape:
        try:
            # validate cache is in good shape (ie. no missing requests)
            cache_names = map(
                lambda path: path.split('/')[-1],
                services.redis.keys(
                    os.path.join('cache', 'json', entity_name, '*')))
            caches = sorted(map(int, cache_names))
            # Resume at the first hole in the cached offsets. The previous
            # index-based check compared element 0 with caches[-1] and so
            # never detected a hole.
            start = _resume_offset(caches)
            if caches and start <= caches[-1]:
                LOGGER.info('In a previous run %s broke at %s',
                            entity_name, start)
        except (FileNotFoundError, ValueError):
            start = 0
        end = start + (scrape_constants.CHUNKSIZE * scrape_constants.PAGESIZE)
        entity_chunks.append(
            classes.EntityChunk(client, entity_name, start, end))
    last_report = 0
    final_tick = False
    finished_states = (types.EntityChunkState.complete,
                       types.EntityChunkState.spent)

    while True:  # take a deep breath

        # use the magic of modulo: a tick needs a non-zero second that is a
        # multiple of INTERVAL and differs from the last reported second
        now = datetime.datetime.now()
        report_conditions = (
            now.second,
            now.second % scrape_constants.INTERVAL == 0,
            last_report != now.second,
        )
        if not all(report_conditions):
            # NOTE(review): this spins without sleeping until the next tick
            continue  # this isn’t a report loop

        LOGGER.info("Tick at {0}".format(now.strftime("%Y-%m-%d %H:%M:%S")))

        last_report = now.second
        reauthd_this_tick = False

        for entity_chunk in random.sample(entity_chunks, len(entity_chunks)):

            if entity_chunk.state in finished_states:
                continue  # NOQA

            # how many tasks pending in total
            pending = sum(chunk.pending() for chunk in entity_chunks)
            if pending <= scrape_constants.PROCESSES:  # throttling
                if entity_chunk.state == types.EntityChunkState.incomplete:
                    entity_chunk.start(pool)
            else:
                fmt_str = "Throttling {0.entity_name} ({0.offset_start}-{0.offset_end})"  # noqa: E501
                LOGGER.info(fmt_str.format(entity_chunk))

            for entity_page in entity_chunk.entity_pages:
                entity_page.poll()  # updates the state of the EntityPage
                if entity_page.state == types.EntityPageState.complete:
                    # make cheeky call to etl.load
                    _load_entity_page(metadata, entity_page)
                    entity_page.state = types.EntityPageState.inserted
                if entity_page.state == types.EntityPageState.spent:
                    # make cheeky call to etl.load
                    try:
                        _load_entity_page(metadata, entity_page)
                    except TypeError:
                        # deliberate best effort: happens when a spent
                        # EntityPage doesn’t have any data
                        pass
                    # if there is no pending requests, stop requesting this
                    # entity (it’s spent)
                    entitypage_states = set(x.state
                                            for x in entity_chunk.entity_pages)
                    if types.EntityPageState.pending not in entitypage_states:
                        entity_chunk.state = types.EntityChunkState.spent
                        spent = set(
                            json.loads(services.redis.get(SPENT_KEY) or '[]'))
                        spent.add(entity_chunk.entity_name)
                        services.redis.set(SPENT_KEY, json.dumps(tuple(spent)))
                        LOGGER.error("{0} ({1}) spent".format(
                            entity_page.entity_name, entity_page.offset))
                if entity_page.state == types.EntityPageState.deauthd:
                    if not reauthd_this_tick:
                        CDMSRestApi().auth.setup_session(True)
                        reauthd_this_tick = True
                    entity_page.reset()
            entity_chunk.poll()  # update state of EntityChunk

        # Ask if all the EntityChunks are done. This is a list, not a
        # generator: all() would consume a generator and leave the count
        # logged below wrong.
        done = [chunk.state in finished_states for chunk in entity_chunks]
        if all(done):
            if not final_tick:  # make sure last page is processed
                final_tick = True
                continue
            LOGGER.info('Waiting for Pool.close ...')
            pool.close()
            LOGGER.info('Waiting for Pool.join ...')
            pool.join()
            if not client:  # assume this is not a testing case
                # NOTE(review): exits with status 1 even though the run
                # finished normally -- confirm this is intended
                exit(1)
            return
        LOGGER.info("{0}/{1} entity chunks report complete".format(
            done.count(True), len(entity_chunks)))
        time.sleep(1)  # don’t spam
 def test_exception_if_urls_not_configured(self):
     """
     Constructing CDMSRestApi raises ImproperlyConfigured when the CDMS URL
     settings are left blank.
     """
     self.assertRaises(ImproperlyConfigured, CDMSRestApi)