예제 #1
0
def _hawk_api_request(
    url: str,
    credentials: dict,
    results_key: Optional[str],
    next_key: Optional[str],
    validate_response: Optional[bool] = True,
    force_http: Optional[bool] = False,
):
    """Perform one Hawk-signed GET request and return the decoded JSON body.

    Args:
        url: Endpoint to fetch. The HTTP request is always sent to this URL.
        credentials: Hawk credentials, passed unchanged to ``Sender``.
        results_key: If truthy, a key that must be present in the response
            JSON.
        next_key: If truthy, a key that must be present in the response JSON.
        validate_response: When truthy, the response is verified with
            ``Sender.accept_response`` using its ``Server-Authorization`` and
            ``Content-Type`` headers.
        force_http: When truthy, the request is *signed* as if ``url`` used
            the ``http`` scheme; the request itself still goes to ``url``.

    Returns:
        The parsed JSON response.

    Raises:
        requests.exceptions.HTTPError: Re-raised after logging when the
            response status indicates failure.
        HawkFail: Re-raised after logging when response validation fails.
        ValueError: When ``results_key`` or ``next_key`` is requested but
            missing from the response JSON.
    """
    # Currently data workspace denies hawk requests signed with https urls.
    # Once fixed the protocol replacement can be removed.
    signed_url = url
    if force_http and url.startswith("https://"):
        # Swap the scheme only. A blanket str.replace would also rewrite any
        # "https" found in the path or query string (e.g. a ?next=https://...
        # parameter) and produce a signature for the wrong URL.
        signed_url = "http://" + url[len("https://"):]

    sender = Sender(
        credentials,
        signed_url,
        "get",
        content="",
        content_type="",
        always_hash_content=True,
    )

    logger.info(f"Fetching page {url}")
    response = requests.get(
        url,
        headers={
            "Authorization": sender.request_header,
            "Content-Type": ""
        },
        timeout=300,
    )

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError:
        # Log the body so the failure reason is visible, then propagate.
        logger.warning(f"Request failed: {response.text}")
        raise

    if validate_response:
        try:
            sender.accept_response(
                response.headers["Server-Authorization"],
                content=response.content,
                content_type=response.headers["Content-Type"],
            )
        except HawkFail as e:
            logger.error(f"HAWK Authentication failed {str(e)}")
            raise

    response_json = response.json()

    # Only check for the keys the caller actually asked for.
    if (next_key and next_key not in response_json) or (
            results_key and results_key not in response_json):
        raise ValueError("Unexpected response structure")

    return response_json
예제 #2
0
    def handle(self, *args, **options):
        """Send a Hawk-signed request and report whether the reply verifies.

        Reads these entries from ``options``:

        * ``url``   -- target URL (required; ``CommandError`` if empty).
        * ``d``     -- request body; empty/None means no body.
        * ``X``     -- HTTP method name, used to pick the ``requests`` function.
        * ``creds`` -- identifier handed to ``lookup_credentials``.

        The request line, response text and response headers are printed. If
        the response carries a ``Server-Authorization`` header it is checked
        with ``Sender.accept_response``; otherwise a warning is printed.

        Raises:
            CommandError: if ``requests`` is not installed or no URL is given.
        """
        # Turn on mohawk's debug logging so the signing inputs are visible.
        hawk_log = logging.getLogger('mohawk')
        hawk_log.setLevel(logging.DEBUG)
        hawk_log.addHandler(logging.StreamHandler())

        try:
            import requests
        except ImportError:
            raise CommandError('To use this command you first need to '
                               'install the requests module')
        url = options['url']
        if not url:
            raise CommandError('Specify a URL to load with --url')

        qs = options['d'] or ''
        request_content_type = ('application/x-www-form-urlencoded'
                                if qs else 'text/plain')
        method = options['X']

        credentials = lookup_credentials(options['creds'])

        sender = Sender(credentials,
                        url,
                        method.upper(),
                        content=qs,
                        content_type=request_content_type)

        headers = {
            'Authorization': sender.request_header,
            'Content-Type': request_content_type
        }

        do_request = getattr(requests, method.lower())
        res = do_request(url, data=qs, headers=headers)

        # Single-argument print(...) calls behave the same on Python 2 and
        # are valid syntax on Python 3, unlike the print statement.
        print('{method} -d {qs} {url}'.format(method=method.upper(),
                                              qs=qs or 'None',
                                              url=url))
        print(res.text)

        # Verify we're talking to our trusted server.
        print(res.headers)
        auth_hdr = res.headers.get('Server-Authorization', None)
        if auth_hdr:
            sender.accept_response(auth_hdr,
                                   content=res.text,
                                   content_type=res.headers['Content-Type'])
            print('<response was Hawk verified>')
        else:
            print('** NO Server-Authorization header **')
            print('<response was NOT Hawk verified>')
예제 #3
0
    def handle(self, *args, **options):
        """Issue a Hawk-signed request and report if the reply is verified.

        Options consumed: ``url`` (required), ``creds`` (required), ``X``
        (HTTP method) and ``d`` (request body, may be empty). Output goes to
        ``self.stdout``.

        Raises:
            CommandError: when ``url`` or ``creds`` is missing/empty.
        """
        # mohawk logs the inputs of its signature functions at DEBUG level;
        # surface them on the console for troubleshooting.
        mohawk_logger = logging.getLogger('mohawk')
        mohawk_logger.setLevel(logging.DEBUG)
        mohawk_logger.addHandler(logging.StreamHandler())

        url = options['url']
        if not url:
            raise CommandError('Specify a URL to load with --url')

        creds_key = options['creds']
        if not creds_key:
            raise CommandError('Specify ID for Hawk credentials with --creds')

        method = options['X']
        body = options['d'] or ''
        # A non-empty body is sent as form data, otherwise as plain text.
        if body:
            content_type = 'application/x-www-form-urlencoded'
        else:
            content_type = 'text/plain'

        credentials = lookup_credentials(creds_key)
        verb = method.upper()

        sender = Sender(
            credentials, url, verb, content=body, content_type=content_type)

        response = request(
            url,
            method.lower(),
            data=body,
            headers={
                'Authorization': sender.request_header,
                'Content-Type': content_type,
            },
        )

        write = self.stdout.write
        write('{method} -d {qs} {url}'.format(
            method=verb, qs=body or 'None', url=url))
        write(response.text)
        write(str(response.headers))

        # Without a Server-Authorization header there is nothing to verify.
        server_auth = response.headers.get('Server-Authorization')
        if not server_auth:
            write('** NO Server-Authorization header **')
            write('<response was NOT Hawk verified>')
            return

        # Verify we're talking to our trusted server.
        sender.accept_response(
            server_auth,
            content=response.text,
            content_type=response.headers['Content-Type'],
        )
        write('<response was Hawk verified>')
예제 #4
0
    def handle(self, *args, **options):
        """Make one Hawk-authenticated request and print the outcome.

        Uses ``options['url']`` and ``options['creds']`` (both required),
        ``options['X']`` as the HTTP method and ``options['d']`` as the
        optional request body. Everything is written to ``self.stdout``.

        Raises:
            CommandError: if the URL or the credentials ID is not supplied.
        """
        # Enable mohawk debug output so signature inputs can be inspected.
        debug_log = logging.getLogger('mohawk')
        debug_log.setLevel(logging.DEBUG)
        debug_log.addHandler(logging.StreamHandler())

        url = options['url']
        if not url:
            raise CommandError('Specify a URL to load with --url')

        creds_key = options['creds']
        if not creds_key:
            raise CommandError('Specify ID for Hawk credentials with --creds')

        method = options['X']
        payload = options['d'] or ''
        if payload:
            payload_type = 'application/x-www-form-urlencoded'
        else:
            payload_type = 'text/plain'

        credentials = lookup_credentials(creds_key)

        sender = Sender(
            credentials,
            url,
            method.upper(),
            content=payload,
            content_type=payload_type,
        )

        request_headers = {
            'Authorization': sender.request_header,
            'Content-Type': payload_type,
        }
        reply = request(url, method.lower(), data=payload,
                        headers=request_headers)

        summary = '{method} -d {qs} {url}'.format(
            method=method.upper(), qs=payload or 'None', url=url)
        self.stdout.write(summary)
        self.stdout.write(reply.text)
        self.stdout.write(str(reply.headers))

        # Verify we're talking to our trusted server, when it signed the
        # response at all.
        signature = reply.headers.get('Server-Authorization')
        if not signature:
            self.stdout.write('** NO Server-Authorization header **')
            self.stdout.write('<response was NOT Hawk verified>')
            return

        sender.accept_response(
            signature,
            content=reply.text,
            content_type=reply.headers['Content-Type'],
        )
        self.stdout.write('<response was Hawk verified>')
예제 #5
0
    def handle(self, *args, **options):
        """Send a Hawk-signed request and report whether the reply verifies.

        Reads these entries from ``options``:

        * ``url``   -- target URL (required; ``CommandError`` if empty).
        * ``d``     -- request body; empty/None means no body.
        * ``X``     -- HTTP method name, used to pick the ``requests`` function.
        * ``creds`` -- identifier handed to ``lookup_credentials``.

        The request line, response text and response headers are printed. If
        the response carries a ``Server-Authorization`` header it is checked
        with ``Sender.accept_response``; otherwise a warning is printed.

        Raises:
            CommandError: if ``requests`` is not installed or no URL is given.
        """
        # Turn on mohawk's debug logging so the signing inputs are visible.
        hawk_log = logging.getLogger('mohawk')
        hawk_log.setLevel(logging.DEBUG)
        hawk_log.addHandler(logging.StreamHandler())

        try:
            import requests
        except ImportError:
            raise CommandError('To use this command you first need to '
                               'install the requests module')
        url = options['url']
        if not url:
            raise CommandError('Specify a URL to load with --url')

        qs = options['d'] or ''
        request_content_type = ('application/x-www-form-urlencoded'
                                if qs else 'text/plain')
        method = options['X']

        credentials = lookup_credentials(options['creds'])

        sender = Sender(credentials,
                        url, method.upper(),
                        content=qs,
                        content_type=request_content_type)

        headers = {'Authorization': sender.request_header,
                   'Content-Type': request_content_type}

        do_request = getattr(requests, method.lower())
        res = do_request(url, data=qs, headers=headers)

        # Single-argument print(...) calls behave the same on Python 2 and
        # are valid syntax on Python 3, unlike the print statement.
        print('{method} -d {qs} {url}'.format(method=method.upper(),
                                              qs=qs or 'None',
                                              url=url))
        print(res.text)

        # Verify we're talking to our trusted server.
        print(res.headers)
        auth_hdr = res.headers.get('Server-Authorization', None)
        if auth_hdr:
            sender.accept_response(auth_hdr,
                                   content=res.text,
                                   content_type=res.headers['Content-Type'])
            print('<response was Hawk verified>')
        else:
            print('** NO Server-Authorization header **')
            print('<response was NOT Hawk verified>')
예제 #6
0
    def send(self, value):
        """
        Serialize ``value`` to JSON and POST it, Hawk-signed, to ``self.url``.

        :param value: JSON-serializable data to send
        :return: tuple ``(link, etl_id)`` read from the JSON response body
                 (``link`` and ``etl.id`` keys); per the original author these
                 are the URL where the data was stored and the record's etl id
        :raises Exception: carrying the response body when the status code is
                           not 200
        """
        payload = json.dumps(value)

        # The Hawk Sender computes the Authorization header for this request
        # (signed with the credentials in self.hawk).
        signer = Sender(
            self.hawk,
            self.url,
            b'POST',
            content=payload,
            content_type=CONTENT_TYPE
        )
        request_headers = {
            'Authorization': signer.request_header,
            'Content-Type': CONTENT_TYPE
        }

        # Plain POST carrying the signed header.
        reply = requests.post(
            url=self.url, data=payload, headers=request_headers)

        if reply.status_code != 200:
            raise Exception(reply.content)

        # The server signs its response as well; check that signature before
        # trusting the body.
        signer.accept_response(
            reply.headers['Server-Authorization'],
            content=reply.content,
            content_type=reply.headers['Content-Type']
        )

        receipt = json.loads(reply.content)
        return receipt['link'], receipt['etl']['id']
예제 #7
0
 def test_successful_authentication(self):
     """A correctly signed GET to /test/ returns 200/OK with a valid
     Server-Authorization header."""
     hawk_credentials = {
         'id': self.client_id,
         'key': self.client_key,
         'algorithm': 'sha256'
     }
     sender = Sender(
         credentials=hawk_credentials,
         url='http://localhost:80/test/',
         method='GET',
         content='',
         content_type='',
     )
     with self.app.test_client() as c:
         auth_headers = {'Authorization': sender.request_header}
         response = c.get('/test/', headers=auth_headers)

         assert response.status_code == 200
         assert response.get_data() == b'OK'

         # accept_response raises when the server signature is invalid, so
         # reaching the end of the test means the response was accepted.
         server_signature = response.headers.get('Server-Authorization')
         sender.accept_response(
             server_signature,
             content=response.get_data(),
             content_type=response.mimetype,
         )
예제 #8
0
def run_fetch(source_url,
              run_fetch_task_id=None,
              task_instance=None,
              **kwargs):
    """Fetch every page of a paginated, Hawk-protected source endpoint.

    Args:
        source_url (str): URL of the first page of the source API endpoint.
        run_fetch_task_id: Prefix for the storage keys; page ``i`` is stored
            under ``f'{run_fetch_task_id}{i}'``.
        task_instance: Object whose ``xcom_push`` receives the final
            ``state`` flag (``True`` on success, ``False`` on failure).
        **kwargs: Accepted but not used by this function.

    The endpoint has to accept GET requests, answer HTTP 200 on success and
    return a paginated body of the form::

        {
            'next': <link_to_next_page>,
            'results': [list of dict]
        }

    Each page's ``results`` is saved with ``Variable.set`` and a marker with
    the same key is set in redis. Nothing is returned. On any handled
    failure the pages stored so far are deleted again, ``state=False`` is
    pushed and an ``Exception`` is raised.

    Notes:
        XCOM isn't used to move the data between tasks because it is not
        built for very large transfers. Writing the records to a file would
        prevent scaling with celery, and keeping them in a single variable
        causes worker shutdowns due to memory usage; hence one indexed
        variable per page. Alternatives considered by the original author:

        - Shared network storage
        - S3 (Security risk)

    Example source_url:
        https://datahub-api-demo.london.cloudapps.digital/v4/datasets/omis-dataset

    TODO:
        With the implementation of other dataset pipelines there will be a
        more generic structure to support various pipeline types.

    Raises:
        Exception: on a non-200 response, a Hawk validation failure, or a
            response body lacking ``results`` / ``next``.
    """
    redis_client = get_redis_client()

    def page_key(page_number):
        # Storage key shared by the Variable and its redis marker.
        return f'{run_fetch_task_id}{page_number}'

    def mark_task_failed(pages_stored):
        # Report the failure, then remove every page stored before it.
        task_instance.xcom_push(key='state', value=False)
        for stored_page in range(pages_stored):
            stale_key = page_key(stored_page)
            Variable.delete(stale_key)
            redis_client.delete(stale_key)

    index = 0
    while True:
        sender = Sender(
            credentials,
            source_url,
            'get',
            always_hash_content=False,
        )
        auth_headers = {'Authorization': sender.request_header}
        response = requests.get(source_url, headers=auth_headers)

        if response.status_code != 200:
            mark_task_failed(index)
            raise Exception(
                f'GET request to {source_url} is unsuccessful\n'
                f'Message: {response.text}', )

        # Check the server's signature over the response before using it.
        try:
            sender.accept_response(
                response.headers['Server-Authorization'],
                content=response.content,
                content_type=response.headers['Content-Type'])
        except HawkFail as e:
            mark_task_failed(index)
            raise Exception(f'HAWK Authentication failed {str(e)}')

        response_json = response.json()
        has_expected_keys = (
            'results' in response_json and 'next' in response_json)
        if not has_expected_keys:
            mark_task_failed(index)
            raise Exception('Unexpected response structure')

        key = page_key(index)
        Variable.set(
            key,
            response_json['results'],
            serialize_json=True,
        )
        redis_client.set(key, 1)

        next_page = response_json['next']
        if not next_page:
            # A falsy 'next' marks the last page.
            break

        index += 1
        source_url = next_page
        logging.info('Moving on to the next page')

    logging.info('Fetching from source completed')
    task_instance.xcom_push(key='state', value=True)
예제 #9
0
# Connection settings are read from the environment. TS_HOST falls back to
# the default API host; the other three go through get_or_throw
# (presumably failing when the variable is unset -- confirm in its definition).
HOST = os.getenv("TS_HOST", 'api.threatstack.com')
USER_ID = get_or_throw("TS_USER_ID")
ORGANIZATION_ID = get_or_throw("TS_ORGANIZATION_ID")
API_KEY = get_or_throw("TS_API_KEY")

BASE_PATH = 'https://{}'.format(HOST)
URI_PATH = '/help/hawk/self-test'
URL = '{}{}'.format(BASE_PATH, URI_PATH)

credentials = dict(id=USER_ID, key=API_KEY, algorithm='sha256')

# Sign a body-less GET; the organization id travels in the Hawk "ext" field.
sender = Sender(
    credentials=credentials,
    url=URL,
    method="GET",
    always_hash_content=False,
    ext=ORGANIZATION_ID,
)

response = requests.get(
    URL,
    headers={'Authorization': sender.request_header},
)
print(response.text)

# Note: the verification call below logs a warning:
#   seen_nonce was None; not checking nonce. You may be vulnerable to replay attacks
# According to the original author this is not a problem here, because a
# fresh random nonce is generated for every request.
sender.accept_response(
    response.headers['Server-Authorization'],
    content=response.text,
    content_type=response.headers['Content-Type'],
)

# accept_response raises if the response is not authentic, so getting past
# it means the response is authentic.
print('Response is authentic')