def main(override_args=None):
    starter = BaseScripts()
    logger.debug(f'START: get_query_hash.py')

    # Load initial args
    parser = starter.start('Retrieve a query hash from a query body (a json used for the Advanced Search).')
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_body_path',
        help='path to the json file containing the query body',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    with open(args.query_body_path, 'r') as query_body_file:
        query_body = json.load(query_body_file)
    logger.debug(f'Retrieving query hash for query body: {query_body}')

    advanced_search = AdvancedSearch(endpoint_config, args.env, tokens)

    response = advanced_search.get_threats(query_body, limit=0)
    if not response or 'query_hash' not in response:
        logger.error("Couldn't retrieve a query hash, is the query body valid ?")
        exit(1)
    query_hash = response['query_hash']
    if args.output:
        with open(args.output, 'w') as output:
            output.write(query_hash)
        logger.info(f'Query hash saved in {args.output}')
    else:
        logger.info(f'Query hash associated: {query_hash}')
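A hedged invocation sketch: main() accepts override_args, so the script can be driven without touching sys.argv. The file names are hypothetical, and the --output flag is assumed to be registered by BaseScripts.start(), as the body above suggests.

# Hypothetical call: compute the query hash of my_query_body.json and save it
main(['my_query_body.json', '--output', 'query_hash.txt'])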
Example #2
    def _handle_bulk_search_task(self, task_uuid):
        retrieve_bulk_result_url = self._build_url_for_endpoint(
            'retrieve-bulk-search')
        retrieve_bulk_result_url = retrieve_bulk_result_url.format(
            task_uuid=task_uuid)

        start_time = time()
        back_off_time = 10

        json_response = None
        while not json_response:
            response = requests.get(url=retrieve_bulk_result_url,
                                    headers={'Authorization': self.tokens[0]})
            if response.status_code == 200:
                json_response = response.json()
            elif response.status_code == 401:
                logger.debug('Refreshing expired Token')
                self._token_update(response.json())
            elif time() - start_time + back_off_time < self.OCD_DTL_MAX_BULK_SEARCH_TIME:
                sleep(back_off_time)
                back_off_time = min(back_off_time * 2,
                                    self.OCD_DTL_MAX_BACK_OFF_TIME)
            else:
                logger.error(f'Bulk search task {task_uuid} timed out')
                raise TimeoutError(
                    f'No bulk search result after waiting {self.OCD_DTL_MAX_BULK_SEARCH_TIME / 60:.0f} mins\n'
                    f'task_uuid: "{task_uuid}"')

        return json_response
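Example #3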
    def refresh_token(self, refresh_token: str):
        """
        Refresh the current token

        :param refresh_token: str
        """
        logger.debug('Token will be refreshed')
        return self.retrieve_token({'Authorization': refresh_token}, True)
Example #4
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    logger.debug(f'START: get_threats_from_query_hash.py')

    # Load initial args
    parser = starter.start(
        'Retrieve a list of response from a given query hash.')
    parser.add_argument(
        '--query_fields',
        help=
        'fields to be retrieved from the threat (default: only the hashkey)\n'
        'If an atom detail isn\'t present in a particular atom, empty string is returned.',
        nargs='+',
        default=['threat_hashkey'],
    )
    parser.add_argument(
        '--list',
        help=
        'Turn the output in a list (require query_fields to be a single element)',
        action='store_true',
    )
    required_named = parser.add_argument_group('required arguments')
    required_named.add_argument(
        'query_hash',
        help='the query hash from which to retrieve the response hashkeys',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    if len(args.query_fields) > 1 and args.list:
        parser.error(
            "List output format is only available if a single element is queried (via query_fields)"
        )

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    logger.debug(
        f'Start to search for threat from the query hash:{args.query_hash}')

    bulk_search = BulkSearch(endpoint_config, args.env, tokens)

    response = bulk_search.get_threats(args.query_hash, args.query_fields)
    original_count = response.get('count', 0)
    logger.info(f'Number of threats retrieved: {original_count}')

    formatted_output = format_output(response, args.list)
    if args.output:
        with open(args.output, 'w') as output:
            output.write(formatted_output)
        logger.info(f'Threats saved in {args.output}')
    else:
        logger.info(formatted_output)
        logger.info('Done')
Example #5
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start(
        'Add tags and/or comments to a specified list of hashkeys.')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to which tags and/or comments will be added',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line',
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '--tags',
        nargs='+',
        help='add a list of tags',
        required=True,
    )

    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    post_engine_add_tags = TagsPost(endpoint_config, args.env, tokens)

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")

    hashkeys = set(args.hashkeys) if args.hashkeys else set()

    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)

    response_dict = post_engine_add_tags.post_tags(
        hashkeys,
        args.tags,
        public=args.public,
    )

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug(f'END: add_tags.py')
Example #6
    def _post_comment(self, hashkey: str, comment: str, visibility: str = 'organization') -> dict:
        """
        Post a comment on a threat hashkey
        """
        payload = {
            'content': comment,
            'visibility': visibility,
        }
        url = self.url.format(hashkey=hashkey)
        logger.debug(url)
        return self.datalake_requests(url, 'post', self._post_headers(), payload)
Example #7
    def handle_bulk_task(self, task_uuid, retrieve_bulk_result_url, *, timeout, additional_checks: List[Check] = None) \
            -> Json:
        """
        Handle a generic bulk task, blocking until the task is done or the timeout is up

        :param task_uuid: uuid of the bulk task
        :param retrieve_bulk_result_url: endpoint to query, must contain a task_uuid field
        :param timeout: timeout after which a TimeoutError is raised
        :param additional_checks: functions called on each candidate json; the Json is returned only if every check returns True
        :return: a Json returned on HTTP 200 validating all additional_checks
        """
        retrieve_bulk_result_url = retrieve_bulk_result_url.format(task_uuid=task_uuid)

        spinner = None
        if logger.isEnabledFor(logging.INFO):
            spinner = Halo(text=f'Waiting for bulk task {task_uuid} response', spinner='dots')
            spinner.start()

        start_time = time()
        back_off_time = 10

        json_response = None
        while not json_response:
            response = requests.get(
                url=retrieve_bulk_result_url,
                headers={'Authorization': self.tokens[0]},
                verify=self.requests_ssl_verify
            )
            if response.status_code == 200:
                potential_json_response = response.json()
                if additional_checks and not all(check(potential_json_response) for check in additional_checks):
                    continue  # the json isn't valid
                if spinner:
                    spinner.succeed(f'bulk task {task_uuid} done')
                json_response = potential_json_response
            elif response.status_code == 401:
                logger.debug('Refreshing expired Token')
                self._token_update(response.json())
            elif time() - start_time + back_off_time < timeout:
                sleep(back_off_time)
                back_off_time = min(back_off_time * 2, self.OCD_DTL_MAX_BACK_OFF_TIME)
            else:
                if spinner:
                    spinner.fail(f'bulk task {task_uuid} timeout')
                logger.error(f'Bulk task {task_uuid} timed out')
                raise TimeoutError(
                    f'No bulk result after waiting {timeout / 60:.0f} mins\n'
                    f'task_uuid: "{task_uuid}"'
                )

        if spinner:
            spinner.stop()
        return json_response
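A minimal usage sketch for handle_bulk_task, assuming an engine instance built elsewhere; the has_results check and the timeout value are illustrative, not the repository's API.

def has_results(candidate_json) -> bool:
    # Hypothetical additional check: only accept the response once 'results' is present
    return 'results' in candidate_json

# result = engine.handle_bulk_task(
#     task_uuid,
#     engine._build_url_for_endpoint('retrieve-bulk-search'),
#     timeout=30 * 60,  # 30 minutes, illustrative
#     additional_checks=[has_results],
# )

Example #8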
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()
    logger.debug(f'START: get_threats_by_hashkey.py')

    # Load initial args
    parser = starter.start(
        'Retrieve threats (as Json) from a list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threats to retrieve',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='list of threats ids (hashkeys) that need to be retrieved',
    )
    parser.add_argument(
        '--lost',
        help='file in which to save hashes that were not found',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")
    threats_list = starter._load_csv(
        args.input_file) if args.input_file else args.hashkeys

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)

    logger.debug(f'TOTAL: {len(threats_list)} threats found')
    url_threats = main_url + endpoint_url['endpoints']['threats']
    search_engine_threats = ThreatsSearch(url_threats, main_url, tokens)
    list_threats, list_lost_hashes = search_engine_threats.get_json(
        threats_list)

    if args.output:
        starter.save_output(args.output, list_threats)
        logger.debug(f'Threats JSON saved in {args.output}\n')
    if args.lost:
        starter.save_output(args.lost, list_lost_hashes)
        logger.debug(f'Threats lost saved in {args.lost}\n')
    logger.debug(f'END: get_threats_by_hashkey.py')
Example #9
    def _post_comments_and_tags(self,
                                hashkey: str,
                                content: str,
                                tags: list,
                                visibility: str = 'organization') -> dict:
        """
        Post comments and tags on a threat hashkey
        """
        payload = {
            'content': content,
            'tags': tags,
            'visibility': visibility,
        }
        logger.debug(f'{self.url}{hashkey}/comments/')
        return self.datalake_requests(f'{self.url}{hashkey}/comments/', 'post',
                                      self._post_headers(), payload)
Example #10
    def add_threats(self, atom_list: list, atom_type: str, is_whitelist: bool, threats_score: Dict[str, int],
                    is_public: bool, tags: list, links: list, override_type: str) -> dict:
        """
        Use it to add a list of threats to the API.

        :param atom_list: atoms that need to be added.
        :param atom_type: must be one of the _authorized_atom_value
        :param is_whitelist: if true the score will be set to 0
        :param threats_score: a dict that contains {threat_type -> score}
        :param is_public: if true the added threats will be public, else they will be reserved to the organization
        :param tags: a list of tags to add
        :param links: external_analysis_link to include with each atom
        :param override_type: either 'permanent' or 'temporary'. Permanent doesn't allow future automatic score changes
        """
        payload = {
            'override_type': override_type,
            'public': is_public,
            'threat_data': {
                'content': {},
                'scores': [],
                'threat_types': [],
                'tags': tags
            }
        }
        if is_whitelist:
            for threat in self.authorized_threats_value:
                payload['threat_data']['scores'].append({'score': {'risk': 0}, 'threat_type': threat})
                payload['threat_data']['threat_types'].append(threat)
        else:
            for threat, score in threats_score.items():
                payload['threat_data']['scores'].append({'score': {'risk': score}, 'threat_type': threat})
                payload['threat_data']['threat_types'].append(threat)

        return_value = {'results': []}
        for atom in atom_list:
            if not atom:  # empty value
                logger.info(f'EMPTY ATOM {atom.ljust(self.terminal_size - 6, " ")} \x1b[0;30;41m  KO  \x1b[0m')
                continue
            response_dict = self._add_new_atom(atom, atom_type, payload, links)

            if response_dict.get('atom_value'):
                logger.info(atom.ljust(self.terminal_size - 6, ' ') + '\x1b[0;30;42m' + '  OK  ' + '\x1b[0m')
                return_value['results'].append(response_dict)
            else:
                logger.info(atom.ljust(self.terminal_size - 6, ' ') + '\x1b[0;30;41m' + '  KO  ' + '\x1b[0m')
                logger.debug(response_dict)
        return return_value
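To make the payload construction above concrete, this is the dict add_threats builds for a non-whitelist call with threats_score={'ddos': 50}, is_public=True and tags=['my_campaign'] (values illustrative):

expected_payload = {
    'override_type': 'temporary',
    'public': True,
    'threat_data': {
        'content': {},
        'scores': [{'score': {'risk': 50}, 'threat_type': 'ddos'}],
        'threat_types': ['ddos'],
        'tags': ['my_campaign'],
    },
}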
Example #11
    def _post_tags_to_hashkey(self, hashkey: str, tags: List[str], visibility: str = 'organization') -> dict:
        """
        Post tags on a single threat hashkey
        """
        tags_payload = [
            {
                'name': tag,
                'visibility': visibility,
            }
            for tag in tags
        ]
        payload = {
            'tags': tags_payload,
        }
        url = self.url.format(hashkey=hashkey)
        logger.debug(url)
        return self.datalake_requests(url, 'post', self._post_headers(), payload)
Example #12
    def _post_new_score(self,
                        hashkey: str,
                        scores: Dict[str, int],
                        override_type: str = 'temporary') -> dict:
        """
        Post new score to the API
        """
        payload = {'override_type': override_type, 'scores': []}
        for threat_type, score in scores.items():
            if score is None:
                return {'message': 'No score to modify'}
            payload['scores'].append({
                'threat_type': threat_type,
                'score': {
                    'risk': score
                }
            })

        logger.debug('url : ' + repr(self.url))
        return self.datalake_requests(f'{self.url}{hashkey}/scoring-edits/',
                                      'post', self._post_headers(), payload)
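Example #13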
    def refresh_token(self, refresh_token: str):
        """
        Refresh the current token
        :param refresh_token: str
        """
        logger.debug('Token will be refreshed')
        headers = {'Authorization': refresh_token}
        response = requests.post(url=self.url_refresh, headers=headers, verify=self.requests_ssl_verify)

        json_response = json.loads(response.text)
        if response.status_code == 401 and json_response.get('msg') == 'Token has expired':
            logger.debug('Refreshing the refresh token')
            # Refresh token is also expired, we need to restart the authentication from scratch
            return self.get_token()
        elif 'access_token' in json_response:
            return json_response
        # else an error occurred

        logger.error(f'An error occurred while refreshing the token, for URL: {self.url_refresh}\n'
                     f'response of the API: {response.text}')
        exit(1)
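Example #14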
    def retrieve_token(self, data: dict, refresh_token: bool):
        """
        Generate a token from data. If refresh_token is set to True,
        an existing token is refreshed; otherwise a new token is created.

            data is the refresh token header when refresh_token is True,
            and the authentication header otherwise.

        :param data: dict
        :param refresh_token: bool
        :return: dict
        """
        if refresh_token:
            raw_res = requests.post(url=self.url_refresh, headers=data)
        else:
            raw_res = requests.post(url=self.url_token, json=data)
        api_response = json.loads(raw_res.text)
        if 'access_token' in api_response.keys():
            return api_response

        logger.debug('ERROR: the token request failed, please refer to the API')

        logger.debug(
            f'for URL: {self.url_refresh if refresh_token else self.url_token}\n'
        )
        logger.debug(raw_res.text)
        return
Example #15
    def _send_request(self, url: str, method: str, headers: dict, data: dict):
        """
        Send the correct http request to url from method [get, post, delete, patch, put].
        Raise a TypeError 'Unknown method to requests {method}' when the method is not one of the above.

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :param tokens: list
        :return: str
        """
        common_kwargs = {
            'url': url,
            'headers': headers,
            'verify': self.requests_ssl_verify
        }

        if method == 'get':
            api_response = requests.get(**common_kwargs)
        elif method == 'post':
            api_response = requests.post(**common_kwargs,
                                         data=json.dumps(data))
        elif method == 'delete':
            api_response = requests.delete(**common_kwargs,
                                           data=json.dumps(data))
        elif method == 'patch':
            api_response = requests.patch(**common_kwargs,
                                          data=json.dumps(data))
        elif method == 'put':
            api_response = requests.put(**common_kwargs, data=json.dumps(data))
        else:
            logger.debug(
                'ERROR: unsupported HTTP method, please use one of [get, post, put, patch, delete]'
            )
            raise TypeError(f'Unknown method to requests: {method}')
        return api_response
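The if/elif chain above dispatches on the method name; a table-driven sketch of the same idea (an alternative, not the repository's code) keeps the mapping in one place:

import json

import requests


def send_request(url: str, method: str, headers: dict, data: dict = None,
                 verify: bool = True) -> requests.Response:
    """Sketch of a table-driven equivalent of _send_request"""
    dispatch = {
        'get': requests.get,
        'post': requests.post,
        'delete': requests.delete,
        'patch': requests.patch,
        'put': requests.put,
    }
    if method not in dispatch:
        raise TypeError(f'Unknown method to requests: {method}')
    kwargs = {'url': url, 'headers': headers, 'verify': verify}
    if method != 'get':
        kwargs['data'] = json.dumps(data)  # body for post/delete/patch/put
    return dispatch[method](**kwargs)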
Example #16
    def datalake_requests(self,
                          url: str,
                          method: str,
                          headers: dict,
                          post_body: dict = None):
        """
        Use it to request the API
        """
        self.headers = headers
        tries_left = self.SET_MAX_RETRY

        logger.debug(
            self._pretty_debug_request(url, method, post_body, headers,
                                       self.tokens))

        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            headers['Authorization'] = self.tokens[0]
        while True:
            response = self._send_request(url, method, headers, post_body)
            logger.debug(f'API response:\n{str(response.text)}')
            if response.status_code == 401:
                logger.warning(
                    'Token expired or Missing authorization header. Updating token'
                )
                self._token_update(self._load_response(response))
            elif response.status_code == 422:
                logger.warning('Bad authorization header. Updating token')
                logger.debug(f'422 HTTP code: {response.text}')
                self._token_update(self._load_response(response))
            elif response.status_code < 200 or response.status_code > 299:
                logger.error(
                    f'API returned non 2xx response code: {response.status_code}\n{response.text}'
                    f'\nRetrying')
            else:
                try:
                    dict_response = self._load_response(response)
                    return dict_response
                except JSONDecodeError:
                    logger.error(
                        'Request unexpectedly returned a non-json value. Retrying'
                    )
            tries_left -= 1
            if tries_left <= 0:
                logger.error(
                    'Request failed: Will return nothing for this request')
                return {}
Example #17
    def datalake_requests(self,
                          url: str,
                          method: str,
                          headers: dict,
                          post_body: dict = None):
        """
        Use it to request the API.
        """
        tries_left = self.SET_MAX_RETRY
        api_response = None

        logger.debug(
            self._pretty_debug_request(url, method, post_body, headers,
                                       self.tokens))

        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            headers['Authorization'] = self.tokens[0]

        while tries_left > 0:
            try:
                api_response = self._send_request(url, method, headers, post_body)
                dict_response = self._load_response(api_response)
                if self._token_update(dict_response):
                    return dict_response

            except Exception:
                tries_left -= 1
                if tries_left <= 0:
                    logger.warning(
                        'Request failed: Will return nothing for this request')
                    return {}
                elif not api_response:
                    logger.debug(
                        'ERROR: something went wrong with the request...')
                    logger.debug('sleep 5 seconds')
                    time.sleep(5)
                else:
                    logger.warning(
                        'ERROR: wrong request, please refer to the API')
                    logger.warning(
                        f'for URL: {url}\nwith:\nheaders:{headers}\nbody:{post_body}\n'
                    )
                    logger.warning(api_response.text)
Example #18
class BaseEngine:
    OCD_DTL_QUOTA_TIME = int(os.getenv('OCD_DTL_QUOTA_TIME', 1))
    OCD_DTL_REQUESTS_PER_QUOTA_TIME = int(
        os.getenv('OCD_DTL_REQUESTS_PER_QUOTA_TIME', 5))
    logger.debug(
        f'Throttle selected: {OCD_DTL_REQUESTS_PER_QUOTA_TIME} queries per {OCD_DTL_QUOTA_TIME}s'
    )

    SET_MAX_RETRY = 3

    def __init__(self, url: str, token_url: str, tokens: list):
        self.url = url
        self.token_url = token_url
        self.tokens = tokens
        self.terminal_size = self._get_size_terminal()
        self.token_generator = TokenGenerator(token_url)

        self.SET_MAX_RETRY = 3

    def _get_size_terminal(self) -> int:
        """
        Return the terminal size for pretty print
        """
        stty_sizes = os.popen('stty size', 'r').read().split()
        if len(stty_sizes) >= 2:
            return int(stty_sizes[1])
        else:  # Return default terminal size
            return 80

    @throttle(
        period=OCD_DTL_QUOTA_TIME,
        call_per_period=OCD_DTL_REQUESTS_PER_QUOTA_TIME,
    )
    def datalake_requests(self,
                          url: str,
                          method: str,
                          headers: dict,
                          post_body: dict = None):
        """
        Use it to request the API.
        """
        tries_left = self.SET_MAX_RETRY
        api_response = None

        logger.debug(
            self._pretty_debug_request(url, method, post_body, headers,
                                       self.tokens))

        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            headers['Authorization'] = self.tokens[0]

        while tries_left > 0:
            try:
                api_response = self._send_request(url, method, headers, post_body)
                dict_response = self._load_response(api_response)
                if self._token_update(dict_response):
                    return dict_response

            except Exception:
                tries_left -= 1
                if tries_left <= 0:
                    logger.warning(
                        'Request failed: Will return nothing for this request')
                    return {}
                elif not api_response:
                    logger.debug(
                        'ERROR: something went wrong with the request...')
                    logger.debug('sleep 5 seconds')
                    time.sleep(5)
                else:
                    logger.warning(
                        'ERROR: wrong request, please refer to the API')
                    logger.warning(
                        f'for URL: {url}\nwith:\nheaders:{headers}\nbody:{post_body}\n'
                    )
                    logger.warning(api_response.text)

    def _send_request(self, url: str, method: str, headers: dict, data: dict):
        """
        Send the correct http request to url from method [get, post, delete, patch, put].
        Raise a TypeError 'Unknown method to requests {method}' when the method is not one of the above.

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :param tokens: list
        :return: str
        """
        if method == 'get':
            api_response = requests.get(url=url, headers=headers)
        elif method == 'post':
            api_response = requests.post(url=url,
                                         headers=headers,
                                         data=json.dumps(data))
        elif method == 'delete':
            api_response = requests.delete(url=url,
                                           headers=headers,
                                           data=json.dumps(data))
        elif method == 'patch':
            api_response = requests.patch(url=url,
                                          headers=headers,
                                          data=json.dumps(data))
        elif method == 'put':
            api_response = requests.put(url=url,
                                        headers=headers,
                                        data=json.dumps(data))
        else:
            logger.debug(
                'ERROR: unsupported HTTP method, please use one of [get, post, put, patch, delete]'
            )
            raise TypeError(f'Unknown method to requests: {method}')
        return api_response

    def _load_response(self, api_response: Response):
        """
        Load the API response from JSON format to dict.
        The endpoint for events is a bit special: json.loads() doesn't work on its return format,
        so for this special case we return a dict containing the length of the response, i.e.:

            if length of response == 3 then: no events

        :param api_response: Response
        :return: dict_response
        """
        if api_response.text.startswith('[') and api_response.text.endswith(
                ']\n'):
            # This condition is for the date-histogram endpoints
            dict_response = {'response_length': len(api_response.text)}
        else:
            dict_response = json.loads(api_response.text)
        return dict_response

    def _token_update(self, dict_response: dict):
        """
        Allow to update the token when the API response is either 'Missing Authorization Header'
        or 'Token has expired'. Return False if the token has been regenerated.

        :param dict_response: dict
        :return: bool
        """
        if dict_response.get('msg') == 'Missing Authorization Header':
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            self.headers['Authorization'] = self.tokens[0]
            return False

        elif dict_response.get('msg') == 'Token has expired':
            fresh_token = self.token_generator.refresh_token(self.tokens[1])
            self.tokens = [
                f'Token {fresh_token["access_token"]}', self.tokens[1]
            ]
            self.headers['Authorization'] = self.tokens[0]
            return False

        return True

    def _pretty_debug_request(self, url: str, method: str, data: dict,
                              headers: dict, tokens: list):
        """
        Return pretty debug string

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :param tokens: list
        :return: str
        """
        debug = ('-' * self.terminal_size + 'DEBUG - datalake_requests:\n' +
                 f' - url: \n{url}\n' + f' - method: \n{method}\n' +
                 f' - headers: \n{headers}\n' + f' - data: \n{data}\n' +
                 f' - token: \n{tokens[0]}\n' +
                 f' - refresh_token: \n{tokens[1]}\n' +
                 '-' * self.terminal_size)
        return debug
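throttle is imported from elsewhere in the repository; a minimal sliding-window sketch of such a decorator (an assumption about its behaviour, not the project's implementation) could look like:

import time
from collections import deque
from functools import wraps


def throttle(period: int, call_per_period: int):
    """Sketch: allow at most call_per_period calls every period seconds"""
    call_times = deque()

    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            now = time.monotonic()
            # Drop timestamps that fell out of the sliding window
            while call_times and now - call_times[0] >= period:
                call_times.popleft()
            if len(call_times) >= call_per_period:
                # Window is full: wait until the oldest call expires
                time.sleep(max(0.0, period - (now - call_times[0])))
                call_times.popleft()
            call_times.append(time.monotonic())
            return func(*args, **kwargs)

        return wrapper

    return decorator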
Example #19
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Edit scores of a specified list of ids (hashkeys)')
    parser.add_argument(
        'hashkeys',
        help='hashkeys of the threat to edit score.',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input_file',
        help='hashkey txt file, with one hashkey per line.',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help=
        'Choose specific threat types and their score, like: ddos 50 scam 15.',
    )
    parser.add_argument(
        '--permanent',
        help=
        '''Permanent: all values will override any values provided by both newer and
            older IOCs. Newer IOCs with override_type permanent can still override older permanent changes.
            Temporary: all values will override any values provided by older IOCs,
            but not newer ones.''',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()

    logger.debug(f'START: edit_score.py')

    if not args.hashkeys and not args.input_file:
        parser.error("either a hashkey or an input_file is required")

    if not args.threat_types or len(args.threat_types) % 2 != 0:
        parser.error("threat_types invalid ! should be like: ddos 50 scam 15")
    parsed_threat_type = AddThreatsPost.parse_threat_types(args.threat_types)

    hashkeys = set(args.hashkeys) if args.hashkeys else set()
    if args.input_file:
        retrieve_hashkeys_from_file(args.input_file, hashkeys)

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    url_threats = main_url + endpoint_url['endpoints']['threats']
    post_engine_edit_score = ThreatsScoringPost(url_threats, main_url, tokens)

    response_dict = post_engine_edit_score.post_new_score_from_list(
        hashkeys,
        parsed_threat_type,
        'permanent' if args.permanent else 'temporary',
    )

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.info(f'Results saved in {args.output}\n')
    logger.debug(f'END: edit_score.py')
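Example #20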
def main(override_args=None):
    """Method to start the script"""

    # Load initial args
    starter = BaseScripts()
    parser = starter.start(
        'Gets threats or hashkeys from given atom types and atom values.')
    supported_atom_types = parser.add_argument_group('Supported Atom Types')

    parser.add_argument(
        'untyped_atoms',
        help=
        'untyped atom values to look up. Useful when you do not know the atom type',
        nargs='*',
    )
    for atom_type in ATOM_TYPES_FLAGS:
        supported_atom_types.add_argument(
            f'--{atom_type}',
            action='append',
            help=f'set a single {atom_type} atom type with its value',
        )
    parser.add_argument(
        '-ad',
        '--atom-details',
        dest='hashkey_only',
        default=True,
        action='store_false',
        help='returns threats full details',
    )
    parser.add_argument(
        '-i',
        '--input',
        action='append',
        help='read threats to add from FILE. [atomtype:path/to/file.txt]',
    )
    parser.add_argument(
        '-ot',
        '--output-type',
        help='set to the output type desired {json,csv}. Default is json',
    )

    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug(f'START: bulk_lookup_threats.py')

    # create output type header
    accept_header = {'Accept': None}

    if args.output_type:
        try:
            accept_header['Accept'] = BaseEngine.output_type2header(
                args.output_type)
        except ParserError as e:
            logger.exception(
                f'Exception raised while getting output type headers # {str(e)}',
                exc_info=False)
            exit(1)

    # to gather all typed atoms passed by arguments and input files
    typed_atoms = {}

    # set validations flags regarding the presence or absence of cli arguments
    has_file = args.input is not None
    has_flag = False
    for flag in ATOM_TYPES_FLAGS:
        atom_values = getattr(args, flag)
        if atom_values is not None:
            typed_atoms[flag] = atom_values
            has_flag = True

    # validate that there is at least one untyped atom, one typed atom, or one input file
    if (not has_flag and not has_file
            and not args.untyped_atoms) or (SUBCOMMAND_NAME
                                            in args.untyped_atoms):
        parser.error(
            "you must provide at least one of following: untyped atom, atom type, input file."
        )

    # process input files
    if has_file:
        for input_file in args.input:
            file_atom_type, filename = get_atom_type_from_filename(input_file)
            logger.debug(f'file {filename} was recognized as {file_atom_type}')

            if file_atom_type == UNTYPED_ATOM_TYPE:
                args.untyped_atoms += starter._load_list(filename)
            else:
                typed_atoms.setdefault(file_atom_type,
                                       []).extend(starter._load_list(filename))

    # load api_endpoints and tokens
    endpoints_config, main_url, tokens = starter.load_config(args)
    post_engine_bulk_lookup_threats = BulkLookupThreats(
        endpoints_config, args.env, tokens)
    post_engine_atom_values_extractor = AtomValuesExtractor(
        endpoints_config, args.env, tokens)

    # lookup for atom types
    if args.untyped_atoms:
        atoms_values_extractor_response = post_engine_atom_values_extractor.atom_values_extract(
            args.untyped_atoms)
        if atoms_values_extractor_response['found'] > 0:
            typed_atoms = join_dicts(
                typed_atoms, atoms_values_extractor_response['results'])
        else:
            logger.warning('none of your untyped atoms could be typed')

        # find out which atoms couldn't be typed, so they can be printed
        if atoms_values_extractor_response['not_found'] > 0:
            for atom_type in atoms_values_extractor_response['results']:
                args.untyped_atoms = [
                    untyped_atom for untyped_atom in args.untyped_atoms
                    if untyped_atom not in
                    atoms_values_extractor_response['results'][atom_type]
                ]

            logger.warning(
                f'\x1b[6;37;43m{"#" * 60} UNTYPED ATOMS {"#" * 47}\x1b[0m')
            logger.warning('\n'.join(args.untyped_atoms))
            logger.warning('')

    response = post_engine_bulk_lookup_threats.bulk_lookup_threats(
        threats=typed_atoms,
        additional_headers=accept_header,
        hashkey_only=args.hashkey_only)

    pretty_print(response, args.output_type)

    if args.output:
        starter.save_output(args.output, response)
        logger.debug(f'Results saved in {args.output}\n')

    logger.debug('END: bulk_lookup_threats.py')
Example #21
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Lookup threats in Datalake')
    required_named = parser.add_argument_group('required arguments')
    csv_controle = parser.add_argument_group('CSV control arguments')

    parser.add_argument(
        'threats',
        help='threats to look up',
        nargs='*',
    )
    parser.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
    )
    parser.add_argument(
        '-td',
        '--threat_details',
        action='store_true',
        help='set if you also want to have access to the threat details',
    )
    parser.add_argument(
        '-ot',
        '--output_type',
        default='json',
        help=
        'set to the output type desired {json,csv}. Default is json if not specified',
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_controle.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_controle.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_controle.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug(f'START: lookup_threats.py')

    if not args.threats and not args.input:
        parser.error("either a threat or an input_file is required")

    if args.atom_type not in PostEngine.authorized_atom_value:
        parser.error("atom type must be in {}".format(','.join(
            PostEngine.authorized_atom_value)))

    args.output_type = output_type2header(args.output_type, parser)
    hashkey_only = not args.threat_details
    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    get_engine_lookup_threats = LookupThreats(endpoint_config, args.env,
                                              tokens)
    list_threats = list(args.threats) if args.threats else []
    if args.input:
        if args.is_csv:
            try:
                list_threats = list_threats + starter._load_csv(
                    args.input, args.delimiter, args.column - 1)
            except ValueError as ve:
                logger.error(ve)
                exit(1)
        else:
            list_threats = list_threats + starter._load_list(args.input)
    list_threats = list(OrderedDict.fromkeys(
        list_threats))  # removing duplicates while preserving order
    response_dict = get_engine_lookup_threats.lookup_threats(
        list_threats, args.atom_type, hashkey_only, args.output_type)

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug(f'END: lookup_threats.py')
Example #22
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Submit a new threat to Datalake from a file')
    required_named = parser.add_argument_group('required arguments')
    csv_controle = parser.add_argument_group('CSV control arguments')
    required_named.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
        required=True,
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_controle.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_controle.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_controle.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='set it to define the added threats as whitelist',
        action='store_true',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help=
        'choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
    )
    parser.add_argument(
        '--tag',
        nargs='+',
        help='add a list of tags',
        default=[],
    )
    parser.add_argument(
        '--link',
        help='add link as external_analysis_link',
        nargs='+',
    )
    parser.add_argument(
        '--permanent',
        help=
        'sets override_type to permanent. Scores won\'t be updated by the algorithm. Default is temporary',
        action='store_true',
    )
    parser.add_argument(
        '--no-bulk',
        help=
        'force an API call for each threat; useful to retrieve the details of created threats',
        action='store_true',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug(f'START: add_new_threats.py')

    if not args.threat_types and not args.whitelist:
        parser.error(
            "threat types is required if the atom is not for whitelisting")

    permanent = 'permanent' if args.permanent else 'temporary'

    if args.is_csv:
        try:
            list_new_threats = starter._load_csv(args.input, args.delimiter,
                                                 args.column - 1)
        except ValueError as ve:
            logger.error(ve)
            exit(1)
    else:
        list_new_threats = starter._load_list(args.input)
    list_new_threats = defang_threats(list_new_threats, args.atom_type)
    list_new_threats = list(OrderedDict.fromkeys(
        list_new_threats))  # removing duplicates while preserving order
    threat_types = ThreatsPost.parse_threat_types(args.threat_types) or []

    # Load api_endpoints and tokens
    endpoint_config, main_url, tokens = starter.load_config(args)
    if args.no_bulk:
        post_engine_add_threats = ThreatsPost(endpoint_config, args.env,
                                              tokens)
        response_dict = post_engine_add_threats.add_threats(
            list_new_threats, args.atom_type, args.whitelist, threat_types,
            args.public, args.tag, args.link, permanent)
    else:
        post_engine_add_threats = BulkThreatsPost(endpoint_config, args.env,
                                                  tokens)
        hashkeys = post_engine_add_threats.add_bulk_threats(
            list_new_threats, args.atom_type, args.whitelist, threat_types,
            args.public, args.tag, args.link, permanent)
        response_dict = {'hashkeys': list(hashkeys)}

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug(f'END: add_new_threats.py')
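A hedged invocation sketch via override_args; the input file, scores and tag below are hypothetical.

# Hypothetical call: add the IPs listed in iocs.txt as ddos threats scored 50,
# tagged campaign_x, with a permanent override_type
main(['-i', 'iocs.txt', '-a', 'ip', '-t', 'ddos', '50',
      '--tag', 'campaign_x', '--permanent'])

Example #23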
def main(override_args=None):
    """Method to start the script"""
    starter = BaseScripts()

    # Load initial args
    parser = starter.start('Submit a new threat to Datalake from a file')
    required_named = parser.add_argument_group('required arguments')
    csv_controle = parser.add_argument_group('CSV control arguments')
    required_named.add_argument(
        '-i',
        '--input',
        help='read threats to add from FILE',
        required=True,
    )
    required_named.add_argument(
        '-a',
        '--atom_type',
        help='set it to define the atom type',
        required=True,
    )
    csv_controle.add_argument(
        '--is_csv',
        help='set if the file input is a CSV',
        action='store_true',
    )
    csv_controle.add_argument(
        '-d',
        '--delimiter',
        help='set the delimiter of the CSV file',
        default=',',
    )
    csv_controle.add_argument(
        '-c',
        '--column',
        help='select column of the CSV file, starting at 1',
        type=int,
        default=1,
    )
    parser.add_argument(
        '-p',
        '--public',
        help='set the visibility to public',
        action='store_true',
    )
    parser.add_argument(
        '-w',
        '--whitelist',
        help='set it to define the added threats as whitelist',
        action='store_true',
    )
    parser.add_argument(
        '-t',
        '--threat_types',
        nargs='+',
        help='choose specific threat types and their score, like: ddos 50 scam 15',
        default=[],
    )
    parser.add_argument(
        '--tag',
        nargs='+',
        help='add a list of tags',
        default=[],
    )
    parser.add_argument(
        '--link',
        help='add link as external_analysis_link',
        nargs='+',
    )
    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug(f'START: add_new_threats.py')

    if not args.threat_types and not args.whitelist:
        parser.error("threat types is required if the atom is not for whitelisting")

    # Load api_endpoints and tokens
    endpoint_url, main_url, tokens = starter.load_config(args)
    url_manual_threats = main_url + endpoint_url['endpoints']['threats-manual']
    post_engine_add_threats = AddThreatsPost(url_manual_threats, main_url, tokens)
    if args.is_csv:
        list_new_threats = starter._load_csv(args.input, args.delimiter, args.column - 1)
    else:
        list_new_threats = starter._load_list(args.input)
    threat_types = AddThreatsPost.parse_threat_types(args.threat_types) or []
    response_dict = post_engine_add_threats.add_threats(
        list_new_threats,
        args.atom_type,
        args.whitelist,
        threat_types,
        args.public,
        args.tag,
        args.link,
    )

    if args.output:
        starter.save_output(args.output, response_dict)
        logger.debug(f'Results saved in {args.output}\n')
    logger.debug(f'END: add_new_threats.py')
Example #24
class BaseEngine:
    OCD_DTL_QUOTA_TIME = int(os.getenv('OCD_DTL_QUOTA_TIME', 1))
    OCD_DTL_REQUESTS_PER_QUOTA_TIME = int(
        os.getenv('OCD_DTL_REQUESTS_PER_QUOTA_TIME', 5))
    logger.debug(
        f'Throttle selected: {OCD_DTL_REQUESTS_PER_QUOTA_TIME} queries per {OCD_DTL_QUOTA_TIME}s'
    )

    Json = Union[dict, list]  # json-like object that can be a dict or a root-level array

    SET_MAX_RETRY = 3

    def __init__(self, endpoint_config: dict, environment: str, tokens: list):
        self.endpoint_config = endpoint_config
        self.environment = environment
        self.requests_ssl_verify = suppress_insecure_request_warns(environment)
        self.url = self._build_url(endpoint_config, environment)
        self.tokens = tokens
        self.terminal_size = self._get_size_terminal()
        self.token_generator = TokenGenerator(endpoint_config,
                                              environment=environment)
        self.headers = None
        self.SET_MAX_RETRY = 3

    def _get_size_terminal(self) -> int:
        """
        Return the terminal size for pretty print
        """
        stty_sizes = os.popen('stty size', 'r').read().split()
        if len(stty_sizes) >= 2:
            return int(stty_sizes[1])
        else:  # Return default terminal size
            return 80

    @throttle(
        period=OCD_DTL_QUOTA_TIME,
        call_per_period=OCD_DTL_REQUESTS_PER_QUOTA_TIME,
    )
    def datalake_requests(self,
                          url: str,
                          method: str,
                          headers: dict,
                          post_body: dict = None):
        """
        Use it to request the API
        """
        self.headers = headers
        tries_left = self.SET_MAX_RETRY

        logger.debug(
            self._pretty_debug_request(url, method, post_body, headers,
                                       self.tokens))

        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.replace_tokens(fresh_tokens)
        while True:
            response = self._send_request(url, method, headers, post_body)
            logger.debug(f'API response:\n{str(response.text)}')
            if response.status_code == 401:
                logger.warning(
                    'Token expired or Missing authorization header. Updating token'
                )
                self._token_update(self._load_response(response))
            elif response.status_code == 422:
                logger.warning('Bad authorization header. Updating token')
                logger.debug(f'422 HTTP code: {response.text}')
                self._token_update(self._load_response(response))
            elif response.status_code < 200 or response.status_code > 299:
                logger.error(
                    f'API returned non 2xx response code: {response.status_code}\n{response.text}'
                    f'\nRetrying')
            else:
                try:
                    dict_response = self._load_response(response)
                    return dict_response
                except JSONDecodeError:
                    logger.error(
                        'Request unexpectedly returned a non-json value. Retrying'
                    )
            tries_left -= 1
            if tries_left <= 0:
                logger.error(
                    'Request failed: Will return nothing for this request')
                return {}

    def _send_request(self, url: str, method: str, headers: dict, data: dict):
        """
        Send the correct http request to url from method [get, post, delete, patch, put].
        Raise a TypeError 'Unknown method to requests {method}' when the method is not one of the above.

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :param tokens: list
        :return: str
        """
        common_kwargs = {
            'url': url,
            'headers': headers,
            'verify': self.requests_ssl_verify
        }

        if method == 'get':
            api_response = requests.get(**common_kwargs)
        elif method == 'post':
            api_response = requests.post(**common_kwargs,
                                         data=json.dumps(data))
        elif method == 'delete':
            api_response = requests.delete(**common_kwargs,
                                           data=json.dumps(data))
        elif method == 'patch':
            api_response = requests.patch(**common_kwargs,
                                          data=json.dumps(data))
        elif method == 'put':
            api_response = requests.put(**common_kwargs, data=json.dumps(data))
        else:
            logger.debug(
                'ERROR: unsupported HTTP method, please use one of [get, post, put, patch, delete]'
            )
            raise TypeError(f'Unknown method to requests: {method}')
        return api_response

    def _load_response(self, api_response: Response):
        """
        Load the API response from JSON format to dict.
        The endpoint for events is a bit special: json.loads() doesn't work on its return format,
        so for this special case we return a dict containing the length of the response, i.e.:

            if length of response == 3 then: no events

        :param api_response: Response
        :return: dict_response
        """
        if api_response.text.startswith('[') and api_response.text.endswith(
                ']\n'):
            # This condition is for the date-histogram endpoints
            dict_response = {'response_length': len(api_response.text)}
        else:
            dict_response = json.loads(api_response.text)
        return dict_response

    def _token_update(self, dict_response: dict):
        """
        Allow to update the token when the API response is either 'Missing Authorization Header'
        or 'Token has expired'. Return False if the token has been regenerated.

        :param dict_response: dict
        :return: bool
        """
        if dict_response.get('msg') == 'Missing Authorization Header':
            fresh_tokens = self.token_generator.get_token()
            self.replace_tokens(fresh_tokens)
            return False
        elif dict_response.get('msg') == "Bad Authorization header. Expected value 'Token <JWT>'":
            fresh_tokens = self.token_generator.get_token()
            self.replace_tokens(fresh_tokens)
            return False
        elif dict_response.get('msg') == 'Token has expired':
            fresh_tokens = self.token_generator.refresh_token(self.tokens[1])
            self.replace_tokens(fresh_tokens)
            return False

        return True

    def replace_tokens(self, fresh_tokens: dict):
        access_token = fresh_tokens["access_token"]
        # Update of the refresh token is optional
        refresh_token = fresh_tokens.get('refresh_token',
                                         self.tokens[1].replace('Token ', ''))

        self.tokens = [f'Token {access_token}', f'Token {refresh_token}']
        self.headers['Authorization'] = self.tokens[0]

    def _pretty_debug_request(self, url: str, method: str, data: dict,
                              headers: dict, tokens: list):
        """
        Return pretty debug string

        :param url: str
        :param method: str
        :param data: dict
        :param headers: dict
        :param tokens: list
        :return: str
        """
        debug = ('-' * self.terminal_size + 'DEBUG - datalake_requests:\n' +
                 f' - url: \n{url}\n' + f' - method: \n{method}\n' +
                 f' - headers: \n{headers}\n' + f' - data: \n{data}\n' +
                 f' - token: \n{tokens[0]}\n' +
                 f' - refresh_token: \n{tokens[1]}\n' +
                 '-' * self.terminal_size)
        return debug

    def _build_url(self, endpoint_config: dict, environment: str):
        """To be implemented by each subclass"""
        raise NotImplementedError()

    def _build_url_for_endpoint(self, endpoint_name):
        base_url = urljoin(self.endpoint_config['main'][self.environment],
                           self.endpoint_config['api_version'])
        endpoints = self.endpoint_config['endpoints']
        return urljoin(base_url, endpoints[endpoint_name], allow_fragments=True)
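_build_url_for_endpoint joins the environment's base URL, the API version and an endpoint template with urljoin; a hedged sketch of the config shape it expects (host, environment name and path template are hypothetical, only the 'retrieve-bulk-search' key appears above):

from urllib.parse import urljoin

# Illustrative config shape consumed by _build_url_for_endpoint
endpoint_config = {
    'main': {'prod': 'https://datalake.example.com/'},  # hypothetical host
    'api_version': 'v2/',
    'endpoints': {
        'retrieve-bulk-search': 'mrti/bulk-search/task/{task_uuid}/',  # hypothetical path
    },
}
base_url = urljoin(endpoint_config['main']['prod'], endpoint_config['api_version'])
print(urljoin(base_url, endpoint_config['endpoints']['retrieve-bulk-search']))
# -> https://datalake.example.com/v2/mrti/bulk-search/task/{task_uuid}/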