Example #1
    def queue_bulk_threats(self, atom_list, payload):
        hashkey_created = []
        bulk_in_flight = []  # bulk task uuid unchecked

        for batch in split_list(atom_list, self._batch_size()):
            if len(bulk_in_flight) >= self.OCD_DTL_MAX_BULK_THREATS_IN_FLIGHT:
                bulk_threat_task_uuid = bulk_in_flight.pop(0)
                hashkey_created += self.check_bulk_threats_added(bulk_threat_task_uuid)

            payload['atom_values'] = '\n'.join(batch)  # Raw csv expected
            response = self.datalake_requests(self.url, 'post', self._post_headers(), payload)

            task_uid = response.get('task_uuid')
            if task_uid:
                bulk_in_flight.append(response['task_uuid'])
            else:
                logger.warning(f'batch of threats from {batch[0]} to {batch[-1]} failed to be created')

        # Check the remaining bulk tasks
        for bulk_threat_task_uuid in bulk_in_flight:
            hashkey_created += self.check_bulk_threats_added(bulk_threat_task_uuid)

        nb_threats = len(hashkey_created)
        if nb_threats > 0:
            ok_sign = '\x1b[0;30;42m' + '  OK  ' + '\x1b[0m'
            logger.info(f'Created {nb_threats} threats'.ljust(self.terminal_size - 6, ' ') + ok_sign)
        else:
            ko_sign = '\x1b[0;30;41m' + '  KO  ' + '\x1b[0m'
            logger.info(f'Failed to create any threats'.ljust(self.terminal_size - 6, ' ') + ko_sign)
        return set(hashkey_created)
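
split_list is referenced above but not shown on this page. A minimal chunking helper with the same call shape might look like the sketch below (an assumed implementation, not the project's actual helper):

def split_list(full_list, chunk_size):
    """Yield successive chunk_size-sized slices of full_list (assumed behavior)."""
    for index in range(0, len(full_list), chunk_size):
        yield full_list[index:index + chunk_size]

Either a list of lists or a generator works here, since queue_bulk_threats only iterates over the batches once.
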
Example #2
    def check_bulk_threats_added(self, bulk_threat_task_uuid) -> list:
        """Check if the bulk manual threat submission completed successfully and if so return the hashkeys created"""

        def is_completed_task(json_response):
            return json_response['state'] in ('DONE', 'CANCELLED')

        hashkey_created = []
        url = self._build_url_for_endpoint('retrieve-threats-manual-bulk')

        try:
            response = self.handle_bulk_task(
                bulk_threat_task_uuid,
                url,
                timeout=self.OCD_DTL_MAX_BULK_THREATS_TIME,
                additional_checks=[is_completed_task]
            )
        except TimeoutError:
            response = {}

        hashkeys = response.get('hashkeys')
        atom_values = response.get('atom_values')

        # if the state is not DONE we consider the batch a failure
        if hashkeys and response.get('state', 'CANCELLED') == 'DONE':
            hashkey_created.extend(hashkeys)
        else:
            # default values in case the json is missing some fields
            hashkeys = hashkeys or ['<missing value>']
            atom_values = atom_values or ['<missing value>']
            logger.warning(f'batch of threats from {atom_values[0]}({hashkeys[0]}) to {atom_values[-1]}({hashkeys[-1]})'
                           f' failed to be created during task {bulk_threat_task_uuid}')
        return hashkey_created
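
handle_bulk_task is not shown on this page; conceptually it polls the bulk-task endpoint until one of the additional_checks passes or the timeout expires. A generic sketch of that polling shape, using hypothetical names and no real API calls:

import time

def poll_until(fetch_state, is_done, timeout, interval=1.0):
    """Call fetch_state() repeatedly until is_done(state) is truthy or timeout (in seconds) expires."""
    deadline = time.monotonic() + timeout
    while time.monotonic() < deadline:
        state = fetch_state()
        if is_done(state):
            return state
        time.sleep(interval)
    raise TimeoutError('task did not complete in time')

The TimeoutError raised here is what the try/except in check_bulk_threats_added turns into an empty response.
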
Example #3
def defang_threats(threats, atom_type):
    defanged = []
    # matches urls like http://www.website.com:444/file.html
    standard_url_regex = re.compile(r'^(https?:\/\/)[a-z0-9]+([\-\.][a-z0-9]+)*\.[a-z]{2,5}(:[0-9]{1,5})?(\/.*)?$')
    # matches urls like http://185.25.5.3:8080/result.php (ipv4 or ipv6)
    ip_url_regex = re.compile(r'^(https?:\/\/)[0-9a-zA-Z]{1,4}([\.:][0-9a-zA-Z]{1,4}){3,7}(:[0-9]{1,5})?(\/.*)?$')
    for threat in threats:
        unmodified_threat = threat
        threat = threat.replace('[.]', '.')
        threat = threat.replace('(.)', '.')
        if atom_type == 'url':
            if not threat.startswith('http'):
                if threat.startswith('hxxp'):
                    threat = threat.replace('hxxp', 'http')
                elif threat.startswith('ftp'):
                    threat = threat.replace('ftp', 'http')
                elif threat.startswith('sftp'):
                    threat = threat.replace('sftp', 'https')
                else:
                    threat = 'http://' + threat
            if not standard_url_regex.match(threat) and not ip_url_regex.match(threat):
                logger.warning(f'\'{unmodified_threat}\' has been modified to \'{threat}\' but is still not recognized'
                               f' as a URL. Skipping this line')
                continue
            if unmodified_threat != threat:
                logger.info(f'\'{unmodified_threat}\' has been modified to \'{threat}\'')
        defanged.append(threat)
    return defanged
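
A quick illustration of the transformation with made-up indicators; the expected output is reasoned from the code above, not taken from the project's tests:

threats = ['hxxp://phishing[.]example[.]com/login', 'www.bad-site.com/a.html']
print(defang_threats(threats, 'url'))
# Expected: ['http://phishing.example.com/login', 'http://www.bad-site.com/a.html']
# The first value is rewritten (hxxp -> http, [.] -> .); the second only gains the http:// scheme.
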
Example #4
    def datalake_requests(self,
                          url: str,
                          method: str,
                          headers: dict,
                          post_body: dict = None):
        """
        Use it to request the API
        """
        self.headers = headers
        tries_left = self.SET_MAX_RETRY

        logger.debug(
            self._pretty_debug_request(url, method, post_body, headers,
                                       self.tokens))

        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            headers['Authorization'] = self.tokens[0]
        while True:
            response = self._send_request(url, method, headers, post_body)
            logger.debug(f'API response:\n{str(response.text)}')
            if response.status_code == 401:
                logger.warning(
                    'Token expired or Missing authorization header. Updating token'
                )
                self._token_update(self._load_response(response))
            elif response.status_code == 422:
                logger.warning('Bad authorization header. Updating token')
                logger.debug(f'422 HTTP code: {response.text}')
                self._token_update(self._load_response(response))
            elif response.status_code < 200 or response.status_code > 299:
                logger.error(
                    f'API returned non 2xx response code : {response.status_code}\n{response.text}'
                    f'\n Retrying')
            else:
                try:
                    dict_response = self._load_response(response)
                    return dict_response
                except JSONDecodeError:
                    logger.error(
                        'Request unexpectedly returned non dict value. Retrying'
                    )
            tries_left -= 1
            if tries_left <= 0:
                logger.error(
                    'Request failed: Will return nothing for this request')
                return {}
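
A hypothetical call shape for this method; the URL and body below are illustrative placeholders, not values from the source, and _post_headers() is assumed to build the usual Content-Type/Accept headers:

response_dict = engine.datalake_requests(
    'https://datalake.example.org/api/v2/threats/bulk-lookup/',  # placeholder endpoint
    'post',
    engine._post_headers(),
    {'hashkeys': ['0123456789abcdef']},  # placeholder body
)
# Returns the decoded JSON dict on success, or {} once SET_MAX_RETRY attempts are exhausted.
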
Example #5
 def post_new_score_from_list(self, hashkeys: list, scores: Dict[str, int],
                              override_type: str = 'temporary') -> list:
     """
     Post new score to the API from a list of hashkeys
     """
     return_value = []
     for hashkey in hashkeys:
         response = self._post_new_score(hashkey, scores, override_type)
         if response.get('message'):
             logger.warning('\x1b[6;30;41m' + hashkey + ': ' + response.get('message') + '\x1b[0m')
             return_value.append(hashkey + ': ' + response.get('message'))
         else:
             return_value.append(hashkey + ': OK')
             logger.info('\x1b[6;30;42m' + hashkey + ': OK\x1b[0m')
     return return_value
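
A hypothetical call; the hashkeys are placeholders and the score keys are assumed threat types (the method itself only requires a Dict[str, int]):

results = engine.post_new_score_from_list(
    ['1a2b3c4d5e6f', 'a0b1c2d3e4f5'],  # placeholder hashkeys
    {'ddos': 90, 'phishing': 0},       # assumed threat-type -> score mapping
    override_type='permanent',
)
# Each entry is either '<hashkey>: OK' or '<hashkey>: <error message>'.
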
Example #6
 def post_tags(self, hashkeys: Set[str], tags: List[str], *, public=True) -> list:
     """
     Post tags on threat hashkeys
     """
     visibility = 'public' if public else 'organization'
     return_value = []
     for hashkey in hashkeys:
         response = self._post_tags_to_hashkey(hashkey, tags, visibility)
         if response.get('message'):
             logger.warning('\x1b[6;30;41m' + hashkey + ': ' + response.get('message') + '\x1b[0m')
             return_value.append(hashkey + ': ' + response.get('message'))
         else:
             return_value.append(hashkey + ': OK')
             logger.info('\x1b[6;30;42m' + hashkey + ': OK\x1b[0m')
     return return_value
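
Same per-hashkey pattern as above; only the keyword-only public flag differs. A hypothetical call that keeps the tags visible to the organization only (hashkey and tags are placeholders):

engine.post_tags({'1a2b3c4d5e6f'}, ['sinkholed', 'reviewed'], public=False)
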
Example #7
 def post_comments_and_tags_from_list(self,
                                      hashkeys: Set[str],
                                      content: str,
                                      tags: list,
                                      *,
                                      public=True) -> list:
     """
     Post comments and tag on threats hashkey
     """
     visibility = 'public' if public else 'organization'
     return_value = []
     for hashkey in hashkeys:
         response = self._post_comments_and_tags(hashkey, content, tags,
                                                 visibility)
         if response.get('message'):
             logger.warning('\x1b[6;30;41m' + hashkey + ': ' +
                            response.get('message') + '\x1b[0m')
             return_value.append(hashkey + ': ' + response.get('message'))
         else:
             return_value.append(hashkey + ': OK')
             logger.info('\x1b[6;30;42m' + hashkey + ': OK\x1b[0m')
     return return_value
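
Examples #5 to #7 share the same loop: call a per-hashkey endpoint, color the log line, and collect '<hashkey>: OK' or the error message. One possible way to factor that out, shown as a sketch rather than the project's actual code (it assumes the module's logger):

def _collect_per_hashkey(hashkeys, post_one):
    """Apply post_one(hashkey) to every hashkey and collect '<hashkey>: OK' / error strings."""
    results = []
    for hashkey in hashkeys:
        message = post_one(hashkey).get('message')
        if message:
            logger.warning('\x1b[6;30;41m' + hashkey + ': ' + message + '\x1b[0m')
            results.append(hashkey + ': ' + message)
        else:
            logger.info('\x1b[6;30;42m' + hashkey + ': OK\x1b[0m')
            results.append(hashkey + ': OK')
    return results

post_tags, for instance, would then reduce to a single call with a lambda binding tags and visibility.
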
Example #8
    def datalake_requests(self,
                          url: str,
                          method: str,
                          headers: dict,
                          post_body: dict = None):
        """
        Use it to request the API.
        """
        tries_left = self.SET_MAX_RETRY
        api_response = None

        logger.debug(
            self._pretty_debug_request(url, method, post_body, headers,
                                       self.tokens))

        if not headers.get('Authorization'):
            fresh_tokens = self.token_generator.get_token()
            self.tokens = [
                f'Token {fresh_tokens["access_token"]}',
                f'Token {fresh_tokens["refresh_token"]}'
            ]
            headers['Authorization'] = self.tokens[0]

        while tries_left > 0:
            try:
                # keep the raw response so the error branch below can log its body
                api_response = self._send_request(url, method, headers, post_body)
                dict_response = self._load_response(api_response)
                if self._token_update(dict_response):
                    return dict_response

            except Exception:
                tries_left -= 1
                if tries_left <= 0:
                    logger.warning(
                        'Request failed: Will return nothing for this request')
                    return {}
                elif not api_response:
                    logger.debug(
                        'ERROR: something has gone wrong with the request ...')
                    logger.debug('sleep 5 seconds')
                    time.sleep(5)
                else:
                    logger.warning(
                        'ERROR: wrong request, please refer to the API')
                    logger.warning(
                        f'for URL: {url}\nwith:\nheaders:{headers}\nbody:{post_body}\n'
                    )
                    logger.warning(api_response.text)
def main(override_args=None):
    """Method to start the script"""

    # Load initial args
    starter = BaseScripts()
    parser = starter.start(
        'Gets threats or hashkeys from given atom types and atom values.')
    supported_atom_types = parser.add_argument_group('Supported Atom Types')

    parser.add_argument(
        'untyped_atoms',
        help=
        'untyped atom values to look up. Useful when you do not know what the atom type is',
        nargs='*',
    )
    for atom_type in ATOM_TYPES_FLAGS:
        supported_atom_types.add_argument(
            f'--{atom_type}',
            action='append',
            help=f'set a single {atom_type} atom type with its value',
        )
    parser.add_argument(
        '-ad',
        '--atom-details',
        dest='hashkey_only',
        default=True,
        action='store_false',
        help='return the full details of the threats',
    )
    parser.add_argument(
        '-i',
        '--input',
        action='append',
        help='read threats to look up from FILE. [atomtype:path/to/file.txt]',
    )
    parser.add_argument(
        '-ot',
        '--output-type',
        help='set the desired output type {json,csv}. Default is json',
    )

    if override_args:
        args = parser.parse_args(override_args)
    else:
        args = parser.parse_args()
    logger.debug('START: bulk_lookup_threats.py')

    # create output type header
    accept_header = {'Accept': None}

    if args.output_type:
        try:
            accept_header['Accept'] = BaseEngine.output_type2header(
                args.output_type)
        except ParserError as e:
            logger.exception(
                f'Exception raised while getting output type headers # {str(e)}',
                exc_info=False)
            exit(1)

    # gather all typed atoms passed via arguments and input files
    typed_atoms = {}

    # set validation flags regarding the presence or absence of CLI arguments
    has_file = args.input is not None
    has_flag = False
    for flag in ATOM_TYPES_FLAGS:
        atom_values = getattr(args, flag)
        if atom_values is not None:
            typed_atoms[flag] = atom_values
            has_flag = True

    # validate that there is at least one untyped atom, typed atom, or input file
    if (not has_flag and not has_file
            and not args.untyped_atoms) or (SUBCOMMAND_NAME
                                            in args.untyped_atoms):
        parser.error(
            "you must provide at least one of following: untyped atom, atom type, input file."
        )

    # process input files
    if has_file:
        for input_file in args.input:
            file_atom_type, filename = get_atom_type_from_filename(input_file)
            logger.debug(f'file {filename} was recognized as {file_atom_type}')

            if file_atom_type == UNTYPED_ATOM_TYPE:
                args.untyped_atoms += starter._load_list(filename)
            else:
                typed_atoms.setdefault(file_atom_type,
                                       []).extend(starter._load_list(filename))

    # load api_endpoints and tokens
    endpoints_config, main_url, tokens = starter.load_config(args)
    post_engine_bulk_lookup_threats = BulkLookupThreats(
        endpoints_config, args.env, tokens)
    post_engine_atom_values_extractor = AtomValuesExtractor(
        endpoints_config, args.env, tokens)

    # lookup for atom types
    if args.untyped_atoms:
        atoms_values_extractor_response = post_engine_atom_values_extractor.atom_values_extract(
            args.untyped_atoms)
        if atoms_values_extractor_response['found'] > 0:
            typed_atoms = join_dicts(
                typed_atoms, atoms_values_extractor_response['results'])
        else:
            logger.warning('none of your untyped atoms could be typed')

        # find out which atoms couldn't be typed so they can be printed
        if atoms_values_extractor_response['not_found'] > 0:
            for atom_type, atom_list in atoms_values_extractor_response[
                    'results'].items():
                args.untyped_atoms = [
                    untyped_atom for untyped_atom in args.untyped_atoms
                    if untyped_atom not in atom_list
                ]

            logger.warning(
                f'\x1b[6;37;43m{"#" * 60} UNTYPED ATOMS {"#" * 47}\x1b[0m')
            logger.warning('\n'.join(args.untyped_atoms))
            logger.warning('')

    response = post_engine_bulk_lookup_threats.bulk_lookup_threats(
        threats=typed_atoms,
        additional_headers=accept_header,
        hashkey_only=args.hashkey_only)

    pretty_print(response, args.output_type)

    if args.output:
        starter.save_output(args.output, response)
        logger.debug(f'Results saved in {args.output}\n')

    logger.debug('END: bulk_lookup_threats.py')
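
The override_args parameter makes the script callable from tests or other Python code instead of the command line. A hypothetical invocation, assuming 'domain' is one of the ATOM_TYPES_FLAGS and that the environment configuration resolved by load_config provides valid credentials:

main(override_args=['--domain', 'evil.example.com', '--output-type', 'json', '-ad'])
# Equivalent to: bulk_lookup_threats.py --domain evil.example.com --output-type json -ad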