Beispiel #1
0
        def get_status():
            '''
            Fetch the export status from the API and annotate it with the
            chunks that are available but have not been processed yet.

            Returns:
                The decoded status document with an added
                ``chunks_unfinished`` key.

            Raises:
                StopIteration:
                    The export is ``FINISHED`` and no chunks remain.
                TioExportsError:
                    The export reported an ``ERROR`` status.
                TioExportsTimeout:
                    The export is still ``QUEUED`` and the current time is
                    past ``self.timeout``; the export is cancelled first.
            '''
            endpoint = '{}/export/{}/status'.format(self.type, self.uuid)
            status = self._api.get(endpoint).json()

            # Anything the API offers that we have not handled yet is still
            # outstanding; expose that list to the caller through the status.
            status['chunks_unfinished'] = [
                chunk
                for chunk in status['chunks_available']
                if chunk not in self.processed
            ]
            state = status['status']

            # Nothing left to download and the job is done: end the iteration.
            if state == 'FINISHED' and not status['chunks_unfinished']:
                raise StopIteration()

            if state == 'ERROR':
                raise TioExportsError(self.type, self.uuid)

            # A job that is stuck in the queue past the deadline is cancelled
            # before the timeout is reported.
            if state == 'QUEUED' and self.timeout:
                if time.time() > self.timeout:
                    self.cancel()
                    raise TioExportsTimeout(self.type, self.uuid)

            return status
Beispiel #2
0
    def next(self):
        '''
        Return the next record of the export, pulling in a fresh page once the
        current page has been fully consumed.

        The first call marks the object as running in iterator mode.  If it
        was previously marked as a threaded job a ``TioExportsError`` is
        raised instead.  When ``boxify`` is set the record is wrapped in a
        ``Box`` before being returned.
        '''
        # Claim iterator mode on first use; refuse if threaded mode owns it.
        if self._is_iterator is None:
            self._is_iterator = True
        elif not self._is_iterator:
            message = (f'ExportIterator for {self.uuid} '
                       'already set to run as a threaded '
                       'job.  Cannot perform iterable '
                       'operations.')
            raise TioExportsError(export=self.type,
                                  uuid=self.uuid,
                                  msg=message
                                  )

        # The current page is exhausted, so load the next one and restart the
        # in-page cursor.
        page_exhausted = self.page_count >= len(self.page)
        if page_exhausted:
            self._get_page()
            self.page_count = 0

        # Pick the record under the cursor, then advance both the overall and
        # the in-page counters.
        record = self.page[self.page_count]
        self.count += 1
        self.page_count += 1
        return Box(record) if self.boxify else record
Beispiel #3
0
    def _get_status(self) -> Dict:
        '''
        Get the current status of the export, work out which chunks the
        iterator has not processed yet, and return the status to the caller.

        As a side effect ``self.chunks`` and ``self.status`` are refreshed
        from the response.

        Returns:
            The status object returned by ``self._api.exports.status`` with a
            ``chunks_unfinished`` attribute added to it.

        Raises:
            TioExportsError:
                The export reported ``ERROR`` and ``_term_on_error`` is set.
            TioExportsTimeout:
                The export is still ``QUEUED`` and the current time is past
                ``start_time + timeout``.  The export is cancelled before the
                exception is raised.
        '''
        status = self._api.exports.status(self.type, self.uuid)
        state = status.status
        self._log.debug('%s export %s is currently %s',
                        self.type,
                        self.uuid,
                        state
                        )

        # If the export has errored and the _term_on_error flag is set, then
        # we will raise an export error.
        if state == 'ERROR' and self._term_on_error:
            raise TioExportsError(self.type, self.uuid)

        # If the export is still queued and the timeout has been reached, then
        # we will inform the API that we want to cancel the export and then
        # raise a timeout exception.
        if (
            state == 'QUEUED'
            and self.timeout is not None
            and time.time() > self.timeout + self.start_time
        ):
            self.cancel()
            raise TioExportsTimeout(self.type, self.uuid)

        # If the _wait_for_complete flag has been set, then we will always
        # report an empty list until the status is 'FINISHED'.
        if self._wait_for_complete and state != 'FINISHED':
            status.chunks_unfinished = []

        # In all other situations, compute which of the available chunks have
        # not been processed by the iterator yet.  A response without a
        # "chunks_available" entry is treated as having no chunks.
        else:
            available = status.get('chunks_available', [])
            status.chunks_unfinished = [
                chunk for chunk in available if chunk not in self.processed
            ]

        # Mirror the computed values onto the iterator itself.
        self.chunks = status.chunks_unfinished
        self.status = status.status

        # return the status to the caller.
        return status
Beispiel #4
0
    def run_threaded(self,
                     func: Any,
                     kwargs: Optional[Dict] = None,
                     num_threads: int = 2,
                     ) -> List:
        '''
        Initiate a multi-threaded export using the provided function and
        keyword arguments.  The following field names are reserved and must be
        accepted either as an optional keyword argument, or as a named param.

            * data (list[dict]): Receiver of the data-chunk.
            * export_uuid (str): Receiver for the export job UUID.
            * export_type (str): Receiver for the export data-type.
            * export_chunk_id (int): Receiver for the export chunk id.

        Args:
            func:
                The function to pass to the thread executor.
            kwargs:
                Any additional keyword arguments that are to be passed to the
                function as part of execution.  The dictionary is not
                modified; the reserved fields above override any entries of
                the same name.
            num_threads:
                How many concurrent threads should be run.  The default is
                ``2``.

        Returns:
            The list of futures returned by the executor, one per chunk that
            was submitted.

        Raises:
            TioExportsError:
                The object has already been used as an iterable.

        Examples:

            A simple example is to simply download the chunks and write them
            to disk.

            >>> def write_chunk(data: List[Dict],
            ...                 export_uuid: str,
            ...                 export_type: str,
            ...                 export_chunk_id: int
            ...                 ):
            ...     fn = f'{export_type}-{export_uuid}-{export_chunk_id}.json'
            ...     with open(fn, 'w') as fobj:
            ...         json.dump(data, fobj)
            >>>
            >>> export = tio.exports.vulns()
            >>> export.run_threaded(write_chunk, num_threads=4)
        '''
        # Work from a private copy so that the caller's dictionary is never
        # polluted with the reserved fields (or left holding chunk data).
        extra_kwargs = dict(kwargs) if kwargs else {}

        # if the iterator flag is unset, then we will set it.
        if self._is_iterator is None:
            self._is_iterator = False

        # if the iterator flag was already set to be an iterable, then we will
        # raise an error that we cannot continue.
        elif self._is_iterator:
            raise TioExportsError(export=self.type,
                                  uuid=self.uuid,
                                  msg=(f'ExportIterator for {self.uuid} '
                                       'already set to run as an iterable '
                                       'job.  Cannot perform threaded '
                                       'operations.')
                                  )

        # initiate the thread pool and get the show on the road.
        jobs = []
        with ThreadPoolExecutor(max_workers=num_threads) as executor:
            # we will want to make sure to stay in this loop until the job is
            # finished and all of the chunks have been processed.  The
            # _get_chunks call is evaluated first on every pass, before the
            # status is inspected.
            while (len(self._get_chunks()) > 0
                   or self.status != 'FINISHED'
                   ):
                # drain the chunks that are currently available: download
                # each one, build the keyword arguments for this job only, and
                # hand the job to the executor.  Once the list is empty the
                # outer loop calls _get_chunks again to pick up more chunks.
                while self.chunks:
                    chunk_id = self.chunks.pop(0)
                    job_kwargs = dict(extra_kwargs)
                    job_kwargs['data'] = self._api.exports.download_chunk(
                        self.type, self.uuid, chunk_id)
                    job_kwargs['export_uuid'] = self.uuid
                    job_kwargs['export_type'] = self.type
                    job_kwargs['export_chunk_id'] = chunk_id
                    self._log.debug(
                        (f'{self.type} export {self.uuid} chunk {chunk_id} '
                         'has been downloaded and the data has been handed '
                         'off to the specified function'
                         ))
                    jobs.append(executor.submit(func, **job_kwargs))
                    self.processed.append(chunk_id)
        return jobs