def get_status(self):
    # Query the API for the status of the export.
    status = self._api.get(
        '{}/export/{}/status'.format(self.type, self.uuid)).json()

    # We need to get the list of chunks that we haven't completed yet and
    # are available for download.
    unfinished = [
        c for c in status['chunks_available'] if c not in self.processed
    ]

    # Add the chunks_unfinished key with the unfinished list as the
    # associated value and then return the status to the caller.
    status['chunks_unfinished'] = unfinished

    # If there are no more chunks to process and the export status is set
    # to finished, then we will stop the iteration.
    if (status['status'] == 'FINISHED'
            and len(status['chunks_unfinished']) < 1):
        raise StopIteration()

    # If the export errored out, raise an export error.
    if status['status'] == 'ERROR':
        raise TioExportsError(self.type, self.uuid)

    # If the export is still queued and the timeout has elapsed, cancel
    # the export job and raise a timeout exception.
    if (status['status'] == 'QUEUED'
            and self.timeout
            and time.time() > self.timeout):
        self.cancel()
        raise TioExportsTimeout(self.type, self.uuid)

    return status
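# A minimal sketch of how a caller might poll get_status() until chunks
# become available.  Only get_status() and its exceptions come from the
# method above; the helper name and the one-second backoff are assumptions
# for illustration, not part of the library.
import time

def _wait_for_chunks(iterator):
    # Poll until at least one unprocessed chunk is reported.  get_status()
    # raises StopIteration once a FINISHED export is fully processed, and
    # TioExportsTimeout if the QUEUED timeout elapses.
    while True:
        status = iterator.get_status()
        if status['chunks_unfinished']:
            return status['chunks_unfinished']
        time.sleep(1)  # assumed backoff; tune to the API's rate limits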
def next(self):
    '''
    Get the next item in the current page
    '''
    if self._is_iterator is None:
        self._is_iterator = True
    elif not self._is_iterator:
        raise TioExportsError(
            export=self.type,
            uuid=self.uuid,
            msg=(f'ExportIterator for {self.uuid} already set to run as '
                 'a threaded job.  Cannot perform iterable operations.')
        )

    # If we have worked through the current page of records, then we
    # should query the next page of records.
    if self.page_count >= len(self.page):
        self._get_page()
        self.page_count = 0

    # Get the relevant record, increment the counters, and return the
    # record.
    item = self.page[self.page_count]
    self.count += 1
    self.page_count += 1
    if self.boxify:
        return Box(item)
    return item
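# A minimal usage sketch, assuming next() backs the standard iterator
# protocol (i.e. the class also defines __iter__/__next__ delegating to
# next(), as is common for iterators shaped like this one).  The
# tio.exports.vulns() call mirrors the run_threaded() docstring example
# below; 'plugin_id' is an illustrative field name, not a guaranteed key.
export = tio.exports.vulns()
for finding in export:
    print(finding.get('plugin_id'))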
def _get_status(self) -> Dict:
    '''
    Get the current status of the export, and then calculate where the
    iterator is within the export job and return the status to the caller.
    '''
    status = self._api.exports.status(self.type, self.uuid)
    self._log.debug('%s export %s is currently %s',
                    self.type,
                    self.uuid,
                    status.status
                    )

    # If the export has errored and the _term_on_error flag is set, then
    # we will raise an export error.
    if status.status == 'ERROR' and self._term_on_error:
        raise TioExportsError(self.type, self.uuid)

    # If the export is still queued and the timeout has been reached, then
    # we will inform the API that we want to cancel the export and then
    # raise a timeout exception.
    if (
        status.status == 'QUEUED'
        and self.timeout is not None
        and time.time() > self.timeout + self.start_time
    ):
        self.cancel()
        raise TioExportsTimeout(self.type, self.uuid)

    # If the _wait_for_complete flag has been set, then we will always
    # return an empty list until the status is 'FINISHED'.
    if self._wait_for_complete and status.status != 'FINISHED':
        status.chunks_unfinished = []

    # In all other situations, we will compute which chunks are currently
    # unprocessed by the iterator and store the list within the
    # "chunks_unfinished" attribute.
    else:
        avail = status.get('chunks_available', [])
        unfinished = [c for c in avail if c not in self.processed]
        status.chunks_unfinished = unfinished

    self.chunks = status.chunks_unfinished
    self.status = status.status

    # Return the status to the caller.
    return status
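# A hedged sketch of how _get_status() might drive page retrieval.  The
# _get_page name is referenced from next() above and download_chunk() from
# run_threaded() below, but this body is an assumption showing one way the
# unfinished-chunk list could be consumed, not the library's implementation.
# Note the timeout arithmetic above: self.timeout is relative here (checked
# against self.timeout + self.start_time), whereas get_status() earlier
# treats self.timeout as an absolute deadline.
def _get_page(self):
    # Poll until an unfinished chunk is reported, or until _get_status()
    # raises on error/timeout.  Handling a FINISHED export with no
    # remaining chunks is elided for brevity.
    while not self._get_status().chunks_unfinished:
        time.sleep(1)  # assumed polling interval
    chunk_id = self.chunks.pop(0)
    self.page = self._api.exports.download_chunk(
        self.type, self.uuid, chunk_id)
    self.processed.append(chunk_id)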
def run_threaded(self,
                 func: Any,
                 kwargs: Optional[Dict] = None,
                 num_threads: int = 2,
                 ) -> List:
    '''
    Initiate a multi-threaded export using the provided function and
    keyword arguments.  The following field names are reserved and must be
    accepted either as an optional keyword argument, or as a named param.

    * data (list[dict]): Receiver of the data-chunk.
    * export_uuid (str): Receiver for the export job UUID.
    * export_type (str): Receiver for the export data-type.
    * export_chunk_id (int): Receiver for the export chunk id.

    Args:
        func:
            The function to pass to the thread executor.
        kwargs:
            Any additional keyword arguments that are to be passed to the
            function as part of execution.
        num_threads:
            How many concurrent threads should be run.  The default is
            ``2``.

    Examples:

        A simple example is to download the chunks and write them to disk.

        >>> def write_chunk(data: List[Dict],
        ...                 export_uuid: str,
        ...                 export_type: str,
        ...                 export_chunk_id: int
        ...                 ):
        ...     fn = f'{export_type}-{export_uuid}-{export_chunk_id}.json'
        ...     with open(fn, 'w') as fobj:
        ...         json.dump(data, fobj)
        >>>
        >>> export = tio.exports.vulns()
        >>> export.run_threaded(write_chunk, num_threads=4)
    '''
    if not kwargs:
        kwargs = {}

    # If the iterator flag is unset, then we will set it.
    if self._is_iterator is None:
        self._is_iterator = False

    # If the iterator flag was already set to be an iterable, then we will
    # raise an error, as we cannot continue.
    elif self._is_iterator:
        raise TioExportsError(
            export=self.type,
            uuid=self.uuid,
            msg=(f'ExportIterator for {self.uuid} already set to run as '
                 'an iterable job.  Cannot perform threaded operations.')
        )

    # Initiate the thread pool and get the show on the road.
    with ThreadPoolExecutor(max_workers=num_threads) as executor:
        jobs = []

        # We will want to make sure to stay in this loop until the job is
        # finished and all of the chunks have been processed.
        while not (len(self._get_chunks()) < 1
                   and self.status in ['FINISHED']
                   ):
            # Loop over the number of chunks that are available, pulling
            # each chunk id out of the chunk list, constructing the kwargs
            # with the necessary elements, and loading that job into the
            # executor.  When all of the chunks have been added to the
            # pool, call the _get_chunks method again to wait for more
            # chunks to become available.
            for _ in range(len(self.chunks)):
                chunk_id = self.chunks.pop(0)
                kwargs['data'] = self._api.exports.download_chunk(
                    self.type, self.uuid, chunk_id)
                kwargs['export_uuid'] = self.uuid
                kwargs['export_type'] = self.type
                kwargs['export_chunk_id'] = chunk_id
                self._log.debug(
                    f'{self.type} export {self.uuid} chunk {chunk_id} '
                    'has been downloaded and the data has been handed '
                    'off to the specified function'
                )
                jobs.append(executor.submit(func, **kwargs))
                self.processed.append(chunk_id)
        return jobs
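# run_threaded() returns the list of Future objects it submitted, so a
# caller can surface per-chunk exceptions once the pool drains.  This is a
# minimal sketch assuming only the stdlib concurrent.futures API; write_chunk
# is the hypothetical handler from the docstring example above.  Note that
# executor.submit(func, **kwargs) unpacks kwargs at submit time, so reusing
# the same kwargs dict across iterations still binds each job's arguments
# independently.
jobs = export.run_threaded(write_chunk, num_threads=4)
for job in jobs:
    job.result()  # re-raises any exception raised inside the worker thread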