def process_downloads(self, sites, collection):
    """
    Download the given files and, when processing is enabled, push them
    through the parse queue into the database.

    :param sites: List of files to download and process
    :type sites: list
    :param collection: MongoDB collection name
    :type collection: str
    :return: None
    :rtype: NoneType
    """
    # Cap the worker pool at 32; os.cpu_count() may return None on some
    # platforms, which would make `None + 4` raise a TypeError.
    worker_size = min(32, (os.cpu_count() or 1) + 4)

    start_time = time.time()

    thread_map(self.download_site, sites, desc="Downloading files")

    if self.do_process:
        thread_map(
            self.file_to_queue,
            self.file_queue.get_full_list(),
            desc="Processing downloaded files",
        )

        self._process_queue_to_db(worker_size, collection=collection)

    # checking if last-modified was in the response headers and not set to
    # default (the epoch acts as a "never seen" sentinel value)
    if "01-01-1970" != self.last_modified.strftime("%d-%m-%Y"):
        setColUpdate(self.feed_type.lower(), self.last_modified)

    self.logger.info(
        "Duration: {}".format(timedelta(seconds=time.time() - start_time))
    )
def __init__(self, action, collection, doc, update_watchlist=False, signal_groups=None):
    """
    Store the action descriptor and, for CVE updates with watchlist
    tracking enabled, record the watchlist update time.

    :param action: Action to perform
    :param collection: MongoDB collection name the action targets
    :type collection: str
    :param doc: Document the action applies to
    :param update_watchlist: Whether to refresh the watchlist (only has
        an effect when ``collection`` is ``'cves'``)
    :type update_watchlist: bool
    :param signal_groups: Optional list of signal groups; defaults to an
        empty list
    :type signal_groups: list | None
    """
    self.action = action
    self.collection = collection
    self.doc = doc
    self.update_watchlist = update_watchlist
    # A literal `[]` default would be a mutable default argument shared by
    # every call site; use a None sentinel and build a fresh list instead.
    self.signal_groups = signal_groups if signal_groups is not None else []

    if self.update_watchlist and collection == 'cves':
        setColUpdate('watchlist', datetime.now())
        self._update_watchlist()
collections = getCVEs()["results"]

# check cpes for cves and parse and store missing cpes in cpeother
batch = []

# skip on empty collections
col = list(collections)
if not col:
    print("Empty collections, import skipped")
    sys.exit(2)

for item in tqdm(col):
    for cpeentry in item["vulnerable_configuration"]:
        # Only queue entries that exist in neither the alternative-CPE nor
        # the main CPE collection. The previous conditions were broken:
        # `checkdup and len(checkdup) <= 0` can never be true (a truthy
        # sequence has positive length), so nothing was ever inserted, and
        # `len(entry.count)` would raise TypeError if it were reached.
        checkdup = getAlternativeCPE(cpeentry)
        if not checkdup:
            entry = getCPE(cpeentry)
            if not entry:
                # Build a human-readable title from the CPE URI: drop the
                # scheme prefix, turn separators into spaces, URL-decode,
                # and title-case the result.
                title = cpeentry[10:]
                title = title.replace(":-:", " ", 10)
                title = title.replace(":", " ", 10)
                title = title.replace("_", " ", 10)
                title = urllib.parse.unquote_plus(title)
                title = title.title()
                batch.append({"id": cpeentry, "title": title})

if batch:
    cpeotherBulkInsert(batch)

# update database info after successful program-run
# NOTE(review): `icve` is not defined in this chunk — presumably set earlier
# in the file (an import timestamp); confirm before relying on it.
setColUpdate("cpeother", icve)