class plantmongo:

    def __init__(self, addr, user="******", password="******"):
        self.addr = addr
        self.user = user
        self.password = password
        self.conn = Connection(addr)
        self.db = self.conn.plant
        if not self.db.authenticate("plant", "plant"):
            print "unable to authenticate to mongodb"
            self.connected = False
            return None
        self.coll = self.db.test

    def publish(self, xbeeobj):
        post = {
            "time": time.time(),
            "temp": xbeeobj.temp,
            "moisture": xbeeobj.moisture,
            "dewpoint": xbeeobj.dewpoint,
            "pressure": xbeeobj.pressure,
            "light": xbeeobj.light,
            "batt": xbeeobj.batt,
            "rssi": xbeeobj.rssi,
        }
        self.coll.save(post)

    def close(self):
        self.conn.disconnect()
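# Hypothetical usage sketch for the plantmongo class above (not part of the
# original source). It assumes a MongoDB instance with a "plant" database and
# "plant"/"plant" credentials, and a reading object exposing the attribute
# names publish() reads; SensorReading is a stand-in for the real XBee packet
# object.
from collections import namedtuple

SensorReading = namedtuple(
    "SensorReading",
    ["temp", "moisture", "dewpoint", "pressure", "light", "batt", "rssi"])

def publish_one_reading():
    # Connect, authenticate against the "plant" database and store one sample.
    pm = plantmongo("localhost")
    reading = SensorReading(21.5, 0.42, 11.0, 1013.2, 540, 3.7, -71)
    pm.publish(reading)
    # disconnect() releases the sockets held by the underlying Connection.
    pm.close()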
def WriteTempFile(self, data, hash_name=None):
    if self.use_cache == True:
        if hash_name is None:
            hash = md5(self.url)
            hash_name = hash.hexdigest()
        self.last_hash_name = hash_name
        self.log.debug('write file to cache: %s' % hash_name)
        self.log.debug('use mongo: %s' % self.use_mongo)
        # open(self.download_temp+hash_name, 'wb').write(data)
        if self.use_mongo == False:
            f_name = self.download_temp + hash_name + '.gz'
            f = gzip.open(f_name, 'wb')
            f.write(data)
            f.close()
        if self.use_mongo == True:
            connection = Connection("localhost", 27017)
            db = connection['parser']
            s = StringIO.StringIO()
            f = gzip.GzipFile(fileobj=s, mode='wb')
            f.write(data)
            f.close()
            val = s.getvalue()
            s.close()
            del(s)
            del(f)
            fs = GridFS(db)
            fp = fs.open(hash_name, 'w', self.download_temp.replace('/', ''))
            fp.write(val)
            fp.close()
            connection.disconnect()
def GetTempFile(self):
    """Fetch the downloaded page from the cache (or store it there)."""
    data = None
    if self.use_cache == True:
        hash = md5(self.url)
        self.hash_name = hash.hexdigest()
        self.page_from_cache = False
        # f_name = self.download_temp + self.hash_name
        f_name = self.download_temp + self.hash_name + '.gz'
        if self._devel == True:
            print 'seek cache: ', f_name, '::', self.url
        # is the local file still uncompressed (not gzip)?
        if os.path.exists(f_name.replace('.gz', '')):
            data = open(f_name.replace('.gz', ''), 'rb').read()
            f = gzip.open(f_name, 'wb')
            f.write(data)
            f.close()
            os.unlink(f_name.replace('.gz', ''))
            return data
        # now read the gzip file
        if self.read_cache == True:
            if self.use_mongo == True:
                try:
                    connection = Connection("localhost", 27017)
                    db = connection['parser']
                    fs = GridFS(db)
                    fp = fs.open(self.hash_name, 'r', self.download_temp.replace('/', ''))
                    f = gzip.GzipFile(fileobj=fp, mode='rb')
                    data = f.read()
                    f.close()
                    fp.close()
                    del(f)
                    connection.disconnect()
                except Exception, e:
                    print 'read cache error: ', e
                    self.page_from_cache = False
                    return None
            elif os.path.exists(f_name):
                f = gzip.open(f_name, 'rb')
                data = f.read()
                f.close()
            else:
                data = ''
            if self._devel == True:
                print '# Found cache: ', self.hash_name
            self.page_from_cache = True
def test_disconnect(self):
    c = Connection(self.host, self.port)
    coll = c.foo.bar

    c.disconnect()
    c.disconnect()

    coll.count()

    c.disconnect()
    c.disconnect()

    coll.count()
def test_disconnect(self):
    c = Connection(self.host, self.port)
    coll = c.pymongo_test.bar

    c.disconnect()
    c.disconnect()

    coll.count()

    c.disconnect()
    c.disconnect()

    coll.count()
class MongoDB(IPlugin, Report):
    """Stores report in MongoDB."""

    def connect(self):
        """Connects to Mongo database, loads options and set connectors.
        @raise Exception: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.MalwareAnalyse
        except TypeError:
            raise Exception("Mongo connection port must be integer")
        except ConnectionFailure:
            raise Exception("Cannot connect to MongoDB")

    def run(self, results, objfile):
        """Writes report.
        @param results: analysis results dictionary.
        @param objfile: file object
        @raise Exception: if fails to connect or write to MongoDB.
        """
        self.connect()

        # Count query using URL hash and file hash
        url_md5 = results["Info"]["url"]["md5"]
        file_md5 = results["Info"]["file"]["md5"]
        query = {"$and": [{"Info.url.md5": {"$in": [url_md5]}},
                          {"Info.file.md5": {"$in": [file_md5]}}]}
        count = self.db.ragpicker.find(query).count()

        # If a report is already available for this file and URL -> do not insert
        if count == 0:
            # Create a copy of the dictionary. This is done in order to not modify
            # the original dictionary and possibly compromise the following
            # reporting modules.
            report = dict(results)
            # Store the report and retrieve its object id.
            self.db.ragpicker.insert(report)

        self.conn.disconnect()
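# Hypothetical call sketch for the Ragpicker MongoDB reporting plugin above
# (not from the original project). Only the keys that run() actually reads
# are filled in; a real results dictionary contains much more, and
# plugin.options is assumed to have been populated by the framework so that
# connect() can reach MongoDB.
def report_sample(plugin, objfile):
    results = {
        "Info": {
            "url": {"md5": "0" * 32},   # placeholder URL hash
            "file": {"md5": "f" * 32},  # placeholder file hash
        },
    }
    # run() connects, skips the insert if a report with the same URL/file
    # hash pair already exists, and finally calls conn.disconnect().
    plugin.run(results, objfile)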
class MongoDBJobStore(JobStore):
    def __init__(self, database='apscheduler', collection='jobs',
                 connection=None, pickle_protocol=pickle.HIGHEST_PROTOCOL,
                 **connect_args):
        self.jobs = []
        self.pickle_protocol = pickle_protocol

        if not database:
            raise ValueError('The "database" parameter must not be empty')
        if not collection:
            raise ValueError('The "collection" parameter must not be empty')

        if connection:
            self.connection = connection
        else:
            self.connection = Connection(**connect_args)

        self.collection = self.connection[database][collection]

    def add_job(self, job):
        job_dict = job.__getstate__()
        job_dict['trigger'] = Binary(pickle.dumps(job.trigger,
                                                  self.pickle_protocol))
        job_dict['args'] = Binary(pickle.dumps(job.args, self.pickle_protocol))
        job_dict['kwargs'] = Binary(pickle.dumps(job.kwargs,
                                                 self.pickle_protocol))
        job.id = self.collection.insert(job_dict)
        self.jobs.append(job)

    def remove_job(self, job):
        self.collection.remove(job.id)
        self.jobs.remove(job)

    def load_jobs(self):
        jobs = []
        for job_dict in self.collection.find():
            try:
                job = Job.__new__(Job)
                job_dict['id'] = job_dict.pop('_id')
                job_dict['trigger'] = pickle.loads(job_dict['trigger'])
                job_dict['args'] = pickle.loads(job_dict['args'])
                job_dict['kwargs'] = pickle.loads(job_dict['kwargs'])
                job.__setstate__(job_dict)
                jobs.append(job)
            except Exception:
                job_name = job_dict.get('name', '(unknown)')
                logger.exception('Unable to restore job "%s"', job_name)
        self.jobs = jobs

    def update_job(self, job):
        spec = {'_id': job.id}
        document = {'$set': {'next_run_time': job.next_run_time},
                    '$inc': {'runs': 1}}
        self.collection.update(spec, document)

    def close(self):
        self.connection.disconnect()

    def __repr__(self):
        connection = self.collection.database.connection
        return '<%s (connection=%s)>' % (self.__class__.__name__, connection)
class Mongodb(Storage):
    settings_name = 'STORAGE_MONGODB'

    class settings:
        host = 'localhost'
        port = 27017
        max_pool_size = 10
        network_timeout = None
        document_class = dict
        tz_aware = False
        database = 'test'

    def __init__(self, *args, **kwargs):
        super(Mongodb, self).__init__(*args, **kwargs)
        self.connection = Connection(host=self.settings.host,
                                     port=self.settings.port,
                                     max_pool_size=self.settings.max_pool_size,
                                     network_timeout=self.settings.network_timeout,
                                     document_class=self.settings.document_class,
                                     tz_aware=self.settings.tz_aware)
        self.db = self.connection[self.settings.database]

    def __del__(self):
        self.connection.disconnect()
        super(Mongodb, self).__del__()

    def get(self, tag, key):
        stored_obj = self.db[tag].find_one(key)
        if stored_obj:
            if '_obj' in stored_obj:
                obj = loads(stored_obj['_obj'])
            else:
                obj = stored_obj
                del obj['_id']
        else:
            obj = stored_obj
        return obj

    def put(self, tag, key, obj):
        if isinstance(obj, dict):
            obj_to_save = obj
            obj_to_save['_id'] = key
        else:
            obj_to_save = {}
            obj_to_save['_id'] = key
            obj_to_save['_obj'] = dumps(obj)
        self.db[tag].save(obj_to_save)

    def delete(self, tag, key):
        self.db[tag].remove(key)

    def get_free_key(self, tag):
        return self.db[tag].save({})

    def get_dataset(self, tag):
        return self.db[tag].find()
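# Hypothetical round-trip sketch for the Mongodb storage backend above (not
# from the original source). Dict values are stored as documents under the
# given key; anything else is pickled into the '_obj' field by put() and
# unpickled again by get().
def storage_roundtrip(storage):
    storage.put("sessions", "abc123", {"user": "alice", "visits": 3})
    storage.put("blobs", "pi", 3.14159)          # non-dict value -> pickled
    print storage.get("sessions", "abc123")      # {'user': 'alice', 'visits': 3}
    print storage.get("blobs", "pi")             # 3.14159
    storage.delete("blobs", "pi")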
class MongoDB(Report):
    """Stores report in MongoDB."""

    def connect(self):
        """Connects to Mongo database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.cuckoo
            self.fs = GridFS(self.db)
        except TypeError:
            raise CuckooReportError("Mongo connection port must be integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")

    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS.
        @param file_obj: object to the file to store
        @param filename: name of the file to store
        @return: object id of the stored file
        """
        if not filename:
            filename = file_obj.get_name()

        existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()})

        if existing:
            return existing["_id"]
        else:
            new = self.fs.new_file(filename=filename,
                                   sha256=file_obj.get_sha256())
            for chunk in file_obj.get_chunks():
                new.write(chunk)
            try:
                new.close()
            except FileExists:
                to_find = {"sha256": file_obj.get_sha256()}
                return self.db.fs.files.find_one(to_find)["_id"]
            else:
                return new._id

    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set an unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #  created.
        #  Returns None if the index already exists.
        self.db.fs.files.ensure_index("sha256", unique=True, sparse=True,
                                      name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        #new_dropped = []
        #for dropped in report["dropped"]:
        #    new_drop = dict(dropped)
        #    drop = File(dropped["path"])
        #    if drop.valid():
        #        dropped_id = self.store_file(drop, filename=dropped["name"])
        #        new_drop["object_id"] = dropped_id
        #    new_dropped.append(new_drop)
        #report["dropped"] = new_dropped

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
def open_db(host, db):
    conn = Connection(host)
    try:
        yield conn[db]
    finally:
        conn.disconnect()
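# open_db() above is a generator written in context-manager style: it yields
# the selected database and always calls disconnect() in the finally block.
# Hypothetical usage sketch, assuming the function is wrapped with
# contextlib.contextmanager where it is defined in the original module:
from contextlib import contextmanager

open_db_ctx = contextmanager(open_db)  # stand-in name for the decorated form

with open_db_ctx("localhost", "test") as db:
    # The connection is closed as soon as the with-block exits, even on error.
    print db.collection_names()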
class MongoDB(Report):
    """Stores report in MongoDB."""

    # Mongo schema version, used for data migration.
    SCHEMA_VERSION = "1"

    def connect(self):
        """Connects to Mongo database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.cuckoo
            self.fs = GridFS(self.db)
        except TypeError:
            raise CuckooReportError("Mongo connection port must be integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")

    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS.
        @param file_obj: object to the file to store
        @param filename: name of the file to store
        @return: object id of the stored file
        """
        if not filename:
            filename = file_obj.get_name()

        existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()})

        if existing:
            return existing["_id"]
        else:
            new = self.fs.new_file(filename=filename,
                                   sha256=file_obj.get_sha256())
            for chunk in file_obj.get_chunks():
                new.write(chunk)
            try:
                new.close()
            except FileExists:
                to_find = {"sha256": file_obj.get_sha256()}
                return self.db.fs.files.find_one(to_find)["_id"]
            else:
                return new._id

    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set mongo schema version.
        # TODO: This is not optimal because it runs on each analysis. Need to
        # run only once at startup.
        if "cuckoo_schema" in self.db.collection_names():
            if self.db.cuckoo_schema.find_one()["version"] != self.SCHEMA_VERSION:
                raise CuckooReportError("Mongo schema version not expected, "
                                        "check data migration tool")
        else:
            self.db.cuckoo_schema.save({"version": self.SCHEMA_VERSION})

        # Set an unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #  created.
        #  Returns None if the index already exists.
        # TODO: This is not optimal because it runs on each analysis. Need to
        # run only once at startup.
        self.db.fs.files.ensure_index("sha256", unique=True, sparse=True,
                                      name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)
        report["dropped"] = new_dropped

        # Store the zipped droppings file in GridFS and reference it back in
        # the report.
        #cuckoo_dropped_zip_path = os.path.join(self.analysis_path, "cuckoodroppings.zip")
        #cuckoo_dropped_zip = File(cuckoo_dropped_zip_path)
        #if cuckoo_dropped_zip.valid():
        #    cuckoo_droppings_id = self.store_file(cuckoo_dropped_zip)
        #    report["zippeddroppings"] = {"cuckoo_droppings_id": cuckoo_droppings_id}
        #    report["zippeddroppings"].update(results["zippeddroppings"])

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)
        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Other info we want quick access to from the web UI.
        if report.has_key("virustotal") and report["virustotal"] and \
                report["virustotal"].has_key("positives") and \
                results["virustotal"].has_key("total"):
            report["virustotal_summary"] = "%s/%s" % (
                report["virustotal"]["positives"],
                report["virustotal"]["total"])

        new_suricata_files = []
        if report.has_key("suricata") and report["suricata"]:
            suricata = {}
            suricata["info"] = {}
            suricata["info"]["id"] = report["info"]["id"]

            # Walk through the Suricata extracted files, store them in GridFS
            # and update the report with the ObjectIds.
            # Store the Suricata extracted files zip in GridFS and reference
            # it back in the report.
            suri_extracted_zip_path = os.path.join(self.analysis_path,
                                                   "logs/files.zip")
            suri_extracted_zip = File(suri_extracted_zip_path)
            if suri_extracted_zip.valid():
                suri_extracted_zip_id = self.store_file(suri_extracted_zip)
                suricata["suri_extracted_zip"] = suri_extracted_zip_id

            if report["suricata"].has_key("files") and len(report["suricata"]["files"]) > 0:
                suricata["file_cnt"] = len(report["suricata"]["files"])
                for suricata_file_e in report["suricata"]["files"]:
                    if suricata_file_e.has_key("file_info"):
                        tmp_suricata_file_d = dict(suricata_file_e)
                        suricata_file = File(suricata_file_e["file_info"]["path"])
                        if suricata_file.valid():
                            suricata_file_id = self.store_file(
                                suricata_file,
                                filename=suricata_file_e["file_info"]["name"])
                            tmp_suricata_file_d["object_id"] = suricata_file_id
                        new_suricata_files.append(tmp_suricata_file_d)
                suricata["files"] = new_suricata_files

            if report["suricata"].has_key("alert_log_full_path") and report["suricata"]["alert_log_full_path"]:
                suricata_alert_log = File(report["suricata"]["alert_log_full_path"])
                if suricata_alert_log.valid():
                    suricata_alert_log_id = self.store_file(suricata_alert_log)
                    suricata["alert_log_id"] = suricata_alert_log_id

            if report["suricata"].has_key("tls_log_full_path") and report["suricata"]["tls_log_full_path"]:
                tls_log = File(report["suricata"]["tls_log_full_path"])
                if tls_log.valid():
                    tls_log_id = self.store_file(tls_log)
                    suricata["tls_log_id"] = tls_log_id

            if report["suricata"].has_key("http_log_full_path") and report["suricata"]["http_log_full_path"]:
                http_log = File(report["suricata"]["http_log_full_path"])
                if http_log.valid():
                    http_log_id = self.store_file(http_log)
                    suricata["http_log_id"] = http_log_id

            if report["suricata"].has_key("file_log_full_path") and report["suricata"]["file_log_full_path"]:
                file_log = File(report["suricata"]["file_log_full_path"])
                if file_log.valid():
                    file_log_id = self.store_file(file_log)
                    suricata["file_log_id"] = file_log_id

            if report["suricata"].has_key("dns_log_full_path") and report["suricata"]["dns_log_full_path"]:
                dns_log = File(report["suricata"]["dns_log_full_path"])
                if dns_log.valid():
                    dns_log_id = self.store_file(dns_log)
                    suricata["dns_log_id"] = dns_log_id

            if report["suricata"].has_key("ssh_log_full_path") and report["suricata"]["ssh_log_full_path"]:
                ssh_log = File(report["suricata"]["ssh_log_full_path"])
                if ssh_log.valid():
                    ssh_log_id = self.store_file(ssh_log)
                    suricata["ssh_log_id"] = ssh_log_id

            if report["suricata"].has_key("tls") and len(report["suricata"]["tls"]) > 0:
                suricata["tls_cnt"] = len(report["suricata"]["tls"])
                suricata["tls"] = report["suricata"]["tls"]
            if report["suricata"] and report["suricata"].has_key("alerts") and len(report["suricata"]["alerts"]) > 0:
                suricata["alert_cnt"] = len(report["suricata"]["alerts"])
                suricata["alerts"] = report["suricata"]["alerts"]
            if results["suricata"].has_key("http") and len(report["suricata"]["http"]) > 0:
                suricata["http_cnt"] = len(report["suricata"]["http"])
                suricata["http"] = report["suricata"]["http"]

            self.db.suricata.save(suricata)
            # Do not store this in the analysis collection.
            del report["suricata"]

        if results.has_key("behavior") and results["behavior"].has_key("martianlist") and \
                results["behavior"]["martianlist"] and len(results["behavior"]["martianlist"]) > 0:
            report["mlist_cnt"] = len(results["behavior"]["martianlist"])

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()
class Database():

    def __init__(self):
        self.__cfgReporting = Config(os.path.join(RAGPICKER_ROOT, 'config', 'reporting.conf'))
        self.__cfgProcessing = Config(os.path.join(RAGPICKER_ROOT, 'config', 'processing.conf'))
        self.__mongodbEnabled = self.__cfgReporting.getOption("mongodb", "enabled")
        self.__codedbEnabled = self.__cfgReporting.getOption("codeDB", "enabled")
        self.__bluecoatEnabled = self.__cfgProcessing.getOption("all_bluecoatMalwareAnalysisAppliance", "enabled")

        if self.__mongodbEnabled:
            # Connect to the MongoDB database, collection ragpicker.
            try:
                mongodbHost = self.__cfgReporting.getOption("mongodb", "host")
                mongodbPort = self.__cfgReporting.getOption("mongodb", "port")
                self.__mongodbConnection = Connection(mongodbHost, mongodbPort)
                self.__mongodbCollectionRagpicker = self.__mongodbConnection.MalwareAnalyse.ragpicker
                self.__mongodbCollectionFamilies = self.__mongodbConnection.MalwareAnalyse.families
                self.__mongodbCollectionSandboxTaskQueue = self.__mongodbConnection.MalwareAnalyse.sandboxTaskQueue
            except TypeError:
                raise Exception("MongoDB connection port in report.config must be integer")
            except ConnectionFailure:
                raise Exception("Cannot connect to MongoDB (ragpicker)")

        if self.__codedbEnabled:
            # Connect to the MongoDB database, collection codeDB.
            try:
                codedbHost = self.__cfgReporting.getOption("codeDB", "mongo_db_host")
                codedbPort = self.__cfgReporting.getOption("codeDB", "mongo_db_port")
                self.__codedbConnection = Connection(codedbHost, codedbPort)
                self.__codedbCollectionCodedb = self.__codedbConnection.MalwareAnalyse.codeDB
            except TypeError:
                raise Exception("MongoDB connection port for CodeDB in report.config must be integer")
            except ConnectionFailure:
                raise Exception("Cannot connect to MongoDB (codeDB)")

    def __del__(self):
        if self.__mongodbEnabled:
            self.__mongodbConnection.disconnect()
        if self.__codedbEnabled:
            self.__codedbConnection.disconnect()

    # ------------------------------------------------------------------------------
    # Ragpicker Database (MongoDB)
    # ------------------------------------------------------------------------------

    def isRagpickerDBEnabled(self):
        return self.__mongodbEnabled

    def getStatisticsAntivirus(self):
        queries = []
        ret = []

        queries.append({"product": "Avast Antivirus", "findStr1": "AntivirusScanAvast",
                        "findStr2": "AntivirusScanAvast.avast", "ok": "OK"})
        queries.append({"product": "AVG Antivirus", "findStr1": "AntivirusScanAvg",
                        "findStr2": "AntivirusScanAvg.Avg", "ok": "OK"})
        queries.append({"product": "Avira", "findStr1": "AntivirusScanAvira",
                        "findStr2": "AntivirusScanAvira.Avira.scan", "ok": "OK"})
        queries.append({"product": "BitDefender", "findStr1": "AntivirusScanBitDefender",
                        "findStr2": "AntivirusScanBitDefender.BitDefender", "ok": "OK"})
        queries.append({"product": "ClamAV", "findStr1": "AntivirusScanClamAv",
                        "findStr2": "AntivirusScanClamAv.ClamAv", "ok": " OK"})
        queries.append({"product": "COMODO", "findStr1": "AntivirusScanCOMODO",
                        "findStr2": "AntivirusScanCOMODO.COMODO", "ok": "OK"})
        queries.append({"product": "ESET", "findStr1": "AntivirusScanESET",
                        "findStr2": "AntivirusScanESET.ESET", "ok": "OK"})
        queries.append({"product": "F-Prot", "findStr1": "AntivirusScanFProt",
                        "findStr2": "AntivirusScanFProt.FProt", "ok": "OK"})
        queries.append({"product": "F-Secure", "findStr1": "AntivirusScanF-Secure",
                        "findStr2": "AntivirusScanF-Secure.F-Secure", "ok": "OK"})

        for q in queries:
            av = {}
            av["product"] = q.get("product")
            av["analyzed"] = str(self.__mongodbCollectionRagpicker.find(
                {q.get("findStr1"): {"$ne": None}}).count())
            av["notanalyzed"] = str(self.__mongodbCollectionRagpicker.find(
                {q.get("findStr1"): None}).count())
            av["malware"] = str(self.__mongodbCollectionRagpicker.find(
                {"$and": [{q.get("findStr1"): {"$ne": None}},
                          {q.get("findStr2"): {"$ne": q.get("ok")}}]}).count())
            av["nonemalware"] = str(self.__mongodbCollectionRagpicker.find(
                {q.get("findStr2"): q.get("ok")}).count())

            if av.get("analyzed") != "0":
                av["rate"] = "{:.2f} %".format(
                    float(av.get("malware")) / float(av.get("analyzed")) * 100)
            else:
                av["rate"] = "--"

            ret.append(av)

        return ret

    def getStatisticsNoneMalwareByAV(self):
        return self.__mongodbCollectionRagpicker.find({
            "$and": [
                {"$or": [{"AntivirusScanAvast.avast": "OK"}, {"AntivirusScanAvast": None}]},
                {"$or": [{"AntivirusScanAvg.Avg": "OK"}, {"AntivirusScanAvg": None}]},
                {"$or": [{"AntivirusScanAvira.Avira.scan": "OK"}, {"AntivirusScanAvira": None}]},
                {"$or": [{"AntivirusScanBitDefender.BitDefender": "OK"}, {"AntivirusScanBitDefender": None}]},
                {"$or": [{"AntivirusScanClamAv.ClamAv": " OK"}, {"AntivirusScanClamAv": None}]},
                {"$or": [{"AntivirusScanCOMODO.COMODO": "OK"}, {"AntivirusScanCOMODO": None}]},
                {"$or": [{"AntivirusScanESET.ESET": "OK"}, {"AntivirusScanESET": None}]},
                {"$or": [{"AntivirusScanFProt.FProt": "OK"}, {"AntivirusScanFProt": None}]},
                {"$or": [{"AntivirusScanF-Secure.F-Secure": "OK"}, {"AntivirusScanF-Secure": None}]},
                {"VirusTotal.file.verbose_msg": {"$ne": None}}]}).count()

    def getSamplesNotFoundByAV(self):
        return self.__mongodbCollectionRagpicker.find({
            "$and": [
                {"$or": [{"AntivirusScanAvast.avast": "OK"}, {"AntivirusScanAvast": None}]},
                {"$or": [{"AntivirusScanAvg.Avg": "OK"}, {"AntivirusScanAvg": None}]},
                {"$or": [{"AntivirusScanAvira.Avira.scan": "OK"}, {"AntivirusScanAvira": None}]},
                {"$or": [{"AntivirusScanBitDefender.BitDefender": "OK"}, {"AntivirusScanBitDefender": None}]},
                {"$or": [{"AntivirusScanClamAv.ClamAv": " OK"}, {"AntivirusScanClamAv": None}]},
                {"$or": [{"AntivirusScanCOMODO.COMODO": "OK"}, {"AntivirusScanCOMODO": None}]},
                {"$or": [{"AntivirusScanESET.ESET": "OK"}, {"AntivirusScanESET": None}]},
                {"$or": [{"AntivirusScanFProt.FProt": "OK"}, {"AntivirusScanFProt": None}]},
                {"$or": [{"AntivirusScanF-Secure.F-Secure": "OK"}, {"AntivirusScanF-Secure": None}]},
                {"VirusTotal.file.verbose_msg": {"$ne": None}}]},
            {"Info.file.sha256": True, "Info.analyse.started": True})

    def getSamplesNotFoundByVT(self):
        return self.__mongodbCollectionRagpicker.find(
            {"VirusTotal.file.verbose_msg": {"$ne": None}},
            {"Info.file.sha256": True, "Info.analyse.started": True})

    def getSamplesNotFoundByLocalAV(self):
        return self.__mongodbCollectionRagpicker.find({
            "$and": [
                {"$or": [{"AntivirusScanAvast.avast": "OK"}, {"AntivirusScanAvast": None}]},
                {"$or": [{"AntivirusScanAvg.Avg": "OK"}, {"AntivirusScanAvg": None}]},
                {"$or": [{"AntivirusScanAvira.Avira.scan": "OK"}, {"AntivirusScanAvira": None}]},
                {"$or": [{"AntivirusScanBitDefender.BitDefender": "OK"}, {"AntivirusScanBitDefender": None}]},
                {"$or": [{"AntivirusScanClamAv.ClamAv": " OK"}, {"AntivirusScanClamAv": None}]},
                {"$or": [{"AntivirusScanCOMODO.COMODO": "OK"}, {"AntivirusScanCOMODO": None}]},
                {"$or": [{"AntivirusScanESET.ESET": "OK"}, {"AntivirusScanESET": None}]},
                {"$or": [{"AntivirusScanFProt.FProt": "OK"}, {"AntivirusScanFProt": None}]},
                {"$or": [{"AntivirusScanF-Secure.F-Secure": "OK"}, {"AntivirusScanF-Secure": None}]}]},
            {"Info.file.sha256": True, "Info.analyse.started": True})

    def getStatisticsVirusTotal(self):
        ret = {}
        ret["analyzed"] = self.__mongodbCollectionRagpicker.find(
            {"VirusTotal": {"$ne": None}}).count()
        ret["notAnalyzed"] = self.__mongodbCollectionRagpicker.find(
            {"VirusTotal": None}).count()
        ret["samplesFound"] = self.__mongodbCollectionRagpicker.find(
            {"VirusTotal.file.positives": {"$ne": None}}).count()
        ret["SamplesNotFound"] = self.__mongodbCollectionRagpicker.find(
            {"VirusTotal.file.verbose_msg": {"$ne": None}}).count()
        return ret

    def getStatisticsPackerSignatures(self):
        return self.__mongodbCollectionRagpicker.aggregate([
            {'$group': {'_id': {'PublisherO': "$VerifySigs.PublisherO",
                                'Issuer': "$VerifySigs.Issuer"},
                        'count': {'$sum': 1}}},
            {'$sort': {"count": -1}}])

    def getStatisticsPackerCompiler(self):
        return self.__mongodbCollectionRagpicker.aggregate([
            {'$group': {'_id': '$PEID', 'count': {'$sum': 1}}},
            {'$sort': {'count': -1}}])

    def getStatisticsPeCharacteristics(self):
        ret = {}
        ret["exe"] = str(self.__mongodbCollectionRagpicker.find(
            {"$and": [{"Info.file.EXE": True}, {"Info.file.DLL": False},
                      {"Info.file.DRIVER": False}]}).count())
        ret["dll"] = str(self.__mongodbCollectionRagpicker.find(
            {"$and": [{"Info.file.EXE": False}, {"Info.file.DLL": True},
                      {"Info.file.DRIVER": False}]}).count())
        ret["driver"] = str(self.__mongodbCollectionRagpicker.find(
            {"$and": [{"Info.file.EXE": False}, {"Info.file.DLL": False},
                      {"Info.file.DRIVER": True}]}).count())
        ret["noPe"] = str(self.__mongodbCollectionRagpicker.find(
            {"$and": [{"Info.file.EXE": None}, {"Info.file.DLL": None},
                      {"Info.file.DRIVER": None}]}).count())
        ret["dllDriver"] = str(self.__mongodbCollectionRagpicker.find(
            {"$and": [{"Info.file.EXE": False}, {"Info.file.DLL": True},
                      {"Info.file.DRIVER": True}]}).count())
        return ret

    def getFiletypes(self):
        return self.__mongodbCollectionRagpicker.aggregate([
            {'$group': {'_id': '$Info.file.type', 'count': {'$sum': 1}}},
            {'$sort': {'count': -1}}])

    def countReportsRagpickerDB(self):
        return self.__mongodbCollectionRagpicker.find().count()

    def iterateRagpickerReports(self, sha256):
        for report in self.__mongodbCollectionRagpicker.find(
                {'Info.file.sha256': sha256}, {"_id": 0}):
            yield report

    # Attention: deletes the whole Ragpicker database!!!
    # Returns the number of deleted reports.
    def deleteRagpickerDB(self):
        count = self.__mongodbCollectionRagpicker.find().count()
        # Delete all Ragpicker data from MongoDB.
        self.__mongodbCollectionRagpicker.remove()
        return count

    # Insert Ragpicker report into MongoDB.
    def insertRagpickerDB(self, report):
        # Store the report
        try:
            self.__mongodbCollectionRagpicker.insert(report)
        except InvalidDocument as e:
            log.exception("Error InvalidDocument: %s", report)
            raise Exception("Error InvalidDocument: {0}".format(e))
        except InvalidStringData:
            self.__mongodbCollectionRagpicker.insert(convertDirtyDict2ASCII(report))

    # Count Ragpicker reports by file (and URL).
    def countRagpickerDB(self, file_md5, url_md5=None):
        if url_md5:
            query = {"$and": [{"Info.url.md5": {"$in": [url_md5]}},
                              {"Info.file.md5": {"$in": [file_md5]}}]}
        else:
            query = {"$and": [{"Info.file.md5": {"$in": [file_md5]}}]}
        return self.__mongodbCollectionRagpicker.find(query).count()

    # ------------------------------------------------------------------------------
    # Ragpicker SandboxTaskQueue database (MongoDB)
    # ------------------------------------------------------------------------------

    def insertSandboxTaskStatus(self, sandboxName, sha256, taskID, sampleID, taskState=None):
        statusReport = {"sandbox": sandboxName,
                        "sha256": sha256,
                        "sample_id": sampleID,
                        "task_id": taskID,
                        "task_state": taskState}
        # Store the SandboxTaskQueue status report.
        self.__mongodbCollectionSandboxTaskQueue.insert(statusReport)

    # Attention: deletes the whole Ragpicker SandboxTaskQueue database!!!
    # Returns the number of deleted reports.
    def deleteSandboxTaskQueueDB(self):
        count = self.__mongodbCollectionSandboxTaskQueue.find().count()
        # Delete all data from MongoDB.
        self.__mongodbCollectionSandboxTaskQueue.remove()
        return count

    # ------------------------------------------------------------------------------
    # Ragpicker families database (MongoDB)
    # ------------------------------------------------------------------------------

    def insertFamily(self, familyReport):
        # Store the family report.
        self.__mongodbCollectionFamilies.insert(familyReport)

    # Count family reports by parentObjectSHA256.
    def countFamilyDB(self, parentObjectSHA256):
        query = {"$and": [{"parentObjectSHA256": {"$in": [parentObjectSHA256]}}]}
        return self.__mongodbCollectionFamilies.find(query).count()

    def iterateFamilyReports(self, sha256):
        for report in self.__mongodbCollectionFamilies.find(
                {'parentObjectSHA256': sha256}, {"_id": 0}):
            yield report

    # Attention: deletes the whole Ragpicker family database!!!
    # Returns the number of deleted reports.
    def deleteFamilyDB(self):
        count = self.__mongodbCollectionFamilies.find().count()
        # Delete all family data from MongoDB.
        self.__mongodbCollectionFamilies.remove()
        return count

    # ------------------------------------------------------------------------------
    # CodeDB Database (MongoDB)
    # ------------------------------------------------------------------------------

    def isCodeDBEnabled(self):
        return self.__codedbEnabled

    def countReportsCodeDB(self):
        return self.__codedbCollectionCodedb.find().count()

    # Attention: deletes the whole CodeDB database!!!
    # Returns the number of deleted reports.
    def deleteCodeDB(self):
        count = self.__codedbCollectionCodedb.find().count()
        # Delete all CodeDB reports from MongoDB.
        self.__codedbCollectionCodedb.remove()
        return count

    # Count CodeDB reports by file SHA256.
    def countCodeDB(self, file_sha256):
        return self.__codedbCollectionCodedb.find({"sha256": file_sha256}).count()

    # Insert CodeDB report into MongoDB.
    def insertCodeDB(self, report):
        # Store the report
        try:
            self.__codedbCollectionCodedb.insert(report)
        except InvalidStringData:
            self.__codedbCollectionCodedb.insert(convertDirtyDict2ASCII(report))
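# Hypothetical usage sketch for the Ragpicker Database helper above (not part
# of the original code base). It assumes reporting.conf/processing.conf exist
# under RAGPICKER_ROOT and enable MongoDB, so the constructor can connect;
# the destructor then calls disconnect() on the open connections.
def print_av_overview():
    db = Database()
    if db.isRagpickerDBEnabled():
        print "reports in ragpicker collection:", db.countReportsRagpickerDB()
        for av in db.getStatisticsAntivirus():
            print av["product"], av["rate"]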
class MongoDB(Report):
    """Stores report in MongoDB."""

    def connect(self):
        """Connects to Mongo database, loads options and set connectors.
        @raise CuckooReportError: if unable to connect.
        """
        host = self.options.get("host", "127.0.0.1")
        port = self.options.get("port", 27017)

        try:
            self.conn = Connection(host, port)
            self.db = self.conn.cuckoo
            self.fs = GridFS(self.db)
        except TypeError:
            raise CuckooReportError("Mongo connection port must be integer")
        except ConnectionFailure:
            raise CuckooReportError("Cannot connect to MongoDB")

    def store_file(self, file_obj, filename=""):
        """Store a file in GridFS.
        @param file_obj: object to the file to store
        @param filename: name of the file to store
        @return: object id of the stored file
        """
        if not filename:
            filename = file_obj.get_name()

        existing = self.db.fs.files.find_one({"sha256": file_obj.get_sha256()})

        if existing:
            return existing["_id"]
        else:
            new = self.fs.new_file(filename=filename,
                                   sha256=file_obj.get_sha256())
            for chunk in file_obj.get_chunks():
                new.write(chunk)
            try:
                new.close()
            except FileExists:
                to_find = {"sha256": file_obj.get_sha256()}
                return self.db.fs.files.find_one(to_find)["_id"]
            else:
                return new._id

    def run(self, results):
        """Writes report.
        @param results: analysis results dictionary.
        @raise CuckooReportError: if fails to connect or write to MongoDB.
        """
        # We put the raise here and not at the import because it would
        # otherwise trigger even if the module is not enabled in the config.
        if not HAVE_MONGO:
            raise CuckooDependencyError("Unable to import pymongo "
                                        "(install with `pip install pymongo`)")

        self.connect()

        # Set an unique index on stored files, to avoid duplicates.
        # From pymongo docs:
        #  Returns the name of the created index if an index is actually
        #  created.
        #  Returns None if the index already exists.
        self.db.fs.files.ensure_index("sha256", unique=True, sparse=True,
                                      name="sha256_unique")

        # Create a copy of the dictionary. This is done in order to not modify
        # the original dictionary and possibly compromise the following
        # reporting modules.
        report = dict(results)

        # Store the sample in GridFS.
        if results["info"]["category"] == "file":
            sample = File(self.file_path)
            if sample.valid():
                fname = results["target"]["file"]["name"]
                sample_id = self.store_file(sample, filename=fname)
                report["target"] = {"file_id": sample_id}
                report["target"].update(results["target"])

        # Store the PCAP file in GridFS and reference it back in the report.
        pcap_path = os.path.join(self.analysis_path, "dump.pcap")
        pcap = File(pcap_path)
        if pcap.valid():
            pcap_id = self.store_file(pcap)
            report["network"] = {"pcap_id": pcap_id}
            report["network"].update(results["network"])

        # Walk through the dropped files, store them in GridFS and update the
        # report with the ObjectIds.
        new_dropped = []
        for dropped in report["dropped"]:
            new_drop = dict(dropped)
            drop = File(dropped["path"])
            if drop.valid():
                dropped_id = self.store_file(drop, filename=dropped["name"])
                new_drop["object_id"] = dropped_id
            new_dropped.append(new_drop)
        report["dropped"] = new_dropped

        # Add screenshots.
        report["shots"] = []
        shots_path = os.path.join(self.analysis_path, "shots")
        if os.path.exists(shots_path):
            # Walk through the files and select the JPGs.
            shots = [shot for shot in os.listdir(shots_path)
                     if shot.endswith(".jpg")]
            for shot_file in sorted(shots):
                shot_path = os.path.join(self.analysis_path, "shots",
                                         shot_file)
                shot = File(shot_path)
                # If the screenshot path is a valid file, store it and
                # reference it back in the report.
                if shot.valid():
                    shot_id = self.store_file(shot)
                    report["shots"].append(shot_id)

        # Store chunks of API calls in a different collection and reference
        # those chunks back in the report. In this way we should defeat the
        # issue with the oversized reports exceeding MongoDB's boundaries.
        # Also allows paging of the reports.
        new_processes = []
        for process in report["behavior"]["processes"]:
            new_process = dict(process)
            chunk = []
            chunks_ids = []
            # Loop on each process call.
            for index, call in enumerate(process["calls"]):
                # If the chunk size is 100 or if the loop is completed then
                # store the chunk in MongoDB.
                if len(chunk) == 100:
                    to_insert = {"pid": process["process_id"], "calls": chunk}
                    chunk_id = self.db.calls.insert(to_insert)
                    chunks_ids.append(chunk_id)
                    # Reset the chunk.
                    chunk = []
                # Append call to the chunk.
                chunk.append(call)
            # Store leftovers.
            if chunk:
                to_insert = {"pid": process["process_id"], "calls": chunk}
                chunk_id = self.db.calls.insert(to_insert)
                chunks_ids.append(chunk_id)
            # Add list of chunks.
            new_process["calls"] = chunks_ids
            new_processes.append(new_process)
        # Store the results in the report.
        report["behavior"] = dict(report["behavior"])
        report["behavior"]["processes"] = new_processes

        # Store the report and retrieve its object id.
        self.db.analysis.save(report)
        self.conn.disconnect()