def testCode6():
    """Re-launch full analysis for every sample stored in GridFS.

    Scans all files in the GridFS 'files' database starting at offset
    `inicio`, schedules each one for analysis by its id, and prints a
    timestamped progress line every 1000 samples.
    """
    inicio = 0  # starting offset; raise to resume an interrupted run
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    fs = gridfs.GridFS(db)
    # timeout=False keeps the server-side cursor alive during the long scan
    res = fs.find(timeout=False).skip(inicio)
    lc = Launcher()
    count = inicio
    reset = 0
    # NOTE(review): dropped the unused locals `start`/`first` from the
    # original — they were assigned but never read.
    for f in res:
        sample = Sample()
        sample.setID(f.filename)  # the GridFS filename is the sample id
        sample.setStorageVersion({})
        lc.launchAnalysisByID(sample)
        reset += 1
        count += 1
        if reset >= 1000:
            print(time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
                  + " processed:" + str(count / 1000) + "K")
            reset = 0
    print(str(count) + " processed")
def testCode4():
    """Re-run the file analytics pipeline over every stored sample.

    Streams GridFS files starting at offset `inicio` and feeds each
    (filename, raw data) pair to the Launcher, logging progress every
    1000 files.
    """
    inicio = 10569000  # resume offset from a previous interrupted run
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    grid = gridfs.GridFS(db)
    cursor = grid.find(timeout=False).skip(inicio)
    launcher = Launcher()
    count = inicio
    since_last_log = 0
    for stored in cursor:
        payload = stored.read()
        launcher.launchFileAnalitics((stored.filename, payload))
        since_last_log += 1
        count += 1
        if since_last_log >= 1000:
            print(str(count) + " processed")
            since_last_log = 0
    print(str(count) + " processed")
def testCode4():
    """Push every stored sample back through the file analytics launcher.

    Resumes at offset `inicio`, reading each GridFS file and handing the
    (filename, data) tuple to launchFileAnalitics; prints a progress
    count every 1000 files and a final total.
    """
    inicio = 10569000  # skip samples already processed in an earlier run
    client = MongoClient(envget('files.host'), envget('files.port'))
    database = client[envget('db_files_name')]
    storage = gridfs.GridFS(database)
    launcher = Launcher()
    count = inicio
    batch = 0
    for entry in storage.find(timeout=False).skip(inicio):
        launcher.launchFileAnalitics((entry.filename, entry.read()))
        batch += 1
        count += 1
        if batch >= 1000:
            print(str(count) + " processed")
            batch = 0
    print(str(count) + " processed")
def searchFull(search, limit):
    """Run `search` against the metadata collection and return file ids.

    Parameters:
        search: pymongo query document.
        limit: maximum number of documents to fetch.

    Returns:
        list of str: the "file_id" of each matching document.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    cursor = coll_meta.find(search).limit(limit)
    # Single pass over the cursor; the original buffered everything into
    # an intermediate list first, which was pure overhead.
    return [str(doc["file_id"]) for doc in cursor]
def searchFuzzy(fuzz, limit, thresh):
    """Find stored samples whose ssdeep hash is similar to `fuzz`.

    Scans up to `limit` metadata documents, ssdeep-compares each stored
    fuzzy_hash against `fuzz`, and keeps those whose similarity score
    reaches `thresh`.

    Returns:
        dict: file_id -> similarity score.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    # Iterate the cursor directly; the original copied every document
    # into an intermediate list first for no benefit.
    cursor = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
    ret = {}
    for doc in cursor:
        res = -1
        try:
            res = ssdeep.compare(doc["fuzzy_hash"], fuzz)
        except InternalError:
            # Malformed stored hash; report it and keep scanning.
            print(str(res) + "------" + str(doc["fuzzy_hash"]) +
                  "-----" + str(doc["file_id"]))
            continue
        if res >= thresh:
            ret[doc["file_id"]] = res
    return ret
def searchFuzzy(fuzz, limit, thresh):
    """Find stored samples whose ssdeep hash is similar to `fuzz`.

    Compares `fuzz` against the fuzzy_hash of up to `limit` metadata
    documents and returns those scoring at least `thresh`.

    Returns:
        dict: file_id -> similarity score.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    # Stream the cursor instead of materialising an intermediate list
    # (the original double-buffered all documents first).
    cursor = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
    ret = {}
    for doc in cursor:
        res = -1
        try:
            res = ssdeep.compare(doc["fuzzy_hash"], fuzz)
        except InternalError:
            # Bad stored hash; log the offending document and continue.
            print(str(res) + "------" + str(doc["fuzzy_hash"]) +
                  "-----" + str(doc["file_id"]))
            continue
        if res >= thresh:
            ret[doc["file_id"]] = res
    return ret
def testCode6():
    """Re-launch full analysis for every sample stored in GridFS.

    Iterates all files in the GridFS 'files' database from offset
    `inicio`, scheduling each by id via the Launcher, with a
    timestamped progress line every 1000 samples.
    """
    inicio = 0  # starting offset; raise to resume an interrupted run
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    fs = gridfs.GridFS(db)
    # timeout=False prevents the server from reaping the cursor mid-scan
    res = fs.find(timeout=False).skip(inicio)
    lc = Launcher()
    count = inicio
    reset = 0
    # NOTE(review): removed the unused `start`/`first` locals that the
    # original assigned but never read.
    for f in res:
        sample = Sample()
        sample.setID(f.filename)  # GridFS filename doubles as sample id
        sample.setStorageVersion({})
        lc.launchAnalysisByID(sample)
        reset += 1
        count += 1
        if reset >= 1000:
            print(time.strftime('%Y-%m-%d %H:%M:%S',
                                time.localtime(time.time()))
                  + " processed:" + str(count / 1000) + "K")
            reset = 0
    print(str(count) + " processed")
def get_tasks_on_queue(queue_name):
    """List pending tasks on the given RQ queue.

    Each returned dict carries: date_enqueued, task_id (the 40-char
    uppercase id embedded in the job description) and hashes (count of
    valid hashes for that task). Jobs with no task id are skipped.
    """
    # job.to_dict() yields something like:
    #   {u'origin': u'task_no_vt', u'status': u'queued',
    #    u'description': u"Api.task.generic_task('N7UF...YEC')",
    #    u'created_at': '2017-03-03T20:14:47Z',
    #    u'enqueued_at': '2017-03-03T20:14:47Z', u'timeout': 31536000, ...}
    task_id_re = re.compile('[A-Z0-9]{40}')  # hoisted out of the loop
    q = Queue(queue_name, connection=Redis(host=envget('redis.host')))
    tasks = []
    for job in q.jobs:
        job_dict = job.to_dict()  # fetch once; original called it twice
        match = task_id_re.search(job_dict.get('description'))
        if match is None:
            continue
        task = {
            "date_enqueued": str(process_date(job_dict.get('enqueued_at'))),
            "task_id": match.group(0),
        }
        task['hashes'] = count_valid_hashes_in_task(task['task_id'])
        tasks.append(task)
    return tasks
def get_tasks_on_queue(queue_name):
    """List pending tasks on the given RQ queue.

    Returns a list of dicts with date_enqueued, task_id (40-char
    uppercase id parsed from the job description) and hashes (valid
    hash count for the task). Jobs lacking a task id are skipped.
    """
    # Example of job.to_dict():
    #   {u'origin': u'task_no_vt', u'status': u'queued',
    #    u'description': u"Api.task.generic_task('N7UF...YEC')",
    #    u'enqueued_at': '2017-03-03T20:14:47Z', u'timeout': 31536000, ...}
    pattern = re.compile('[A-Z0-9]{40}')  # compile once, not per job
    q = Queue(queue_name, connection=Redis(host=envget('redis.host')))
    tasks = []
    for job in q.jobs:
        info = job.to_dict()  # single call; original invoked it twice
        found = pattern.search(info.get('description'))
        if found is None:
            continue
        task = {
            "date_enqueued": str(process_date(info.get('enqueued_at'))),
            "task_id": found.group(0),
        }
        task['hashes'] = count_valid_hashes_in_task(task['task_id'])
        tasks.append(task)
    return tasks
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
# Ad-hoc script: print every metadata document whose file entropy is
# above 7.999 (near-random content, e.g. packed or encrypted samples).
import pathmagic
from pymongo import MongoClient
from env import envget

client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]

high_entropy_query = {"particular_header.file_entropy": {"$gt": 7.999}}
for document in coll_meta.find(high_entropy_query):
    print(("Found: %s") % (document,))
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from pymongo import MongoClient
from env import envget

# Lazy module-level connection pool: the globals() guard keeps the
# MongoClient objects from being re-created if this file is executed
# again in a namespace where 'client' already exists.
if 'client' not in globals():
    # Parenthesized print for consistency with the rest of the codebase
    # (identical output under Python 2 for a single argument).
    print("Creating connection pool...")
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    client_fs = MongoClient(envget('files.host'), envget('files.port'))
    db_fs = client_fs[envget('db_files_name')]
    client_ver = MongoClient(envget('versions.host'), envget('versions.port'))
    db_ver = client_ver[envget('db_versions_name')]
    # Optional temporary-files database, only when configured.
    if envget('temporal_files_db'):
        client_temp = MongoClient(envget('temp_files.host'),
                                  envget('temp_files.port'))
        db_temp = client_temp[envget('db_temp_files_name')]
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from pymongo import MongoClient
from env import envget

# Module-level connection pool: the globals() check avoids rebuilding
# the MongoClient objects when 'client' is already defined in this
# namespace (i.e. the file was executed before).
if 'client' not in globals():
    # Parenthesized print for consistency with the rest of the codebase
    # (same output under Python 2 for a single argument).
    print("Creating connection pool...")
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    client_fs = MongoClient(envget('files.host'), envget('files.port'))
    db_fs = client_fs[envget('db_files_name')]
    client_ver = MongoClient(envget('versions.host'), envget('versions.port'))
    db_ver = client_ver[envget('db_versions_name')]
    # Optional temporary-files database, only when configured.
    if envget('temporal_files_db'):
        client_temp = MongoClient(
            envget('temp_files.host'), envget('temp_files.port'))
        db_temp = client_temp[envget('db_temp_files_name')]
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
# Ad-hoc script: download one sample from GridFS into the current
# directory, named after its id.
import pathmagic
from pymongo import MongoClient
import gridfs
from env import envget

# file_id="906f21f436b0dbb2c9cf37b80a90cdeb061ced3d"
# file_id="109bf9de7b82ffd7b8194aa3741b5d42ee878ebb"
file_id = "6abec077e93226f4d9d9a5351092f3e0baef6d78"

client = MongoClient(envget('files.host'), envget('files.port'))
db = client[envget('db_files_name')]
fs = gridfs.GridFS(db)
f = fs.find_one({"filename": file_id})
if f is None:
    print("File does not exist.")
    exit(0)
data = f.read()
# GridFS returns raw binary data; write with "wb" (the original used
# "w+" text mode, which corrupts samples via newline translation on
# Windows) and let the with-block guarantee the file is closed.
with open(file_id, "wb") as fd:
    fd.write(data)
print("File found")
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
# Ad-hoc script: dump all metadata documents with file entropy above
# 7.999 (content that is statistically near-random).
import pathmagic
from pymongo import MongoClient
from env import envget

client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]

entropy_filter = {"particular_header.file_entropy": {"$gt": 7.999}}
for hit in coll_meta.find(entropy_filter):
    print(("Found: %s") % (hit, ))
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
# Ad-hoc script: fetch a single sample from GridFS and save it to the
# current directory under its id.
import pathmagic
from pymongo import MongoClient
import gridfs
from env import envget

# file_id="906f21f436b0dbb2c9cf37b80a90cdeb061ced3d"
# file_id="109bf9de7b82ffd7b8194aa3741b5d42ee878ebb"
file_id = "6abec077e93226f4d9d9a5351092f3e0baef6d78"

client = MongoClient(envget('files.host'), envget('files.port'))
db = client[envget('db_files_name')]
fs = gridfs.GridFS(db)
f = fs.find_one({"filename": file_id})
if f is None:
    print("File does not exist.")
    exit(0)
data = f.read()
# The sample is raw binary: open with "wb" instead of the original
# "w+" text mode (which mangles bytes on Windows), and use a context
# manager so the handle is closed even on error.
with open(file_id, "wb") as fd:
    fd.write(data)
print("File found")
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
# Ad-hoc script: purge all jobs from the RQ queue named on the command
# line. Usage: python <script> <queue_name>
from redis import Redis
from rq import Queue
import sys
from env import envget

qfail = Queue(sys.argv[1], connection=Redis(host=envget('redis.host')))
# The original evaluated qfail.count and discarded the value (a no-op
# statement); report it instead so the operator sees what gets purged.
print(str(qfail.count) + " jobs to be removed")
qfail.empty()