def testCode6():
    """Re-launch analysis for every sample stored in GridFS.

    Scans all files in the GridFS 'files' database starting at offset
    `inicio` and enqueues each through Launcher.launchAnalysisByID().
    Prints a timestamped progress line every 1000 samples and a final
    total.  Removed the unused `start`/`first` locals of the original.
    """
    inicio = 0  # skip offset; raise to resume an interrupted run
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    fs = gridfs.GridFS(db)
    # timeout=False keeps the server-side cursor alive for this long scan
    res = fs.find(timeout=False).skip(inicio)
    lc = Launcher()
    count = inicio
    reset = 0
    for f in res:
        sample = Sample()
        sample.setID(f.filename)  # GridFS filename doubles as the sample id
        sample.setStorageVersion({})
        lc.launchAnalysisByID(sample)
        reset += 1
        count += 1
        if reset >= 1000:
            # // keeps the "K" counter an integer under Python 3
            # (the original "/" was Python-2 integer division)
            print(
                time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time()))
                + " processed:" + str(count // 1000) + "K")
            reset = 0
    print(str(count) + " processed")
def testCode4():
    """Push every stored file (from offset `inicio` onward) back
    through the file-analytics pipeline, logging a running count
    every 1000 files and a final total."""
    inicio = 10569000
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    fs = gridfs.GridFS(db)
    # timeout=False: keep the cursor alive for the whole long scan
    cursor = fs.find(timeout=False).skip(inicio)
    launcher = Launcher()
    processed = inicio
    since_log = 0
    for grid_file in cursor:
        contents = grid_file.read()
        launcher.launchFileAnalitics((grid_file.filename, contents))
        since_log += 1
        processed += 1
        if since_log >= 1000:
            print(str(processed) + " processed")
            since_log = 0
    print(str(processed) + " processed")
def testCode4():
    # NOTE(review): duplicate definition — this shadows the identical
    # testCode4 defined earlier in the file; only this one survives.
    # Streams every GridFS file from offset `inicio` onward into the
    # file-analytics launcher, printing a running count every 1000 files.
    inicio = 10569000  # hard-coded resume offset
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    fs = gridfs.GridFS(db)
    # timeout=False keeps the server cursor alive for this long scan
    res = fs.find(timeout=False).skip(inicio)
    lc = Launcher()
    count = inicio
    reset = 0
    for f in res:
        data = f.read()
        # print(f.filename,count)
        lc.launchFileAnalitics((f.filename, data))
        reset += 1
        count += 1
        if(reset >= 1000):
            print(str(count) + " processed")
            reset = 0
    print(str(count) + " processed")
# Beispiel #4
def searchFull(search, limit):
    """Run a metadata query and return the matching file ids.

    Parameters:
        search: MongoDB query document passed straight to find().
        limit: maximum number of documents to fetch (0 = no limit).

    Returns:
        list[str]: the "file_id" field of each matching document.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    cursor = coll_meta.find(search).limit(limit)
    # Single pass over the cursor instead of materializing the full
    # documents into an intermediate list first.
    return [str(doc["file_id"]) for doc in cursor]
# Beispiel #5
def searchFull(search, limit):
    """Run a metadata query and return the matching file ids.

    Parameters:
        search: MongoDB query document passed straight to find().
        limit: maximum number of documents to fetch (0 = no limit).

    Returns:
        list[str]: the "file_id" field of each matching document.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    cursor = coll_meta.find(search).limit(limit)
    # Single pass over the cursor instead of materializing the full
    # documents into an intermediate list first.
    return [str(doc["file_id"]) for doc in cursor]
# Beispiel #6
def searchFuzzy(fuzz, limit, thresh):
    """Compare a ssdeep hash against the stored fuzzy hashes.

    Parameters:
        fuzz: ssdeep hash string to compare against the collection.
        limit: maximum number of candidate documents to scan.
        thresh: minimum similarity score to include a match.

    Returns:
        dict: file_id -> ssdeep similarity score for every candidate
        whose score reaches `thresh`.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    # Iterate the cursor directly instead of copying every document
    # into an intermediate list first.
    cursor = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
    ret = {}
    for doc in cursor:
        res = -1  # sentinel shown in the log line if ssdeep rejects the hash
        try:
            res = ssdeep.compare(doc["fuzzy_hash"], fuzz)
        except InternalError:
            # Malformed stored hash: report it and keep scanning.
            print(str(res) + "------" +
                  str(doc["fuzzy_hash"]) + "-----" + str(doc["file_id"]))
            continue
        if res >= thresh:
            ret[doc["file_id"]] = res
    return ret
# Beispiel #7
def searchFuzzy(fuzz, limit, thresh):
    """Compare a ssdeep hash against the stored fuzzy hashes.

    Parameters:
        fuzz: ssdeep hash string to compare against the collection.
        limit: maximum number of candidate documents to scan.
        thresh: minimum similarity score to include a match.

    Returns:
        dict: file_id -> ssdeep similarity score for every candidate
        whose score reaches `thresh`.
    """
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    coll_meta = db["db_metadata_collection"]
    # Iterate the cursor directly instead of copying every document
    # into an intermediate list first.
    cursor = coll_meta.find({}, {"file_id": 1, "fuzzy_hash": 1}).limit(limit)
    ret = {}
    for doc in cursor:
        res = -1  # sentinel shown in the log line if ssdeep rejects the hash
        try:
            res = ssdeep.compare(doc["fuzzy_hash"], fuzz)
        except InternalError:
            # Malformed stored hash: report it and keep scanning.
            print(
                str(res) + "------" + str(doc["fuzzy_hash"]) + "-----" +
                str(doc["file_id"]))
            continue
        if res >= thresh:
            ret[doc["file_id"]] = res
    return ret
def testCode6():
    """Re-launch analysis for every sample stored in GridFS.

    Scans all files in the GridFS 'files' database starting at offset
    `inicio` and enqueues each through Launcher.launchAnalysisByID().
    Prints a timestamped progress line every 1000 samples and a final
    total.  Removed the unused `start`/`first` locals of the original.
    """
    inicio = 0  # skip offset; raise to resume an interrupted run
    client = MongoClient(envget('files.host'), envget('files.port'))
    db = client[envget('db_files_name')]
    fs = gridfs.GridFS(db)
    # timeout=False keeps the server-side cursor alive for this long scan
    res = fs.find(timeout=False).skip(inicio)
    lc = Launcher()
    count = inicio
    reset = 0
    for f in res:
        sample = Sample()
        sample.setID(f.filename)  # GridFS filename doubles as the sample id
        sample.setStorageVersion({})
        lc.launchAnalysisByID(sample)
        reset += 1
        count += 1
        if reset >= 1000:
            # // keeps the "K" counter an integer under Python 3
            # (the original "/" was Python-2 integer division)
            print(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time())
                                ) + " processed:" + str(count // 1000) + "K")
            reset = 0
    print(str(count) + " processed")
def get_tasks_on_queue(queue_name):
    """List pending tasks on an rq queue.

    Parameters:
        queue_name: name of the rq queue to inspect.

    Returns:
        list[dict]: one entry per queued job with keys
        "date_enqueued", "task_id" (the 40-char hash extracted from
        the job description) and "hashes"; jobs whose description
        contains no such hash are skipped.
    """
    q = Queue(queue_name, connection=Redis(host=envget('redis.host')))
    # Compiled once, outside the loop — same pattern for every job.
    task_id_pattern = re.compile('[A-Z0-9]{40}')
    tasks = []
    for job in q.jobs:
        # job.to_dict() yields e.g.:
        # {u'origin': u'task_no_vt', u'status': u'queued',
        #  u'description': u"Api.task.generic_task('N7UFZ56FQDITJ34F40TZB50XAWVNW575QGIL4YEC')",
        #  u'created_at': '2017-03-03T20:14:47Z',
        #  u'enqueued_at': '2017-03-03T20:14:47Z', u'timeout': 31536000, ...}
        job_info = job.to_dict()  # fetched once; the original called it twice
        task = {"date_enqueued": str(
            process_date(job_info.get('enqueued_at')))}
        task_id = task_id_pattern.search(job_info.get('description'))
        if task_id is None:
            continue  # not a hash-carrying task job
        task['task_id'] = task_id.group(0)
        task['hashes'] = count_valid_hashes_in_task(task['task_id'])
        tasks.append(task)
    return tasks
# Beispiel #10
def get_tasks_on_queue(queue_name):
    """List pending tasks on an rq queue.

    Parameters:
        queue_name: name of the rq queue to inspect.

    Returns:
        list[dict]: one entry per queued job with keys
        "date_enqueued", "task_id" (the 40-char hash extracted from
        the job description) and "hashes"; jobs whose description
        contains no such hash are skipped.
    """
    q = Queue(queue_name, connection=Redis(host=envget('redis.host')))
    # Compiled once, outside the loop — same pattern for every job.
    task_id_pattern = re.compile('[A-Z0-9]{40}')
    tasks = []
    for job in q.jobs:
        # job.to_dict() yields e.g.:
        # {u'origin': u'task_no_vt', u'status': u'queued',
        #  u'description': u"Api.task.generic_task('N7UFZ56FQDITJ34F40TZB50XAWVNW575QGIL4YEC')",
        #  u'created_at': '2017-03-03T20:14:47Z',
        #  u'enqueued_at': '2017-03-03T20:14:47Z', u'timeout': 31536000, ...}
        job_info = job.to_dict()  # fetched once; the original called it twice
        task = {
            "date_enqueued":
            str(process_date(job_info.get('enqueued_at')))
        }
        task_id = task_id_pattern.search(job_info.get('description'))
        if task_id is None:
            continue  # not a hash-carrying task job
        task['task_id'] = task_id.group(0)
        task['hashes'] = count_valid_hashes_in_task(task['task_id'])
        tasks.append(task)
    return tasks
# Beispiel #11
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import pathmagic
from pymongo import MongoClient
from env import envget

# Print every metadata document whose file entropy exceeds 7.999
# (entropy that high suggests packed or encrypted content).
client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]

query = {"particular_header.file_entropy": {"$gt": 7.999}}
for e in coll_meta.find(query):
    print(("Found: %s") % (e,))
# Beispiel #12
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from pymongo import MongoClient
from env import envget

# Module-level connection pool: the globals() guard ensures the
# clients are created only once even if this module runs twice.
if 'client' not in globals():
    # print() works on Python 2 and 3; the original bare
    # `print "..."` statement is a syntax error under Python 3.
    print("Creating connection pool...")
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    client_fs = MongoClient(envget('files.host'), envget('files.port'))
    db_fs = client_fs[envget('db_files_name')]
    client_ver = MongoClient(envget('versions.host'), envget('versions.port'))
    db_ver = client_ver[envget('db_versions_name')]
    if envget('temporal_files_db'):
        client_temp = MongoClient(envget('temp_files.host'),
                                  envget('temp_files.port'))
        db_temp = client_temp[envget('db_temp_files_name')]
# Beispiel #13
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from pymongo import MongoClient
from env import envget

# Module-level connection pool: the globals() guard ensures the
# clients are created only once even if this module runs twice.
if 'client' not in globals():
    # print() works on Python 2 and 3; the original bare
    # `print "..."` statement is a syntax error under Python 3.
    print("Creating connection pool...")
    client = MongoClient(envget('metadata.host'), envget('metadata.port'))
    db = client[envget('db_metadata_name')]
    client_fs = MongoClient(envget('files.host'), envget('files.port'))
    db_fs = client_fs[envget('db_files_name')]
    client_ver = MongoClient(envget('versions.host'), envget('versions.port'))
    db_ver = client_ver[envget('db_versions_name')]
    if envget('temporal_files_db'):
        client_temp = MongoClient(
            envget('temp_files.host'), envget('temp_files.port'))
        db_temp = client_temp[envget('db_temp_files_name')]
# Beispiel #14
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import pathmagic
from pymongo import MongoClient
import gridfs
from env import envget

# Fetch one sample out of GridFS by its hash filename and dump it to a
# local file of the same name.
# file_id="906f21f436b0dbb2c9cf37b80a90cdeb061ced3d"
# file_id="109bf9de7b82ffd7b8194aa3741b5d42ee878ebb"
file_id = "6abec077e93226f4d9d9a5351092f3e0baef6d78"

client = MongoClient(envget('files.host'), envget('files.port'))
db = client[envget('db_files_name')]
fs = gridfs.GridFS(db)
f = fs.find_one({"filename": file_id})
if f is None:
    print("File does not exist.")
    exit(0)
data = f.read()
# GridFS returns raw bytes: write in binary mode ("wb", not the
# original text-mode "w+", which breaks on Python 3) and let the
# context manager close the handle even on error.
with open(file_id, "wb") as fd:
    fd.write(data)
print("File found")
# Beispiel #15
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import pathmagic
from pymongo import MongoClient
from env import envget

# Print every metadata document whose file entropy exceeds 7.999
# (entropy that high suggests packed or encrypted content).
client = MongoClient(envget('metadata.host'), envget('metadata.port'))
db = client[envget('db_metadata_name')]
coll_meta = db[envget('db_metadata_collection')]

query = {"particular_header.file_entropy": {"$gt": 7.999}}
for e in coll_meta.find(query):
    print(("Found: %s") % (e, ))
# Beispiel #16
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
import pathmagic
from pymongo import MongoClient
import gridfs
from env import envget

# Fetch one sample out of GridFS by its hash filename and dump it to a
# local file of the same name.
# file_id="906f21f436b0dbb2c9cf37b80a90cdeb061ced3d"
# file_id="109bf9de7b82ffd7b8194aa3741b5d42ee878ebb"
file_id = "6abec077e93226f4d9d9a5351092f3e0baef6d78"

client = MongoClient(envget('files.host'), envget('files.port'))
db = client[envget('db_files_name')]
fs = gridfs.GridFS(db)
f = fs.find_one({"filename": file_id})
if f is None:
    print("File does not exist.")
    exit(0)
data = f.read()
# GridFS returns raw bytes: write in binary mode ("wb", not the
# original text-mode "w+", which breaks on Python 3) and let the
# context manager close the handle even on error.
with open(file_id, "wb") as fd:
    fd.write(data)
print("File found")
# Copyright (C) 2016 Deloitte Argentina.
# This file is part of CodexGigas - https://github.com/codexgigassys/
# See the file 'LICENSE' for copying permission.
from redis import Redis
from rq import Queue
import sys
from env import envget

# Empty the rq queue named on the command line, reporting how many
# jobs are being dropped.
qfail = Queue(sys.argv[1], connection=Redis(host=envget('redis.host')))
# The original evaluated qfail.count and discarded the result; print
# it so the operator sees how many jobs are about to be purged.
print(qfail.count)
qfail.empty()