def add_file():
    # tags = request.forms.get('name')
    upload = request.files.get('file')
    form_date = request.forms.get('file_date')
    try:  # validate
        process_date(form_date)
    except ValueError:
        # response.status = 422  # status can't be added because angular
        # will not show the message.
        return jsonize({'message': 'Invalid date format'})
    logging.debug("add_file(). date=" + str(form_date))
    if form_date is None:
        form_date = datetime.datetime.now()
    name = upload.filename
    data_bin = upload.file.read()
    file_id = hashlib.sha1(data_bin).hexdigest()
    logging.debug("add_file(): file_id=" + str(file_id))
    status = upload_file(data_bin)
    process_file(file_id)  # ToDo: add a redis job
    update_date(file_id, form_date)
    if status == "ok":
        return jsonize({'message': 'Added with ' + str(file_id)})
    elif status == "already exists":
        return jsonize({'message': 'Already exists ' + str(file_id)})
    elif status == "virustotal":
        return jsonize({'message': 'Already exists ' + str(file_id)})
    else:
        return jsonize({'message': 'Error'})
def add_file():
    # tags = request.forms.get('name')
    upload = request.files.get('file')
    name = upload.filename
    data_bin = upload.file.read()
    file_id = hashlib.sha1(data_bin).hexdigest()
    print "file_id=" + str(file_id)
    status = upload_file(data_bin)
    process_file(file_id)  # ToDo: add a redis job
    if status == "ok":
        return jsonize({'message': 'Added with ' + str(file_id)})
    elif status == "already exists":
        return jsonize({'message': 'Already exists ' + str(file_id)})
    elif status == "virustotal":
        return jsonize({'message': 'Already exists ' + str(file_id)})
    else:
        return jsonize({'message': 'Error'})
def save_file_from_vt(hash_id):
    downloaded_file = download_from_virus_total(hash_id)
    if downloaded_file is None:
        return {"status": "unknown", "hash": None}
    if downloaded_file.get('status') == "out_of_credits":
        return {"status": "out_of_credits", "hash": None}
    if downloaded_file.get('status') == "not_found":
        return {"status": "not_found", "hash": None}
    if downloaded_file.get('status') == 'ok':
        data_bin = downloaded_file.get('file')
        file_id = hashlib.sha1(data_bin).hexdigest()
        pc = PackageController()
        res = pc.searchFile(file_id)
        if res is None:  # File not found. Add it to the package.
            pc.append(file_id, data_bin, True)
            return {"status": "added", "hash": file_id}
        else:
            process_file(file_id)
            return {"status": "inconsistency_found", "hash": file_id}
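# A hedged usage sketch (not part of the original source): how a caller
# might branch on the status dict returned by save_file_from_vt() above.
# The statuses ("unknown", "out_of_credits", "not_found", "added",
# "inconsistency_found") come from that function; the helper name
# handle_vt_download is hypothetical.
def handle_vt_download(hash_id):
    result = save_file_from_vt(hash_id)
    status = result.get("status")
    if status == "added":
        # The binary is now in the package; index its metadata.
        return process_file(result.get("hash"))
    if status == "inconsistency_found":
        # Already in the package; save_file_from_vt() re-processed it.
        return result.get("hash")
    # "unknown", "out_of_credits" and "not_found" all mean no file.
    logging.debug("handle_vt_download(): status=" + str(status))
    return None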
def generic_process_hash(hash_str):
    hash_str = clean_hash(hash_str)
    if not valid_hash(hash_str):
        return None
    if len(hash_str) == 32:
        hash_str = get_file_id(hash_str)
    if hash_str is not None:
        return process_file(hash_str)
    else:
        return None
def add_file_from_vt(hash_id):
    downloaded_file = download_from_virus_total(hash_id)
    if downloaded_file is None:
        print "add_file_from_vt(): " + str(hash_id) + " not found in VT."
        return None
    print "add_file_from_vt(): downloaded_file is not None." + str(hash_id)
    data_bin = downloaded_file
    file_id = hashlib.sha1(data_bin).hexdigest()
    # print "file_id=" + str(file_id)
    pc = PackageController()
    res = pc.searchFile(file_id)
    if res is None:  # File not found. Add it to the package.
        pc.append(file_id, data_bin, True)
        print str(hash_id) + " added to DB from VT."
        # print("Added: %s" % (file_id,))
    else:
        print "add_file_from_vt(): " + str(hash_id) + " was found in the DB and asked in VT: BUG. Going to process right now."
        process_file(file_id)
    return file_id
def api_process_file():
    file_hash = clean_hash(request.query.file_hash)
    if len(file_hash) != 40:
        response.status = 400  # was response.code; Bottle uses response.status
        return jsonize({'message': 'Invalid hash format (use sha1)'})
    res = process_file(file_hash)
    if res is None:
        response.status = 404
        return jsonize("File not found in the database")
    return jsonize("File processed")
def main():
    collection_version = db["version_container"]
    collection_meta = db[envget('db_metadata_collection')]
    start = 0
    count = 0
    test = 0
    mis = 0
    print_flag = 1000000
    res = collection_meta.find(
        {}, {"file_id": 1}, no_cursor_timeout=True).skip(start)
    for r in res:
        count += 1
        test += 1
        file_id = r.get('file_id')
        if not check_if_has_version(file_id, collection_version):
            mis += 1
            process_file(file_id)
        if test >= print_flag:
            test = 0
            print "count-miss," + str(count) + "," + str(mis)
    print "count-miss," + str(count) + "," + str(mis)
def generic_process_hash(hash_str):
    if hash_str is None:
        return None
    hash_str = clean_hash(hash_str)
    if not valid_hash(hash_str):
        return None
    if len(hash_str) == 64:
        hash_str = get_file_id(hash_str)
    elif len(hash_str) == 32:
        pc = PackageController()
        hash_str = pc.md5_to_sha1(hash_str)
        logging.debug("generic_process_hash-->sha1: " + str(hash_str))
    if hash_str is not None:
        return process_file(hash_str)
    else:
        return None
def search_by_id(data, limit, columns=[], search_on_vt=False):
    # date - mime - packer are needed for stats
    if len(columns) == 0:
        retrieve = {"file_id": 1, "description": 1, "size": 1,
                    "mime_type": 1, "particular_header.packer_detection": 1,
                    "particular_header.headers.file_header.TimeDateStamp": 1}
    else:
        retrieve = {"file_id": 1, "description": 1,
                    "mime_type": 1, "particular_header.packer_detection": 1,
                    "particular_header.headers.file_header.TimeDateStamp": 1}
        for col in columns:
            dic = tree_menu.ids[int(col)]
            path = str(dic["path"])
            retrieve[path] = 1
    search_list = data.split('&')
    query_list = []
    av_collection_query_list = []
    hash_search = False
    hash_for_search = ""
    for search in search_list:
        if '=' not in search:
            logging.warning("= not in search. search=" + str(search))
            continue
        str_id, str_value = search.split('=')
        id = int(str_id.split('.')[0])
        if id <= 0:
            id = 0
        if str_value == "":
            continue
        p, v = translate_id(id, str_value)
        if id == 10 or id == 11 or id == 21:
            res = fuzz_search_fast(id, p, v)
            return res
        if id == 1 or id == 2 or id == 3:
            hash_search = True
            hash_for_search = v
        if id >= 10000:  # for adding AV searches
            av_collection_query_list.append(
                {p: {"$regex": v, "$options": 'i'}})
            continue
        query_list.append({p: v})
    if len(query_list) > 0 and len(av_collection_query_list) == 0:
        query = {"$and": query_list}
        res = searchFull(query, limit, retrieve)
        key_manager = KeyManager()
        # searching in VT.
        if (hash_search and len(res) == 0 and search_on_vt
                and key_manager.check_private_key()):
            logging.debug("search_by_id() -> save_file_from_vt()")
            add_response = save_file_from_vt(hash_for_search)
            sha1 = add_response.get('hash')
            if sha1 is None:
                return []
            process_file(sha1)
            query = {"file_id": sha1}
            res = searchFull(query, 1, retrieve)
        return res
    # If the user searches only for AV_signature and date:
    # Because VT antivirus analyses live in a separate collection, we used
    # to search the AV signature first, collect the hashes, and then search
    # hash by hash to see if the other restrictions in the query matched the
    # contents of meta_container (basically splitting the query in two). The
    # problem with this began when the av_analysis collection started to
    # grow. Possible solutions: query av_analysis with a count(), then
    # meta_container with a count(), and search first in the collection with
    # the lower number; this would improve performance a little. Another way
    # is to run both queries and intersect the hashes. Currently, VT
    # antivirus analysis sits in a separate collection because mongo limits
    # the number of indexes to 64; if that limit is removed, av_analysis and
    # meta_container should merge. Meanwhile, we can get decent performance
    # for a small query with only date and av_signature. (A sketch of the
    # count()-based idea follows this function.)
    if (len(query_list) == 1 and len(av_collection_query_list) > 0
            and query_list[0].get('date') is not None):
        query_list.extend(av_collection_query_list)
        query = {"$and": query_list}
        retrieve['sha1'] = 1
        retrieve.pop('description', None)
        retrieve.pop('mime_type', None)
        retrieve.pop('file_id', None)
        retrieve.pop(
            'particular_header.headers.file_header.TimeDateStamp', None)
        retrieve.pop('particular_header.packer_detection', None)
        return searchFull(query, limit, retrieve, "av_analysis")
    if len(av_collection_query_list) > 0:
        av_query = {"$and": av_collection_query_list}
    # res = ["2fa9672b7507f0e983897cfd18b24d3810bb2160", "hashfile2"]
    if len(av_collection_query_list) == 0:
        return []
    else:
        # do AV search
        db_collection = envget('db_metadata_collection')
        av_coll = db.av_analysis
        if limit == 0:
            av_res = av_coll.find(av_query, {"sha1": 1})
        else:
            av_res = av_coll.find(av_query, {"sha1": 1}).limit(limit * 2)
        lista_av = []
        for f in av_res:
            lista_av.append(f)
        res = []
        for l in lista_av:
            query_list_for_combinated = []
            sha1 = l.get("sha1")
            query_list_for_combinated.append({"hash.sha1": sha1})
            query_list_for_combinated = query_list_for_combinated + query_list
            query = {"$and": query_list_for_combinated}
            search = searchFull(query, 1, retrieve)
            res = res + search
        if limit > 0:
            return res[0:limit]
        else:
            return res
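# The comment block in search_by_id() above proposes counting both
# collections and querying the smaller one first. A minimal sketch of that
# idea, assuming pymongo-style collections (db.av_analysis and
# db[envget('db_metadata_collection')]); choose_first_collection is a
# hypothetical helper, not part of the original code.
def choose_first_collection(av_query, meta_query):
    av_coll = db.av_analysis
    meta_coll = db[envget('db_metadata_collection')]
    # count_documents() is the pymongo 3.7+ API; older drivers used
    # coll.find(query).count() instead.
    av_count = av_coll.count_documents(av_query)
    meta_count = meta_coll.count_documents(meta_query)
    # Start with the collection that narrows the candidate set the most,
    # then check the surviving hashes against the other collection.
    if av_count <= meta_count:
        return av_coll, av_query
    return meta_coll, meta_query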
def api_batch_process_debug_file():
    yield "<html><body><pre>"
    yield "Running Batch process\n"
    file_hashes = request.forms.get('file_hash')
    if file_hashes is None:
        response.status = 422
        logging.debug("api_batch_process_debug_file(): file_hash is missing")
        yield "file_hash parameter is missing"
        return
    # transform file_hashes into a list of hashes.
    not_found = []
    added_to_queue = 0
    downloaded_from_vt = 0
    for hash_id in file_hashes.split("\n"):
        hash_id = clean_hash(hash_id)
        if hash_id is None:
            continue
        data = "1=" + hash_id
        if len(hash_id) == 40 or len(hash_id) == 32:
            pc = PackageController()
            res = pc.getFile(hash_id)
            if res is not None and len(
                    SearchModule.search_by_id(data, 1, [], False)) == 0:
                logging.debug("Processing right now: " + str(hash_id))
                process_file(hash_id)
                if envget('auto_get_av_result'):
                    add_task_to_download_av_result(hash_id)
                continue
        res = SearchModule.search_by_id(data, 1, [], False)
        if len(res) == 0:
            logging.debug("process_debug(): metadata of " + str(hash_id) +
                          " was not found. We will look in Pc. hash length: " +
                          str(len(hash_id)))
            if len(hash_id) == 40 or len(hash_id) == 32:
                pc = PackageController()
                res = pc.getFile(hash_id)
                if res is not None:
                    logging.debug("process_debug(): hash was found (" +
                                  str(hash_id) + ")")
                else:
                    logging.debug("process_debug(): hash was not found(" +
                                  str(hash_id) + ")")
            logging.debug("process_debug():")
            logging.debug("process_debug(): going to search " + str(hash_id) +
                          " in vt")
            add_response = SearchModule.add_file_from_vt(hash_id)
            sha1 = add_response.get('hash')
            if sha1 is None:
                logging.debug("process_debug(): sha1 is None: " + str(hash_id))
                not_found.append(hash_id)
                continue
            else:
                downloaded_from_vt += 1
        else:
            sha1 = res[0]["sha1"]
        added_to_queue += 1
        add_hash_to_process_queue(sha1)
        if envget('auto_get_av_result'):
            add_task_to_download_av_result(sha1)
        yield str(sha1) + "\n"
    responsex = str(added_to_queue) + " files added to the process queue.\n"
    if downloaded_from_vt > 0:
        responsex += str(downloaded_from_vt) + " new hashes.\n"
    if len(not_found) != 0:
        responsex += str(len(not_found)) + " hashes not found.\n"
        responsex += "Not Found:\n"
        for aux in not_found:
            responsex = responsex + str(aux) + "\n"
    yield responsex
    yield "END"
def search_by_id(data, limit, columns=[], search_on_vt=False):
    # date - mime - packer are needed for stats
    if len(columns) == 0:
        retrieve = {"file_id": 1, "description": 1, "size": 1,
                    "mime_type": 1, "particular_header.packer_detection": 1,
                    "particular_header.headers.file_header.TimeDateStamp": 1}
    else:
        retrieve = {"file_id": 1, "description": 1,
                    "mime_type": 1, "particular_header.packer_detection": 1,
                    "particular_header.headers.file_header.TimeDateStamp": 1}
        for col in columns:
            dic = tree_menu.ids[int(col)]
            path = str(dic["path"])
            retrieve[path] = 1
    search_list = data.split('&')
    # print(len(search_list))
    query_list = []
    av_collection_query_list = []
    hash_search = False
    hash_for_search = ""
    for search in search_list:
        # print(search)
        str_id, str_value = search.split('=')
        id = int(str_id.split('.')[0])
        if id <= 0:
            id = 0
        if str_value == "":
            continue
        p, v = translate_id(id, str_value)
        if id == 10 or id == 11 or id == 21:
            res = fuzz_search_fast(id, p, v)
            return res
        if id == 1 or id == 2 or id == 3:
            hash_search = True
            hash_for_search = v
        if id >= 10000:  # for adding AV searches
            av_collection_query_list.append(
                {p: {"$regex": v, "$options": 'i'}})
            continue
        query_list.append({p: v})
    if len(query_list) > 0 and len(av_collection_query_list) == 0:
        query = {"$and": query_list}
        res = searchFull(query, limit, retrieve)
        if hash_search and len(res) == 0 and search_on_vt:  # searching in VT.
            print "search_by_id() -> add_file_from_vt()"
            sha1 = add_file_from_vt(hash_for_search)
            if sha1 is None:
                return []
            process_file(sha1)
            query = {"file_id": sha1}
            res = searchFull(query, 1, retrieve)
        return res
    if len(av_collection_query_list) > 0:
        av_query = {"$and": av_collection_query_list}
    # res = ["2fa9672b7507f0e983897cfd18b24d3810bb2160", "hashfile2"]
    if len(av_collection_query_list) == 0:
        return []
    else:
        # do AV search
        db_collection = env["db_metadata_collection"]
        av_coll = db.av_analysis
        if limit == 0:
            av_res = av_coll.find(av_query, {"sha1": 1})
        else:
            av_res = av_coll.find(av_query, {"sha1": 1}).limit(limit)
        lista_av = []
        for f in av_res:
            lista_av.append(f)
        # print(lista_av)  # results of AV searches
        res = []
        for l in lista_av:
            query_list_for_combinated = []
            sha1 = l.get("sha1")
            query_list_for_combinated.append({"hash.sha1": sha1})
            query_list_for_combinated = query_list_for_combinated + query_list
            query = {"$and": query_list_for_combinated}
            search = searchFull(query, 1, retrieve)
            res = res + search
        if limit > 0:
            return res[0:limit]
        else:
            return res