Example #1
0
def get_result_from_av():
    """Download AV scan results for a file identified by hash.

    Reads ``file_hash`` from the query string. A 40-char value is used
    directly as the sha1; anything else is resolved to a sha1 through
    the search index. Returns a jsonized status or error message.
    """
    hash_id = request.query.file_hash
    if len(hash_id) == 0:
        # Bottle sets the HTTP status via .status; assigning .code is a
        # plain attribute write and leaves the response at 200.
        response.status = 400
        return jsonize({'error': 4, 'error_message': 'file_hash parameter is missing.'})
    hash_id = clean_hash(hash_id)
    if not valid_hash(hash_id):
        response.status = 400  # consistent with the other error branches
        return jsonize({'error': 5, 'error_message': 'Invalid hash format.'})
    if len(hash_id) != 40:
        # Not a sha1: resolve md5/sha2 to sha1 via the search index.
        data = "1=" + str(hash_id)
        res = SearchModule.search_by_id(data, 1, [], True)
        if len(res) == 0:
            response.status = 400
            return jsonize({'error': 6, 'error_message': 'File not found'})
        sha1 = res[0]["sha1"]
    else:
        sha1 = hash_id
    if vt_key():
        av_result = get_av_result(sha1)
    else:
        return jsonize({'error': 7, "error_message": "Error: VirusTotal API key missing from secrets.py file"})
    if av_result is None:
        return jsonize({"error": 8, "error_message": "Cannot get analysis (hash not found in VT? out of credits?)"})
    return jsonize({"message": "AV scans downloaded."})
Example #2
0
def api_batch_process_file():
    """Batch-queue files for processing.

    Reads a newline-separated list of hashes from the ``file_hash`` form
    field, resolves each one to a sha1 through the search index, queues
    found files for processing and reports the hashes that were missing.
    """
    print("Running Batch process")
    file_hashes = request.forms.get('file_hash')
    if file_hashes is None:
        # Missing form parameter would otherwise crash on .split() below
        # (the debug variant of this endpoint guards the same way).
        response.status = 422
        return jsonize({"message": "file_hash parameter is missing"})
    not_found = []
    added_to_queue = 0
    downloaded_from_vt = 0
    for hash_id in file_hashes.split("\n"):
        hash_id = clean_hash(hash_id)
        if hash_id is None:
            continue
        data = "1=" + str(hash_id)
        res = SearchModule.search_by_id(data, 1, [], True)
        if len(res) == 0:
            # Unknown hash: record it and move on (no VT download here).
            not_found.append(hash_id)
            continue
        sha1 = res[0]["sha1"]

        added_to_queue += 1
        print(str(hash_id) + " added to queue")
        add_hash_to_process_queue(sha1)
        if env['auto_get_av_result']:
            get_av_result(sha1)

    responsex = str(added_to_queue) + " files added to the process queue.\n"
    if downloaded_from_vt > 0:
        responsex += str(downloaded_from_vt) + " new hashes.\n"
    if len(not_found) != 0:
        responsex += str(len(not_found)) + " hashes not found.\n"
        responsex += "Not Found:\n"
        for aux in not_found:
            responsex = responsex + str(aux) + "\n"

    return jsonize({"message": responsex})
Example #3
0
def get_result_from_av():
    """Fetch AV scan results for the sha1 given in the ``file_hash`` query param."""
    file_hash = clean_hash(request.query.file_hash)
    # clean_hash can return None (see the batch handlers); len(None)
    # would raise TypeError and turn a bad request into a 500.
    if file_hash is None or len(file_hash) != 40:
        response.status = 400  # .status is the real Bottle attribute
        return jsonize({'message': 'Invalid hash format (use sha1)'})

    av_result = get_av_result(file_hash)
    if av_result is None:
        return jsonize("Can not get analysis")

    return jsonize("File processed")
Example #4
0
def get_result_from_av():
    """Return AV analysis status for the sha1 passed as ``file_hash``."""
    sha1_candidate = clean_hash(request.query.file_hash)
    if len(sha1_candidate) == 40:
        av_result = get_av_result(sha1_candidate)
        if av_result is None:
            return jsonize("Can not get analysis")
        return jsonize("File processed")
    # Anything that is not exactly 40 hex chars is rejected as non-sha1.
    response.code = 400
    return jsonize({'message': 'Invalid hash format (use sha1)'})
Example #5
0
def api_batch_process_debug_file():
    """Debug variant of the batch processor: streams progress as HTML.

    Yields one line per queued sha1 (downloading unknown hashes from
    VirusTotal), then a summary of queued / new / missing hashes.
    """
    yield "<html><body><pre>"
    yield "Running Batch process\n"
    file_hashes = request.forms.get('file_hash')
    if file_hashes is None:
        # Guard: .split() below would raise AttributeError on None.
        yield "file_hash parameter is missing"
        return
    not_found = []
    added_to_queue = 0
    downloaded_from_vt = 0
    # file_hashes is a newline-separated list of hashes.
    for hash_id in file_hashes.split("\n"):
        hash_id = clean_hash(hash_id)
        if hash_id is None:
            continue
        data = "1=" + hash_id
        res = SearchModule.search_by_id(data, 1, [], False)
        if len(res) == 0:
            # Not indexed locally: try to pull the sample from VT.
            print("downloading " + str(hash_id) + " from vt")
            sha1 = SearchModule.add_file_from_vt(hash_id)
            if sha1 is None:
                not_found.append(hash_id)
                continue
            downloaded_from_vt += 1
        else:
            sha1 = res[0]["sha1"]

        added_to_queue += 1
        add_hash_to_process_queue(sha1)
        if env['auto_get_av_result']:
            get_av_result(sha1)
        yield str(sha1) + "\n"

    responsex = str(added_to_queue) + " files added to the process queue.\n"
    if downloaded_from_vt > 0:
        responsex += str(downloaded_from_vt) + " new hashes.\n"
    if len(not_found) != 0:
        responsex += str(len(not_found)) + " hashes not found.\n"
        responsex += "Not Found:\n"
        for aux in not_found:
            responsex = responsex + str(aux) + "\n"
    yield responsex
    yield "END"
Example #6
0
def get_result_from_av():
    """Resolve the requested hash to a sha1 and pull AV scans from VT.

    Accepts md5/sha1/sha2 in the ``file_hash`` query parameter; anything
    that is not already a sha1 is resolved through the search index.
    """
    requested = request.query.file_hash
    if len(requested) == 0:
        response.status = 400
        return jsonize({
            'error': 4,
            'error_message': 'file_hash parameter is missing.'
        })
    requested = clean_hash(requested)
    if not valid_hash(requested):
        return jsonize({'error': 5, 'error_message': 'Invalid hash format.'})
    if len(requested) == 40:
        sha1 = requested
    else:
        # Resolve a non-sha1 hash through the search index.
        lookup = SearchModule.search_by_id("1=" + str(requested), 1, [], True)
        if len(lookup) == 0:
            response.status = 400
            return jsonize({'error': 6, 'error_message': 'File not found'})
        sha1 = lookup[0]["sha1"]
    key_manager = KeyManager()

    if not key_manager.check_keys_in_secrets():
        return jsonize({
            'error':
            7,
            "error_message":
            "Error: VirusTotal API key missing from secrets.py file"
        })
    av_result = get_av_result(sha1, 'high')
    status = av_result.get('status')
    if status == "added":
        return jsonize({"message": "AV scans downloaded."})
    if status == "already_had_it":
        return jsonize({"message": "File already have AV scans."})
    if status == "not_found":
        return jsonize({"error": 10, "error_message": "Not found on VT."})
    if status == "no_key_available":
        return jsonize({
            "error":
            11,
            "error_message":
            "No key available right now. Please try again later."
        })
    # Unknown status: log the full result for later diagnosis.
    logging.error("av_result for hash=" + str(sha1))
    logging.error("av_result=" + str(av_result))
    return jsonize({"error": 9, "error_message": "Cannot get analysis."})
Example #7
0
def get_result_from_av():
    """Download AV scans for the hash supplied via ``file_hash``.

    Non-sha1 hashes are translated to a sha1 through the search index
    before querying VirusTotal.
    """
    raw = request.query.file_hash
    if len(raw) == 0:
        response.status = 400
        return jsonize({'error': 4, 'error_message': 'file_hash parameter is missing.'})
    cleaned = clean_hash(raw)
    if not valid_hash(cleaned):
        return jsonize({'error': 5, 'error_message': 'Invalid hash format.'})
    if len(cleaned) != 40:
        hits = SearchModule.search_by_id("1=" + str(cleaned), 1, [], True)
        if len(hits) == 0:
            response.status = 400
            return jsonize({'error': 6, 'error_message': 'File not found'})
        sha1 = hits[0]["sha1"]
    else:
        sha1 = cleaned
    key_manager = KeyManager()

    if not key_manager.check_keys_in_secrets():
        return jsonize({'error': 7, "error_message": "Error: VirusTotal API key missing from secrets.py file"})
    av_result = get_av_result(sha1, 'high')

    # Map each known AV-download status to its API payload.
    outcomes = {
        "added": {"message": "AV scans downloaded."},
        "already_had_it": {"message": "File already have AV scans."},
        "not_found": {"error": 10, "error_message": "Not found on VT."},
        "no_key_available": {"error": 11, "error_message": "No key available right now. Please try again later."},
    }
    payload = outcomes.get(av_result.get('status'))
    if payload is not None:
        return jsonize(payload)
    # Unknown status: log the raw result for diagnosis.
    logging.error("av_result for hash=" + str(sha1))
    logging.error("av_result=" + str(av_result))
    return jsonize({"error": 9, "error_message": "Cannot get analysis."})
Example #8
0
def generic_task(process,
                 file_hash,
                 vt_av,
                 vt_samples,
                 email,
                 task_id,
                 document_name=""):
    """Run a batch task over a set of hashes.

    Validates and deduplicates the hashes in *file_hash*, then,
    depending on the flags: downloads missing samples from VirusTotal
    (*vt_samples*), processes them (*process*), and fetches AV scan
    results (*vt_av*). Progress is persisted with save() throughout;
    the summary dict is returned and, if *email* is truthy, also sent
    by mail.
    """
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.info("task_id=" + str(task_id))
    generic_count = 0
    response = {}
    response["date_start"] = datetime.datetime.now()
    response["document_name"] = document_name
    response["task_id"] = task_id
    # Validate the requested hashes and record per-hash errors.
    check_hashes_output = check_hashes(file_hash)
    errors = check_hashes_output.get('errors')
    for error in errors:
        key = error.get('error')
        value = error.get('error_message')
        logging.error("errors (key=" + str(key) + ", value=" + str(value) +
                      ")")
        response = add_error(response, key, value)
    hashes = check_hashes_output.get('hashes')
    remove_dups_output = remove_dups(hashes)
    # remove duplicated hashes
    hashes = remove_dups_output.get('list')
    response["duplicated_hashes"] = remove_dups_output.get('dups')
    response["hashes"] = hashes

    # Detect hashes that refer to the same sample under different
    # algorithms (e.g. the md5 and the sha1 of one file both submitted),
    # using the metadata store to learn each sample's other hashes.
    hash_dicts = []
    mc = MetaController()
    for x in hashes:
        x_dict = {}
        x_dict["original"] = x
        x_dict["sha1"] = get_file_id(x)
        if (x_dict["sha1"] is not None):
            doc = mc.read(x_dict["sha1"])
            if doc is not None and doc.get('hash') is not None:
                if doc.get('hash').get('md5') is not None:
                    x_dict["md5"] = doc.get('hash').get('md5')
                if doc.get('hash').get('sha2') is not None:
                    x_dict["sha2"] = doc.get('hash').get('sha2')
        hash_dicts.append(x_dict)
    response["duplicated_samples"] = []
    for x in hash_dicts:
        for y in hash_dicts:
            if x.get('original') != y.get('original') and (
                    x.get('original') == y.get('sha1') or x.get('original')
                    == y.get('md5') or x.get('original') == y.get('sha2')):
                response["duplicated_samples"].append(y.get('original'))
                # NOTE(review): removes from hash_dicts while iterating it;
                # appears to rely on the nested re-scan — confirm intended.
                hash_dicts.remove(y)
    hashes = []
    for x in hash_dicts:
        hashes.append(x.get('original'))
    response["hashes"] = hashes

    if (len(hashes) == 0):
        response = add_error(response, 6, "No valid hashes provided.")
        response["date_end"] = datetime.datetime.now()
        save(response)
        return change_date_to_str(response)

    save(response)

    response["inconsistencies"] = []
    if (vt_samples or process):
        # fix_inconsistency() returns 1 when it repaired something.
        for hash_id in hashes:
            if fix_inconsistency(hash_id) == 1:
                response["inconsistencies"].append(hash_id)

    save(response)

    response["not_found_on_vt"] = []
    response["private_credits_spent"] = 0
    if vt_samples:
        # Download samples we do not have locally (or whose DB state is
        # inconsistent) from VirusTotal, and process the downloaded sha1s.
        response["downloaded"] = []
        for hash_id in hashes:
            if (get_file_id(hash_id) is None or db_inconsistency(hash_id)):
                logging.debug("task(): " + hash_id +
                              " was not found (get_file_id returned None). ")
                generic_count += 1
                if (generic_count % 20 == 0):
                    # Periodically persist progress on long batches.
                    save(response)
                output = save_file_from_vt(hash_id)
                sha1 = output.get('hash')
                if (output.get('status') == 'out_of_credits'):
                    # Retry until VT credits become available again.
                    # NOTE(review): busy loop with no sleep/backoff.
                    request_successful = False
                    while not request_successful:
                        output = save_file_from_vt(hash_id)
                        if output.get('status') != 'out_of_credits':
                            request_successful = True
                if (output.get('status') == 'added'):
                    response["downloaded"].append(hash_id)
                    # we need to process the sha1, and not the sha2 because
                    # the grid does not save the sha2.
                    generic_process_hash(sha1)
                    response["private_credits_spent"] += 1
                elif (output.get('status') == 'inconsistency_found'):
                    response["private_credits_spent"] += 1
                    generic_process_hash(sha1)
                elif (output.get('status') == 'not_found'):
                    response["not_found_on_vt"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error(str(output))
                    response = add_error(
                        response, 11,
                        "Unknown error when downloading sample from VT.")
                save(response)
    save(response)
    response["processed"] = []
    response["not_found_for_processing"] = []
    if process:
        logging.debug("process=true")
        for hash_id in hashes:
            logging.debug("task: hash_id=" + str(hash_id))
            process_start_time = datetime.datetime.now()
            generic_count += 1
            if (generic_count % 20 == 0):
                save(response)
            # generic_process_hash() returns 0 on success.
            if (generic_process_hash(hash_id) == 0):
                process_end_time = datetime.datetime.now()
                response["processed"].append({
                    "hash":
                    hash_id,
                    "seconds": (process_end_time - process_start_time).seconds
                })
            else:
                response["not_found_for_processing"].append(hash_id)
    save(response)
    if vt_av:
        # Fetch AV scan reports for every sample available locally.
        response["vt_av_added"] = []
        response["vt_av_out_of_credits"] = []
        response["not_found_on_vt_av"] = []
        response["vt_av_already_downloaded"] = []
        response["public_credits_spent"] = 0
        for hash_id in hashes:
            sha1 = get_file_id(hash_id)
            if (sha1 is not None):
                av_result_output = get_av_result(sha1)
                if (av_result_output.get('status') == 'out_of_credits'):
                    # Retry and record how many attempts were needed.
                    request_successful = False
                    count = 0
                    while not request_successful:
                        av_result_output = get_av_result(sha1)
                        count += 1
                        if av_result_output.get('status') != 'out_of_credits':
                            response["vt_av_out_of_credits"].append(hash_id)
                            response = add_error(
                                response, 10, "Had to retried " + str(count) +
                                " times in av_result(out_of_credits) for hash= "
                                + str(hash_id) +
                                ". Is someone else using the same public key?")
                            request_successful = True
                if (av_result_output.get('status') == "added"):
                    response["vt_av_added"].append(hash_id)
                    response["public_credits_spent"] += 1
                elif (av_result_output.get('status') == "already_had_it"):
                    response["vt_av_already_downloaded"].append(hash_id)
                elif (av_result_output.get('status') == 'error'):
                    response = add_error(
                        response, 9, "Error in av_result: " +
                        str(av_result_output.get('error_message')))
                elif (av_result_output.get('status') == 'not_found'):
                    response["not_found_on_vt_av"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error("unknown error in av_result: " +
                                  str(hash_id) + " ; " + str(av_result_output))
                    response = add_error(response, 12,
                                         "Unknown error in av_result()")
                save(response)

    if (bool(email)):
        send_mail(email, "task done", str(response))
    response["date_end"] = datetime.datetime.now()
    save(response)
    return response
Example #9
0
def generic_task(task_id):
    """Execute a previously queued task identified by *task_id*.

    Loads the task request from storage, then validates/dedups the
    hashes, optionally downloads samples from VirusTotal, processes
    them, and fetches AV results, persisting progress with save()
    along the way. Returns the (already saved) summary dict.
    """
    response = load_task(task_id)
    if response.get('date_end') is not None:
        # A finished task should never reappear on the queue.
        logging.error(
            "Task already done. why was this on the queue? task_id=" + str(task_id))
        return response

    # Unpack what was requested when the task was created.
    process = response['requested']['process']
    file_hash = response['requested']['file_hash']
    vt_av = response['requested']['vt_av']
    vt_samples = response['requested']['vt_samples']
    email = response['requested']['email']
    document_name = response['requested'].get('document_name', '')
    ip = response['requested']['ip']

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.info("task_id=" + str(task_id))
    logging.info("response['requested']=" + str(response['requested']))
    generic_count = 0
    # Start a fresh response document for this run.
    response = {}
    response["date_start"] = datetime.datetime.now()
    response["document_name"] = document_name
    response["task_id"] = task_id
    response["ip"] = ip
    # Validate the requested hashes and record per-hash errors.
    check_hashes_output = check_hashes(file_hash)
    errors = check_hashes_output.get('errors')
    for error in errors:
        key = error.get('error')
        value = error.get('error_message')
        logging.error("errors (key=" + str(key) +
                      ", value=" + str(value) + ")")
        response = add_error(response, key, value)
    hashes = check_hashes_output.get('hashes')
    remove_dups_output = remove_dups(hashes)
    # remove duplicated hashes
    hashes = remove_dups_output.get('list')
    response["duplicated_hashes"] = remove_dups_output.get('dups')
    response["hashes"] = hashes

    # Detect hashes that refer to the same sample under different
    # algorithms, using the metadata store for each sample's other hashes.
    hash_dicts = []
    mc = MetaController()
    for x in hashes:
        x_dict = {}
        x_dict["original"] = x
        x_dict["sha1"] = get_file_id(x)
        if(x_dict["sha1"] is not None):
            doc = mc.read(x_dict["sha1"])
            if doc is not None and doc.get('hash') is not None:
                if doc.get('hash').get('md5') is not None:
                    x_dict["md5"] = doc.get('hash').get('md5')
                if doc.get('hash').get('sha2') is not None:
                    x_dict["sha2"] = doc.get('hash').get('sha2')
        hash_dicts.append(x_dict)
    response["duplicated_samples"] = []
    for x in hash_dicts:
        for y in hash_dicts:
            if x.get('original') != y.get('original') and (
                    x.get('original') == y.get('sha1') or
                    x.get('original') == y.get('md5') or
                    x.get('original') == y.get('sha2')):
                response["duplicated_samples"].append(y.get('original'))
                # NOTE(review): removes from hash_dicts while iterating it;
                # appears to rely on the nested re-scan — confirm intended.
                hash_dicts.remove(y)
    hashes = []
    for x in hash_dicts:
        hashes.append(x.get('original'))
    response["hashes"] = hashes

    if(len(hashes) == 0):
        response = add_error(response, 6, "No valid hashes provided.")
        response["date_end"] = datetime.datetime.now()
        save(response)
        return change_date_to_str(response)

    save(response)
    response["private_credits_spent"] = 0

    response["inconsistencies"] = []
    if(vt_samples or process):
        # fix_inconsistency() reports whether a repair happened and
        # whether it cost a private VT credit.
        for hash_id in hashes:
            inconsistency_output = fix_inconsistency(hash_id)
            if inconsistency_output.get('inconsistency'):
                response["inconsistencies"].append(hash_id)
                if inconsistency_output.get('credit_spent'):
                    response["private_credits_spent"] += 1

    save(response)

    response["not_found_on_vt"] = []
    if vt_samples:
        # Download samples we do not have locally (or whose DB state is
        # inconsistent) from VirusTotal, and process the downloaded sha1s.
        response["downloaded"] = []
        for hash_id in hashes:
            if(get_file_id(hash_id) is None or db_inconsistency(hash_id)):
                logging.debug("task(): " + hash_id +
                              " was not found (get_file_id returned None). ")
                generic_count += 1
                if (generic_count % 20 == 0):
                    # Periodically persist progress on long batches.
                    save(response)
                output = save_file_from_vt(hash_id)
                sha1 = output.get('hash')
                if(output.get('status') == 'out_of_credits'):
                    # Retry until VT credits become available again.
                    # NOTE(review): busy loop with no sleep/backoff.
                    request_successful = False
                    while not request_successful:
                        output = save_file_from_vt(hash_id)
                        if output.get('status') != 'out_of_credits':
                            request_successful = True
                if(output.get('status') == 'added'):
                    response["downloaded"].append(hash_id)
                    # we need to process the sha1, and not the sha2 because
                    # the grid does not save the sha2.
                    generic_process_hash(sha1)
                    response["private_credits_spent"] += 1
                elif(output.get('status') == 'inconsistency_found'):
                    response["private_credits_spent"] += 1
                    generic_process_hash(sha1)
                elif(output.get('status') == 'not_found'):
                    response["not_found_on_vt"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error(str(output))
                    response = add_error(
                        response, 11, "Unknown error when downloading sample from VT.")
                save(response)
    save(response)
    response["processed"] = []
    response["not_found_for_processing"] = []
    if process:
        logging.debug("process=true")
        for hash_id in hashes:
            logging.debug("task: hash_id=" + str(hash_id))
            process_start_time = datetime.datetime.now()
            generic_count += 1
            if (generic_count % 20 == 0):
                save(response)
            # generic_process_hash() returns 0 on success.
            if(generic_process_hash(hash_id) == 0):
                process_end_time = datetime.datetime.now()
                response["processed"].append({"hash": hash_id,
                                              "seconds": (process_end_time - process_start_time).seconds})
            else:
                response["not_found_for_processing"].append(hash_id)
    save(response)
    if vt_av:
        # Fetch AV scan reports for every sample available locally.
        response["vt_av_added"] = []
        response["vt_av_out_of_credits"] = []
        response["not_found_on_vt_av"] = []
        response["vt_av_already_downloaded"] = []
        response["public_credits_spent"] = 0
        for hash_id in hashes:
            sha1 = get_file_id(hash_id)
            if(sha1 is not None):
                av_result_output = get_av_result(sha1)
                if (av_result_output.get('status') == 'out_of_credits'):
                    # Retry and record how many attempts were needed.
                    request_successful = False
                    count = 0
                    while not request_successful:
                        av_result_output = get_av_result(sha1)
                        count += 1
                        if av_result_output.get('status') != 'out_of_credits':
                            response["vt_av_out_of_credits"].append(hash_id)
                            response = add_error(response, 10, "Had to retried " + str(count) + " times in av_result(out_of_credits) for hash= " + str(
                                hash_id) + ". Is someone else using the same public key?")
                            request_successful = True
                if(av_result_output.get('status') == "added"):
                    response["vt_av_added"].append(hash_id)
                    response["public_credits_spent"] += 1
                elif(av_result_output.get('status') == "already_had_it"):
                    response["vt_av_already_downloaded"].append(hash_id)
                elif(av_result_output.get('status') == 'error'):
                    response = add_error(
                        response, 9, "Error in av_result: " + str(av_result_output.get('error_message')))
                elif(av_result_output.get('status') == 'not_found'):
                    response["not_found_on_vt_av"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error("unknown error in av_result: " +
                                  str(hash_id) + " ; " + str(av_result_output))
                    response = add_error(
                        response, 12, "Unknown error in av_result()")
                save(response)

    if(bool(email)):
        send_mail(email, "task done", str(response))
    response["date_end"] = datetime.datetime.now()
    save(response)
    return response
Example #10
0
def api_batch_process_debug_file():
    """Debug batch processor: streams per-hash progress as HTML.

    For each submitted hash: if the binary is already in the package
    store but has no metadata it is processed inline; otherwise the
    hash is resolved through the search index (downloading from VT if
    needed) and queued for processing.
    """
    yield "<html><body><pre>"
    yield "Running Batch process\n"
    file_hashes = request.forms.get('file_hash')
    if file_hashes is None:
        response.status = 422
        print("file_hash is missing")
        yield "file_hash parameter is missing"
        # Without this return the loop below would crash on None.split().
        return
    not_found = []
    added_to_queue = 0
    downloaded_from_vt = 0
    # file_hashes is a newline-separated list of hashes.
    for hash_id in file_hashes.split("\n"):
        hash_id = clean_hash(hash_id)
        if hash_id is None:
            continue
        data = "1=" + hash_id
        if len(hash_id) == 40 or len(hash_id) == 32:
            pc = PackageController()
            res = pc.getFile(hash_id)
            if res is not None and len(SearchModule.search_by_id(data, 1, [], False)) == 0:
                # Binary exists in the package store but has no metadata
                # yet: process it inline.
                print("Processing right now: " + str(hash_id))
                process_file(hash_id)
                if env['auto_get_av_result']:
                    get_av_result(hash_id)
                    # NOTE(review): this 'continue' only fires when
                    # auto_get_av_result is on; otherwise the hash falls
                    # through and is searched/queued again below. Confirm
                    # whether the continue should sit one level out.
                    continue
        res = SearchModule.search_by_id(data, 1, [], False)
        if len(res) == 0:
            print("process_debug(): metadata of " + str(hash_id) + " was not found. We will look in Pc. hash length: " + str(len(hash_id)))
            if len(hash_id) == 40 or len(hash_id) == 32:
                pc = PackageController()
                res = pc.getFile(hash_id)
                if res is not None:
                    print("process_debug(): hash was found (" + str(hash_id) + ")")
                else:
                    print("process_debug(): hash was not found(" + str(hash_id) + ")")
            print("process_debug():")
            print("process_debug(): going to search " + str(hash_id) + " in vt")
            sha1 = SearchModule.add_file_from_vt(hash_id)
            if sha1 is None:
                print("process_debug(): sha1 is None: " + str(hash_id))
                not_found.append(hash_id)
                continue
            downloaded_from_vt += 1
        else:
            sha1 = res[0]["sha1"]

        added_to_queue += 1
        add_hash_to_process_queue(sha1)
        if env['auto_get_av_result']:
            get_av_result(sha1)
        yield str(sha1) + "\n"

    responsex = str(added_to_queue) + " files added to the process queue.\n"
    if downloaded_from_vt > 0:
        responsex += str(downloaded_from_vt) + " new hashes.\n"
    if len(not_found) != 0:
        responsex += str(len(not_found)) + " hashes not found.\n"
        responsex += "Not Found:\n"
        for aux in not_found:
            responsex = responsex + str(aux) + "\n"
    yield responsex
    yield "END"