Example #1
0
def export_metadata():
    """Export metadata for the POSTed hashes as one downloadable .txt file.

    Each hash's metadata is dumped as indented JSON under a "#### File:"
    header; the concatenation is written to /tmp/meta_export/<random>.txt
    and returned as a download response.
    """
    mdc = MetaController()
    hashes = request.forms.dict.get("file_hash[]")
    sections = []
    for file_hash in hashes:  # renamed: "hash" shadowed the builtin
        file_hash = clean_hash(file_hash.replace('\r', ''))
        res = mdc.read(file_hash)
        dump = dumps(res, indent=4)
        sections.append("\n\n#### File:%s\n" % file_hash)
        sections.append(dump)
    # join() avoids quadratic string concatenation in the loop.
    dump_to_save = "".join(sections)

    id_random = id_generator()

    tmp_folder = "/tmp/meta_export"
    # os.makedirs replaces shelling out to "mkdir -p".
    if not os.path.exists(tmp_folder):
        os.makedirs(tmp_folder)

    file_name = os.path.join(tmp_folder, str(id_random) + '.txt')

    # "with" guarantees the handle is closed even if write() raises.
    with open(file_name, "w") as fd:
        fd.write(dump_to_save)

    resp = static_file(str(id_random) + '.txt', root=tmp_folder, download=True)
    resp.set_cookie('fileDownload', 'true')
    return resp
Example #2
0
def get_task(task_id):
    """Fetch the stored report for *task_id*; an error dict if missing."""
    report = MetaController().read_task(task_id)
    if report is None:
        return add_error({}, 8, "Task not found")
    return change_date_to_str(report)
Example #3
0
def export_metadata():
    """Export metadata for the POSTed hashes as one downloadable .txt file.

    Each hash's metadata is dumped as indented JSON under a "#### File:"
    header; the concatenation is written to /tmp/meta_export/<random>.txt
    and returned as a download response.
    """
    mdc = MetaController()
    hashes = request.forms.dict.get("file_hash[]")
    sections = []
    for file_hash in hashes:  # renamed: "hash" shadowed the builtin
        file_hash = clean_hash(file_hash.replace('\r', ''))

        res = mdc.read(file_hash)
        dump = dumps(res, indent=4)
        sections.append("\n\n#### File:%s\n" % file_hash)
        sections.append(dump)
    # join() avoids quadratic string concatenation in the loop.
    dump_to_save = "".join(sections)

    id_random = id_generator()

    tmp_folder = "/tmp/meta_export"
    # os.makedirs replaces shelling out to "mkdir -p".
    if not os.path.exists(tmp_folder):
        os.makedirs(tmp_folder)

    file_name = os.path.join(tmp_folder, str(id_random) + '.txt')

    # "with" guarantees the handle is closed even if write() raises.
    with open(file_name, "w") as fd:
        fd.write(dump_to_save)

    resp = static_file(str(id_random) + '.txt', root=tmp_folder, download=True)
    resp.set_cookie('fileDownload', 'true')
    return resp
Example #4
0
def check_lib():
    """Return {"valid": bool}: whether the DLL name in query param "q" is known.

    The name is lower-cased and wrapped in single quotes, as
    searchDllByName expects.
    """
    lib = str(request.query.q)
    mdc = MetaController()
    res = mdc.searchDllByName("'" + lib.lower() + "'")
    # "is not None" (identity) instead of "!= None"; one expression
    # replaces the duplicated if/else branches.
    return jsonize({"valid": res is not None})
Example #5
0
def check_imp():
    """Return {"valid": bool}: whether the import name in query param "q" is known.

    The name is lower-cased and wrapped in single quotes, as
    searchImportByName expects.
    """
    imp = str(request.query.q)
    mdc = MetaController()
    res = mdc.searchImportByName("'" + imp.lower() + "'")
    # "is not None" (identity) instead of "!= None"; one expression
    # replaces the duplicated if/else branches.
    return jsonize({"valid": res is not None})
Example #6
0
def check_lib():
    """Return {"valid": bool}: whether the DLL name in query param "q" is known.

    The name is lower-cased and wrapped in single quotes, as
    searchDllByName expects.
    """
    lib = str(request.query.q)
    mdc = MetaController()
    res = mdc.searchDllByName("'" + lib.lower() + "'")
    # "is not None" (identity) instead of "!= None"; one expression
    # replaces the duplicated if/else branches.
    return jsonize({"valid": res is not None})
Example #7
0
def check_imp():
    """Return {"valid": bool}: whether the import name in query param "q" is known.

    The name is lower-cased and wrapped in single quotes, as
    searchImportByName expects.
    """
    imp = str(request.query.q)
    mdc = MetaController()
    res = mdc.searchImportByName("'" + imp.lower() + "'")
    # "is not None" (identity) instead of "!= None"; one expression
    # replaces the duplicated if/else branches.
    return jsonize({"valid": res is not None})
def main():
    """Backfill the "date" field of av_analysis documents that lack one.

    For each analysis without a date, picks the best available VT date
    register (in preference order), normalizes it with process_date(),
    stores it back into the collection and mirrors it into the file
    metadata via save_first_seen().
    """
    mdc = MetaController()

    collection = db["av_analysis"]
    all_analysis = collection.find({"date": None})
    count = 0
    reset = 0
    for analysis in all_analysis:
        count += 1
        reset += 1
        if reset == 1000:
            reset = 0
            print("Count: %s" % count)  # progress marker every 1000 docs
        file_id = analysis.get('sha1')
        date_stored = analysis.get('date')
        if date_stored is not None:
            # mdc.save_first_seen(file_id,date_stored) #Uncoment to copy all av
            # dates to meta dates
            continue

        # Trying to get the best date; list order encodes preference.
        date_registers = [
            'first_seen', 'additional_info.first_seen_itw', 'scan_date'
        ]
        for register in date_registers:
            vt_date = read_from_dictionary(register, analysis)
            if vt_date is not None:
                break

        try:
            # The "date" value is used to speed up time queries for av
            # signatures
            new_date = process_date(vt_date)
        except ValueError:
            new_date = None
            # Fixed: was a Python 2 print statement (syntax error on py3).
            print("fix_dates_in_av: invalid date in AV_metda:" + str(vt_date))

        command = {"$set": {"date": new_date}}
        try:
            collection.update_one({"sha1": file_id}, command, upsert=False)
        except WriteError:
            print("**** Error File: %s ****" % (file_id, ))
            print(command)
            err = str(traceback.format_exc())
            print(err)
            continue
        mdc.save_first_seen(file_id, new_date)
        print("%s fixed -> new date: %s" % (file_id, new_date))
def main():
    """Backfill the "date" field of av_analysis documents that lack one.

    For each analysis without a date, picks the best available VT date
    register (in preference order), normalizes it with process_date(),
    stores it back into the collection and mirrors it into the file
    metadata via save_first_seen().
    """
    mdc = MetaController()

    collection = db["av_analysis"]
    all_analysis = collection.find({"date": None})
    count = 0
    reset = 0
    for analysis in all_analysis:
        count += 1
        reset += 1
        if reset == 1000:
            reset = 0
            print("Count: %s" % count)  # progress marker every 1000 docs
        file_id = analysis.get('sha1')
        date_stored = analysis.get('date')
        if date_stored is not None:
            # mdc.save_first_seen(file_id,date_stored) #Uncoment to copy all av
            # dates to meta dates
            continue

        # Trying to get the best date; list order encodes preference.
        date_registers = ['first_seen',
                          'additional_info.first_seen_itw', 'scan_date']
        for register in date_registers:
            vt_date = read_from_dictionary(register, analysis)
            if vt_date is not None:
                break

        try:
            # The "date" value is used to speed up time queries for av
            # signatures
            new_date = process_date(vt_date)
        except ValueError:
            new_date = None
            # Fixed: was a Python 2 print statement (syntax error on py3).
            print("fix_dates_in_av: invalid date in AV_metda:" + str(vt_date))

        command = {"$set": {"date": new_date}}
        try:
            collection.update_one({"sha1": file_id}, command, upsert=False)
        except WriteError:
            print("**** Error File: %s ****" % (file_id,))
            print(command)
            err = str(traceback.format_exc())
            print(err)
            continue
        mdc.save_first_seen(file_id, new_date)
        print("%s fixed -> new date: %s" % (file_id, new_date))
Example #10
0
    def __init__(self):
        """Configure file logging and create the shared controllers."""
        formato = '[%(asctime)-15s][%(levelname)s] %(message)s'
        path = os.path.abspath(os.path.dirname(os.path.abspath(__file__)))
        logfile = os.path.join(path, "launcher.log")
        logging.basicConfig(format=formato,
                            filename=logfile,
                            level=logging.INFO)

        self.vc = VersionController()
        self.pc = PackageController()
        self.mdc = MetaController()

    def launchOnlyHashingByID(self, sample):
        """Run hash-only processing on *sample* and persist the results.

        Returns 0 on success, -1 if writing metadata to the DB failed.
        NOTE(review): this def was nested inside __init__ and therefore
        unreachable as a method; promoted to class level.
        """
        sample.setPackageController(self.pc)
        sample.setMetaController(self.mdc)
        sample.setVersionController(self.vc)
        category = sample.getCategory()
        if category is None:
            # Unknown category: classify from the binary content.
            category = Cataloger().catalog(sample.getBinary())
            logging.debug("Category not found in DB, categorized as %s",
                          str(category))
        else:
            logging.debug("Category found in DB, categorized as %s",
                          str(category))
        processor = ProcessorFactory().getHashProcessor(category, sample)
        result_dic = processor.process()
        result_version = processor.getVersion()

        if len(result_version) > 0:
            logging.debug("Updating metadata")

            if self.mdc.write(sample.getID(), result_dic) != 0:
                logging.error("Error writing Metadata to DB, sample:%s",
                              sample.getID())
                return -1
            logging.debug("Metadata writed in DB")

            self.vc.updateVersion(sample.getID(), result_version)
            logging.debug("Versions writed to DB")
        else:

            logging.debug("Nothing to update")

        logging.debug("Analysis Finished OK")
        return 0
Example #11
0
def get_metadata():
    """Look up stored metadata for the hash in the file_hash query param."""
    raw = request.query.file_hash
    if raw == '':
        response.status = 400
        return jsonize({'message': 'file_hash parameter is missing'})

    cleaned = clean_hash(raw)
    if not valid_hash(cleaned):
        response.status = 400
        return jsonize({'message': 'Invalid hash format (use MD5, SHA1 or SHA2)'})

    file_id = get_file_id(cleaned)
    if file_id is None:
        response.status = 404
        return jsonize({'message': 'Metadata not found in the database'})

    res = MetaController().read(file_id)
    if res is None:
        # NOTE(review): the event is logged only when the read comes back
        # empty — confirm this is intended (siblings log on success).
        log_event("metadata", file_id)
    return dumps(change_date_to_str(res))
Example #12
0
def get_metadata():
    """Return stored metadata (JSON) for the MD5/SHA1 hash in the query string."""
    file_hash = clean_hash(request.query.file_hash)
    if file_hash is None:
        return
    # Only MD5 (32 hex chars) and SHA1 (40) are accepted here.
    # ToDo: validate hash content, not just its length.
    if len(file_hash) not in (32, 40):
        response.code = 400
        return jsonize({'message': 'Invalid hash format (use MD5, SHA1 or SHA2)'})

    mdc = MetaController()
    res = mdc.read(file_hash)
    if res is None:  # identity check instead of "== None"
        response.code = 404
        return jsonize({'message': 'Metadata not found in the database'})
    log_event("metadata", file_hash)

    return dumps(change_date_to_str(res))
Example #13
0
def get_metadata():
    """Return stored metadata (raw JSON dump) for the MD5/SHA1 hash in the query."""
    file_hash = clean_hash(request.query.file_hash)
    if file_hash is None:
        return
    # Only MD5 (32 hex chars) and SHA1 (40) pass; SHA256 support pending.
    # ToDo: validate hash content, not just its length.
    if len(file_hash) not in (32, 40):
        response.code = 400
        return jsonize({'message': 'Invalid hash format (use MD5, SHA1 or SHA2)'})

    mdc = MetaController()
    res = mdc.read(file_hash)
    if res is None:  # identity check instead of "== None"
        response.code = 404
        return jsonize({'message': 'Metadata not found in the database'})
    log_event("metadata", file_hash)

    return dumps(res)
Example #14
0
def get_av_result(file_id):
    """Return (detection_name, positives, total) from the ESET engine, or None.

    Uses the cached AV analysis when available; otherwise fetches the
    VirusTotal report and caches it in the database.
    """
    # Check whether the analysis already exists.
    mdc = MetaController()
    analysis_result = mdc.search_av_analysis(file_id)

    if analysis_result is None:
        print("Buscando analysis de %s en VT" % file_id)
        analysis_result = get_vt_av_result(file_id)
        # Store it in the database.
        if analysis_result is None:
            return None
        mdc.save_av_analysis(file_id, analysis_result)

    scans = analysis_result.get("scans")
    for scan in scans:
        av_name = scan.get("name")
        # Any alias of the ESET engine counts; "detection" avoids shadowing
        # the builtin type().
        if av_name in ("ESET-NOD32", "NOD32", "NOD32v2"):
            detection = scan.get("result")
            positives = analysis_result.get("positives")
            total = analysis_result.get("total")
            return (detection, positives, total)

    return None
Example #15
0
def export_metadata():
    """Export metadata for the POSTed hashes as a zip of per-hash .txt files.

    Writes /tmp/meta_export/<random>/<hash>.txt for each hash, zips the
    folder, returns the zip as a download and removes the temp files.
    """
    mdc = MetaController()
    hashes = request.forms.dict.get("file_hash[]")
    random_id = id_generator()
    tmp_path = "/tmp/meta_export"
    tmp_folder = os.path.join(tmp_path, random_id)
    call_with_output(["mkdir", "-p", tmp_folder])
    for file_hash in hashes:  # renamed: "hash" shadowed the builtin
        file_hash = clean_hash(file_hash.replace('\r', ''))
        res = mdc.read(file_hash)
        dump = dumps(res, indent=4)
        file_name = os.path.join(tmp_folder, str(file_hash) + '.txt')
        # "with" guarantees the handle is closed even if write() raises.
        with open(file_name, "w") as fd:
            fd.write(dump)
    zip_path = os.path.join(tmp_path, random_id + '.zip')
    # -j junks paths inside the archive, -r recurses the export folder.
    call_with_output(["zip", "-jr", zip_path, tmp_folder])
    resp = static_file(str(random_id) + '.zip', root=tmp_path, download=True)
    resp.set_cookie('fileDownload', 'true')
    # Removed the unused dump_to_save accumulator; clean up temp files.
    shutil.rmtree(tmp_folder)
    os.remove(zip_path)
    return resp
Example #16
0
def export_metadata():
    """Export metadata for the POSTed hashes as a zip of per-hash .txt files.

    Writes /tmp/meta_export/<random>/<hash>.txt for each hash, zips the
    folder, returns the zip as a download and removes the temp files.
    """
    mdc = MetaController()
    hashes = request.forms.dict.get("file_hash[]")
    random_id = id_generator()
    tmp_path = "/tmp/meta_export"
    tmp_folder = os.path.join(tmp_path, random_id)
    call_with_output(["mkdir", "-p", tmp_folder])
    for file_hash in hashes:  # renamed: "hash" shadowed the builtin
        file_hash = clean_hash(file_hash.replace('\r', ''))
        res = mdc.read(file_hash)
        dump = dumps(res, indent=4)
        file_name = os.path.join(tmp_folder, str(file_hash) + '.txt')
        # "with" guarantees the handle is closed even if write() raises.
        with open(file_name, "w") as fd:
            fd.write(dump)
    zip_path = os.path.join(tmp_path, random_id + '.zip')
    # -j junks paths inside the archive, -r recurses the export folder.
    call_with_output(["zip", "-jr", zip_path, tmp_folder])
    resp = static_file(str(random_id) + '.zip', root=tmp_path, download=True)
    resp.set_cookie('fileDownload', 'true')
    # Removed the unused dump_to_save accumulator; clean up temp files.
    shutil.rmtree(tmp_folder)
    os.remove(zip_path)
    return resp
def get_av_result(file_id, priority="low"):
    """Fetch (or reuse) the VirusTotal antivirus summary for *file_id*.

    Returns a dict with keys scans/positives/total/hash/status; status is
    "added", "already_had_it" or "error" (then with error_message).

    Raises:
        ValueError: if *file_id* is not a valid hash.
    """
    if not valid_hash(file_id):
        raise ValueError("Invalid hash")

    mdc = MetaController()
    analysis_result = mdc.search_av_analysis(file_id)
    # Removed unused "added" local.
    status = None
    if analysis_result is None:
        logging.info("Searching analysis of %s in VT" % file_id)
        vt_av_result = get_vt_av_result(file_id, priority)
        status = vt_av_result.get('status')
        if status == "ok":
            vt_av_result_response = vt_av_result.get('response')
            analysis_result = parse_vt_response(vt_av_result_response)
            # Save in mongo
            if analysis_result is not None:
                logging.info("saving vt av from " + str(file_id) + " in mongo")
                mdc.save_av_analysis(file_id, analysis_result)
            status = "added"
        elif status == "error":
            return {"scans": None, "hash": file_id, "status": "error", "error_message": vt_av_result.get('error_message')}
    else:
        status = "already_had_it"

    if analysis_result is not None:
        scans = analysis_result.get("scans")
        positives = analysis_result.get('positives')
        total = analysis_result.get('total')
    else:
        # Nothing usable came back (e.g. VT status "not_found").
        positives = 0
        total = 0
        scans = None
    response = {"scans": scans, "positives": positives,
                "total": total, "hash": file_id, "status": status}
    return response
def get_av_result(file_id, priority="low"):
    """Fetch (or reuse) the VirusTotal antivirus summary for *file_id*.

    Returns a dict with keys scans/positives/total/hash/status; status is
    "added", "already_had_it" or "error" (then with error_message).

    Raises:
        ValueError: if *file_id* is not a valid hash.
    """
    if not valid_hash(file_id):
        raise ValueError("Invalid hash")

    mdc = MetaController()
    analysis_result = mdc.search_av_analysis(file_id)
    # Removed unused "added" local; "is None" replaces "== None".
    status = None
    if analysis_result is None:
        logging.info("Searching analysis of %s in VT" % file_id)
        vt_av_result = get_vt_av_result(file_id, priority)
        status = vt_av_result.get('status')
        if status == "ok":
            vt_av_result_response = vt_av_result.get('response')
            analysis_result = parse_vt_response(vt_av_result_response)
            # Save in mongo
            if analysis_result is not None:
                logging.info("saving vt av from " + str(file_id) + " in mongo")
                mdc.save_av_analysis(file_id, analysis_result)
            status = "added"
        elif status == "error":
            return {"scans": None, "hash": file_id, "status": "error", "error_message": vt_av_result.get('error_message')}
    else:
        status = "already_had_it"

    if analysis_result is not None:
        scans = analysis_result.get("scans")
        positives = analysis_result.get('positives')
        total = analysis_result.get('total')
    else:
        # Nothing usable came back (e.g. VT status "not_found").
        positives = 0
        total = 0
        scans = None
    response = {"scans": scans, "positives": positives,
                "total": total, "hash": file_id, "status": status}
    return response
Example #19
0
def get_av_result(file_id):
    """Return (detection_name, positives, total) from the ESET engine, or None.

    Uses the cached AV analysis when available; otherwise fetches and
    parses the VirusTotal report and caches it.
    """
    mdc = MetaController()
    analysis_result = mdc.search_av_analysis(file_id)

    if analysis_result is None:
        print("Searching analysis of %s in VT" % file_id)
        analysis_result = parse_vt_response(get_vt_av_result(file_id))
        # Save in mongo
        if analysis_result is None:
            return None
        mdc.save_av_analysis(file_id, analysis_result)

    scans = analysis_result.get("scans")
    for scan in scans:
        av_name = scan.get("name")
        # Any alias of the ESET engine counts; "detection" avoids shadowing
        # the builtin type().
        if av_name in ("ESET-NOD32", "NOD32", "NOD32v2"):
            detection = scan.get("result")
            positives = analysis_result.get("positives")
            total = analysis_result.get("total")
            return (detection, positives, total)

    return None
Example #20
0
    def __init__(self):
        """Configure file logging and instantiate the shared controllers."""
        formato = '[%(asctime)-15s][%(levelname)s] %(message)s'
        path = os.path.abspath(os.path.dirname(os.path.abspath(__file__)))
        logfile = os.path.join(path, "launcher.log")
        logging.basicConfig(
            format=formato, filename=logfile, level=logging.INFO)

        self.vc = VersionController()
        self.pc = PackageController()
        self.mdc = MetaController()

    def launchOnlyHashingByID(self, sample):
        """Run hash-only processing on *sample*; 0 on success, -1 on DB error.

        NOTE(review): this def was nested inside __init__ and therefore
        unreachable as a method; promoted to class level.
        """
        sample.setPackageController(self.pc)
        sample.setMetaController(self.mdc)
        sample.setVersionController(self.vc)
        category = sample.getCategory()
        if category is None:
            # Unknown category: classify from the binary content.
            category = Cataloger().catalog(sample.getBinary())
            logging.debug(
                "Category not found in DB, categorized as %s", str(category))
        else:
            logging.debug(
                "Category found in DB, categorized as %s", str(category))
        processor = ProcessorFactory().getHashProcessor(category, sample)
        result_dic = processor.process()
        result_version = processor.getVersion()

        if len(result_version) > 0:
            logging.debug("Updating metadata")

            if self.mdc.write(sample.getID(), result_dic) != 0:
                logging.error(
                    "Error writing Metadata to DB, sample:%s", sample.getID())
                return -1
            logging.debug("Metadata writed in DB")

            self.vc.updateVersion(sample.getID(), result_version)
            logging.debug("Versions writed to DB")
        else:

            logging.debug("Nothing to update")

        logging.debug("Analysis Finished OK")
        return 0
Example #21
0
def save(document):
    """Persist *document* keyed by its task_id; returns write_task's result."""
    controller = MetaController()
    return controller.write_task(document["task_id"], document)
Example #22
0
def update_date(file_id, date):
    """Record *date* as the first-seen date for *file_id*.

    No-op when either argument is None. Returns None.
    """
    if file_id is None or date is None:
        return
    mdc = MetaController()
    # Removed unused "res" binding; the return value was never used.
    mdc.save_first_seen(file_id, date)
Example #23
0
def load_task(task_id):
    """Fetch the stored task document for *task_id*."""
    return MetaController().read_task(task_id)
Example #24
0
def save(document):
    """Write *document* to the task store under its own task_id."""
    meta = MetaController()
    the_id = document["task_id"]
    return meta.write_task(the_id, document)
Example #25
0
def load_task(task_id):
    """Read and return the task document identified by *task_id*."""
    meta = MetaController()
    return meta.read_task(task_id)
def update_date(file_id, date):
    """Record *date* as the first-seen date for *file_id*.

    No-op when either argument is None. Returns None.
    """
    if file_id is None or date is None:
        return
    mdc = MetaController()
    # Removed unused "res" binding; the return value was never used.
    mdc.save_first_seen(file_id, date)
Example #27
0
def generic_task(task_id):
    """Execute a queued analysis task end-to-end and persist its progress.

    Loads the task document, validates/deduplicates the requested hashes,
    optionally downloads samples and AV reports from VirusTotal, processes
    the samples, and repeatedly save()s the evolving response document so
    progress survives a crash. Returns the response dict (dates converted
    to str only on the no-valid-hashes early exit).
    """
    response = load_task(task_id)
    # A task that already has an end date finished earlier; bail out.
    if response.get('date_end') is not None:
        logging.error(
            "Task already done. why was this on the queue? task_id=" + str(task_id))
        return response

    # Unpack what the caller requested for this task.
    process = response['requested']['process']
    file_hash = response['requested']['file_hash']
    vt_av = response['requested']['vt_av']
    vt_samples = response['requested']['vt_samples']
    email = response['requested']['email']
    document_name = response['requested'].get('document_name', '')
    ip = response['requested']['ip']

    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.info("task_id=" + str(task_id))
    logging.info("response['requested']=" + str(response['requested']))
    generic_count = 0
    # Rebuild the response document from scratch; only the fields set
    # below survive into the saved task record.
    response = {}
    response["date_start"] = datetime.datetime.now()
    response["document_name"] = document_name
    response["task_id"] = task_id
    response["ip"] = ip
    check_hashes_output = check_hashes(file_hash)
    errors = check_hashes_output.get('errors')
    for error in errors:
        key = error.get('error')
        value = error.get('error_message')
        logging.error("errors (key=" + str(key) +
                      ", value=" + str(value) + ")")
        response = add_error(response, key, value)
    hashes = check_hashes_output.get('hashes')
    remove_dups_output = remove_dups(hashes)
    # remove duplicated hashes
    hashes = remove_dups_output.get('list')
    response["duplicated_hashes"] = remove_dups_output.get('dups')
    response["hashes"] = hashes

    # Resolve each hash to its sha1 and collect the other known digests
    # so the same sample requested under two digests can be spotted below.
    hash_dicts = []
    mc = MetaController()
    for x in hashes:
        x_dict = {}
        x_dict["original"] = x
        x_dict["sha1"] = get_file_id(x)
        if(x_dict["sha1"] is not None):
            doc = mc.read(x_dict["sha1"])
            if doc is not None and doc.get('hash') is not None:
                if doc.get('hash').get('md5') is not None:
                    x_dict["md5"] = doc.get('hash').get('md5')
                if doc.get('hash').get('sha2') is not None:
                    x_dict["sha2"] = doc.get('hash').get('sha2')
        hash_dicts.append(x_dict)
    response["duplicated_samples"] = []
    # Drop entries whose original hash is another entry's alternate digest.
    for x in hash_dicts:
        for y in hash_dicts:
            if x.get('original') != y.get('original') and (
                    x.get('original') == y.get('sha1') or
                    x.get('original') == y.get('md5') or
                    x.get('original') == y.get('sha2')):
                response["duplicated_samples"].append(y.get('original'))
                hash_dicts.remove(y)
    hashes = []
    for x in hash_dicts:
        hashes.append(x.get('original'))
    response["hashes"] = hashes

    if(len(hashes) == 0):
        response = add_error(response, 6, "No valid hashes provided.")
        response["date_end"] = datetime.datetime.now()
        save(response)
        return change_date_to_str(response)

    save(response)
    response["private_credits_spent"] = 0

    response["inconsistencies"] = []
    if(vt_samples or process):
        for hash_id in hashes:
            inconsistency_output = fix_inconsistency(hash_id)
            if inconsistency_output.get('inconsistency'):
                response["inconsistencies"].append(hash_id)
                if inconsistency_output.get('credit_spent'):
                    response["private_credits_spent"] += 1

    save(response)

    # Download any samples we do not have locally from VirusTotal.
    response["not_found_on_vt"] = []
    if vt_samples:
        response["downloaded"] = []
        for hash_id in hashes:
            if(get_file_id(hash_id) is None or db_inconsistency(hash_id)):
                logging.debug("task(): " + hash_id +
                              " was not found (get_file_id returned None). ")
                generic_count += 1
                if (generic_count % 20 == 0):
                    save(response)
                output = save_file_from_vt(hash_id)
                sha1 = output.get('hash')
                # NOTE(review): busy-retries until VT credits free up.
                if(output.get('status') == 'out_of_credits'):
                    request_successful = False
                    while not request_successful:
                        output = save_file_from_vt(hash_id)
                        if output.get('status') != 'out_of_credits':
                            request_successful = True
                if(output.get('status') == 'added'):
                    response["downloaded"].append(hash_id)
                    # we need to process the sha1, and not the sha2 because
                    # the grid does not save the sha2.
                    generic_process_hash(sha1)
                    response["private_credits_spent"] += 1
                elif(output.get('status') == 'inconsistency_found'):
                    response["private_credits_spent"] += 1
                    generic_process_hash(sha1)
                elif(output.get('status') == 'not_found'):
                    response["not_found_on_vt"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error(str(output))
                    response = add_error(
                        response, 11, "Unknown error when downloading sample from VT.")
                save(response)
    save(response)
    # Run the processors over each hash, timing every sample.
    response["processed"] = []
    response["not_found_for_processing"] = []
    if process:
        logging.debug("process=true")
        for hash_id in hashes:
            logging.debug("task: hash_id=" + str(hash_id))
            process_start_time = datetime.datetime.now()
            generic_count += 1
            if (generic_count % 20 == 0):
                save(response)
            if(generic_process_hash(hash_id) == 0):
                process_end_time = datetime.datetime.now()
                response["processed"].append({"hash": hash_id,
                                              "seconds": (process_end_time - process_start_time).seconds})
            else:
                response["not_found_for_processing"].append(hash_id)
    save(response)
    # Fetch AV verdicts from VT for every hash that resolves to a sha1.
    if vt_av:
        response["vt_av_added"] = []
        response["vt_av_out_of_credits"] = []
        response["not_found_on_vt_av"] = []
        response["vt_av_already_downloaded"] = []
        response["public_credits_spent"] = 0
        for hash_id in hashes:
            sha1 = get_file_id(hash_id)
            if(sha1 is not None):
                av_result_output = get_av_result(sha1)
                # NOTE(review): busy-retries until the public key has credits.
                if (av_result_output.get('status') == 'out_of_credits'):
                    request_successful = False
                    count = 0
                    while not request_successful:
                        av_result_output = get_av_result(sha1)
                        count += 1
                        if av_result_output.get('status') != 'out_of_credits':
                            response["vt_av_out_of_credits"].append(hash_id)
                            response = add_error(response, 10, "Had to retried " + str(count) + " times in av_result(out_of_credits) for hash= " + str(
                                hash_id) + ". Is someone else using the same public key?")
                            request_successful = True
                if(av_result_output.get('status') == "added"):
                    response["vt_av_added"].append(hash_id)
                    response["public_credits_spent"] += 1
                elif(av_result_output.get('status') == "already_had_it"):
                    response["vt_av_already_downloaded"].append(hash_id)
                elif(av_result_output.get('status') == 'error'):
                    response = add_error(
                        response, 9, "Error in av_result: " + str(av_result_output.get('error_message')))
                elif(av_result_output.get('status') == 'not_found'):
                    response["not_found_on_vt_av"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error("unknown error in av_result: " +
                                  str(hash_id) + " ; " + str(av_result_output))
                    response = add_error(
                        response, 12, "Unknown error in av_result()")
                save(response)

    if(bool(email)):
        send_mail(email, "task done", str(response))
    response["date_end"] = datetime.datetime.now()
    save(response)
    return response
Example #28
0
def generic_task(process,
                 file_hash,
                 vt_av,
                 vt_samples,
                 email,
                 task_id,
                 document_name=""):
    """Run an analysis task from explicit arguments and persist its progress.

    Validates/deduplicates the requested hashes, optionally downloads
    samples and AV reports from VirusTotal, processes the samples, and
    repeatedly save()s the evolving response document so progress survives
    a crash. Returns the response dict (dates converted to str only on the
    no-valid-hashes early exit).
    """
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.info("task_id=" + str(task_id))
    generic_count = 0
    response = {}
    response["date_start"] = datetime.datetime.now()
    response["document_name"] = document_name
    response["task_id"] = task_id
    check_hashes_output = check_hashes(file_hash)
    errors = check_hashes_output.get('errors')
    for error in errors:
        key = error.get('error')
        value = error.get('error_message')
        logging.error("errors (key=" + str(key) + ", value=" + str(value) +
                      ")")
        response = add_error(response, key, value)
    hashes = check_hashes_output.get('hashes')
    remove_dups_output = remove_dups(hashes)
    # remove duplicated hashes
    hashes = remove_dups_output.get('list')
    response["duplicated_hashes"] = remove_dups_output.get('dups')
    response["hashes"] = hashes

    # Resolve each hash to its sha1 and collect the other known digests
    # so the same sample requested under two digests can be spotted below.
    hash_dicts = []
    mc = MetaController()
    for x in hashes:
        x_dict = {}
        x_dict["original"] = x
        x_dict["sha1"] = get_file_id(x)
        if (x_dict["sha1"] is not None):
            doc = mc.read(x_dict["sha1"])
            if doc is not None and doc.get('hash') is not None:
                if doc.get('hash').get('md5') is not None:
                    x_dict["md5"] = doc.get('hash').get('md5')
                if doc.get('hash').get('sha2') is not None:
                    x_dict["sha2"] = doc.get('hash').get('sha2')
        hash_dicts.append(x_dict)
    response["duplicated_samples"] = []
    # Drop entries whose original hash is another entry's alternate digest.
    for x in hash_dicts:
        for y in hash_dicts:
            if x.get('original') != y.get('original') and (
                    x.get('original') == y.get('sha1') or x.get('original')
                    == y.get('md5') or x.get('original') == y.get('sha2')):
                response["duplicated_samples"].append(y.get('original'))
                hash_dicts.remove(y)
    hashes = []
    for x in hash_dicts:
        hashes.append(x.get('original'))
    response["hashes"] = hashes

    if (len(hashes) == 0):
        response = add_error(response, 6, "No valid hashes provided.")
        response["date_end"] = datetime.datetime.now()
        save(response)
        return change_date_to_str(response)

    save(response)

    response["inconsistencies"] = []
    if (vt_samples or process):
        for hash_id in hashes:
            if fix_inconsistency(hash_id) == 1:
                response["inconsistencies"].append(hash_id)

    save(response)

    # Download any samples we do not have locally from VirusTotal.
    response["not_found_on_vt"] = []
    response["private_credits_spent"] = 0
    if vt_samples:
        response["downloaded"] = []
        for hash_id in hashes:
            if (get_file_id(hash_id) is None or db_inconsistency(hash_id)):
                logging.debug("task(): " + hash_id +
                              " was not found (get_file_id returned None). ")
                generic_count += 1
                if (generic_count % 20 == 0):
                    save(response)
                output = save_file_from_vt(hash_id)
                sha1 = output.get('hash')
                # NOTE(review): busy-retries until VT credits free up.
                if (output.get('status') == 'out_of_credits'):
                    request_successful = False
                    while not request_successful:
                        output = save_file_from_vt(hash_id)
                        if output.get('status') != 'out_of_credits':
                            request_successful = True
                if (output.get('status') == 'added'):
                    response["downloaded"].append(hash_id)
                    # we need to process the sha1, and not the sha2 because
                    # the grid does not save the sha2.
                    generic_process_hash(sha1)
                    response["private_credits_spent"] += 1
                elif (output.get('status') == 'inconsistency_found'):
                    response["private_credits_spent"] += 1
                    generic_process_hash(sha1)
                elif (output.get('status') == 'not_found'):
                    response["not_found_on_vt"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error(str(output))
                    response = add_error(
                        response, 11,
                        "Unknown error when downloading sample from VT.")
                save(response)
    save(response)
    # Run the processors over each hash, timing every sample.
    response["processed"] = []
    response["not_found_for_processing"] = []
    if process:
        logging.debug("process=true")
        for hash_id in hashes:
            logging.debug("task: hash_id=" + str(hash_id))
            process_start_time = datetime.datetime.now()
            generic_count += 1
            if (generic_count % 20 == 0):
                save(response)
            if (generic_process_hash(hash_id) == 0):
                process_end_time = datetime.datetime.now()
                response["processed"].append({
                    "hash":
                    hash_id,
                    "seconds": (process_end_time - process_start_time).seconds
                })
            else:
                response["not_found_for_processing"].append(hash_id)
    save(response)
    # Fetch AV verdicts from VT for every hash that resolves to a sha1.
    if vt_av:
        response["vt_av_added"] = []
        response["vt_av_out_of_credits"] = []
        response["not_found_on_vt_av"] = []
        response["vt_av_already_downloaded"] = []
        response["public_credits_spent"] = 0
        for hash_id in hashes:
            sha1 = get_file_id(hash_id)
            if (sha1 is not None):
                av_result_output = get_av_result(sha1)
                # NOTE(review): busy-retries until the public key has credits.
                if (av_result_output.get('status') == 'out_of_credits'):
                    request_successful = False
                    count = 0
                    while not request_successful:
                        av_result_output = get_av_result(sha1)
                        count += 1
                        if av_result_output.get('status') != 'out_of_credits':
                            response["vt_av_out_of_credits"].append(hash_id)
                            response = add_error(
                                response, 10, "Had to retried " + str(count) +
                                " times in av_result(out_of_credits) for hash= "
                                + str(hash_id) +
                                ". Is someone else using the same public key?")
                            request_successful = True
                if (av_result_output.get('status') == "added"):
                    response["vt_av_added"].append(hash_id)
                    response["public_credits_spent"] += 1
                elif (av_result_output.get('status') == "already_had_it"):
                    response["vt_av_already_downloaded"].append(hash_id)
                elif (av_result_output.get('status') == 'error'):
                    response = add_error(
                        response, 9, "Error in av_result: " +
                        str(av_result_output.get('error_message')))
                elif (av_result_output.get('status') == 'not_found'):
                    response["not_found_on_vt_av"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error("unknown error in av_result: " +
                                  str(hash_id) + " ; " + str(av_result_output))
                    response = add_error(response, 12,
                                         "Unknown error in av_result()")
                save(response)

    if (bool(email)):
        send_mail(email, "task done", str(response))
    response["date_end"] = datetime.datetime.now()
    save(response)
    return response