def get_result_from_av():
    hash_id = request.query.file_hash
    if len(hash_id) == 0:
        response.status = 400
        return jsonize({'error': 4, 'error_message': 'file_hash parameter is missing.'})
    hash_id = clean_hash(hash_id)
    if not valid_hash(hash_id):
        return jsonize({'error': 5, 'error_message': 'Invalid hash format.'})
    if len(hash_id) != 40:
        # not a sha1: resolve md5/sha2 to sha1 through the search index
        data = "1=" + str(hash_id)
        res = SearchModule.search_by_id(data, 1, [], True)
        if len(res) == 0:
            response.status = 400
            return jsonize({'error': 6, 'error_message': 'File not found'})
        else:
            sha1 = res[0]["sha1"]
    else:
        sha1 = hash_id
    if vt_key():
        av_result = get_av_result(sha1)
    else:
        return jsonize({'error': 7, "error_message": "Error: VirusTotal API key missing from secrets.py file"})
    if av_result is None:
        return jsonize({"error": 8, "error_message": "Cannot get analysis (hash not found in VT? out of credits?)"})
    return jsonize({"message": "AV scans downloaded."})
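# --- Usage sketch (not part of the endpoint code) ---
# A minimal client call for get_result_from_av() above. The route path,
# host, and port are illustrative assumptions; this section does not show
# the actual route decorator.
import requests

def fetch_av_result(base_url, file_hash):
    # The endpoint accepts md5/sha1/sha2 and resolves non-sha1 hashes to
    # sha1 through SearchModule before asking VirusTotal for the scans.
    r = requests.get(base_url + "/api/get_av_result",
                     params={"file_hash": file_hash})
    return r.json()

# e.g. fetch_av_result("http://localhost:8080", "d41d8cd98f00b204e9800998ecf8427e")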
def api_batch_process_file():
    print("Running Batch process")
    file_hashes = request.forms.get('file_hash')
    # transform file_hashes into a list of hashes
    not_found = []
    added_to_queue = 0
    downloaded_from_vt = 0
    for hash_id in file_hashes.split("\n"):
        hash_id = clean_hash(hash_id)
        if hash_id is None:
            continue
        data = "1=" + str(hash_id)
        res = SearchModule.search_by_id(data, 1, [], True)
        if len(res) == 0:
            not_found.append(hash_id)
            continue
            # Disabled VT download fallback:
            # print "downloading " + str(hash_id) + " from vt"
            # sha1 = SearchModule.add_file_from_vt(hash_id)
            # if sha1 is None:
            #     print "not found on vt: " + str(hash_id)
            #     not_found.append(hash_id)
            #     continue
            # else:
            #     downloaded_from_vt += 1
        else:
            sha1 = res[0]["sha1"]
        added_to_queue += 1
        print str(hash_id) + " added to queue"
        add_hash_to_process_queue(sha1)
        if env['auto_get_av_result']:
            get_av_result(sha1)
    responsex = str(added_to_queue) + " files added to the process queue.\n"
    if downloaded_from_vt > 0:
        responsex += str(downloaded_from_vt) + " new hashes.\n"
    if len(not_found) != 0:
        responsex += str(len(not_found)) + " hashes not found.\n"
        responsex += "Not Found:\n"
        for aux in not_found:
            responsex = responsex + str(aux) + "\n"
    return jsonize({"message": responsex})
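# --- Usage sketch (not part of the endpoint code) ---
# api_batch_process_file() reads a form field named file_hash holding one
# hash per line. A minimal client sketch; the /api/batch_process path is
# an illustrative assumption.
import requests

def batch_process(base_url, hashes):
    payload = {"file_hash": "\n".join(hashes)}  # newline-separated; the endpoint splits on "\n"
    r = requests.post(base_url + "/api/batch_process", data=payload)
    return r.json()["message"]  # human-readable summary built by the endpoint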
def get_result_from_av():
    file_hash = clean_hash(request.query.file_hash)
    if file_hash is None or len(file_hash) != 40:
        response.status = 400
        return jsonize({'message': 'Invalid hash format (use sha1)'})
    av_result = get_av_result(file_hash)
    if av_result is None:
        return jsonize("Cannot get analysis")
    return jsonize("File processed")
def api_batch_process_debug_file():
    yield "<html><body><pre>"
    yield "Running Batch process\n"
    file_hashes = request.forms.get('file_hash')
    # transform file_hashes into a list of hashes
    not_found = []
    added_to_queue = 0
    downloaded_from_vt = 0
    for hash_id in file_hashes.split("\n"):
        hash_id = clean_hash(hash_id)
        if hash_id is None:
            continue
        data = "1=" + hash_id
        res = SearchModule.search_by_id(data, 1, [], False)
        if len(res) == 0:
            print "downloading " + str(hash_id) + " from vt"
            sha1 = SearchModule.add_file_from_vt(hash_id)
            if sha1 is None:
                not_found.append(hash_id)
                continue
            else:
                downloaded_from_vt += 1
        else:
            sha1 = res[0]["sha1"]
        added_to_queue += 1
        add_hash_to_process_queue(sha1)
        if env['auto_get_av_result']:
            get_av_result(sha1)
        yield str(sha1) + "\n"
    responsex = str(added_to_queue) + " files added to the process queue.\n"
    if downloaded_from_vt > 0:
        responsex += str(downloaded_from_vt) + " new hashes.\n"
    if len(not_found) != 0:
        responsex += str(len(not_found)) + " hashes not found.\n"
        responsex += "Not Found:\n"
        for aux in not_found:
            responsex = responsex + str(aux) + "\n"
    yield responsex
    yield "END"
def get_result_from_av():
    hash_id = request.query.file_hash
    if len(hash_id) == 0:
        response.status = 400
        return jsonize({'error': 4, 'error_message': 'file_hash parameter is missing.'})
    hash_id = clean_hash(hash_id)
    if not valid_hash(hash_id):
        return jsonize({'error': 5, 'error_message': 'Invalid hash format.'})
    if len(hash_id) != 40:
        # not a sha1: resolve md5/sha2 to sha1 through the search index
        data = "1=" + str(hash_id)
        res = SearchModule.search_by_id(data, 1, [], True)
        if len(res) == 0:
            response.status = 400
            return jsonize({'error': 6, 'error_message': 'File not found'})
        else:
            sha1 = res[0]["sha1"]
    else:
        sha1 = hash_id
    key_manager = KeyManager()
    if key_manager.check_keys_in_secrets():
        av_result = get_av_result(sha1, 'high')
    else:
        return jsonize({'error': 7, "error_message": "Error: VirusTotal API key missing from secrets.py file"})
    if av_result.get('status') == "added":
        return jsonize({"message": "AV scans downloaded."})
    elif av_result.get('status') == "already_had_it":
        return jsonize({"message": "File already has AV scans."})
    elif av_result.get('status') == "not_found":
        return jsonize({"error": 10, "error_message": "Not found on VT."})
    elif av_result.get('status') == "no_key_available":
        return jsonize({"error": 11, "error_message": "No key available right now. Please try again later."})
    else:
        logging.error("av_result for hash=" + str(sha1))
        logging.error("av_result=" + str(av_result))
        return jsonize({"error": 9, "error_message": "Cannot get analysis."})
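# --- Reference stub (assumption, not the real implementation) ---
# The dispatch above implies that get_av_result() returns a dict whose
# 'status' key is one of: added, already_had_it, not_found,
# no_key_available (anything else falls through to error 9). A stub with
# that shape can stand in for the real function when exercising the
# handler in isolation:
def stub_get_av_result(sha1, priority):
    # Always reports a successful download; swap the status string to
    # test the other branches of get_result_from_av().
    return {"status": "added"}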
def generic_task(process, file_hash, vt_av, vt_samples, email, task_id,
                 document_name=""):
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.info("task_id=" + str(task_id))
    generic_count = 0
    response = {}
    response["date_start"] = datetime.datetime.now()
    response["document_name"] = document_name
    response["task_id"] = task_id
    check_hashes_output = check_hashes(file_hash)
    errors = check_hashes_output.get('errors')
    for error in errors:
        key = error.get('error')
        value = error.get('error_message')
        logging.error("errors (key=" + str(key) + ", value=" + str(value) + ")")
        response = add_error(response, key, value)
    hashes = check_hashes_output.get('hashes')
    remove_dups_output = remove_dups(hashes)  # remove duplicated hashes
    hashes = remove_dups_output.get('list')
    response["duplicated_hashes"] = remove_dups_output.get('dups')
    response["hashes"] = hashes
    hash_dicts = []
    mc = MetaController()
    for x in hashes:
        x_dict = {}
        x_dict["original"] = x
        x_dict["sha1"] = get_file_id(x)
        if x_dict["sha1"] is not None:
            doc = mc.read(x_dict["sha1"])
            if doc is not None and doc.get('hash') is not None:
                if doc.get('hash').get('md5') is not None:
                    x_dict["md5"] = doc.get('hash').get('md5')
                if doc.get('hash').get('sha2') is not None:
                    x_dict["sha2"] = doc.get('hash').get('sha2')
        hash_dicts.append(x_dict)
    # drop aliases of the same sample (md5/sha1/sha2 pointing to one file)
    response["duplicated_samples"] = []
    for x in hash_dicts:
        for y in hash_dicts:
            if x.get('original') != y.get('original') and (
                    x.get('original') == y.get('sha1') or
                    x.get('original') == y.get('md5') or
                    x.get('original') == y.get('sha2')):
                response["duplicated_samples"].append(y.get('original'))
                hash_dicts.remove(y)
    hashes = []
    for x in hash_dicts:
        hashes.append(x.get('original'))
    response["hashes"] = hashes
    if len(hashes) == 0:
        response = add_error(response, 6, "No valid hashes provided.")
        response["date_end"] = datetime.datetime.now()
        save(response)
        return change_date_to_str(response)
    save(response)
    response["inconsistencies"] = []
    if vt_samples or process:
        for hash_id in hashes:
            if fix_inconsistency(hash_id) == 1:
                response["inconsistencies"].append(hash_id)
    save(response)
    response["not_found_on_vt"] = []
    response["private_credits_spent"] = 0
    if vt_samples:
        response["downloaded"] = []
        for hash_id in hashes:
            if get_file_id(hash_id) is None or db_inconsistency(hash_id):
                logging.debug("task(): " + hash_id +
                              " was not found (get_file_id returned None).")
                generic_count += 1
                if generic_count % 20 == 0:
                    save(response)
                output = save_file_from_vt(hash_id)
                sha1 = output.get('hash')
                if output.get('status') == 'out_of_credits':
                    request_successful = False
                    while not request_successful:
                        output = save_file_from_vt(hash_id)
                        if output.get('status') != 'out_of_credits':
                            request_successful = True
                if output.get('status') == 'added':
                    response["downloaded"].append(hash_id)
                    # we need to process the sha1 and not the sha2,
                    # because the grid does not save the sha2.
                    generic_process_hash(sha1)
                    response["private_credits_spent"] += 1
                elif output.get('status') == 'inconsistency_found':
                    response["private_credits_spent"] += 1
                    generic_process_hash(sha1)
                elif output.get('status') == 'not_found':
                    response["not_found_on_vt"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error(str(output))
                    response = add_error(response, 11,
                                         "Unknown error when downloading sample from VT.")
                save(response)
    save(response)
    response["processed"] = []
    response["not_found_for_processing"] = []
    if process:
        logging.debug("process=true")
        for hash_id in hashes:
            logging.debug("task: hash_id=" + str(hash_id))
            process_start_time = datetime.datetime.now()
            generic_count += 1
            if generic_count % 20 == 0:
                save(response)
            if generic_process_hash(hash_id) == 0:
                process_end_time = datetime.datetime.now()
                response["processed"].append({
                    "hash": hash_id,
                    "seconds": (process_end_time - process_start_time).seconds
                })
            else:
                response["not_found_for_processing"].append(hash_id)
    save(response)
    if vt_av:
        response["vt_av_added"] = []
        response["vt_av_out_of_credits"] = []
        response["not_found_on_vt_av"] = []
        response["vt_av_already_downloaded"] = []
        response["public_credits_spent"] = 0
        for hash_id in hashes:
            sha1 = get_file_id(hash_id)
            if sha1 is not None:
                av_result_output = get_av_result(sha1)
                if av_result_output.get('status') == 'out_of_credits':
                    request_successful = False
                    count = 0
                    while not request_successful:
                        av_result_output = get_av_result(sha1)
                        count += 1
                        if av_result_output.get('status') != 'out_of_credits':
                            response["vt_av_out_of_credits"].append(hash_id)
                            response = add_error(response, 10,
                                                 "Had to retry " + str(count) +
                                                 " times in av_result (out_of_credits) for hash=" +
                                                 str(hash_id) +
                                                 ". Is someone else using the same public key?")
                            request_successful = True
                if av_result_output.get('status') == "added":
                    response["vt_av_added"].append(hash_id)
                    response["public_credits_spent"] += 1
                elif av_result_output.get('status') == "already_had_it":
                    response["vt_av_already_downloaded"].append(hash_id)
                elif av_result_output.get('status') == 'error':
                    response = add_error(response, 9, "Error in av_result: " +
                                         str(av_result_output.get('error_message')))
                elif av_result_output.get('status') == 'not_found':
                    response["not_found_on_vt_av"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error("unknown error in av_result: " + str(hash_id) +
                                  " ; " + str(av_result_output))
                    response = add_error(response, 12, "Unknown error in av_result()")
                save(response)
    if bool(email):
        send_mail(email, "task done", str(response))
    response["date_end"] = datetime.datetime.now()
    save(response)
    return response
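# --- Alternative sketch (not the project's code) ---
# The out_of_credits loops above retry immediately and without a bound,
# which busy-waits against the VirusTotal quota. One possible refinement
# is a bounded retry with a pause; the retries and delay values below are
# illustrative assumptions.
import time

def retry_on_credits(func, arg, retries=30, delay=60):
    output = {'status': 'out_of_credits'}
    for _ in range(retries):
        output = func(arg)
        if output.get('status') != 'out_of_credits':
            break
        time.sleep(delay)  # give the quota window time to refresh
    return output

# e.g. output = retry_on_credits(save_file_from_vt, hash_id)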
def generic_task(task_id):
    response = load_task(task_id)
    if response.get('date_end') is not None:
        logging.error("Task already done. Why was this on the queue? task_id=" + str(task_id))
        return response
    process = response['requested']['process']
    file_hash = response['requested']['file_hash']
    vt_av = response['requested']['vt_av']
    vt_samples = response['requested']['vt_samples']
    email = response['requested']['email']
    document_name = response['requested'].get('document_name', '')
    ip = response['requested']['ip']
    logging.basicConfig(stream=sys.stdout, level=logging.INFO)
    logging.info("task_id=" + str(task_id))
    logging.info("response['requested']=" + str(response['requested']))
    generic_count = 0
    response = {}
    response["date_start"] = datetime.datetime.now()
    response["document_name"] = document_name
    response["task_id"] = task_id
    response["ip"] = ip
    check_hashes_output = check_hashes(file_hash)
    errors = check_hashes_output.get('errors')
    for error in errors:
        key = error.get('error')
        value = error.get('error_message')
        logging.error("errors (key=" + str(key) + ", value=" + str(value) + ")")
        response = add_error(response, key, value)
    hashes = check_hashes_output.get('hashes')
    remove_dups_output = remove_dups(hashes)  # remove duplicated hashes
    hashes = remove_dups_output.get('list')
    response["duplicated_hashes"] = remove_dups_output.get('dups')
    response["hashes"] = hashes
    hash_dicts = []
    mc = MetaController()
    for x in hashes:
        x_dict = {}
        x_dict["original"] = x
        x_dict["sha1"] = get_file_id(x)
        if x_dict["sha1"] is not None:
            doc = mc.read(x_dict["sha1"])
            if doc is not None and doc.get('hash') is not None:
                if doc.get('hash').get('md5') is not None:
                    x_dict["md5"] = doc.get('hash').get('md5')
                if doc.get('hash').get('sha2') is not None:
                    x_dict["sha2"] = doc.get('hash').get('sha2')
        hash_dicts.append(x_dict)
    # drop aliases of the same sample (md5/sha1/sha2 pointing to one file)
    response["duplicated_samples"] = []
    for x in hash_dicts:
        for y in hash_dicts:
            if x.get('original') != y.get('original') and (
                    x.get('original') == y.get('sha1') or
                    x.get('original') == y.get('md5') or
                    x.get('original') == y.get('sha2')):
                response["duplicated_samples"].append(y.get('original'))
                hash_dicts.remove(y)
    hashes = []
    for x in hash_dicts:
        hashes.append(x.get('original'))
    response["hashes"] = hashes
    if len(hashes) == 0:
        response = add_error(response, 6, "No valid hashes provided.")
        response["date_end"] = datetime.datetime.now()
        save(response)
        return change_date_to_str(response)
    save(response)
    response["private_credits_spent"] = 0
    response["inconsistencies"] = []
    if vt_samples or process:
        for hash_id in hashes:
            inconsistency_output = fix_inconsistency(hash_id)
            if inconsistency_output.get('inconsistency'):
                response["inconsistencies"].append(hash_id)
            if inconsistency_output.get('credit_spent'):
                response["private_credits_spent"] += 1
    save(response)
    response["not_found_on_vt"] = []
    if vt_samples:
        response["downloaded"] = []
        for hash_id in hashes:
            if get_file_id(hash_id) is None or db_inconsistency(hash_id):
                logging.debug("task(): " + hash_id +
                              " was not found (get_file_id returned None).")
                generic_count += 1
                if generic_count % 20 == 0:
                    save(response)
                output = save_file_from_vt(hash_id)
                sha1 = output.get('hash')
                if output.get('status') == 'out_of_credits':
                    request_successful = False
                    while not request_successful:
                        output = save_file_from_vt(hash_id)
                        if output.get('status') != 'out_of_credits':
                            request_successful = True
                if output.get('status') == 'added':
                    response["downloaded"].append(hash_id)
                    # we need to process the sha1 and not the sha2,
                    # because the grid does not save the sha2.
                    generic_process_hash(sha1)
                    response["private_credits_spent"] += 1
                elif output.get('status') == 'inconsistency_found':
                    response["private_credits_spent"] += 1
                    generic_process_hash(sha1)
                elif output.get('status') == 'not_found':
                    response["not_found_on_vt"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error(str(output))
                    response = add_error(response, 11,
                                         "Unknown error when downloading sample from VT.")
                save(response)
    save(response)
    response["processed"] = []
    response["not_found_for_processing"] = []
    if process:
        logging.debug("process=true")
        for hash_id in hashes:
            logging.debug("task: hash_id=" + str(hash_id))
            process_start_time = datetime.datetime.now()
            generic_count += 1
            if generic_count % 20 == 0:
                save(response)
            if generic_process_hash(hash_id) == 0:
                process_end_time = datetime.datetime.now()
                response["processed"].append({
                    "hash": hash_id,
                    "seconds": (process_end_time - process_start_time).seconds
                })
            else:
                response["not_found_for_processing"].append(hash_id)
    save(response)
    if vt_av:
        response["vt_av_added"] = []
        response["vt_av_out_of_credits"] = []
        response["not_found_on_vt_av"] = []
        response["vt_av_already_downloaded"] = []
        response["public_credits_spent"] = 0
        for hash_id in hashes:
            sha1 = get_file_id(hash_id)
            if sha1 is not None:
                av_result_output = get_av_result(sha1)
                if av_result_output.get('status') == 'out_of_credits':
                    request_successful = False
                    count = 0
                    while not request_successful:
                        av_result_output = get_av_result(sha1)
                        count += 1
                        if av_result_output.get('status') != 'out_of_credits':
                            response["vt_av_out_of_credits"].append(hash_id)
                            response = add_error(response, 10,
                                                 "Had to retry " + str(count) +
                                                 " times in av_result (out_of_credits) for hash=" +
                                                 str(hash_id) +
                                                 ". Is someone else using the same public key?")
                            request_successful = True
                if av_result_output.get('status') == "added":
                    response["vt_av_added"].append(hash_id)
                    response["public_credits_spent"] += 1
                elif av_result_output.get('status') == "already_had_it":
                    response["vt_av_already_downloaded"].append(hash_id)
                elif av_result_output.get('status') == 'error':
                    response = add_error(response, 9, "Error in av_result: " +
                                         str(av_result_output.get('error_message')))
                elif av_result_output.get('status') == 'not_found':
                    response["not_found_on_vt_av"].append(hash_id)
                else:
                    logging.error("task_id=" + str(task_id))
                    logging.error("unknown error in av_result: " + str(hash_id) +
                                  " ; " + str(av_result_output))
                    response = add_error(response, 12, "Unknown error in av_result()")
                save(response)
    if bool(email):
        send_mail(email, "task done", str(response))
    response["date_end"] = datetime.datetime.now()
    save(response)
    return response
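# --- Reference sketch (assumption) ---
# generic_task(task_id) above reads a task document persisted by save()
# and fetched by load_task(). The field names below are exactly the ones
# read above; the values are illustrative only.
EXAMPLE_TASK = {
    "task_id": 42,
    "requested": {
        "process": True,
        "file_hash": "d41d8cd98f00b204e9800998ecf8427e",  # one or more hashes
        "vt_av": True,
        "vt_samples": False,
        "email": "analyst@example.com",
        "document_name": "batch.txt",
        "ip": "10.0.0.1",
    },
}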
def api_batch_process_debug_file():
    yield "<html><body><pre>"
    yield "Running Batch process\n"
    file_hashes = request.forms.get('file_hash')
    if file_hashes is None:
        response.status = 422
        print "file_hash is missing"
        yield "file_hash parameter is missing"
        return
    # transform file_hashes into a list of hashes
    not_found = []
    added_to_queue = 0
    downloaded_from_vt = 0
    for hash_id in file_hashes.split("\n"):
        hash_id = clean_hash(hash_id)
        if hash_id is None:
            continue
        data = "1=" + hash_id
        if len(hash_id) == 40 or len(hash_id) == 32:
            # sample is already stored but has no metadata: process it now
            pc = PackageController()
            res = pc.getFile(hash_id)
            if res is not None and len(SearchModule.search_by_id(data, 1, [], False)) == 0:
                print "Processing right now: " + str(hash_id)
                process_file(hash_id)
                if env['auto_get_av_result']:
                    get_av_result(hash_id)
                continue
        res = SearchModule.search_by_id(data, 1, [], False)
        if len(res) == 0:
            print "process_debug(): metadata of " + str(hash_id) + \
                " was not found. We will look in Pc. hash length: " + str(len(hash_id))
            if len(hash_id) == 40 or len(hash_id) == 32:
                pc = PackageController()
                res = pc.getFile(hash_id)
                if res is not None:
                    print "process_debug(): hash was found (" + str(hash_id) + ")"
                else:
                    print "process_debug(): hash was not found (" + str(hash_id) + ")"
            print "process_debug():"
            print "process_debug(): going to search " + str(hash_id) + " in vt"
            sha1 = SearchModule.add_file_from_vt(hash_id)
            if sha1 is None:
                print "process_debug(): sha1 is None: " + str(hash_id)
                not_found.append(hash_id)
                continue
            else:
                downloaded_from_vt += 1
        else:
            sha1 = res[0]["sha1"]
        added_to_queue += 1
        add_hash_to_process_queue(sha1)
        if env['auto_get_av_result']:
            get_av_result(sha1)
        yield str(sha1) + "\n"
    responsex = str(added_to_queue) + " files added to the process queue.\n"
    if downloaded_from_vt > 0:
        responsex += str(downloaded_from_vt) + " new hashes.\n"
    if len(not_found) != 0:
        responsex += str(len(not_found)) + " hashes not found.\n"
        responsex += "Not Found:\n"
        for aux in not_found:
            responsex = responsex + str(aux) + "\n"
    yield responsex
    yield "END"
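# --- Usage sketch (not part of the endpoint code) ---
# api_batch_process_debug_file() is a generator, so the framework streams
# its output as it is produced. A client can read the progress line by
# line; the /api/batch_process_debug path is an illustrative assumption.
import requests

def batch_process_debug(base_url, hashes):
    r = requests.post(base_url + "/api/batch_process_debug",
                      data={"file_hash": "\n".join(hashes)},
                      stream=True)
    for line in r.iter_lines():
        print line  # one sha1 per queued file, then the summary and "END"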