def repeat_task(): """ Parse the artificial CVE table, crawl the CVE official website data, and store it in the database return None """ print("CVE官网数据为空的CVE进行再次抓取") mysql = Mysql() sql = "SELECT cve_num,pack_name,cve_version " \ "FROM cve_origin_excel WHERE score_type = %s " \ "OR score_type IS NULL OR cve_desc IS NULL OR nvd_score IS NULL" val = ('', ) result = mysql.getMany(sql, val) print("CVE官网数据为空的 数据:", result) for i in result: crawllist = crawltask.crawling("https://nvd.nist.gov/vuln/detail/" + i["cve_num"]) print("更新CVE数据") sql = "update cve_origin_excel set nvd_score=%s, cve_level=%s, cve_desc=%s, " \ "repair_time=%s, vector_value=%s, attack_vector=%s, access_vector=%s, " \ "attack_complexity=%s, access_complexity=%s, privilege_required=%s, " \ "user_interaction=%s, scope=%s, confidentiality=%s, integrity=%s, " \ "availability=%s, authentication=%s, cve_status=%s, update_time=%s, " \ "score_type=%s where cve_num=%s and pack_name = %s and cve_version = %s" val = (crawllist[0], crawllist[1], crawllist[2], crawllist[3], crawllist[4], crawllist[5], crawllist[6], crawllist[7], crawllist[8], crawllist[9], crawllist[10], crawllist[11], crawllist[12], crawllist[13], crawllist[14], crawllist[15], 1, cur_date(), crawllist[16], i["cve_num"], i["pack_name"], i["cve_version"]) mysql.update(sql, val) mysql.dispose() mysql.close()
def select_yaml_lastdata(): """ Query the last data """ mysql = Mysql() sql = "SELECT package_id FROM cve_git_open_euler order by git_id desc" package_ids = mysql.getOne(sql) if package_ids and len(package_ids) > 0: return package_ids else: return None
def update_status(): """ Change the data with is_export 1 to 2 to indicate that it has been exported in excel """ mysql = Mysql() sql = "update cve_origin_excel set is_export=2 where cve_status=3 or cve_status=4" mysql.update(sql) mysql.dispose() mysql.close()
def store_yaml_data(yaml_data): """ parse dict Store yaml data to mysql return None """ mysql = Mysql() if yaml_data is not None and len(yaml_data) > 0: # Delete all data before updating update_yaml_origin_mark(mysql) for repo_key, repo_value in yaml_data.items(): try: for yaml_key, yaml_value in repo_value.items(): if "version" not in yaml_value or not yaml_value["version"]: yaml_value["version"] = "" origin_data = select_yaml_origin_data( yaml_key, yaml_value["version"], mysql, repo_key) if origin_data: update_yaml_origin_data(origin_data["id"], yaml_value, mysql) else: insert_yaml_origin_data(yaml_key, yaml_value, mysql, repo_key) packages_data = select_yaml_data(yaml_key, yaml_value, mysql) if packages_data is not None: if len(packages_data) > 1: for pd in packages_data[1:]: delete_yaml_openeuler_detail_data( mysql, pd["git_id"]) delete_yaml_openeuler_data(mysql, pd["git_id"]) pk = packages_data[0] update_yaml_data(pk["package_id"], yaml_key, yaml_value, mysql) update_yaml_detail_data(pk["git_id"], yaml_key, yaml_value, mysql) else: package_ids = select_yaml_lastdata() if package_ids is not None and package_ids[ "package_id"] >= 10000000: packg_id = add_package_id( package_ids["package_id"]) else: packg_id = add_package_id(0) last_id = insert_yaml_data(packg_id, yaml_key, yaml_value, mysql) insert_yaml_detail_data(last_id, yaml_key, yaml_value, mysql) mysql.dispose() except pymysql.err.IntegrityError: print(pymysql.err.IntegrityError) # except Exception as e: # print(e) mysql.dispose(2) delete_yaml_origin_mark(mysql) mysql.close()
def update_status(status): """ Execute mysql :param status: int :return: list """ mysql = Mysql() sql = 'update cve_issue_create_record set status = %s,update_time = %s where status = %s' val = (4, time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), status) mysql.update(sql, val) mysql.dispose() mysql.close()
def delete_yaml_data(): """ delete yaml data """ mysql = Mysql() # Delete historical data packages_data = select_openeuler_yaml_data(mysql) if packages_data is not None: for pk in packages_data: print("pk: ", pk) if pk["package_id"] >= 10000000: gauss_data = select_gauss_yaml_origin_data( pk["package_name"], pk["version"], mysql) spore_data = select_mindspore_yaml_origin_data( pk["package_name"], pk["version"], mysql) lookeng_data = select_openlookeng_yaml_origin_data( pk["package_name"], pk["version"], mysql) if not gauss_data and not spore_data and not lookeng_data: print("ID of the currently deleted data: ", pk["git_id"]) delete_yaml_openeuler_detail_data(mysql, pk["git_id"]) delete_yaml_openeuler_data(mysql, pk["git_id"]) mysql.close()
def sending(): """ Query all recipients """ mysql = Mysql() sql = "select email_name from cve_email_list where email_type=1" result = mysql.getMany(sql) mysql.close() print(result) for i in result: subject = "This time cve creates an issue record" content = "The email attachment may contain two attached excel documents, " \ "which are the issue data that has been successfully " \ "created and the issue data that has not been created." # sendemail.send_email("smtp.gmail.com", 587, # os.getenv("CVE_EMAIL_SENDADDR"), # os.getenv("CVE_EMAIL_PASSWORD"), # str(i['email_name']), './export_excels', subject, content) sendemail.send_email('mailman-exim4-service.mail.svc.cluster.local', 25, os.getenv("CVE_EMAIL_SENDADDR"), os.getenv("CVE_EMAIL_PASSWORD"), str(i['email_name']), './export_excels', subject, content)
def get_results(): """Query error data :return results: list """ mysql = Mysql() last_month = times.last_month_date() sql = "select * from cve_origin_excel where (cve_status = 3 or cve_status=4) and is_export = 1 and create_time > %s" val = (last_month, ) results = mysql.getMany(sql, val) print(results) mysql.dispose() mysql.close() return results
def get_published_date_task(): """ Get the release date of nvd return None """ print("Get the release date of nvd") mysql = Mysql() sql = "SELECT cve_id, cve_num, repair_time " \ "FROM cve_vuln_center WHERE repair_time = '' " \ "OR repair_time IS NULL" result = mysql.getMany(sql) if result is not None and result[0] is not None: for i in result: crawllist = crawltask.crawling( "https://nvd.nist.gov/vuln/detail/" + i["cve_num"]) if crawllist is not None and len(crawllist) >= 4 and crawllist[3]: sql = "update cve_vuln_center set repair_time = %s where cve_id = %s" val = (crawllist[3], i["cve_id"]) mysql.update(sql, val) mysql.dispose() mysql.close()
def get_results(status): """ Execute mysql :param status: int :return: list """ mysql = Mysql() last_month = times.last_month_date() sql = "select * from cve_issue_create_record where status = %s and create_time > %s" val = (status, last_month) results = mysql.getMany(sql, val) print(results) mysql.dispose() mysql.close() return results
def get_issue_excel(): """ import excel :param :return none """ file_dir = "issue_statistics" downloadfiletask.download_excel(file_dir) mysql = Mysql() files = os.listdir('./' + file_dir) for filename in files: file_vaule = "./%s/%s" % (file_dir, filename) with open(file_vaule, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(filename, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (filename,) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("File parsed:" + filename) os.remove(file_vaule) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (filename, hash_value) mysql.insertOne(sql, val) mysql.dispose() email_dict = proc_excel_data(file_vaule) if email_dict is not None and len(email_dict) > 0: to_email_name_list = email_dict["to"] cc_email_name_list = email_dict["cc"] else: continue if to_email_name_list is not None and len(to_email_name_list) > 0: del_sql = "delete from cve_issue_statistics_mail_list where email_type = %s" mysql.delete(del_sql, (1,)) mysql.dispose() for email_name in to_email_name_list: try: insert_sql = "insert into cve_issue_statistics_mail_list" \ "(email_name, email_type, create_time) values(%s,%s, %s)" val = (email_name, 1, cur_date()) mysql.insertOne(insert_sql, val) mysql.dispose() except pymysql.err.IntegrityError as e: print(e) mysql.dispose(2) del_sql = "delete from cve_issue_statistics_mail_list where email_type = %s" mysql.delete(del_sql, (2,)) mysql.dispose() if cc_email_name_list is not None and len(cc_email_name_list) > 0: for email_name in cc_email_name_list: try: insert_sql = "insert into cve_issue_statistics_mail_list" \ "(email_name, email_type, create_time) values(%s,%s, %s)" val = (email_name, 2, cur_date()) mysql.insertOne(insert_sql, val) mysql.dispose() except pymysql.err.IntegrityError as e: print(e) mysql.dispose(2) os.remove(file_vaule) mysql.close()
def import_data(): """ import excel :param :return none """ downloadfiletask.handle_four() mysql = Mysql() files = os.listdir('./package_committer_excels') for filename in files: with open('./package_committer_excels/' + filename, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(filename, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (filename, ) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("文件已解析:" + filename) os.remove('./package_committer_excels/' + filename) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (filename, hash_value) mysql.insertOne(sql, val) mysql.dispose() result = parse_excel(filename) for i in range(0, len(result[0])): sql = 'insert ignore into cve_spec_issue_assigness (package_name, issue_assignee, status, create_time) ' \ 'values (%s, %s, %s, %s)' val = (result[0][i], result[1][i], 1, cur_date()) mysql.insertOne(sql, val) mysql.dispose() print('插入数据{}:{}'.format(result[0][i], result[1][i])) os.remove('./package_committer_excels/' + filename) mysql.close()
def handle_data(): """ CVSS official website data crawling data storage database """ # downloadfiletask.handle_one() path = "./newexcels" if not os.path.isdir(path): print("There is currently no excel data executable") return files = os.listdir(path) if files: mysql = Mysql() for fileName in files: with open('./newexcels/' + fileName, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(fileName, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (fileName, ) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("文件已解析:" + fileName) exceltask.move_file(fileName) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (fileName, hash_value) mysql.insertOne(sql, val) mysql.dispose() print("File name currently being processed: ", fileName) cve_num_list = exceltask.crawl_cve_num(fileName) urls = exceltask.crawl_urls(fileName) cve_version_list = exceltask.crawl_cve_version(fileName) pack_name_list = exceltask.crawl_packname(fileName) for i in range(0, len(urls)): cve_num = str(cve_num_list[i]).strip() cve_version = str(cve_version_list[i]) pack_name = str(pack_name_list[i]) print(fileName, cve_num, pack_name, cve_version) # Database query results sql = "select * from cve_origin_excel where " \ "cve_num= %s and pack_name = %s and cve_version = %s" val = (cve_num, pack_name, cve_version) result_dict = mysql.getOne(sql, val) # Determine whether CVE exists in the database if result_dict: # Crawler web data crawl_list = crawltask.crawling(urls[i]) print(crawl_list) # Determine whether the database content is the latest data if crawl_list[0] or crawl_list[2]: if str(result_dict["nvd_score"]) == str(crawl_list[0]) \ and str(result_dict["vector_value"]) == str(crawl_list[4]): if result_dict['cve_status'] in [3, 4, 7]: print("update data:" + cve_num) update_time = str( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) try: sql = "update cve_origin_excel set nvd_score=%s, cve_level=%s, cve_desc=%s, " \ "repair_time=%s, vector_value=%s, attack_vector=%s, access_vector=%s, " \ "attack_complexity=%s, access_complexity=%s, privilege_required=%s, " \ "user_interaction=%s, scope=%s, confidentiality=%s, integrity=%s, " \ "availability=%s, authentication=%s, cve_status=%s, update_time=%s " \ "where cve_num=%s and pack_name = %s and cve_version = %s" val = (crawl_list[0], crawl_list[1], crawl_list[2], crawl_list[3], crawl_list[4], crawl_list[5], crawl_list[6], crawl_list[7], crawl_list[8], crawl_list[9], crawl_list[10], crawl_list[11], crawl_list[12], crawl_list[13], crawl_list[14], crawl_list[15], 1, update_time, cve_num, pack_name, cve_version) mysql.update(sql, val) mysql.dispose() except IndexError as e: mysql.dispose(0) print("Subscript out of bounds", e) else: print("The database is the latest data:" + cve_num) else: print("update data:" + cve_num) update_time = str( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) try: sql = "update cve_origin_excel set nvd_score=%s, cve_level=%s, cve_desc=%s, " \ "repair_time=%s, vector_value=%s, attack_vector=%s, access_vector=%s, " \ "attack_complexity=%s, access_complexity=%s, privilege_required=%s, " \ "user_interaction=%s, scope=%s, confidentiality=%s, integrity=%s, " \ "availability=%s, authentication=%s, cve_status=%s, update_time=%s, " \ "score_type=%s " \ "where cve_num=%s and pack_name = %s and cve_version = %s" val = (crawl_list[0], crawl_list[1], crawl_list[2], crawl_list[3], crawl_list[4], crawl_list[5], crawl_list[6], crawl_list[7], crawl_list[8], crawl_list[9], crawl_list[10], crawl_list[11], crawl_list[12], crawl_list[13], crawl_list[14], crawl_list[15], 1, update_time, crawl_list[16], cve_num, pack_name, cve_version) mysql.update(sql, val) mysql.dispose() except IndexError as e: print("Subscript out of bounds", e) mysql.dispose(0) else: print("error: ", result_dict) else: print("insert data") create_time = update_time = str( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) delete_time = None # State 0 means new, 1 means modified cve_status = 0 try: cve_url = str(urls[i]) listx = crawltask.crawling(cve_url) sql = "INSERT INTO cve_origin_excel (cve_num, cve_url, cve_version, pack_name, score_type, " \ "nvd_score, cve_level, cve_desc, repair_time, vector_value, attack_vector, " \ "access_vector, attack_complexity, access_complexity, privilege_required, " \ "user_interaction, scope, confidentiality, integrity, availability, " \ "authentication, cve_status, " \ "create_time, update_time, delete_time) " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \ "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" val = (cve_num, cve_url, cve_version, pack_name, listx[16], listx[0], listx[1], listx[2], listx[3], listx[4], listx[5], listx[6], listx[7], listx[8], listx[9], listx[10], listx[11], listx[12], listx[13], listx[14], listx[15], cve_status, create_time, update_time, delete_time) mysql.insertOne(sql, val) mysql.dispose() except IndexError as e: print("Subscript out of bounds", e) mysql.dispose(0) sql = "select * from cve_spec_error where cve_num = %s" val = (cve_num, ) result_spec_error = mysql.getOne(sql, val) if result_spec_error: print("过滤,修改status为6:{}".format(cve_num)) sql = "update cve_origin_excel set cve_desc = %s, cve_status = %s where cve_num = %s" val = (result_spec_error["cve_desc"], 6, cve_num) mysql.update(sql, val) mysql.dispose() exceltask.move_file(fileName) mysql.close() else: print( "error: There are no manually added tables in the newexcels folder" )
def import_data(): """ import excel :param :return none """ downloadfiletask.handle_three() mysql = Mysql() files = os.listdir('./import_excels') for filename in files: with open('./import_excels/' + filename, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(filename, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (filename, ) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("文件已解析:" + filename) os.remove('./import_excels/' + filename) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (filename, hash_value) mysql.insertOne(sql, val) mysql.dispose() result = parse_excel(filename) for i in range(0, len(result[0])): # sql = 'select * from cve_issue_repo_whitelist where package_name = %s and version = %s' # val = (result[0][i], result[1][i]) sql = 'select * from cve_issue_repo_whitelist where package_name = %s' val = (result[0][i]) mysql_data = mysql.getOne(sql, val) if mysql_data: print("更新数据package_name:" + result[0][i]) sql = 'update cve_issue_repo_whitelist set status = %s, branchs = %s, update_time = %s where ' \ 'package_name = %s and version = %s' val = (result[2][i], result[3][i], cur_date(), result[0][i], result[1][i]) mysql.update(sql, val) mysql.dispose() else: print('插入数据') sql = 'insert into cve_issue_repo_whitelist (package_name, version, status, branchs, create_time, ' \ 'update_time, delete_time) values (%s, %s, %s, %s, %s, %s, %s)' val = (result[0][i], result[1][i], result[2][i], result[3][i], cur_date(), None, None) mysql.insertOne(sql, val) mysql.dispose() os.remove('./import_excels/' + filename) mysql.close()