def import_data(): """ import excel :param :return none """ downloadfiletask.handle_three() mysql = Mysql() files = os.listdir('./import_excels') for filename in files: with open('./import_excels/' + filename, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(filename, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (filename, ) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("文件已解析:" + filename) os.remove('./import_excels/' + filename) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (filename, hash_value) mysql.insertOne(sql, val) mysql.dispose() result = parse_excel(filename) for i in range(0, len(result[0])): # sql = 'select * from cve_issue_repo_whitelist where package_name = %s and version = %s' # val = (result[0][i], result[1][i]) sql = 'select * from cve_issue_repo_whitelist where package_name = %s' val = (result[0][i]) mysql_data = mysql.getOne(sql, val) if mysql_data: print("更新数据package_name:" + result[0][i]) sql = 'update cve_issue_repo_whitelist set status = %s, branchs = %s, update_time = %s where ' \ 'package_name = %s and version = %s' val = (result[2][i], result[3][i], cur_date(), result[0][i], result[1][i]) mysql.update(sql, val) mysql.dispose() else: print('插入数据') sql = 'insert into cve_issue_repo_whitelist (package_name, version, status, branchs, create_time, ' \ 'update_time, delete_time) values (%s, %s, %s, %s, %s, %s, %s)' val = (result[0][i], result[1][i], result[2][i], result[3][i], cur_date(), None, None) mysql.insertOne(sql, val) mysql.dispose() os.remove('./import_excels/' + filename) mysql.close()
def import_data(): """ import excel :param :return none """ downloadfiletask.handle_four() mysql = Mysql() files = os.listdir('./package_committer_excels') for filename in files: with open('./package_committer_excels/' + filename, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(filename, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (filename, ) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("文件已解析:" + filename) os.remove('./package_committer_excels/' + filename) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (filename, hash_value) mysql.insertOne(sql, val) mysql.dispose() result = parse_excel(filename) for i in range(0, len(result[0])): sql = 'insert ignore into cve_spec_issue_assigness (package_name, issue_assignee, status, create_time) ' \ 'values (%s, %s, %s, %s)' val = (result[0][i], result[1][i], 1, cur_date()) mysql.insertOne(sql, val) mysql.dispose() print('插入数据{}:{}'.format(result[0][i], result[1][i])) os.remove('./package_committer_excels/' + filename) mysql.close()
def get_issue_excel(): """ import excel :param :return none """ file_dir = "issue_statistics" downloadfiletask.download_excel(file_dir) mysql = Mysql() files = os.listdir('./' + file_dir) for filename in files: file_vaule = "./%s/%s" % (file_dir, filename) with open(file_vaule, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(filename, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (filename,) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("File parsed:" + filename) os.remove(file_vaule) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (filename, hash_value) mysql.insertOne(sql, val) mysql.dispose() email_dict = proc_excel_data(file_vaule) if email_dict is not None and len(email_dict) > 0: to_email_name_list = email_dict["to"] cc_email_name_list = email_dict["cc"] else: continue if to_email_name_list is not None and len(to_email_name_list) > 0: del_sql = "delete from cve_issue_statistics_mail_list where email_type = %s" mysql.delete(del_sql, (1,)) mysql.dispose() for email_name in to_email_name_list: try: insert_sql = "insert into cve_issue_statistics_mail_list" \ "(email_name, email_type, create_time) values(%s,%s, %s)" val = (email_name, 1, cur_date()) mysql.insertOne(insert_sql, val) mysql.dispose() except pymysql.err.IntegrityError as e: print(e) mysql.dispose(2) del_sql = "delete from cve_issue_statistics_mail_list where email_type = %s" mysql.delete(del_sql, (2,)) mysql.dispose() if cc_email_name_list is not None and len(cc_email_name_list) > 0: for email_name in cc_email_name_list: try: insert_sql = "insert into cve_issue_statistics_mail_list" \ "(email_name, email_type, create_time) values(%s,%s, %s)" val = (email_name, 2, cur_date()) mysql.insertOne(insert_sql, val) mysql.dispose() except pymysql.err.IntegrityError as e: print(e) mysql.dispose(2) os.remove(file_vaule) mysql.close()
def handle_data(): """ CVSS official website data crawling data storage database """ # downloadfiletask.handle_one() path = "./newexcels" if not os.path.isdir(path): print("There is currently no excel data executable") return files = os.listdir(path) if files: mysql = Mysql() for fileName in files: with open('./newexcels/' + fileName, 'rb') as f: sha1obj = hashlib.sha1() sha1obj.update(f.read()) hash_value = sha1obj.hexdigest() print(fileName, hash_value) f.close() sql = "select file_hash from cve_file_hash where file_name = %s" val = (fileName, ) file_hash = mysql.getOne(sql, val) if file_hash: if hash_value == file_hash['file_hash']: print("文件已解析:" + fileName) exceltask.move_file(fileName) continue sql = "insert into cve_file_hash (file_name, file_hash) values (%s, %s)" val = (fileName, hash_value) mysql.insertOne(sql, val) mysql.dispose() print("File name currently being processed: ", fileName) cve_num_list = exceltask.crawl_cve_num(fileName) urls = exceltask.crawl_urls(fileName) cve_version_list = exceltask.crawl_cve_version(fileName) pack_name_list = exceltask.crawl_packname(fileName) for i in range(0, len(urls)): cve_num = str(cve_num_list[i]).strip() cve_version = str(cve_version_list[i]) pack_name = str(pack_name_list[i]) print(fileName, cve_num, pack_name, cve_version) # Database query results sql = "select * from cve_origin_excel where " \ "cve_num= %s and pack_name = %s and cve_version = %s" val = (cve_num, pack_name, cve_version) result_dict = mysql.getOne(sql, val) # Determine whether CVE exists in the database if result_dict: # Crawler web data crawl_list = crawltask.crawling(urls[i]) print(crawl_list) # Determine whether the database content is the latest data if crawl_list[0] or crawl_list[2]: if str(result_dict["nvd_score"]) == str(crawl_list[0]) \ and str(result_dict["vector_value"]) == str(crawl_list[4]): if result_dict['cve_status'] in [3, 4, 7]: print("update data:" + cve_num) update_time = str( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) try: sql = "update cve_origin_excel set nvd_score=%s, cve_level=%s, cve_desc=%s, " \ "repair_time=%s, vector_value=%s, attack_vector=%s, access_vector=%s, " \ "attack_complexity=%s, access_complexity=%s, privilege_required=%s, " \ "user_interaction=%s, scope=%s, confidentiality=%s, integrity=%s, " \ "availability=%s, authentication=%s, cve_status=%s, update_time=%s " \ "where cve_num=%s and pack_name = %s and cve_version = %s" val = (crawl_list[0], crawl_list[1], crawl_list[2], crawl_list[3], crawl_list[4], crawl_list[5], crawl_list[6], crawl_list[7], crawl_list[8], crawl_list[9], crawl_list[10], crawl_list[11], crawl_list[12], crawl_list[13], crawl_list[14], crawl_list[15], 1, update_time, cve_num, pack_name, cve_version) mysql.update(sql, val) mysql.dispose() except IndexError as e: mysql.dispose(0) print("Subscript out of bounds", e) else: print("The database is the latest data:" + cve_num) else: print("update data:" + cve_num) update_time = str( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) try: sql = "update cve_origin_excel set nvd_score=%s, cve_level=%s, cve_desc=%s, " \ "repair_time=%s, vector_value=%s, attack_vector=%s, access_vector=%s, " \ "attack_complexity=%s, access_complexity=%s, privilege_required=%s, " \ "user_interaction=%s, scope=%s, confidentiality=%s, integrity=%s, " \ "availability=%s, authentication=%s, cve_status=%s, update_time=%s, " \ "score_type=%s " \ "where cve_num=%s and pack_name = %s and cve_version = %s" val = (crawl_list[0], crawl_list[1], crawl_list[2], crawl_list[3], crawl_list[4], crawl_list[5], crawl_list[6], crawl_list[7], crawl_list[8], crawl_list[9], crawl_list[10], crawl_list[11], crawl_list[12], crawl_list[13], crawl_list[14], crawl_list[15], 1, update_time, crawl_list[16], cve_num, pack_name, cve_version) mysql.update(sql, val) mysql.dispose() except IndexError as e: print("Subscript out of bounds", e) mysql.dispose(0) else: print("error: ", result_dict) else: print("insert data") create_time = update_time = str( time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())) delete_time = None # State 0 means new, 1 means modified cve_status = 0 try: cve_url = str(urls[i]) listx = crawltask.crawling(cve_url) sql = "INSERT INTO cve_origin_excel (cve_num, cve_url, cve_version, pack_name, score_type, " \ "nvd_score, cve_level, cve_desc, repair_time, vector_value, attack_vector, " \ "access_vector, attack_complexity, access_complexity, privilege_required, " \ "user_interaction, scope, confidentiality, integrity, availability, " \ "authentication, cve_status, " \ "create_time, update_time, delete_time) " \ "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, " \ "%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" val = (cve_num, cve_url, cve_version, pack_name, listx[16], listx[0], listx[1], listx[2], listx[3], listx[4], listx[5], listx[6], listx[7], listx[8], listx[9], listx[10], listx[11], listx[12], listx[13], listx[14], listx[15], cve_status, create_time, update_time, delete_time) mysql.insertOne(sql, val) mysql.dispose() except IndexError as e: print("Subscript out of bounds", e) mysql.dispose(0) sql = "select * from cve_spec_error where cve_num = %s" val = (cve_num, ) result_spec_error = mysql.getOne(sql, val) if result_spec_error: print("过滤,修改status为6:{}".format(cve_num)) sql = "update cve_origin_excel set cve_desc = %s, cve_status = %s where cve_num = %s" val = (result_spec_error["cve_desc"], 6, cve_num) mysql.update(sql, val) mysql.dispose() exceltask.move_file(fileName) mysql.close() else: print( "error: There are no manually added tables in the newexcels folder" )