def __init__(self):
    """Create the collaborators, load the spider settings and connect.

    Builds the database wrapper and the spider, reads the ``Spider``
    section of ``GetTLDs.conf``, records the URL of the TLD data page and
    finally opens the database connection.
    """
    self.db = DataBase()
    self.INNAspider = spider()

    # Configuration file holding the spider timings.
    settings = ConfigParser.ConfigParser()
    self.staic = settings
    settings.read('GetTLDs.conf')

    section = 'Spider'
    self.Intervals = settings.getfloat(section, 'getIntervals')  # fetch interval
    self.delta = settings.getint(section, 'delta')  # update interval

    self.INNA_url = 'http://www.iana.org/domains/root/db'  # TLD data page
    self.db.get_connect()
def __Demo():
    """Push one hard-coded WHOIS record through ``WhoisRecord.Update``.

    Connects to the database, hands the sample record below to
    ``WhoisRecord(DB).Update(test_data, 1)``, commits and closes the
    connection. Takes no arguments and returns nothing.
    """
    # Test data
    # NOTE(review): 'reg_email' and 'details' contain undecodable characters;
    # presumably deliberate malformed input for the update path -- confirm.
    test_data = {
        'org_name': 'Nexperian Holding Limited',
        'updated_date': '22211223Z',
        'domain': '17fv.com',
        'reg_phone': '+86.57185022088',
        'reg_email': 'YuMing@����֤ȯ.netYinSiBaoHu.AliYun.com',
        'expiration_date': '2017-11-22T04:10:03Z',
        'reg_name': 'Nexperian Holding Limited',
        'top_whois_server': 'ccwhois.verisign-grs.com',
        'name_server': 'dns10.hichina.com;dns9.hichina.com',
        'creation_date': '2016-11-22T04:10:03Z',
        'flag': 1,
        'domain_status': '4',
        'details': 'Domain N����֤ȯ.netame: ccbanz.cc\r\nRegistry Domain ID: 127221640_DOMAIN_CC-VRSN\r\nRegistrar WHOIS Server: grs-whois.hichina.com\r\nRegistrar URL: http://whois.aliyun.com/\r\nUpdated Date: 2016-11-22T04:10:03Z\r\nCreation Date: 2016-11-22T04:10:03Z\r\nRegistrar Registration Expiration Date: 2017-11-22T04:10:03Z\r\nRegistrar: HICHINA ZHICHENG TECHNOLOGY LTD.\r\nRegistrar IANA ID: 420\r\nReseller:\r\nDomain Status: ok http://www.icann.org/epp#OK\r\nRegistry Registrant ID: Not Available From Registry\r\nRegistrant Name: Nexperian Holding Limited\r\nRegistrant Organization: Nexperian Holding Limited\r\nRegistrant Street: Le Jia International No.999 Liang Mu Road Yuhang District\r\nRegistrant City: Hangzhou\r\nRegistrant State/Province: Zhejiang\r\nRegistrant Postal Code: 311121\r\nRegistrant Country: CN\r\nRegistrant Phone: +86.57185022088\r\nRegistrant Phone Ext: \r\nRegistrant Fax: +86.57186562951\r\nRegistrant Fax Ext: \r\nRegistrant Email: [email protected]\r\nRegistry Admin ID: Not Available From Registry\r\nAdmin Name: Nexperian Holding Limited\r\nAdmin Organization: Nexperian Holding Limited\r\nAdmin Street: Le Jia International No.999 Liang Mu Road Yuhang District\r\nAdmin City: Hangzhou\r\nAdmin State/Province: Zhejiang\r\nAdmin Postal Code: 311121\r\nAdmin Country: CN\r\nAdmin Phone: +86.57185022088\r\nAdmin Phone Ext: \r\nAdmin Fax:+86.57186562951\r\nAdmin Fax Ext: \r\nAdmin Email: [email protected]\r\nRegistry Tech ID: Not Available From Registry\r\nTech Name: Nexperian Holding Limited\r\nTech Organization: Nexperian Holding Limited\r\nTech Street: Le Jia International No.999 Liang Mu Road Yuhang District\r\nTech City: Hangzhou\r\nTech State/Province: Zhejiang\r\nTech Postal Code: 311121\r\nTech Country: CN\r\nTech Phone: +86.57185022088\r\nTech Phone Ext: \r\nTech Fax: +86.57186562951\r\nTech Fax Ext: \r\nTech Email: [email protected]\r\nName Server: dns10.hichina.com\r\nName Server: dns9.hichina.com\r\nDNSSEC: unsigned\r\nRegistrar Abuse Contact Email: [email protected]\r\nRegistrar Abuse Contact Phone: +86.95187\r\nURL of the ICANN WHOIS Data Problem Reporting System: http://wdprs.internic.net/\r\n>>>Last update of WHOIS database: 2016-11-22T04:10:03Z <<<\r\n\r\nFor more information on Whois status codes, please visit https://icann.org/epp\r\n\r\nRegistry Billing ID: Not Available From Registry\r\nBilling Name: Nexperian Holding Limited\r\nBilling Organization: Nexperian Holding Limited\r\nBilling Street: Le Jia International No.999 Liang Mu Road Yuhang District\r\nBilling City: Hangzhou\r\nBilling State/Province: Zhejiang\r\nBilling Postal Code: 311121\r\nBilling Country: CN\r\nBilling Phone: +86.57185022088\r\nBilling Phone Ext: \r\nBilling Fax: +86.57186562951\r\nBilling Fax Ext: \r\nBilling Email: [email protected]\r\n\r\nImportant Reminder: Per ICANN 2013RAA`s request, Hichina has modified domain names`whois format of dot com/net/cc/tv, you could refer to section 1.4 posted by ICANN on http://www.icann.org/en/resources/registrars/raa/approved-with-specs-27jun13-en.htm#whois The data in this whois database is provided to you for information purposes only, that is, to assist you in obtaining information about or related to a domain name registration record. We make this information available \\"as is,\\" and do not guarantee its accuracy. By submitting a whois query, you agree that you will use this data only for lawful purposes and that, under no circumstances will you use this data to: (1)enable high volume, automated, electronic processes that stress or load this whois database system providing you this information; or (2) allow, enable, or otherwise support the transmission of mass unsolicited, commercial advertising or solicitations via direct mail, electronic mail, or by telephone. The compilation, repackaging, dissemination or other use of this data is expressly prohibited without prior written consent from us. We reserve the right to modify these terms at any time. By submitting this query, you agree to abide by these terms.For complete domain details go to:http://whois.aliyun.com/whois/domain/ccbanz.cc\r\n\n',
        'sponsoring_registrar': 'sss',
        'tld': u'cc',
        'sec_whois_server': 'grs-whois.hichina.com'
    }
    DB = DataBase()
    DB.db_connect()
    demo = WhoisRecord(DB)
    # NOTE(review): meaning of the second argument (1) is not visible here.
    demo.Update(test_data, 1)
    DB.db_commit()
    DB.db_close()
def getDatabaseTime():
    """Return the current time as reported by the database server.

    Opens a dedicated connection, evaluates ``SELECT now()`` and closes the
    connection again before handing back the first column of the first row.
    """
    connection = DataBase()
    connection.get_connect()
    rows = connection.execute("""SELECT now()""")
    server_now = rows[0][0]
    connection.db_close()
    return server_now
def GetWhoisInfo(tld=None):
    """Count the WHOIS rows stored for one TLD across all sharded tables.

    Sums ``COUNT(*)`` over ``domain_whois.domain_whois_1`` ..
    ``domain_whois.domain_whois_<TABLE_NUM>`` twice: once for rows with
    ``flag > 0`` (treated as finished) and once for rows with ``flag < 0``
    (treated as unfinished). Each total is printed as soon as it is known.

    :param tld: a single TLD string; lists are not supported
    :return: ``(finished, unfinished)`` row counts
    """
    count_sql = ("""SELECT COUNT(*) FROM domain_whois.domain_whois_{num} """
                 """WHERE`tld` = '{tld}' AND `flag` {op} 0""")

    db = DataBase()
    db.get_connect()
    totals = []
    try:
        for op in ('>', '<'):
            total = 0
            for table_num in range(1, TABLE_NUM + 1):
                # Query the shard selected by the loop; previously the table
                # suffix was hard-coded to '1', so only the first shard was
                # counted (TABLE_NUM times over).
                # NOTE(review): tld is interpolated into the statement as-is;
                # only pass trusted values.
                SQL = count_sql.format(num=table_num, tld=tld, op=op)
                total += db.execute(SQL)[0][0]
            print(total)
            totals.append(total)
    finally:
        # Release the connection even when a query fails.
        db.db_close()
    return tuple(totals)
def calculateClientSpeed(self):
    """Report every client's average hourly throughput over the last day.

    For each address in ``self.client1_ip_list`` and
    ``self.client2_ip_list`` the per-client table
    ``whois_sys_log.client_count_log_<last octet>`` is queried for the
    largest and smallest ``count`` logged between "database now minus one
    day" and "database now"; the difference is turned into a per-hour rate.

    :return: ``(clientSpeedInfo, client2_status)`` -- the first string holds
        one speed line per client (type-1 clients first), the second one
        status line per type-2 client ("正常" above 100 per hour, otherwise
        "崩溃!").
    """
    currentTime = self.getDatabaseTime()  # current database time
    earlyTime = currentTime - datetime.timedelta(days=1)  # one day earlier

    DB = DataBase()
    DB.get_connect()

    def speed_per_hour(client_num):
        """Return the average hourly rate for one client's log table."""
        SQL = """SELECT max(`count`),min(`count`) from whois_sys_log.client_count_log_{client_num} WHERE insert_time > '{early_time}' AND insert_time < '{current_time}';""".format(
            client_num=client_num, early_time=earlyTime, current_time=currentTime)
        max_count, min_count = DB.execute(SQL)[0]
        if max_count is None or min_count is None:
            # No log rows in the window: the aggregates come back as NULL,
            # which means the client made no measurable progress.
            return 0.0
        # NOTE(review): divisor is 23.5 although the window is 24 hours --
        # kept as in the original; confirm the intent.
        return (max_count - min_count) / 23.5

    def speed_line(client_num, speed):
        """Format the speed report line for one client."""
        return client_num + "客户端平均处理速度为:\t" + str(speed)[:7] + "(个/小时)\n"

    speed_parts = []
    status_parts = []
    try:
        for ip in self.client1_ip_list:
            client_num = ip.split('.')[-1]
            speed_parts.append(speed_line(client_num, speed_per_hour(client_num)))

        for ip in self.client2_ip_list:
            client_num = ip.split('.')[-1]
            speed = speed_per_hour(client_num)
            verdict = "正常\n" if speed > 100 else "崩溃!\n"
            status_parts.append(client_num + "客户端状态:\t" + verdict)
            speed_parts.append(speed_line(client_num, speed))
    finally:
        # The original never released this connection.
        DB.db_close()

    clientSpeedInfo = ''.join(speed_parts)
    client2_status = ''.join(status_parts)
    return clientSpeedInfo, client2_status
class GetTLD(object):
    """Refresh ``domain_whois_summary.whois_tld_summary`` from the TLD page."""

    def __init__(self):
        """Create the collaborators, load the spider settings and connect."""
        self.db = DataBase()  # database wrapper
        self.INNAspider = spider()  # page spider
        self.staic = ConfigParser.ConfigParser()  # configuration file
        self.staic.read('GetTLDs.conf')
        self.Intervals = self.staic.getfloat('Spider', 'getIntervals')  # fetch interval
        self.delta = self.staic.getint('Spider', 'delta')  # update interval
        self.INNA_url = 'http://www.iana.org/domains/root/db'  # TLD data page
        self.db.get_connect()  # open the database connection

    @staticmethod
    def _escape(value):
        """Escape backslashes and single quotes for a quoted SQL literal.

        Non-string values are returned unchanged.
        """
        if not isinstance(value, (type(''), type(u''))):
            return value
        return value.replace('\\', '\\\\').replace("'", "\\'")

    def getCurrentTime(self):
        """:return: the current local time as a ``datetime``"""
        return datetime.datetime.now()

    def isexist(self, tld):
        """Look up *tld* in the summary table.

        :return: ``False`` when no row exists, ``'No-whois'`` when column 4
            of the row is the empty string, otherwise column 6 of the row
            (per the original docstring: the time of the last update).
        """
        SQL = """SELECT * FROM domain_whois_summary.whois_tld_summary WHERE TLD = '{tld}'""".format(
            tld=self._escape(tld))
        result = self.db.execute(SQL)
        # Covers both "no result object" and "empty result set"; the row is
        # read from this single query instead of running the SELECT twice.
        if not result:
            return False
        row = result[0]
        if row[4] == '':
            return 'No-whois'
        return row[6]

    def SQL_Generate(self, GenType='INSERT', **TLDinfo):
        """Build the SQL statement that stores one TLD record.

        :param GenType: ``'INSERT'`` for a new row, ``'UPDATE'`` to refresh
            the whois server of an existing row
        :param TLDinfo: must provide ``TLD``, ``punycode``, ``type``,
            ``WhoisSrv`` and ``SponsoringOrganisation``
        :return: the SQL string, or ``None`` for an unknown *GenType*
        :raises KeyError: when one of the required keys is missing
        """
        # Values are scraped from a web page, so quote characters must not
        # be allowed to terminate the SQL literal early.
        TLD = self._escape(TLDinfo['TLD'])
        punycode = self._escape(TLDinfo['punycode'])
        Type = self._escape(TLDinfo['type'])
        WhoisSrv = self._escape(TLDinfo['WhoisSrv'])
        SponsoringOrganisation = self._escape(TLDinfo['SponsoringOrganisation'])

        if GenType == 'INSERT':
            SQL = """INSERT domain_whois_summary.whois_tld_summary """
            SQL += """(`TLD`,`Punycode`,`Type`,`whois_addr`,`SponsoringOrganization`)"""
            SQL += """VALUES('{T}','{PC}','{Ty}','{WS}','{SO}');""".format(
                T=TLD, PC=punycode, Ty=Type, WS=WhoisSrv, SO=SponsoringOrganisation)
        elif GenType == 'UPDATE':
            SQL = """UPDATE domain_whois_summary.whois_tld_summary """
            SQL += """SET `whois_addr`='{WS}' """.format(WS=WhoisSrv)
            SQL += """WHERE `TLD` = '{T}';""".format(T=TLD)
        else:
            print("[Error_SQL]未预计到的生成SQL语句模式")
            return None
        return SQL

    def insertInfo(self, getIntervals=3):
        """Fetch the TLD page and insert or refresh every TLD it lists.

        New TLDs are inserted; TLDs without a whois server, or whose stored
        timestamp is older than 15 days, are updated; the rest are skipped.
        The method stops early (after closing the database connection) when
        the page cannot be fetched or a database write fails.

        :param getIntervals: accepted for compatibility but not used; the
            spider is driven with ``self.Intervals`` from the configuration
        """
        import sys  # local import: only needed for the newline-free progress message

        # Same output as a print statement with a trailing comma: the text
        # followed by one separating space and no newline.
        sys.stdout.write("[ HTTP ]获取页面信息中... ")
        try:
            HtmlData = self.INNAspider.getPageText(self.INNA_url)
        except Exception as e:
            print("失败!")
            print("[Error_HTTP] 获取内容出现问题")
            self.db.db_close()
            print(e)
            # Without the page there is nothing to process; falling through
            # used to report success and then fail on the unbound HtmlData.
            return
        print("成功")

        # Refresh threshold.
        # NOTE(review): self.delta is loaded from the config but this fixed
        # 15-day value is what the original compares against -- confirm.
        delta = datetime.timedelta(days=15)

        for TLDinfo in self.INNAspider.getTLDinfo(
                HtmlData, intervalsTime=self.Intervals):
            Curtime = self.getCurrentTime()
            existFlag = self.isexist(TLDinfo['TLD'])

            # Decide what to do with this TLD.
            # NOTE(review): getTLDWhoisSrv is called on the spider class, not
            # on self.INNAspider -- kept as in the original.
            if not existFlag:
                TLDinfo = spider.getTLDWhoisSrv(**TLDinfo)
                SQL = self.SQL_Generate(**TLDinfo)
                print("[INSERT]获取了" + str(TLDinfo['TLD']) + "的相关信息")
            elif existFlag == 'No-whois' or delta < Curtime - existFlag:
                TLDinfo = spider.getTLDWhoisSrv(**TLDinfo)
                SQL = self.SQL_Generate(GenType='UPDATE', **TLDinfo)
                print("[UPDATE]更新了" + str(TLDinfo['TLD']) + "的相关信息")
            else:
                SQL = None
                print("[ SKIP ]跳过了" + str(TLDinfo['TLD']) + "的相关信息")

            # Apply the change.
            if SQL is not None:
                try:
                    self.db.execute(SQL)
                    self.db.db_commit()
                except MySQLdb.Error as e:
                    print("[Error_DB] 数据库操作出现问题")
                    print(e)
                    self.db.db_close()
                    # The connection is closed now, so neither the remaining
                    # TLDs nor the final commit can use it.
                    return

        self.db.db_commit()  # final commit once the whole pass is done
def OldWhois2NewFlga():
    """Mark in the new database every domain that the old database holds.

    Iterates the domains produced by ``GetOldDatabaseDomain`` for the old
    database. For each one the new database is asked for its flag (query
    built by ``SQL_refactor.isFlag_100``); a flag of -98 or lower is
    rewritten to -97. Updates are committed every 1000 changes and once
    more at the end. Domains missing from the new database are printed and
    written to the module-level ``log``, which also receives the final
    count and is closed afterwards.
    """
    import sys  # local import: only needed for the newline-free message prefix

    commit_count = 0
    no_exist_count = 0
    ODB = DataBase(section='Old_DataBase')
    NDB = DataBase(section='New_DataBase')
    NDB.get_connect()
    try:
        for domain in GetOldDatabaseDomain(ODB):
            SQL_serch = SQL_refactor.isFlag_100(domain)
            result = NDB.execute(SQL_serch)
            if result:
                # Read the flag from the row already fetched rather than
                # running the same SELECT a second time.
                flag = int(result[0][0])
                if flag <= -98:
                    # Not yet processed: move the flag to -97.
                    SQL_update = SQL_refactor.Update_by_domain(
                        domain, UpdateCon=['flag'], UpdateVal=['-97'])
                    NDB.execute(SQL_update)
                    commit_count += 1
                    if commit_count == 1000:
                        # Commit in batches to keep transactions bounded.
                        NDB.db_commit()
                        print('[Commit]数据库提交')
                        commit_count = 0
            else:
                # Same output as two print statements joined by a trailing
                # comma: prefix, one space, then the domain.
                sys.stdout.write('[Domain]新库中不存在的域名: ')
                print(domain)
                no_exist_count += 1
                log.write('[Domain]新库中不存在的域名:')
                log.write(domain)
                log.write('\n')
        NDB.db_commit()
    finally:
        # Release the connection even when a query fails midway.
        NDB.db_close()

    log.write('[Domain]新库中不存在的域名个数 : ')
    log.write(str(no_exist_count))
    log.write('\n')
    log.close()
def update_black_list():
    """Load reverse-lookup results into the registrant black list.

    Works in two passes on the ``malicious_domain_sys`` database, reading
    through one connection and writing through a second one:

    1. every ``(info, info_type)`` row of ``info_reverse_search`` is
       inserted (``INSERT IGNORE``) into ``reg_info_black_lists`` with
       ``domain_count = -1``;
    2. for every black-list row with ``flag < 0`` the matching domains are
       counted per ``judge_flag`` (type 2 matches ``reg_name``, type 3
       ``reg_email``, anything else ``reg_phone``); the row then receives
       the total count, the count with a negative ``judge_flag`` and
       ``flag = 1``. Rows without any matching domain are left untouched.
    """
    def quote(value):
        """Escape backslashes and single quotes for a quoted SQL literal."""
        if not isinstance(value, (type(''), type(u''))):
            return value
        return value.replace('\\', '\\\\').replace("'", "\\'")

    # Black-list type -> whois column to match; other types use reg_phone.
    column_by_type = {2: 'reg_name', 3: 'reg_email'}

    DB = DataBase()
    DB2 = DataBase()
    DB.db_connect()
    DB2.db_connect()
    try:
        DB.execute_no_return("""USE malicious_domain_sys""")
        DB2.execute_no_return("""USE malicious_domain_sys""")

        # Pass 1: copy the reverse-lookup values into the black list.
        for results in DB.execute_Iterator("""SELECT info,info_type FROM info_reverse_search"""):
            for info, info_type in results:
                # Registrant data may contain quotes, so it is escaped
                # before being embedded in the statement.
                SQL = """INSERT IGNORE INTO reg_info_black_lists SET info = '{i}',type = {t},domain_count = -1""".format(
                    i=quote(info), t=info_type)
                DB2.execute_no_return(SQL)
                DB2.db_commit()
        DB2.db_commit()

        # Pass 2: fill in the counters of the rows not processed yet.
        for results in DB.execute_Iterator("""SELECT info,type FROM reg_info_black_lists WHERE flag < 0"""):
            for info, info_type in results:
                reg_info_type = column_by_type.get(info_type, 'reg_phone')
                SQL2 = """SELECT judge_flag,COUNT(*) FROM whois INNER JOIN domain_index ON whois.ID = domain_index.ID WHERE {info_type} = '{info}' GROUP BY judge_flag """.format(
                    info_type=reg_info_type,
                    info=quote(info)
                )
                results_sql2 = DB2.execute(SQL2)
                if not results_sql2:
                    continue

                domain_count = 0
                malicious_count = 0
                # Reuse the rows already fetched instead of re-running the
                # aggregate query.
                for judge_flag, count in results_sql2:
                    domain_count += count
                    if judge_flag < 0:
                        malicious_count += count

                SQL3 = """UPDATE reg_info_black_lists SET domain_count = {d}, malicious_count={m}, flag = 1 WHERE info = '{info}'""".format(
                    d=domain_count,
                    m=malicious_count,
                    info=quote(info)
                )
                print(info)
                DB2.execute_no_return(SQL3)
                DB2.db_commit()
        DB2.db_commit()
    finally:
        # Release both connections even when a statement fails.
        DB.db_close()
        DB2.db_close()
def input_domain(file, commitNum=200):
    """Import the domains listed in a text file into ``domain_index``.

    Each line is expected to hold a domain and its judge flag separated by
    a single space. The domain is converted with
    ``DomainAnalyse.get_punycode_domain`` and inserted into
    ``malicious_domain_sys.domain_index``. Lines with fewer than two
    space-separated fields (for example blank lines) are skipped.

    :param file: path of the input file
    :param commitNum: number of inserts between two commits
    """
    from db_opreation import DataBase
    from WhoisData.domain_analyse import DomainAnalyse

    DB = DataBase()
    DB.get_connect()
    try:
        DB.execute("""USE malicious_domain_sys""")
        count = 0
        with open(file, 'r') as f:
            for line in f:
                fields = line.split(' ')
                if len(fields) < 2:
                    continue
                domain = fields[0].strip()
                judge_flag = fields[1].strip()

                raw_punycode = DomainAnalyse(domain).get_punycode_domain()
                punycode = raw_punycode.strip()
                print('%s %s' % (raw_punycode, judge_flag))

                # NOTE(review): the row ID comes from the builtin hash(),
                # whose value for strings is not guaranteed to be the same
                # across interpreter versions, platforms or (with hash
                # randomization) runs -- kept for compatibility with
                # existing rows.
                # NOTE(review): judge_flag is interpolated unquoted; the
                # input file must be trusted.
                SQL = """INSERT INTO domain_index SET ID = {id}, domain = '{d}', judge_flag = {f} ;""".format(
                    id=hash(punycode),
                    d=punycode,
                    f=judge_flag)
                print(punycode)
                DB.execute(SQL)

                # The counter was never advanced before, so the periodic
                # commit could not trigger.
                count += 1
                if count >= commitNum:
                    DB.db_commit()
                    count = 0
        DB.db_commit()
    finally:
        # Release the connection even when an insert fails.
        DB.db_close()