def __init__(self, key=''):
    """Configure the Safe Browsing lookup client and open the database.

    ``version`` may be any value of the form ``*.*``; ``api_version`` has to
    be ``3.1``. ``key`` is the Google API key, which must be applied for
    online.

    Raises:
        ValueError: if ``key`` is the empty string.
    """
    self.key = key
    self.version, self.api_version = '1.0', '3.1'
    if self.key == '':
        raise ValueError("缺少Google API,请于 Google Developers Console中申请 API Key")

    # Lookup endpoint with client name, key, app version and protocol version.
    lookup_template = 'https://sb-ssl.google.com/safebrowsing/api/lookup?client=%s&key=%s&appver=%s&pver=%s'
    query_fields = ('python', self.key, self.version, self.api_version)
    self.url_google = lookup_template % query_fields

    self.conn = MysqlConnection().return_conn()
def main(): conn = MysqlConnection().return_conn() cursor = conn.cursor() cursor.execute('select ip from ip_location') IPL = IPLocator( ) for ip in cursor.fetchall(): address = IPL.getIpAddr( IPL.str2ip(str(ip[0])) ) #str2ip使查询ip的格式与数据库中的相同 print ip[0],address IPL.getIndexCount() #输出版本信息和记录总数 cursor.close() conn.close()
def main(): conn = MysqlConnection().return_conn() cursor = conn.cursor() cursor.execute('select ip from ip_location') IPL = IPLocator() for ip in cursor.fetchall(): address = IPL.getIpAddr(IPL.str2ip(str( ip[0]))) #str2ip使查询ip的格式与数据库中的相同 print ip[0], address IPL.getIndexCount() #输出版本信息和记录总数 cursor.close() conn.close()
def __init__(self):
    """Open the database, pick a PhishTank API key and set the data URL.

    The key has to be applied for on the PhishTank website; it is obtained
    here through ``self.phishtank_key_selection()``.

    Raises:
        SystemExit: with status 0 when no usable key is available (the
            selection result stringifies to ``"False"``).
    """
    self.conn = MysqlConnection().return_conn()

    key = self.phishtank_key_selection()  # a valid PhishTank developer key
    if str(key) == "False":  # no usable key
        # Raise SystemExit directly: the bare ``exit`` helper is injected by
        # the ``site`` module and is not guaranteed to exist in programs.
        raise SystemExit(0)

    self.key = key
    self.phishsite = "http://data.phishtank.com/data/"
    self.fileformat = '/online-valid.json'
    self.total = 0.0  # data size
def domain2ip_batch(): """批量解析""" sql = 'select domain from url_detail_info limit 20' mysql = MysqlConnection() conn = mysql.return_conn() cursor = conn.cursor() cursor.execute(sql) ips = cursor.fetchall() for ip in ips: try: result = socket.getaddrinfo(ip[0], 'http') print ip[0],result except socket.error, err_msg: print err_msg #回显异常信息 continue
def main():
    """Start one ``online`` thread per URL, in batches of ``THREAD_NUM``.

    Up to 1000 URLs are read from ``url_detail_info``. For every batch of
    ``THREAD_NUM`` URLs a thread running ``online(url, rowcount)`` is started
    per URL, followed by a two-second pause. Threads are not joined here.

    The cursor and connection are released right after the rows are fetched,
    so they are not held idle during the spawn/sleep loop and cannot leak if
    thread creation fails.
    """
    sql = 'SELECT url from url_detail_info limit 1000'
    conn = MysqlConnection().return_conn()
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            url_list = cursor.fetchall()
            rowcount = cursor.rowcount  # number of URLs to check
        finally:
            cursor.close()
    finally:
        conn.close()

    for batch_start in range(0, rowcount, THREAD_NUM):
        for row in url_list[batch_start:batch_start + THREAD_NUM]:
            worker = threading.Thread(target=online, args=(row[0], rowcount))
            worker.start()
        # Pause between batches: too many threads at once causes packet loss.
        time.sleep(2)
def main(): #获取需要检测urls列表 sql = 'select id,url from url_detail_info where virustotal_detail is NULL or virustotal_detail = "" limit 1' conn = MysqlConnection().return_conn() cursor = conn.cursor() cursor.execute(sql) urls = cursor.fetchall() cursor.close() conn.close() if len(urls) == 0: print 'There is not url needs to be scaned with virustotal' sys.exit(0) for url in urls: url_scan = VirusTotalUrls(url[1]) content = url_scan.retrieving_url_report() if not content: print url[1] + " , Please check this url later" continue url_scan.analysis_report(url[0],content)
class File2Sql: def __init__(self,file_source = 'ip_block_source/anhui.txt'): '''初始化函数''' self.conn = MysqlConnection().return_conn() #连接数据库 self.file_source = file_source #ip段txt文件路径和文件名 def file2sql(self,num = 500): '''文件ip段导入到数据库中''' sql = 'INSERT INTO ip_block (start_ip,end_ip,region_id,state) VALUES (%s,%s,"1","0")' cursor = self.conn.cursor() ip_block = file(self.file_source,mode = 'r') #打开文件 ip_block_lines = ip_block.readlines() #得到所有ip段 linecount = len(ip_block_lines) #得到行数 for line in range(linecount): list_ip = ip_block_lines[line].strip().split('\t') #分隔为列表 long_start_ip = ip2long(str(list_ip[0])) long_end_ip = ip2long(str(list_ip[1])) row_count = long_end_ip - long_start_ip+1 counts = row_count/num for count in range(counts): change_ip = long_start_ip + num if change_ip<long_end_ip: print long2ip(long_start_ip),long2ip(change_ip) cursor.execute(sql,(long2ip(long_start_ip),long2ip(change_ip))) long_start_ip = change_ip+1 else: print long2ip(long_start_ip),long2ip(long_end_ip) cursor.execute(sql,(long2ip(long_start_ip),long2ip(change_ip))) break self.conn.commit() ip_block.close() cursor.close() self.conn.close()
def __init__(self, DHOST='114.114.114.114'):
    """Prepare DNS query parameters and connect to the database.

    Args:
        DHOST: IP address of the DNS server to query; may be changed.
    """
    # DNS server address and port (53 is the default).
    self.DHOST = DHOST
    self.DPORT = 53

    # Query header fields: random transaction id, standard query,
    # record type A, class IN, recursion desired.
    self.tid = random.randint(0, 65535)
    self.opcode, self.qtype, self.qclass = Opcode.QUERY, Type.A, Class.IN
    self.rd = 1

    # Database connection.
    connection_source = MysqlConnection()
    self.mysql = connection_source
    self.conn = connection_source.return_conn()
def analysis_report(self,url_id,content): """解析检测报告,并将结果返回,否则返回False""" conn = MysqlConnection().return_conn() cursor = conn.cursor() response_code = json.loads(content)['response_code'] if response_code == 0: #不同的response_code,代表情况不同 print self.url + ' ,This site is not present virustotal\'s database' elif response_code == -2: print self.url + ' ,the requested item is still queued for analysis' elif response_code == 1: url = json.loads(content)['url'] #查询的url scan_date = json.loads(content)['scan_date'] #扫描时间 positives = json.loads(content)['positives'] #是恶意网址的扫描引擎数量 total = json.loads(content)['total'] #总共使用扫描引擎数量 scans = json.loads(content)['scans'] result_list=[] i = 0 for key in scans.keys(): if scans[key]['detected'] == True: result_list.append({str(key):str(scans[key]['result'])}) i = i + 1 sql = 'INSERT INTO virustotal_details(id,url,subtime,total,positives' + createfield(i) + ')' num = ' "%s"' * i li = num.split(' ') sql = sql + ' VALUES ("%s","%s","%s","%s","%s" ' + ','.join(li) + ')' value = (str(url_id),str(url),str(scan_date),str(total),str(positives))+tuple(result_list) sql = sql % value print sql cursor.execute(sql) conn.commit() cursor.close() conn.close()
#!/usr/bin/python #encoding:utf-8 import sys from mysql_connection import MysqlConnection reload(sys) sys.setdefaultencoding( "utf-8" ) if len(sys.argv) <3: print "wrong format,eg. python check_malicious_url.py input.txt output.txt" sys.exit(0) fr_check_url = open(sys.argv[1],'r') fw_result_url = open(sys.argv[2],'w') mysql = MysqlConnection() conn = mysql.return_conn() cursor = conn.cursor() check_url_list = fr_check_url.readlines() for url in check_url_list: sql = 'SELECT * FROM url_detail_info WHERE url LIKE ' +'\"' + str(url.strip())+'%'+'\"' cursor.execute(sql) result = cursor.fetchall() if not result: fw_result_url.write(url.strip() + '\t' + 'No' +'\n') else: fw_result_url.write(url.strip() + '\t' + 'Yes' +'\n') print 'Check end' fr_check_url.close()
def __init__(self, file_source='ip_block_source/anhui.txt'):
    """Connect to the database and record where the IP ranges live.

    Args:
        file_source: path and file name of the text file with IP ranges.
    """
    connection_factory = MysqlConnection()
    self.conn = connection_factory.return_conn()  # database connection
    self.file_source = file_source
dir_url = c.getinfo(c.EFFECTIVE_URL) #得到重定向网址 print checkurl + ' ' + str(http_code) +' ' + dir_url c.close() b.close() lock.acquire() #线程锁 result_urls.append({'url': checkurl,'http_code': http_code,'dir_url': dir_url}) URL_COUNT += 1 if URL_COUNT % RESULTS_NUM == 0 or URL_COUNT == rowcount: #每隔RESULTS_NUM个url时候,将结果存入到数据库中 print result_urls sql = 'UPDATE url_detail_info SET online = %s WHERE url = %s ' conn = MysqlConnection().return_conn() cursor = conn.cursor() for url in result_urls: cursor.execute(sql,(url['http_code'],url['url'])) conn.commit() #更新 cursor.close() #关闭 conn.close() del result_urls[:] #清除已存入的url探测结果 if URL_COUNT == rowcount: print 'end' lock.release() #解锁
def __init__(self):
    """Create the MySQL helper and keep both it and its connection."""
    connection_source = MysqlConnection()
    self.mysql = connection_source
    self.conn = connection_source.return_conn()
class PhishsiteData: ''' phishwebsite类 ''' def __init__(self): ''' 初始化,连接数据库''' self.mysql = MysqlConnection() self.conn = self.mysql.return_conn() def json2mysql(self): ''' 把json导入mysql数据库data表中 ''' filedata = open('data.json') jsdata = json.load(filedata) cursor = self.conn.cursor() cursor.execute('SELECT hash from url_detail_info') hash_list = cursor.fetchall() sql = "INSERT INTO url_detail_info (url,domain,domain_type,domain_info,type,target,submission_time,verification_time,online,verified,url_source,hash,virustotal_detail) VALUES( %s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)" update_num = 0 #统计本次更新url数量 print 'Updating....' try: for item in jsdata: #添加最新的url url_hash = hash(item['url']) if not (str(url_hash),) in hash_list: #使用hash值来去重 print 'Add url : ' + item.get('url','0') url = item.get('url','0') #得到url domain = urlparse(item.get('url','0')).netloc #得到domain if not item.get('details'): ip_address = '0' else: ip_address = item.get('details','0')[0].get('ip_address','0') #得到ip target = item.get('target','0') #得到target submission_time = item.get('submission_time','0') #得到submission_time verification_time = item.get('verification_time','0')#得到verification_time online = item.get('online','0') #得到online verified = item.get('verified','0') #得到verified phish_id = item.get('phish_id','0') #得到phish_id update_num = update_num + cursor.execute(sql,(url,domain,'1',ip_address,'phishing',target,submission_time,verification_time,online,verified,phish_id,url_hash,'1')) cursor.execute('INSERT INTO virustotal_info (url,hash,source) VALUES(%s,%s,%s) ',(url,url_hash,'1')) if update_num % 800 == 0: #每800个result插入到数据库中 self.conn.commit() self.conn.commit() #更新 print 'Success update '+str(update_num)+' url(s)' filedata.close() cursor.close() self.conn.close() except MySQLdb.Error,e: #异常处理 print "Mysql Error %d: %s" % (e.args[0], e.args[1])
class DomainToIp: def __init__(self,DHOST='114.114.114.114'): ''' 初始化类DomainToIp,连接数据库,DNS服务器ip地址可以更改 ''' self.DHOST = DHOST #DNS 服务器的地址 self.DPORT = 53 #默认端口是53 self.tid = random.randint(0,65535) #tid为随机数 self.opcode = Opcode.QUERY #标准查询 self.qtype = Type.A #查询类型为A self.qclass = Class.IN #查询类IN self.rd = 1 #期望递归查询 self.mysql = MysqlConnection() #连接数据库 self.conn = self.mysql.return_conn() def send_domain_receive_ip(self): ''' 解析domain name对应的ip,并保存到数据库 ''' sql = 'SELECT domain from url_detail_info limit 20' #where domain_type is NULL OR domain_type = ""' cursor = self.conn.cursor() cursor.execute(sql) domains = cursor.fetchall() #获取数据库中ip为空的数据 try: s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) #建立一个UDP套接字(SOCK_DGRAM,代表UDP,AF_INET表示IPv4) except socket.error,msg: print "无法创建socket.Error code:" +str(msg[0])+',Error message:'+msg[1] #error sys.exit(1) source_port = random.randint(1024, 65535) #随机port s.bind(('', source_port)) #绑定,检测所有接口 domain_source = [] #发送的domain数量 domain_result = [] #接收到的domain数量,这两个变量主要用来判断丢包情况 result = [] #得到的结果 '''循环发送需要解析的domain name''' count = 0 rowcount = len(domains) while count * UPDATE_RATE < rowcount: #google每次最多查询500个 inputs = domains[count * UPDATE_RATE : (count + 1) * UPDATE_RATE] for domain in inputs: domain_source.append(domain[0]) m = Lib.Mpacker() m.addHeader(self.tid, 0, self.opcode, 0, 0, self.rd, 0, 0, 0, 1, 0, 0, 0) m.addQuestion(domain[0],self.qtype,self.qclass) request = m.getbuf() try: s.sendto(request,(self.DHOST, self.DPORT)) print 'domain: ',domain[0]," send to Dns server:",self.DHOST except socket.error,reason: print reason continue # result = [] #得到的结果 '''循环接收收到的返回header''' while 1: try: r,w,e = select.select([s], [], [],3) if not (r or w or e): break (data,addr) = s.recvfrom(65535) u = Lib.Munpacker(data) r = Lib.DnsResult(u,{}) if r.header['status'] == 'NOERROR': #print 'answers',len(r.answers),r.questions[0]['qname'] if len(r.answers) != 0: if r.answers[0]['typename'] == 'A': result.append({'domain' : 
r.questions[0]['qname'],'domain_type': A_FLAG,'domain_info':r.answers[0]['data']}) domain_result.append(r.questions[0]['qname']) elif r.answers[0]['typename'] == 'CNAME': result.append({'domain' : r.questions[0]['qname'],'domain_type': CNAME_FLAG,'domain_info':[r.answers[1]['name'],r.answers[1]['data']]}) domain_result.append(r.questions[0]['qname']) else: print '没有这种类型,请修改程序' else: result.append({'domain' : r.questions[0]['qname'],'domain_type': ANSWER_EMPTY_FLAG,'domain_info': 'answerempty'}) domain_result.append(r.questions[0]['qname']) elif r.header['status'] == 'NXDOMAIN': result.append({'domain' : r.questions[0]['qname'],'domain_type': NXDOMAIN_FLAG,'domain_info':[r.authority[0]['name'],r.authority[0]['data'][0]]}) domain_result.append(r.questions[0]['qname']) elif r.header['status'] == 'SERVFAIL': result.append({'domain' : r.questions[0]['qname'],'domain_type': SERVFAIL_FLAG,'domain_info': 'servfail'}) #status ='SERVFAIL'情况的判断 domain_result.append(r.questions[0]['qname']) else: print 'No this type' except socket.error, reason: print reason continue