class UserAgentType:
    """Classify user-agent strings as browser or robot using UASparser."""

    def __init__(self, cache_dir='/home/natty/FCDDOS/UASparserCache'):
        """Create the underlying parser.

        Args:
            cache_dir: Value handed to ``UASparser`` as its first positional
                argument. Defaults to the path that was previously
                hard-coded, so existing ``UserAgentType()`` callers are
                unaffected.
        """
        self.uas_parser = UASparser(cache_dir)

    def _agent_type(self, userAgent):
        """Return the ``"typ"`` entry of the parse result for *userAgent*."""
        return self.uas_parser.parse(str(userAgent))["typ"]

    def isBrowser(self, userAgent):
        """Return True when *userAgent* is treated as a browser.

        A missing, empty or ``'-'`` user agent counts as a browser; any
        other value is a browser when its parsed type is listed in
        ``USER_AGENT_BROWSER_TYPES``.
        """
        # `not userAgent` already covers both None and the empty string.
        if not userAgent or userAgent == '-':
            return True
        return self._agent_type(userAgent) in USER_AGENT_BROWSER_TYPES

    def isRobot(self, userAgent):
        """Return True when *userAgent* is classified as a robot.

        A missing, empty or ``'-'`` user agent is never a robot; any other
        value is a robot when its parsed type equals ``USER_AGENT_ROBOT``.
        """
        if not userAgent or userAgent == '-':
            return False
        return self._agent_type(userAgent) == USER_AGENT_ROBOT
def uas_parser(logRows=None):
    """Parse the user agent of each log row and write a tab-separated report.

    The report is written to ``ua_<ddmmyyHHMM>.log`` (relative path, named
    after the current local time). Each output line holds the parsed
    ``typ``, ``ua_name`` and ``os_name``; when ``ua_name`` is ``"unknown"``
    the raw user-agent string is appended as a fourth column.

    Args:
        logRows: Indexable rows whose element at index 8 is the user-agent
            value. ``None`` (the default) is treated as no rows.
    """
    rows = [] if logRows is None else logRows
    parser = UASparser()
    total_rows_in_log = len(rows)
    now = datetime.now().strftime('%d%m%y%H%M')

    # The context manager closes the report even if parsing a row raises.
    with open('ua_' + now + '.log', 'w') as out:
        for row_log, row in enumerate(rows):
            # TODO: replace this in-place counter with a real progress bar.
            sys.stdout.write("\rParsing row log #%s of %s"
                             % (row_log, total_rows_in_log))
            sys.stdout.flush()

            ua_string = str(row[8])
            parsed_ua = parser.parse(ua_string)

            if parsed_ua['ua_name'] == "unknown":
                # Keep the raw string so unrecognised agents can be inspected.
                out.write("%s\t%s\t%s\t%s\n" % (parsed_ua['typ'],
                                                parsed_ua['ua_name'],
                                                parsed_ua['os_name'],
                                                ua_string))
            else:
                out.write("%s\t%s\t%s\n" % (parsed_ua['typ'],
                                            parsed_ua['ua_name'],
                                            parsed_ua['os_name']))
def main():
    """Parse one sample user-agent string and pretty-print the result."""
    parser = UASparser('.')
    sample_agent = 'SonyEricssonK750i/R1L Browser/SEMC-Browser/4.2 Profile/MIDP-2.0 Configuration/CLDC-1.1'

    banner = 'User Agent: {0}'.format(sample_agent)
    print(banner)

    # entire_url accepts only 'ua_icon', 'os_icon', or both.
    parsed = parser.parse(sample_agent, entire_url='ua_icon,os_icon')
    pprint(parsed)
def uas_parser(logRows=None):
    """Write a tab-separated user-agent report for the given log rows.

    For every row, the value at index 8 is converted to a string and parsed
    with ``UASparser``. One line per row is written to
    ``ua_<ddmmyyHHMM>.log`` (relative path, timestamp taken from the current
    local time) containing ``typ``, ``ua_name`` and ``os_name``; rows whose
    ``ua_name`` is ``"unknown"`` also get the raw user-agent string.

    Args:
        logRows: Indexable rows; index 8 of each row is read as the user
            agent. Defaults to ``None``, which means no rows.
    """
    if logRows is None:
        logRows = []

    parser = UASparser()
    total_rows_in_log = len(logRows)
    timestamp = datetime.now().strftime('%d%m%y%H%M')

    # `with` guarantees the report is flushed and closed on any exit path.
    with open('ua_' + timestamp + '.log', 'w') as report:
        for row_log, log_row in enumerate(logRows):
            # TODO: replace this in-place counter with a real progress bar.
            sys.stdout.write("\rParsing row log #%s of %s"
                             % (row_log, total_rows_in_log))
            sys.stdout.flush()

            ua_string = str(log_row[8])
            parsed_ua = parser.parse(ua_string)
            summary = (parsed_ua['typ'], parsed_ua['ua_name'],
                       parsed_ua['os_name'])

            if parsed_ua['ua_name'] == "unknown":
                # Unrecognised agents carry the raw string as a 4th column.
                report.write("%s\t%s\t%s\t%s\n" % (summary + (ua_string,)))
            else:
                report.write("%s\t%s\t%s\n" % summary)
from uasparser import UASparser

# Script: parse a fixed list of sample user agents and print each parsed type.
uas_parser = UASparser('/home/natty/FCDDOS/UASparserCache')

# NOTE(review): this first sample set is overwritten by the assignment right
# below and is therefore never parsed -- presumably kept so the two sets can
# be swapped by hand; confirm before deleting.
userAgents = [
    "Bimbot/1.0",
    "Mozilla/5.0 (compatible; U; ABrowse 0.6; Syllable) AppleWebKit/420+ (KHTML, like Gecko)",
    "Mozilla/5.0 (Linux; U; Android 4.0.3; de-ch; HTC Sensation Build/IML74K) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30",
    "Bunjalloo/0.7.6(Nintendo DS;U;en)",
    "Wget/1.9+cvs-stable (Red Hat modified)",
    "Mozilla/5.0 (X11; Linux i686; rv:7.0.1) Gecko/20110929 Thunderbird/7.0.1",
    "Mozilla/5.0 (compatible; AbiLogicBot/1.0; +http://www.abilogic.com/bot.html)",
    "EmailSiphon",
    "CSE HTML Validator Lite Online (http://online.htmlvalidator.com/php/onlinevallite.php)",
    "GreatNews/1.0",
    "BinGet/1.00.A (http://www.bin-co.com/php/scripts/load/)",
    "AppEngine-Google; (+http://code.google.com/appengine; appid: unblock4myspace)",
    "AppEngine-Google; (+http://code.google.com/appengine; appid: webetrex)",
    "amaya/11.3.1 libwww/5.4.1",
]

# The sample set that is actually parsed.
userAgents = [
    'Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Win 9x4.90;http://www.Abolimba.de)',
    'GetRight/3.2',
    'Mozilla/5.0 (X11; U; Linux armv6l; rv: 1.8.1.5pre) Gecko/20070619 Minimo/0.020',
    'GcMail Browser/1.0 (compatible; MSIE 5.0; Windows 98) ',
    'Chilkat/1.0.0 (+http://www.chilkatsoft.com/ChilkatHttpUA.asp)',
    'Klondike/1.50 (HTTP Win32)',
    'HTMLParser/1.6',
    'Abilon',
    'Banshee 1.5.1 (http://banshee-project.org/)',
    'ApacheBench/2.3',
    'http://Anonymouse.org/ (Unix)',
    'Mozilla/5.0 (compatible; WASALive-Bot ; http://blog.wasalive.com/wasalive-bots/)',
]

for userAgent in userAgents:
    result = uas_parser.parse(userAgent)
    # Single-argument call form prints identically on Python 2 and 3.
    print(result["typ"])
import json

from uasparser import UASparser

# Script: parse the user agents listed in two text files (one per line) and
# dump (raw string, parse result) pairs to uas.json.
up = UASparser()
uas_list = []

# Read both inputs with context managers so the handles are closed promptly.
with open('user_agents_sample.txt', 'r') as sample_file:
    ua_file = sample_file.read().split('\n')
with open('user_agents.txt', 'r') as full_file:
    # Only the first 10000 lines of the second file are used.
    ua_file += full_file.read().split('\n')[:10000]

c = 0
for uas in ua_file:
    if uas:  # skip blank lines
        c += 1
        if c % 1000 == 0:
            # Progress report; formatted so Python 2 and 3 print the same text.
            print('%s / %s' % (c, len(ua_file)))
        uas_list.append((uas, up.parse(uas)))

# Closing the output explicitly guarantees the JSON is flushed to disk.
with open('uas.json', 'w') as json_file:
    json.dump(uas_list, json_file)
import json

from uasparser import UASparser

# Script: build uas.json from the user-agent strings found (one per line) in
# user_agents_sample.txt and the first 10000 lines of user_agents.txt.
up = UASparser()
uas_list = []

# Context managers close each input as soon as it has been read.
with open('user_agents_sample.txt', 'r') as sample_handle:
    ua_file = sample_handle.read().split('\n')
with open('user_agents.txt', 'r') as bulk_handle:
    ua_file += bulk_handle.read().split('\n')[:10000]

c = 0
for uas in ua_file:
    if uas:  # blank lines are neither counted nor parsed
        c += 1
        if c % 1000 == 0:
            # Same output text under Python 2 and Python 3.
            print('%s / %s' % (c, len(ua_file)))
        uas_list.append((uas, up.parse(uas)))

# Write through a managed handle so the JSON is flushed and closed.
with open('uas.json', 'w') as output_handle:
    json.dump(uas_list, output_handle)