def makePGTSComparisonYAML(self):
    """Regenerate outfile.yaml from the PGTS browser list.

    Parses every user-agent string in the pgts_browser_list.yaml resource
    and writes the parser's (family, major, minor, patch) results back out
    in the test-case YAML format. (Python 2 code: `print >>` syntax.)
    """
    import codecs
    outfile = codecs.open('outfile.yaml', 'w', 'utf-8')
    print >> outfile, "test_cases:"
    # safe_load: the resource is plain data; yaml.load without an explicit
    # Loader is unsafe and deprecated. `with` closes the file even on error.
    with open(os.path.join(TEST_RESOURCES_DIR, 'pgts_browser_list.yaml')) as yamlFile:
        yamlContents = yaml.safe_load(yamlFile)
    for test_case in yamlContents['test_cases']:
        user_agent_string = test_case['user_agent_string']
        kwds = {}
        if 'js_ua' in test_case:
            # NOTE(review): eval() of file contents -- acceptable only
            # because the test resource is trusted, never for external input.
            kwds = eval(test_case['js_ua'])
        (family, major, minor, patch) = user_agent_parser.ParseUserAgent(user_agent_string, **kwds)
        # Escape any double-quotes in the UA string
        user_agent_string = re.sub(r'"', '\\"', user_agent_string)
        print >> outfile, ' - user_agent_string: "' + user_agent_string + '"' + "\n" +\
            ' family: "' + family + "\"\n" +\
            " major: " + ('' if (major is None) else "'" + major + "'") + "\n" +\
            " minor: " + ('' if (minor is None) else "'" + minor + "'") + "\n" +\
            " patch: " + ('' if (patch is None) else "'" + patch + "'")
    outfile.close()
def runUserAgentTestsFromYAML(self, file_name):
    """Run ParseUserAgent over every test case in the YAML resource
    *file_name* and assert each result matches the expected
    family/major/minor/patch values.
    """
    # `with` + safe_load: guaranteed close and no unsafe default Loader.
    with open(os.path.join(TEST_RESOURCES_DIR, file_name)) as yamlFile:
        yamlContents = yaml.safe_load(yamlFile)
    for test_case in yamlContents['test_cases']:
        # Inputs to Parse()
        user_agent_string = test_case['user_agent_string']
        kwds = {}
        if 'js_ua' in test_case:
            # NOTE(review): eval of test-resource data; trusted input only.
            kwds = eval(test_case['js_ua'])
        # The expected results
        expected = {'family': test_case['family'],
                    'major': test_case['major'],
                    'minor': test_case['minor'],
                    'patch': test_case['patch']}
        # (dropped the dead `result = {}` pre-assignment -- it was
        # immediately overwritten)
        result = user_agent_parser.ParseUserAgent(user_agent_string, **kwds)
        self.assertEqual(
            result, expected,
            "UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>".format(
                user_agent_string,
                expected['family'], expected['major'], expected['minor'], expected['patch'],
                result['family'], result['major'], result['minor'], result['patch']))
def makePGTSComparisonYAML(self):
    """Write outfile.yaml listing the parser's result for every user-agent
    string in the PGTS browser list resource. (Python 2 code.)
    """
    import codecs

    def _quoted(part):
        # None becomes an empty field; everything else is single-quoted.
        return "" if part is None else "'" + part + "'"

    outfile = codecs.open("outfile.yaml", "w", "utf-8")
    print >> outfile, "test_cases:"
    yamlFile = open(
        os.path.join(TEST_RESOURCES_DIR, "pgts_browser_list.yaml"))
    yamlContents = yaml.load(yamlFile, Loader=SafeLoader)
    yamlFile.close()
    for test_case in yamlContents["test_cases"]:
        user_agent_string = test_case["user_agent_string"]
        kwds = {}
        if "js_ua" in test_case:
            kwds = eval(test_case["js_ua"])
        (family, major, minor,
         patch) = user_agent_parser.ParseUserAgent(user_agent_string, **kwds)
        # Escape any double-quotes in the UA string
        user_agent_string = re.sub(r'"', '\\"', user_agent_string)
        # Assemble the YAML entry line-by-line, then emit it in one print.
        entry = "\n".join([
            ' - user_agent_string: "' + user_agent_string + '"',
            ' family: "' + family + '"',
            " major: " + _quoted(major),
            " minor: " + _quoted(minor),
            " patch: " + _quoted(patch),
        ])
        print >> outfile, entry
    outfile.close()
def checkTags(self, fp):
    """Map the request's User-Agent header to a single-element tag list."""
    ua = fp["User-Agent"]
    # Exact UA strings shipped by the various Tor Browser releases are
    # checked first, before falling back to the general-purpose parser.
    tor_agents = {
        "Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0": torbrowser80,
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0": torbrowser80,
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0": torbrowser80,
        "Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0": torbrowser70,
        "Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0": tor6,
        "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0": tor5,
        "Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0": tor4,
    }
    if ua in tor_agents:
        return [tor_agents[ua]]
    # We parse the UA with a more powerful parser.  Substring checks run
    # in order; the first family fragment that matches wins.
    family = user_agent_parser.ParseUserAgent(ua)["family"]
    for needle, tag in (("Chrome", chrome), ("Firefox", firefox),
                        ("Edge", edge), ("Bot", bot), ("IE", ie)):
        if needle in family:
            return [tag]
    return [others]
def get(self, request, *args, **kwargs):
    """Render the detail view with the parsed User-Agent in the context."""
    self.object = self.get_object()
    parsed_ua = user_agent_parser.ParseUserAgent(request.META['HTTP_USER_AGENT'])
    context = self.get_context_data(object=self.object, user_agent=parsed_ua)
    return self.render_to_response(context)
def set_cookie(self, response, name, value, max_age, httponly=True, samesite='Strict'):
    """Set cookie *name*=*value* on the Flask *response*.

    The Secure flag follows the app URL scheme, except that Electron
    clients served over plain HTTP get a non-secure cookie (Electron over
    HTTP would otherwise drop it).
    """
    # Idiom fix: `'https:' in x` is already a bool; no ternary needed.
    secure = 'https:' in self._app_url
    scheme = flask.request.environ.get(
        'wsgi.url_scheme',
        flask.request.environ.get('HTTP_X_FORWARDED_PROTO', 'http'))
    is_http = scheme == 'http'
    ua = user_agent_parser.ParseUserAgent(
        flask.request.environ.get('HTTP_USER_AGENT', ''))
    if ua.get('family') == 'Electron' and is_http:
        secure = False
    response.set_cookie(
        name,
        value=value,
        max_age=max_age,
        secure=secure,
        path=self._app.config['requests_pathname_prefix'].rstrip('/'),
        httponly=httponly,
        samesite=samesite)
def mapper(self, _, line):
    """MapReduce mapper: count browsers and emit (client, (1, bytes))
    for every access-log line matching the module-level regex."""
    match = re.match(regex, line)
    if not match:
        self.increment_counter('Incorrect input', 'Incorrect input', 1)
        return
    groups = match.groups()
    family = user_agent_parser.ParseUserAgent(groups[6])['family']
    self.increment_counter('Browsers', family, 1)
    yield groups[0], (1, self.get_bytes(groups[4]))
def parse_file(input_file_path, output_file_path):
    """
    Read the labelled input file, parse each user agent string, and compare
    the parsed browser name and main version against the labelled truth.
    Mismatching rows are appended to the output file.

    Args:
        input_file_path: path of the input TSV (ua, browser name, version)
        output_file_path: path mismatching rows are appended to (TSV)
    """
    with open(input_file_path, 'r') as f1, open(output_file_path, 'a') as f2:
        reader = csv.reader(f1, delimiter='\t')
        writer = csv.writer(f2, delimiter='\t')
        count = 0
        correct_count = 0
        incorrect_count = 0
        error_count = 0
        for row in reader:
            count += 1
            user_agent_string = row[0]
            true_browser_name = row[1]
            true_main_version = row[2]
            # BUG FIX: result_dict must exist before the try block -- if
            # ParseUserAgent itself raises, the except handler printed an
            # unassigned name and died with NameError.
            result_dict = None
            try:
                result_dict = user_agent_parser.ParseUserAgent(user_agent_string)
                browser_name = result_dict['family']
                main_version = result_dict['major']
                # correct browser name and main version for some special cases
                browser_name, main_version = correct_special_cases(
                    user_agent_string, browser_name, main_version)
                if browser_name == true_browser_name and main_version == true_main_version:
                    correct_count += 1
                else:
                    incorrect_count += 1
                    writer.writerow([user_agent_string, true_browser_name,
                                     true_main_version, browser_name, main_version])
            except Exception as e:
                error_count += 1
                print(type(e))
                print(e)
                print(user_agent_string)
                print(result_dict)
                print(true_browser_name, ", ", true_main_version)
                print(" ")
    print("Total number of user agents processed: ", count)
    print("correct number: ", correct_count)
    print("incorrect number: ", incorrect_count)
    print("number of records unable to handle: ", error_count)
def is_safari(request):
    """True when the request's User-Agent family is Safari; None when the
    ua_parser package is not installed."""
    try:
        from ua_parser import user_agent_parser
    except ImportError:  # pragma: no cover
        return None
    user_agent = request.META.get("HTTP_USER_AGENT", "")
    return user_agent_parser.ParseUserAgent(user_agent)["family"] == "Safari"
def handle(self, request):
    """Configure the Jira integration for the requesting user's organizations.

    Flow: verify the signed Atlassian Connect request, force login if
    needed (with a Safari-specific hint), then show or process the
    organization-selection form, syncing OrganizationIntegration rows.
    """
    # Resolve and verify the Jira integration from the signed request.
    try:
        integration = get_integration_from_request(request, "jira")
    except AtlassianConnectValidationError:
        return self.get_response({"error_message": "Unable to verify installation."})
    except ExpiredSignatureError:
        # Signature is valid but stale; the client should refresh and retry.
        return self.get_response({"refresh_required": True})
    if not request.user.is_authenticated():
        parsed_user_agent = user_agent_parser.ParseUserAgent(
            request.META.get("HTTP_USER_AGENT", "")
        )
        # not enabling cross site cookies seems to be a common problem with Safari
        # as a result, there is a Safari specific link to instructions when is_safari=true
        is_safari = parsed_user_agent.get("family") == "Safari"
        return self.get_response(
            {
                "login_required": True,
                "is_safari": is_safari,
                "login_url": absolute_uri(reverse("sentry-login")),
            }
        )
    # Only organizations where the user holds a global role are eligible.
    organizations = list(
        request.user.get_orgs().filter(
            id__in=OrganizationMember.objects.filter(
                role__in=[r.id for r in roles.get_all() if r.is_global], user=request.user
            ).values("organization")
        )
    )
    form = JiraConfigForm(organizations, request.POST)
    if request.method == "GET" or not form.is_valid():
        # Initial render (or invalid POST): pre-select orgs already linked.
        active_orgs = OrganizationIntegration.objects.filter(
            integration__provider="jira",
            integration=integration,
            organization__in=organizations,
        ).values_list("organization_id", flat=True)
        form = JiraConfigForm(organizations, initial={"organizations": active_orgs})
        return self.get_response({"form": form, "organizations": organizations})
    enabled_orgs = [o for o in organizations if o.id in form.cleaned_data["organizations"]]
    disabled_orgs = list(set(organizations) - set(enabled_orgs))
    # Remove Jira integrations not in the set of enabled organizations
    OrganizationIntegration.objects.filter(
        integration__provider="jira", integration=integration, organization__in=disabled_orgs
    ).delete()
    # Ensure all enabled integrations.
    for org in enabled_orgs:
        integration.add_organization(org, request.user)
    return self.get_response({"form": form, "completed": True})
def process_tcp_packet(packet): if not packet.haslayer(http.HTTPRequest): # This packet doesn't contain an HTTP request so we skip it return http_layer = packet.getlayer(http.HTTPRequest) ip_layer = packet.getlayer(IP) try: ip = '{0[src]}'.format(ip_layer.fields, http_layer.fields) ua_string = '{1[User-Agent]}'.format(ip_layer.fields, http_layer.fields) agent = user_agent_parser.ParseUserAgent(ua_string) print ua_string device = user_agent_parser.ParseDevice(ua_string) os = user_agent_parser.ParseOS(ua_string) browser_family = agent['family'] browser_major = agent['major'] browser_minor = agent['minor'] browser_patch = agent['patch'] device_brand = device['brand'] device_family = device['family'] device_model = device['model'] os_family = os['family'] os_major = os['major'] os_minor = os['minor'] os_patch = os['patch'] os_patch_minor = os['patch_minor'] key = find_key(ip) device_hash = wdb.Hash(key) hostname = device_hash['hostname'] #print key,hostname,ip,browser_family,browser_major,browser_minor,browser_patch,device_brand,device_family,device_model,os_family,os_major,os_minor,os_patch,os_patch_minor os_version = str(os_major) +'.'+ str(os_minor) +'.'+ str(os_patch) browser_combined = browser_family +'('+ str(browser_major) +'.'+ str(browser_minor) +'.'+ str(browser_patch) +')' k = wdb.Hash(key) if "Other" not in os_family: k.update(os=os_family) print ip,key,os_family if os_family == "Mac OS X": k.update(category="Desktop/Laptop") if os_family == "iOS": k.update(category="Smartphones/PDAs/Tablets") if ("None" or "Other") not in os_version: k.update(os_version=os_version) print ip,key,os_version if ("None" or "Other") not in browser_combined: k.update(browser_family=browser_combined) print ip,key,browser_combined except KeyError: pass
def ua_filter(ua_string, Chrome, Firefox, Chromium, Safari, Opera, Other):
    """Filter a UA string by browser flags.

    Returns False when a flagged family matches, 2 when Other is flagged
    and the family is none of the four named browsers, True otherwise.
    (NOTE: the Chromium flag only enables parsing; it never matches a
    family -- behavior preserved from the original.)
    """
    if not (Chrome or Firefox or Chromium or Safari or Opera or Other):
        return True
    family = user_agent_parser.ParseUserAgent(ua_string)['family']
    flag_by_family = {"Chrome": Chrome, "Firefox": Firefox,
                      "Safari": Safari, "Opera": Opera}
    if flag_by_family.get(family):
        return False
    if Other and family not in flag_by_family:
        return 2
    return True
def get_request_type(meta):
    """Classify an inbound request by its User-Agent and Referer headers.

    Returns one of RequestType.GMAIL / GITHUB / DIRECT / UNKNOWN.
    """
    ua_string = meta.get("HTTP_USER_AGENT", None)
    http_referer = meta.get("HTTP_REFERER", None)
    ua_dict = user_agent_parser.ParseUserAgent(ua_string)
    family = ua_dict['family']
    # Idiom fix: `is None`, not `== None`.
    if http_referer is None and family == 'GmailImageProxy':
        return RequestType.GMAIL
    if http_referer is None and family == 'Other' and "github-camo" in ua_string:
        return RequestType.GITHUB
    if http_referer:
        return RequestType.DIRECT
    # Lazy %-args: the message is only built if the record is emitted.
    logger.info("Could not get RequestType of %s", ua_string)
    return RequestType.UNKNOWN
def parse_user_agent(user_agent_string):
    """Return {"name", "version"} for a UA string; a missing major version
    falls back to "0", missing minor/patch segments are simply omitted."""
    ua = user_agent_parser.ParseUserAgent(user_agent_string)
    segments = ["0" if ua["major"] is None else ua["major"]]
    for part in ("minor", "patch"):
        if ua[part] is not None:
            segments.append(ua[part])
    return {"name": ua["family"], "version": ".".join(segments)}
def get_request_parsed_ua_string(request, parse=None):
    """Parse the request's UA string; *parse* picks the sub-parser
    ("agent" / "os" / "device"), anything else gets the full Parse()."""
    ua_string = get_request_ua_string(request)
    if not isinstance(ua_string, str):
        ua_string = ua_string.decode('utf-8', 'ignore')
    dispatch = {
        "agent": user_agent_parser.ParseUserAgent,
        "os": user_agent_parser.ParseOS,
        "device": user_agent_parser.ParseDevice,
    }
    parse_fn = dispatch.get(parse, user_agent_parser.Parse)
    return parse_fn(ua_string)
def get_user_agent_data(request):
    """Summarise OS, browser and device from the request's User-Agent.

    Returns {"os": "Family Major.Minor", "browser": "Family Major",
    "device": "Brand Family Model"} with None parts rendered as ''.
    """
    userAgent = request.META['HTTP_USER_AGENT']
    from ua_parser import user_agent_parser

    def _text(value):
        # None -> '' substitution done explicitly. The original used
        # `a or '' + ' ' + b or ''`, but `+` binds tighter than `or`, so
        # the version was dropped whenever the family was truthy and a
        # None part raised TypeError otherwise.
        return value if value is not None else ''

    browser = user_agent_parser.ParseUserAgent(userAgent)
    # Renamed from `os` to avoid shadowing the stdlib module name.
    os_info = user_agent_parser.ParseOS(userAgent)
    device = user_agent_parser.ParseDevice(userAgent)
    return {
        "os": _text(os_info['family']) + ' ' + _text(os_info['major']) + '.' + _text(os_info['minor']),
        "browser": _text(browser['family']) + ' ' + _text(browser['major']),
        "device": _text(device['brand']) + ' ' + _text(device['family']) + ' ' + _text(device['model']),
    }
def do_GET(self):
    # Route GET requests by path. (Python 2 BaseHTTPServer-style handler.)
    # /list.json -> parsed UA family of the caller; /incident_list -> stub
    # payload; anything else -> default static file serving.
    print self.path
    if self.path.find("/list.json") >= 0:
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        # Echo back the browser family parsed from the caller's own UA.
        data = self.headers.getheader('User-Agent')
        data = user_agent_parser.ParseUserAgent(data)
        self.wfile.write(data['family'])
    elif self.path.find("/incident_list") >= 0:
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.end_headers()
        # Placeholder body; despite the JSON content type this is plain text.
        data = 'halp'
        self.wfile.write(data)
    else:
        return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)
def is_old(ua_string):
    """True when the UA string identifies a browser older than the
    configured (or, for IE, hard-coded) minimum major version."""
    if ua_string is None:
        return False
    ua = user_agent_parser.ParseUserAgent(ua_string)
    family = ua['family']
    # IE has a fixed floor; the rest read their minimum from config.
    if family == 'IE':
        return major_int_lt(ua, 11)
    config_keys = {
        'Chrome': 'ua_min_chrome_version',
        'Firefox': 'ua_min_firefox_version',
        'Safari': 'ua_min_safari_version',
        'Yandex Browser': 'ua_min_yandex_version',
    }
    config_key = config_keys.get(family)
    if config_key is not None:
        return major_int_lt_config(ua, config_key)
    return False
def parse_ua_text(ua_text):
    """Flatten one UA string into a CSV row:
    [ua, browser name/version, os name/version, device name/brand/model]."""
    browser_data = user_agent_parser.ParseUserAgent(ua_text)
    os_data = user_agent_parser.ParseOS(ua_text)
    device_data = user_agent_parser.ParseDevice(ua_text)
    # str() keeps None values as the literal "None", matching downstream CSV.
    return [
        ua_text,
        str(browser_data['family']),
        str(browser_data['major']),
        str(os_data['family']),
        str(os_data['major']),
        str(device_data['family']),
        str(device_data['brand']),
        str(device_data['model']),
    ]
def runUserAgentTestsFromYAML(self, file_name):
    """Run ParseUserAgent over every test case in the YAML resource
    *file_name* and assert each result matches the expected values."""
    # `with` guarantees the handle closes even if YAML parsing raises.
    with open(os.path.join(TEST_RESOURCES_DIR, file_name)) as yamlFile:
        yamlContents = yaml.load(yamlFile, Loader=SafeLoader)
    for test_case in yamlContents["test_cases"]:
        # Inputs to Parse()
        user_agent_string = test_case["user_agent_string"]
        kwds = {}
        if "js_ua" in test_case:
            # NOTE(review): eval of test-resource data; trusted input only.
            kwds = eval(test_case["js_ua"])
        # The expected results
        expected = {
            "family": test_case["family"],
            "major": test_case["major"],
            "minor": test_case["minor"],
            "patch": test_case["patch"],
        }
        # (dropped the dead `result = {}` pre-assignment)
        result = user_agent_parser.ParseUserAgent(user_agent_string, **kwds)
        self.assertEqual(
            result,
            expected,
            "UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>"
            .format(
                user_agent_string,
                expected["family"],
                expected["major"],
                expected["minor"],
                expected["patch"],
                result["family"],
                result["major"],
                result["minor"],
                result["patch"],
            ),
        )
def sessions(self):
    """Return (Session, description) pairs for this user's logged-in
    sessions; LoggedInUser rows whose session no longer exists are pruned."""
    from .models import LoggedInUser

    def _joined(parsed, sep):
        # Join the parser's non-None values in dict order.
        return sep.join(v for v in parsed.values() if v is not None)

    active = []
    for entry in LoggedInUser.objects.filter(user=self):
        try:
            session = Session.objects.get(session_key=entry.session_key)
        except Session.DoesNotExist:
            # Stale record: the session expired -- drop the row.
            entry.delete()
            continue
        os_part = _joined(user_agent_parser.ParseOS(entry.user_agent), '.')
        browser_part = _joined(user_agent_parser.ParseUserAgent(entry.user_agent), '.')
        device_part = _joined(user_agent_parser.ParseDevice(entry.user_agent), ' ')
        active.append((session, ' '.join([device_part, os_part, browser_part])))
    return active
def solve():
    """Tally browser families seen in the access log and return
    {family: "NN.NN%"} percentage strings."""
    counts = {}
    with open("data/data-01.txt") as f:
        for line in f.readlines():
            ua_string = seg_access_log(line)[-4]
            family = user_agent_parser.ParseUserAgent(ua_string)["family"]
            if family == "Facebook":
                print(family)
            counts[family] = counts.get(family, 0) + 1
    total = sum(counts.values())
    return {name: "{0:.2f}%".format(100 * hits / total)
            for name, hits in counts.items()}
def get_block_ips():
    """Scan the nginx access log and rate-limit abusive clients.

    Tracks per-IP GET rates in a rolling 60-second window; an IP exceeding
    120 hits/minute is blocked (recorded in redis via `r.set` for 7 days).
    Verified search-engine bots and Facebook-crawler traffic are exempted.
    """
    ips = {}
    with open("/var/log/nginx/access.log") as f:
        for line in f:
            # NOTE(review): the pattern is recompiled for every line; it is
            # loop-invariant and could be hoisted above the loop.
            lineformat = re.compile(
                r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])""",
                re.IGNORECASE)
            data = re.search(lineformat, line)
            if data:
                datadict = data.groupdict()
                ip = datadict["ipaddress"]
                datetimestring = datadict["dateandtime"]
                user_agent = datadict["useragent"]
                parsed_string = user_agent_parser.ParseUserAgent(user_agent)
                browser = parsed_string["family"]
                refferer = datadict["refferer"]
                # group(6) is the HTTP method capture (GET|POST).
                method = data.group(6)
                # Rebuild the log timestamp as ISO-8601 so it can be parsed
                # into epoch seconds.
                t = datetimestring.split()
                struct_time = strptime(t[0], "%d/%b/%Y:%H:%M:%S")
                iso_8601_time = "{}-{}-{}T{}:{}:{}+{}:{}".format(
                    struct_time.tm_year, struct_time.tm_mon,
                    struct_time.tm_mday, struct_time.tm_hour,
                    struct_time.tm_min, struct_time.tm_sec, t[1][1:3],
                    t[1][-2:])
                parsed_time = dp.parse(iso_8601_time)
                t_in_seconds = float(parsed_time.strftime("%s"))
                if method == "GET":
                    if ip not in ips:
                        # First sighting: open a fresh rate window.
                        ips[ip] = {
                            "begin_time": t_in_seconds,
                            "request_time": 0.0,
                            "status": True,
                            "hits": 1,
                            "time_blocked": "",
                            "is_bot": False,
                            "from_facebook": False
                        }
                    else:
                        # Already blocked or whitelisted: nothing to do.
                        if ips[ip]["status"] is False or ips[ip][
                                "is_bot"] is True or ips[ip][
                                    "from_facebook"] is True:
                            continue
                        if browser == "Googlebot" or browser == "bingbot":
                            # Verify bot claims via reverse DNS lookup.
                            command = subprocess.Popen(['host', ip],
                                                       stdout=subprocess.PIPE)
                            text = command.stdout.read().decode('utf-8')
                            if "domain name pointer" in text:
                                ips[ip]["is_bot"] = True
                                continue
                        if browser == "Facebook" and "facebook.com" not in refferer:
                            ips[ip]["from_facebook"] = True
                            continue
                        ips[ip]["request_time"] = t_in_seconds - ips[ip][
                            "begin_time"]
                        ips[ip]["hits"] += 1
                        if ips[ip]["request_time"] < 60.0 and ips[ip][
                                "hits"] > 120:
                            # Over 120 hits inside one minute: block for 7 days.
                            ips[ip]["status"] = False
                            ips[ip]["time_blocked"] = datetimestring
                            r.set(ip, ip, 7 * 86400)
                            # NOTE(review): this Popen result is never read;
                            # presumably a leftover debug lookup -- confirm.
                            subprocess.Popen(['host', ip],
                                             stdout=subprocess.PIPE)
                            continue
                        elif ips[ip]["request_time"] > 60.0:
                            # Window expired without tripping: reset counters.
                            ips[ip]["begin_time"] = t_in_seconds
                            ips[ip]["request_time"] = 0.0
                            ips[ip]["hits"] = 1
    del ips
def parse_user_agent(user_agent_str):
    """Parse *user_agent_str* with ua_parser.

    An unparseable string still yields a result dict, e.g.
    {'brand': None, 'model': None, 'family': 'Other'}.
    """
    parsed = user_agent_parser.ParseUserAgent(user_agent_str)
    return parsed
from ua_parser import user_agent_parser

sys.path.append('../')
import processing

parser = argparse.ArgumentParser(
    description='Parse the user agent of the participants data')
parser.add_argument('--path', required=True,
                    help='Path to the participant data to be parsed')
args = parser.parse_args()

if __name__ == "__main__":
    print('Reading participants.csv...')
    participants = pd.read_csv(args.path, sep='\t')
    print('Parsing user agent (this may take a few minutes)...')
    participants['browser'] = participants.USER_AGENT.apply(
        lambda x: user_agent_parser.ParseUserAgent(x)['family'])
    participants['os'] = participants.USER_AGENT.apply(
        lambda x: user_agent_parser.ParseOS(x)['family'])
    # Parse the device portion once per row instead of three times.
    device = participants.USER_AGENT.apply(user_agent_parser.ParseDevice)
    participants['device_family'] = device.apply(lambda d: d['family'])
    participants['device_brand'] = device.apply(lambda d: d['brand'])
    participants['device_model'] = device.apply(lambda d: d['model'])
    print('Saving to participants.csv...')
    # Overwrites the input file in place with the new columns appended.
    participants.to_csv(args.path, sep='\t', index=False)
    print('DONE')
def parse_user_agent(user_agent_str):
    """Return ua_parser's browser-level parse of *user_agent_str*."""
    result = user_agent_parser.ParseUserAgent(user_agent_str)
    return result
def log2database(nameFile):
    """Parse a Squid access-log file and insert each entry -- plus its
    parsed user-agent, OS and device info -- into the db_proxy MySQL
    schema (tables logs_squid, dispositivo, navegador, sistema_operativo).

    Returns False when the file cannot be opened; otherwise processes every
    line, logging (printing) per-line failures without aborting.
    """
    # read the file
    try:
        file_object = open(nameFile, "r")
    except Exception as e:
        print(e)
        return False
    lines = file_object.readlines()
    contador = 1
    # open the database connection
    db = pymysql.connect(host,username,password,database)
    for linea in lines:
        objectLog = {}
        print("\n[procesando linea del log] = " + str(contador) + "\n")
        linea_1 = linea.replace("\n","")
        try:
            # Split on the double-quote character; squid log fields are
            # quote-delimited, so odd indices are quoted values.
            first_split = linea_1.split('\x22')
            # add the user_agent
            objectLog['user_agent'] = first_split[5]
            # extract user-agent information
            parsed_ua = user_agent_parser.ParseUserAgent(objectLog['user_agent'])
            print("USER AGENT")
            # Normalise None parts to the string "None" for the DB columns.
            if parsed_ua['family'] is None:
                parsed_ua['family'] = "None"
            if parsed_ua['major'] is None:
                parsed_ua['major'] = "None"
            if parsed_ua['minor'] is None:
                parsed_ua['minor'] = "None"
            if parsed_ua['patch'] is None:
                parsed_ua['patch'] = "None"
            print(parsed_ua)
            # extract OS information
            parsed_os = user_agent_parser.ParseOS(objectLog['user_agent'])
            print("Sistema operativo")
            print(parsed_os)
            if parsed_os['family'] is None:
                parsed_os['family'] = "None"
            if parsed_os['major'] is None:
                parsed_os['major'] = "None"
            if parsed_os['minor'] is None:
                parsed_os['minor'] = "None"
            if parsed_os['patch'] is None:
                parsed_os['patch'] = "None"
            if parsed_os['patch_minor'] is None:
                parsed_os['patch_minor'] = "None"
            # extract device information
            parsed_device = user_agent_parser.ParseDevice(objectLog['user_agent'])
            print("Dispositivo")
            if parsed_device['family'] is None:
                parsed_device['family'] = "None"
            if parsed_device['brand'] is None:
                parsed_device['brand'] = "None"
            if parsed_device['model'] is None:
                parsed_device['model'] = "None"
            print(parsed_device)
            # add the tcp_log field
            objectLog['tcp_log'] = first_split[6].replace(" ", "")
            # extract ip and timestamp
            first_split[0] = first_split[0].replace("[","")
            first_split[0] = first_split[0].replace("]","")
            first_split[0] = first_split[0].replace(" +0000 ","")
            ip_date = first_split[0].split(" - - ")
            objectLog['ip'] = ip_date[0]
            objectLog['fecha'] = ip_date[1]
            # Reshape "dd/Mon/yyyy:HH:MM:SS" into a datetime value.
            objectLog['fecha'] = objectLog['fecha'].replace("/", "-")
            objectLog['fecha'] = objectLog['fecha'].replace(':', ' ', 1)
            objectLog['fecha'] = datetime.strptime(objectLog['fecha'], '%d-%b-%Y %H:%M:%S')
            # add method, url and http version
            met_uri_http = first_split[1].split(" ")
            objectLog['metodo'] = met_uri_http[0]
            objectLog['url'] = met_uri_http[1]
            objectLog['version_http'] = met_uri_http[2]
            # add the response codes
            cod_res = first_split[2].split(" ")
            objectLog['res_codigo'] = cod_res[1]
            objectLog['res_codigo_2'] = cod_res[2]
            # add the redirect url
            objectLog['redireccion'] = "" if ( first_split[3] == "-" ) else first_split[3]
            print()
            print(objectLog)
            # SEND THE RECORD TO THE DATABASE
            try:
                ###################################################################################
                ###################################################################################
                cursor = db.cursor()
                # NOTE(review): res_codigo_2 is bound to the `size` column
                # here -- confirm this mapping is intentional.
                sql = "INSERT INTO db_proxy.logs_squid \
                (user_agent,tcp_log,ip,fecha,metodo,url,res_codigo,size, \
                redireccion,version_http) \
                VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
                cursor.execute(sql, (objectLog['user_agent'], objectLog['tcp_log'],
                                     objectLog['ip'], objectLog['fecha'],
                                     objectLog['metodo'],objectLog['url'],
                                     objectLog['res_codigo'], objectLog['res_codigo_2'],
                                     objectLog['redireccion'],objectLog['version_http']))
                db.commit()
                # Child tables reference this row through its auto-id.
                last_indice = cursor.lastrowid
                print("INDEX ID INSERT :" + str(last_indice))
                print("[info] : datos insertados en la tabla log_squid")
                ###################################################################################
                ###################################################################################
                cursor = db.cursor()
                sql = "INSERT INTO db_proxy.dispositivo \
                (logs_squid_idlogs_squid,family,brand,model) \
                VALUES (%s,%s,%s,%s);"
                cursor.execute(sql, (last_indice , parsed_device['family'],
                                     parsed_device['brand'], parsed_device['model'] ))
                db.commit()
                print("[info] : datos insertados en la tabla dispositivo")
                ###################################################################################
                ###################################################################################
                cursor = db.cursor()
                sql = "INSERT INTO db_proxy.navegador \
                (logs_squid_idlogs_squid,family,major,minor,patch) \
                VALUES \
                (%s,%s,%s,%s,%s);"
                cursor.execute(sql, (last_indice, parsed_ua['family'],
                                     parsed_ua['major'], parsed_ua['minor'],
                                     parsed_ua['patch'] ))
                db.commit()
                print("[info] : datos insertados en la tabla navegador")
                ###################################################################################
                ###################################################################################
                cursor = db.cursor()
                sql = "INSERT INTO db_proxy.sistema_operativo \
                (logs_squid_idlogs_squid,family,major,minor,patch,patch_minor) \
                VALUES \
                (%s,%s,%s,%s,%s,%s);"
                cursor.execute(sql, (last_indice, parsed_os['family'],
                                     parsed_os['major'], parsed_os['minor'],
                                     parsed_os['patch'], parsed_os['patch_minor'] ))
                db.commit()
                print("[info] : datos insertados en la tabla sistema operativo")
            except Exception as e:
                print(e)
        except Exception as e:
            # Malformed line: report and continue with the next one.
            print(e)
        contador += 1
    db.close()
def test_ny(self):
    # Smoke test: parse a desktop Chrome 56 / macOS 10.11 UA string.
    ua_string = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
    ua_dict = user_agent_parser.ParseUserAgent(ua_string)
    # No assertions -- the result is printed for manual inspection.
    # (Python 2 print statement.)
    print ua_dict
def browser_extraction(x):
    """Return the browser family parsed from the user-agent value *x*
    (coerced to str so non-string inputs are tolerated)."""
    parsed = user_agent_parser.ParseUserAgent(str(x))
    return parsed['family']
def single_linky(request, guid):
    """
    Given a Perma ID, serve it up.

    Handles GUID canonicalization, replacement-link forwarding, raw WARC
    download, a Safari cookie workaround for private links, capture-type
    selection, and Memento response headers.
    """
    # Create a canonical version of guid (non-alphanumerics removed, hyphens every 4 characters, uppercase),
    # and forward to that if it's different from current guid.
    canonical_guid = Link.get_canonical_guid(guid)
    # We only do the redirect if the correctly-formatted GUID actually exists --
    # this prevents actual 404s from redirecting with weird formatting.
    link = get_object_or_404(Link.objects.all_with_deleted(), guid=canonical_guid)
    if canonical_guid != guid:
        return HttpResponsePermanentRedirect(
            reverse('single_linky', args=[canonical_guid]))
    # Forward to replacement link if replacement_link is set.
    if link.replacement_link_id:
        return HttpResponseRedirect(
            reverse('single_linky', args=[link.replacement_link_id]))
    # If we get an unrecognized archive type (which could be an old type like 'live' or 'pdf'), forward to default version
    serve_type = request.GET.get('type')
    if serve_type is None:
        serve_type = 'source'
    elif serve_type not in valid_serve_types:
        return HttpResponsePermanentRedirect(
            reverse('single_linky', args=[canonical_guid]))
    # serve raw WARC
    if serve_type == 'warc_download':
        if request.user.can_view(link):
            # Stream the archive in 8 KiB chunks rather than loading it whole.
            response = StreamingHttpResponse(FileWrapper(
                default_storage.open(link.warc_storage_file()), 1024 * 8),
                                             content_type="application/gzip")
            response[
                'Content-Disposition'] = "attachment; filename=%s.warc.gz" % link.guid
            return response
        else:
            return HttpResponseForbidden('Private archive.')
    # Special handling for private links on Safari:
    # Safari won't let us set the auth cookie for the WARC_HOST domain inside the iframe, unless we've already set a
    # cookie on that domain outside the iframe. So do a redirect to WARC_HOST to set a cookie and then come back.
    # safari=1 in the query string indicates that the redirect has already happened.
    # See http://labs.fundbox.com/third-party-cookies-with-ie-at-2am/
    if link.is_private and not request.GET.get('safari'):
        user_agent = user_agent_parser.ParseUserAgent(
            request.META.get('HTTP_USER_AGENT', ''))
        if user_agent.get('family') == 'Safari':
            return redirect_to_login(
                request.build_absolute_uri(),
                "//%s%s" % (settings.WARC_HOST,
                            reverse('user_management_set_safari_cookie')))
    # handle requested capture type
    if serve_type == 'image':
        capture = link.screenshot_capture
    else:
        capture = link.primary_capture
    # if primary capture did not work, but screenshot did work, forward to screenshot
    if (
            not capture or capture.status != 'success'
    ) and link.screenshot_capture and link.screenshot_capture.status == 'success':
        return HttpResponseRedirect(
            reverse('single_linky', args=[guid]) + "?type=image")
    # If this record was just created by the current user, show them a new record message
    new_record = request.user.is_authenticated() and link.created_by_id == request.user.id and not link.user_deleted \
        and link.creation_timestamp > timezone.now() - timedelta(seconds=300)
    # Provide the max upload size, in case the upload form is used
    max_size = settings.MAX_ARCHIVE_FILE_SIZE / 1024 / 1024
    protocol = "https://" if settings.SECURE_SSL_REDIRECT else "http://"
    if not link.submitted_description:
        link.submitted_description = "This is an archive of %s from %s" % (
            link.submitted_url, link.creation_timestamp.strftime("%A %d, %B %Y"))
    context = {
        'link': link,
        'can_view': request.user.can_view(link),
        'can_edit': request.user.can_edit(link),
        'can_delete': request.user.can_delete(link),
        'can_toggle_private': request.user.can_toggle_private(link),
        'capture': capture,
        'serve_type': serve_type,
        'new_record': new_record,
        'this_page': 'single_link',
        'max_size': max_size,
        'link_url': settings.HOST + '/' + link.guid,
        'protocol': protocol,
    }
    response = render(request, 'archive/single-link.html', context)
    # Attach Memento (RFC 7089-style) headers pointing at the archive,
    # its timegate and its timemap.
    date_header = format_date_time(mktime(link.creation_timestamp.timetuple()))
    link_memento = protocol + settings.HOST + '/' + link.guid
    link_timegate = protocol + settings.WARC_HOST + settings.TIMEGATE_WARC_ROUTE + '/' + link.safe_url
    link_timemap = protocol + settings.WARC_HOST + settings.WARC_ROUTE + '/timemap/*/' + link.safe_url
    response['Memento-Datetime'] = date_header
    link_memento_headers = '<{0}>; rel="original"; datetime="{1}",<{2}>; rel="memento"; datetime="{1}",<{3}>; rel="timegate",<{4}>; rel="timemap"; type="application/link-format"'
    response['Link'] = link_memento_headers.format(link.safe_url, date_header,
                                                   link_memento, link_timegate,
                                                   link_timemap)
    return response