Esempio n. 1
0
    def makePGTSComparisonYAML(self):
        """Write parser results for the PGTS browser list to ``outfile.yaml``.

        Loads ``pgts_browser_list.yaml`` from ``TEST_RESOURCES_DIR``, runs
        ``user_agent_parser.ParseUserAgent`` on every ``user_agent_string``
        (forwarding the optional ``js_ua`` keyword arguments) and writes one
        ``test_cases`` entry per user agent to ``outfile.yaml`` in the current
        working directory.
        """
        import codecs

        def quoted_or_blank(value):
            # Missing version parts are emitted as an empty YAML value.
            return '' if value is None else "'" + value + "'"

        # Context managers close both files even if parsing fails part-way.
        with codecs.open('outfile.yaml', 'w', 'utf-8') as outfile:
            outfile.write("test_cases:\n")

            with open(os.path.join(TEST_RESOURCES_DIR,
                                   'pgts_browser_list.yaml')) as yamlFile:
                # safe_load only constructs plain data types, which is all a
                # fixture file needs.
                yamlContents = yaml.safe_load(yamlFile)

            for test_case in yamlContents['test_cases']:
                user_agent_string = test_case['user_agent_string']
                kwds = {}
                if 'js_ua' in test_case:
                    # NOTE(review): eval() runs whatever expression the
                    # fixture holds; keep this limited to trusted resources.
                    kwds = eval(test_case['js_ua'])

                # The parser result is read by key: unpacking the returned
                # dict as a tuple would yield its key names, not its values.
                parsed = user_agent_parser.ParseUserAgent(user_agent_string,
                                                          **kwds)

                # Escape any double-quotes in the UA string
                escaped_ua = user_agent_string.replace('"', '\\"')
                outfile.write(
                    '    - user_agent_string: "' + escaped_ua + '"\n' +
                    '      family: "' + parsed['family'] + '"\n' +
                    "      major: " + quoted_or_blank(parsed['major']) + "\n" +
                    "      minor: " + quoted_or_blank(parsed['minor']) + "\n" +
                    "      patch: " + quoted_or_blank(parsed['patch']) + "\n")
Esempio n. 2
0
    def runUserAgentTestsFromYAML(self, file_name):
        """Check ``ParseUserAgent`` against every case in a YAML fixture.

        Args:
            file_name: name of a YAML file inside ``TEST_RESOURCES_DIR`` whose
                ``test_cases`` entries carry ``user_agent_string``, ``family``,
                ``major``, ``minor``, ``patch`` and optionally ``js_ua``.

        Raises:
            AssertionError: via ``assertEqual`` when a parsed result differs
                from the expected values.
        """
        # The context manager closes the fixture even when loading fails.
        with open(os.path.join(TEST_RESOURCES_DIR, file_name)) as yamlFile:
            # safe_load only constructs plain data types.
            yamlContents = yaml.safe_load(yamlFile)

        for test_case in yamlContents['test_cases']:
            # Inputs to Parse()
            user_agent_string = test_case['user_agent_string']
            kwds = {}
            if 'js_ua' in test_case:
                # NOTE(review): eval() runs whatever expression the fixture
                # holds; keep this limited to trusted resources.
                kwds = eval(test_case['js_ua'])

            # The expected results
            expected = {'family': test_case['family'],
                        'major': test_case['major'],
                        'minor': test_case['minor'],
                        'patch': test_case['patch']}

            result = user_agent_parser.ParseUserAgent(user_agent_string, **kwds)
            self.assertEqual(
                result, expected,
                "UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>".format(
                    user_agent_string,
                    expected['family'], expected['major'], expected['minor'], expected['patch'],
                    result['family'], result['major'], result['minor'], result['patch']))
    def makePGTSComparisonYAML(self):
        """Write parser results for the PGTS browser list to ``outfile.yaml``.

        Loads ``pgts_browser_list.yaml`` from ``TEST_RESOURCES_DIR``, runs
        ``user_agent_parser.ParseUserAgent`` on every ``user_agent_string``
        (forwarding the optional ``js_ua`` keyword arguments) and writes one
        ``test_cases`` entry per user agent to ``outfile.yaml`` in the current
        working directory.
        """
        import codecs

        def quoted_or_blank(value):
            # Missing version parts are emitted as an empty YAML value.
            return "" if value is None else "'" + value + "'"

        # Context managers close both files even if parsing fails part-way.
        with codecs.open("outfile.yaml", "w", "utf-8") as outfile:
            outfile.write("test_cases:\n")

            with open(os.path.join(TEST_RESOURCES_DIR,
                                   "pgts_browser_list.yaml")) as yamlFile:
                yamlContents = yaml.load(yamlFile, Loader=SafeLoader)

            for test_case in yamlContents["test_cases"]:
                user_agent_string = test_case["user_agent_string"]
                kwds = {}
                if "js_ua" in test_case:
                    # NOTE(review): eval() runs whatever expression the
                    # fixture holds; keep this limited to trusted resources.
                    kwds = eval(test_case["js_ua"])

                # Read the result by key, as runUserAgentTestsFromYAML does:
                # unpacking the returned dict as a tuple would yield its key
                # names, not its values.
                parsed = user_agent_parser.ParseUserAgent(user_agent_string,
                                                          **kwds)

                # Escape any double-quotes in the UA string
                escaped_ua = user_agent_string.replace('"', '\\"')
                outfile.write(
                    '    - user_agent_string: "' + escaped_ua + '"\n' +
                    '      family: "' + parsed["family"] + '"\n' +
                    "      major: " + quoted_or_blank(parsed["major"]) + "\n" +
                    "      minor: " + quoted_or_blank(parsed["minor"]) + "\n" +
                    "      patch: " + quoted_or_blank(parsed["patch"]) + "\n")
Esempio n. 4
0
 def checkTags(self, fp):
     """Return a one-element list holding the browser tag for a fingerprint.

     The ``User-Agent`` entry of *fp* is first compared against a fixed set
     of exact user-agent strings (the original comment identifies these as
     Tor browser UAs); anything else is classified by the family name that
     ``user_agent_parser.ParseUserAgent`` reports.
     """
     user_agent = fp["User-Agent"]

     # Exact-match user agents are checked before any parsing happens.
     if user_agent in (
             "Mozilla/5.0 (X11; Linux x86_64; rv:60.0) Gecko/20100101 Firefox/60.0",
             "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:60.0) Gecko/20100101 Firefox/60.0",
             "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0"):
         return [torbrowser80]
     if user_agent == "Mozilla/5.0 (Windows NT 6.1; rv:52.0) Gecko/20100101 Firefox/52.0":
         return [torbrowser70]
     if user_agent == "Mozilla/5.0 (Windows NT 6.1; rv:45.0) Gecko/20100101 Firefox/45.0":
         return [tor6]
     if user_agent == "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0":
         return [tor5]
     if user_agent == "Mozilla/5.0 (Windows NT 6.1; rv:31.0) Gecko/20100101 Firefox/31.0":
         return [tor4]

     # Fall back to the general-purpose parser; the first substring that
     # occurs in the family name decides the tag, so the order matters.
     browser_family = user_agent_parser.ParseUserAgent(user_agent)["family"]
     if "Chrome" in browser_family:
         return [chrome]
     if "Firefox" in browser_family:
         return [firefox]
     if "Edge" in browser_family:
         return [edge]
     if "Bot" in browser_family:
         return [bot]
     if "IE" in browser_family:
         return [ie]
     return [others]
Esempio n. 5
0
 def get(self, request, *args, **kwargs):
     """Render the detail view with the parsed browser added to the context.

     Stores the looked-up object on ``self.object`` and passes it, together
     with the ``ParseUserAgent`` result for the request's ``User-Agent``
     header, to ``get_context_data``.
     """
     self.object = self.get_object()
     # Fall back to an empty string so a request that carries no User-Agent
     # header does not fail with a KeyError.
     user_agent_header = request.META.get('HTTP_USER_AGENT', '')
     context = self.get_context_data(
         object=self.object,
         user_agent=user_agent_parser.ParseUserAgent(user_agent_header)
     )
     return self.render_to_response(context)
Esempio n. 6
0
    def set_cookie(self,
                   response,
                   name,
                   value,
                   max_age,
                   httponly=True,
                   samesite='Strict'):
        """Set cookie *name* on *response*, scoped to the app's path prefix.

        The ``Secure`` flag is on when ``self._app_url`` contains ``https:``,
        except for Electron clients whose request arrived over plain HTTP,
        for which it is switched off.
        """
        use_secure = 'https:' in self._app_url

        environ = flask.request.environ
        # Prefer the WSGI scheme, then the forwarded-proto header, then http.
        scheme = environ.get('wsgi.url_scheme',
                             environ.get('HTTP_X_FORWARDED_PROTO', 'http'))
        plain_http = scheme == 'http'

        browser = user_agent_parser.ParseUserAgent(
            environ.get('HTTP_USER_AGENT', ''))

        if plain_http and browser.get('family') == 'Electron':
            use_secure = False

        cookie_path = self._app.config['requests_pathname_prefix'].rstrip('/')
        response.set_cookie(
            name,
            value=value,
            max_age=max_age,
            secure=use_secure,
            path=cookie_path,
            httponly=httponly,
            samesite=samesite)
Esempio n. 7
0
 def mapper(self, _, line):
     """Yield ``(first group, (1, bytes))`` for a line matching ``regex``.

     A matching line also bumps the ``Browsers`` counter for the browser
     family parsed from group 6; a non-matching line only bumps the
     ``Incorrect input`` counter and yields nothing.
     """
     parsed_line = re.match(regex, line)
     if not parsed_line:
         self.increment_counter('Incorrect input', 'Incorrect input', 1)
         return

     fields = parsed_line.groups()
     browser_family = user_agent_parser.ParseUserAgent(fields[6])['family']
     self.increment_counter('Browsers', browser_family, 1)
     yield fields[0], (1, self.get_bytes(fields[4]))
Esempio n. 8
0
def parse_file(input_file_path, output_file_path):
    """Parse each user agent in a tab-separated file and tally the accuracy.

    Every input row is read as ``user agent, expected browser name, expected
    main version``. The parsed browser name and main version (after
    ``correct_special_cases``) are compared with the expected values, and a
    row holding the input plus the parsed values is appended to the output
    file. Summary counts are printed at the end.

    Args:
      input_file_path: the path of the tab-separated input file
      output_file_path: the path of the tab-separated file to append to
    """
    with open(input_file_path, 'r') as f1, open(output_file_path, 'a') as f2:
        reader = csv.reader(f1, delimiter='\t')
        writer = csv.writer(f2, delimiter='\t')
        count = 0
        correct_count = 0
        incorrect_count = 0
        error_count = 0
        for row in reader:
            count += 1

            user_agent_string = row[0]
            true_browser_name = row[1]
            true_main_version = row[2]

            # Reset per row: the error report below must never reference an
            # unbound name (parser failed on the first row) or show the
            # previous row's result.
            result_dict = None
            try:
                result_dict = user_agent_parser.ParseUserAgent(user_agent_string)
                browser_name = result_dict['family']
                main_version = result_dict['major']

                # correct browser name and main version for some special cases
                browser_name, main_version = correct_special_cases(
                    user_agent_string, browser_name, main_version)

                if browser_name == true_browser_name and main_version == true_main_version:
                    correct_count += 1
                else:
                    incorrect_count += 1

                writer.writerow([user_agent_string, true_browser_name,
                                 true_main_version, browser_name, main_version])

            except Exception as e:
                # Best effort: report the failing record and keep going.
                error_count += 1
                print(type(e))
                print(e)
                print(user_agent_string)
                print(result_dict)
                print(true_browser_name, ", ", true_main_version)
                print("  ")

        print("Total number of user agents processed: ", count)
        print("correct number: ", correct_count)
        print("incorrect number: ", incorrect_count)
        print("number of records unable to handle: ", error_count)
Esempio n. 9
0
 def is_safari(request):
     """Tell whether the request's user agent parses as the Safari family.

     Returns ``None`` when ``ua_parser`` cannot be imported, otherwise a bool.
     """
     try:
         from ua_parser import user_agent_parser
     except ImportError:  # pragma: no cover
         return None

     # A missing header is parsed as an empty string.
     header = request.META.get("HTTP_USER_AGENT", "")
     return user_agent_parser.ParseUserAgent(header)["family"] == "Safari"
Esempio n. 10
0
    def handle(self, request):
        """Serve the Jira configuration form and apply submitted choices.

        Responds with an error or refresh marker when the integration cannot
        be resolved from the request, with a login prompt for anonymous
        users, with the form on GET or invalid input, and otherwise syncs
        the integration's organizations with the submitted selection.
        """
        try:
            integration = get_integration_from_request(request, "jira")
        except AtlassianConnectValidationError:
            return self.get_response({"error_message": "Unable to verify installation."})
        except ExpiredSignatureError:
            return self.get_response({"refresh_required": True})

        if not request.user.is_authenticated():
            browser = user_agent_parser.ParseUserAgent(
                request.META.get("HTTP_USER_AGENT", "")
            )
            # not enabling cross site cookies seems to be a common problem with Safari
            # as a result, there is a Safari specific link to instructions when is_safari=true
            login_context = {
                "login_required": True,
                "is_safari": browser.get("family") == "Safari",
                "login_url": absolute_uri(reverse("sentry-login")),
            }
            return self.get_response(login_context)

        # Organizations in which the user holds a global role.
        global_role_ids = [role.id for role in roles.get_all() if role.is_global]
        memberships = OrganizationMember.objects.filter(
            role__in=global_role_ids, user=request.user
        )
        organizations = list(
            request.user.get_orgs().filter(id__in=memberships.values("organization"))
        )

        form = JiraConfigForm(organizations, request.POST)

        if request.method == "GET" or not form.is_valid():
            # Re-render the form pre-filled with the currently linked orgs.
            active_orgs = OrganizationIntegration.objects.filter(
                integration__provider="jira",
                integration=integration,
                organization__in=organizations,
            ).values_list("organization_id", flat=True)

            form = JiraConfigForm(organizations, initial={"organizations": active_orgs})
            return self.get_response({"form": form, "organizations": organizations})

        enabled_orgs = [
            org for org in organizations if org.id in form.cleaned_data["organizations"]
        ]
        disabled_orgs = list(set(organizations) - set(enabled_orgs))

        # Remove Jira integrations not in the set of enabled organizations
        OrganizationIntegration.objects.filter(
            integration__provider="jira", integration=integration, organization__in=disabled_orgs
        ).delete()

        # Ensure all enabled integrations.
        for enabled_org in enabled_orgs:
            integration.add_organization(enabled_org, request.user)

        return self.get_response({"form": form, "completed": True})
Esempio n. 11
0
def process_tcp_packet(packet):
    """Record OS and browser details sniffed from an HTTP request packet.

    Packets without an ``http.HTTPRequest`` layer are ignored. Otherwise the
    request's ``User-Agent`` is parsed and the hash stored under
    ``find_key(ip)`` is updated with the OS family (plus a device category
    for Mac OS X and iOS), the OS version and the browser, skipping any
    value that is unknown ("Other" family or a ``None`` version part).

    A ``KeyError`` raised along the way (for example a request without a
    ``User-Agent`` field) silently ends processing of the packet.
    """
    if not packet.haslayer(http.HTTPRequest):
        # This packet doesn't contain an HTTP request so we skip it
        return
    http_layer = packet.getlayer(http.HTTPRequest)
    ip_layer = packet.getlayer(IP)

    try:
        ip = '{0[src]}'.format(ip_layer.fields, http_layer.fields)
        ua_string = '{1[User-Agent]}'.format(ip_layer.fields, http_layer.fields)
        agent = user_agent_parser.ParseUserAgent(ua_string)
        print(ua_string)
        os_info = user_agent_parser.ParseOS(ua_string)

        os_family = os_info['family']
        # Missing parts show up as the text "None" after str().
        os_version = '.'.join(
            str(os_info[part]) for part in ('major', 'minor', 'patch'))
        browser_version = '.'.join(
            str(agent[part]) for part in ('major', 'minor', 'patch'))
        browser_combined = agent['family'] + '(' + browser_version + ')'

        key = find_key(ip)
        k = wdb.Hash(key)
        # The value itself is unused, but the lookup is kept on purpose: a
        # KeyError here (handled below) skips the update for this packet.
        # NOTE(review): presumably this filters out devices with no stored
        # hostname - confirm.
        k['hostname']

        if "Other" not in os_family:
            k.update(os=os_family)
            print("%s %s %s" % (ip, key, os_family))

            if os_family == "Mac OS X":
                k.update(category="Desktop/Laptop")

            if os_family == "iOS":
                k.update(category="Smartphones/PDAs/Tablets")

        # Both markers are tested explicitly: ("None" or "Other") evaluates
        # to just "None", so "Other" was never actually checked.
        if "None" not in os_version and "Other" not in os_version:
            k.update(os_version=os_version)
            print("%s %s %s" % (ip, key, os_version))

        if "None" not in browser_combined and "Other" not in browser_combined:
            k.update(browser_family=browser_combined)
            print("%s %s %s" % (ip, key, browser_combined))

    except KeyError:
        # Deliberate best effort: packets lacking an expected field are
        # dropped without a report.
        pass
Esempio n. 12
0
def ua_filter(ua_string, Chrome, Firefox, Chromium, Safari, Opera, Other):
    """Decide whether *ua_string* passes the enabled browser filters.

    Returns ``True`` when no flag is set or nothing matches, ``False`` when
    the parsed family equals an enabled Chrome/Firefox/Safari/Opera flag, and
    ``2`` when ``Other`` is set and the family is none of those four.
    ``Chromium`` only takes part in the "any flag set" check; it has no
    branch of its own.
    """
    if not (Chrome or Firefox or Chromium or Safari or Opera or Other):
        return True

    family = user_agent_parser.ParseUserAgent(ua_string)['family']

    named_filters = (
        ("Chrome", Chrome),
        ("Firefox", Firefox),
        ("Safari", Safari),
        ("Opera", Opera),
    )
    for browser_name, enabled in named_filters:
        if enabled and family == browser_name:
            return False

    if Other and family not in ("Chrome", "Firefox", "Safari", "Opera"):
        return 2
    return True
Esempio n. 13
0
def get_request_type(meta):
    """Classify a request from its user agent and referer.

    Args:
        meta: mapping of request metadata; ``HTTP_USER_AGENT`` and
            ``HTTP_REFERER`` are read from it and may both be absent.

    Returns:
        ``RequestType.GMAIL`` for a referer-less ``GmailImageProxy`` agent,
        ``RequestType.GITHUB`` for a referer-less unrecognised agent whose
        string contains ``github-camo``, ``RequestType.DIRECT`` when a
        non-empty referer is present, otherwise ``RequestType.UNKNOWN``.
    """
    # Normalise a missing header to "" so the parser, the substring test and
    # the log call below all receive a string.
    ua_string = meta.get("HTTP_USER_AGENT") or ""
    http_referer = meta.get("HTTP_REFERER", None)
    family = user_agent_parser.ParseUserAgent(ua_string)['family']

    if http_referer is None and family == 'GmailImageProxy':
        return RequestType.GMAIL
    if http_referer is None and family == 'Other' and "github-camo" in ua_string:
        return RequestType.GITHUB
    if http_referer:
        return RequestType.DIRECT

    logger.info("Could not get RequestType of %s", ua_string)
    return RequestType.UNKNOWN
Esempio n. 14
0
def parse_user_agent(user_agent_string):
    """Return ``{"name", "version"}`` for the browser in *user_agent_string*.

    The version starts from the major part (``"0"`` when it is missing) and
    gets ``.minor`` and ``.patch`` appended whenever those parts are present.
    """
    parsed = user_agent_parser.ParseUserAgent(user_agent_string)

    name = parsed["family"]
    major = parsed["major"]
    version = "0" if major is None else major

    for part in (parsed["minor"], parsed["patch"]):
        if part is not None:
            version = version + "." + part

    return {"name": name, "version": version}
Esempio n. 15
0
def get_request_parsed_ua_string(request, parse=None):
    """Parse the request's user-agent string with the selected parser.

    ``parse`` picks the parser: ``"agent"``, ``"os"`` or ``"device"`` select
    the matching single-purpose parser; any other value runs the full
    ``user_agent_parser.Parse``.
    """
    ua_string = get_request_ua_string(request)

    # Anything that is not already ``str`` is decoded, dropping bad bytes.
    if not isinstance(ua_string, str):
        ua_string = ua_string.decode('utf-8', 'ignore')

    selectable_parsers = (
        ("agent", user_agent_parser.ParseUserAgent),
        ("os", user_agent_parser.ParseOS),
        ("device", user_agent_parser.ParseDevice),
    )
    for mode, parser in selectable_parsers:
        if parse == mode:
            return parser(ua_string)

    return user_agent_parser.Parse(ua_string)
Esempio n. 16
0
 def get_user_agent_data(request):
     """Summarise the request's user agent as OS, browser and device strings.

     Returns:
         dict with ``"os"`` ("family major.minor"), ``"browser"``
         ("family major") and ``"device"`` ("brand family model"). Parts the
         parser could not determine are left out, so no stray separators
         appear.
     """
     from ua_parser import user_agent_parser

     def _join(separator, *parts):
         # Skip missing (None/empty) parts before joining.
         return separator.join(part for part in parts if part)

     # A request without the header is parsed as an empty string.
     user_agent = request.META.get('HTTP_USER_AGENT', '')
     browser = user_agent_parser.ParseUserAgent(user_agent)
     os_info = user_agent_parser.ParseOS(user_agent)
     device = user_agent_parser.ParseDevice(user_agent)

     # The pieces are combined explicitly: in ``a or '' + ' ' + b or ''``
     # the ``+`` binds tighter than ``or``, so only ``a`` was ever returned.
     os_version = _join('.', os_info['major'], os_info['minor'])
     return {
         "os": _join(' ', os_info['family'], os_version),
         "browser": _join(' ', browser['family'], browser['major']),
         "device": _join(' ', device['brand'], device['family'],
                         device['model']),
     }
 def do_GET(self):
     """Answer the two stub endpoints; serve everything else as a file.

     Paths containing ``/list.json`` get the requesting browser's family
     name, paths containing ``/incident_list`` get the fixed text ``halp``;
     both are sent with status 200 and a JSON content type. Any other path
     is delegated to ``SimpleHTTPRequestHandler``.
     """
     print(self.path)
     wants_list = "/list.json" in self.path
     wants_incidents = (not wants_list) and "/incident_list" in self.path

     if not (wants_list or wants_incidents):
         return SimpleHTTPServer.SimpleHTTPRequestHandler.do_GET(self)

     # Headers go out before the payload is computed.
     self.send_response(200)
     self.send_header('Content-Type', 'application/json')
     self.end_headers()

     if wants_list:
         agent_header = self.headers.getheader('User-Agent')
         payload = user_agent_parser.ParseUserAgent(agent_header)['family']
     else:
         payload = 'halp'
     self.wfile.write(payload)
Esempio n. 18
0
 def is_old(ua_string):
     """Report whether *ua_string* belongs to a browser version deemed too old.

     ``None`` is never old. IE is compared against the fixed version 11;
     Chrome, Firefox, Safari and Yandex Browser are compared through
     ``major_int_lt_config`` using their respective configuration keys.
     Every other family is reported as not old.
     """
     if ua_string is None:
         return False

     ua = user_agent_parser.ParseUserAgent(ua_string)
     family = ua['family']

     if family == 'IE':
         return bool(major_int_lt(ua, 11))

     config_key_by_family = {
         'Chrome': 'ua_min_chrome_version',
         'Firefox': 'ua_min_firefox_version',
         'Safari': 'ua_min_safari_version',
         'Yandex Browser': 'ua_min_yandex_version',
     }
     config_key = config_key_by_family.get(family)
     if config_key is None:
         return False
     return bool(major_int_lt_config(ua, config_key))
Esempio n. 19
0
def parse_ua_text(ua_text):
    """Flatten the parsed user agent into one CSV row.

    Returns a list: the original text, browser family and major version, OS
    family and major version, then device family, brand and model. Every
    parsed value is passed through ``str()``, so a missing value becomes the
    text ``"None"``.
    """
    browser = user_agent_parser.ParseUserAgent(ua_text)
    operating_system = user_agent_parser.ParseOS(ua_text)
    device = user_agent_parser.ParseDevice(ua_text)

    return [
        ua_text,
        str(browser['family']),
        str(browser['major']),
        str(operating_system['family']),
        str(operating_system['major']),
        str(device['family']),
        str(device['brand']),
        str(device['model']),
    ]
    def runUserAgentTestsFromYAML(self, file_name):
        """Check ``ParseUserAgent`` against every case in a YAML fixture.

        Args:
            file_name: name of a YAML file inside ``TEST_RESOURCES_DIR`` whose
                ``test_cases`` entries carry ``user_agent_string``, ``family``,
                ``major``, ``minor``, ``patch`` and optionally ``js_ua``.

        Raises:
            AssertionError: via ``assertEqual`` when a parsed result differs
                from the expected values.
        """
        # The context manager closes the fixture even when loading fails.
        with open(os.path.join(TEST_RESOURCES_DIR, file_name)) as yamlFile:
            yamlContents = yaml.load(yamlFile, Loader=SafeLoader)

        for test_case in yamlContents["test_cases"]:
            # Inputs to Parse()
            user_agent_string = test_case["user_agent_string"]
            kwds = {}
            if "js_ua" in test_case:
                # NOTE(review): eval() runs whatever expression the fixture
                # holds; keep this limited to trusted resources.
                kwds = eval(test_case["js_ua"])

            # The expected results
            expected = {
                "family": test_case["family"],
                "major": test_case["major"],
                "minor": test_case["minor"],
                "patch": test_case["patch"],
            }

            result = user_agent_parser.ParseUserAgent(user_agent_string,
                                                      **kwds)
            self.assertEqual(
                result,
                expected,
                "UA: {0}\n expected<{1}, {2}, {3}, {4}> != actual<{5}, {6}, {7}, {8}>"
                .format(
                    user_agent_string,
                    expected["family"],
                    expected["major"],
                    expected["minor"],
                    expected["patch"],
                    result["family"],
                    result["major"],
                    result["minor"],
                    result["patch"],
                ),
            )
Esempio n. 21
0
def sessions(self):
    """List this user's live sessions with a description of the client.

    Returns a list of ``(Session, description)`` tuples, where the
    description joins the parsed device, OS and browser values of the
    stored user agent. ``LoggedInUser`` rows whose session no longer exists
    are deleted along the way.
    """
    from .models import LoggedInUser

    def present_values(parsed):
        # Parsed fields that are set, in the parser's own key order.
        return [value for value in parsed.values() if value is not None]

    described = []
    for login in LoggedInUser.objects.filter(user=self):
        try:
            session = Session.objects.get(session_key=login.session_key)
        except Session.DoesNotExist:
            # Stale record: the session is gone, so drop the bookkeeping row.
            login.delete()
            continue

        os_text = '.'.join(
            present_values(user_agent_parser.ParseOS(login.user_agent)))
        browser_text = '.'.join(
            present_values(user_agent_parser.ParseUserAgent(login.user_agent)))
        device_text = ' '.join(
            present_values(user_agent_parser.ParseDevice(login.user_agent)))

        described.append(
            (session, ' '.join([device_text, os_text, browser_text])))
    return described
Esempio n. 22
0
def solve():
    """Return each browser family's share of requests in ``data/data-01.txt``.

    The result maps family name to a percentage string with two decimals
    (for example ``"12.50%"``), in order of first appearance. The family
    name is also printed once for every ``Facebook`` line encountered.
    """
    hits_by_family = {}

    with open("data/data-01.txt") as log_file:
        for line in log_file:
            # The user agent is the fourth field from the end.
            user_agent = seg_access_log(line)[-4]
            family = user_agent_parser.ParseUserAgent(user_agent)["family"]
            if family == "Facebook":
                print(family)
            hits_by_family[family] = hits_by_family.get(family, 0) + 1

    total_hits = sum(hits_by_family.values())

    return {
        family: "{0:.2f}%".format(100 * hits / total_hits)
        for family, hits in hits_by_family.items()
    }
Esempio n. 23
0
def get_block_ips():
    """Scan the nginx access log and flag IPs that send GET requests too fast.

    Each matching log line is attributed to its client IP. For GET requests
    a per-IP record tracks the start of the current window, the elapsed time
    and the hit count. An IP exceeding 120 hits in under 60 seconds is marked
    blocked and stored through ``r.set(ip, ip, 7 * 86400)``. IPs identified
    as Googlebot/bingbot with a resolvable reverse DNS entry, or as Facebook
    agents without a facebook.com referer, are exempted from counting.

    Returns nothing; the only lasting effect is the ``r.set`` call.
    """
    # Compiled once up front instead of once per log line.
    lineformat = re.compile(
        r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])""",
        re.IGNORECASE)

    ips = {}
    with open("/var/log/nginx/access.log") as f:
        for line in f:
            data = re.search(lineformat, line)
            if not data:
                continue

            datadict = data.groupdict()
            ip = datadict["ipaddress"]
            datetimestring = datadict["dateandtime"]
            user_agent = datadict["useragent"]
            parsed_string = user_agent_parser.ParseUserAgent(user_agent)
            browser = parsed_string["family"]
            refferer = datadict["refferer"]
            method = data.group(6)

            # "<day>/<mon>/<year>:<h>:<m>:<s> <+|-><hhmm>" -> stamp, offset
            stamp, offset = datetimestring.split()
            struct_time = strptime(stamp, "%d/%b/%Y:%H:%M:%S")
            # The offset's own sign is used; it was hard-coded to "+" before.
            iso_8601_time = "{}-{}-{}T{}:{}:{}{}{}:{}".format(
                struct_time.tm_year, struct_time.tm_mon,
                struct_time.tm_mday, struct_time.tm_hour,
                struct_time.tm_min, struct_time.tm_sec,
                offset[0], offset[1:3], offset[-2:])
            parsed_time = dp.parse(iso_8601_time)
            # NOTE(review): "%s" is a platform-specific strftime extension -
            # confirm it is available wherever this runs.
            t_in_seconds = float(parsed_time.strftime("%s"))

            if method != "GET":
                continue

            if ip not in ips:
                # First GET seen from this IP: open a new counting window.
                ips[ip] = {
                    "begin_time": t_in_seconds,
                    "request_time": 0.0,
                    "status": True,
                    "hits": 1,
                    "time_blocked": "",
                    "is_bot": False,
                    "from_facebook": False
                }
                continue

            entry = ips[ip]
            # Already blocked or exempted IPs are not counted any further.
            if (entry["status"] is False or entry["is_bot"] is True
                    or entry["from_facebook"] is True):
                continue

            if browser == "Googlebot" or browser == "bingbot":
                command = subprocess.Popen(['host', ip],
                                           stdout=subprocess.PIPE)
                # communicate() drains the pipe and waits for the child, so
                # no zombie process or open pipe is left behind.
                text = command.communicate()[0].decode('utf-8')
                if "domain name pointer" in text:
                    entry["is_bot"] = True
                    continue

            if browser == "Facebook" and "facebook.com" not in refferer:
                entry["from_facebook"] = True
                continue

            entry["request_time"] = t_in_seconds - entry["begin_time"]
            entry["hits"] += 1

            if entry["request_time"] < 60.0 and entry["hits"] > 120:
                entry["status"] = False
                entry["time_blocked"] = datetimestring
                r.set(ip, ip, 7 * 86400)
                # NOTE(review): the output of this lookup is never read and
                # the child is never waited on - confirm whether the call is
                # still needed.
                subprocess.Popen(['host', ip],
                                 stdout=subprocess.PIPE)
            elif entry["request_time"] > 60.0:
                # Window elapsed without tripping the limit: start over.
                entry["begin_time"] = t_in_seconds
                entry["request_time"] = 0.0
                entry["hits"] = 1
Esempio n. 24
0
def parse_user_agent(user_agent_str):
    """Return the ``ParseUserAgent`` result for *user_agent_str* unchanged."""
    # NOTE(review): the earlier comment here listed 'brand'/'model' keys,
    # which are what ParseDevice reports. ParseUserAgent presumably yields
    # 'family', 'major', 'minor' and 'patch', with family 'Other' and None
    # version parts for an unparseable string - confirm against ua-parser.
    return user_agent_parser.ParseUserAgent(user_agent_str)
Esempio n. 25
0
from ua_parser import user_agent_parser
sys.path.append('../')
import processing

# Command-line interface: the participants file is both input and output.
parser = argparse.ArgumentParser(
    description='Parse the user agent of the participants data')
parser.add_argument('--path',
                    required=True,
                    help='Path to the participant data to be parsed')
args = parser.parse_args()

if __name__ == "__main__":
    print('Reading participants.csv...')
    participants = pd.read_csv(args.path, sep='\t')

    print('Parsing user agent (this may take a few minutes)...')
    user_agents = participants.USER_AGENT
    participants['browser'] = user_agents.apply(
        lambda x: user_agent_parser.ParseUserAgent(x)['family'])
    participants['os'] = user_agents.apply(
        lambda x: user_agent_parser.ParseOS(x)['family'])

    # Parse the device once per row and reuse the result for all three
    # columns instead of re-parsing the same string three times.
    devices = [user_agent_parser.ParseDevice(x) for x in user_agents]
    participants['device_family'] = [d['family'] for d in devices]
    participants['device_brand'] = [d['brand'] for d in devices]
    participants['device_model'] = [d['model'] for d in devices]

    # The enriched table overwrites the input file in place.
    print('Saving to participants.csv...')
    participants.to_csv(args.path, sep='\t', index=False)

    print('DONE')
Esempio n. 26
0
def parse_user_agent(user_agent_str):
    """Return the ``ParseUserAgent`` result for *user_agent_str* unchanged."""
    return user_agent_parser.ParseUserAgent(user_agent_str)
Esempio n. 27
0
def _none_to_text(parsed, keys):
    """Replace a ``None`` stored under any of *keys* in *parsed* with "None"."""
    for key in keys:
        if parsed[key] is None:
            parsed[key] = "None"
    return parsed


def log2database(nameFile):
    """Load a proxy access log into the ``db_proxy`` tables.

    Each line is split on double quotes; the request, response and
    user-agent details are inserted into ``logs_squid``, and the parsed
    device, browser and operating system go into ``dispositivo``,
    ``navegador`` and ``sistema_operativo``, keyed by the new log row id.
    A line that cannot be parsed or inserted is reported with ``print`` and
    skipped.

    Args:
        nameFile: path of the log file to read.

    Returns:
        ``False`` when the file cannot be opened, otherwise ``None``.
    """
    # Read the file.
    try:
        file_object = open(nameFile, "r")
    except Exception as e:
        print(e)
        return False

    # Release the file handle as soon as the lines are in memory.
    with file_object:
        lines = file_object.readlines()

    # Open the database connection.
    # NOTE(review): newer PyMySQL releases may require keyword arguments
    # here - confirm against the installed version.
    db = pymysql.connect(host,username,password,database)

    try:
        for contador, linea in enumerate(lines, 1):

            objectLog = {}

            print("\n[procesando linea del log] = " + str(contador) + "\n")

            linea_1 = linea.replace("\n","")

            try:
                first_split = linea_1.split('\x22')

                # Add the user agent.
                objectLog['user_agent'] = first_split[5]

                # Extract browser information from the user agent.
                parsed_ua = user_agent_parser.ParseUserAgent(objectLog['user_agent'])
                print("USER AGENT")
                _none_to_text(parsed_ua, ('family', 'major', 'minor', 'patch'))
                print(parsed_ua)

                # Extract operating system information (printed before the
                # missing values are replaced).
                parsed_os = user_agent_parser.ParseOS(objectLog['user_agent'])
                print("Sistema operativo")
                print(parsed_os)
                _none_to_text(
                    parsed_os,
                    ('family', 'major', 'minor', 'patch', 'patch_minor'))

                # Extract device information.
                parsed_device = user_agent_parser.ParseDevice(objectLog['user_agent'])
                print("Dispositivo")
                _none_to_text(parsed_device, ('family', 'brand', 'model'))
                print(parsed_device)

                # Add the tcp_log field.
                objectLog['tcp_log'] = first_split[6].replace(" ", "")

                # Isolate the IP and timestamp from the leading segment.
                prefix = first_split[0].replace("[","")
                prefix = prefix.replace("]","")
                prefix = prefix.replace(" +0000 ","")
                ip_date = prefix.split(" - - ")
                objectLog['ip'] = ip_date[0]

                # Rewrite the timestamp text so it matches the strptime
                # pattern below.
                fecha = ip_date[1].replace("/", "-").replace(':', ' ', 1)
                objectLog['fecha'] = datetime.strptime(fecha, '%d-%b-%Y %H:%M:%S')

                # Add method, URL and HTTP version.
                met_uri_http = first_split[1].split(" ")
                objectLog['metodo'] = met_uri_http[0]
                objectLog['url'] = met_uri_http[1]
                objectLog['version_http'] = met_uri_http[2]

                # Add the response code fields.
                cod_res = first_split[2].split(" ")
                objectLog['res_codigo'] = cod_res[1]
                objectLog['res_codigo_2'] = cod_res[2]

                # Add the redirect URL ("-" means none).
                objectLog['redireccion'] = "" if ( first_split[3] == "-" ) else first_split[3]

                print()
                print(objectLog)

                # Send the record to the database; each table insert is
                # committed on its own.
                try:
                    cursor = db.cursor()
                    sql =   "INSERT INTO db_proxy.logs_squid \
                        (user_agent,tcp_log,ip,fecha,metodo,url,res_codigo,size, \
                        redireccion,version_http) \
                        VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"

                    cursor.execute(sql, (objectLog['user_agent'], objectLog['tcp_log'], objectLog['ip'], objectLog['fecha'], objectLog['metodo'],objectLog['url'], objectLog['res_codigo'], objectLog['res_codigo_2'], objectLog['redireccion'],objectLog['version_http']))

                    db.commit()

                    # The child tables reference the id of the new log row.
                    last_indice = cursor.lastrowid
                    print("INDEX ID INSERT :" + str(last_indice))

                    print("[info] : datos insertados en la tabla log_squid")

                    cursor = db.cursor()
                    sql =   "INSERT INTO db_proxy.dispositivo \
                        (logs_squid_idlogs_squid,family,brand,model) \
                        VALUES (%s,%s,%s,%s);"

                    cursor.execute(sql, (last_indice , parsed_device['family'], parsed_device['brand'], parsed_device['model'] ))
                    db.commit()

                    print("[info] : datos insertados en la tabla dispositivo")

                    cursor = db.cursor()
                    sql =   "INSERT INTO db_proxy.navegador \
                        (logs_squid_idlogs_squid,family,major,minor,patch) \
                        VALUES \
                        (%s,%s,%s,%s,%s);"

                    cursor.execute(sql, (last_indice, parsed_ua['family'], parsed_ua['major'], parsed_ua['minor'], parsed_ua['patch'] ))
                    db.commit()
                    print("[info] : datos insertados en la tabla navegador")

                    cursor = db.cursor()
                    sql =   "INSERT INTO db_proxy.sistema_operativo \
                        (logs_squid_idlogs_squid,family,major,minor,patch,patch_minor) \
                        VALUES \
                        (%s,%s,%s,%s,%s,%s);"

                    cursor.execute(sql, (last_indice, parsed_os['family'], parsed_os['major'], parsed_os['minor'], parsed_os['patch'], parsed_os['patch_minor'] ))
                    db.commit()

                    print("[info] : datos insertados en la tabla sistema operativo")

                except Exception as e:
                    # Best effort: report the database error and move on.
                    print(e)

            except Exception as e:
                # Best effort: a malformed line is reported and skipped.
                print(e)
    finally:
        # Close the connection even if the run is interrupted.
        db.close()
Esempio n. 28
0
 def test_ny(self):
     """Print the parsed result for one fixed user-agent string (no asserts)."""
     sample_user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"
     print(user_agent_parser.ParseUserAgent(sample_user_agent))
def browser_extraction(x):
    """Return the browser family parsed from the user-agent value *x*.

    *x* is coerced with ``str()`` before being passed to
    ``user_agent_parser.ParseUserAgent``; the ``'family'`` entry of the
    parsed result is returned.
    """
    return user_agent_parser.ParseUserAgent(str(x))['family']
Esempio n. 30
0
def single_linky(request, guid):
    """
    Serve the archive page for the Perma link identified by ``guid``.

    Depending on the request this returns a redirect (non-canonical GUID,
    replaced link, unrecognized ``type``, Safari cookie hand-off, fallback
    to the screenshot capture), a streamed WARC download, a 403, or the
    rendered ``archive/single-link.html`` page carrying ``Memento-Datetime``
    and ``Link`` headers. A 404 is raised when no link matches the
    canonical GUID.
    """

    # Normalise the GUID (non-alphanumerics removed, hyphens every 4
    # characters, uppercase) so that differently formatted requests can be
    # forwarded to the canonical URL.
    canonical_guid = Link.get_canonical_guid(guid)

    # Look the link up *before* redirecting: a GUID that does not exist
    # should yield a plain 404 rather than a redirect to a reformatted URL.
    link = get_object_or_404(Link.objects.all_with_deleted(),
                             guid=canonical_guid)

    if guid != canonical_guid:
        canonical_url = reverse('single_linky', args=[canonical_guid])
        return HttpResponsePermanentRedirect(canonical_url)

    # A link that has been superseded forwards to its replacement.
    if link.replacement_link_id:
        replacement_url = reverse('single_linky',
                                  args=[link.replacement_link_id])
        return HttpResponseRedirect(replacement_url)

    # Resolve the archive type. Unrecognized values (which could be an old
    # type like 'live' or 'pdf') are forwarded to the default version.
    requested_type = request.GET.get('type')
    if requested_type is None:
        serve_type = 'source'
    elif requested_type in valid_serve_types:
        serve_type = requested_type
    else:
        return HttpResponsePermanentRedirect(
            reverse('single_linky', args=[canonical_guid]))

    # Raw WARC download, only for users allowed to view the link.
    if serve_type == 'warc_download':
        if not request.user.can_view(link):
            return HttpResponseForbidden('Private archive.')
        warc_file = default_storage.open(link.warc_storage_file())
        response = StreamingHttpResponse(FileWrapper(warc_file, 1024 * 8),
                                         content_type="application/gzip")
        response['Content-Disposition'] = (
            "attachment; filename=%s.warc.gz" % link.guid)
        return response

    # Private links on Safari need special handling: Safari refuses to set
    # the auth cookie for the WARC_HOST domain inside the iframe unless a
    # cookie was already set on that domain outside the iframe. So bounce
    # through WARC_HOST to set a cookie and come back; safari=1 in the query
    # string marks that the bounce has already happened.
    # See http://labs.fundbox.com/third-party-cookies-with-ie-at-2am/
    if link.is_private and not request.GET.get('safari'):
        raw_agent = request.META.get('HTTP_USER_AGENT', '')
        user_agent = user_agent_parser.ParseUserAgent(raw_agent)
        if user_agent.get('family') == 'Safari':
            cookie_url = "//%s%s" % (
                settings.WARC_HOST,
                reverse('user_management_set_safari_cookie'))
            return redirect_to_login(request.build_absolute_uri(), cookie_url)

    # Choose the capture matching the requested type.
    if serve_type == 'image':
        capture = link.screenshot_capture
    else:
        capture = link.primary_capture

        # When the primary capture is missing or unsuccessful but the
        # screenshot succeeded, forward to the screenshot instead.
        primary_succeeded = bool(capture) and capture.status == 'success'
        if not primary_succeeded:
            if link.screenshot_capture and link.screenshot_capture.status == 'success':
                image_url = reverse('single_linky', args=[guid]) + "?type=image"
                return HttpResponseRedirect(image_url)

    viewer = request.user

    # Show the "new record" message only to the creator, for a link that is
    # not user-deleted and was created within the last 300 seconds.
    new_record = (
        viewer.is_authenticated()
        and link.created_by_id == viewer.id
        and not link.user_deleted
        and link.creation_timestamp > timezone.now() - timedelta(seconds=300)
    )

    # Max upload size, exposed in case the upload form is used.
    max_size = settings.MAX_ARCHIVE_FILE_SIZE / 1024 / 1024

    if settings.SECURE_SSL_REDIRECT:
        protocol = "https://"
    else:
        protocol = "http://"

    # Fill in a default description on the in-memory object when none was
    # submitted.
    if not link.submitted_description:
        created_on = link.creation_timestamp.strftime("%A %d, %B %Y")
        link.submitted_description = "This is an archive of %s from %s" % (
            link.submitted_url, created_on)

    context = {
        'link': link,
        'can_view': viewer.can_view(link),
        'can_edit': viewer.can_edit(link),
        'can_delete': viewer.can_delete(link),
        'can_toggle_private': viewer.can_toggle_private(link),
        'capture': capture,
        'serve_type': serve_type,
        'new_record': new_record,
        'this_page': 'single_link',
        'max_size': max_size,
        'link_url': settings.HOST + '/' + link.guid,
        'protocol': protocol,
    }

    response = render(request, 'archive/single-link.html', context)

    # Memento headers: the creation time plus links to the original URL,
    # this memento, the timegate and the timemap.
    date_header = format_date_time(mktime(link.creation_timestamp.timetuple()))
    warc_base = protocol + settings.WARC_HOST
    link_memento = protocol + settings.HOST + '/' + link.guid
    link_timegate = warc_base + settings.TIMEGATE_WARC_ROUTE + '/' + link.safe_url
    link_timemap = warc_base + settings.WARC_ROUTE + '/timemap/*/' + link.safe_url

    link_memento_headers = (
        '<{0}>; rel="original"; datetime="{1}",'
        '<{2}>; rel="memento"; datetime="{1}",'
        '<{3}>; rel="timegate",'
        '<{4}>; rel="timemap"; type="application/link-format"'
    )

    response['Memento-Datetime'] = date_header
    response['Link'] = link_memento_headers.format(
        link.safe_url, date_header, link_memento, link_timegate, link_timemap)

    return response