def parse_pyua(self):
    """Add user_agents-derived columns to ``self.df_p`` and return it.

    Each string in ``self.ua_in`` is parsed once. The browser family, device
    family and the pc/mobile/tablet flags are stored under the ``pyua_*``
    keys of ``self.df_p``. If anything raises, the error is printed and
    ``self.df_p`` is returned as it was at that point.
    """
    try:
        # Parse every string a single time instead of once per column.
        parsed = [parse(ua) for ua in self.ua_in]
        # Concrete lists (not lazy ``map`` objects) so the stored values are
        # the same on Python 2 and Python 3.
        self.df_p['pyua_browser'] = [p.browser.family for p in parsed]
        self.df_p['pyua_device'] = [p.device.family for p in parsed]
        self.df_p['pyua_pc'] = [p.is_pc for p in parsed]
        self.df_p['pyua_mob'] = [p.is_mobile for p in parsed]
        self.df_p['pyua_tab'] = [p.is_tablet for p in parsed]
    except Exception as err:
        # Single-argument print calls behave the same on Python 2 and 3.
        print("pyua can't parse this string %s" % (self.ua_in,))
        print(err)
    return self.df_p
Exemple #2
0
def get_user_agent(request):
    """Return the parsed user agent for ``request``, using the cache if set.

    The ``HTTP_USER_AGENT`` header (empty string when absent) is looked up in
    ``cache`` first, because parsing regexes.yaml/json (ua-parser) is slow.
    On a cache miss the string is parsed and the result stored under the
    same key. When ``cache`` is falsy the string is simply parsed.
    """
    ua_string = request.META.get('HTTP_USER_AGENT', '')
    if not cache:
        return parse(ua_string)

    cache_key = get_cache_key(ua_string)
    cached = cache.get(cache_key)
    if cached is not None:
        return cached

    fresh = parse(ua_string)
    cache.set(cache_key, fresh)
    return fresh
Exemple #3
0
def test(request):
    """Return a greeting that echoes the parsed User-Agent of ``request``.

    The response body is ``"hello "`` followed by the string form of the
    parsed user agent. A request without a ``User-Agent`` header is parsed
    as an empty string instead of raising ``KeyError``.
    """
    ua_string = request.META.get('HTTP_USER_AGENT', '')
    user_agent = parse(ua_string)
    return HttpResponse("hello " + str(user_agent))
Exemple #4
0
def register_device(request):
    """Register a device, or refresh the record of one already known.

    Reads ``registration_id``, ``android_id`` and ``version`` (default
    ``'0.3.0'``) from the POST data and derives model, OS version and device
    type from the User-Agent header. An existing ``Device`` with the same
    ``android_id`` is updated, marked active and stamped with the current
    local time; otherwise a new ``Device`` is created.

    Always returns an empty HTTP 200 response.
    """
    registration_id = request.POST.get('registration_id')
    android_id = request.POST.get('android_id')
    version = request.POST.get('version', '0.3.0')
    # A request without a User-Agent header is parsed as an empty string
    # rather than failing with KeyError before anything is registered.
    user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))
    model = user_agent.device.family
    os_version = user_agent.os.version_string
    # Anything that is not detected as mobile is recorded as a tablet.
    device_type = 'Mobile' if user_agent.is_mobile else 'Tablet'
    ip = get_client_ip(request)
    logger.debug(
        'register request from %s\nandroid_id = %s\napp_version = %s\nuser_agent = %s\nmodel = %s\nos_version = %s',
        ip, android_id, version, user_agent, model, os_version)
    try:
        device = Device.objects.get(android_id=android_id)
    except Device.DoesNotExist:
        Device.objects.create(registration_id=registration_id, android_id=android_id, version=version, model=model,
                              os_version=os_version, type=device_type)
        logger.info('Registered new device: %s', android_id)
    else:
        device.registration_id = registration_id
        device.version = version
        device.model = model
        device.os_version = os_version
        device.type = device_type
        device.active = True
        device.last_seen = timezone.localtime(timezone.now())
        device.save()
        logger.debug('Device %s marked as last seen on %s', device.android_id, device.last_seen)
    return HttpResponse(status=200)
def emailOpen(e):
    """Record a ``Visit`` for the opening of the email identified by ``e``.

    Collects the tracking cookie's app, the client addresses, referrer,
    geolocation of the forwarded address and user-agent details, then stores
    them as a ``models.Visit`` dated with the current time.

    Returns a JSON failure payload when no email with ``emailid == e``
    exists, otherwise a JSON success payload.
    """
    d = {}
    tracking_id = request.cookies.get('LATrackingID')
    if tracking_id:
        app = modules.getModel(models.App, appid=tracking_id)
        # NOTE(review): assumes getModel returns None for an unknown appid;
        # a stale cookie should not abort the tracking request.
        if app is not None:
            d['app_id'] = app.id
    d['private_ip'] = request.environ.get('REMOTE_ADDR')
    d['public_ip'] = request.environ.get('HTTP_X_FORWARDED_FOR')
    d['full_url'] = request.environ.get('HTTP_REFERER', '').strip().lower()

    email = db.session.query(models.Email).filter_by(emailid=e).first()
    if not email:
        return jsonify(**{'status': 'failure', 'description': 'no such email found'})
    d['email_id'] = email.id

    if d['public_ip']:
        g = geocoder.ip(d['public_ip'])
        # A failed lookup yields no coordinates; unpacking that would raise.
        latlng = g.latlng
        if latlng:
            d['lat'], d['lng'] = latlng
        d['city'] = g.city
        d['country'] = g.country
        d['state'] = g.state

    d['user_agent'] = request.environ.get('HTTP_USER_AGENT')
    if d['user_agent']:
        user_agent = parse(d['user_agent'])
        d['browser'] = user_agent.browser.family
        d['is_bot'] = user_agent.is_bot
        d['is_mobile'] = user_agent.is_mobile
        d['is_tablet'] = user_agent.is_tablet
        d['is_pc'] = user_agent.is_pc

    p = models.Visit(**d)
    p.date = datetime.now()
    db.session.add(p)
    db.session.commit()
    return jsonify(success=True, description='successfully tracked email')
Exemple #6
0
def getBrowser(userAgent):
    """Return the lower-cased browser family for ``userAgent``.

    Returns ``None`` when ``userAgent`` is empty or ``None``.
    """
    if not userAgent:
        return None
    return parse(userAgent).browser.family.lower()
Exemple #7
0
def default():
    """Route a visitor to the song-entry or song-list page.

    Bots get a plain text reply. Visitors without ``userdata`` in the
    session are redirected to the Facebook app URL. Otherwise the matching
    ``TopTenUser`` (created on first visit) has its ``last_login`` updated,
    the active ``TopTen`` is loaded (or created), and the visitor is
    redirected: to the "make songs" page while the list holds fewer than
    ``NUMSONGS`` songs, else to the "show songs" page. Mobile user agents
    get the ``...Mob`` variant of either page.
    """
    user_agent = parse(request.user_agent.string)
    if user_agent.is_bot:
        return "Bot query"
    if not session.get('userdata'):
        return redirect("https://apps.facebook.com/mytoptenapp"+url_for('index', _external=False))

    fbdata = session['userdata']
    new_user = False
    user = pg.query(TopTenUser).filter(TopTenUser.facebook_id == str(fbdata['id'])).first()
    if not user:
        user = createUser(fbdata)
        new_user = True
    user.last_login = datetime.now()
    pg.commit()

    # ``== True`` is deliberate: it builds a SQL expression, not a Python bool.
    topten = pg.query(TopTen).join(TopTenUser).filter(TopTenUser.facebook_id == user.facebook_id).filter(TopTen.active == True).first()
    if not topten:
        topten = createTopTen(fbdata)

    mobile = user_agent.is_mobile
    if len(topten.songs) < NUMSONGS:
        endpoint = 'makeSongsMob' if mobile else 'makeSongs'
        return redirect(url_for(endpoint, facebook_id=user.facebook_id, new_user=new_user))
    endpoint = 'showSongsMob' if mobile else 'showSongs'
    return redirect(url_for(endpoint, facebook_id=user.facebook_id))
def update_browser_num(list_br, packet):
    """Flag which browser produced ``packet`` in a five-slot indicator list.

    Slots are, in order: Firefox, Chrome, Safari, Internet Explorer, other.

    Args:
        list_br: existing indicator list; an empty list is replaced by a
            fresh all-zero list, a non-empty one is updated in place.
        packet: captured packet, or ``None``.

    Returns:
        ``[0, 0, 0, 0, 0]`` when ``packet`` is ``None``; ``list_br``
        unchanged when the packet is not an HTTP GET; otherwise ``list_br``
        with the slot of the detected browser set to 1.
    """
    if packet is None:
        return [0, 0, 0, 0, 0]

    if len(list_br) == 0:
        list_br = [0, 0, 0, 0, 0]

    if not is_http_get(packet):
        return list_br

    raw_ua = packet[get_http_layer(packet)].user_agent
    ua_browser = parse(raw_ua).browser.family
    if "Firefox" in ua_browser:
        list_br[0] = 1
    elif "Chrome" in ua_browser:
        list_br[1] = 1
    elif "Safari" in ua_browser:
        list_br[2] = 1
    elif ua_browser.startswith("IE") or "Internet Explorer" in ua_browser:
        # The parser reports Internet Explorer with an "IE..." family name;
        # the long form is kept as well in case a data set spells it out.
        list_br[3] = 1
    else:
        list_br[4] = 1
    return list_br
Exemple #9
0
 def serve(self):
   """Handle the request and render the ``info.tpl`` diagnostics page.

   Builds a tuple of ``(label, gathered data)`` pairs covering the INI
   configuration, interpreter, application, routes, browser, request,
   environment, session, headers, cookies, query and form data, and passes
   it to the template as ``CONTEXTS``.
   """
   # NOTE(review): super(self.__class__, self) recurses endlessly if this
   # class is ever subclassed; name the class explicitly where it is defined.
   super(self.__class__, self).serve()
   # A request without a User-Agent header is parsed as an empty string
   # instead of failing the whole page with KeyError.
   ua_string = bottle.request.environ.get('HTTP_USER_AGENT', '')
   contexts = (
               ('General INI', self._gather_from_ini(
                 FILE_CONFIGURATION, 'APPLICATION',
                 None)),
               ('Sys', self._gather_members(sys, (
                 'builtin_module_names', 'copyright', 'modules', 'meta_path', 'path_importer_cache'))),
               ('App', self._gather_members(bottle.request.app, (
                 'routes', 'plugins'))),
               ('Routes', self._gather_from_list(bottle.request.app.routes, None)),
               ('Browser', self._gather_members(user_agents.parse(ua_string), None)),
               ('Request', self._gather_members(bottle.request, (
                 'environ', 'headers', 'route', 'urlparts'))),
               ('Environment', self._gather_from_dict(bottle.request.environ, (
                 'beaker.sessions',
                 'bottle.request', 'bottle.request.json',
                 'bottle.request.urlparts', 'bottle.route', 'route.handle',
                 'wsgi.errors'))),
               ('Session', self._gather_from_dict(self.session, None)),
               ('Session Object', self._gather_members(self.session.session._sess, (
                 'accessed_dict', 'cookie', 'request', 'namespace'))),
               ('Headers', self._gather_from_dict(bottle.request.headers, None)),
               ('Cookies', self._gather_from_dict(bottle.request.cookies, None)),
               ('Query', self._gather_from_dict(bottle.request.query, None)),
               ('Forms', self._gather_from_dict(bottle.request.forms, None)),
              )
   return self.get_template('info.tpl',
                            CONTEXTS = contexts)
def addUserAgentInfo(db):
    """Fill ``uainfo`` for every user agent in ``access`` that lacks a row.

    Each missing user-agent string is parsed and stored together with a
    browser type label (first match of pc, mobile, bot, else ``'unknown'``),
    the browser family, the OS family with version, and the OS family.
    """
    # First matching flag wins, checked in this order.
    type_labels = (
        ('is_pc', 'Browser'),
        ('is_mobile', 'Mobile Browser'),
        ('is_bot', 'Robot'),
    )
    # Materialise the result before writing to the same database.
    pending_rows = list(db.execute("""select useragent from access
                            except
                            select useragent from uainfo"""))
    handled = set()
    for row in pending_rows:
        userAgent = row[0]
        if userAgent in handled:
            continue
        uaRec = user_agents.parse(userAgent)
        if not uaRec:
            continue

        browserType = next(
            (label for flag, label in type_labels if getattr(uaRec, flag)),
            'unknown')
        os_name = '%s %s' % (uaRec.os.family, uaRec.os.version_string)

        db.execute("""insert or replace into uainfo(useragent,
                    browser_type, ua_name, os_name, os_family) values(?, ?, ?, ?, ?)""",
                   (userAgent, browserType, uaRec.browser.family, os_name, uaRec.os.family))
        handled.add(userAgent)
        db.commit()  # commit per record in case we exit
Exemple #11
0
 def parse_data(self,data):
     """Normalise one parsed log record in place and return it.

     Adds ``browser``, ``os``, ``user_agent_name`` and ``request_name``;
     converts ``time`` to a ``datetime`` (or the string ``"NULL"`` when it
     does not match the expected format); casts ``status`` and ``size`` to
     ``int`` (a ``"-"`` size becomes 0); and replaces ``"-"`` in ``user``
     and ``referer`` with ``None``.
     """
     # User agent details
     raw_agent = data['user_agent']
     agent = user_agents.parse(raw_agent)
     data['browser'] = agent.browser.family + agent.browser.version_string
     data['os'] = agent.os.family
     data['user_agent_name'] = raw_agent.split()[0]

     # First token of the request line
     data['request_name'] = data['request'].split()[0]

     # Timestamp: only the first whitespace-separated token is parsed
     stamp = data['time'].split()[0]
     try:
         data['time'] = datetime.strptime(stamp, "%d/%b/%Y:%H:%M:%S")
     except ValueError:
         data['time'] = "NULL"

     if data["user"] == "-":
         data["user"] = None
     data["status"] = int(data["status"])
     size = data["size"]
     data["size"] = 0 if size == "-" else int(size)
     if data["referer"] == "-":
         data["referer"] = None
     return data
Exemple #12
0
    def connectionMade(self):
        """Record client details, run plugin hooks and send the request.

        The OS family, browser family and first browser version component
        are taken from the ``user-agent`` header. A missing version
        component becomes ``"Other"``; a ``KeyError`` anywhere in the
        lookup sets all three fields to ``"Other"``.
        """
        log.debug("HTTP connection made.")

        try:
            agent = parse(self.headers['user-agent'])

            self.clientInfo["clientos"] = agent.os.family
            self.clientInfo["browser"] = agent.browser.family
            try:
                major_version = agent.browser.version[0]
            except IndexError:
                major_version = "Other"
            self.clientInfo["browserv"] = major_version
        except KeyError:
            for field in ("clientos", "browser", "browserv"):
                self.clientInfo[field] = "Other"

        self.clientInfo["clientip"] = self.client.getClientIP()

        self.plugins.hook()
        self.sendRequest()
        self.sendHeaders()

        if self.command == 'POST':
            self.sendPostData()
Exemple #13
0
def is_ios(request):
    """Return ``True`` when the request's user agent reports the iOS family.

    A request without a ``User-Agent`` header is parsed as an empty string,
    which yields ``False`` instead of passing ``None`` to the parser.
    """
    ua = request.META.get('HTTP_USER_AGENT', '')
    return parse(ua).os.family == 'iOS'
Exemple #14
0
    def format_record(r, recursive):
        """Turn a stored request record into a dict for display.

        Args:
            r: record exposing ``id``, ``when``, ``subject`` and a ``req``
                mapping (``path``, ``agent``, ``ip`` and optionally
                ``query`` / ``referrer``).
            recursive: when true, ``netblock`` becomes the sorted,
                comma-joined labels of every record with the same subject,
                and ``recent`` lists up to 15 earlier records for that
                subject, each formatted non-recursively.

        Returns:
            dict with ``reqid``, ``when``, ``netblock``, ``path``,
            ``query`` and ``ua`` (plus ``recent`` when ``recursive``).
        """
        path = r.req["path"]
        if "twostream" in path:
            try:
                path = r.req["referrer"].replace("https://www.govtrack.us", "")
            except (KeyError, AttributeError, TypeError):
                # No usable referrer recorded; keep the request path.
                pass
        if "?" in path:
            path = path[:path.index("?")]  # ensure no qsargs
        if r.req.get("query"):
            path += "?" + urllib.parse.urlencode({ k.encode("utf8"): v.encode("utf8") for k,v in list(r.req["query"].items()) })

        if r.req['agent']:
            ua = str(user_agents.parse(r.req['agent']))
            if ua == "Other / Other / Other":
                ua = "bot"
            ua = re.sub(r"(\d+)(\.[\d\.]+)", r"\1", ua)  # remove minor version numbers
        else:
            ua = "unknown"

        ret = {
            "reqid": r.id,
            "when": r.when.strftime("%b %-d, %Y %-I:%M:%S %p"),
            "netblock": get_netblock_label(r.req['ip']) if r.req['ip'] else None,
            "path": path,
            "query": r.req.get('query', {}),
            "ua": ua,
        }
        if recursive:
            ret["netblock"] = ", ".join(sorted(set( get_netblock_label(rr.req["ip"]) for rr in Sousveillance.objects.filter(subject=r.subject) if rr.req["ip"] )))
            ret["recent"] = [format_record(rr, False) for rr in Sousveillance.objects.filter(subject=r.subject, id__lt=r.id).order_by('-when')[0:15]]
        return ret
Exemple #15
0
def allItems(request, page):
    """Render one page of all items, most expensive first.

    The page number is read from the ``page`` query parameter (the ``page``
    argument is overwritten). When the parameter is missing, not a number,
    or the pager rejects it, the pager's defaults are used. Mobile user
    agents get the ``spAllItem`` template, everyone else ``allItem``.
    """
    itemCount = Item.objects.raw('select id, count(*) count from Item')[0].count

    try:
        # ``page`` is numeric: normalise it first, then fetch the paging
        # data for the normalised page.
        page = pager.pagerInt(int(request.GET['page']), itemCount)['page']
        paging = pager.pagerInt(page, itemCount)
        start_id = paging['start_id']
        prevNext = paging['prevNext']
    except Exception:
        # ``page`` is missing or not a number: fall back to the defaults.
        paging = pager.pagerNotInt()
        page = paging['page']
        start_id = paging['start_id']
        prevNext = paging['prevNext']

    itemList = Item.objects.raw('select id, itemName, itemPrice, mediumImageUrls from Item ORDER BY itemPrice DESC limit %s, 30;' % start_id)

    # Floor division keeps the page count an integer on Python 3 as well.
    c = Context({'typicalCategories':typicalCategories, 'searchForm':searchForm, 'itemList':itemList, 'itemCount':itemCount // 30 + 1, 'prevNext':prevNext, 'page': page,})

    # A missing User-Agent header is treated as a non-mobile client.
    if parse(request.META.get('HTTP_USER_AGENT', '')).is_mobile:
        t = loader.get_template('rakuten/spAllItem.html')
    else:
        t = loader.get_template('rakuten/allItem.html')

    return HttpResponse(t.render(c))
def check_cookie_present(request):
    """Decide whether ``request`` must first visit the cookie-setter view.

    Returns ``(True, requested_url)`` when no redirect is needed and
    ``(False, redirect_url)`` when a configured cookie is missing, where
    ``redirect_url`` is the http:// cookie-setter URL carrying the quoted
    current URL in ``absurl``.
    """
    #TODO we need to check the number of redirects in case we end up in a loop for some reason

    def proceed():
        # Nothing to do: hand back the URL that was requested.
        return True, request.build_absolute_uri()

    if HTTPS_IFRAME_COOKIESETTER_URL_TO_CHECK not in request.path:
        return proceed()
    # Media and static files never need the cookies.
    if (settings.MEDIA_URL and request.path.startswith(settings.MEDIA_URL)) or request.path.startswith(settings.STATIC_URL):
        return proceed()
    if HTTPS_IFRAME_COOKIESETTER_ONLY_HTTPS and not request.is_secure():
        return proceed()
    if not call_additional_checks():
        return proceed()

    # Path of the cookie-setter view itself
    setter_path = urlpath()
    agent = parse(request.META.get('HTTP_USER_AGENT', ''))

    # Checked only after the cheap tests above, as it is an expensive lookup.
    if agent.browser.family not in HTTPS_IFRAME_COOKIESETTER_BROWSERS or setter_path in request.path:
        return proceed()

    quoted_current_url = urllib2.quote(request.build_absolute_uri().encode("utf8"))

    if all(name in request.COOKIES for name in HTTPS_IFRAME_COOKIESETTER_COOKIES):
        return proceed()

    # Switch the URL scheme to http
    target = request.build_absolute_uri('%s?absurl=%s' % (setter_path, quoted_current_url))
    parts = urlparse(target)
    return False, '%s://%s%s?%s' % ('http', parts.netloc, parts.path, parts.query)
def supported_browser(request):
    """Return ``{'supported_browser': bool}``, computing it once per session.

    On first use the request's user agent is checked against every entry of
    ``browser_requirements`` and the result stored in the session; later
    calls reuse the stored value.
    """
    session = request.session
    if 'supported_browser' not in session:
        agent = parse(request.META.get('HTTP_USER_AGENT', ''))
        verdicts = (pass_browser_entry(agent, entry) for entry in browser_requirements)
        session['supported_browser'] = all(verdicts)
    return {'supported_browser': session['supported_browser']}
Exemple #18
0
def index(request, page):
    """Render one page of the items in the featured genres, priciest first.

    The page number is read from the ``page`` query parameter (the ``page``
    argument is overwritten). When the parameter is missing, not a number,
    or the pager rejects it, the pager's defaults are used. Mobile user
    agents get the ``spIndex`` template, everyone else ``index``.
    """
    # Genre ids covered by this listing, kept in one place so the count
    # query and the page query cannot drift apart.
    genre_ids = (
        '510914, 294456, 101480,204122,506438,511121,101483,563353,200164,511068,204030,101467,'
        '511073,511018,563381,204064,506475,510927,204049,563350,204078,204262,204119,204088,'
        '204260,563379,511191,302804,563490,563378,101479,204081,204055,563455,563355,200045,'
        '563477,563373,204069,204084,101469,553329,204072,511248,204279,511026,563377,101484,'
        '511009,563349,101478,511046,204283,302801,510930,563380,563451,563364,563481,204263,'
        '200044,507977,204120,101476,553328,506498,506443,204130,204091,200048,510943,563406,'
        '201318,563461,204027,204086,204068,511199,204121,563359,101481,302800,510923,563463,'
        '506439,204071,204034,510973,563452,563387,204060,204061,563391,511182,511025,204080,'
        '563358,563474,563402,511155,511013,563356,563469,511072,563457,563352,204085,563489,'
        '563454,511048,200041,563375,204118,204275,204089,563398,563362,510993,510947,204039,'
        '563460,204036,511156,563482,563401,511111,511091,204048,563371,511134,510989,553310,'
        '510974,563465,511104,511187,200040,563464,101477,563486,101472,506454,204135,204092,'
        '204259,511069,204131,506497,510985,563483,563383,563466,510939,563470,204037,510938,'
        '200049,563453,563476,563366,511001,204065,302803,204265,506484,511021,204052,563456,'
        '204271,511178,563475,204083,563405,511129,204042,511057,563478,511249,302802,204063,'
        '204075,511194,563473,510922,563365,563497,553315,511163,511198,204051,511122,563479,'
        '511125,511160,563462,563459,511171,511110,511060,553314,511190,510931,511168,563392,'
        '204074,563496,563390,511099,510977,204266,506449,511130,510935,204126,511183,511233,'
        '563360,511076,563363,563393,511002,563385,563399,510997,506468,101468,511092,204123,'
        '511222,511221,506494,563374,511038,511042,563351,204127,510994,563416,563492,563491,'
        '563415,563369,510981,511037,101473,302807,511041,510986,506495,563493,204046,510982,'
        '510978,511022,511077,511264,204133,204066,563495,563376,511195,563446,506453,511126,'
        '511214,511283,511164,511159,511179,563494,511049,563458,511252,511237,511279,511151,'
        '506444,563370,511238,563386,302806,511053,511251,204134,506458,302805,511271,511089,'
        '506485,511066,510942,511229,511065,511005,506459,563372,511276,563357,506480,563346,'
        '511218,511100,511140,563485,510946,553319,563421,563480,510971,511056,204128,511275,'
        '511288,553299,563472,563394,204136,511259,510926,553309,563354,563467,563445,511209,'
        '511210,506469,511230,563468,511203,204077,506489,506479,511010,511267,511272,511136,'
        '511061,511284,511170,510918,511225,553321,563471,510934,511268,553320,204132,511186,'
        '511119,563345,563361'
    )
    genre_filter = 'where genreId in (' + genre_ids + ')'

    # Build the pagination: count the matching items first.
    count_sql = 'select id, count(*) count from Item ' + genre_filter + ';'
    itemCount = Item.objects.raw(count_sql)[0].count

    try:
        # ``page`` is numeric: normalise it first, then fetch the paging
        # data for the normalised page.
        page = pager.pagerInt(int(request.GET['page']), itemCount)['page']
        paging = pager.pagerInt(page, itemCount)
        start_id = paging['start_id']
        prevNext = paging['prevNext']
    except Exception:
        # ``page`` is missing or not a number: fall back to the defaults.
        paging = pager.pagerNotInt()
        page = paging['page']
        start_id = paging['start_id']
        prevNext = paging['prevNext']

    limit_clause = ' ORDER BY itemPrice DESC limit %s, 30;' % start_id
    itemList = Item.objects.raw(
        'select id, itemName, itemPrice, mediumImageUrls from Item ' + genre_filter + limit_clause)

    # Floor division keeps the page count an integer on Python 3 as well.
    c = Context({'typicalCategories':typicalCategories, 'searchForm':searchForm, 'itemList':itemList, 'itemCount':itemCount // 30 + 1, 'prevNext':prevNext, 'page': page,})

    # A missing User-Agent header is treated as a non-mobile client.
    if parse(request.META.get('HTTP_USER_AGENT', '')).is_mobile:
        t = loader.get_template('rakuten/spIndex.html')
    else:
        t = loader.get_template('rakuten/index.html')

    return HttpResponse(t.render(c))
Exemple #19
0
    def get(self):
        """Summarise the recorded requests by browser family and device class.

        Returns:
            ``{'data': {'count', 'categories', 'agents'}}`` where
            ``categories`` counts mobile/tablet/pc/bot requests (first match
            in that order), ``count`` is the sum of those category counts,
            and ``agents`` is a list of ``{'name', 'count'}`` dicts, one per
            browser family.
        """
        agents = Counter()
        categories = Counter()

        for req in _requests:
            # The agent value is a list; its first entry may carry quotes.
            agent = user_agents.parse(req.get('agent', ['-'])[0].replace('"', ''))
            agents[agent.browser.family] += 1

            if agent.is_mobile:
                categories['mobile'] += 1
            elif agent.is_tablet:
                categories['tablet'] += 1
            elif agent.is_pc:
                categories['pc'] += 1
            elif agent.is_bot:
                categories['bot'] += 1

        # ``items()`` works on both Python 2 and 3 (``iteritems`` does not).
        results = [{'name': name, 'count': count} for name, count in agents.items()]

        return {
            'data': {
                'count': sum(categories.values()),
                'categories': categories,
                'agents': results
            }
        }
Exemple #20
0
def search(request):
    """Render one page of items whose name contains the ``query`` parameter.

    The search term is bound as a SQL parameter, never formatted into the
    statement. The page number comes from the ``page`` query parameter;
    when it is missing, not a number, or the pager rejects it, the pager's
    defaults are used. Mobile user agents get the ``spSearch`` template,
    everyone else ``search``.
    """
    # A missing ``query`` parameter is treated as an empty search term.
    query = request.GET.get('query', '')
    # The LIKE wildcards travel inside the bound value, so the database
    # driver handles all quoting of the user-supplied text.
    like_pattern = '%' + query + '%'

    # Build the pagination: count the matching items first.
    count_sql = 'select id, count(*) count from Item where itemName like %s'
    itemCount = Item.objects.raw(count_sql, [like_pattern])[0].count

    try:
        # ``page`` is numeric: normalise it first, then fetch the paging
        # data for the normalised page.
        page = pager.pagerInt(int(request.GET['page']), itemCount)['page']
        paging = pager.pagerInt(page, itemCount)
        start_id = paging['start_id']
        prevNext = paging['prevNext']
    except Exception:
        # ``page`` is missing or not a number: fall back to the defaults.
        paging = pager.pagerNotInt()
        page = paging['page']
        start_id = paging['start_id']
        prevNext = paging['prevNext']

    items_sql = ('select id, itemName, mediumImageUrls, itemPrice from Item '
                 'where itemName like %s limit %s, 30;')
    # int() so the offset is bound as a number, not a quoted string.
    searchedItems = Item.objects.raw(items_sql, [like_pattern, int(start_id)])

    # A missing User-Agent header is treated as a non-mobile client.
    if parse(request.META.get('HTTP_USER_AGENT', '')).is_mobile:
        t = loader.get_template('rakuten/spSearch.html')
    else:
        t = loader.get_template('rakuten/search.html')

    # Floor division keeps the page count an integer on Python 3 as well.
    c = Context({'typicalCategories':typicalCategories, 'query':query, 'searchedItems':searchedItems, 'searchForm':searchForm, 'itemCount':itemCount // 30 + 1, 'page':page, 'start_id':start_id, 'prevNext':prevNext, })
    return HttpResponse(t.render(c))
Exemple #21
0
def detail(request, item_id):
    """Render the detail page of one item with related items and breadcrumb.

    Raises ``Http404`` when no item has the given id. Mobile user agents
    get the ``spDetail`` template, everyone else ``detail``.
    """
    from django.http import Http404

    # Item details. Ids are bound as SQL parameters rather than formatted
    # into the statement.
    matches = Item.objects.raw('select id, itemName, itemCaption, mediumImageUrls, format(itemPrice, 0) itemPrice, affiliateUrl, genreId, getTime, price_sentence, alchol_sentence, capacity_sentence, twenty_sentence ,tweetSentence from Item where id = %s;', [item_id])
    try:
        item = matches[0]
    except IndexError:
        raise Http404('no such item')

    # Items in the same category
    category_id = item.genreid
    relatedItems = Item.objects.raw('select id, itemName, format(itemPrice,0) itemPrice, affiliateUrl, mediumImageUrls from Item where genreId = %s limit 30;', [category_id])

    # Look up the current level of the category tree
    nowCategory = Rakutencategory.objects.raw('select * from RakutenCategory where Category_id = %s', [category_id])

    # Build the breadcrumb and the related categories
    try:
        crumbs = createBreadCrumb.makeBreadCrumb(nowCategory)
        breadCrumb = crumbs['breadCrumb']
        bottomCategories = crumbs['bottomCategories']
    except IndexError:
        # No category found: use an empty breadcrumb and no related categories.
        bottomCategories = []
        breadCrumb = []

    # A missing User-Agent header is treated as a non-mobile client.
    if parse(request.META.get('HTTP_USER_AGENT', '')).is_mobile:
        t = loader.get_template('rakuten/spDetail.html')
    else:
        t = loader.get_template('rakuten/detail.html')
    c = Context({'typicalCategories':typicalCategories, 'item':item, 'bottomCategories':bottomCategories, 'relatedItems':relatedItems, 'breadCrumb':breadCrumb, 'searchForm':searchForm})
    return HttpResponse(t.render(c))
Exemple #22
0
 def parse(self, event):
     """Enrich ``event`` with details parsed from its user-agent field.

     When ``self.field`` is present in ``event``, its value is parsed and
     the browser, OS, device and capability flags are stored under
     ``self.out_field`` if that is set, or merged into ``event`` otherwise.
     The event is returned in either case.
     """
     if self.field not in event:
         return event

     agent = user_agents.parse(event[self.field])
     browser, system, device = agent.browser, agent.os, agent.device
     ua_data = {
         'browser': {'family': browser.family, 'version': browser.version_string},
         'os': {'family': system.family, 'version': system.version_string},
         'device': {'family': device.family, 'brand': device.brand, 'model': device.model},
         'is_mobile': agent.is_mobile,
         'is_tablet': agent.is_tablet,
         'is_touch_capable': agent.is_touch_capable,
         'is_pc': agent.is_pc,
         'is_bot': agent.is_bot,
     }

     if not self.out_field:
         event.update(ua_data)
     else:
         event[self.out_field] = ua_data
     return event
Exemple #23
0
def Analytics(REQ):
    """Record a visit in the flow analytics table and return ``''``.

    Visits are keyed by client IP, browser family and OS family, and are
    attached to the ``ebusiness_members`` row whose code equals the ``sn``
    query parameter, or to no member when there is none. A repeat visit on
    the same day increments ``num`` and updates ``endurl`` on the latest
    matching row; otherwise a new row is created.
    """
    from datetime import datetime

    meta = REQ.META
    # A missing User-Agent header is parsed as an empty string.
    user_agent = parse(meta.get('HTTP_USER_AGENT', ''))
    Code = REQ.GET.get("sn", "")
    inurl = REQ.get_full_path()
    os = user_agent.os.family
    browser = user_agent.browser.family
    # Prefer the forwarded address (first entry of a comma-separated list)
    # and fall back to the direct peer address.
    forwarded_for = meta.get('HTTP_X_FORWARDED_FOR', '')
    ip = forwarded_for.split(',')[0].strip() or meta.get('REMOTE_ADDR')
    indata = {'ip': ip, 'browser': browser, 'os': os}

    try:
        ebusiness = ebusiness_members.objects.get(code=Code)
        isIn = ebusiness.flow_analytics_set.filter(**indata).order_by('-intime')
        ebusiness_flow = ebusiness.flow_analytics_set
    except ObjectDoesNotExist:
        isIn = flow_analytics.objects.filter(**indata).filter(ebusiness=None).order_by('-intime')
        ebusiness_flow = flow_analytics.objects

    today = datetime.now().strftime('%Y%m%d')
    # Only the most recent matching row decides between update and create.
    latest = list(isIn[:1])
    if latest and latest[0].intime.strftime('%Y%m%d') == today:
        record = latest[0]
        record.num = record.num + 1
        record.endurl = inurl
        record.save()
    else:
        indata.update({"inurl": inurl, "endurl": inurl})
        # create() already saves the new row.
        ebusiness_flow.create(**indata)
    return ''
def make_activity_log_entry(user, correct, request):
    """Save an ``ActivityLogEntry`` for ``user``; print the error on failure.

    The device type is 0 for mobile, 1 for tablet, 2 for pc and -1
    otherwise (first match wins). Latitude and longitude come from the
    request's JSON body. Any exception is printed and swallowed.
    """
    try:
        client_ip = request.remote_addr
        agent = parse(request.user_agent.string)
        payload = request.json
        device_type = (
            0 if agent.is_mobile
            else 1 if agent.is_tablet
            else 2 if agent.is_pc
            else -1
        )

        fields = dict(
            correct=correct,
            current_word_index=user.current_word_index,
            datetime=datetime.datetime.now(),
            datetime_quest_started=user.datetime_quest_started,
            datetime_question_started=user.datetime_question_started,
            device_family=agent.device.family,
            device_model=agent.device.model,
            device_type=device_type,
            ip_address=client_ip,
            is_daily=user.is_on_daily,
            is_timed=user.is_timed,
            latitude=payload['latitude'],
            longitude=payload['longitude'],
            number_of_questions=user.number_of_questions,
            user_id=user.user_id,
        )
        ActivityLogEntry(**fields).save()
    except Exception as ex:
        print(ex)
        print("failed too make activity log entry.")
def make_quest_log_entry(user, request):
    """Save a ``QuestLogEntry`` for ``user``; print the error on failure.

    The device type is 0 for mobile, 1 for tablet, 2 for pc and -1
    otherwise (first match wins). Latitude and longitude come from the
    request's JSON body. Any exception is printed and swallowed.
    """
    try:
        client_ip = request.remote_addr
        agent = parse(request.user_agent.string)
        payload = request.json
        device_type = (
            0 if agent.is_mobile
            else 1 if agent.is_tablet
            else 2 if agent.is_pc
            else -1
        )

        fields = dict(
            chapter_index=user.chapter_index_id,
            cumulative=user.cumulative,
            datetime_quest_completed=datetime.datetime.now(),
            datetime_quest_started=user.datetime_quest_started,
            device_family=agent.device.family,
            device_model=agent.device.model,
            device_type=device_type,
            ip_address=client_ip,
            is_daily=user.is_on_daily,
            is_timed=user.is_timed,
            latitude=payload['latitude'],
            longitude=payload['longitude'],
            number_of_questions=user.number_of_questions,
            user_id=user.user_id,
            number_correct=user.number_correct,
        )
        QuestLogEntry(**fields).save()
    except Exception as ex:
        print(ex)
        print("Failed to make quest log entry.")
Exemple #26
0
def index(groupKey=None):
    """Render ``index.html`` after logging the visit.

    Aborts with 404 for the ``favicon.ico`` pseudo-group, clears the
    session, stores an ``AccessLog`` row holding the client address and
    platform ("mobile" or "web"), then returns the rendered page.  The
    ``email`` and ``debug`` query arguments and ``groupKey`` are copied
    into cookies when they are set.
    """
    if groupKey == 'favicon.ico':
        abort(404)

    # Reset the session.
    session.clear()

    client_ip = request.remote_addr

    # https://pypi.python.org/pypi/user-agents/ (pip install user-agents)
    from user_agents import parse
    agent = parse(request.user_agent.string)
    platform = "mobile" if agent.is_mobile else "web"

    db_session.add(AccessLog(client_ip, platform))
    try:
        db_session.commit()
    except exc.IntegrityError:
        # Undo the failed insert so the session stays usable.
        db_session.rollback()

    debug = request.args.get('debug', '')
    email = request.args.get('email', '')

    response = make_response(render_template('index.html', groupKey=groupKey))
    # Empty query arguments are not turned into cookies.
    for cookie_name, cookie_value in (('email', email), ('debug', debug)):
        if cookie_value != '':
            response.set_cookie(cookie_name, cookie_value)
    if groupKey is not None:
        response.set_cookie('groupKey', groupKey)

    return response
Exemple #27
0
def redirect(request, shorturl):
    """Redirect a short URL to the target matching the caller's device.

    Looks up the ``ShortUrl`` whose ``shortid`` equals ``shorturl``, picks
    the mobile, tablet or desktop target from the request's User-Agent,
    increments the matching redirect counter and saves the object.

    Returns an ``HttpResponseServerError`` when the lookup fails, otherwise
    an ``HttpResponseRedirect``.
    """
    # Respond with a 500 when the short URL cannot be loaded.
    try:
        url_object = ShortUrl.objects.get(shortid=shorturl)
    except Exception:
        return HttpResponseServerError()

    # Classify the client (pip install pyyaml ua-parser user-agents).
    user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))

    if user_agent.is_mobile:
        url_object.mobileRedirectCount += 1
        target = url_object.fullMobileUrl
    elif user_agent.is_tablet:
        url_object.tabletRedirectCount += 1
        target = url_object.fullTabletUrl
    else:
        # PCs, plus agents that match none of the device flags (bots,
        # command-line clients, empty header).  Those previously fell off
        # the end of the view and returned None instead of a response.
        url_object.desktopRedirectCount += 1
        target = url_object.fullDesktopUrl

    url_object.save()
    return HttpResponseRedirect(target)
Exemple #28
0
def index():
    """Render the home page for the user stored on ``g.user``."""
    # The parsed User-Agent is not used below; the call is kept as it was.
    parse(request.headers.get('User-Agent'))
    return render_template(
        'index.html',
        title='home',
        user=g.user.nickname,
        image=g.user.image,
        id=g.user.id,
    )
def verify_browser(request, min_versions=None):
    """Tag ``request`` with browser-support information.

    Sets three attributes on ``request``:

    * ``user_agent`` - the parsed ``HTTP_USER_AGENT`` header,
    * ``browser_unknown`` - ``False`` only when the browser family is a
      key of ``min_versions``,
    * ``browser_unsupported`` - ``True`` when the detected version is
      lower than the minimum listed for that family.

    If ``min_versions`` is passed in, it should be a dictionary of
    ``'browser': version``; when it is falsy,
    ``settings.MIN_BROWSER_VERSIONS`` is used instead.
    """
    minimums = min_versions or settings.MIN_BROWSER_VERSIONS

    request.user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))
    request.browser_unknown = True
    request.browser_unsupported = False

    for family, required in minimums.items():
        if request.user_agent.browser.family != family:
            continue

        request.browser_unknown = False

        detected = request.user_agent.browser.version
        # A list/tuple version is reduced to its first component, with 0
        # standing in for an empty one.
        if isinstance(detected, (list, tuple)):
            detected = detected[0] if len(detected) > 0 else 0

        if detected < required:
            request.browser_unsupported = True

        break
Exemple #30
0
def publish(request, dispatcher):
  """Build a ``Req`` from a Twisted request and dispatch it.

  The ``Req`` has the form {key: value, ..., cookies: {key: value},
  request: request}.  It also carries the parsed user agent (``None``
  when the ``user_agents`` module is unavailable) and a session cache.

  Returns whatever ``dispatcher.request(path, req)`` returns.  Exceptions
  raised by the dispatcher propagate to the caller unchanged.
  """
  # First we need to transform the request into our own format.
  req = Req()
  # Single-valued args are unwrapped; multi-valued args are kept as a list.
  req.update({
      name: (values if len(values) > 1 else values[0])
      for name, values in request.__dict__['args'].items()
  })
  req.cookies = request.__dict__['received_cookies'] or {}

  # If we have the relevant module then add user agent information.
  if user_agents:
    ua_string = request.getHeader('user-agent')
    req.user_agent = user_agents.parse(ua_string or '')
  else:
    req.user_agent = None
  req.request = request

  # Set up the session cache.
  session = request.getSession()
  req.cache = ISessionCache(session)

  # Get the domain; the port is excluded.
  req._v_domain = req.get_host().split(":")[0]

  # Now process the request.  The former ``try/except: raise`` wrapper only
  # re-raised, and the logging after its ``raise`` could never run, so the
  # call is made directly.
  path = request.__dict__['path']
  return dispatcher.request(path, req)
Exemple #31
0
 def get_browser(ua: str):
     """Return the browser family parsed from the User-Agent string ``ua``."""
     parsed = parse(ua)
     return parsed.browser.family
Exemple #32
0
 def get_phone(ua: str):
     """Return the device family parsed from the User-Agent string ``ua``."""
     # Parse once; the former ``is_bot`` lookup was stored and never used.
     return parse(ua).device.family
Exemple #33
0
def is_mobile(user_agent):
    """Return the ``is_mobile`` flag of the parsed ``user_agent`` string."""
    return parse(user_agent).is_mobile
# !/usr/bin/env python
# -*- coding: utf-8 -*-

from user_agents import parse

# Demo: parse a sample User-Agent string (Chrome 70 on 64-bit Windows) and
# print the browser family, OS family and device family.
ua_string = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.110 Safari/537.36'
user_agent = parse(ua_string)
bw = user_agent.browser.family  # browser
s = user_agent.os.family  # operating system
juge_pc = user_agent.is_pc  # whether this is a desktop system
phone = user_agent.device.family
print(bw, s, phone)
Exemple #35
0
def ua_parse(ua):
    """Parse ``ua`` and return its string form split on ``' / '``."""
    description = str(parse(ua))
    return description.split(' / ')
Exemple #36
0
def user_agent_info():
    """Return the string form of the current request's parsed User-Agent."""
    header_value = request.headers.get('User-Agent')
    parsed = parse(header_value)
    return str(parsed)
# NOTE(review): ``revgc`` is presumably a reverse geocoder created earlier;
# this queries it with the latitude/longitude entries labelled 0 - confirm.
revgc.search([df.latitude[0], df.longitude[0]])
# Day of the week of ``created`` as returned by ``weekday()`` (Monday is 0).
df['dow'] = df['created'].apply(lambda x: pd.to_datetime(x).weekday())
# 1 when ``created`` falls on weekday 5 or 6 (Saturday/Sunday), otherwise 0.
df['is_weekend'] = df['created'].apply(lambda x: 1 if pd.to_datetime(x).weekday() in (5, 6) else 0)
def make_harmonic_features(value, period=24):
    """Project a cyclic value onto the unit circle.

    ``value`` is scaled so that one full ``period`` corresponds to an angle
    of ``2 * pi``; the cosine and sine of that angle are returned.  Values
    that are close on the cycle (e.g. 23 and 1 for ``period=24``) therefore
    end up close to each other in the returned coordinates.

    :param value: scalar or NumPy array of positions within the cycle
    :param period: length of one full cycle (24 by default)
    :return: tuple ``(cos, sin)`` of the scaled value
    """
    # Build a new object rather than using ``value *= ...``: the in-place
    # form overwrote a caller's float array and failed on integer arrays.
    angle = value * (2 * np.pi / period)
    return np.cos(angle), np.sin(angle)
from scipy.spatial import distance
# Distances between encoded hours on the 24-hour cycle: 23 vs 1 and 9 vs 11
# are both two hours apart, 9 vs 21 are twelve hours apart.
# Only ``distance`` is imported from scipy.spatial, so the function has to be
# reached through it; a bare ``euclidean`` name is never bound here.
distance.euclidean(make_harmonic_features(23), make_harmonic_features(1))
distance.euclidean(make_harmonic_features(9), make_harmonic_features(11))
distance.euclidean(make_harmonic_features(9), make_harmonic_features(21))
### pip install -q pyyaml ua-parser user-agents
import user_agents

# Sample User-Agent string (Ubuntu Chromium 56 on 64-bit Linux).
ua = 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/56.0.2924.76 Chrome/56.0.2924.76 Safari/537.36'
# From here on ``ua`` is the parsed result, no longer the raw string.
ua = user_agents.parse(ua)

# Print the classification flags plus the OS and browser family/version.
print('Is a bot? ', ua.is_bot)
print('Is mobile? ', ua.is_mobile)
print('Is PC? ',ua.is_pc)
print('OS Family: ',ua.os.family)
print('OS Version: ',ua.os.version)
print('Browser Family: ',ua.browser.family)
print('Browser Version: ',ua.browser.version)
from sklearn.preprocessing import StandardScaler
from scipy.stats import beta
from scipy.stats import shapiro
import numpy as np

# Draw 1000 random samples from a Beta(1, 10) distribution, shaped as a
# single-column array, then run scipy.stats.shapiro (the Shapiro-Wilk
# normality test) on them.  The samples are random, so results vary per run.
data = beta(1, 10).rvs(1000).reshape(-1, 1)
shapiro(data)
Exemple #38
0
def parse_useragent(x):
    """Return the result of ``parse`` for the User-Agent string ``x``."""
    return parse(x)
    def __init__(self, request, placements, publisher, **kwargs):
        """
        Set up an ad decision from the incoming request.

        :param request: the HttpRequest object with geo data attached from GeolocationMiddleware
        :param placements: possible positions for the ad to go
        :param publisher: publisher whose keywords and ``allow_*_campaigns``
            flags constrain the decision
        :param kwargs: optional ``keywords``, ``campaign_types``, ``ad_slug``
            and ``campaign_slug`` arguments for the backend
        """
        self.request = request
        self.user_agent = parse(get_client_user_agent(request))
        self.placements = placements
        self.publisher = publisher

        self.ad_types = [placement["ad_type"] for placement in self.placements]

        geo = request.geo
        self.country_code = geo.country_code
        self.region_code = geo.region_code
        self.metro_code = geo.metro_code

        # Optional parameters; a missing or empty campaign type list means
        # "all campaign types".
        self.keywords = kwargs.get("keywords", []) or []
        requested_campaign_types = (
            kwargs.get("campaign_types", []) or ALL_CAMPAIGN_TYPES
        )

        # Add default keywords from publisher
        if self.publisher.keywords:
            log.debug(
                "Adding default keywords: publisher=%s keywords=%s",
                self.publisher.slug,
                self.publisher.keywords,
            )
            self.keywords = list(
                set(self.keywords) | set(self.publisher.keywords)
            )

        # Publishers can request certain campaign types, but a type is only
        # honoured when the matching publisher flag allows it.
        gated_campaign_types = (
            ("allow_paid_campaigns", PAID_CAMPAIGN),
            ("allow_affiliate_campaigns", AFFILIATE_CAMPAIGN),
            ("allow_community_campaigns", COMMUNITY_CAMPAIGN),
            ("allow_house_campaigns", HOUSE_CAMPAIGN),
        )
        self.campaign_types = []
        for flag_name, campaign_type in gated_campaign_types:
            allowed = getattr(self.publisher, flag_name)
            if allowed and campaign_type in requested_campaign_types:
                self.campaign_types.append(campaign_type)

        # When set, only return a specific ad or ads from a campaign
        self.ad_slug = kwargs.get("ad_slug")
        self.campaign_slug = kwargs.get("campaign_slug")
Exemple #40
0
    def form_valid(self, form):
        """Save the reported issue and run the follow-up bookkeeping.

        In order, this:

        1. builds the issue from ``form`` and links it to its ``Domain``
           (created on first use, with any ``www.`` removed from the name);
        2. awards one point to an authenticated user who introduced a new
           domain;
        3. attaches the uploaded screenshot named by the ``screenshot-hash``
           POST field, if present;
        4. recomputes the authenticated user's profile title from their
           total issue count (<=10: 1, <=50: 2, <=200: 3, otherwise 4);
        5. opens a GitHub issue when the domain has a ``github`` URL and
           ``GITHUB_PASSWORD`` is set in the environment;
        6. sends anonymous users to the login page (remembering the issue in
           the session), and everyone else back to the referring page after
           ``process_issue`` has run.

        :param form: the validated issue form
        :return: an ``HttpResponseRedirect``
        """
        obj = form.save(commit=False)
        if self.request.user.is_authenticated():
            obj.user = self.request.user
        bare_domain = obj.domain_name.replace("www.", "")
        domain, created = Domain.objects.get_or_create(
            name=bare_domain,
            defaults={'url': "http://" + bare_domain})
        obj.domain = domain
        if created and self.request.user.is_authenticated():
            Points.objects.create(user=self.request.user,
                                  domain=domain,
                                  score=1)
            messages.success(self.request, 'Domain added! + 1')

        screenshot_hash = self.request.POST.get('screenshot-hash')
        if screenshot_hash:
            screenshot_name = screenshot_hash + '.png'
            # 'uploads\\/' is the exact value the old '\/' literal produced
            # (backslash + slash), written without the invalid escape.
            # The storage file is closed once the screenshot has been saved.
            with default_storage.open('uploads\\/' + screenshot_name,
                                      'rb') as reopen:
                obj.screenshot.save(screenshot_name,
                                    File(reopen),
                                    save=True)
        obj.user_agent = self.request.META.get('HTTP_USER_AGENT')
        obj.save()

        if self.request.user.is_authenticated():
            total_issues = Issue.objects.filter(user=self.request.user).count()
            user_prof = UserProfile.objects.get(user=self.request.user)
            if total_issues <= 10:
                user_prof.title = 1
            elif total_issues <= 50:
                user_prof.title = 2
            elif total_issues <= 200:
                user_prof.title = 3
            else:
                user_prof.title = 4

            user_prof.save()

        if domain.github and os.environ.get("GITHUB_PASSWORD"):
            # Imported locally; this ``parse`` is giturlparse's and is only
            # visible inside this branch.
            from giturlparse import parse
            from requests.auth import HTTPBasicAuth
            import json
            import requests
            github_url = domain.github.replace("https", "git").replace(
                "http", "git") + ".git"
            repo = parse(github_url)

            url = 'https://api.github.com/repos/%s/%s/issues' % (repo.owner,
                                                                 repo.repo)

            auth = HTTPBasicAuth(os.environ.get("GITHUB_USERNAME"),
                                 os.environ.get("GITHUB_PASSWORD"))
            issue = {
                'title':
                obj.description,
                'body':
                "![0](" + obj.screenshot.url + ") http://bugheist.com/issue/" +
                str(obj.id),
                'labels': ['bug', 'bugheist']
            }
            # NOTE(review): no timeout is passed and the response is assumed
            # to contain 'html_url'; a failed API call raises here - confirm
            # that is acceptable.
            r = requests.post(url, json.dumps(issue), auth=auth)
            response = r.json()
            obj.github_url = response['html_url']
            obj.save()

        redirect_url = '/report'
        # redirect users to login
        if not self.request.user.is_authenticated():
            # we store the issue id on the user session to assign it as soon
            # as they log in or register
            self.request.session['issue'] = obj.id
            self.request.session['created'] = created
            self.request.session['domain'] = domain.id
            login_url = reverse('account_login')
            return HttpResponseRedirect(u'{}?next={}'.format(
                login_url, redirect_url))

        # assign issue
        self.process_issue(self.request.user, obj, created, domain)
        # NOTE(review): HTTP_REFERER may be absent, in which case None is
        # passed to HttpResponseRedirect - confirm the intended fallback.
        return HttpResponseRedirect(self.request.META.get('HTTP_REFERER'))
Exemple #41
0
def lambda_handler(event, context):

    error_count = 0

    ########################
    #### Downloading the new RAW events from S3

    bucket_name = event['Records'][0]['s3']['bucket']['name']
    file_key = event['Records'][0]['s3']['object']['key']
    logger.info('Reading {} from {}'.format(file_key, bucket_name))
    s3.download_file(bucket_name, file_key, '/tmp/file.zip')

    ########################
    #### Getting geolocation DB file
    geoippath = '/tmp/GeoLite2-City.mmdb'
    try:
        s3.download_file(bucket_name, 'GeoLite2-City.mmdb',
                         '/tmp/GeoLite2-City.mmdb')
    except:
        url = "https://geolite.maxmind.com/download/geoip/database/GeoLite2-City.tar.gz"  # This link no longer works. TODO: Check alternatives
        response = get(url)
        with open('/tmp/GeoLite2-City.tar.gz', 'wb') as file:
            file.write(response.content)
        geofilename = re.compile("GeoLite2-City.mmdb")
        tar = tarfile.open("/tmp/GeoLite2-City.tar.gz")
        for member in tar.getmembers():
            if geofilename.search(member.name):
                geoippath = '/tmp/' + member.name
                tar.extract(member, path='/tmp/')
        tar.close()
        s3.upload_file(geoippath, bucket_name, 'GeoLite2-City.mmdb')

    ########################
    #### Getting column names for all tables in atomic schema

    conn = psycopg2.connect(host=os.environ['POSTGRES_HOST'],
                            database=os.environ['POSTGRES_DATABASE'],
                            user=os.environ['POSTGRES_USER'],
                            password=os.environ['POSTGRES_PASSWORD'])
    sql = "SELECT t.table_schema, t.table_name, c.column_name FROM information_schema.tables t JOIN INFORMATION_SCHEMA.COLUMNS c ON c.table_name = t.table_name WHERE t.table_schema='atomic' ORDER BY t.table_name, c.ordinal_position ;"

    cur = conn.cursor()
    cur.execute(sql)
    table_list = cur.fetchall()

    cur.close()
    conn.close()

    table_columns = {}
    for e in table_list:
        if not e[1] in table_columns.keys():
            table_columns[e[1]] = []
        table_columns[e[1]].append(e[2])

    ########################
    #### Loading raw events content

    archgz = gzip.open('/tmp/file.zip')
    file_content = archgz.read()
    lines = file_content.split(b'\n')

    ########################
    #### Processing entries and storing them

    header = re.search('#Fields: (.*)', lines[1].decode("utf-8"))
    header = header.group(1).split()

    tprint(file_key, "Processing and enriching raw entries")
    try:
        datvalues = ""
        all_events = []
        geoipdbreader = geoip2.database.Reader(geoippath)
        i = 0
        for l in lines[2:-1]:

            r = re.compile(r'([^\t]*)\t*')
            l = r.findall(l.decode("utf-8"))[:-1]
            collector_tstamp = l[0] + ' ' + l[1]
            refersplitter = re.compile(r'([^/]*)/*')
            referer = refersplitter.findall(l[9])[:-1]
            refr_urlscheme = referer[0][:-1]
            try:
                refr_urlhost = referer[1]
            except:
                refr_urlhost = '-'
            try:
                refr_urlpath = '/' + '/'.join(referer[2:])
            except:
                refr_urlpath = '-'
            querysplitter = re.compile(r'([^\?]*)\?*')
            qryurl = querysplitter.findall(referer[-1])[:-1]
            try:
                refr_urlquery = qryurl[1]
            except IndexError:
                refr_urlquery = '-'
            userag = l[10].replace("%2520", " ")
            useragent = userag
            userag = parse(userag)
            br_name = userag.browser.family + ' ' + userag.browser.version_string
            br_family = userag.browser.family
            br_version = userag.browser.version
            os_family = userag.os.family
            dvce_type = userag.device.family
            dvce_ismobile = userag.is_mobile

            user_ipaddress = l[4]

            #### We determine geolocation info based on user IP.
            #### Set to NULL if no info available on DB
            try:
                geoipdbresult = geoipdbreader.city(l[4])
                geo_country = geoipdbresult.registered_country.iso_code
                if geo_country is None:
                    geo_country = ''
                try:
                    geo_city = geoipdbresult.city.names['en']
                except:
                    geo_city = '-'
                geo_zipcode = geoipdbresult.postal.code
                geo_latitude = geoipdbresult.location.latitude
                geo_longitude = geoipdbresult.location.longitude
                try:
                    geo_region_name = geoipdbresult.subdivisions[0].names['en']
                except:
                    geo_region_name = '-'
                geo_timezone = geoipdbresult.location.time_zone
            except:
                geo_country = ''
                geo_city = ''
                geo_zipcode = ''
                geo_latitude = ''
                geo_longitude = ''
                geo_region_name = ''
                geo_timezone = ''

            # In the rare case latitudes and longitudes are set to None, we reset them to '' (later NULL) to avoid insertion errors
            if geo_latitude is None:
                geo_latitude = ''
            if geo_longitude is None:
                geo_longitude = ''

            urisplt = re.compile(r'([^&]*)&*')
            urispltnodes = urisplt.findall(l[11])[:-1]

            user_ipaddress = hashlib.sha224(user_ipaddress.encode(
                'utf-8')).hexdigest()  # We store the IP as a hash for privacy
            spvalues = {
                'app_id': '-',
                'platform': '-',
                'collector_tstamp': collector_tstamp,
                'dvce_created_tstamp': '-',
                'event': '-',
                'event_id': '-',
                'txn_id': '-',
                'name_tracker': '-',
                'v_tracker': '-',
                'user_id': '-',
                'user_ipaddress': user_ipaddress,
                'user_fingerprint': '-',
                'domain_userid': '-',
                'domain_sessionidx': '-',
                'network_userid': '-',
                'geo_country': geo_country,
                'geo_city': geo_city,
                'geo_zipcode': geo_zipcode,
                'geo_latitude': geo_latitude,
                'geo_longitude': geo_longitude,
                'geo_region_name': geo_region_name,
                'page_url': '-',
                'page_title': '-',
                'page_referrer': '-',
                'refr_urlscheme': refr_urlscheme,
                'refr_urlhost': refr_urlhost,
                'refr_urlpath': refr_urlpath,
                'refr_urlquery': refr_urlquery,
                'se_category': '-',
                'se_action': '-',
                'se_label': '-',
                'se_property': '-',
                'se_value': '-',
                'unstruct_event': '-',
                'tr_orderid': '-',
                'tr_affiliation': '-',
                'tr_total': '-',
                'tr_tax': '-',
                'tr_shipping': '-',
                'tr_city': '-',
                'tr_state': '-',
                'tr_country': '-',
                'ti_orderid': '-',
                'ti_sku': '-',
                'ti_name': '-',
                'ti_category': '-',
                'ti_price': '-',
                'ti_quantity': '-',
                'pp_xoffset_min': '-',
                'pp_xoffset_max': '-',
                'pp_yoffset_min': '-',
                'pp_yoffset_max': '-',
                'useragent': unquote(unquote(useragent)),
                'br_name': br_name,
                'br_family': br_family,
                'br_version': br_version,
                'br_lang': '-',
                'br_features_pdf': '-',
                'br_features_flash': '-',
                'br_features_java': '-',
                'br_features_director': '-',
                'br_features_quicktime': '-',
                'br_features_realplayer': '-',
                'br_features_windowsmedia': '-',
                'br_features_gears': '-',
                'br_features_silverlight': '-',
                'br_cookies': '-',
                'br_colordepth': '-',
                'br_viewwidth': '-',
                'br_viewheight': '-',
                'os_family': os_family,
                'os_timezone': '-',
                'dvce_type': dvce_type,
                'dvce_ismobile': dvce_ismobile,
                'dvce_screenwidth': '-',
                'dvce_screenheight': '-',
                'doc_charset': '-',
                'doc_width': '-',
                'doc_height': '-',
                'tr_currency': '-',
                'ti_currency': '-',
                'geo_timezone': geo_timezone,
                'dvce_sent_tstamp': '-',
                'domain_sessionid': '-',
                'event_vendor': '-'
            }

            if len(urispltnodes[0]) > 3:
                for spparams in urispltnodes:
                    spsplitter = re.compile(r'([^=]*)=*')
                    sp = spsplitter.findall(spparams)[:-1]
                    if sp[0] == 'stm':
                        spvalues['dvce_sent_tstamp'] = sp[1]
                    if sp[0] == 'e':
                        spvalues['event'] = sp[1]
                    if sp[0] == 'url':
                        spvalues['page_url'] = unquote(unquote(sp[1]))
                    if sp[0] == 'page':
                        spvalues['page_title'] = sp[1]
                    if sp[0] == 'pp_mix':
                        spvalues['pp_xoffset_min'] = sp[1]
                    if sp[0] == 'pp_max':
                        spvalues['pp_xoffset_max'] = sp[1]
                    if sp[0] == 'pp_miy':
                        spvalues['pp_yoffset_min'] = sp[1]
                    if sp[0] == 'pp_may':
                        spvalues['pp_yoffset_max'] = sp[1]
                    if sp[0] == 'tv':
                        spvalues['v_tracker'] = sp[1]
                    if sp[0] == 'tna':
                        spvalues['name_tracker'] = sp[1]
                    if sp[0] == 'aid':
                        spvalues['app_id'] = sp[1]
                    if sp[0] == 'p':
                        spvalues['platform'] = sp[1]
                    if sp[0] == 'tz':
                        spvalues['os_timezone'] = unquote(unquote(sp[1]))
                    if sp[0] == 'lang':
                        spvalues['br_lang'] = sp[1]
                    if sp[0] == 'cs':
                        spvalues['doc_charset'] = sp[1]
                    if sp[0] == 'f_pdf':
                        spvalues['br_features_pdf'] = sp[1]
                    if sp[0] == 'f_qt':
                        spvalues['br_features_quicktime'] = sp[1]
                    if sp[0] == 'f_realp':
                        spvalues['br_features_realplayer'] = sp[1]
                    if sp[0] == 'f_wma':
                        spvalues['br_features_windowsmedia'] = sp[1]
                    if sp[0] == 'f_dir':
                        spvalues['br_features_director'] = sp[1]
                    if sp[0] == 'f_fla':
                        spvalues['br_features_flash'] = sp[1]
                    if sp[0] == 'f_java':
                        spvalues['br_features_java'] = sp[1]
                    if sp[0] == 'f_gears':
                        spvalues['br_features_gears'] = sp[1]
                    if sp[0] == 'f_ag':
                        spvalues['br_features_silverlight'] = sp[1]
                    if sp[0] == 'res':
                        ressplitter = re.compile(r'([^x]*)x*')
                        res = ressplitter.findall(sp[1])[:-1]
                        spvalues['dvce_screenheight'] = res[1]
                        spvalues['dvce_screenwidth'] = res[0]
                        continue
                    if sp[0] == 'cd':
                        spvalues['br_colordepth'] = sp[1]
                    if sp[0] == 'cookie':
                        spvalues['br_cookies'] = sp[1]
                    if sp[0] == 'eid':
                        spvalues['event_id'] = sp[1]
                    if sp[0] == 'dtm':
                        spvalues['dvce_created_tstamp'] = sp[1]
                    if sp[0] == 'vp':
                        ressplitter = re.compile(r'([^x]*)x*')
                        brdim = ressplitter.findall(sp[1])[:-1]
                        spvalues['br_viewwidth'] = brdim[1]
                        spvalues['br_viewheight'] = brdim[0]
                        continue
                    if sp[0] == 'ds':
                        ressplitter = re.compile(r'([^x]*)x*')
                        docdim = ressplitter.findall(sp[1])[:-1]
                        spvalues['doc_width'] = docdim[1]
                        spvalues['doc_height'] = docdim[0]
                        continue
                    if sp[0] == 'vid':
                        spvalues['domain_sessionidx'] = sp[1]
                    if sp[0] == 'sid':
                        spvalues['domain_sessionid'] = sp[1]
                    if sp[0] == 'duid':
                        spvalues['domain_userid'] = sp[1]
                    if sp[0] == 'fp':
                        spvalues['user_fingerprint'] = sp[1]
                    if sp[0] == 'ue_px':
                        spvalues['unstruct_event'] = sp[1]
                    if sp[0] == 'refr':
                        spvalues['page_referrer'] = unquote(unquote(sp[1]))
                    if sp[0] == 'tid':
                        spvalues['txn_id'] = sp[1]
                    if sp[0] == 'uid':
                        spvalues['user_id'] = sp[1]
                    if (sp[0] == 'nuid') or (sp[0] == 'tnuid'):
                        spvalues['network_userid'] = sp[1]
                    if sp[0] == 'se_ca':
                        spvalues['se_category'] = sp[1]
                    if sp[0] == 'se_ac':
                        spvalues['se_action'] = sp[1]
                    if sp[0] == 'se_la':
                        spvalues['se_label'] = sp[1]
                    if sp[0] == 'se_pr':
                        spvalues['se_property'] = sp[1]
                    if sp[0] == 'se_va':
                        spvalues['se_value'] = sp[1]
                    if sp[0] == 'tr_id':
                        spvalues['tr_orderid'] = sp[1]
                    if sp[0] == 'tr_af':
                        spvalues['tr_affiliation'] = sp[1]
                    if sp[0] == 'tr_tt':
                        spvalues['tr_total'] = sp[1]
                    if sp[0] == 'tr_tx':
                        spvalues['tr_tax'] = sp[1]
                    if sp[0] == 'tr_sh':
                        spvalues['tr_shipping'] = sp[1]
                    if sp[0] == 'tr_ci':
                        spvalues['tr_city'] = sp[1]
                    if sp[0] == 'tr_st':
                        spvalues['tr_state'] = sp[1]
                    if sp[0] == 'tr_co':
                        spvalues['tr_country'] = sp[1]
                    if sp[0] == 'ti_id':
                        spvalues['ti_orderid'] = sp[1]
                    if sp[0] == 'ti_sk':
                        spvalues['ti_sku'] = sp[1]
                    if sp[0] == 'ti_na':
                        spvalues['ti_name'] = sp[1]
                    if sp[0] == 'ti_ca':
                        spvalues['ti_category'] = sp[1]
                    if sp[0] == 'ti_pr':
                        spvalues['ti_price'] = sp[1]
                    if sp[0] == 'ti_qu':
                        spvalues['ti_quantity'] = sp[1]
                    if sp[0] == 'tr_cu':
                        spvalues['tr_currency'] = sp[1]
                    if sp[0] == 'ti_cu':
                        spvalues['ti_currency'] = sp[1]
                    if sp[0] == 'evn':
                        spvalues['event_vendor'] = sp[1]
                    if sp[0] == 'ue_pr':
                        spvalues['unstruct_event_unencoded'] = sp[1]
                    if sp[0] == 'cx':
                        spvalues['context'] = sp[1]
                #     new_line = ''
                #     for key,val in spvalues.items():
                #         new_line += str(val) + '\t'
                # datvalues += new_line + '\n'
                all_events.append(spvalues)

                i += 1

    except Exception as e:
        tprint(file_key, "Error: " + str(e))
        error_count += 1

    tprint(file_key, "Processed " + str(i) + " entries")

    ########################
    #### Sorting events by destination and storing corresponding CSV files

    j = 0

    csvs = {}  ## Dictionary to store all CSVs

    tprint(file_key, "Sorting events per destination and storing to CSV")

    for spvalues in all_events:
        try:
            unstruct_event_bool = False
            context_present = False
            custom_schema_str = ''
            j += 1
            for key, val in copy.deepcopy(spvalues).items():
                if val == '-' or val == ():
                    del spvalues[key]

            if 'dvce_created_tstamp' in spvalues:
                try:
                    spvalues[
                        'dvce_created_tstamp'] = datetime.datetime.fromtimestamp(
                            int(spvalues['dvce_created_tstamp']) /
                            1000).strftime('%Y-%m-%d %H:%M:%S')
                except:
                    pass
            if 'dvce_sent_tstamp' in spvalues:
                try:
                    spvalues[
                        'dvce_sent_tstamp'] = datetime.datetime.fromtimestamp(
                            int(spvalues['dvce_sent_tstamp']) /
                            1000).strftime('%Y-%m-%d %H:%M:%S')
                except:
                    pass

            if 'unstruct_event' in spvalues:
                unstruct_event_bool = True
                # decode from base64 and parse into dictionary
                params = base64.urlsafe_b64decode(spvalues['unstruct_event'] +
                                                  '===').decode("utf-8")
                unstruct_event = json.loads(params)
                del spvalues['unstruct_event']

            elif 'unstruct_event_unencoded' in spvalues:
                # parse into dictionary
                unstruct_event_bool = True
                params = urllib.parse.unquote(
                    urllib.parse.unquote(spvalues['unstruct_event_unencoded']))
                unstruct_event = json.loads(params)
                del spvalues['unstruct_event_unencoded']

            # assign context to a variable
            if 'context' in spvalues:
                context_present = True
                # decode from base64 and parse into dictionary
                context_decoded = base64.urlsafe_b64decode(
                    spvalues['context'] + '===').decode("utf-8")
                context = json.loads(context_decoded)
                del spvalues['context']

            ## In any event we store an atomic.events entry
            columns_names = list(spvalues.keys())
            columns_names_str = ', '.join(columns_names)
            binds_str = ', '.join('%s' for _ in range(len(columns_names)))
            values = [spvalues[column_name] for column_name in columns_names]

            ### Generating CSV for the atomic event
            event_new_line = ''
            for column in table_columns['events']:
                if column in spvalues.keys():
                    event_new_line += str(spvalues[column]).replace(
                        "'", r"\'") + '\t'
                else:
                    event_new_line += '\t'
            event_new_line = re.sub('\t$', '\n', event_new_line)

            if "events" not in csvs.keys():
                csvs["events"] = ""
            csvs["events"] += event_new_line

            if unstruct_event_bool:
                unstruct_event['data']['data']['root_id'] = spvalues[
                    'event_id']

                # define the corresponding schema name
                if re.search(r'achievement_gui_interaction',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_achievement_gui_interaction_1'
                if re.search(r'achievement_unlocked',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_achievement_unlocked_1'
                if re.search(r'email_click', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_email_click_1'
                if re.search(r'email_opened',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_email_opened_1'
                if re.search(r'email_sent', unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_email_sent_1'
                if re.search(r'landing_from_email',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_landing_from_email_1'
                if re.search(r'user_creation',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_user_creation_1'
                if re.search(r'blockchain_account_creation',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_blockchain_account_creation_1'
                if re.search(r'user_new_identity',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_user_new_identity_1'
                if re.search(r'link_click', unstruct_event['data']['schema']):
                    custom_schema_str = 'com_snowplowanalytics_snowplow_link_click_1'
                    # convert camel snake fields to snake case
                    for key in copy.deepcopy(
                            unstruct_event['data']['data']).keys():
                        newKey = camel_to_snake(key)
                        unstruct_event['data']['data'][
                            newKey] = unstruct_event['data']['data'].pop(key)
                if re.search(r'stream_watch',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_stream_watch_1'
                if re.search(r'new_creator_account',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_new_creator_account_1'
                if re.search(r'stream_session_started',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_stream_session_started_1'
                if re.search(r'stream_session_ended',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_stream_session_ended_1'
                if re.search(r'challenge_sent_to_live_channel',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_challenge_sent_to_live_channel_1'
                if re.search(r'cvp_challenge_started',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_cvp_challenge_started_1'
                if re.search(r'cvp_challenge_joined_by_player',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_cvp_challenge_joined_by_player_1'
                if re.search(r'cvp_challenge_resolved',
                             unstruct_event['data']['schema']):
                    custom_schema_str = 'io_azarus_cvp_challenge_resolved_1'

                if len(custom_schema_str) > 0:
                    unstruct_event_data = flatten(
                        unstruct_event['data']['data'])
                    columns_names_custom = list(unstruct_event_data.keys())
                    columns_names_custom_str = ', '.join(
                        '"{0}"'.format(c) for c in columns_names_custom)
                    binds_custom_str = ', '.join(
                        '%s' for _ in range(len(columns_names_custom)))
                    values_custom = [
                        unstruct_event_data[column_name_custom]
                        for column_name_custom in columns_names_custom
                    ]

                    ##### Generating a CSV file for the corresponding custom event
                    custom_event_new_line = ''
                    if custom_schema_str in table_columns.keys():
                        for column in table_columns[custom_schema_str]:
                            if column in unstruct_event_data.keys():
                                custom_event_new_line += str(
                                    unstruct_event_data[column]).replace(
                                        "'", r"\'") + '\t'
                            else:
                                custom_event_new_line += '\t'
                        custom_event_new_line = re.sub('\t$', '\n',
                                                       custom_event_new_line)

                        if custom_schema_str not in csvs.keys():
                            csvs[custom_schema_str] = ""
                        csvs[custom_schema_str] += custom_event_new_line
                    else:
                        tprint(
                            file_key, "ERROR " + str(custom_schema_str) +
                            " not in table columns")

            # process context and prepare sql if custom context is found
            custom_cx_sqls = []
            if context_present:

                # iterate over all contexts and check for custom ones
                for cx in context['data']:
                    custom_cx_schema_str = ''
                    # define the corresponding custom context schema name
                    if re.search(r'twitch_user_context', cx['schema']):
                        custom_cx_schema_str = 'io_azarus_twitch_user_context_1'

                    if len(custom_cx_schema_str) > 0:
                        cx['data']['root_id'] = spvalues['event_id']
                        custom_cx_data = flatten(cx['data'])
                        columns_names_custom_cx = list(custom_cx_data.keys())
                        columns_names_custom_cx_str = ', '.join(
                            '"{0}"'.format(c) for c in columns_names_custom_cx)
                        binds_cx_custom_str = ', '.join(
                            '%s' for _ in range(len(columns_names_custom_cx)))
                        values_custom_cx = [
                            custom_cx_data[column_name_custom_cx] for
                            column_name_custom_cx in columns_names_custom_cx
                        ]

                        ### Generating CSV for the corresponding context events
                        context_event_new_line = ''
                        if custom_cx_schema_str in table_columns.keys():
                            for column in table_columns[custom_cx_schema_str]:
                                if column in custom_cx_data.keys():
                                    context_event_new_line += str(
                                        custom_cx_data[column]).replace(
                                            "'", r"\'") + '\t'
                                else:
                                    context_event_new_line += '\t'
                            context_event_new_line = re.sub(
                                '\t$', '\n', context_event_new_line)

                            if custom_cx_schema_str not in csvs.keys():
                                csvs[custom_cx_schema_str] = ""
                            csvs[
                                custom_cx_schema_str] += context_event_new_line
                        else:
                            tprint(
                                file_key,
                                "ERROR " + str(custom_cx_schema_str) +
                                " not in table columns")
        except Exception as e:
            tprint(
                file_key,
                "EventError. One event was not processed due to the following error: "
                + str(e))

    ########################
    #### Sorting events by destination and storing corresponding CSV files

    conn = psycopg2.connect(host=os.environ['POSTGRES_HOST'],
                            database=os.environ['POSTGRES_DATABASE'],
                            user=os.environ['POSTGRES_USER'],
                            password=os.environ['POSTGRES_PASSWORD'])

    ########################
    #### Inserting CSV contents to corresponding tables
    tprint(file_key, "Inserting events into corresponding table")
    insertion_error = False
    if len(csvs.keys()):
        for k in csvs.keys():
            tprint(file_key, "Events " + str(k))
            inserts_file_name = "/tmp/" + k + "_data_" + str(
                hashlib.sha224(csvs[k].encode('utf-8')).hexdigest()) + '.tsv'

            try:
                f = open(inserts_file_name, "w")
                f.write(csvs[k])
                f.close()

                ### INSERTING
                cur = conn.cursor()
                cur.copy_from(open(inserts_file_name, 'r'),
                              "atomic." + str(k),
                              null='',
                              sep='\t')

            except Exception as e:
                tprint(
                    file_key, "Insertion failed for table " + str(k) +
                    ". Error : " + str(e))
                error_count += 1
                insertion_error = True

        if not insertion_error:
            conn.commit()
        else:
            conn.rollback()

        cur.close()
        conn.close()

    ########################
    #### Dumping enriched CSV file to S3
    # tprint(file_key, "Writing enriched data to S3")
    # try:
    #     if len(urispltnodes[0]) > 5:
    #         gz_body = BytesIO()
    #         gz = gzip.GzipFile(None, 'wb', 9, gz_body)
    #         gz.write(datvalues.encode('utf-8'))
    #         gz.close()
    #         s3.put_object(Bucket=bucket_name, Key=file_key.replace("RAW", "Converted"),  ContentType='text/plain',  ContentEncoding='gzip',  Body=gz_body.getvalue())
    # except Exception as e:
    #     tprint(file_key, "Error: " + str(e))
    #     error_count += 1

    ########################
    #### Writing log file to keep track of processed files

    if not insertion_error:
        tprint(file_key, "Writing log file to S3")
        try:
            if len(urispltnodes[0]) > 5:
                gz_body = BytesIO()
                gz = gzip.GzipFile(None, 'wb', 9, gz_body)
                gz.write("".encode('utf-8'))
                gz.close()
                s3.put_object(Bucket=bucket_name,
                              Key=file_key.replace("RAW", "Processed"),
                              ContentType='text/plain',
                              ContentEncoding='gzip',
                              Body=gz_body.getvalue())
        except Exception as e:
            tprint(file_key, "Error: " + str(e))
            error_count += 1

        if error_count:
            tprint(file_key, "NbErrors: " + str(error_count))
Exemple #42
0
def parse_user_agent():
    """Parse user agents missing from the registry and append them to it.

    Steps, in order:

    1. Create ``analytics.WEB.user_agent_registry`` if it does not exist.
    2. Select every distinct user agent string from the raw event, release,
       diagnostics and page sources that is not already in the registry.
    3. Parse each string with ``parse`` and derive the browser, operating
       system and device columns of the registry.
    4. Append the rows to ``user_agent_registry`` (schema ``WEB``) in slices
       of at most 16,384 rows.

    Prints a status message and returns ``None`` in every case.
    """
    engine = snowflake_engine_factory(os.environ, "TRANSFORMER", "util")

    # CREATE USER_AGENT_REGISTRY IF IT DOES NOT ALREADY EXIST.
    query = """
    CREATE TABLE IF NOT EXISTS analytics.WEB.user_agent_registry
(
    context_useragent VARCHAR,
    browser           VARCHAR,
    browser_version   VARCHAR,
    operating_system  VARCHAR,
    os_version        VARCHAR,
    device_type       VARCHAR,
    device_brand      VARCHAR,
    device_model      VARCHAR
);"""

    execute_query(engine, query)

    # UNION ALL SOURCES OF CONTEXT_USERAGENT DATA THAT ARE NOT CURRENTLY IN THE USER_AGENT_REGISTRY TABLE.
    query = """
    SELECT *
    FROM (
        SELECT CONTEXT_USER_AGENT AS CONTEXT_USERAGENT
        FROM RAW.MATTERMOST2.EVENT
        WHERE CONTEXT_USER_AGENT IS NOT NULL
            AND CONTEXT_USER_AGENT NOT IN (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
            AND TIMESTAMP >= '2020-05-20'
        GROUP BY 1
        UNION ALL
        SELECT CONTEXT_USERAGENT
        FROM RAW.MM_TELEMETRY_PROD.EVENT
        WHERE CONTEXT_USERAGENT IS NOT NULL
            AND CONTEXT_USERAGENT NOT IN (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
            AND TIMESTAMP >= '2020-05-20'
        GROUP BY 1
        UNION ALL
        SELECT USERAGENT AS CONTEXT_USERAGENT
        FROM RAW.RELEASES.LOG_ENTRIES
        WHERE USERAGENT IS NOT NULL
            AND USERAGENT NOT IN (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
            AND LOGDATE::date >= '2020-05-19'
        GROUP BY 1
        UNION ALL
        SELECT USERAGENT AS CONTEXT_USERAGENT
        FROM RAW.DIAGNOSTICS.LOG_ENTRIES
        WHERE USERAGENT IS NOT NULL
            AND USERAGENT NOT IN (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
            AND LOGDATE::date >= '2020-05-19'
        GROUP BY 1
        UNION ALL
        SELECT CONTEXT_USERAGENT
        FROM raw.mattermostcom.pages
        WHERE CONTEXT_USERAGENT IS NOT NULL
            AND CONTEXT_USERAGENT NOT IN (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
            AND TIMESTAMP >= '2020-05-20'
        GROUP BY 1
        )
    WHERE CONTEXT_USERAGENT NOT IN (SELECT CONTEXT_USERAGENT FROM analytics.WEB.user_agent_registry GROUP BY 1)
    GROUP BY 1;
    """

    df = execute_dataframe(engine, query)

    # CHECKS TO SEE IF THERE ARE ANY NEW CONTEXT_USERAGENTS TO INSERT INTO THE TABLE
    if len(df) == 0:
        print("Nothing to do.")
        return

    # Parse every user agent exactly once, then derive only the components
    # that correspond to registry columns.
    parsed_agents = [parse(ua_string) for ua_string in df["CONTEXT_USERAGENT"]]
    components = {
        "browser": [ua.browser.family for ua in parsed_agents],
        "browser_version": [ua.browser.version_string for ua in parsed_agents],
        "operating_system": [ua.os.family for ua in parsed_agents],
        "os_version": [ua.os.version_string for ua in parsed_agents],
        "device_type": [ua.device.family for ua in parsed_agents],
        "device_brand": [ua.device.brand for ua in parsed_agents],
        "device_model": [ua.device.model for ua in parsed_agents],
    }
    # Positional assignment keeps each parsed value on the row it came from,
    # whatever index the query result carries.
    df = df.assign(**components)

    # 16,384 is Snowflake Insert statement row limit. To ensure the job
    # executes successfully the dataframe is loaded in slices no larger
    # than that limit.
    max_rows_per_insert = 16384

    connection = engine.connect()
    try:
        for start in range(0, len(df), max_rows_per_insert):
            df.iloc[start:start + max_rows_per_insert].to_sql(
                "user_agent_registry",
                con=connection,
                index=False,
                schema="WEB",
                if_exists="append",
            )
    finally:
        # Always hand the connection back, even when an insert fails.
        connection.close()

    print(
        f"""Successfully uploaded {len(df)} records to mattermost.user_agent_registry!"""
    )
Exemple #43
0
def get_user_agent(request, max_length=200):
    """Return a text description of the request's user agent.

    The ``HTTP_USER_AGENT`` entry of ``request.META`` (empty string when
    absent) is coerced to text with undecodable bytes replaced, parsed, and
    the text form of the parsed object is cut to at most ``max_length``
    characters.
    """
    raw_header = request.META.get('HTTP_USER_AGENT', '')
    header_text = force_text(raw_header, errors='replace')
    parsed_agent = user_agents.parse(header_text)
    description = force_text(parsed_agent)
    return description[:max_length]
Exemple #44
0
def hello():
    """Render ``index.html`` with a flag saying whether the client is mobile.

    The flag is passed to the template as ``qmobile`` and comes from the
    ``is_mobile`` attribute of the parsed ``User-Agent`` request header.
    """
    # headers.get() yields None when the client sends no User-Agent header;
    # fall back to an empty string so parse() is always handed text.
    ua_string = request.headers.get('User-Agent') or ''
    user_agent = parse(ua_string)
    qmobile = user_agent.is_mobile
    return render_template("index.html", qmobile=qmobile)
Exemple #45
0
def main():
    """Render the page that charts an uploaded AWS load balancer log.

    The sidebar selects the log flavour (Classic or Application). Once a file
    is uploaded it is decoded as UTF-8 and parsed with the matching parser.
    An empty result shows an error; otherwise one pie chart is drawn per
    derived column (bot/device class, browser, device, OS) and, for Classic
    logs only, per response-code column.
    """
    # Lb type picker
    lb_type = st.sidebar.radio(
        "LB Type",
        (
            "AWS Classic",
            "AWS Application",
        ),
    )

    st.markdown(
        "<h1 style='text-align: center;'>AWS LB Log Story</h1>", unsafe_allow_html=True
    )
    uploaded_file = st.file_uploader("Upload here you AWS LB Log.")

    # Nothing to parse or chart until a file has been uploaded.
    if uploaded_file is None:
        return

    log_text = uploaded_file.read().decode("utf-8")
    if lb_type == "AWS Classic":
        df = parse_clb_log_file(log_text)
    else:
        df = parse_alb_log_file(log_text)

    # An empty or incompatible file produces an empty frame: report and stop.
    if df.empty:
        st.error("Invalid Format or empty file")
        return

    # Transformations
    # New column with the user agent definition computed per row.
    df["ua"] = df.apply(user_agent_definition, axis=1)

    # Parse each user agent string once and derive the browser, device and
    # OS families from the same parsed object.
    parsed_agents = [parse(ua_string) for ua_string in df["user_agent"]]
    df["browser"] = [ua.browser.family for ua in parsed_agents]
    df["device"] = [ua.device.family for ua in parsed_agents]
    df["os"] = [ua.os.family for ua in parsed_agents]

    # (header, column) pairs, in the order the charts appear on the page.
    # TODO: pull the slice which contains the bots
    charts = [
        ("Bot vs Devices", "ua"),
        ("Browser", "browser"),
        ("Devices", "device"),
        ("OS", "os"),
    ]
    # AWS CLB and ALB have different fields for backend codes, so the
    # response-code charts are only drawn for Classic logs.
    if lb_type == "AWS Classic":
        charts.append(("Backend Response Codes", "backend_response_code"))
        charts.append(("LB Response Codes", "elb_response_code"))

    for header, column in charts:
        st.header(header)
        figure = px.pie(
            df,
            names=column,
            color_discrete_sequence=px.colors.sequential.RdBu,
        )
        st.plotly_chart(figure, use_container_width=True)
    def handle(self, *args, **options):
        """Turn unprocessed raw trackers and beats into trackers, forever.

        Each pass of the endless loop:

        1. Loads every ``RawTracker`` with ``processed=False``. A raw tracker
           whose account has no profile, whose website is unknown, or whose
           website belongs to another user is flagged accordingly, has its IP
           cleared, is marked processed and is skipped.
        2. Creates a ``Tracker`` for each remaining raw tracker. Unless the
           visitor sent Do-Not-Track, the user agent is parsed into operating
           system, device family, browser and device type, the screen size is
           copied and, when the profile allows geolocation and the visitor is
           not a bot, country and region are looked up from the IP.
        3. Adds 20 to ``session_length`` of the matching tracker for every
           unprocessed ``BeatTracker`` and marks the beats processed.
        4. Warns (log, plus one e-mail to the admins) when more raw trackers
           are waiting than were just processed, logs throughput statistics
           every 12 hours, and sleeps for the rest of the 30-minute period.

        ``args`` and ``options`` are accepted but not used. Never returns.
        """
        logger.info('Starting the processing of raw tracks')
        batch_period = 30 * 60  # seconds between the starts of two passes
        t0 = time.time()
        total_raw_trackers_analysed = 0
        total_beat_trackers_analysed = 0
        admins_warned = False
        running_time = 0
        while True:
            t1 = time.time()
            raw_trackers = RawTracker.objects.filter(processed=False)
            total_raw_trackers_analysed += raw_trackers.count()
            for raw_tracker in raw_trackers:
                # Let's verify account:
                try:
                    profile = Profile.objects.get(account_id=raw_tracker.account_id)
                except Profile.DoesNotExist:
                    raw_tracker.ip = None
                    raw_tracker.wrong_account_id = True
                    raw_tracker.processed = True
                    raw_tracker.save()
                    continue
                parsed_url = urlparse(raw_tracker.url)
                queries = QueryDict(parsed_url.query, mutable=False)
                website_url = normalize_website(parsed_url.hostname)
                page = parsed_url.path
                if not page:
                    page = '/'

                utm_source = queries.get('utm_source')
                try:
                    website = Website.objects.get(website_url=website_url)
                except Website.DoesNotExist:
                    raw_tracker.ip = None
                    raw_tracker.website_does_not_exist = True
                    raw_tracker.processed = True
                    raw_tracker.save()
                    continue

                if website.owner != profile.user:
                    raw_tracker.ip = None
                    raw_tracker.wrong_owner = True
                    raw_tracker.processed = True
                    raw_tracker.save()
                    # Rejected like the two cases above: do not build a
                    # Tracker for a website the account does not own.
                    continue

                referrer_url = None
                referrer_page = '/'
                if raw_tracker.referrer:
                    parsed_referrer = urlparse(raw_tracker.referrer)
                    referrer_url = normalize_referrer(normalize_website(parsed_referrer.hostname))
                    # Only run the substring tests on a non-empty value; what
                    # the normalisers return for a host-less referrer is not
                    # visible here.
                    if referrer_url:
                        if 'google' in referrer_url:
                            referrer_url = 'Google'
                        if 'bing' in referrer_url:
                            referrer_url = 'Bing'
                    referrer_page = parsed_referrer.path

                tracker = Tracker.objects.create(
                    url=website_url,
                    page=page,
                    website=website,
                    referrer_url=referrer_url,
                    referrer_page=referrer_page,
                    timestamp=raw_tracker.timestamp,
                    utm_source=utm_source,
                    raw_tracker=raw_tracker,
                )
                type_device = None

                if not raw_tracker.dnt:
                    try:
                        user_agent = parse(raw_tracker.user_agent)
                    except Exception:
                        logger.error('Problem parsing user agent Raw Tracker {}'.format(raw_tracker.id))
                        user_agent = None
                        type_device = Tracker.UNKNOWN
                    if user_agent:
                        operating_system = user_agent.os.family
                        device_family = user_agent.device.family
                        browser = user_agent.browser.family

                        if user_agent.is_mobile:
                            type_device = Tracker.MOBILE
                        elif user_agent.is_tablet:
                            type_device = Tracker.TABLET
                        elif user_agent.is_pc:
                            type_device = Tracker.PC
                        elif user_agent.is_bot:
                            type_device = Tracker.BOT
                        else:
                            type_device = Tracker.UNKNOWN

                        tracker.operating_system = operating_system
                        tracker.device_family = device_family
                        tracker.browser = browser
                        tracker.type_device = type_device

                    tracker.screen_height = raw_tracker.screen_height
                    tracker.screen_width = raw_tracker.screen_width
                    tracker.save()

                    if profile.can_geolocation and not type_device == Tracker.BOT:
                        if raw_tracker.ip:
                            geo = GeoIP2()
                            try:
                                location_data = geo.city(raw_tracker.ip)
                                tracker.country = location_data.get('country_code', '') or ''
                                tracker.region = location_data.get('region', '') or ''
                            except Exception:
                                # Geolocation is best effort: a failed lookup
                                # leaves country/region untouched.
                                pass
                    # NOTE(review): the IP is only cleared on this non-DNT
                    # path; confirm whether DNT raw trackers should keep it.
                    raw_tracker.ip = None

                    tracker.save()
                raw_tracker.processed = True
                raw_tracker.save()

            beats = BeatTracker.objects.filter(processed=False)
            qs = beats.values('raw_tracker').annotate(Count('pk'))
            for beat_tracker in qs:
                try:
                    tracker = Tracker.objects.get(raw_tracker__id=beat_tracker['raw_tracker'])
                    tracker.session_length += 20*beat_tracker['pk__count']
                    tracker.save()
                    total_beat_trackers_analysed += beat_tracker['pk__count']
                except Tracker.DoesNotExist:
                    logger.warning(f"Processing beat for a non existing tracker: (id: {beat_tracker['raw_tracker']}) ")
            beats.update(processed=True)

            t2 = time.time()
            running_time += t2-t1
            # When it finishes one loop, check for the already available Raw Trackers
            # If there are already more than when it started, it is a problem, we are lagging behind

            new_raw_trackers = RawTracker.objects.filter(processed=False).count()

            if new_raw_trackers > raw_trackers.count():
                logger.warning("The raw tracker is lagging behind")
                if not admins_warned:
                    message = """ Dear admin,
                    The raw tracker process task has just analysed {} Raw Tracks in {} seconds but there are already
                    {} new tracks to analyse. The taks is running behind and something has to be done.
                    """.format(raw_trackers.count(), t2-t1, new_raw_trackers)
                    subject = "[WARNING] Raw tracks processing lagging behind"
                    try:
                        mail_admins(subject, message, fail_silently=False)
                        admins_warned = True
                    except Exception:
                        logger.error('Failed sending warning e-mail to admins')

            if t2-t0 > 12*60*60:  # Log the statistics every 12 hours
                logger.info('Processed {} tracks in {}s at a rate of {}tracks/s'\
                        .format(total_raw_trackers_analysed, running_time, total_raw_trackers_analysed/running_time))
                running_time = 0
                total_raw_trackers_analysed = 0
                t0 = time.time()

            # Elapsed time is in seconds, so compare it with the period in
            # seconds (30 minutes), not with the bare number 30.
            if t2-t1 < batch_period:
                time.sleep(batch_period-(t2-t1))  # It sleeps for what is left of the 30 minutes
            else:
                logger.error('Processing the batch takes longer than 30 minutes')
                time.sleep(5)  # Only sleep 5 seconds and continue. This is done just to release the CPU.
Exemple #47
0
def _classify_device_type(ua):
    """Map a parsed user agent onto one of the session device-type labels."""
    browser_family = (ua.browser.family or "").strip().lower()
    device_name = (ua.device.family or ua.device.model or "").strip().lower()
    if ua.is_bot or browser_family == "googlebot" or device_name == "spider":
        return "ROBOT"
    if ua.is_mobile:
        return "PHONE"
    if ua.is_tablet:
        return "TABLET"
    if ua.is_pc:
        return "DESKTOP"
    return "OTHER"


def ingress_request(service_uuid,
                    tracker,
                    time,
                    payload,
                    ip,
                    location,
                    user_agent,
                    dnt=False,
                    identifier=""):
    """Record one tracking request as a session plus a hit (or a heartbeat).

    Args:
        service_uuid: Primary key of the ``Service``; it must be ACTIVE.
        tracker: Stored on a newly created ``Hit``.
        time: Not used by this function.
        payload: Dict-like request payload. The keys read here are
            ``loadTime``, ``idempotency``, ``location`` and ``referrer``.
            An invalid ``loadTime`` is overwritten with ``None`` in place.
        ip: Client IP; used for the geo lookup and for session matching.
        location: Fallback location when the payload carries none.
        user_agent: Raw User-Agent string; used for session matching and,
            for new sessions, parsed for browser/device/OS details.
        dnt: When true and the service respects DNT, nothing is recorded.
        identifier: Optional identifier stored on the session (stripped).

    Returns:
        None.

    Raises:
        Any exception raised while processing is logged and re-raised.
    """
    try:
        service = Service.objects.get(pk=service_uuid, status=Service.ACTIVE)
        log.debug(f"Linked to service {service}")

        if dnt and service.respect_dnt:
            return

        ip_data = _geoip2_lookup(ip)
        log.debug("Found geoip2 data")

        # Validate payload: a load time must be a positive number. An explicit
        # None is left alone, and a non-numeric value is discarded rather than
        # blowing up on the comparison.
        load_time = payload.get("loadTime")
        if load_time is not None and (
                not isinstance(load_time, (int, float)) or load_time <= 0):
            payload["loadTime"] = None

        identifier = identifier.strip()

        # Create or update session
        session = (
            Session.objects.filter(
                service=service,
                last_seen__gt=timezone.now() - timezone.timedelta(minutes=10),
                ip=ip,
                user_agent=user_agent,
            ).first()
            # We used to check for identifiers, but that can cause issues when people
            # re-open the page in a new tab, for example. It's better to match sessions
            # solely based on IP and user agent.
        )
        if session is None:
            log.debug("Cannot link to existing session; creating a new one...")
            ua = user_agents.parse(user_agent)
            initial = True
            session = Session.objects.create(
                service=service,
                ip=ip,
                user_agent=user_agent,
                identifier=identifier,
                browser=ua.browser.family or "",
                device=ua.device.family or ua.device.model or "",
                device_type=_classify_device_type(ua),
                os=ua.os.family or "",
                asn=ip_data.get("asn", ""),
                country=ip_data.get("country", ""),
                longitude=ip_data.get("longitude"),
                latitude=ip_data.get("latitude"),
                time_zone=ip_data.get("time_zone", ""),
            )
        else:
            log.debug("Updating old session with new data...")
            initial = False
            # Update last seen time
            session.last_seen = timezone.now()
            if session.identifier == "" and identifier != "":
                session.identifier = identifier
            session.save()

        # Create or update hit
        idempotency = payload.get("idempotency")
        idempotency_path = f"hit_idempotency_{idempotency}"
        hit = None
        if idempotency is not None:
            # Read the cache once so the key cannot expire between the
            # existence check and the lookup of the hit it points to.
            cached_hit_pk = cache.get(idempotency_path)
            if cached_hit_pk is not None:
                cache.touch(idempotency_path, 10 * 60)
                hit = Hit.objects.filter(pk=cached_hit_pk,
                                         session=session).first()
                if hit is not None:
                    # There is an existing hit with an identical idempotency key. That means
                    # this is a heartbeat.
                    log.debug(
                        "Hit is a heartbeat; updating old hit with new data..."
                    )
                    hit.heartbeats += 1
                    hit.last_seen = timezone.now()
                    hit.save()
        if hit is None:
            log.debug("Hit is a page load; creating new hit...")
            # There is no existing hit; create a new one
            hit = Hit.objects.create(
                session=session,
                initial=initial,
                tracker=tracker,
                # At first, location is given by the HTTP referrer. Some browsers
                # will send the source of the script, however, so we allow JS payloads
                # to include the location.
                location=payload.get("location", location),
                referrer=payload.get("referrer", ""),
                load_time=payload.get("loadTime"),
            )
            # Set idempotency (if applicable)
            if idempotency is not None:
                cache.set(idempotency_path, hit.pk, timeout=10 * 60)
    except Exception as e:
        log.exception(e)
        raise
Exemple #48
0
def group_by_device_type(uas_list):
    '''Group user agent strings by device type, browser and OS family.

    Only the device types "desktop", "mobile" and "tablet" and the browsers
    "chrome", "safari", "firefox", "opera" and "ie" are supported; user
    agents that fall outside of those are logged and skipped.

    Args:
        uas_list: Iterable of raw user agent strings.

    Returns:
        A nested dict ``{device: {browser: {os_family: [ua, ...]}}}`` that
        always contains every supported device/browser key, even when empty.
    '''
    browsers = ('chrome', 'safari', 'firefox', 'opera', 'ie')
    ud = {
        device: {browser: {} for browser in browsers}
        for device in ('desktop', 'mobile', 'tablet')
    }
    for ua in uas_list:
        parsed_ua = user_agents.parse(ua)

        if parsed_ua.is_mobile:
            device_dict = ud['mobile']
        elif parsed_ua.is_tablet:
            device_dict = ud['tablet']
        elif parsed_ua.is_pc:
            device_dict = ud['desktop']
        else:
            logger.warning(
                '[UnsupportedDeviceType] Family: %s, Brand: %s, Model: %s',
                parsed_ua.device.family, parsed_ua.device.brand,
                parsed_ua.device.model)
            continue

        family = parsed_ua.browser.family.lower()
        # Chrome is tested first: a family naming both Chrome and Safari
        # counts as Chrome, and Safari only wins when Chrome is absent.
        if 'chrome' in family:
            browser = 'chrome'
        elif 'safari' in family:
            browser = 'safari'
        elif 'firefox' in family:
            browser = 'firefox'
        elif 'opera' in family or 'opr' in family:
            browser = 'opera'
        elif 'ie' in family:  # also covers 'msie', which contains 'ie'
            browser = 'ie'
        else:
            logger.warning('[UnsupportedBrowserType] Family: %s',
                           parsed_ua.browser.family)
            continue

        device_dict[browser].setdefault(parsed_ua.os.family, []).append(ua)

    return ud
Exemple #49
0
 def do_GET(self):
     """Answer a GET request by dispatching on its query-string parameters.

     When ``k`` is present it must equal the ``DAODAO_PASSWORD`` environment
     variable; the first of ``g``, ``a``, ``e``, ``c``, ``dn`` or ``d`` found
     then selects the helper to call. Without ``k``, ``q`` (optionally with
     ``t``) runs ``search_daodao_lite``. The resulting text is always sent
     with status 200 and permissive CORS headers.
     """
     import hmac  # local import: only needed for the password comparison

     # Load the settings
     config = load_yaml_config('./config.yml')['setting']
     # Time zone
     zone = config['zone']
     # Number of days to search
     search_time_limit = config['search_time_limit']
     # Number of entries to search
     search_time_limit_num = config['search_time_limit_num']
     # Current time (UTC)
     now = datetime.utcnow()
     # User information
     user_info = {
         "token": os.environ["DAODAO_TOKEN"],
         "user": config['user'],
         "source": config['repo'],
     }
     # Title date in the configured time zone
     now_time = time_zone_reset(now, zone, '%Y-%m-%d')
     # Search range
     since = search_time(search_time_limit)

     print('当地时间为:', now_time)
     # A client may omit the header; hand the parser a string in that case.
     user_agent = user_agents.parse(self.headers.get('User-Agent', ''))
     # Parse the query string once instead of once per lookup.
     query = parse.parse_qs(parse.urlparse(self.path).query)
     # Trailing arguments shared by every change_data_handle call.
     shared_args = (search_time_limit, search_time_limit_num, zone,
                    now_time, user_info, since, user_agent)

     if 'k' in query:
         supplied = query['k'][0]
         expected = os.environ["DAODAO_PASSWORD"]
         # Constant-time comparison so the check does not leak how much of
         # the password matched.
         if hmac.compare_digest(supplied.encode(), expected.encode()):
             if 'g' in query:
                 data = query['g'][0]
                 text = change_data_handle(int(data), '', 'combine', *shared_args)
             elif 'a' in query:
                 data = query['a'][0].split(',', 1)
                 text = change_data_handle(int(data[0]), data[1], 'append', *shared_args)
             elif 'e' in query:
                 data = query['e'][0].split(',', 1)
                 text = change_data_handle(int(data[0]), data[1], 'edit', *shared_args)
             elif 'c' in query:
                 data = query['c'][0]
                 # NOTE(review): the JSON text is assembled by concatenation,
                 # so quotes inside the content are not escaped - confirm
                 # what creat_data expects before changing this.
                 text = creat_data(now_time, user_info, '{"content":"'+ data+'",\n"user_agents":"'+str(user_agent)+'"}',  since)
             elif 'dn' in query:
                 num = query['dn'][0]
                 text = delete_data_single(num, user_info, search_time_limit, search_time_limit_num)
             elif 'd' in query:
                 num = query['d'][0]
                 text = delete_data_muti(num, user_info, search_time_limit, search_time_limit_num)
             else:
                 text = 'please check!'
         else:
             text = 'Please enter the correct password'
     elif 'q' in query:
         num = int(query['q'][0])
         if num == 0:
             # Zero means "use the configured default".
             num = search_time_limit_num
         if 't' in query:
             limit = int(query['t'][0])
         else:
             limit = search_time_limit
         text = json.dumps(search_daodao_lite(user_info, limit, num))
     else:
         text = 'Please enter the correct password'

     self.send_response(200)
     # Headers required for CORS
     self.send_header('Content-type', 'application/json')
     self.send_header("Access-Control-Allow-Origin", "*")
     self.send_header("Access-Control-Allow-Methods", "*")
     self.send_header("Access-Control-Allow-Headers", "Authorization, Content-Type")
     self.end_headers()
     self.wfile.write(text.encode())
Exemple #50
0
# 183.69.210.164 - - [07/Apr/2017:09:32:53 +0800] "GET /app/include/authcode.inc.php HTTP/1.1" 200 384
# "http://job.magedu.com/index.php?m=login" "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36
# (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0"

def _parse_timestamp(timestr):
    """Parse an access-log timestamp such as ``07/Apr/2017:09:32:53 +0800``."""
    return datetime.datetime.strptime(timestr, '%d/%b/%Y:%H:%M:%S %z')


def _split_request(request):
    """Split a request line into its method, url and protocol parts."""
    return dict(zip(('method', 'url', 'protocol'), request.split()))


def _parse_useragent(useragent):
    """Parse a raw User-Agent string; ``parse`` is looked up at call time."""
    return parse(useragent)


# Converters applied to captured log fields, keyed by field name.
ops = {
    'datetime': _parse_timestamp,
    'status': int,
    'length': int,
    'request': _split_request,
    'useragent': _parse_useragent,
}

# Regular expression for one access-log line. It is written as adjacent raw
# strings so that regex escapes such as \d, \w and \[ are not treated as
# (invalid) Python string escapes; the joined text is a single line.
pattern = (
    r'(?P<remote>[\d.]{7,}) - - \[(?P<datetime>[/\w +:]+)\] '
    r'"(?P<method>\w+) (?P<url>[/\w.]+) (?P<protocol>[/\w.]+)" '
    r'(?P<status>\d+) (?P<length>\d+) .+ '
    r'"(?P<useragent>.*)"'
)
regex = re.compile(pattern)


def extract(line) -> dict:
    """Parse one log line into a dict of converted fields.

    Every named group captured by ``regex`` is passed through the converter
    registered for it in ``ops``; groups without a converter keep their raw
    text. A line that does not match yields ``None``.
    """
    matcher = regex.match(line)
    if not matcher:
        return None

    fields = {}
    for name, raw in matcher.groupdict().items():
        convert = ops.get(name)
        fields[name] = raw if convert is None else convert(raw)
    return fields
Exemple #51
0
def is_spider():
    """Return whether the current Flask request's user agent parses as a bot."""
    # Default to an empty string so the parser always receives a string,
    # even when the client sent no User-Agent header.
    ua_string = flask.request.headers.get('User-Agent', '')
    return user_agents.parse(ua_string).is_bot
Exemple #52
0
    def expand(self, link, request, edit):
        '''
        Expand a toush link

        Looks up the short link, enforces its usage and expiry limits and,
        unless the call is an edit, counts the usage and records a Visitor.

        Args:
            link: Short URL to resolve (matched exactly against ``short_url``).
            request: Incoming request; ``request.user`` and ``request.META``
                are read when a visit is recorded.
            edit: When truthy, the link is returned without counting a usage
                or creating a Visitor.

        Returns:
            The matching ``ToushLink`` object.

        Raises:
            KeyError: No link with that short URL exists.
            PermissionError: The usage limit is reached or the link expired.
        '''
        try:
            url = ToushLink.objects.get(short_url__exact=link)
        except ToushLink.DoesNotExist:
            raise KeyError("invalid shortlink")

        # ensure we are within usage counts (the check is skipped for -1)
        if url.max_count != -1 and url.max_count <= url.usage_count:
            raise PermissionError("max usages for link reached")

        # ensure we are within allowed datetime
        if timezone.now() > url.expired:
            raise PermissionError("shortlink expired")

        if edit:
            # Edits neither consume a usage nor count as a visit, so there is
            # no need to touch the request metadata at all.
            return url

        url.usage_count += 1
        url.save()

        ### TO CONSERVE RESOURCES, CREATE VISITOR OBJECT AFTER THE CHECKS.
        ### VISITOR OBJECT ONLY MADE IF THE VISITOR "SEES" THE END PAGE
        # Clients may omit the User-Agent header; parse an empty string then
        # instead of failing with a KeyError.
        user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))
        visitor_fields = dict(
            url=url,
            ip_addr_v4=request.META['REMOTE_ADDR'],
            is_mobile=user_agent.is_mobile,
            is_tablet=user_agent.is_tablet,
            is_touch_capable=user_agent.is_touch_capable,
            is_pc=user_agent.is_pc,
            is_bot=user_agent.is_bot,
            browser=user_agent.browser.family,
            browser_version=user_agent.browser.version_string,
            os=user_agent.os.family,
            os_version=user_agent.os.version_string,
            device=user_agent.device.family,
        )
        # Only attach a user when the request carries one with a username.
        if request.user.username:
            visitor_fields['user'] = request.user
        Visitor.objects.create(**visitor_fields)

        #### IN OBJECT TYPE DEFINITION, EXCLUDE THE DATA/STATS FIELDS
        # so analytics don't leave server
        return url
Exemple #53
0
    async def signin(
        self,
        request: Request,
        password: str,
        identifier: Union[str, EmailStr],
    ) -> str:
        """Authenticate a user and issue a signed JWT.

        The request's ``x-real-ip`` and ``user-agent`` headers are turned
        into a human-readable location and device description (both fall
        back to ``"Unknown"``), which are stored with the new token record.

        Args:
            request: Incoming request; only its headers are read.
            password: Plain-text password to verify.
            identifier: Either an e-mail address or a user id.

        Returns:
            The encoded JWT (ES384) valid for two days.

        Raises:
            HTTPException: 409 for a malformed identifier or a database
                error, 401 for an unknown user or a wrong password.
        """
        location: str = "Unknown"
        if "x-real-ip" in request.headers:
            real_ip: str = request.headers["x-real-ip"]
            self.logger.debug("requesters IP address is %s", real_ip)
            try:
                ipstack_res: Dict = self.ip2geo.get_location(real_ip)
                location = (
                    f"{ipstack_res['location']['country_flag_emoji']} "
                    f"{ipstack_res['city']} / "
                    f"{ipstack_res['region_name']} / "
                    f"{ipstack_res['country_name']}"
                )
            except Exception as exception:
                # Geolocation is best effort; sign-in continues without it.
                self.logger.error(
                    "an error acquired when requesting ipstack: %s", exception
                )
                location = "Unknown"
        self.logger.debug("requesters geolocation is %s", location)

        device: str = "Unknown"
        if "user-agent" in request.headers:
            ua_str: str = request.headers["user-agent"]
            try:
                device = str(parse(ua_str))
            except Exception:
                # Device detection is best effort as well.
                device = "Unknown"
        self.logger.debug("requesters device is %s", device)

        # Anything that does not validate as an e-mail is treated as a user id.
        email: Optional[EmailStr] = None
        user_id: Optional[str] = None
        try:
            email = validate_email(identifier).email
        except EmailNotValidError:
            user_id = identifier
            try:
                user_id = custom_charset(None, user_id)
            except Exception as exception:
                self.logger.error(exception)
                raise HTTPException(
                    status_code=status.HTTP_409_CONFLICT,
                    detail={
                        "eng": "incorrect identifier",
                        "rus": "неправильный идентификатор",
                    },
                )

        try:
            if email:
                user = await self.database.fetch_one(
                    sa.sql.select(
                        [self.users.c.hashed_password, self.users.c.user_id]
                    ).where(self.users.c.email == email)
                )
            else:
                user = await self.database.fetch_one(
                    sa.sql.select([self.users.c.hashed_password]).where(
                        self.users.c.user_id == user_id
                    )
                )
        except Exception as exception:
            self.logger.error(exception)
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "eng": "An error occurred when working with Auth DB",
                    "rus": "Произошла ошибка при обращении к базе данных "
                    "модуля авторизации",
                },
            )

        if user is None:
            # No row matched: report it as an authentication failure instead
            # of failing on the subscript below.
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail={
                    "eng": "Incorrect identifier or password",
                    "rus": "Неправильный идентификатор или пароль",
                },
            )

        if email:
            user_id = user["user_id"]
        hashed_password = user["hashed_password"]

        if not crypto_context.verify(password, hashed_password):
            raise HTTPException(
                status_code=status.HTTP_401_UNAUTHORIZED,
                detail={
                    "eng": "Incorrect password",
                    "rus": "Неправильный пароль",
                },
            )

        now: datetime.datetime = datetime.datetime.now()
        token_uuid: bytes = uuid.uuid4().bytes

        # Persist the token record before handing the token to the client.
        insert = self.tokens.insert().values(
            token_uuid=token_uuid,
            location=location,
            device=device,
            issued_by=user_id,
            issued_at=str(now),
        )

        try:
            await self.database.execute(insert)
        except Exception as exception:
            self.logger.error(exception)
            raise HTTPException(
                status_code=status.HTTP_409_CONFLICT,
                detail={
                    "eng": "An error occurred when working with Auth DB",
                    "rus": "Произошла ошибка при обращении к базе данных "
                    "модуля авторизации",
                },
            )

        header: Dict[str, str] = {"alg": "ES384", "typ": "JWT"}
        payload: Dict[str, Any] = {
            "iss": "paperback",
            "sub": str(user_id),
            "exp": int(round((now + datetime.timedelta(days=2)).timestamp(), 0)),
            "iat": int(round(now.timestamp(), 0)),
            "jti": str(uuid.UUID(bytes=token_uuid)),
        }
        self.logger.debug("created token %s for user %s", payload, user_id)
        return jwt.encode(header, payload, self.private_key)
Exemple #54
0
 def setup(self, request, *args, **kwargs):
     """Run the parent setup, then attach the parsed user agent to the request."""
     super().setup(request, *args, **kwargs)
     # A missing User-Agent header is parsed as an empty string.
     self.request.user_agent = parse(request.META.get('HTTP_USER_AGENT', ''))
Exemple #55
0
def terms_of_use():
    """Render the terms-of-use page, flagging mobile user agents."""
    # Default to an empty string so the parser always receives a string,
    # even when the client sent no User-Agent header.
    user_agent = parse(request.headers.get('User-Agent', ''))
    return render_template('terms_of_use.html',
                           mobile=user_agent.is_mobile,
                           subtitle=gettext(u'Nutzungsbedingungen'))
Exemple #56
0
 def is_outdated_browser(self):
     """Return True when the request's user agent parses to the "IE" browser family."""
     # A missing HTTP_USER_AGENT entry is parsed as an empty string.
     browser = parse(self.request.get("HTTP_USER_AGENT", "")).browser
     return browser.family == "IE"
Exemple #57
0
def advertisement():
    """
    This is the url we give for the ad for our 'external question'.  The ad has
    to display two different things: This page will be called from within
    mechanical turk, with url arguments hitId, assignmentId, and workerId.
    If the worker has not yet accepted the hit:
        These arguments will have null values, we should just show an ad for
        the experiment.
    If the worker has accepted the hit:
        These arguments will have appropriate values and we should enter the
        person in the database and provide a link to the experiment popup.

    Before anything else the requesting browser is checked against the
    comma-separated ``browser_exclude_rule`` setting: the keywords mobile,
    tablet, touchcapable, pc and bot test the parsed user agent, "Safari"
    excludes Safari but not Chrome, and any other rule is matched as a
    substring of the raw user agent string. Empty rules are ignored.

    Raises:
        ExperimentError: The browser is excluded, required url arguments
            are missing, or the participant's status forbids showing the ad.
    """
    user_agent_string = request.user_agent.string
    user_agent_obj = user_agents.parse(user_agent_string)
    browser_exclude_rule = CONFIG.get('HIT Configuration',
                                      'browser_exclude_rule')
    # Keyword rules and the parsed-user-agent attribute each one tests.
    device_rules = {
        "mobile": "is_mobile",
        "tablet": "is_tablet",
        "touchcapable": "is_touch_capable",
        "pc": "is_pc",
        "bot": "is_bot",
    }
    browser_ok = True
    for rule in browser_exclude_rule.split(','):
        myrule = rule.strip()
        if not myrule:
            # An empty rule (empty setting or stray comma) is a substring of
            # every user agent and would otherwise exclude all browsers.
            continue
        if myrule in device_rules:
            if getattr(user_agent_obj, device_rules[myrule]):
                browser_ok = False
        elif myrule in ("Safari", "safari"):
            # Chrome also advertises "Safari"; only exclude real Safari.
            if ("Safari" in user_agent_string
                    and "Chrome" not in user_agent_string):
                browser_ok = False
        elif myrule in user_agent_string:
            browser_ok = False

    if not browser_ok:
        # Handler for IE users if IE is not supported.
        raise ExperimentError('browser_type_not_allowed')

    if not ('hitId' in request.args and 'assignmentId' in request.args):
        raise ExperimentError('hit_assign_worker_id_not_set_in_mturk')
    hit_id = request.args['hitId']
    assignment_id = request.args['assignmentId']
    mode = request.args['mode']
    debug_mode = hit_id[:5] == "debug"
    already_in_db = False
    if 'workerId' in request.args:
        worker_id = request.args['workerId']
        # First check if this workerId has completed the task before (v1).
        nrecords = Participant.query.\
            filter(Participant.assignmentid != assignment_id).\
            filter(Participant.workerid == worker_id).\
            count()

        if nrecords > 0:  # Already completed task
            already_in_db = True
    else:  # If worker has not accepted the hit
        worker_id = None
    try:
        part = Participant.query.\
            filter(Participant.hitid == hit_id).\
            filter(Participant.assignmentid == assignment_id).\
            filter(Participant.workerid == worker_id).\
            one()
        status = part.status
    except exc.SQLAlchemyError:
        # No single matching participant row; treat the status as unset.
        status = None

    allow_repeats = CONFIG.getboolean('HIT Configuration', 'allow_repeats')
    if (status == STARTED or status == QUITEARLY) and not debug_mode:
        # Once participants have finished the instructions, we do not allow
        # them to start the task again.
        raise ExperimentError('already_started_exp_mturk')
    elif status == COMPLETED or (status == SUBMITTED and not already_in_db):
        # 'or status == SUBMITTED' because we suspect that sometimes the post
        # to mturk fails after we've set status to SUBMITTED, so really they
        # have not successfully submitted. This gives another chance for the
        # submit to work when not using the psiturk ad server.
        use_psiturk_ad_server = CONFIG.getboolean('Shell Parameters',
                                                  'use_psiturk_ad_server')
        if not use_psiturk_ad_server:
            # They've finished the experiment but haven't successfully submitted the HIT
            # yet.
            return render_template('thanks-mturksubmit.html',
                                   using_sandbox=(mode == "sandbox"),
                                   hitid=hit_id,
                                   assignmentid=assignment_id,
                                   workerid=worker_id)
        else:
            # Show them a thanks message and tell them to go away.
            return render_template('thanks.html')
    elif already_in_db and not (debug_mode or allow_repeats):
        raise ExperimentError('already_did_exp_hit')
    elif status == ALLOCATED or not status or debug_mode:
        # Participant has not yet agreed to the consent. They might not
        # even have accepted the HIT.
        with open('templates/ad.html', 'r') as temp_file:
            ad_string = temp_file.read()
        ad_string = insert_mode(ad_string, mode)
        return render_template_string(ad_string,
                                      hitid=hit_id,
                                      assignmentid=assignment_id,
                                      workerid=worker_id)
    else:
        raise ExperimentError('status_incorrectly_set')
Exemple #58
0
from user_agents import parse

# Sample user agent string: Chrome 57 on 64-bit Windows.
u = (
    'Mozilla/5.0 ( Windows NT 6.1; Win64; x64) AppleWebKit/537.36 '
    '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
)

# Parse it and print the whole result, then just its browser part.
ua = parse(u)

print(ua)
print(ua.browser)
Exemple #59
0
def advertisement():
    """
    This is the url we give for the ad for our 'external question'.  The ad has
    to display two different things: This page will be called from within
    mechanical turk, with url arguments hitId, assignmentId, and workerId.
    If the worker has not yet accepted the hit:
        These arguments will have null values, we should just show an ad for
        the experiment.
    If the worker has accepted the hit:
        These arguments will have appropriate values and we should enter the
        person in the database and provide a link to the experiment popup.

    Before anything else the requesting browser is checked against the
    comma-separated ``browser_exclude_rule`` setting: the keywords mobile,
    tablet, touchcapable, pc and bot test the parsed user agent, and any
    other rule is matched as a substring of the raw user agent string.
    Empty rules are ignored.

    Raises:
        ExperimentError: The browser is excluded, required url arguments
            are missing, or the participant's status forbids showing the ad.
    """
    user_agent_string = request.user_agent.string
    user_agent_obj = user_agents.parse(user_agent_string)
    # Keyword rules and the parsed-user-agent attribute each one tests.
    device_rules = {
        "mobile": "is_mobile",
        "tablet": "is_tablet",
        "touchcapable": "is_touch_capable",
        "pc": "is_pc",
        "bot": "is_bot",
    }
    browser_ok = True
    # str.split works on both Python 2 and 3; string.split is Python 2 only.
    exclude_rules = CONFIG.get('HIT Configuration', 'browser_exclude_rule')
    for rule in exclude_rules.split(','):
        myrule = rule.strip()
        if not myrule:
            # An empty rule (empty setting or stray comma) is a substring of
            # every user agent and would otherwise exclude all browsers.
            continue
        if myrule in device_rules:
            if getattr(user_agent_obj, device_rules[myrule]):
                browser_ok = False
        elif myrule in user_agent_string:
            browser_ok = False

    if not browser_ok:
        # Handler for IE users if IE is not supported.
        raise ExperimentError('browser_type_not_allowed')

    if not ('hitId' in request.args and 'assignmentId' in request.args):
        raise ExperimentError('hit_assign_worker_id_not_set_in_mturk')
    hit_id = request.args['hitId']
    assignment_id = request.args['assignmentId']
    mode = request.args['mode']
    debug_mode = hit_id[:5] == "debug"
    already_in_db = False
    if 'workerId' in request.args:
        worker_id = request.args['workerId']
        # First check if this workerId has completed the task before (v1).
        nrecords = Participant.query.\
            filter(Participant.assignmentid != assignment_id).\
            filter(Participant.workerid == worker_id).\
            count()

        if nrecords > 0:  # Already completed task
            already_in_db = True
    else:  # If worker has not accepted the hit
        worker_id = None
    try:
        part = Participant.query.\
            filter(Participant.hitid == hit_id).\
            filter(Participant.assignmentid == assignment_id).\
            filter(Participant.workerid == worker_id).\
            one()
        status = part.status
    except exc.SQLAlchemyError:
        # No single matching participant row; treat the status as unset.
        status = None

    if status == STARTED and not debug_mode:
        # Once participants have finished the instructions, we do not allow
        # them to start the task again.
        raise ExperimentError('already_started_exp_mturk')
    elif status == COMPLETED:
        # They've done the debriefing but perhaps haven't submitted the HIT
        # yet.. Turn asignmentId into original assignment id before sending it
        # back to AMT
        return render_template(
            'thanks.html',
            is_sandbox=(mode == "sandbox"),
            hitid=hit_id,
            assignmentid=assignment_id,
            workerid=worker_id
        )
    elif already_in_db and not debug_mode:
        raise ExperimentError('already_did_exp_hit')
    elif status == ALLOCATED or not status or debug_mode:
        # Participant has not yet agreed to the consent. They might not
        # even have accepted the HIT.
        with open('templates/ad.html', 'r') as temp_file:
            ad_string = temp_file.read()
        ad_string = insert_mode(ad_string, mode)
        return render_template_string(
            ad_string,
            hitid=hit_id,
            assignmentid=assignment_id,
            workerid=worker_id
        )
    else:
        raise ExperimentError('status_incorrectly_set')
Exemple #60
0
def privacy_policy():
    """Render the privacy-policy page, flagging mobile user agents."""
    # Default to an empty string so the parser always receives a string,
    # even when the client sent no User-Agent header.
    user_agent = parse(request.headers.get('User-Agent', ''))
    return render_template('privacy_policy.html',
                           mobile=user_agent.is_mobile,
                           subtitle=gettext(u'Datenschutz'))