def _refresh_visitor(self, user, request, visit_time):
    """Fetch or build the ``Visitor`` for this session, refresh it and save it.

    The visitor is keyed by the session key. The method copies the user id
    (when not yet set), the session expiry details, the User-Agent header,
    the ``source`` / ``medium`` query parameters and the elapsed time on
    site onto the visitor, then saves it inside ``transaction.atomic()``.

    Returns the saved visitor, or the already-stored row when the save
    fails with ``IntegrityError``.
    """
    session = request.session
    key = session.session_key

    try:
        visitor = Visitor.objects.get(pk=key)
    except Visitor.DoesNotExist:
        # First sighting of this session: only the key and the IP address
        # are set here (the original comment notes that the start time
        # comes from the field's `default`).
        visitor = Visitor(pk=key, ip_address=get_ip_address(request))

    # Attach the user once authentication has happened. Comparing against
    # `user_id` rather than `user` avoids a database hit.
    if user and not visitor.user_id:
        visitor.user_id = user.id

    # Session expiration details.
    visitor.expiry_age = session.get_expiry_age()
    visitor.expiry_time = session.get_expiry_date()

    # Latest User-Agent header, if the client sent one.
    raw_agent = request.META.get('HTTP_USER_AGENT', None)
    if raw_agent:
        visitor.user_agent = smart_text(
            raw_agent, encoding='latin-1', errors='ignore')

    # Optional `source` and `medium` query parameters; an absent or empty
    # value leaves the stored attribute untouched.
    for param in ('source', 'medium'):
        value = request.GET.get(param, None)
        if value:
            setattr(visitor, param, value)

    elapsed = (
        total_seconds(visit_time - visitor.start_time)
        if visitor.start_time else 0
    )
    visitor.time_on_site = int(elapsed)

    try:
        with transaction.atomic():
            visitor.save()
    except IntegrityError:
        # A concurrent response may already have inserted a row with this
        # session key; in that case use the row that won the race.
        visitor = Visitor.objects.get(pk=key)

    return visitor
def _refresh_visitor(self, user, request, visit_time):
    """Return the up-to-date, saved ``Visitor`` for the request's session.

    Looks the visitor up by session key, creating an unsaved one (with the
    client IP address) when none exists. It then refreshes the user, the
    session expiry details, the User-Agent and the time on site, and saves.
    """
    key = request.session.session_key

    try:
        visitor = Visitor.objects.get(pk=key)
    except Visitor.DoesNotExist:
        # New session: record the IP address alongside the key.
        client_ip = get_ip_address(request)
        visitor = Visitor(pk=key, ip_address=client_ip)

    # Only fill in the user when it has not been recorded yet; testing
    # `user_id` avoids loading the related user object.
    needs_user = bool(user) and not visitor.user_id
    if needs_user:
        visitor.user = user

    # Session expiration details.
    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # Store the most recent User-Agent header when one is present.
    agent_header = request.META.get('HTTP_USER_AGENT', None)
    if agent_header:
        visitor.user_agent = smart_text(
            agent_header, encoding='latin-1', errors='ignore')

    if visitor.start_time:
        seconds_on_site = total_seconds(visit_time - visitor.start_time)
    else:
        seconds_on_site = 0
    visitor.time_on_site = int(seconds_on_site)

    visitor.save()
    return visitor
def process_request(self, request):
    """Record or refresh the ``Visitor`` row describing this request.

    Nothing is tracked (and the method returns early) when the request is
    AJAX, when the User-Agent contains the keyword of any
    ``UntrackedUserAgent``, or when the path starts with one of
    ``self.prefixes``.

    Otherwise the visitor is looked up by session key and IP address. If
    that fails, a visitor with the same IP and User-Agent seen within the
    last five minutes is reused; failing that, a new one is created. The
    visitor's user, user agent, URL, page-view counter and timestamps are
    then updated and saved.

    Always returns ``None``.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                         errors='ignore')

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # if a keyword is found in the user agent, stop tracking
    for ua in untracked:
        if user_agent.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s'
                      % (user_agent, ua.keyword))
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)
        session_key = session_key[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    now = datetime.now()

    attrs = {
        'session_key': session_key,
        'ip_address': ip_address,
    }

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent,
            last_update__gte=cutoff,
        )

        if len(visitors):
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s'
                      % (ip_address, user_agent, visitor.id))
        else:
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
    except Exception:
        # Tracking is best effort: give up on this request, but leave a
        # trace instead of silently swallowing the error. Narrower than a
        # bare `except:` so SystemExit / KeyboardInterrupt still propagate.
        log.error('There was a problem looking up the visitor:\n%s\n\n%s'
                  % (traceback.format_exc(), attrs))
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = now - timedelta(hours=1)
    if not visitor.last_update or visitor.last_update <= one_hour_ago:
        visitor.referrer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now
    try:
        visitor.save()
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s'
                  % (traceback.format_exc(), locals()))
def process_request(self, request):
    """Track the visitor behind ``request`` and store its data in redis.

    Returns early, without tracking, when:

    * the path starts with ``/s/``, ``/static/``, ``/admin/`` or
      ``/favicon.ico``;
    * the request is AJAX and the path does not start with ``/o/``;
    * the User-Agent contains the keyword of an ``UntrackedUserAgent``;
    * the path starts with one of ``self.prefixes``.

    Otherwise a ``Visitor`` id is resolved (from ``request.session`` or the
    database, creating a row if needed). The JSON blob stored under the
    redis key ``visitor_data_<id>`` is loaded, updated with the user agent,
    referrer, URL, page-view counter, timestamps, Google Analytics
    ``__utmz`` cookie values and ``utm_*`` query parameters, and written
    back.

    Always returns ``None``.
    """
    # Skip static/admin/favicon paths and AJAX requests outside /o/.
    untracked_paths = ("/s/", "/static/", "/admin/", "/favicon.ico")
    if request.path.startswith(untracked_paths) or (
            request.is_ajax() and not request.path.startswith('/o/')):
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                         errors='ignore')

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # if a keyword is found in the user agent, stop tracking
    for ua in untracked:
        if user_agent.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s'
                      % (user_agent, ua.keyword))
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)
        session_key = session_key[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    now = datetime.now()

    attrs = {'session_key': session_key, 'ip_address': ip_address}

    # NOTE(review): unlike the session-key lookup above, this access is not
    # guarded by hasattr(request, 'session') -- confirm the session
    # middleware is always installed.
    visitor_id = request.session.get('visitor_id', None)
    if not visitor_id:
        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.only('id').get(**attrs)
        except Visitor.DoesNotExist:
            request.session.set_test_cookie()
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.only('id').filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff)

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s'
                          % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)

            try:
                visitor.save()
            except DatabaseError:
                # NOTE(review): execution continues after a failed save, so
                # `visitor.id` below may be unset -- confirm this is intended.
                print_stack_trace()
                log.error(
                    'There was a problem saving visitor information:\n%s\n\n%s'
                    % (traceback.format_exc(), locals()))
        except Exception:
            # Best effort: any other lookup failure aborts tracking for this
            # request. Narrower than a bare `except:` so SystemExit /
            # KeyboardInterrupt still propagate.
            return

        request.session['visitor_id'] = visitor_id = visitor.id

    redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
    visitor_data = json.loads(redis_data)
    visitor_data['visitor_id'] = visitor_id

    # update the tracking information
    visitor_data['user_agent'] = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    # TODO: ensure that we are on the same time zone - UTC was put here just
    # to get it working
    one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
    last_update = visitor_data.get('last_update', None)
    if not last_update or last_update <= time.mktime(
            one_hour_ago.timetuple()):
        visitor_data['referrer'] = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor_data['page_views'] = 0
        visitor_data['session_start'] = time.mktime(now.timetuple())

    visitor_data['url'] = request.path
    page_views = visitor_data.get('page_views', 0) + 1
    visitor_data['page_views'] = page_views
    visitor_data['last_update'] = time.mktime(now.timetuple())

    try:
        # Extracting visitor data from GA cookie
        cookie = request.COOKIES.get('__utmz')
        if cookie:
            try:
                # Everything after the fourth '.' holds the
                # `utmXXX=value|...` pairs.
                data = cookie.split('.', 4)[-1]
                data = dict(
                    match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
            except (ValueError, IndexError):
                log.error('Malformed GA cookie: {0!r}'.format(cookie))
            else:
                visitor_data['source'] = normalize_ga_value(
                    data.get('utmcsr'))
                visitor_data['medium'] = normalize_ga_value(
                    data.get('utmcmd'))
                visitor_data['campaign'] = normalize_ga_value(
                    data.get('utmccn'))
                # The regex above yields the key 'utmctr'; the previous
                # lookup of 'utm.ctr' could never match, so keywords were
                # always derived from None.
                visitor_data['keywords'] = normalize_ga_value(
                    data.get('utmctr'))

        utm_source = request.GET.get("utm_source", "unknown")
        request.session['acquisition_source_name'] = utm_source
        if utm_source != "unknown":
            # utm_source: the advertiser, site, publication, etc. sending
            #   traffic, e.g. google, citysearch, newsletter4, billboard.
            # utm_medium: the advertising or marketing medium, e.g. cpc,
            #   banner, email newsletter.
            # utm_campaign: the campaign name, slogan, promo code, etc.
            # utm_term: paid search keywords.
            # utm_content: differentiates similar content or links within
            #   the same ad (e.g. two call-to-action links in one email).

            # Update the tracking info with the latest values and move the
            # previous ones into the history list.
            past_acquisition_info = visitor_data.get(
                'past_acquisition_info', [])
            if visitor_data.get('acquisition_source', None):
                old_visitor_data = {'date_valid_until': time.time()}
                for k in VISITOR_PARAMS_MAPPING:
                    old_visitor_data[k] = visitor_data.get(k, None)
                past_acquisition_info.append(old_visitor_data)
                visitor_data['past_acquisition_info'] = past_acquisition_info

            for k, v in VISITOR_PARAMS_MAPPING.items():
                value = request.GET.get(v, 'unknown')[:255]
                visitor_data[k] = value
    except Exception:
        # Acquisition data is optional; report the problem and still persist
        # whatever was collected so far.
        print_stack_trace()

    redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
def process_response(self, request, response):
    """Update the session's ``Visitor`` and optionally log a ``Pageview``.

    The response is returned untouched, without tracking, when:

    * the request has no ``session`` attribute;
    * the request is AJAX and ``TRACK_AJAX_REQUESTS`` is falsy;
    * the response status code is in ``TRACK_IGNORE_STATUS_CODES``;
    * the user is missing/anonymous and ``TRACK_ANONYMOUS_USERS`` is falsy.

    Otherwise the ``Visitor`` keyed by the session key is loaded or created,
    refreshed (user, expiry details, time on site) and saved. When
    ``TRACK_PAGEVIEWS`` is set and the path matches none of
    ``TRACK_IGNORE_URLS``, a ``Pageview`` is saved as well.

    Always returns ``response``.
    """
    # Session framework not installed, nothing to see here..
    if not hasattr(request, 'session'):
        return response

    # Do not track AJAX requests..
    if request.is_ajax() and not TRACK_AJAX_REQUESTS:
        return response

    # Do not track if the response status code is blacklisted
    if response.status_code in TRACK_IGNORE_STATUS_CODES:
        return response

    # If dealing with a non-authenticated user, we still should track the
    # session since if authentication happens, the `session_key` carries
    # over, thus having a more accurate start time of session
    user = getattr(request, 'user', None)

    # Check for anonymous users
    if not user or user.is_anonymous():
        if not TRACK_ANONYMOUS_USERS:
            return response
        user = None

    # Force a save to generate a session key if one does not exist
    if not request.session.session_key:
        request.session.save()

    # A Visitor row is unique by session_key
    session_key = request.session.session_key

    try:
        visitor = Visitor.objects.get(pk=session_key)
    except Visitor.DoesNotExist:
        # Log the ip address and user agent. Per the original comment, the
        # start time is managed via the field `default` value.
        visitor = Visitor(
            pk=session_key,
            ip_address=get_ip_address(request),
            user_agent=request.META.get('HTTP_USER_AGENT', None))

    # Update the user field if the visitor user is not set. This implies
    # authentication has occurred on this request and the user object now
    # exists. Check using `user_id` to prevent a database hit.
    if user and not visitor.user_id:
        visitor.user = user

    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # Be conservative when determining time on site, since simply
    # increasing the session timeout could greatly skew results. This
    # is the only time we can guarantee.
    now = timezone.now()

    time_on_site = 0
    if visitor.start_time:
        elapsed = now - visitor.start_time
        # `timedelta.seconds` alone discards whole days, so visits longer
        # than 24 hours would wrap around; include the day component.
        time_on_site = elapsed.days * 86400 + elapsed.seconds
    visitor.time_on_site = time_on_site

    visitor.save()

    if TRACK_PAGEVIEWS:
        # Match against `path_info` to not include the SCRIPT_NAME..
        path = request.path_info.lstrip('/')
        for url in TRACK_IGNORE_URLS:
            if url.match(path):
                break
        else:
            # No ignore pattern matched: record the page view.
            referer = None
            if TRACK_REFERER:
                referer = request.META.get('HTTP_REFERER', None)

            pageview = Pageview(
                visitor=visitor,
                url=request.path,
                view_time=now,
                method=request.method,
                referer=referer)
            pageview.save()

    return response
def process_request(self, request):
    """Record or refresh the ``Visitor`` row describing this request.

    Returns early, without tracking, when the request is AJAX, when
    ``utils.user_agent_is_untracked`` reports the User-Agent as untracked,
    or when the path starts with one of ``NO_TRACKING_PREFIXES``.

    Otherwise the visitor is looked up by session key and IP address. If
    that fails, a visitor with the same IP and User-Agent seen within the
    last five minutes is reused; failing that, a new one is created. The
    visitor's user, user agent, URL, page-view counter and timestamps are
    then updated and saved.

    Always returns ``None``.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = request.META.get('HTTP_USER_AGENT', '')

    if utils.user_agent_is_untracked(user_agent):
        # The matching keyword is not available in this function (the
        # previous message referenced an undefined `ua` and raised
        # NameError), so only the user agent is logged.
        log.debug('Not tracking UA "%s" because it matched an untracked '
                  'keyword' % user_agent)
        return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key (also covers a session that
        # has no key yet, which would otherwise give a key of None)
        session_key = md5('%s:%s' % (ip_address, user_agent)).hexdigest()

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in NO_TRACKING_PREFIXES:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    now = datetime.now()

    attrs = {
        'session_key': session_key,
        'ip_address': ip_address,
    }

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent[:255],
            last_update__gte=cutoff,
        )

        if len(visitors):
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s'
                      % (ip_address, user_agent, visitor.id))
        else:
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
    except Exception:
        # Tracking is best effort: give up on this request, but leave a
        # trace instead of silently swallowing the error. Narrower than a
        # bare `except:` so SystemExit / KeyboardInterrupt still propagate.
        log.exception('There was a problem looking up the visitor: %s'
                      % attrs)
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent[:255]

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    if not visitor.last_update or visitor.last_update <= (
            now - timedelta(hours=1)):
        visitor.referrer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path[:255]
    visitor.page_views += 1
    visitor.last_update = now
    visitor.save()