def _create_visitor(self, user, page_views, ip_address):
    """Create, save and return a ``Visitor`` for the given user.

    The visitor is recorded against the root URL with ``session_start`` and
    ``last_update`` both taken from the current local clock and a randomly
    drawn integer ``session_key``.
    """
    started_at = datetime.datetime.now()
    updated_at = datetime.datetime.now()
    # Start is even and the step is 2, so the drawn key is always even.
    key = random.randrange(1000000, 9999999999, 2)

    fields = {
        'user': user,
        'url': "/",
        'page_views': page_views,
        'session_start': started_at,
        'last_update': updated_at,
        'ip_address': ip_address,
        'session_key': key,
    }
    visitor = Visitor(**fields)
    visitor.save()
    return visitor
def _refresh_visitor(self, user, request, visit_time):
    """Fetch or create the session's ``Visitor``, refresh it and save it.

    The row is looked up by the session key (its primary key). The user,
    session expiry details, User-Agent and time on site are updated before
    saving. Returns the saved visitor, or the already-stored row if a
    concurrent request inserted the same key first.
    """
    # A Visitor row is unique by session_key
    key = request.session.session_key

    try:
        visitor = Visitor.objects.get(pk=key)
    except Visitor.DoesNotExist:
        # New session: log the ip address. Start time is managed via the
        # field `default` value.
        visitor = Visitor(pk=key, ip_address=get_ip_address(request))
        # NOTE(review): assumed to apply to newly created visitors only;
        # confirm against the original layout.
        if TRACK_USING_GEOIP:
            visitor.geoip_data = get_geo_ip_data(visitor.ip_address)

    # Attach the user only when the visitor has none yet, which implies
    # authentication occurred on this request. Comparing `user_id` avoids a
    # database hit.
    if user and not visitor.user_id:
        visitor.user_id = user.id

    # update some session expiration details
    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # grab the latest User-Agent and store it
    raw_agent = request.META.get('HTTP_USER_AGENT', None)
    if raw_agent:
        visitor.user_agent = smart_text(
            raw_agent, encoding='latin-1', errors='ignore')

    if visitor.start_time:
        elapsed = total_seconds(visit_time - visitor.start_time)
    else:
        elapsed = 0
    visitor.time_on_site = int(elapsed)

    try:
        with transaction.atomic():
            visitor.save()
    except IntegrityError:
        # A second response may have saved this Visitor already; our save()
        # (having failed to UPDATE anything) then tries to INSERT the same
        # session key (pk) again. Just grab the "winner" and use that.
        visitor = Visitor.objects.get(pk=key)

    return visitor
def _refresh_visitor(self, user, request, visit_time):
    """Fetch or create the session's ``Visitor``, refresh it and save it.

    Looks the row up by session key, fills in the user id, session expiry,
    User-Agent and time on site, then saves inside a transaction. If the
    save hits an ``IntegrityError`` the stored row is returned instead.
    """
    session = request.session
    # A Visitor row is unique by session_key
    pk = session.session_key

    try:
        visitor = Visitor.objects.get(pk=pk)
    except Visitor.DoesNotExist:
        # Log the ip address. Start time is managed via the field
        # `default` value.
        client_ip = get_ip_address(request)
        visitor = Visitor(pk=pk, ip_address=client_ip)

    # The visitor has no user yet but the request does: authentication
    # happened on this request. `user_id` is checked to avoid a database hit.
    if user and not visitor.user_id:
        visitor.user_id = user.id

    # update some session expiration details
    visitor.expiry_age = session.get_expiry_age()
    visitor.expiry_time = session.get_expiry_date()

    # grab the latest User-Agent and store it
    agent = request.META.get('HTTP_USER_AGENT', None)
    if agent:
        visitor.user_agent = smart_text(
            agent, encoding='latin-1', errors='ignore')

    seconds_on_site = 0
    if visitor.start_time:
        seconds_on_site = total_seconds(visit_time - visitor.start_time)
    visitor.time_on_site = int(seconds_on_site)

    try:
        with transaction.atomic():
            visitor.save()
    except IntegrityError:
        # Another response saved this Visitor between our lookup and our
        # save, so the INSERT of the same session key (pk) failed. Use the
        # row that won the race.
        return Visitor.objects.get(pk=pk)

    return visitor
def _refresh_visitor(self, user, request, visit_time):
    """Fetch or create the session's ``Visitor``, refresh it and save it.

    Updates the user, session expiry details, User-Agent and time on site
    (seconds between the visitor's start time and ``visit_time``), then
    saves and returns the visitor.
    """
    # A Visitor row is unique by session_key
    key = request.session.session_key

    try:
        visitor = Visitor.objects.get(pk=key)
    except Visitor.DoesNotExist:
        # Log the ip address. Start time is managed via the field
        # `default` value.
        visitor = Visitor(pk=key, ip_address=get_ip_address(request))

    # Set the user only if the visitor has none; `user_id` is checked to
    # prevent a database hit.
    if user and not visitor.user_id:
        visitor.user = user

    # update some session expiration details
    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # grab the latest User-Agent and store it
    raw_agent = request.META.get('HTTP_USER_AGENT', None)
    if raw_agent:
        visitor.user_agent = smart_text(
            raw_agent, encoding='latin-1', errors='ignore')

    if visitor.start_time:
        elapsed = total_seconds(visit_time - visitor.start_time)
    else:
        elapsed = 0
    visitor.time_on_site = int(elapsed)

    visitor.save()
    return visitor
def process_response(self, request, response):
    """Record or refresh the ``Visitor`` row for this request's session.

    Nothing is tracked when the request has no session, or when it is an
    AJAX request and ``TRACK_AJAX_REQUESTS`` is off. Otherwise the visitor
    identified by the session key is loaded (or created), its user, session
    expiry and time on site are updated, and it is saved.

    Always returns ``response`` unchanged.
    """
    if not hasattr(request, "session"):
        return response

    # Do not track AJAX requests..
    if request.is_ajax() and not TRACK_AJAX_REQUESTS:
        return response

    # Non-authenticated sessions are still tracked: if authentication
    # happens later the `session_key` carries over, giving a more accurate
    # session start time.
    user = getattr(request, "user", None)

    # We cannot do anything with Anonymous users
    if user and not user.is_authenticated():
        user = None

    # A Visitor row is unique by session_key
    session_key = request.session.session_key

    try:
        visitor = Visitor.objects.get(session_key=session_key)
    except Visitor.DoesNotExist:
        # Log the ip address. Start time is managed via the
        # field `default` value
        visitor = Visitor(
            session_key=session_key,
            ip_address=get_ip_address(request),
            user_agent=request.META.get("HTTP_USER_AGENT", None),
        )

    # Attach the user whenever the visitor has none. This runs for freshly
    # created visitors too, so a session that is already authenticated on
    # its first tracked request is not left without a user. Checking
    # `user_id` prevents a database hit.
    if user and not visitor.user_id:
        visitor.user = user

    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # Be conservative with the determining time on site since simply
    # increasing the session timeout could greatly skew results. This
    # is the only time we can guarantee.
    time_on_site = 0
    if visitor.start_time:
        elapsed = datetime.now() - visitor.start_time
        # `timedelta.seconds` alone discards whole days, which made the
        # value wrap around every 24 hours; include the days component and
        # never report a negative duration.
        time_on_site = max(elapsed.days * 86400 + elapsed.seconds, 0)
    visitor.time_on_site = time_on_site

    visitor.save()

    return response
def _refresh_visitor(self, user, request, visit_time):
    """Fetch or create the session's ``Visitor``, refresh it and save it.

    The visitor is keyed by the session key. Its user, session expiry
    details, User-Agent and time on site are brought up to date before the
    row is saved and returned.
    """
    session = request.session
    # A Visitor row is unique by session_key
    pk = session.session_key

    try:
        visitor = Visitor.objects.get(pk=pk)
    except Visitor.DoesNotExist:
        # Log the ip address. Start time is managed via the field
        # `default` value.
        client_ip = get_ip_address(request)
        visitor = Visitor(pk=pk, ip_address=client_ip)

    # Only fill in a missing user; `user_id` is compared to prevent a
    # database hit.
    if user and not visitor.user_id:
        visitor.user = user

    # update some session expiration details
    visitor.expiry_age = session.get_expiry_age()
    visitor.expiry_time = session.get_expiry_date()

    # grab the latest User-Agent and store it
    agent = request.META.get('HTTP_USER_AGENT', None)
    if agent:
        visitor.user_agent = smart_text(
            agent, encoding='latin-1', errors='ignore')

    seconds_on_site = 0
    if visitor.start_time:
        seconds_on_site = total_seconds(visit_time - visitor.start_time)
    visitor.time_on_site = int(seconds_on_site)

    visitor.save()
    return visitor
def process_request(self, request):
    """Track the visitor behind ``request`` and store its data in redis.

    Requests for static/admin/favicon paths, AJAX requests outside ``/o/``,
    untracked user agents and paths matching ``self.prefixes`` are ignored.
    For everything else a ``Visitor`` id is resolved (from the session, an
    existing row, or a new row) and a JSON document stored under
    ``visitor_data_<id>`` in redis is updated with the user agent, referrer,
    page views, timestamps, Google Analytics cookie values and ``utm_*``
    acquisition parameters.

    Returns ``None`` in every case.
    """
    # don't process static, admin, favicon or (non-/o/) AJAX requests
    if (request.path.startswith(("/s/", "/static/", "/admin/", "/favicon.ico"))
            or (request.is_ajax() and not request.path.startswith('/o/'))):
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                         errors='ignore')

    # retrieve untracked user agents from cache
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # see if the user agent is not supposed to be tracked
    for ua in untracked:
        # if the keyword is found in the user agent, stop tracking
        if user_agent.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s' %
                      (user_agent, ua.keyword))
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)
        session_key = session_key[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    attrs = {'session_key': session_key, 'ip_address': ip_address}

    visitor_id = request.session.get('visitor_id', None)
    if not visitor_id:
        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.only('id').get(**attrs)
        except Visitor.DoesNotExist:
            request.session.set_test_cookie()
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.only('id').filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff)

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' %
                          (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
            try:
                visitor.save()
            except DatabaseError:
                # NOTE(review): after a failed save `visitor.id` may be
                # None, in which case the redis key below is shared.
                print_stack_trace()
                log.error(
                    'There was a problem saving visitor information:\n%s\n\n%s'
                    % (traceback.format_exc(), locals()))
        except Exception:
            # Tracking is best effort: give up quietly on lookup errors, but
            # no longer swallow KeyboardInterrupt/SystemExit.
            return

        request.session['visitor_id'] = visitor_id = visitor.id

    redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
    visitor_data = json.loads(redis_data)

    visitor_data['visitor_id'] = visitor_id

    # update the tracking information
    visitor_data['user_agent'] = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))

    # TODO: ensure that we are on the same time zone - I just put UTC for now
    # to get it working
    last_update = visitor_data.get('last_update', None)
    if not last_update or last_update <= time.mktime(
            one_hour_ago.timetuple()):
        visitor_data['referrer'] = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor_data['page_views'] = 0
        visitor_data['session_start'] = time.mktime(now.timetuple())

    visitor_data['url'] = request.path
    page_views = visitor_data.get('page_views', 0) + 1
    visitor_data['page_views'] = page_views
    visitor_data['last_update'] = time.mktime(now.timetuple())

    try:
        # Extracting visitor data from GA cookie
        cookie = request.COOKIES.get('__utmz')
        if cookie:
            try:
                data = cookie.split('.', 4)[-1]
                data = dict(match.groups() for match in re.finditer(
                    r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
            except (ValueError, IndexError):
                log.error('Malformed GA cookie: {0!r}'.format(cookie))
            else:
                visitor_data['source'] = normalize_ga_value(
                    data.get('utmcsr'))
                visitor_data['medium'] = normalize_ga_value(
                    data.get('utmcmd'))
                visitor_data['campaign'] = normalize_ga_value(
                    data.get('utmccn'))
                # The regex above captures the key as `utmctr`; the former
                # lookup of 'utm.ctr' could never match, so keywords were
                # always lost.
                visitor_data['keywords'] = normalize_ga_value(
                    data.get('utmctr'))

        utm_source = request.GET.get("utm_source", "unknown")
        request.session['acquisition_source_name'] = utm_source
        if utm_source != "unknown":
            # utm_source: the advertiser, site, publication, etc. sending
            #   traffic, e.g. google, citysearch, newsletter4, billboard.
            # utm_medium: the advertising or marketing medium, e.g. cpc,
            #   banner, email newsletter.
            # utm_campaign: the campaign name, slogan, promo code, etc.
            # utm_term: paid search keywords.
            # utm_content: differentiates similar content or links within
            #   the same ad.

            # update the tracking info with the latest and bump the old one
            # to be stored in the history
            past_acquisition_info = visitor_data.get(
                'past_acquisition_info', [])
            if visitor_data.get('acquisition_source', None):
                old_visitor_data = {'date_valid_until': time.time()}
                for k in VISITOR_PARAMS_MAPPING.keys():
                    old_visitor_data[k] = visitor_data.get(k, None)

                past_acquisition_info.append(old_visitor_data)
                visitor_data['past_acquisition_info'] = past_acquisition_info

            for k, v in VISITOR_PARAMS_MAPPING.items():
                value = request.GET.get(v, 'unknown')[:255]
                visitor_data[k] = value

    except Exception:
        # Acquisition data is optional; report the failure and keep going.
        print_stack_trace()

    redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
def process_request(self, request):
    """Create or update the ``Visitor`` row for a non-AJAX request.

    The request is skipped when it is AJAX, when its user agent contains an
    untracked keyword, or when its path starts with one of
    ``self.prefixes``. Otherwise the matching visitor (by session key and
    IP, or by IP and user agent within the last five minutes) is updated,
    or a new one is created, and saved. Database errors on save are logged.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(
        request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

    # retrieve untracked user agents from cache, refreshing it on a miss
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # stop tracking as soon as an untracked keyword shows up in the agent
    for ua in untracked:
        if ua.keyword in user_agent:
            log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = ('%s:%s' % (ip_address, user_agent))[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = right_now()

    attrs = {'session_key': session_key, 'ip_address': ip_address}

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent,
            last_update__gte=cutoff
        )

        if not len(visitors):
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
        else:
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
    except:
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = now - timedelta(hours=1)
    if not visitor.last_update or visitor.last_update <= one_hour_ago:
        visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now
    try:
        visitor.save()
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
def process_request(self, request):
    """Create or update the ``Visitor`` row for a non-AJAX request.

    Skips AJAX requests, user agents containing an untracked keyword, and
    paths under any untracked prefix (including the media URLs and the
    tracker's own refresh page). Otherwise the visitor is found by session
    key and IP, or by IP and user agent within the last five minutes, or
    created; its tracking fields are updated and it is saved.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = request.META.get('HTTP_USER_AGENT', '')[:255]

    # stop tracking if any untracked keyword is found in the user agent
    if any(str(user_agent).find(entry.keyword) != -1
           for entry in UntrackedUserAgent.objects.all()):
        return

    if hasattr(request, 'session'):
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)

    prefixes = utils.get_untracked_prefixes()

    # don't track media file requests
    media_url = settings.MEDIA_URL
    if media_url and media_url != '/':
        prefixes.append(settings.MEDIA_URL)

    if settings.ADMIN_MEDIA_PREFIX:
        prefixes.append(settings.ADMIN_MEDIA_PREFIX)

    try:
        # finally, don't track requests to the tracker update pages
        prefixes.append(reverse('tracking-refresh-active-users'))
    except NoReverseMatch:
        # django-tracking hasn't been included in the URLconf if we get here
        pass

    # ensure that the request.path does not begin with any of the prefixes
    if any(request.path.startswith(prefix) for prefix in prefixes):
        return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    attrs = {'session_key': session_key, 'ip_address': ip_address}

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        recent = {
            'ip_address': ip_address,
            'user_agent': user_agent,
            'last_update__gte': now - timedelta(minutes=5),
        }

        try:
            visitor = Visitor.objects.get(**recent)
            visitor.session_key = session_key
        except Visitor.MultipleObjectsReturned:
            # just get the first match
            visitor = Visitor.objects.filter(**recent)[0]
            visitor.session_key = session_key
        except Visitor.DoesNotExist:
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
    except:
        return

    # determine whether or not the user is logged in
    user = None if isinstance(request.user, AnonymousUser) else request.user

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    stale_before = now - timedelta(hours=1)
    if not visitor.last_update or visitor.last_update <= stale_before:
        visitor.referrer = request.META.get('HTTP_REFERER', 'unknown')[:255]

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now
    visitor.save()
def process_response(self, request, response):
    """Record the visitor, and optionally a pageview, for this response.

    Tracking is skipped when there is no session, for AJAX requests unless
    ``TRACK_AJAX_REQUESTS`` is set, for status codes listed in
    ``TRACK_IGNORE_STATUS_CODES``, and for anonymous users unless
    ``TRACK_ANONYMOUS_USERS`` is set. Otherwise the ``Visitor`` keyed by the
    session key is loaded or created, refreshed and saved; when
    ``TRACK_PAGEVIEWS`` is on and the path matches none of
    ``TRACK_IGNORE_URLS``, a ``Pageview`` is saved as well.

    Always returns ``response`` unchanged.
    """
    # Session framework not installed, nothing to see here..
    if not hasattr(request, 'session'):
        return response

    # Do not track AJAX requests..
    if request.is_ajax() and not TRACK_AJAX_REQUESTS:
        return response

    # Do not track if HTTP HttpResponse status_code blacklisted
    if response.status_code in TRACK_IGNORE_STATUS_CODES:
        return response

    # If dealing with a non-authenticated user, we still should track the
    # session since if authentication happens, the `session_key` carries
    # over, thus having a more accurate start time of session
    user = getattr(request, 'user', None)

    # Check for anonymous users
    if not user or user.is_anonymous():
        if not TRACK_ANONYMOUS_USERS:
            return response
        user = None

    # Force a save to generate a session key if one does not exist
    if not request.session.session_key:
        request.session.save()

    # A Visitor row is unique by session_key
    session_key = request.session.session_key

    try:
        visitor = Visitor.objects.get(pk=session_key)
    except Visitor.DoesNotExist:
        visitor_user_agent = request.META.get('HTTP_USER_AGENT', None)
        if visitor_user_agent is not None:
            visitor_user_agent = visitor_user_agent.decode(
                'latin-1', errors='ignore')
        # Log the ip address. Start time is managed via the
        # field `default` value
        visitor = Visitor(pk=session_key,
                          ip_address=get_ip_address(request),
                          user_agent=visitor_user_agent)

    # Update the user field if the visitor user is not set. This
    # implies authentication has occurred on this request and now
    # the user object exists. Check using `user_id` to prevent
    # a database hit.
    if user and not visitor.user_id:
        visitor.user = user

    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # Be conservative with the determining time on site since simply
    # increasing the session timeout could greatly skew results. This
    # is the only time we can guarantee.
    now = timezone.now()
    time_on_site = 0
    if visitor.start_time:
        elapsed = now - visitor.start_time
        # `timedelta.seconds` alone discards whole days, which made the
        # value wrap around every 24 hours; include the days component and
        # never report a negative duration.
        time_on_site = max(elapsed.days * 86400 + elapsed.seconds, 0)
    visitor.time_on_site = time_on_site

    visitor.save()

    if TRACK_PAGEVIEWS:
        # Match against `path_info` to not include the SCRIPT_NAME..
        path = request.path_info.lstrip('/')
        for url in TRACK_IGNORE_URLS:
            if url.match(path):
                break
        else:
            # No ignore pattern matched: record the pageview.
            referer = None
            query_string = None

            if TRACK_REFERER:
                referer = request.META.get('HTTP_REFERER', None)

            if TRACK_QUERY_STRING:
                query_string = request.META.get('QUERY_STRING')

            pageview = Pageview(visitor=visitor, url=request.path,
                                view_time=now, method=request.method,
                                referer=referer, query_string=query_string)
            pageview.save()

    return response
def process_request(self, request):
    """Create or update the ``Visitor`` row for a non-AJAX request.

    Skips AJAX requests, user agents that
    ``utils.user_agent_is_untracked`` rejects, and paths under any of
    ``NO_TRACKING_PREFIXES``. Otherwise the visitor is found by session key
    and IP, or by IP and user agent within the last five minutes, or
    created; its tracking fields are updated and it is saved.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = request.META.get('HTTP_USER_AGENT', '')

    if utils.user_agent_is_untracked(user_agent):
        # The matching keyword is not available here (the check lives in
        # the helper); the old message referenced an undefined `ua` and
        # raised NameError instead of returning.
        log.debug('Not tracking UA "%s" because it matches an untracked keyword'
                  % user_agent)
        return

    if hasattr(request, 'session'):
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = md5('%s:%s' % (ip_address, user_agent)).hexdigest()

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in NO_TRACKING_PREFIXES:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    attrs = {
        'session_key': session_key,
        'ip_address': ip_address,
    }

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent[:255],
            last_update__gte=cutoff
        )

        if visitors:
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s' %
                      (ip_address, user_agent, visitor.id))
        else:
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
    except Exception:
        # Tracking is best effort: give up quietly on lookup errors, but no
        # longer swallow KeyboardInterrupt/SystemExit.
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent[:255]

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    if not visitor.last_update or visitor.last_update <= (now - timedelta(hours=1)):
        visitor.referrer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path[:255]
    visitor.page_views += 1
    visitor.last_update = now
    visitor.save()
def process_request(self, request):
    """Create or update the ``Visitor`` for a request and attach it.

    Skips AJAX requests, user agents containing an untracked keyword, and
    paths under any of ``self.prefixes``. The visitor is looked up by the
    ``visitor_id`` cookie when present, otherwise by session key and IP; a
    new one is created when the lookup finds nothing. The visitor is saved
    (database errors are logged), exposed as ``request.visitor`` and its
    primary key stored in the session when one exists.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = request.META.get('HTTP_USER_AGENT', '')[:255]

    # retrieve untracked user agents from cache
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = list(UntrackedUserAgent.objects.all())
        cache.set(ua_key, untracked, 3600)

    # see if the user agent is not supposed to be tracked
    for ua in untracked:
        # if the keyword is found in the user agent, stop tracking
        if unicode(user_agent, errors='ignore').find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s' %
                      (user_agent, ua.keyword))
            return

    if hasattr(request, 'session'):
        # Force a save so that a session key exists.
        if not request.session.session_key:
            request.session.save()
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    # Attributes we use when creating a new user
    new_attrs = {
        'session_key': session_key,
        'ip_address': ip_address,
    }

    # If we have a visitor_id cookie, use it
    visitor_id = request.COOKIES.get('visitor_id')
    if visitor_id:
        attrs = {'id': visitor_id}
    else:
        attrs = new_attrs

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # add tracking ID to model if specified in the URL
        tid = request.GET.get('tid') or request.GET.get('fb_source')
        if tid:
            get = request.GET.copy()
            # Store the tracking id on the attributes actually used to
            # build the Visitor. It used to go into `attrs`, which is a
            # different dict when a stale visitor_id cookie is present, so
            # the id was silently dropped in that case.
            new_attrs['tid'] = tid
            # Swap in the copy of the query parameters, as before.
            request.GET = get

        visitor = Visitor(**new_attrs)
        log.debug('Created a new visitor: %s' % new_attrs)
    except Exception:
        # Tracking is best effort: give up quietly on lookup errors (for
        # example a malformed visitor_id cookie), but no longer swallow
        # KeyboardInterrupt/SystemExit.
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # if the visitor record is new, update their referrer URL
    if not visitor.last_update:
        visitor.referrer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now
    try:
        visitor.save()
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))

    request.visitor = visitor
    # The session is optional everywhere else in this method, so do not
    # assume it exists here either.
    if hasattr(request, 'session'):
        request.session['visitor_id'] = visitor.pk
def process_request(self, request):
    """Track the visitor behind ``request`` and store its data in redis.

    Ignored requests: paths under ``/s/``, ``/static/``, ``/admin/`` or
    ``/favicon.ico``, AJAX requests outside ``/o/``, user agents containing
    an untracked keyword, and paths matching ``self.prefixes``. For all
    others a ``Visitor`` id is taken from the session or resolved from the
    database (creating a row if needed), and the JSON document stored under
    ``visitor_data_<id>`` in redis is refreshed with user agent, referrer,
    page views, timestamps, Google Analytics cookie values and ``utm_*``
    acquisition parameters.

    Returns ``None`` in every case.
    """
    # don't process static, admin, favicon or (non-/o/) AJAX requests
    ignored_roots = ("/s/", "/static/", "/admin/", "/favicon.ico")
    if request.path.startswith(ignored_roots):
        return
    if request.is_ajax() and not request.path.startswith('/o/'):
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                         errors='ignore')

    # retrieve untracked user agents from cache
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # see if the user agent is not supposed to be tracked
    for ua in untracked:
        # if the keyword is found in the user agent, stop tracking
        if user_agent.find(ua.keyword) != -1:
            log.debug('Not tracking UA "%s" because of keyword: %s' %
                      (user_agent, ua.keyword))
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key
        session_key = '%s:%s' % (ip_address, user_agent)
        session_key = session_key[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s' % request.path)
            return

    # if we get here, the URL needs to be tracked
    # determine what time it is
    now = datetime.now()

    attrs = {
        'session_key': session_key,
        'ip_address': ip_address,
    }

    visitor_id = request.session.get('visitor_id', None)
    if not visitor_id:
        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.only('id').get(**attrs)
        except Visitor.DoesNotExist:
            request.session.set_test_cookie()
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.only('id').filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' %
                          (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
            try:
                visitor.save()
            except DatabaseError:
                # NOTE(review): after a failed save `visitor.id` may be
                # None, in which case the redis key below is shared.
                print_stack_trace()
                log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
        except Exception:
            # Tracking is best effort: give up quietly on lookup errors, but
            # no longer swallow KeyboardInterrupt/SystemExit.
            return

        request.session['visitor_id'] = visitor_id = visitor.id

    redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
    visitor_data = json.loads(redis_data)

    visitor_data['visitor_id'] = visitor_id

    # update the tracking information
    visitor_data['user_agent'] = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))

    # TODO: ensure that we are on the same time zone - I just put UTC for now
    # to get it working
    last_update = visitor_data.get('last_update', None)
    if not last_update or last_update <= time.mktime(one_hour_ago.timetuple()):
        visitor_data['referrer'] = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor_data['page_views'] = 0
        visitor_data['session_start'] = time.mktime(now.timetuple())

    visitor_data['url'] = request.path
    page_views = visitor_data.get('page_views', 0) + 1
    visitor_data['page_views'] = page_views
    visitor_data['last_update'] = time.mktime(now.timetuple())

    try:
        # Extracting visitor data from GA cookie
        cookie = request.COOKIES.get('__utmz')
        if cookie:
            try:
                data = cookie.split('.', 4)[-1]
                data = dict(match.groups() for match in re.finditer(
                    r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
            except (ValueError, IndexError):
                log.error('Malformed GA cookie: {0!r}'.format(cookie))
            else:
                visitor_data['source'] = normalize_ga_value(data.get('utmcsr'))
                visitor_data['medium'] = normalize_ga_value(data.get('utmcmd'))
                visitor_data['campaign'] = normalize_ga_value(data.get('utmccn'))
                # The regex above captures the key as `utmctr`; the former
                # lookup of 'utm.ctr' could never match, so keywords were
                # always lost.
                visitor_data['keywords'] = normalize_ga_value(data.get('utmctr'))

        utm_source = request.GET.get("utm_source", "unknown")
        request.session['acquisition_source_name'] = utm_source
        if utm_source != "unknown":
            # utm_source: the advertiser, site, publication, etc. sending
            #   traffic, e.g. google, citysearch, newsletter4, billboard.
            # utm_medium: the advertising or marketing medium, e.g. cpc,
            #   banner, email newsletter.
            # utm_campaign: the campaign name, slogan, promo code, etc.
            # utm_term: paid search keywords.
            # utm_content: differentiates similar content or links within
            #   the same ad.

            # update the tracking info with the latest and bump the old one
            # to be stored in the history
            past_acquisition_info = visitor_data.get('past_acquisition_info', [])
            if visitor_data.get('acquisition_source', None):
                old_visitor_data = {'date_valid_until': time.time()}
                for k in VISITOR_PARAMS_MAPPING.keys():
                    old_visitor_data[k] = visitor_data.get(k, None)

                past_acquisition_info.append(old_visitor_data)
                visitor_data['past_acquisition_info'] = past_acquisition_info

            for k, v in VISITOR_PARAMS_MAPPING.items():
                value = request.GET.get(v, 'unknown')[:255]
                visitor_data[k] = value

    except Exception:
        # Acquisition data is optional; report the failure and keep going.
        print_stack_trace()

    redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
def process_request(self, request):
    """Identify the visitor behind ``request`` and log the page hit.

    AJAX requests, user agents containing an untracked keyword and paths
    starting with one of ``self.prefixes`` are ignored.  For every other
    request the matching ``Visitor`` is loaded (or a new, unsaved one is
    built), its in-memory fields are refreshed and one line is written to
    the log.

    Always returns ``None``.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = str(request.META.get('HTTP_USER_AGENT', '')[:255])

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # if an untracked keyword is found in the user agent, stop tracking
    for ua in untracked:
        if ua.keyword in user_agent:
            log.debug('Not tracking UA "%s" because of keyword: %s',
                      user_agent, ua.keyword)
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key, capped at 40 characters
        session_key = ('%s:%s' % (ip_address, user_agent))[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s', request.path)
            return

    # if we get here, the URL needs to be tracked
    now = timezone.now()
    attrs = {
        'session_key': session_key,
        'ip_address': ip_address,
    }

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        visitor = Visitor(**attrs)
        log.debug('Created a new visitor: %s', attrs)
    except Exception:
        # Tracking is best-effort: give up on this request, but leave a
        # traceback behind instead of hiding the failure completely.
        log.exception('Could not look up visitor for: %s', attrs)
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent
    visitor.pv += 1
    # NOTE: nothing in this method saves `visitor`.  The visitor.save() and
    # the per-URL PageView bookkeeping that used to follow were disabled
    # (they only survived as string literals), so the updates above are
    # in-memory only.

    username = user.username if user else 'anonymous'
    log.info('time: {}, ip: {}, user: {}, url: {},'.format(
        now, ip_address, username, request.path))
def process_response(self, request, response):
    """Record the visitor (and optionally a pageview) for this response.

    When ``OPEN_WHITE_IP`` is set, requests whose IP address is not in the
    ``WhiteList`` table get a plain refusal response.  Requests for the
    login / logoff / recieve URLs and for ``/admin`` pass through
    untouched, as do requests without a session, untracked AJAX requests
    and ignored status codes.  Requests whose ``request.user`` is missing
    or is not a ``User`` instance are redirected to the login URL.

    Returns ``response`` unless one of the refusal / redirect cases above
    applies.
    """
    if OPEN_WHITE_IP:
        ip_list = WhiteList.objects.all().values_list('ip_address', flat=True)
        if get_ip_address(request) not in ip_list:
            # Parenthesised form prints the same text on Python 2 and 3.
            print(u"sorry you have no power")
            return HttpResponse(u"sorry you have no power")

    # Never track these views.  The names are reversed lazily and in order,
    # so a later name is only resolved when the earlier ones did not match.
    for url_name in ('login', 'logoff', 'recieve'):
        if request.path.startswith(reverse(url_name)):
            return response
    if request.path.startswith('/admin'):
        return response

    # Session framework not installed, nothing to see here..
    if not hasattr(request, 'session'):
        return response

    # Do not track AJAX requests..
    if request.is_ajax() and not TRACK_AJAX_REQUESTS:
        return response

    # Do not track if HTTP HttpResponse status_code blacklisted
    if response.status_code in TRACK_IGNORE_STATUS_CODES:
        return response

    user = getattr(request, 'user', None)

    # if cookie lost, redo login
    if not user or not isinstance(user, User):
        return HttpResponseRedirect(reverse("login"))

    # Force a save to generate a session key if one does not exist
    if not request.session.session_key:
        request.session.save()

    # A Visitor row is unique by session_key
    session_key = request.session.session_key

    try:
        visitor = Visitor.objects.get(pk=session_key)
    except Visitor.DoesNotExist:
        # Log the ip address. Start time is managed via the
        # field `default` value
        visitor = Visitor(
            pk=session_key,
            ip_address=get_ip_address(request),
            user_agent=request.META.get('HTTP_USER_AGENT', None),
        )

    # Update the user field if the visitor user is not set. Check using
    # `user_id` to prevent a database hit.
    if user and not visitor.user_id:
        visitor.user = user

    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # Be conservative with the determining time on site since simply
    # increasing the session timeout could greatly skew results. This
    # is the only time we can guarantee.
    now = timezone.now()
    time_on_site = 0
    if visitor.start_time:
        elapsed = now - visitor.start_time
        # `timedelta.seconds` alone drops whole days, which made sessions
        # longer than 24h wrap around; count the days too.  Clamp at zero
        # so a start time in the future never yields a negative duration.
        time_on_site = max(elapsed.days * 86400 + elapsed.seconds, 0)
    visitor.time_on_site = time_on_site

    visitor.save()

    if TRACK_PAGEVIEWS:
        # Match against `path_info` to not include the SCRIPT_NAME..
        path = request.path_info.lstrip('/')
        if not any(url.match(path) for url in TRACK_IGNORE_URLS):
            pageview = Pageview(visitor=visitor, url=request.path,
                                view_time=now, method=request.method)
            pageview.save()

    return response
def process_request(self, request):
    """Create or refresh the ``Visitor`` row for this request.

    AJAX requests, user agents containing an untracked keyword and paths
    starting with an untracked prefix (including the media prefixes) are
    ignored.  Tracking is best-effort: any error raised while tracking is
    logged and suppressed so that it never reaches the caller.

    Always returns ``None``.
    """
    try:
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        session_key = request.session.session_key
        ip_address = request.META.get('REMOTE_ADDR', '')
        user_agent = request.META.get('HTTP_USER_AGENT', '')

        # if an untracked keyword is found in the user agent, stop tracking
        for ua in UntrackedUserAgent.objects.all():
            if ua.keyword in str(user_agent):
                return

        # Copy before appending so the helper's list is never mutated
        # (NOTE(review): unclear from here whether the helper returns a
        # fresh list on every call; the copy is harmless either way).
        prefixes = list(utils.get_untracked_prefixes())

        # don't track media files
        prefixes.append(settings.MEDIA_URL)
        prefixes.append(settings.ADMIN_MEDIA_PREFIX)

        # ensure that the request.path does not begin with any of the prefixes
        if any(request.path.startswith(prefix) for prefix in prefixes):
            return

        # the URL needs to be tracked: determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address,
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            try:
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                visitor = Visitor.objects.get(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff,
                )
                visitor.session_key = session_key
            except Visitor.DoesNotExist:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = request.META.get('HTTP_REFERER', 'unknown')

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        visitor.save()
    except Exception:
        # Still swallow the error (tracking must not break the request),
        # but record it; a bare `except: pass` hid every failure and also
        # trapped KeyboardInterrupt / SystemExit.
        import logging
        logging.getLogger(__name__).exception(
            'Visitor tracking failed for this request')
def process_request(self, request):
    """Create or refresh the ``Visitor`` row for this request.

    AJAX requests, user agents containing an untracked keyword and paths
    starting with one of ``self.prefixes`` are ignored.  Otherwise the
    visitor is looked up by session key and IP address, falling back to a
    visitor with the same IP and user agent seen in the last five minutes,
    and finally to a brand new ``Visitor``.  The record is then updated
    and saved; database errors while saving are logged, not raised.

    Always returns ``None``.
    """
    # don't process AJAX requests
    if request.is_ajax():
        return

    # create some useful variables
    ip_address = utils.get_ip(request)
    user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                         errors='ignore')

    # retrieve untracked user agents from cache (refreshed hourly)
    ua_key = '_tracking_untracked_uas'
    untracked = cache.get(ua_key)
    if untracked is None:
        log.info('Updating untracked user agent cache')
        untracked = UntrackedUserAgent.objects.all()
        cache.set(ua_key, untracked, 3600)

    # if an untracked keyword is found in the user agent, stop tracking
    for ua in untracked:
        if ua.keyword in user_agent:
            log.debug('Not tracking UA "%s" because of keyword: %s',
                      user_agent, ua.keyword)
            return

    if hasattr(request, 'session') and request.session.session_key:
        # use the current session key if we can
        session_key = request.session.session_key
    else:
        # otherwise just fake a session key, capped at 40 characters
        session_key = ('%s:%s' % (ip_address, user_agent))[:40]

    # ensure that the request.path does not begin with any of the prefixes
    for prefix in self.prefixes:
        if request.path.startswith(prefix):
            log.debug('Not tracking request to: %s', request.path)
            return

    # if we get here, the URL needs to be tracked
    now = datetime.now()
    attrs = {
        'session_key': session_key,
        'ip_address': ip_address,
    }

    # for some reason, Visitor.objects.get_or_create was not working here
    try:
        visitor = Visitor.objects.get(**attrs)
    except Visitor.DoesNotExist:
        # see if there's a visitor with the same IP and user agent
        # within the last 5 minutes
        cutoff = now - timedelta(minutes=5)
        visitors = Visitor.objects.filter(
            ip_address=ip_address,
            user_agent=user_agent,
            last_update__gte=cutoff,
        )

        if visitors:
            visitor = visitors[0]
            visitor.session_key = session_key
            log.debug('Using existing visitor for IP %s / UA %s: %s',
                      ip_address, user_agent, visitor.id)
        else:
            # it's probably safe to assume that the visitor is brand new
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s', attrs)
    except Exception:
        # Tracking is best-effort: give up on this request, but leave a
        # traceback behind instead of hiding the failure completely.
        log.exception('Could not look up visitor for: %s', attrs)
        return

    # determine whether or not the user is logged in
    user = request.user
    if isinstance(user, AnonymousUser):
        user = None

    # update the tracking information
    visitor.user = user
    visitor.user_agent = user_agent

    # if the visitor record is new, or the visitor hasn't been here for
    # at least an hour, update their referrer URL
    one_hour_ago = now - timedelta(hours=1)
    if not visitor.last_update or visitor.last_update <= one_hour_ago:
        visitor.referrer = utils.u_clean(
            request.META.get('HTTP_REFERER', 'unknown')[:255])

        # reset the number of pages they've been to
        visitor.page_views = 0
        visitor.session_start = now

    visitor.url = request.path
    visitor.page_views += 1
    visitor.last_update = now
    try:
        visitor.save()
    except DatabaseError:
        log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
def process_response(self, request, response):
    """Record the visitor (and optionally a pageview) for this response.

    Nothing is recorded when the request has no session, is an untracked
    AJAX request, has an ignored status code, or belongs to an anonymous
    user while ``TRACK_ANONYMOUS_USERS`` is off.  Otherwise the ``Visitor``
    keyed by the session key is created or updated, and a ``Pageview`` is
    stored when ``TRACK_PAGEVIEWS`` is on and the path matches none of
    ``TRACK_IGNORE_URLS``.

    Always returns ``response`` unchanged.
    """
    # Session framework not installed, nothing to see here..
    if not hasattr(request, 'session'):
        return response

    # Do not track AJAX requests..
    if request.is_ajax() and not TRACK_AJAX_REQUESTS:
        return response

    # Do not track if HTTP HttpResponse status_code blacklisted
    if response.status_code in TRACK_IGNORE_STATUS_CODES:
        return response

    # If dealing with a non-authenticated user, we still should track the
    # session since if authentication happens, the `session_key` carries
    # over, thus having a more accurate start time of session
    user = getattr(request, 'user', None)

    # Check for anonymous users. `is_anonymous` is a method on older Django
    # releases and a plain attribute on newer ones; accept both forms.
    if user:
        is_anonymous = user.is_anonymous
        if callable(is_anonymous):
            is_anonymous = is_anonymous()
    else:
        is_anonymous = True

    if is_anonymous:
        if not TRACK_ANONYMOUS_USERS:
            return response
        user = None

    # Force a save to generate a session key if one does not exist
    if not request.session.session_key:
        request.session.save()

    # A Visitor row is unique by session_key
    session_key = request.session.session_key

    try:
        visitor = Visitor.objects.get(pk=session_key)
    except Visitor.DoesNotExist:
        # Log the ip address. Start time is managed via the
        # field `default` value
        visitor = Visitor(
            pk=session_key,
            ip_address=get_ip_address(request),
            user_agent=request.META.get('HTTP_USER_AGENT', None),
        )

    # Update the user field if the visitor user is not set. This
    # implies authentication has occurred on this request and now
    # the user object exists. Check using `user_id` to prevent
    # a database hit.
    if user and not visitor.user_id:
        visitor.user = user

    visitor.expiry_age = request.session.get_expiry_age()
    visitor.expiry_time = request.session.get_expiry_date()

    # Be conservative with the determining time on site since simply
    # increasing the session timeout could greatly skew results. This
    # is the only time we can guarantee.
    now = timezone.now()
    time_on_site = 0
    if visitor.start_time:
        elapsed = now - visitor.start_time
        # `timedelta.seconds` alone drops whole days, which made sessions
        # longer than 24h wrap around; count the days too.  Clamp at zero
        # so a start time in the future never yields a negative duration.
        time_on_site = max(elapsed.days * 86400 + elapsed.seconds, 0)
    visitor.time_on_site = time_on_site

    visitor.save()

    if TRACK_PAGEVIEWS:
        # Match against `path_info` to not include the SCRIPT_NAME..
        path = request.path_info.lstrip('/')
        if not any(url.match(path) for url in TRACK_IGNORE_URLS):
            referer = None
            if TRACK_REFERER:
                referer = request.META.get('HTTP_REFERER', None)

            pageview = Pageview(visitor=visitor, url=request.path,
                                view_time=now, method=request.method,
                                referer=referer)
            pageview.save()

    return response