예제 #1
0
    def _refresh_visitor(self, user, request, visit_time):
        # A Visitor row is unique by session_key
        session_key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=session_key)
        except Visitor.DoesNotExist:
            # Log the ip address. Start time is managed via the field
            # `default` value
            ip_address = get_ip_address(request)
            visitor = Visitor(pk=session_key, ip_address=ip_address)

        # Update the user field if the visitor user is not set. This
        # implies authentication has occured on this request and now
        # the user is object exists. Check using `user_id` to prevent
        # a database hit.
        if user and not visitor.user_id:
            visitor.user_id = user.id

        # update some session expiration details
        visitor.expiry_age = request.session.get_expiry_age()
        visitor.expiry_time = request.session.get_expiry_date()

        # grab the latest User-Agent and store it
        user_agent = request.META.get('HTTP_USER_AGENT', None)
        if user_agent:
            visitor.user_agent = smart_text(user_agent,
                                            encoding='latin-1',
                                            errors='ignore')

        # grab the source param and store it
        source = request.GET.get('source', None)
        if source:
            visitor.source = source

        # grab the medium param and store it
        medium = request.GET.get('medium', None)
        if medium:
            visitor.medium = medium

        time_on_site = 0
        if visitor.start_time:
            time_on_site = total_seconds(visit_time - visitor.start_time)
        visitor.time_on_site = int(time_on_site)

        try:
            with transaction.atomic():
                visitor.save()
        except IntegrityError:
            # there is a small chance a second response has saved this
            # Visitor already and a second save() at the same time (having
            # failed to UPDATE anything) will attempt to INSERT the same
            # session key (pk) again causing an IntegrityError
            # If this happens we'll just grab the "winner" and use that!
            visitor = Visitor.objects.get(pk=session_key)

        return visitor
예제 #2
0
    def _refresh_visitor(self, user, request, visit_time):
        # A Visitor row is unique by session_key
        session_key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=session_key)
        except Visitor.DoesNotExist:
            # Log the ip address. Start time is managed via the field
            # `default` value
            ip_address = get_ip_address(request)
            visitor = Visitor(pk=session_key, ip_address=ip_address)

        # Update the user field if the visitor user is not set. This
        # implies authentication has occured on this request and now
        # the user is object exists. Check using `user_id` to prevent
        # a database hit.
        if user and not visitor.user_id:
            visitor.user = user

        # update some session expiration details
        visitor.expiry_age = request.session.get_expiry_age()
        visitor.expiry_time = request.session.get_expiry_date()

        # grab the latest User-Agent and store it
        user_agent = request.META.get('HTTP_USER_AGENT', None)
        if user_agent:
            visitor.user_agent = smart_text(user_agent,
                                            encoding='latin-1',
                                            errors='ignore')

        time_on_site = 0
        if visitor.start_time:
            time_on_site = total_seconds(visit_time - visitor.start_time)
        visitor.time_on_site = int(time_on_site)

        visitor.save()
        return visitor
예제 #3
0
    def process_request(self, request):
        # don't process AJAX requests
        if request.is_ajax(): return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except:
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
예제 #4
0
    def process_request(self, request):
        # don't process AJAX requests
        if request.path.startswith("/s/") or request.path.startswith("/static/") or request.path.startswith("/admin/")\
             or request.path.startswith("/favicon.ico") or (request.is_ajax() and not request.path.startswith('/o/')):
            return
        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                             errors='ignore')

        # retrieve untracked user agents from cache
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' %
                          (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {'session_key': session_key, 'ip_address': ip_address}

        visitor_id = request.session.get('visitor_id', None)
        if not visitor_id:
            # for some reason, Visitor.objects.get_or_create was not working here
            try:
                visitor = Visitor.objects.only('id').get(**attrs)
            except Visitor.DoesNotExist:
                request.session.set_test_cookie()
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                visitors = Visitor.objects.only('id').filter(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff)

                if len(visitors):
                    visitor = visitors[0]
                    visitor.session_key = session_key
                    log.debug('Using existing visitor for IP %s / UA %s: %s' %
                              (ip_address, user_agent, visitor.id))
                else:
                    # it's probably safe to assume that the visitor is brand new
                    visitor = Visitor(**attrs)
                    log.debug('Created a new visitor: %s' % attrs)
                try:
                    visitor.save()
                except DatabaseError:
                    print_stack_trace()
                    log.error(
                        'There was a problem saving visitor information:\n%s\n\n%s'
                        % (traceback.format_exc(), locals()))
            except:
                return

            request.session['visitor_id'] = visitor_id = visitor.id

        redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
        visitor_data = json.loads(redis_data)
        visitor_data['visitor_id'] = visitor_id

        # update the tracking information
        visitor_data['user_agent'] = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
        # TODO: ensure that we are on the same time zone - I just put UTC for now
        # to get it working
        last_update = visitor_data.get('last_update', None)
        if not last_update or last_update <= time.mktime(
                one_hour_ago.timetuple()):
            visitor_data['referrer'] = utils.u_clean(
                request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor_data['page_views'] = 0
            visitor_data['session_start'] = time.mktime(now.timetuple())

        visitor_data['url'] = request.path
        page_views = visitor_data.get('page_views', 0) + 1
        visitor_data['page_views'] = page_views
        visitor_data['last_update'] = time.mktime(now.timetuple())

        try:
            # Extracting visitor data from GA cookie
            cookie = request.COOKIES.get('__utmz')
            if cookie:
                try:
                    data = cookie.split('.', 4)[-1]
                    data = dict(match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
                except (ValueError, IndexError):
                    log.error('Malformed GA cookie: {0!r}'.format(cookie))
                else:
                    visitor_data['source'] = normalize_ga_value(
                        data.get('utmcsr'))
                    visitor_data['medium'] = normalize_ga_value(
                        data.get('utmcmd'))
                    visitor_data['campaign'] = normalize_ga_value(
                        data.get('utmccn'))
                    visitor_data['keywords'] = normalize_ga_value(
                        data.get('utm.ctr'))

            utm_source = request.GET.get("utm_source", "unknown")
            request.session['acquisition_source_name'] = utm_source

            if utm_source != "unknown":
                # utm_source: Identify the advertiser, site, publication, etc. that is sending traffic to your property, e.g. google, citysearch, newsletter4, billboard.
                # utm_medium: The advertising or marketing medium, e.g.: cpc, banner, email newsletter.
                # utm_campaign: The individual campaign name, slogan, promo code, etc. for a product.
                # utm_term: Identify paid search keywords. If you're manually tagging paid keyword campaigns, you should also use utm_term to specify the keyword.
                # utm_content: Used to differentiate similar content, or links within the same ad. For example, if you have two call-to-action links within the same email message, you can use utm_content and set different values for each so you can tell which version is more effective.

                #update the tracking info with the latest and bump the old one to be stored in the history

                #visitor.bump_past_acquisition_info()
                past_acquisition_info = visitor_data.get(
                    'past_acquisition_info', [])
                if visitor_data.get('acquisition_source', None):
                    old_visitor_data = {'date_valid_until': time.time()}
                    for k in VISITOR_PARAMS_MAPPING.keys():
                        old_visitor_data[k] = visitor_data.get(k, None)
                    past_acquisition_info.append(old_visitor_data)
                    visitor_data[
                        'past_acquisition_info'] = past_acquisition_info
                for k, v in VISITOR_PARAMS_MAPPING.items():
                    value = request.GET.get(v, 'unknown')[:255]
                    visitor_data[k] = value

        except:
            print_stack_trace()
        redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
예제 #5
0
    def process_response(self, request, response):
        # Session framework not installed, nothing to see here..
        if not hasattr(request, 'session'):
            return response

        # Do not track AJAX requests..
        if request.is_ajax() and not TRACK_AJAX_REQUESTS:
            return response

        # Do not track if HTTP HttpResponse status_code blacklisted
        if response.status_code in TRACK_IGNORE_STATUS_CODES:
            return response

        # If dealing with a non-authenticated user, we still should track the
        # session since if authentication happens, the `session_key` carries
        # over, thus having a more accurate start time of session
        user = getattr(request, 'user', None)

        # Check for anonymous users
        if not user or user.is_anonymous():
            if not TRACK_ANONYMOUS_USERS:
                return response
            user = None

        # Force a save to generate a session key if one does not exist
        if not request.session.session_key:
            request.session.save()

        # A Visitor row is unique by session_key
        session_key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=session_key)
        except Visitor.DoesNotExist:
            # Log the ip address. Start time is managed via the
            # field `default` value
            visitor = Visitor(pk=session_key,
                              ip_address=get_ip_address(request),
                              user_agent=request.META.get(
                                  'HTTP_USER_AGENT', None))

        # Update the user field if the visitor user is not set. This
        # implies authentication has occured on this request and now
        # the user is object exists. Check using `user_id` to prevent
        # a database hit.
        if user and not visitor.user_id:
            visitor.user = user

        visitor.expiry_age = request.session.get_expiry_age()
        visitor.expiry_time = request.session.get_expiry_date()

        # Be conservative with the determining time on site since simply
        # increasing the session timeout could greatly skew results. This
        # is the only time we can guarantee.
        now = timezone.now()
        time_on_site = 0
        if visitor.start_time:
            time_on_site = (now - visitor.start_time).seconds
        visitor.time_on_site = time_on_site

        visitor.save()

        if TRACK_PAGEVIEWS:
            # Match against `path_info` to not include the SCRIPT_NAME..
            path = request.path_info.lstrip('/')
            for url in TRACK_IGNORE_URLS:
                if url.match(path):
                    break
            else:
                referer = None
                if TRACK_REFERER:
                    referer = request.META.get('HTTP_REFERER', None)

                pageview = Pageview(visitor=visitor,
                                    url=request.path,
                                    view_time=now,
                                    method=request.method,
                                    referer=referer)
                pageview.save()

        return response
예제 #6
0
    def process_request(self, request):
        # don't process AJAX requests
        if request.is_ajax(): return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = request.META.get('HTTP_USER_AGENT', '')

	if utils.user_agent_is_untracked( user_agent ):
          log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
          return

        if hasattr(request, 'session'):
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = md5( '%s:%s' % ( ip_address, user_agent ) ).hexdigest()

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in NO_TRACKING_PREFIXES:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent[:255],
                last_update__gte=cutoff
            )

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except:
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent[:255]

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        if not visitor.last_update or visitor.last_update <= ( now - timedelta( hours = 1 ) ) :
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path[:255]
        visitor.page_views += 1
        visitor.last_update = now
        visitor.save()