예제 #1
0
 def _create_visitor(self, user, page_views, ip_address):
     """Create, save and return a ``Visitor`` row for ``user``.

     ``page_views`` and ``ip_address`` are stored as given.  The URL is
     fixed to ``"/"`` and the session key is a random even integer.
     """
     # Take a single timestamp so a freshly created visitor has identical
     # ``session_start`` and ``last_update`` values.
     now = datetime.datetime.now()
     visitor = Visitor(
         user=user,
         url="/",
         page_views=page_views,
         session_start=now,
         last_update=now,
         ip_address=ip_address,
         session_key=random.randrange(1000000, 9999999999, 2),
     )
     visitor.save()
     return visitor
예제 #2
0
    def _refresh_visitor(self, user, request, visit_time):
        """Load or build the ``Visitor`` for this session, update and save it.

        The row is looked up by the session key, which is its primary key.
        When none exists a new one is prepared with the client's IP address
        and, if ``TRACK_USING_GEOIP`` is set, its GeoIP data.  Returns the
        visitor instance, or the already-stored row if the save loses a
        race with a concurrent request.
        """
        pk = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=pk)
        except Visitor.DoesNotExist:
            # First sighting of this session: remember where it came from.
            # The start time is not set here (the original author notes it
            # comes from the field's default).
            visitor = Visitor(pk=pk, ip_address=get_ip_address(request))
            if TRACK_USING_GEOIP:
                visitor.geoip_data = get_geo_ip_data(visitor.ip_address)

        # Attach the user once authentication has happened.  Testing
        # ``user_id`` instead of ``user`` avoids fetching the related row.
        if user and not visitor.user_id:
            visitor.user_id = user.id

        # Mirror the session's current expiry settings.
        session = request.session
        visitor.expiry_age = session.get_expiry_age()
        visitor.expiry_time = session.get_expiry_date()

        # Keep the most recent User-Agent header, if one was sent.
        raw_agent = request.META.get('HTTP_USER_AGENT', None)
        if raw_agent:
            visitor.user_agent = smart_text(
                raw_agent, encoding='latin-1', errors='ignore')

        started = visitor.start_time
        elapsed = total_seconds(visit_time - started) if started else 0
        visitor.time_on_site = int(elapsed)

        try:
            with transaction.atomic():
                visitor.save()
        except IntegrityError:
            # Another response inserted the same session key between our
            # lookup and our INSERT; hand back the row that won the race.
            return Visitor.objects.get(pk=pk)

        return visitor
예제 #3
0
    def _refresh_visitor(self, user, request, visit_time):
        """Fetch or create the session's ``Visitor``, refresh it and save it.

        The visitor is keyed by the session key.  A missing row is replaced
        by a new instance carrying the client's IP address.  Returns the
        visitor, or the stored row if a concurrent request saved it first.
        """
        key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=key)
        except Visitor.DoesNotExist:
            # New session: only the IP address is recorded explicitly; the
            # original author notes the start time comes from the field
            # default.
            visitor = Visitor(pk=key, ip_address=get_ip_address(request))

        # A user showing up on a visitor that has none means the session
        # just authenticated.  ``user_id`` is checked to avoid a query.
        if user and not visitor.user_id:
            visitor.user_id = user.id

        # Copy the session expiry details onto the visitor.
        session = request.session
        visitor.expiry_age = session.get_expiry_age()
        visitor.expiry_time = session.get_expiry_date()

        # Store the latest User-Agent header when present.
        agent_header = request.META.get('HTTP_USER_AGENT', None)
        if agent_header:
            visitor.user_agent = smart_text(agent_header,
                                            encoding='latin-1',
                                            errors='ignore')

        start = visitor.start_time
        seconds = total_seconds(visit_time - start) if start else 0
        visitor.time_on_site = int(seconds)

        try:
            with transaction.atomic():
                visitor.save()
        except IntegrityError:
            # Lost a race: a parallel response already inserted this
            # session key, so use the row it created.
            return Visitor.objects.get(pk=key)

        return visitor
예제 #4
0
    def _refresh_visitor(self, user, request, visit_time):
        """Fetch or create the session's ``Visitor``, refresh it and save it.

        The visitor is keyed by the session key.  A missing row is replaced
        by a new instance carrying the client's IP address.  Returns the
        saved visitor.
        """
        key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=key)
        except Visitor.DoesNotExist:
            # New session: only the IP address is recorded explicitly; the
            # original author notes the start time comes from the field
            # default.
            visitor = Visitor(pk=key, ip_address=get_ip_address(request))

        # A user showing up on a visitor that has none means the session
        # just authenticated.  ``user_id`` is checked to avoid a query.
        if user and not visitor.user_id:
            visitor.user = user

        # Copy the session expiry details onto the visitor.
        session = request.session
        visitor.expiry_age = session.get_expiry_age()
        visitor.expiry_time = session.get_expiry_date()

        # Store the latest User-Agent header when present.
        agent_header = request.META.get('HTTP_USER_AGENT', None)
        if agent_header:
            visitor.user_agent = smart_text(
                agent_header, encoding='latin-1', errors='ignore')

        start = visitor.start_time
        seconds = total_seconds(visit_time - start) if start else 0
        visitor.time_on_site = int(seconds)

        visitor.save()
        return visitor
예제 #5
0
    def process_response(self, request, response):
        """Record or refresh the ``Visitor`` row for this request's session.

        Nothing is tracked when the request has no ``session`` attribute, or
        when it is an AJAX request and ``TRACK_AJAX_REQUESTS`` is false.
        The response is always returned unchanged.
        """
        if not hasattr(request, "session"):
            return response

        # Do not track AJAX requests..
        if request.is_ajax() and not TRACK_AJAX_REQUESTS:
            return response

        # If dealing with a non-authenticated user, we still should track the
        # session since if authentication happens, the `session_key` carries
        # over, thus having a more accurate start time of session

        user = getattr(request, "user", None)
        # We cannot do anything with Anonymous users
        if user and not user.is_authenticated():
            user = None

        # A Visitor row is unique by session_key
        session_key = request.session.session_key

        try:
            visitor = Visitor.objects.get(session_key=session_key)
            # Update the user field if the visitor user is not set. This
            # implies authentication has occurred on this request and the
            # user object now exists. Check using `user_id` to prevent
            # a database hit.
            if user and not visitor.user_id:
                visitor.user = user
        except Visitor.DoesNotExist:
            # Log the ip address. Start time is managed via the
            # field `default` value
            visitor = Visitor(
                session_key=session_key,
                ip_address=get_ip_address(request),
                user_agent=request.META.get("HTTP_USER_AGENT", None),
            )

        visitor.expiry_age = request.session.get_expiry_age()
        visitor.expiry_time = request.session.get_expiry_date()

        # Be conservative with the determining time on site since simply
        # increasing the session timeout could greatly skew results. This
        # is the only time we can guarantee.
        time_on_site = 0
        if visitor.start_time:
            elapsed = datetime.now() - visitor.start_time
            # ``timedelta.seconds`` holds only the sub-day remainder, so a
            # session older than 24 hours would wrap around; use the full
            # duration instead.
            time_on_site = int(elapsed.total_seconds())
        visitor.time_on_site = time_on_site

        visitor.save()

        return response
예제 #6
0
    def _refresh_visitor(self, user, request, visit_time):
        """Look up or build the ``Visitor`` for the session and persist it.

        The session key doubles as the visitor's primary key.  When no row
        is found, a new instance is created with the requester's IP address.
        Returns the saved visitor.
        """
        session = request.session
        pk = session.session_key

        try:
            visitor = Visitor.objects.get(pk=pk)
        except Visitor.DoesNotExist:
            # Unknown session: start a new record with the IP address.  The
            # start time is not assigned here (per the original note it is
            # supplied by the field default).
            visitor = Visitor(pk=pk, ip_address=get_ip_address(request))

        # Link the user the first time an authenticated one appears; the
        # ``user_id`` test avoids loading the related object.
        if user and not visitor.user_id:
            visitor.user = user

        # Refresh the session expiry information.
        visitor.expiry_age = session.get_expiry_age()
        visitor.expiry_time = session.get_expiry_date()

        # Remember the newest User-Agent header, if any.
        header = request.META.get('HTTP_USER_AGENT', None)
        if header:
            visitor.user_agent = smart_text(header,
                                            encoding='latin-1',
                                            errors='ignore')

        began = visitor.start_time
        duration = total_seconds(visit_time - began) if began else 0
        visitor.time_on_site = int(duration)

        visitor.save()
        return visitor
예제 #7
0
    def process_request(self, request):
        """Track the visitor behind ``request`` and store their data in redis.

        Requests for a few hard-coded path prefixes, AJAX requests outside
        ``/o/``, untracked user agents and paths starting with one of
        ``self.prefixes`` are ignored.  Otherwise a ``Visitor`` row is found
        or created, its id is remembered in the session, and the JSON blob
        stored under ``visitor_data_<id>`` in redis is updated with the user
        agent, referrer, page-view counters and acquisition details (GA
        ``__utmz`` cookie and ``utm_*`` query parameters).  Returns ``None``.
        """
        # skip static/admin-style paths outright, and AJAX requests unless
        # they target a path under /o/
        if request.path.startswith(
                ("/s/", "/static/", "/admin/", "/favicon.ico")) or (
                request.is_ajax() and not request.path.startswith('/o/')):
            return
        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255],
                             errors='ignore')

        # retrieve untracked user agents from cache
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' %
                          (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {'session_key': session_key, 'ip_address': ip_address}

        visitor_id = request.session.get('visitor_id', None)
        if not visitor_id:
            # for some reason, Visitor.objects.get_or_create was not working here
            try:
                visitor = Visitor.objects.only('id').get(**attrs)
            except Visitor.DoesNotExist:
                request.session.set_test_cookie()
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                visitors = Visitor.objects.only('id').filter(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff)

                if len(visitors):
                    visitor = visitors[0]
                    visitor.session_key = session_key
                    log.debug('Using existing visitor for IP %s / UA %s: %s' %
                              (ip_address, user_agent, visitor.id))
                else:
                    # it's probably safe to assume that the visitor is brand new
                    visitor = Visitor(**attrs)
                    log.debug('Created a new visitor: %s' % attrs)
                try:
                    visitor.save()
                except DatabaseError:
                    print_stack_trace()
                    log.error(
                        'There was a problem saving visitor information:\n%s\n\n%s'
                        % (traceback.format_exc(), locals()))
            except Exception:
                # Tracking is best effort: give up on this request, but
                # leave a trace instead of failing silently.
                log.exception('Could not look up visitor; not tracking')
                return

            request.session['visitor_id'] = visitor_id = visitor.id

        redis_data = redis.get('visitor_data_%s' % visitor_id) or '{}'
        visitor_data = json.loads(redis_data)
        visitor_data['visitor_id'] = visitor_id

        # update the tracking information
        visitor_data['user_agent'] = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
        # TODO: ensure that we are on the same time zone - I just put UTC for now
        # to get it working
        last_update = visitor_data.get('last_update', None)
        if not last_update or last_update <= time.mktime(
                one_hour_ago.timetuple()):
            visitor_data['referrer'] = utils.u_clean(
                request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor_data['page_views'] = 0
            visitor_data['session_start'] = time.mktime(now.timetuple())

        visitor_data['url'] = request.path
        page_views = visitor_data.get('page_views', 0) + 1
        visitor_data['page_views'] = page_views
        visitor_data['last_update'] = time.mktime(now.timetuple())

        try:
            # Extracting visitor data from GA cookie
            cookie = request.COOKIES.get('__utmz')
            if cookie:
                try:
                    data = cookie.split('.', 4)[-1]
                    data = dict(match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
                except (ValueError, IndexError):
                    log.error('Malformed GA cookie: {0!r}'.format(cookie))
                else:
                    visitor_data['source'] = normalize_ga_value(
                        data.get('utmcsr'))
                    visitor_data['medium'] = normalize_ga_value(
                        data.get('utmcmd'))
                    visitor_data['campaign'] = normalize_ga_value(
                        data.get('utmccn'))
                    # The regex above captures the key as ``utmctr``; the
                    # previous ``utm.ctr`` lookup could never match.
                    visitor_data['keywords'] = normalize_ga_value(
                        data.get('utmctr'))

            utm_source = request.GET.get("utm_source", "unknown")
            request.session['acquisition_source_name'] = utm_source

            if utm_source != "unknown":
                # utm_source: Identify the advertiser, site, publication, etc. that is sending traffic to your property, e.g. google, citysearch, newsletter4, billboard.
                # utm_medium: The advertising or marketing medium, e.g.: cpc, banner, email newsletter.
                # utm_campaign: The individual campaign name, slogan, promo code, etc. for a product.
                # utm_term: Identify paid search keywords. If you're manually tagging paid keyword campaigns, you should also use utm_term to specify the keyword.
                # utm_content: Used to differentiate similar content, or links within the same ad. For example, if you have two call-to-action links within the same email message, you can use utm_content and set different values for each so you can tell which version is more effective.

                # update the tracking info with the latest and bump the old one to be stored in the history
                past_acquisition_info = visitor_data.get(
                    'past_acquisition_info', [])
                if visitor_data.get('acquisition_source', None):
                    old_visitor_data = {'date_valid_until': time.time()}
                    for k in VISITOR_PARAMS_MAPPING.keys():
                        old_visitor_data[k] = visitor_data.get(k, None)
                    past_acquisition_info.append(old_visitor_data)
                    visitor_data[
                        'past_acquisition_info'] = past_acquisition_info
                for k, v in VISITOR_PARAMS_MAPPING.items():
                    value = request.GET.get(v, 'unknown')[:255]
                    visitor_data[k] = value

        except Exception:
            # Acquisition details are optional; report the problem and still
            # persist whatever was collected so far.
            print_stack_trace()
        redis.set('visitor_data_%s' % visitor_id, json.dumps(visitor_data))
예제 #8
0
    def process_request(self, request):
        """Find or create the ``Visitor`` for ``request`` and update it.

        AJAX requests, user agents containing an untracked keyword and paths
        starting with one of ``self.prefixes`` are not tracked.  Otherwise
        the visitor's user, user agent, URL, page-view count and timestamps
        are refreshed and saved.  Returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = right_now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # Tracking is best effort, so the request goes on untracked; the
            # failure is logged rather than swallowed silently.  Catching
            # ``Exception`` lets SystemExit/KeyboardInterrupt propagate.
            log.exception('Could not look up visitor; not tracking')
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
예제 #9
0
    def process_request(self, request):
        """Find or create the ``Visitor`` for ``request`` and update it.

        AJAX requests, user agents containing an untracked keyword, and
        paths under an untracked prefix (configured prefixes, media URLs and
        the tracker's own refresh page) are not tracked.  Otherwise the
        visitor's user, user agent, referrer, URL, page-view count and
        timestamps are refreshed and saved.  Returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = request.META.get('HTTP_USER_AGENT', '')[:255]

        # see if the user agent is not supposed to be tracked
        for ua in UntrackedUserAgent.objects.all():
            # if the keyword is found in the user agent, stop tracking
            if str(user_agent).find(ua.keyword) != -1:
                return

        if hasattr(request, 'session'):
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)

        # Work on a copy: appending to the list returned by the helper
        # would grow it on every request if the helper hands out a shared
        # or cached object.
        prefixes = list(utils.get_untracked_prefixes())

        # don't track media file requests
        if settings.MEDIA_URL and settings.MEDIA_URL != '/':
            prefixes.append(settings.MEDIA_URL)
        if settings.ADMIN_MEDIA_PREFIX:
            prefixes.append(settings.ADMIN_MEDIA_PREFIX)

        try:
            # finally, don't track requests to the tracker update pages
            prefixes.append(reverse('tracking-refresh-active-users'))
        except NoReverseMatch:
            # django-tracking hasn't been included in the URLconf if we get here
            pass

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in prefixes:
            if request.path.startswith(prefix):
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            filter_params = {
                'ip_address': ip_address,
                'user_agent': user_agent,
                'last_update__gte': cutoff
            }
            try:
                visitor = Visitor.objects.get(**filter_params)
                visitor.session_key = session_key
            except Visitor.MultipleObjectsReturned:
                # just get the first match
                visitor = Visitor.objects.filter(**filter_params)[0]
                visitor.session_key = session_key
            except Visitor.DoesNotExist:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
        except Exception:
            # Tracking is best effort: skip this request on any lookup
            # error, but let SystemExit/KeyboardInterrupt propagate.
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = request.META.get('HTTP_REFERER', 'unknown')[:255]

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        visitor.save()
예제 #10
0
    def process_response(self, request, response):
        """Record the visitor, and optionally a page view, for this request.

        Tracking is skipped when the request has no session, when it is an
        AJAX request and ``TRACK_AJAX_REQUESTS`` is false, when the response
        status is in ``TRACK_IGNORE_STATUS_CODES``, or when the user is
        anonymous and ``TRACK_ANONYMOUS_USERS`` is false.  When
        ``TRACK_PAGEVIEWS`` is set, a ``Pageview`` is stored unless the path
        matches one of ``TRACK_IGNORE_URLS``.  The response is always
        returned unchanged.
        """
        # Session framework not installed, nothing to see here..
        if not hasattr(request, 'session'):
            return response

        # Do not track AJAX requests..
        if request.is_ajax() and not TRACK_AJAX_REQUESTS:
            return response

        # Do not track if HTTP HttpResponse status_code blacklisted
        if response.status_code in TRACK_IGNORE_STATUS_CODES:
            return response

        # If dealing with a non-authenticated user, we still should track the
        # session since if authentication happens, the `session_key` carries
        # over, thus having a more accurate start time of session
        user = getattr(request, 'user', None)

        # Check for anonymous users
        if not user or user.is_anonymous():
            if not TRACK_ANONYMOUS_USERS:
                return response
            user = None

        # Force a save to generate a session key if one does not exist
        if not request.session.session_key:
            request.session.save()

        # A Visitor row is unique by session_key
        session_key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=session_key)
        except Visitor.DoesNotExist:
            visitor_user_agent = request.META.get('HTTP_USER_AGENT', None)
            if visitor_user_agent is not None:
                visitor_user_agent = visitor_user_agent.decode('latin-1', errors='ignore')
            # Log the ip address. Start time is managed via the
            # field `default` value
            visitor = Visitor(pk=session_key, ip_address=get_ip_address(request),
                user_agent=visitor_user_agent)

        # Update the user field if the visitor user is not set. This
        # implies authentication has occurred on this request and the
        # user object now exists. Check using `user_id` to prevent
        # a database hit.
        if user and not visitor.user_id:
            visitor.user = user

        visitor.expiry_age = request.session.get_expiry_age()
        visitor.expiry_time = request.session.get_expiry_date()

        # Be conservative with the determining time on site since simply
        # increasing the session timeout could greatly skew results. This
        # is the only time we can guarantee.
        now = timezone.now()
        time_on_site = 0
        if visitor.start_time:
            # ``timedelta.seconds`` holds only the sub-day remainder, so a
            # session older than 24 hours would wrap around; use the full
            # duration instead.
            time_on_site = int((now - visitor.start_time).total_seconds())
        visitor.time_on_site = time_on_site

        visitor.save()

        if TRACK_PAGEVIEWS:
            # Match against `path_info` to not include the SCRIPT_NAME..
            path = request.path_info.lstrip('/')
            for url in TRACK_IGNORE_URLS:
                if url.match(path):
                    break
            else:
                # No ignore pattern matched, so this page view is recorded.
                referer = None
                query_string = None

                if TRACK_REFERER:
                    referer = request.META.get('HTTP_REFERER', None)

                if TRACK_QUERY_STRING:
                    query_string = request.META.get('QUERY_STRING')

                pageview = Pageview(visitor=visitor, url=request.path,
                    view_time=now, method=request.method, referer=referer, query_string=query_string)
                pageview.save()

        return response
예제 #11
0
    def process_request(self, request):
        """Find or create the ``Visitor`` for ``request`` and update it.

        AJAX requests, user agents rejected by
        ``utils.user_agent_is_untracked`` and paths starting with one of
        ``NO_TRACKING_PREFIXES`` are not tracked.  Otherwise the visitor's
        user, user agent, referrer, URL, page-view count and timestamps are
        refreshed and saved.  Returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = request.META.get('HTTP_USER_AGENT', '')

        if utils.user_agent_is_untracked(user_agent):
            # The matching keyword is not available here (the helper only
            # answers yes/no), so the message names just the user agent.
            log.debug('Not tracking UA "%s" because it matches an untracked keyword' % user_agent)
            return

        if hasattr(request, 'session'):
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = md5('%s:%s' % (ip_address, user_agent)).hexdigest()

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in NO_TRACKING_PREFIXES:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            visitors = Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent[:255],
                last_update__gte=cutoff
            )

            if len(visitors):
                visitor = visitors[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # Tracking is best effort, so the request goes on untracked; the
            # failure is logged rather than swallowed silently.  Catching
            # ``Exception`` lets SystemExit/KeyboardInterrupt propagate.
            log.exception('Could not look up visitor; not tracking')
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent[:255]

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        if not visitor.last_update or visitor.last_update <= (now - timedelta(hours=1)):
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path[:255]
        visitor.page_views += 1
        visitor.last_update = now
        visitor.save()
예제 #12
0
    def process_request(self, request):
        """Create or refresh the ``Visitor`` record for this request.

        Tracking is skipped for AJAX requests, for user agents containing a
        keyword of any ``UntrackedUserAgent``, and for paths that start with
        one of ``self.prefixes``.  Otherwise the visitor is looked up by the
        ``visitor_id`` cookie when present (else by session key and IP
        address), created when no row matches, updated and saved.  On success
        the visitor is exposed as ``request.visitor`` and, when a session is
        available, its primary key is stored under ``session['visitor_id']``.

        Always returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = request.META.get('HTTP_USER_AGENT', '')[:255]

        # retrieve untracked user agents from cache (kept for 3600 seconds)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = list(UntrackedUserAgent.objects.all())
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if unicode(user_agent, errors='ignore').find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        has_session = hasattr(request, 'session')
        if has_session:
            # force a save so that a brand new session gets a key
            if not request.session.session_key:
                request.session.save()
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key
            session_key = '%s:%s' % (ip_address, user_agent)

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        # Attributes we use when creating a new visitor
        new_attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # If we have a visitor_id cookie, look the visitor up by it
        # NOTE(review): the id is read from a cookie here but written to the
        # session at the end of this method -- confirm which one is intended.
        visitor_id = request.COOKIES.get('visitor_id')
        attrs = {'id': visitor_id} if visitor_id else new_attrs

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # add tracking ID to model if specified in the URL
            tid = request.GET.get('tid') or request.GET.get('fb_source')
            if tid:
                # Store the tid on the attributes actually used to build the
                # visitor; writing it to the lookup attrs dropped it whenever
                # a (stale) visitor_id cookie was present.
                new_attrs['tid'] = tid
                # NOTE(review): GET is swapped for an unmodified copy, as
                # before; presumably a leftover from code that stripped the
                # tracking parameter -- confirm before removing.
                request.GET = request.GET.copy()

            visitor = Visitor(**new_attrs)
            log.debug('Created a new visitor: %s' % new_attrs)
        except Exception:
            # tracking is best effort: report the problem, don't break the request
            log.exception('Could not look up visitor; request will not be tracked')
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, update their referrer URL
        if not visitor.last_update:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))

        request.visitor = visitor
        # requests without session support are handled above, so they must
        # not blow up here either
        if has_session:
            request.session['visitor_id'] = visitor.pk
예제 #13
0
    def process_request(self, request):
        """Track the visitor behind ``request``, keeping visit data in redis.

        Requests for ``/s/``, ``/static/``, ``/admin/`` and ``/favicon.ico``,
        AJAX requests outside ``/o/``, untracked user agents and paths
        starting with one of ``self.prefixes`` are ignored.

        A ``Visitor`` row is only looked up or created while the session has
        no ``visitor_id``; afterwards the id is taken from the session.  The
        per-visit data (user agent, referrer, page views, timestamps and
        acquisition info from the ``__utmz`` cookie and ``utm_*`` query
        parameters) is stored as JSON under the redis key
        ``visitor_data_<visitor_id>``.

        NOTE: apart from choosing the session key, this method uses
        ``request.session`` unconditionally.

        Always returns ``None``.
        """
        path = request.path
        # never track static assets, the admin, the favicon, or AJAX calls
        # that are not under /o/
        if path.startswith(("/s/", "/static/", "/admin/", "/favicon.ico")) or (
                request.is_ajax() and not path.startswith('/o/')):
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache (kept for 3600 seconds)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key, cut to 40 characters
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        visitor_id = request.session.get('visitor_id', None)
        if not visitor_id:
            # for some reason, Visitor.objects.get_or_create was not working here
            try:
                visitor = Visitor.objects.only('id').get(**attrs)
            except Visitor.DoesNotExist:
                request.session.set_test_cookie()
                # see if there's a visitor with the same IP and user agent
                # within the last 5 minutes
                cutoff = now - timedelta(minutes=5)
                # only one row is needed, so don't load every match
                recent = list(Visitor.objects.only('id').filter(
                    ip_address=ip_address,
                    user_agent=user_agent,
                    last_update__gte=cutoff
                )[:1])

                if recent:
                    visitor = recent[0]
                    visitor.session_key = session_key
                    log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
                else:
                    # it's probably safe to assume that the visitor is brand new
                    visitor = Visitor(**attrs)
                    log.debug('Created a new visitor: %s' % attrs)
                try:
                    visitor.save()
                except DatabaseError:
                    print_stack_trace()
                    log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
            except Exception:
                # tracking is best effort: report the problem, don't break the request
                log.exception('Could not look up visitor; request will not be tracked')
                return

            request.session['visitor_id'] = visitor_id = visitor.id

        redis_key = 'visitor_data_%s' % visitor_id
        visitor_data = json.loads(redis.get(redis_key) or '{}')
        visitor_data['visitor_id'] = visitor_id

        # update the tracking information
        visitor_data['user_agent'] = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = pytz.UTC.localize(now - timedelta(hours=1))
        # TODO: ensure that we are on the same time zone - I just put UTC for now
        # to get it working
        last_update = visitor_data.get('last_update', None)
        if not last_update or last_update <= time.mktime(one_hour_ago.timetuple()):
            visitor_data['referrer'] = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor_data['page_views'] = 0
            visitor_data['session_start'] = time.mktime(now.timetuple())

        visitor_data['url'] = path
        visitor_data['page_views'] = visitor_data.get('page_views', 0) + 1
        visitor_data['last_update'] = time.mktime(now.timetuple())

        try:
            # Extracting visitor data from GA cookie
            cookie = request.COOKIES.get('__utmz')
            if cookie:
                try:
                    data = cookie.split('.', 4)[-1]
                    data = dict(match.groups() for match in re.finditer(
                        r'(utm(?:csr|ccn|cmd|ctr))=([^\|]*)', data))
                except (ValueError, IndexError):
                    log.error('Malformed GA cookie: {0!r}'.format(cookie))
                else:
                    visitor_data['source'] = normalize_ga_value(data.get('utmcsr'))
                    visitor_data['medium'] = normalize_ga_value(data.get('utmcmd'))
                    visitor_data['campaign'] = normalize_ga_value(data.get('utmccn'))
                    # the regex above captures the key as 'utmctr'; the old
                    # 'utm.ctr' lookup could never match
                    visitor_data['keywords'] = normalize_ga_value(data.get('utmctr'))

            utm_source = request.GET.get("utm_source", "unknown")
            request.session['acquisition_source_name'] = utm_source

            if utm_source != "unknown":
                # utm_source: the advertiser, site, publication, etc. sending the traffic
                # utm_medium: the advertising or marketing medium (cpc, banner, email, ...)
                # utm_campaign: the individual campaign name, slogan, promo code, etc.
                # utm_term: paid search keywords
                # utm_content: differentiates similar content or links within the same ad

                # move the current acquisition info into the history before
                # overwriting it with the values from this request
                past_acquisition_info = visitor_data.get('past_acquisition_info', [])
                if visitor_data.get('acquisition_source', None):
                    old_visitor_data = {'date_valid_until': time.time()}
                    for k in VISITOR_PARAMS_MAPPING:
                        old_visitor_data[k] = visitor_data.get(k, None)
                    past_acquisition_info.append(old_visitor_data)
                    visitor_data['past_acquisition_info'] = past_acquisition_info
                for k, v in VISITOR_PARAMS_MAPPING.items():
                    visitor_data[k] = request.GET.get(v, 'unknown')[:255]

        except Exception:
            # acquisition data is optional; whatever was collected is still stored
            print_stack_trace()
        redis.set(redis_key, json.dumps(visitor_data))
예제 #14
0
    def process_request(self, request):
        """Log one line (time, IP, user, URL) for every tracked request.

        AJAX requests, user agents containing a keyword of any
        ``UntrackedUserAgent`` and paths starting with one of
        ``self.prefixes`` are ignored.

        A ``Visitor`` is looked up by session key and IP address (or built in
        memory when none exists) and updated, but nothing is written to the
        database here: the code that saved the visitor and the per-URL
        ``PageView`` counters was disabled in the original.

        Always returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = str(request.META.get('HTTP_USER_AGENT', '')[:255])

        # retrieve untracked user agents from cache (kept for 3600 seconds)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key, cut to 40 characters
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = timezone.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            visitor = Visitor(**attrs)
            log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # tracking is best effort: report the problem, don't break the request
            log.exception('Could not look up visitor; request will not be tracked')
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent
        visitor.pv += 1
        # NOTE: the visitor is intentionally not saved and no PageView row is
        # written (both were disabled); the log line below is the only output.

        username = user.username if user else 'anonymous'
        log.info('time: {}, ip: {}, user: {}, url: {},'.format(now, ip_address, username, request.path))
예제 #15
0
    def process_response(self, request, response):
        """Enforce the IP whitelist and record the visit of a logged-in user.

        When ``OPEN_WHITE_IP`` is set and the client IP is not in
        ``WhiteList``, a plain refusal response is returned.  Requests to the
        ``login``, ``logoff`` and ``recieve`` URLs, to ``/admin``, without a
        session, ignored AJAX requests and responses with a status code in
        ``TRACK_IGNORE_STATUS_CODES`` pass through untouched.  A request whose
        ``user`` is not a ``User`` instance is redirected to the login page.

        Otherwise the ``Visitor`` row keyed by the session key is created or
        updated (user, session expiry, time on site) and, when
        ``TRACK_PAGEVIEWS`` is enabled and the path matches none of
        ``TRACK_IGNORE_URLS``, a ``Pageview`` is stored.

        Returns ``response`` unless one of the short-circuit responses above
        applies.
        """
        if OPEN_WHITE_IP:
            ip_list = WhiteList.objects.all().values_list('ip_address', flat=True)
            if get_ip_address(request) not in ip_list:
                # parenthesised form prints the same text on Python 2 and 3
                print(u"sorry you have no power")
                return HttpResponse(u"sorry you have no power")

        # never track the authentication/receiver endpoints; the URLs are
        # resolved one at a time, stopping at the first match
        for url_name in ('login', 'logoff', 'recieve'):
            if request.path.startswith(reverse(url_name)):
                return response
        if request.path.startswith('/admin'):
            return response

        # Session framework not installed, nothing to see here..
        if not hasattr(request, 'session'):
            return response

        # Do not track AJAX requests..
        if request.is_ajax() and not TRACK_AJAX_REQUESTS:
            return response

        # Do not track if HTTP HttpResponse status_code blacklisted
        if response.status_code in TRACK_IGNORE_STATUS_CODES:
            return response

        user = getattr(request, 'user', None)
        # Anything that is not a real User (anonymous, cookie lost) has to
        # log in again.
        if not user or not isinstance(user, User):
            return HttpResponseRedirect(reverse("login"))

        # Force a save to generate a session key if one does not exist
        if not request.session.session_key:
            request.session.save()

        # A Visitor row is unique by session_key
        session_key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=session_key)
        except Visitor.DoesNotExist:
            # Log the ip address. Start time is managed via the
            # field `default` value
            visitor = Visitor(pk=session_key, ip_address=get_ip_address(request),
                user_agent=request.META.get('HTTP_USER_AGENT', None))

        # Update the user field if the visitor user is not set. This
        # implies authentication has occurred on this request and now
        # the user object exists. Check using `user_id` to prevent
        # a database hit.
        if user and not visitor.user_id:
            visitor.user = user

        visitor.expiry_age = request.session.get_expiry_age()
        visitor.expiry_time = request.session.get_expiry_date()

        # Be conservative with the determining time on site since simply
        # increasing the session timeout could greatly skew results. This
        # is the only time we can guarantee.
        now = timezone.now()
        time_on_site = 0
        if visitor.start_time:
            # `timedelta.seconds` drops whole days, which made sessions
            # longer than 24 hours wrap around; use the full duration.
            time_on_site = int((now - visitor.start_time).total_seconds())
        visitor.time_on_site = time_on_site

        visitor.save()

        if TRACK_PAGEVIEWS:
            # Match against `path_info` to not include the SCRIPT_NAME..
            path = request.path_info.lstrip('/')
            for url in TRACK_IGNORE_URLS:
                if url.match(path):
                    break
            else:
                # no ignore pattern matched: record the page view
                pageview = Pageview(visitor=visitor, url=request.path,
                    view_time=now, method=request.method)
                pageview.save()

        return response
예제 #16
0
 def process_request(self, request):
     """Create or refresh the ``Visitor`` record for this request.

     AJAX requests, user agents containing a keyword of any
     ``UntrackedUserAgent`` and paths starting with an untracked prefix
     (including ``MEDIA_URL`` and ``ADMIN_MEDIA_PREFIX``) are ignored.

     The visitor is looked up by session key and IP address; failing that,
     a visitor with the same IP and user agent seen within the last five
     minutes is reused; failing that, a new one is created.  The referrer,
     page view counter and session start are reset when the visitor is new
     or was last seen an hour or more ago.

     Tracking is best effort: any error is logged and suppressed so that it
     never breaks the request.  Always returns ``None``.
     """
     import logging

     try:
         # don't process AJAX requests
         if request.is_ajax():
             return

         # create some useful variables
         session_key = request.session.session_key
         ip_address = request.META.get('REMOTE_ADDR', '')
         user_agent = request.META.get('HTTP_USER_AGENT', '')

         # see if the user agent is not supposed to be tracked
         for ua in UntrackedUserAgent.objects.all():
             # if the keyword is found in the user agent, stop tracking
             if str(user_agent).find(ua.keyword) != -1:
                 return

         # Copy the list so the appends below never modify a list owned by
         # the helper (which would otherwise grow on every request if the
         # helper hands out a shared object).
         prefixes = list(utils.get_untracked_prefixes())
         # don't track media files
         prefixes.append(settings.MEDIA_URL)
         prefixes.append(settings.ADMIN_MEDIA_PREFIX)

         # ensure that the request.path does not begin with any of the prefixes
         if any(request.path.startswith(prefix) for prefix in prefixes):
             return

         # the URL needs to be tracked: determine what time it is
         now = datetime.now()

         attrs = {
             'session_key': session_key,
             'ip_address': ip_address
         }

         # for some reason, Visitor.objects.get_or_create was not working here
         try:
             visitor = Visitor.objects.get(**attrs)
         except Visitor.DoesNotExist:
             # see if there's a visitor with the same IP and user agent
             # within the last 5 minutes; several rows may match, so take
             # one instead of using get(), which raises on multiple matches
             cutoff = now - timedelta(minutes=5)
             recent = list(Visitor.objects.filter(
                 ip_address=ip_address,
                 user_agent=user_agent,
                 last_update__gte=cutoff
             )[:1])
             if recent:
                 visitor = recent[0]
                 visitor.session_key = session_key
             else:
                 # it's probably safe to assume that the visitor is brand new
                 visitor = Visitor(**attrs)

         # determine whether or not the user is logged in
         user = request.user
         if isinstance(user, AnonymousUser):
             user = None

         # update the tracking information
         visitor.user = user
         visitor.user_agent = user_agent

         # if the visitor record is new, or the visitor hasn't been here for
         # at least an hour, update their referrer URL
         one_hour_ago = now - timedelta(hours=1)
         if not visitor.last_update or visitor.last_update <= one_hour_ago:
             visitor.referrer = request.META.get('HTTP_REFERER', 'unknown')

             # reset the number of pages they've been to
             visitor.page_views = 0
             visitor.session_start = now

         visitor.url = request.path
         visitor.page_views += 1
         visitor.last_update = now
         visitor.save()
     except Exception:
         # keep the request alive, but leave a trace of what went wrong
         logging.getLogger(__name__).exception('Visitor tracking failed')
예제 #17
0
    def process_request(self, request):
        """Create or refresh the ``Visitor`` record for this request.

        AJAX requests, user agents containing a keyword of any
        ``UntrackedUserAgent`` and paths starting with one of
        ``self.prefixes`` are ignored.

        The visitor is looked up by session key and IP address; failing that,
        a visitor with the same IP and user agent seen within the last five
        minutes is reused; failing that, a new one is created.  The referrer,
        page view counter and session start are reset when the visitor is new
        or was last seen an hour or more ago.  A ``DatabaseError`` while
        saving is logged and suppressed.

        Always returns ``None``.
        """
        # don't process AJAX requests
        if request.is_ajax():
            return

        # create some useful variables
        ip_address = utils.get_ip(request)
        user_agent = unicode(request.META.get('HTTP_USER_AGENT', '')[:255], errors='ignore')

        # retrieve untracked user agents from cache (kept for 3600 seconds)
        ua_key = '_tracking_untracked_uas'
        untracked = cache.get(ua_key)
        if untracked is None:
            log.info('Updating untracked user agent cache')
            untracked = UntrackedUserAgent.objects.all()
            cache.set(ua_key, untracked, 3600)

        # see if the user agent is not supposed to be tracked
        for ua in untracked:
            # if the keyword is found in the user agent, stop tracking
            if user_agent.find(ua.keyword) != -1:
                log.debug('Not tracking UA "%s" because of keyword: %s' % (user_agent, ua.keyword))
                return

        if hasattr(request, 'session') and request.session.session_key:
            # use the current session key if we can
            session_key = request.session.session_key
        else:
            # otherwise just fake a session key, cut to 40 characters
            session_key = '%s:%s' % (ip_address, user_agent)
            session_key = session_key[:40]

        # ensure that the request.path does not begin with any of the prefixes
        for prefix in self.prefixes:
            if request.path.startswith(prefix):
                log.debug('Not tracking request to: %s' % request.path)
                return

        # if we get here, the URL needs to be tracked
        # determine what time it is
        now = datetime.now()

        attrs = {
            'session_key': session_key,
            'ip_address': ip_address
        }

        # for some reason, Visitor.objects.get_or_create was not working here
        try:
            visitor = Visitor.objects.get(**attrs)
        except Visitor.DoesNotExist:
            # see if there's a visitor with the same IP and user agent
            # within the last 5 minutes
            cutoff = now - timedelta(minutes=5)
            # only one row is needed, so don't load every match
            recent = list(Visitor.objects.filter(
                ip_address=ip_address,
                user_agent=user_agent,
                last_update__gte=cutoff
            )[:1])

            if recent:
                visitor = recent[0]
                visitor.session_key = session_key
                log.debug('Using existing visitor for IP %s / UA %s: %s' % (ip_address, user_agent, visitor.id))
            else:
                # it's probably safe to assume that the visitor is brand new
                visitor = Visitor(**attrs)
                log.debug('Created a new visitor: %s' % attrs)
        except Exception:
            # tracking is best effort: report the problem, don't break the request
            log.exception('Could not look up visitor; request will not be tracked')
            return

        # determine whether or not the user is logged in
        user = request.user
        if isinstance(user, AnonymousUser):
            user = None

        # update the tracking information
        visitor.user = user
        visitor.user_agent = user_agent

        # if the visitor record is new, or the visitor hasn't been here for
        # at least an hour, update their referrer URL
        one_hour_ago = now - timedelta(hours=1)
        if not visitor.last_update or visitor.last_update <= one_hour_ago:
            visitor.referrer = utils.u_clean(request.META.get('HTTP_REFERER', 'unknown')[:255])

            # reset the number of pages they've been to
            visitor.page_views = 0
            visitor.session_start = now

        visitor.url = request.path
        visitor.page_views += 1
        visitor.last_update = now
        try:
            visitor.save()
        except DatabaseError:
            log.error('There was a problem saving visitor information:\n%s\n\n%s' % (traceback.format_exc(), locals()))
예제 #18
0
    def process_response(self, request, response):
        """Record the visit (and optionally the page view) for this request.

        Nothing is tracked when the request has no session, for AJAX requests
        unless ``TRACK_AJAX_REQUESTS`` is set, for status codes listed in
        ``TRACK_IGNORE_STATUS_CODES``, or for anonymous users unless
        ``TRACK_ANONYMOUS_USERS`` is set.

        Otherwise the ``Visitor`` row keyed by the session key is created or
        updated (user, session expiry, time on site) and, when
        ``TRACK_PAGEVIEWS`` is enabled and the path matches none of
        ``TRACK_IGNORE_URLS``, a ``Pageview`` is stored; the referer is
        included only when ``TRACK_REFERER`` is set.

        Always returns ``response`` unchanged.
        """
        # Session framework not installed, nothing to see here..
        if not hasattr(request, 'session'):
            return response

        # Do not track AJAX requests..
        if request.is_ajax() and not TRACK_AJAX_REQUESTS:
            return response

        # Do not track if HTTP HttpResponse status_code blacklisted
        if response.status_code in TRACK_IGNORE_STATUS_CODES:
            return response

        # If dealing with a non-authenticated user, we still should track the
        # session since if authentication happens, the `session_key` carries
        # over, thus having a more accurate start time of session
        user = getattr(request, 'user', None)

        # Check for anonymous users
        if not user or user.is_anonymous():
            if not TRACK_ANONYMOUS_USERS:
                return response
            user = None

        # Force a save to generate a session key if one does not exist
        if not request.session.session_key:
            request.session.save()

        # A Visitor row is unique by session_key
        session_key = request.session.session_key

        try:
            visitor = Visitor.objects.get(pk=session_key)
        except Visitor.DoesNotExist:
            # Log the ip address. Start time is managed via the
            # field `default` value
            visitor = Visitor(pk=session_key,
                              ip_address=get_ip_address(request),
                              user_agent=request.META.get(
                                  'HTTP_USER_AGENT', None))

        # Update the user field if the visitor user is not set. This
        # implies authentication has occurred on this request and now
        # the user object exists. Check using `user_id` to prevent
        # a database hit.
        if user and not visitor.user_id:
            visitor.user = user

        visitor.expiry_age = request.session.get_expiry_age()
        visitor.expiry_time = request.session.get_expiry_date()

        # Be conservative with the determining time on site since simply
        # increasing the session timeout could greatly skew results. This
        # is the only time we can guarantee.
        now = timezone.now()
        time_on_site = 0
        if visitor.start_time:
            # `timedelta.seconds` drops whole days, which made sessions
            # longer than 24 hours wrap around; use the full duration.
            time_on_site = int((now - visitor.start_time).total_seconds())
        visitor.time_on_site = time_on_site

        visitor.save()

        if TRACK_PAGEVIEWS:
            # Match against `path_info` to not include the SCRIPT_NAME..
            path = request.path_info.lstrip('/')
            for url in TRACK_IGNORE_URLS:
                if url.match(path):
                    break
            else:
                # no ignore pattern matched: record the page view
                referer = None
                if TRACK_REFERER:
                    referer = request.META.get('HTTP_REFERER', None)

                pageview = Pageview(visitor=visitor,
                                    url=request.path,
                                    view_time=now,
                                    method=request.method,
                                    referer=referer)
                pageview.save()

        return response