def extractor(self, line):
        """Fold one log line into the per-site counters held in ``self.data``.

        ``line.request`` is handed to ``TongjicnzzRequestParser.parse``; the
        resulting mapping is dispatched on its ``'kind'`` entry:

        * ``'keywords_log'`` -- the percent-encoded ``'keyword'`` is decoded
          (UTF-8 first, GBK as fallback) and counted under the entry's
          ``'site'``.  A ``'taobao'`` keyword is additionally counted under
          ``'tmall'``.
        * ``'visit'`` -- the first element of ``'action'`` that appears in
          ``self.sites`` selects the site; ``action[0]`` selects which
          counter (pv, category, cart, ...) is updated.

        When a site was determined and the entry carries a ``'refer'`` value,
        the referer counter of that site is updated as well.

        :param line: log record exposing ``request``, ``remote_addr`` and
            ``http_referer`` attributes.
        :returns: ``None``; all results are written into ``self.data``.
        """
        info = TongjicnzzRequestParser.parse(line.request)
        if not info:
            return
        kind = info['kind']
        site = ''

        if kind == 'keywords_log':
            try:
                # Unquote once, then try UTF-8 before falling back to GBK.
                # A missing keyword is treated like an empty one (ignored).
                raw = unquote((info.get('keyword') or '').encode('utf-8'))
                try:
                    keyword = raw.decode('utf-8')
                except UnicodeDecodeError:
                    keyword = raw.decode('gbk')
            except UnicodeDecodeError:
                # Neither encoding fits: drop the line entirely.
                return
            if keyword and info.get('site'):
                site = info['site']
                if site == 'taobao':
                    # taobao keywords are also credited to tmall.
                    tmall_keywords = self.data['tmall']['keyword']
                    tmall_keywords[keyword] = tmall_keywords.get(keyword, 0) + 1
                if site in self.sites:
                    site_keywords = self.data[site]['keyword']
                    site_keywords[keyword] = site_keywords.get(keyword, 0) + 1
                # NOTE(review): ``site`` is kept even when it is not in
                # self.sites, so the referer section below indexes
                # self.data[site] for it -- confirm self.data has that key.

        elif kind == 'visit':
            actions = info.get('action')
            if not actions or len(actions) < 2:
                return
            # The site may sit at any position of the action list.
            site = next((item for item in actions if item in self.sites), '')
            if site:
                location = Ip_Locator.locate(line.remote_addr)
                location = location[0] if location else ''
                action = actions[0]
                userid = info.get('userid')
                if userid:
                    self.data[site]['uv'].add(userid)
                    if '北京' in location:
                        self.data[site]['beijing_uv'].add(userid)

                if action == 'page_view':
                    self.data[site]['pv'] += 1
                    if '北京' in location:
                        self.data[site]['beijing_pv'] += 1
                elif action == 'category':
                    # The category name is the third element; the length
                    # guard above only guarantees two, so check before use.
                    if len(actions) > 2:
                        category = actions[2]
                        categories = self.data[site]['category']
                        categories[category] = categories.get(category, 0) + 1
                        if '北京' in location:
                            entry = self.data[site]['beijing_category'].setdefault(
                                category, dict(url=line.http_referer, count=0))
                            entry['count'] += 1
                elif action == 'product_page_visit':
                    self.data[site]['product_page'] += 1
                elif action == 'click' and actions[1] == 'collection_click':
                    self.data[site]['collection'] += 1
                elif action in ('productaddcart_success_page',
                                'otheraddcart_success_page'):
                    self.data[site]['addcart'] += 1
                elif action in ('productcart_page_visit',
                                'othercart_page_visit'):
                    self.data[site]['cart'] += 1
                elif action in ('productorderwrite_page_visit',
                                'otherorderwrite_page_visit'):
                    self.data[site]['orderwrite'] += 1
                elif action in ('productordersuccess_page_visit',
                                'otherordersuccess_page_visit'):
                    self.data[site]['ordersuccess'] += 1
                elif action in ('productvip_page_visit',
                                'othervip_page_visit'):
                    self.data[site]['vip_pv'] += 1
                    # 'userid' is optional; only identified visitors count
                    # towards the unique-visitor set.
                    if userid:
                        self.data[site]['vip_uv'].add(userid)

        if not site:
            return
        refer = info.get('refer')
        if refer is None:
            return
        if refer == '':
            source = 'direct_or_other'
        else:
            # '-' is mapped back to '%' before unquoting -- presumably the
            # tracker escapes '%' as '-'; verify against the emitting side.
            referer = Domain_Parser.parse(unquote(refer.replace('-', '%')).strip())
            if referer:
                if site == '360buy' and referer.original_url == 'u.gwdang.com':
                    return
                source = referer.SLD
            else:
                source = 'direct_or_other'
        referers = self.data[site]['referer']
        referers[source] = referers.get(source, 0) + 1
# Example #2
# 0
 def extractor(self, line):
     """Count one hit in ``self.data`` for the located remote address.

     The value returned by ``Ip_Locator.locate(line.remote_addr)`` is used
     as the key; lines for which it is falsy are ignored.
     """
     where = Ip_Locator.locate(line.remote_addr)
     if not where:
         return
     # setdefault seeds unseen locations with 0 before the increment.
     self.data[where] = self.data.setdefault(where, 0) + 1