def extractor(self, line):
    """Update the 'aoyou' page-view, visitor and referer counters from one record.

    The record is counted only when all of the following hold:

    * ``TongjicnzzRequestParser.parse(line.request)`` yields a truthy result;
    * that result has a ``userid`` containing the substring ``'aoyou'``;
    * ``Domain_Parser.parse(line.http_referer)`` yields a truthy result.

    For a counted record ``self.data['aoyou']['pv']`` is incremented and the
    user id is added to ``self.data['aoyou']['uv']``.  If the parsed request
    carries a ``refer`` field, one referer bucket in
    ``self.data['aoyou']['referer']`` is incremented: the parsed referer's
    ``SLD`` when the field is non-empty and parseable, ``'direct_or_other'``
    otherwise.

    :param line: log record exposing ``request`` and ``http_referer``.
    :return: None; results are accumulated in ``self.data``.
    """
    # NOTE(review): the original indentation of this method was lost; the
    # referer accounting is assumed to sit inside the same guards as the
    # pv/uv accounting -- confirm against the upstream file.
    info = TongjicnzzRequestParser.parse(line.request)
    if not info:
        return

    userid = info.get('userid')
    if not userid or 'aoyou' not in userid:
        return

    if not Domain_Parser.parse(line.http_referer):
        return

    stats = self.data['aoyou']
    stats['pv'] += 1
    stats['uv'].add(userid)

    refer = info.get('refer')
    if refer is None:
        # No referer field at all: nothing more to record.
        return

    source = None
    if refer != '':
        # The referer is transported with '-' standing in for '%', so restore
        # the percent-encoding before unquoting.
        source = Domain_Parser.parse(unquote(refer.replace('-', '%')).strip())

    bucket = source.SLD if source else 'direct_or_other'
    referer_counts = stats['referer']
    # Seed missing buckets (including 'direct_or_other') instead of assuming
    # they were pre-populated.
    referer_counts[bucket] = referer_counts.get(bucket, 0) + 1
def extractor(self, line):
    """Count hits per normalised URL for the sites described in ``self.criteria``.

    ``line.http_referer`` is parsed with ``Domain_Parser``; every criteria
    entry whose key contains the parsed ``SLD`` is then tried.  For such an
    entry the first ``match_patterns`` regex that matches the URL selects the
    matched text, every ``sub_patterns`` regex is stripped from it, and the
    result is used as the key in ``self.data`` whose ``count`` is incremented
    (the entry is created with the rule's ``site_id`` on first sight).

    :param line: log record exposing ``http_referer``.
    :return: None; results are accumulated in ``self.data``.
    """
    # NOTE(review): the original indentation of this method was lost; the
    # counting step is assumed to run once per matching criteria entry, after
    # all sub_patterns have been applied -- confirm against the upstream file.
    request_url = line.http_referer
    parsed_url = Domain_Parser.parse(request_url)
    if not parsed_url:
        return

    sld = parsed_url.SLD
    for site_url, rule in self.criteria.items():
        if sld not in site_url:
            continue

        # Always search the untouched request URL so that a rewrite done for
        # one criteria entry cannot leak into the matching of the next one.
        for match_pattern in rule['match_patterns']:
            match = match_pattern.search(request_url)
            if match:
                page_url = match.group(0)
                break
        else:
            # None of this site's patterns matched: nothing to count.
            continue

        for sub_pattern in rule['sub_patterns']:
            page_url = sub_pattern.sub('', page_url)

        entry = self.data.setdefault(
            page_url, dict(site_id=rule['site_id'], count=0))
        entry['count'] += 1
def extractor(self, line):
    """Accumulate per-site keyword, visit-funnel and referer statistics.

    The parsed request's ``kind`` selects the handling:

    * ``'keywords_log'`` -- the URL-quoted ``keyword`` is decoded (UTF-8
      first, GBK as fallback; the record is dropped if both fail) and counted
      under ``self.data[site]['keyword']`` when ``site`` is in ``self.sites``.
      A ``site`` of ``'taobao'`` is additionally counted under ``'tmall'``.
    * ``'visit'`` -- the site is the first element of ``info['action']`` that
      is in ``self.sites``; ``info['action'][0]`` names the action, which
      drives the uv/pv/category/cart/order/vip counters of that site.
      Records whose located IP contains '北京' also feed the ``beijing_*``
      counters.

    Finally, when a site was determined and the request carries a ``refer``
    field, one bucket of ``self.data[site]['referer']`` is incremented (the
    parsed referer's ``SLD``, or ``'direct_or_other'``).

    :param line: log record exposing ``request``, ``remote_addr`` and
        ``http_referer``.
    :return: None; results are accumulated in ``self.data``.
    """
    # NOTE(review): the original indentation of this method was lost.  The
    # action chain is assumed to be a sibling of the ``if userid`` block and
    # the referer section is assumed to sit at function level (so it runs for
    # both record kinds) -- confirm against the upstream file.
    info = TongjicnzzRequestParser.parse(line.request)
    if not info:
        return

    kind = info['kind']
    site = ''

    if kind == 'keywords_log':
        # NOTE(review): relies on Python 2 byte-string semantics
        # (unquote() on an encoded str, then .decode()).
        try:
            quoted = unquote(info['keyword'].encode('utf-8'))
            try:
                keyword = quoted.decode('utf-8')
            except UnicodeDecodeError:
                keyword = quoted.decode('gbk')
        except UnicodeDecodeError:
            return

        site_name = info.get('site')
        if keyword and site_name:
            site = site_name
            if site == 'taobao':
                tmall_keywords = self.data['tmall']['keyword']
                tmall_keywords[keyword] = tmall_keywords.get(keyword, 0) + 1
            if site in self.sites:
                site_keywords = self.data[site]['keyword']
                site_keywords[keyword] = site_keywords.get(keyword, 0) + 1

    elif kind == 'visit':
        actions = info.get('action')
        if not actions or len(actions) < 2:
            return

        for part in actions:
            if part in self.sites:
                site = part
                break

        if site:
            located = Ip_Locator.locate(line.remote_addr)
            location = located[0] if located else ''
            in_beijing = '北京' in location

            action = actions[0]
            userid = info.get('userid')
            site_stats = self.data[site]

            if userid:
                site_stats['uv'].add(userid)
                if in_beijing:
                    site_stats['beijing_uv'].add(userid)

            if action == 'page_view':
                site_stats['pv'] += 1
                if in_beijing:
                    site_stats['beijing_pv'] += 1
            elif action == 'category':
                # The category name is the third element; the length guard
                # above only guarantees two, so skip records without one
                # instead of raising IndexError.
                if len(actions) > 2:
                    category = actions[2]
                    categories = site_stats['category']
                    categories[category] = categories.get(category, 0) + 1
                    if in_beijing:
                        entry = site_stats['beijing_category'].setdefault(
                            category, dict(url=line.http_referer, count=0))
                        entry['count'] += 1
            elif action == 'product_page_visit':
                site_stats['product_page'] += 1
            elif action == 'click' and actions[1] == 'collection_click':
                site_stats['collection'] += 1
            elif action in ('productaddcart_success_page',
                            'otheraddcart_success_page'):
                site_stats['addcart'] += 1
            elif action in ('productcart_page_visit',
                            'othercart_page_visit'):
                site_stats['cart'] += 1
            elif action in ('productorderwrite_page_visit',
                            'otherorderwrite_page_visit'):
                site_stats['orderwrite'] += 1
            elif action in ('productordersuccess_page_visit',
                            'otherordersuccess_page_visit'):
                site_stats['ordersuccess'] += 1
            elif action in ('productvip_page_visit', 'othervip_page_visit'):
                site_stats['vip_pv'] += 1
                # userid is optional (see above); only record it when present
                # rather than raising KeyError on info['userid'].
                if userid:
                    site_stats['vip_uv'].add(userid)

    if not site:
        return

    refer = info.get('refer')
    if refer is None:
        # No referer field at all: nothing more to record.
        return

    source = None
    if refer != '':
        # The referer is transported with '-' standing in for '%', so restore
        # the percent-encoding before unquoting.
        source = Domain_Parser.parse(unquote(refer.replace('-', '%')).strip())
        if source and site == '360buy' and source.original_url == 'u.gwdang.com':
            # This particular referer is deliberately not counted for 360buy.
            return

    # NOTE(review): for 'keywords_log' records ``site`` may be a name that is
    # not in self.sites (e.g. 'taobao'); this lookup presumes self.data has an
    # entry for it -- verify against how self.data is initialised.
    bucket = source.SLD if source else 'direct_or_other'
    referer_counts = self.data[site]['referer']
    referer_counts[bucket] = referer_counts.get(bucket, 0) + 1