def fetch(self):
    parser = etree.HTMLParser(encoding='utf-8')
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    text = urllib2.urlopen(ALL_URL).read()
    tree = etree.HTML(text, parser=parser)
    nodes = tree.xpath(XPATH)
    for node in nodes:
        print node.text
        city_url = urlparse.urljoin(ALL_URL, node.attrib['href'])
        text1 = urllib2.urlopen(city_url).read()
        tree = etree.HTML(text1, parser=parser)
        list_nodes = tree.xpath(XPATH)
        for list_node in list_nodes:
            addr_url = urlparse.urljoin(city_url, list_node.attrib['href'])
            text2 = urllib2.urlopen(addr_url).read()
            tree = etree.HTML(text2, parser=parser)
            city_nodes = tree.xpath(CITY_XPATH)
            for city_node in city_nodes:
                name_node = city_node.find('li[2]')
                storename = name_node.text
                print u'店名:' + storename      # store name
                addr_node = city_node.find('li[3]')
                storeaddr = addr_node.text
                print u'地址:' + storeaddr      # store address
                self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
                latlng = getGoogleAPI.getgoogleapi(storeaddr)
                if latlng is None:
                    print "can not find the latlng!!!!"
                    continue
                collector.object_found.send(
                    self,
                    time=time,
                    title=storename,
                    url='hm-' + storename + '-' + storeaddr,
                    storeaddr=storeaddr,
                    lat=latlng[0],
                    lng=latlng[1],
                    brand='LevisCollector'
                )
                from shopping.signals import shop_found
                shop_found.send(
                    self,
                    brand='LevisCollector',
                    address=storeaddr,
                    lat=latlng[0],
                    lng=latlng[1],
                )
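# A minimal sketch of the module-level context these fetch()/getData() methods assume.
# Each collector module defines its own URL and XPath constants (ALL_URL, ADDR_URL,
# XPATH, CITY_XPATH, STORE_XPATH, STORE_URL, NAME_XPATH, ADDR_XPATH, ADDF_URL,
# ADDL_URL, ...) which are not shown here, and `collector` (providing the
# object_found signal) is imported from the project's signal module whose exact
# path is not shown either. Only the imports below are inferred directly from the
# names the code uses; treat them as assumptions, not a copy of the original headers.
import datetime
import json
import urllib
import urllib2
import urlparse

from lxml import etree

import getGoogleAPI  # assumed project-local helper wrapping the Google Geocoding API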
def fetch(self):
    parser = etree.HTMLParser(encoding='utf-8')
    text = urllib2.urlopen(ADDR_URL).read()
    tree = etree.HTML(text, parser=parser)
    nodes = tree.xpath(CITY_XPATH)
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    for node in nodes:
        print node.text
        info = urlparse.urljoin(ADDR_URL, node.attrib['href'])
        print info
        CITY_URL = info
        text1 = urllib2.urlopen(CITY_URL).read()
        tree1 = etree.HTML(text1, parser=parser)
        stores = tree1.xpath(STORE_XPATH)
        for store in stores:
            # drop the first 33 characters of the joined URL to get the bare store id
            store_id = urlparse.urljoin(CITY_URL, store.attrib['rel'])[33:]
            url = STORE_URL % (store_id)
            text = urllib2.urlopen(url).read()
            tree = etree.HTML(text, parser=parser)
            name = tree.xpath(NAME_XPATH)
            storename = name[0].text
            print u'店名:' + storename      # store name
            addr = tree.xpath(ADDR_XPATH)
            storeaddr = addr[0].text
            print u'地址:' + storeaddr      # store address
            self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print "can not find the latlng!!!!"
                continue
            collector.object_found.send(
                self,
                time=time,
                title=storename,
                url='coach-' + storename + '-' + storeaddr,
                storeaddr=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
                brand='CoachCollector'
            )
            from shopping.signals import shop_found
            shop_found.send(
                self,
                brand='CoachCollector',
                address=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
            )
def fetch(self):
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    # hard-coded C&A store data, grouped by city
    addrs = {'all': [
        {'city': '上海', 'info': [
            {'name': '正大广场店', 'addr': '上海市浦东新区陆家嘴西路168号正大广场内GF13-16'},
            {'name': '大宁店', 'addr': '上海市闸北区共和新路1868号大宁国际商业广场1(S2)栋一层101-106'},
            {'name': '龙之梦店', 'addr': '上海市长宁路1018号龙之梦购物中心1楼'},
            {'name': '淮海店', 'addr': '上海市淮海中路627-641号'},
            {'name': '金桥国际商业广场店', 'addr': '上海市浦东新区张扬路3611弄金桥国际商业广场3号一层二层101-103,105-108/201-207,210-214'},
            {'name': '四川北路店', 'addr': '上海市虹口区四川北路1318号盛邦国际大厦一层二层'},
            {'name': '长风景畔广场店', 'addr': '上海市普陀区大渡河路196号长风景畔广场娱乐商业中心C1幢113室、208室、209室和210室'},
            {'name': '上海百联徐汇商业广场店', 'addr': '华山路2038号百联徐汇商业广场一层二层'}]},
        {'city': '北京', 'info': [
            {'name': '新东安店', 'addr': '北京市东城区王府井大街138号新东安广场208-210'},
            {'name': '富力广场店', 'addr': '北京市朝阳区东三环中路65号218室'},
            {'name': '国瑞购物中心店', 'addr': '北京市崇文区崇文门外大街18号国瑞购物中心二层F2-18号'},
            {'name': '欧美汇购中心店', 'addr': '北京市海淀区丹棱街1号欧美汇购物中心二层三层F2-17, F3-13,F3-14'},
            {'name': '华联万柳店', 'addr': '北京市海淀区巴沟路2号北京华联万柳购物中心一层二层'}]},
        {'city': '辽宁', 'info': [
            {'name': '大悦城店', 'addr': '沈阳市大东区小东路5号大悦城B座一层二层B119-120,B219-220'},
            {'name': '万达店', 'addr': '沈阳市和平区太原南街2号沈阳万达广场城中城'},
            {'name': '大连天兴罗斯福国际中心店', 'addr': '大连沙河口区西安路139号罗斯福国际中心一层二层'},
            {'name': '沈阳龙之梦购物中心店', 'addr': '沈阳市大东区滂江街22号龙之梦购物中心一层'}]},
        {'city': '河北', 'info': [
            {'name': '石家庄裕华万达广场店', 'addr': '石家庄市裕华区建华南大街136号石家庄裕华万达广场一层1030室'},
            {'name': '唐山万达广场店', 'addr': '河北省唐山市路南区新华东道100号唐山万达广场1020和2023'}]},
        {'city': '河南', 'info': [
            {'name': '360国贸中心店', 'addr': '郑州市金水区花园路39号郑州国贸中心一层'},
            {'name': '郑州市', 'addr': '郑州市民主路88号印象城购物中心一层二层1024/2020'}]},
        {'city': '四川', 'info': [
            {'name': '富力天汇店', 'addr': '四川成都市顺城大街289号富力天汇购物中心2楼'}]},
        {'city': '浙江', 'info': [
            {'name': '宁波世纪东方广场', 'addr': '宁波市中山东路1083号宁波世纪东方广场一层二层'}]},
        {'city': '天津', 'info': [
            {'name': '利福广场店', 'addr': '天津市和平区滨江道219号利福广场一层二层'}]},
        {'city': '江苏', 'info': [
            {'name': '无锡市保利广场店', 'addr': '无锡市解放东路1000号保利广场一层185-187室,二层174-189室'},
            {'name': '苏州市印象城购物中心店', 'addr': '苏州市工业园区现代大道1699号印象城购物中心一层二层1001-1002/2001-2002'},
            {'name': '泰州万达广场店', 'addr': '江苏省泰州市海陵区济川东路226号泰州万达广场226-1-A'},
            {'name': 'C&A无锡新之城店', 'addr': '无锡市新区新光路555号新之城全生活广场B区1F01与2F01'}]},
        {'city': '湖南', 'info': [
            {'name': '乐和城店', 'addr': '长沙市黄兴中路188号乐和城一层二层'}]},
        {'city': '湖北', 'info': [
            {'name': '光谷国际广场店', 'addr': '武汉市东湖新技术开发区珞瑜路889号光谷国际广场一层二层'},
            {'name': '武汉汉街店', 'addr': '武汉市武昌区汉街49号'},
            {'name': '汉商银座购物中心店', 'addr': '湖北武汉汉阳大道139号汉商银座购物中心地上一层和地上二层1-01,F1&F2,'},
            {'name': '武汉摩尔城店', 'addr': '武汉龙阳大道特六号,武汉摩尔城B栋一层'}]},
        {'city': '山东', 'info': [
            {'name': '济南和谐广场店', 'addr': '济南市槐荫区经十路22799号和谐广场一层二层L119-L121及L216-L218 1-2F'}]},
        {'city': '重庆', 'info': [
            {'name': '重庆日月光中心广场店', 'addr': '重庆市渝中区民权路89号日月光中心广场LG072-075'},
            {'name': '重庆南坪万达广场店', 'addr': '重庆市南岸区江南大道10号南坪万达广场一层JD1-2, JD1-3'}]},
    ]}
    addrall = addrs['all']
    for addrs in addrall:
        cityname = addrs['city']
        addrsinfo = addrs['info']
        for addr in addrsinfo:
            storename = cityname + addr['name']
            storeaddr = addr['addr']
            print '店名:' + storename      # store name
            print '地址:' + storeaddr      # store address
            self.logger.info('店名: %s 地址: %s ' % (storename, storeaddr))
            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print "can not find the latlng!!!!"
                continue
            collector.object_found.send(
                self,
                time=time,
                title=storename,
                url='ca-' + storename + '-' + storeaddr,
                storeaddr=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
                brand='CACollector'
            )
            from shopping.signals import shop_found
            shop_found.send(
                self,
                brand='CACollector',
                address=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
            )
def fetch(self):
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    parser = etree.HTMLParser(encoding='utf-8')
    text = urllib2.urlopen(ADDR_URL).read()
    tree = etree.HTML(text, parser=parser)
    nodes = tree.xpath(XPATH)
    for node in nodes:
        info = node.attrib['href']
        # everything after 'geocode=&q=' is a '+'-separated list: name, address, ..., city info
        allinfo = info[info.find('geocode=&q=') + 11:]
        infolist = allinfo.split('+')
        city = ''
        if len(infolist) == 4:
            # the two characters just before '&ll=' are the city name
            city = infolist[3][infolist[3].find('&ll=') - 2:infolist[3].find('&ll=')] + u'市'
            if infolist[3].find('KUNMING') > 0:
                city = u'昆明'
        storename = infolist[0]
        print u'店名:' + storename              # store name
        storeaddr = infolist[1]
        print u'地址:' + city + storeaddr       # address, prefixed with the city
        self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
        latlng = getGoogleAPI.getgoogleapi(storeaddr)
        if latlng is None:
            print "can not find the latlng!!!!"
            continue
        collector.object_found.send(
            self,
            time=time,
            title=storename,
            url='Mango-' + storename + '-' + storeaddr,
            storeaddr=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
            brand='MangoCollector'
        )
        from shopping.signals import shop_found
        shop_found.send(
            self,
            brand='MangoCollector',
            address=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
        )
def fetch(self):
    text = urllib2.urlopen(ADDR_URL).read()
    data = json.loads(text, encoding='utf-8')
    info = data["storesCompleteResponse"]["storesComplete"]["storeComplete"]
    for row in info:
        city = row["city"]
        storename = city + row["name"]
        print storename
        storeaddr = row["address"]["addressLine"]
        time = datetime.datetime.now().strftime('%Y-%m-%d')
        # only records whose addressLine is a list carry a usable street address
        if isinstance(storeaddr, int):
            continue
        if isinstance(storeaddr, str):
            continue
        if isinstance(storeaddr, unicode):
            continue
        if isinstance(storeaddr, list):
            ss = unicode(storeaddr[1]) + unicode(storeaddr[0])
            print ss
            self.logger.info(u'店名: %s 地址: %s ' % (storename, ss))
            latlng = getGoogleAPI.getgoogleapi(ss)
            if latlng is None:
                print "can not find the latlng!!!!"
                continue
            collector.object_found.send(
                self,
                time=time,
                title=storename,
                url='hm-' + storename + '-' + ss,
                storeaddr=ss,
                lat=latlng[0],
                lng=latlng[1],
                brand='HMCollector'
            )
            from shopping.signals import shop_found
            shop_found.send(
                self,
                brand='HMCollector',
                address=ss,
                lat=latlng[0],
                lng=latlng[1],
            )
def getData(self, citycode, storenuum, cityname):
    URL = ADDF_URL % (citycode, 1)
    URL = URL + ADDL_URL
    # strip the first three bytes (likely a BOM/prefix) before parsing the JSON
    text = urllib2.urlopen(URL).read()[3:]
    data = json.loads(text, encoding='utf-8')
    info = data["wsResponse"]
    nums = info["results"]
    print nums
    stores = info["result"]
    print cityname
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    # note: iteration starts at index 1, so result[0] is skipped
    for num in range(1, int(nums)):
        storename = stores[num]["name"]
        storeaddr = stores[num]["street1"]
        print u'店名:' + storename      # store name
        print u'地址:' + storeaddr      # store address
        self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
        latlng = getGoogleAPI.getgoogleapi(storeaddr)
        if latlng is None:
            print "can not find the latlng!!!!"
            continue
        collector.object_found.send(
            self,
            time=time,
            title=storename,
            url='adidas-' + cityname + '-' + storename,
            storeaddr=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
            brand='AdidasCollector'
        )
        from shopping.signals import shop_found
        shop_found.send(
            self,
            brand='AdidasCollector',
            address=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
        )
def fetch(self):
    parser = etree.HTMLParser(encoding='utf-8')
    text = urllib2.urlopen(ADDR_URL).read()
    tree = etree.HTML(text, parser=parser)
    nodes = tree.xpath(XPATH)
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    for node in nodes:
        addr = etree.tostring(node, method='html', encoding='utf-8')
        # the address text sits between the closing </span> and the closing </li>
        storeaddr = addr[addr.index('></span>') + len('></span>'):addr.index('</li>')]
        if storeaddr.find('amp;') > 0:
            storeaddr = storeaddr.replace('amp;', '')
        print storeaddr
        storename = node.find('span').text
        self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
        latlng = getGoogleAPI.getgoogleapi(storeaddr)
        print latlng
        if latlng is None:
            print "can not find the latlng!!!!"
            continue
        collector.object_found.send(
            self,
            time=time,
            title=storename,
            url='CityMe-' + storeaddr,
            storeaddr=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
            brand='CityMeCollector'
        )
        from shopping.signals import shop_found
        shop_found.send(
            self,
            brand='CityMeCollector',
            address=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
        )
def fetch(self):
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    parser = etree.HTMLParser(encoding='utf-8')
    text = urllib2.urlopen(ADDR_URL).read()
    tree = etree.HTML(text, parser=parser)
    nodes = tree.xpath(XPATH)
    # step through the table rows four at a time; the first row of each group holds the store
    for i in range(0, len(nodes), 4):
        name_node = nodes[i].find('td[1]')
        storename = name_node.text
        if storename is None:
            continue
        print storename
        addr_node = nodes[i].find('td[3]')
        storeaddr = addr_node.text
        print storeaddr
        self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
        latlng = getGoogleAPI.getgoogleapi(storeaddr)
        if latlng is None:
            print "can not find the latlng!!!!"
            continue
        collector.object_found.send(
            self,
            time=time,
            title=storename,
            url='Roxy-' + storename + '-' + storeaddr,
            storeaddr=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
            brand='RoxyCollector'
        )
        from shopping.signals import shop_found
        shop_found.send(
            self,
            brand='RoxyCollector',
            address=storeaddr,
            lat=latlng[0],
            lng=latlng[1],
        )
def getData(self, code1, code2, cityname): time = datetime.datetime.now().strftime("%Y-%m-%d") print time parser = etree.HTMLParser(encoding="utf-8") url = ADDR_URL % (code1, code2) text = urllib2.urlopen(url).read() tree = etree.HTML(text, parser=parser) nodes = tree.xpath(XPATH) print cityname for node in nodes: city_node = node.find("a/span") name_node = city_node.find("strong") storename = city_node.text.strip() + name_node.text.strip() print u"店名:" + storename addr_node = node.find("div/span/strong") storeaddr = addr_node.text print u"地址:" + storeaddr self.logger.info(u"店名: %s 地址: %s " % (storename, storeaddr)) latlng = getGoogleAPI.getgoogleapi(storeaddr) if latlng == None: print "can not find the latlng!!!!" continue collector.object_found.send( self, time=time, title=storename, url="Zara-" + storename + "-" + storeaddr, storeaddr=storeaddr, lat=latlng[0], lng=latlng[1], brand="ZaraCollector", ) from shopping.signals import shop_found shop_found.send(self, brand="ZaraCollector", address=storeaddr, lat=latlng[0], lng=latlng[1])
def fetch(self):
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    for URL in ADDR_URL:
        text = urllib2.urlopen(URL).read()
        # the JSON object is embedded in wrapper text; keep only the {...} payload
        ntext = text[text.find('{"locations":'):-2]
        data = json.loads(ntext, encoding='utf-8')
        infos = data['locations']
        for info in infos:
            storename = info['name']
            print storename
            storeaddr = info['street']
            print storeaddr
            self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
            latlng = getGoogleAPI.getgoogleapi(storeaddr)
            if latlng is None:
                print "can not find the latlng!!!!"
                continue
            collector.object_found.send(
                self,
                time=time,
                title=storename,
                url='Nike-' + storename + '-' + storeaddr,
                storeaddr=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
                brand='NikeCollector'
            )
            from shopping.signals import shop_found
            shop_found.send(
                self,
                brand='NikeCollector',
                address=storeaddr,
                lat=latlng[0],
                lng=latlng[1],
            )
def fetch(self):
    parser = etree.HTMLParser(encoding='utf-8')
    text = urllib2.urlopen(ADDR_URL).read()
    tree = etree.HTML(text, parser=parser)
    nodes = tree.xpath(XPATH)
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    print nodes
    for node in nodes:
        # iterate <option> entries 2 through 100 of the region dropdown
        for i in range(2, 101):
            sub_node = node.find('option[' + str(i) + ']')
            city = sub_node.text
            city = city.replace(' ', '+')
            city = urllib.quote(city.encode('utf-8'))
            CITY_URL = ADDR_URL + u'state=' + city
            CITY_URL = CITY_URL.replace('%2B', '+')
            print CITY_URL
            text = urllib2.urlopen(CITY_URL).read()
            tree = etree.HTML(text, parser=parser)
            stores = tree.xpath(STORE_XPATH)
            for store in stores:
                name_nodes = store.find('p[1]')
                name_nodes_del = store.find('p[1]/b')
                if name_nodes is not None and name_nodes_del is not None:
                    cityname = name_nodes.text
                    nameinfo = name_nodes_del.text
                    storename = cityname + nameinfo
                    print storename
                    self.logger.info(u'店名: %s ' % (storename))
                addr_nodes = store.find('p[3]')
                if addr_nodes is not None:
                    storeaddr = addr_nodes.text
                    print storeaddr
                    self.logger.info(u'地址: %s ' % (storeaddr))
                    latlng = getGoogleAPI.getgoogleapi(storeaddr)
                    if latlng is None:
                        print "can not find the latlng!!!!"
                        continue
                    collector.object_found.send(
                        self,
                        time=time,
                        title=storename,
                        url='crocs-' + storename + '-' + storeaddr,
                        storeaddr=storeaddr,
                        lat=latlng[0],
                        lng=latlng[1],
                        brand='CrocsCollector'
                    )
                    from shopping.signals import shop_found
                    shop_found.send(
                        self,
                        brand='CrocsCollector',
                        address=storeaddr,
                        lat=latlng[0],
                        lng=latlng[1],
                    )
def fetch(self):
    parser = etree.HTMLParser(encoding='utf-8')
    time = datetime.datetime.now().strftime('%Y-%m-%d')
    text = urllib2.urlopen(ALL_URL).read()
    tree = etree.HTML(text, parser=parser)
    nodes = tree.xpath(ALL_XPATH)
    # only the second region link (nodes[1]) is followed
    node = nodes[1]
    city_url = urlparse.urljoin(ALL_URL, node.attrib['href'])
    print city_url
    text1 = urllib2.urlopen(city_url).read()
    tree = etree.HTML(text1, parser=parser)
    list_nodes = tree.xpath(CITY_XPATH)
    for list_node in list_nodes:
        addr_url = urlparse.urljoin(city_url, list_node.attrib['href'])
        print addr_url
        text2 = urllib2.urlopen(addr_url).read()
        tree = etree.HTML(text2, parser=parser)
        city_nodes = tree.xpath(NAME_XPATH)
        for city_node in city_nodes:
            name_node = city_node.find('h2/span')
            if name_node is not None:
                print name_node
                storename = name_node.text
                print u'店名:' + storename      # store name
                self.logger.info(u'店名: %s' % (storename))
            addr_node = city_node.find('div/div/table/tr[1]/td')
            if addr_node is not None:
                print addr_node
                storeaddr = addr_node.text
                if storeaddr is None:
                    # some cells wrap the address in an extra <p>
                    sub_addr = addr_node.find('p')
                    storeaddr = sub_addr.text
                print u'地址:' + storeaddr      # store address
                self.logger.info(u'地址: %s ' % (storeaddr))
                latlng = getGoogleAPI.getgoogleapi(storeaddr)
                if latlng is None:
                    print "can not find the latlng!!!!"
                    continue
                collector.object_found.send(
                    self,
                    time=time,
                    title=storename,
                    url='uniqlo-' + storename + '-' + storeaddr,
                    storeaddr=storeaddr,
                    lat=latlng[0],
                    lng=latlng[1],
                    brand='UniqloCollector'
                )
                from shopping.signals import shop_found
                shop_found.send(
                    self,
                    brand='UniqloCollector',
                    address=storeaddr,
                    lat=latlng[0],
                    lng=latlng[1],
                )
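# Every fetch()/getData() above ends with the same tail: geocode the address, skip
# the store when geocoding fails, then emit collector.object_found followed by
# shopping.signals.shop_found. A minimal sketch of a shared helper for that tail;
# the name report_store and the url_prefix parameter are illustrative and not part
# of the original code (a few collectors build the url field slightly differently,
# e.g. 'adidas-' + cityname + '-' + storename).
def report_store(self, brand, url_prefix, storename, storeaddr, time):
    """Geocode storeaddr and, on success, emit the object_found / shop_found signals."""
    self.logger.info(u'店名: %s 地址: %s ' % (storename, storeaddr))
    latlng = getGoogleAPI.getgoogleapi(storeaddr)
    if latlng is None:
        print "can not find the latlng!!!!"
        return False
    collector.object_found.send(
        self,
        time=time,
        title=storename,
        url=url_prefix + '-' + storename + '-' + storeaddr,
        storeaddr=storeaddr,
        lat=latlng[0],
        lng=latlng[1],
        brand=brand,
    )
    from shopping.signals import shop_found
    shop_found.send(self, brand=brand, address=storeaddr, lat=latlng[0], lng=latlng[1])
    return True

# Hypothetical use inside a fetch() loop:
#     if not self.report_store('NikeCollector', 'Nike', storename, storeaddr, time):
#         continue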