def fetch_store_detail(s, data, isOfficial=False):
    """Build a store entry from one raw retailer record.

    :param s: raw record; its 'name', 'city', 'address', 'email', 'phone',
        'fax', 'lat' and 'lng' values are read.
    :param data: crawl context supplying 'brand_id', 'brandname_e',
        'brandname_c', 'country' and the fallback 'city'.
    :param isOfficial: when true the entry is classed 'Official Retailer',
        otherwise 'Retailer'.
    :return: the populated entry; this function does not insert it anywhere.
    """
    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
    entry[cm.name_e] = cm.html2plain(s['name']).strip()
    entry[cm.country_e] = data['country']

    # Prefer the record's own city; fall back to the crawl context when blank.
    city = cm.html2plain(s['city']).strip().upper()
    if not city:
        city = data['city']
    entry[cm.city_e] = cm.extract_city(city)[0]

    entry[cm.addr_e] = cm.html2plain(s['address']).strip()
    entry[cm.email] = s['email'].strip()
    entry[cm.tel] = s['phone'].strip()
    entry[cm.fax] = s['fax'].strip()
    if isOfficial:
        entry[cm.store_class] = 'Official Retailer'
    else:
        entry[cm.store_class] = 'Retailer'

    # A bad or missing coordinate is logged and skipped, never fatal.
    coord_fields = (
        (cm.lat, 'lat', 'Error in fetching lat: %s'),
        (cm.lng, 'lng', 'Error in fetching lng: %s'),
    )
    for field, key, message in coord_fields:
        try:
            entry[field] = string.atof(s[key]) if s[key] != '' else ''
        except (ValueError, KeyError, TypeError) as e:
            cm.dump(message % str(e), log_name)

    gs.field_sense(entry)
    sensed = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    # Only fill province/city from the address when they are still empty.
    if sensed[1] is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = sensed[1]
    if sensed[2] is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = sensed[2]
    gs.field_sense(entry)
    return entry
def fetch_stores(data): url = data['url'] try: body = cm.get_data(url) except Exception: cm.dump('Error in fetching stores: %s' % url, log_name) return [] store_list = [] for m in re.finditer(ur'<item id="\d+">', body): sub = cm.extract_closure(body[m.start():], ur'<item\b', ur'</item>')[0] entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) m1 = re.search(ur'<country>([^<>]+)</country>', sub) if m1 is not None: tmp = m1.group(1).split('/') for v in tmp: ret = gs.look_up(v.strip().upper(), 1) if ret is not None: entry[cm.country_e] = ret['name_e'] break m1 = re.search(ur'<city>([^<>]+)</city>', sub) if m1 is not None: val = cm.reformat_addr(m1.group(1)) if entry[cm.country_e] == 'UNITED STATES': tmp_list = tuple(tmp.strip() for tmp in cm.reformat_addr(val).strip(',')) if len(tmp_list) == 2: if re.search('[A-Z]{2}', tmp_list[1]): entry[cm.province_e] = tmp_list[1] entry[cm.city_e] = cm.extract_city(m1.group(1))[0] m1 = re.search(ur'<brands>([^<>]+)</brands>', sub) if m1 is not None: tmp = m1.group(1).split('/') brand_list = [] for v in tmp: if v.strip() != '': brand_list.append(v) entry[cm.store_type] = ', '.join(brand_map[key] for key in brand_list) m1 = re.search(ur'<name>([^<>]+)</name>', sub) if m1 is not None: entry[cm.name_e] = m1.group(1).strip() m1 = re.search(ur'<address>([^<>]+)</address>', sub) if m1 is not None: entry[cm.addr_e] = cm.reformat_addr(m1.group(1)) m1 = re.search(ur'<tel>([^<>]+)</tel>', sub) if m1 is not None: entry[cm.tel] = m1.group(1).strip() m1 = re.search(ur'sll=(-?\d+\.\d+),(-?\d+\.\d+)', sub) if m1 is not None: entry[cm.lat] = string.atof(m1.group(1)) entry[cm.lng] = string.atof(m1.group(2)) gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None: entry[cm.province_e] = ret[1] gs.field_sense(entry) cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], 
entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry)
def fetch_store_details(data): # http://maps.oasis-stores.com/index-v2.php?coutnryISO=GB&brand=oasis&lat=51.42014&lng=-0.20954 url = data['store_url'] code = data['country_code'] city = data['city_e'] try: html = cm.get_data(url, { 'latitude': data['lat'], 'longitude': data['lng'], 'brand': 'oasis' }) except Exception: print 'Error occured: %s' % url dump_data = { 'level': 0, 'time': cm.format_time(), 'data': { 'url': url }, 'brand_id': data['brand_id'] } cm.dump(dump_data) return [] raw = json.loads(html) entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.name_e] = raw['name'] addr_list = [] for i in xrange(1, 4): tmp = cm.html2plain(raw['address%d' % i]).strip() if tmp != '': addr_list.append(tmp) entry[cm.addr_e] = ', '.join(addr_list) state = raw['countryRegion'] if state is not None and state.strip() != '': entry[cm.province_e] = state.strip().upper() state = raw['state'] if state is not None and state.strip() != '': entry[cm.province_e] = state.strip().upper() state = raw['county'] if state is not None and state.strip() != '': entry[cm.province_e] = state.strip().upper() entry[cm.zip_code] = raw['postcode'] entry[cm.country_e] = data['country_e'] entry[cm.city_e] = cm.extract_city(data['city_e'])[0] entry[cm.lat] = string.atof(data['lat']) entry[cm.lng] = string.atof(data['lng']) entry[cm.tel] = raw['phone'] entry[cm.email] = raw['email'] tmp = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] entry[cm.hours] = ', '.join([raw[d + '_open_times'] for d in tmp]) gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % ( data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) db.insert_record(entry, 'stores') return [entry]
def fetch_stores(data): url = data['store_url'] try: body = cm.get_data(url, { 'country': data['country'], 'city': data['city'] }) except Exception: print 'Error occured: %s' % url dump_data = { 'level': 0, 'time': cm.format_time(), 'data': { 'url': url }, 'brand_id': data['brand_id'] } cm.dump(dump_data) return [] raw = json.loads(body) store_list = [] for item in raw['items']: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country'].strip().upper() tmp = cm.extract_city(data['city'])[0] if entry[cm.country_e] == 'USA': entry[cm.province_e] = tmp else: entry[cm.city_e] = tmp gs.field_sense(entry) addr = cm.reformat_addr(item['address'].replace(u'\\', '')) addr_list = [tmp.strip() for tmp in addr.split(',')] tel = cm.extract_tel(addr_list[-1]) if tel != '': entry[cm.tel] = tel del addr_list[-1] entry[cm.addr_e] = ', '.join(addr_list) entry[cm.store_type] = item['shop_type'] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % ( data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) db.insert_record(entry, 'stores') store_list.append(entry) return store_list
def get_detailed_store(html, store_cat): store_list = [] start = 0 while True: sub_html, start, end = common.extract_closure(html, ur"<li\b", ur"</li>") if end == 0: break # 得到单个门店的页面代码 html = html[end:] entry = common.init_store_entry(brand_id, brandname_e, brandname_c) m = re.findall(ur'<div class="store-title -h3a">(.+?)</div>', sub_html) if len(m) > 0: entry[common.name_e] = common.reformat_addr(m[0]) m = re.findall(ur'<div class="store-address">(.+?)</div>', sub_html, re.S) if len(m) > 0: addr = common.reformat_addr(m[0]) # 最后一行是否为电话号码? terms = addr.split(", ") tel = common.extract_tel(terms[-1]) if tel != "": addr = ", ".join(terms[:-1]) entry[common.tel] = tel entry[common.addr_e] = addr # 获得门店类型 # store_type = [store_cat] type_html, type_start, type_end = common.extract_closure(sub_html, ur'<ul class="service-list">', ur"</ul>") if type_end != 0: store_type = [m for m in re.findall(ur'<li class="service-item">(.+?)</li>', type_html)] store_type.insert(0, store_cat) entry[common.store_type] = ", ".join(store_type) else: entry[common.store_type] = store_cat # 获得经纬度 m = re.findall(ur'data-latitude="(-?\d+\.\d+)"', sub_html) if len(m) > 0: entry[common.lat] = string.atof(m[0]) m = re.findall(ur'data-longitude="(-?\d+\.\d+)"', sub_html) if len(m) > 0: entry[common.lng] = string.atof(m[0]) entry[common.city_e] = common.extract_city(data[common.city_e])[0] entry[common.country_e] = common.reformat_addr(data[common.country_e]).strip().upper() gs.field_sense(entry) print "%s: Found store: %s, %s (%s, %s, %s)" % ( brandname_e, entry[common.name_e], entry[common.addr_e], entry[common.city_e], entry[common.country_e], entry[common.continent_e], ) db.insert_record(entry, "stores") store_list.append(entry) return store_list
def fetch_stores(data): url = data['url'] try: body = cm.post_data(url, {'rsp': 'json', 'country': data['country_code']}) except Exception: print 'Error occured: %s' % url dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']} cm.dump(dump_data) return [] raw = json.loads(body) store_list = [] for s in raw['stores']: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.name_e] = cm.html2plain(s['name']).strip() addr_list = [] for key in ['address1', 'address2']: if s[key].strip() != '': addr_list.append(cm.reformat_addr(s[key])) entry[cm.addr_e] = ' '.join(addr_list) # r=s['region'].strip().upper() # m = re.search(ur'\b([A-Z]{2})\b', r) # if data[cm.country_e]=='UNITED STATES' and m is not None: # # 美国 # ret = gs.look_up(m.group(1), 2) # if ret is not None: # r = ret['name_e'] # entry[cm.province_e] = r entry[cm.city_e] = cm.extract_city(s['city'])[0] entry[cm.zip_code] = s['zip'].strip() entry[cm.country_e] = data[cm.country_e] entry[cm.lat] = string.atof(s['lat']) entry[cm.lng] = string.atof(s['lng']) entry[cm.tel] = s['phone'].strip() entry[cm.fax] = s['fax'].strip() entry[cm.email] = s['emailaddress'].strip() entry[cm.url] = s['website'].strip() days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun'] opening = [] if 'openingHours' in s and s['openingHours'] is not None: for m in re.finditer(ur'i:(\d);s:\d+:\\?"([^\\"]+?)\\?"', s['openingHours']): opening.append('%s: %s' % (days[string.atoi(m.group(1))], m.group(2).strip())) entry[cm.hours] = ', '.join(opening) gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e], 
entry[cm.country_e], entry[cm.continent_e]) store_list.append(entry) db.insert_record(entry, 'stores')
def fetch_stores(data): param = {'action': 'getStoresFromAjax', 'country': data['country_code'], 'region': data['city'], 'collection': ''} url = data['url'] try: body = cm.post_data(url, param) except Exception: cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name) return [] store_list = [] for m1 in re.finditer(ur'<div class="shop-type-container">', body): sub = cm.extract_closure(body[m1.start():], ur'<div\b', ur'</div>')[0] store_class = '' m2 = re.search(ur'<div class="shop-type-title">(.+?)</div>', sub, re.S) if m2 is not None: store_class = cm.reformat_addr(m2.group(1)) for m2 in re.finditer(ur'<div class="shop"', sub): store_sub = cm.extract_closure(sub[m2.start():], ur'<div\b', ur'</div>')[0] entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.store_class] = store_class entry[cm.country_e] = data['country_code'] entry[cm.city_e] = cm.extract_city(data['city'])[0] m3 = re.search(ur'loadStore\((\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\)', store_sub) if m3 is not None: data['store_id'] = string.atoi(m3.group(1)) entry[cm.lat] = string.atof(m3.group(2)) entry[cm.lng] = string.atof(m3.group(3)) entry[cm.store_type] = ', '.join(get_detail(data)) m3 = re.search(ur'<div class="shop-name shop-details shop-main-name">([^<>]+)</div>', store_sub) if m3 is not None: entry[cm.name_e] = m3.group(1).strip() addr_list = [] m3 = re.search(ur'<div class="shop-street shop-details">([^<>]+)</div>', store_sub) if m3 is not None: addr_list.append(cm.reformat_addr(m3.group(1))) m3 = re.search(ur'<div class="shop-city shop-details">([^<>]+)</div>', store_sub) if m3 is not None: tmp = cm.reformat_addr(m3.group(1)) m3 = re.search(ur'(\d{4,})', tmp) if m3 is not None: entry[cm.zip_code] = m3.group(1).strip() addr_list.append(tmp) entry[cm.addr_e] = ', '.join(addr_list) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] 
gs.field_sense(entry) cm.dump('(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry)
def fetch_stores(data): url = data['url'] param = {'country_id': data['country_code'], 'city': '', 'label_id': '', 'lang': 'en'} try: body = cm.get_data(url, param) except Exception: cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name) return [] start = body.find(ur'<stores>') if start == -1: cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name) return [] body = cm.extract_closure(body[start:], ur'<stores>', ur'</stores>')[0] store_list=[] for m in re.findall(ur'<store\b[^<>]+>(.+?)</store>', body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country_code'] m1 = re.search(ur'<name>(.+?)</name>', m) if m1 is not None: entry[cm.name_e] = cm.reformat_addr(m1.group(1).strip()) m1 = re.search(ur'<address>(.+?)</address>', m) if m1 is not None: entry[cm.addr_e] = cm.reformat_addr(m1.group(1).strip()) m1 = re.search(ur'<city>(.+)</city>', m) if m1 is not None: entry[cm.city_e] = cm.extract_city(m1.group(1))[0] m1 = re.search(ur'<zip>(.+?)</zip>', m) if m1 is not None: entry[cm.zip_code] = m1.group(1).strip() m1 = re.search(ur'<tel>(.+?)</tel>', m) if m1 is not None: entry[cm.tel] = m1.group(1).strip() m1 = re.search(ur'<fax>(.+?)</fax>', m) if m1 is not None: entry[cm.fax] = m1.group(1).strip() m1 = re.search(ur'<email>(.+?)</email>', m) if m1 is not None: entry[cm.email] = m1.group(1).strip() m1 = re.search(ur'<link>(.+?)</link>', m) if m1 is not None: entry[cm.url] = m1.group(1).strip() gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') 
store_list.append(entry)
def fetch_stores(data):
    """Fetch the JSON store list of one country and record every store.

    :param data: crawl context; "host", "country_url" (a format string
        filled with "country_id"), "brand_id", "brandname_e" and
        "brandname_c" are read.
    :return: list of the entries found, or an empty list when the download
        fails.
    """
    url = data["host"] + data["country_url"] % data["country_id"]
    try:
        body = cm.get_data(url)
    except Exception:
        # This request fetches stores; the original message said
        # "countries", which misdirects anyone reading the log.
        cm.dump("Error in fetching stores: %s" % url, log_name)
        return []

    raw = json.loads(body)["rawPos"]
    store_list = []
    for s in raw:
        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])

        # address1..address4, skipping blank lines.
        addr_list = []
        for i in xrange(1, 5):
            line = cm.html2plain(s["address%d" % i]).strip()
            if line != "":
                addr_list.append(line)
        entry[cm.addr_e] = ", ".join(addr_list)

        entry[cm.city_e] = cm.extract_city(s["city"]["name"])[0]
        entry[cm.country_e] = s["country"]["countryCode"]
        entry[cm.email] = s["email"]
        entry[cm.fax] = s["fax"]
        if s["latitude"] != "":
            entry[cm.lat] = string.atof(s["latitude"])
        if s["longitude"] != "":
            entry[cm.lng] = string.atof(s["longitude"])
        entry[cm.hours] = cm.reformat_addr(s["openingSchedule"])

        phone_list = [s[key].strip() for key in ["phone1", "phone2"] if s[key].strip() != ""]
        entry[cm.tel] = ", ".join(phone_list)
        entry[cm.zip_code] = s["postalCode"]
        entry[cm.name_e] = s["shopName"]

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == "":
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == "":
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        cm.dump(
            "(%s / %d) Found store: %s, %s (%s, %s)"
            % (
                data["brandname_e"],
                data["brand_id"],
                entry[cm.name_e],
                entry[cm.addr_e],
                entry[cm.country_e],
                entry[cm.continent_e],
            ),
            log_name,
        )
        db.insert_record(entry, "stores")
        store_list.append(entry)
    return store_list
def fetch_uk(body, data): start = body.find(u'<div class="fableft">') if start == -1: print "Error in finding %s stores" % data["name"] return [] body, start, end = cm.extract_closure(body[start:], ur"<div\b", ur"</div>") if end == 0: print "Error in finding %s stores" % data["name"] return [] store_list = [] for m in re.findall(ur"<div>\s*(.+?)\s*</div>", body, re.S): entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"]) entry[cm.country_e] = data["name"] addr_list = re.findall(ur"<p>\s*(.+?)\s*</p>", m) tel = cm.extract_tel(addr_list[-1]) if tel != "": entry[cm.tel] = tel del addr_list[-1] if data["name"] == "AUSTRALIA": country, province, city = gs.addr_sense(", ".join(addr_list), data["name"]) if city is not None: entry[cm.city_e] = city if province is not None: entry[cm.province_e] = province else: city = addr_list[-2].strip().upper() entry[cm.city_e] = city ret = gs.look_up(city, 3) if ret is not None and ret["country"]["name_e"] == gs.look_up("UK", 1)["name_e"]: entry[cm.city_e] = ret["name_e"] entry[cm.zip_code] = addr_list[-1].strip().upper() entry[cm.addr_e] = ", ".join(addr_list) entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == "": entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == "": entry[cm.city_e] = ret[2] gs.field_sense(entry) print "(%s / %d) Found store: %s, %s (%s, %s, %s)" % ( data["brandname_e"], data["brand_id"], entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e], ) db.insert_record(entry, "stores") store_list.append(entry)
def get_detailed_store(html, store_cat): store_list = [] start = 0 while True: sub_html, start, end = common.extract_closure(html, ur'<li\b', ur'</li>') if end == 0: break # 得到单个门店的页面代码 html = html[end:] entry = common.init_store_entry(brand_id, brandname_e, brandname_c) m = re.findall(ur'<div class="store-title -h3a">(.+?)</div>', sub_html) if len(m) > 0: entry[common.name_e] = common.reformat_addr(m[0]) m = re.findall(ur'<div class="store-address">(.+?)</div>', sub_html, re.S) if len(m) > 0: addr = common.reformat_addr(m[0]) # 最后一行是否为电话号码? terms = addr.split(', ') tel = common.extract_tel(terms[-1]) if tel != '': addr = ', '.join(terms[:-1]) entry[common.tel] = tel entry[common.addr_e] = addr # 获得门店类型 # store_type = [store_cat] type_html, type_start, type_end = common.extract_closure(sub_html, ur'<ul class="service-list">', ur'</ul>') if type_end != 0: store_type = [m for m in re.findall(ur'<li class="service-item">(.+?)</li>', type_html)] store_type.insert(0, store_cat) entry[common.store_type] = ', '.join(store_type) else: entry[common.store_type] = store_cat # 获得经纬度 m = re.findall(ur'data-latitude="(-?\d+\.\d+)"', sub_html) if len(m) > 0: entry[common.lat] = string.atof(m[0]) m = re.findall(ur'data-longitude="(-?\d+\.\d+)"', sub_html) if len(m) > 0: entry[common.lng] = string.atof(m[0]) entry[common.city_e] = common.extract_city(data[common.city_e])[0] entry[common.country_e] = common.reformat_addr(data[common.country_e]).strip().upper() gs.field_sense(entry) print '%s: Found store: %s, %s (%s, %s, %s)' % ( brandname_e, entry[common.name_e], entry[common.addr_e], entry[common.city_e], entry[common.country_e], entry[common.continent_e]) db.insert_record(entry, 'stores') store_list.append(entry) return store_list
def f(m): store_name = m[0].strip() addr_str = m[1].strip() spl = addr_str.split('<br/>') store_type = cm.html2plain(spl[0].strip()) store_addr = spl[1].strip() hour_idx = 2 store_tel = '' for i in xrange(2, len(spl)): # If this is not a phone number: tel = cm.extract_tel(spl[i]) if tel == '': store_addr += ', ' + spl[i] hour_idx = i + 1 else: store_tel = spl[i].strip() hour_idx = i + 1 break if hour_idx < len(spl): store_hour = cm.html2plain(', '.join(spl[hour_idx:])).strip() else: store_hour = '' # store_addr = cm.reformat_addr('\r\n'.join([val.strip() for val in spl[1:-3]])) store_addr = cm.reformat_addr(store_addr) store_entry = cm.init_store_entry(brand_id, brandname_e, brandname_c) cm.update_entry(store_entry, {cm.continent_e: opt[cm.continent_e].strip().upper(), cm.city_e: opt[cm.city_e].strip().upper(), cm.country_e: opt[cm.country_e].strip().upper(), cm.name_e: cm.name_e, cm.addr_e: store_addr, cm.store_type: store_type, cm.hours: store_hour, cm.tel: store_tel}) if opt.has_key(cm.province_e): store_entry[cm.province_e] = opt[cm.province_e] else: store_entry[cm.province_e] = '' store_entry[cm.city_e] = cm.extract_city(store_entry[cm.city_e])[0] gs.field_sense(store_entry) ret = gs.addr_sense(store_entry[cm.addr_e], store_entry[cm.country_e]) if ret[1] is not None and store_entry[cm.province_e] == '': store_entry[cm.province_e] = ret[1] if ret[2] is not None and store_entry[cm.city_e] == '': store_entry[cm.city_e] = ret[2] gs.field_sense(store_entry) print '%s Found store: %s, %s (%s, %s)' % ( brandname_e, store_entry[cm.name_e], store_entry[cm.addr_e], store_entry[cm.country_e], store_entry[cm.continent_e]) db.insert_record(store_entry, 'stores') return store_entry
def fetch_stores(data): """ 获得门店的详细信息 :rtype : [entries] :param data: """ try: html = cm.get_data(data['url']) except Exception: print 'Error occured: %s' % url dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id} cm.dump(dump_data) return [] entries = [] start = html.find(u'<ul class="store-list">') if start == -1: return entries start += len(u'<ul class="store-list">') end = html.find(u'</ul>', start) html = html[start:end] for m1 in re.findall(ur'<li class="(.*?)">(.*?)</li>', html, re.S): store = cm.init_store_entry(brand_id, brandname_e, brandname_c) store[cm.store_type] = m1[0] sub_html = m1[1] m2 = re.findall(ur'<h3 class="store-name">(.*?)</h3>', sub_html) if len(m2) > 0: store[cm.name_e] = cm.reformat_addr(m2[0]) m2 = re.findall(ur'<p class="store-address">(.*?)</p>', sub_html, re.S) if len(m2) > 0: store[cm.addr_e] = cm.reformat_addr(m2[0]) cm.update_entry(store, {cm.continent_e: data[cm.continent_e].strip().upper(), cm.country_e: data[cm.country_e].strip().upper(), cm.city_e: data[cm.city_e].strip().upper()}) entry = store gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0] print '%s: Found store: %s, %s (%s, %s)' % ( brandname_e, store[cm.name_e], store[cm.addr_e], store[cm.country_e], store[cm.continent_e]) db.insert_record(store, 'stores') entries.append(store)
def fetch_stores(data):
    """Fetch the stores of one city via 'getStoresByCity' and record them.

    :param data: crawl context; 'url', 'city_id', 'country_code', 'brand_id',
        'brandname_e' and 'brandname_c' are read.
    :return: list of the entries found, or an empty list when the download
        fails.
    """
    url = data['url']
    param = {'action': 'getStoresByCity', 'idCity': data['city_id'], 'filter': 'clothing;lacoste%20l!ve'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    raw = json.loads(body)['root']['DATA']['stores']
    store_list = []
    for s in [tmp['store'] for tmp in raw]:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = s['name'].strip()
        entry[cm.country_e] = data['country_code']
        entry[cm.addr_e] = cm.html2plain(s['address']).strip()
        entry[cm.store_type] = s['category'].strip()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]

        # Optional fields are None when absent.
        if s['email'] is not None:
            entry[cm.email] = s['email'].strip()
        if s['fax'] is not None:
            entry[cm.fax] = s['fax'].strip()
        if s['infoHours'] is not None:
            entry[cm.hours] = s['infoHours'].strip()
        if s['latitude'] is not None and s['latitude'].strip() != '':
            entry[cm.lat] = string.atof(s['latitude'])
        if s['longitude'] is not None and s['longitude'].strip() != '':
            # The original wrote the longitude into cm.lat, clobbering the
            # latitude and leaving cm.lng unset.
            entry[cm.lng] = string.atof(s['longitude'])
        if s['phone'] is not None:
            entry[cm.tel] = s['phone'].strip()
        if s['postCode'] is not None:
            entry[cm.zip_code] = s['postCode'].strip()

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        cm.dump(
            '(%s / %d) Found store: %s, %s (%s, %s)' %
            (data['brandname_e'], data['brand_id'], entry[cm.name_e],
             entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
            log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    return store_list
def fetch_stores(data):
    """Fetch a country's shops JSON and record distributors and shops.

    :param data: crawl context; 'host', 'country_id', 'country', 'brand_id',
        'brandname_e' and 'brandname_c' are read.
    :return: list of the entries inserted (distributors first, then shops),
        or an empty list when the download or JSON parsing fails.
    """
    log_file = 'unode50_log.txt'
    host = data['host']
    quoted_country = urllib.quote(data['country_id'].encode('utf-8'))
    url = '%s/en/shops/%s.json' % (host, quoted_country)
    try:
        raw = json.loads(cm.get_data(url))
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_file)
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    # Tag every raw record with its class before building entries.
    store_items = []
    for group, label in (('distributors', 'distributor'), ('shops', 'shop')):
        for record in raw[group]:
            record['store_class'] = label
            store_items.append(record)

    store_list = []
    for record in store_items:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.store_class] = record['store_class']
        entry[cm.country_e] = data['country']
        entry[cm.name_e] = record['title']
        # Coordinates are passed through str() before conversion.
        if record['lat'] is not None:
            entry[cm.lat] = string.atof(str(record['lat']))
        if record['lng'] is not None:
            entry[cm.lng] = string.atof(str(record['lng']))
        entry[cm.addr_e] = record['address']
        entry[cm.city_e] = cm.extract_city(record['city'])[0]
        entry[cm.tel] = record['phone']
        entry[cm.zip_code] = record['postal_code']
        gs.field_sense(entry)

        summary = '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e],
            entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e])
        cm.dump(summary, log_file)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    return store_list
def fetch_uk(body, data): start = body.find(u'<div class="fableft">') if start == -1: print 'Error in finding %s stores' % data['name'] return [] body, start, end = cm.extract_closure(body[start:], ur'<div\b', ur'</div>') if end == 0: print 'Error in finding %s stores' % data['name'] return [] store_list = [] for m in re.findall(ur'<div>\s*(.+?)\s*</div>', body, re.S): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['name'] addr_list = re.findall(ur'<p>\s*(.+?)\s*</p>', m) tel = cm.extract_tel(addr_list[-1]) if tel != '': entry[cm.tel] = tel del addr_list[-1] if data['name'] == 'AUSTRALIA': country, province, city = gs.addr_sense(', '.join(addr_list), data['name']) if city is not None: entry[cm.city_e] = city if province is not None: entry[cm.province_e] = province else: city = addr_list[-2].strip().upper() entry[cm.city_e] = city ret = gs.look_up(city, 3) if ret is not None and ret['country']['name_e'] == gs.look_up('UK', 1)['name_e']: entry[cm.city_e] = ret['name_e'] entry[cm.zip_code] = addr_list[-1].strip().upper() entry[cm.addr_e] = ', '.join(addr_list) entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e]) db.insert_record(entry, 'stores') store_list.append(entry)
def fetch_stores(data): url = data['store_url'] param = {'myid': data['key'], 'idioma': 'in'} try: body = cm.get_data(url, param) except Exception: cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name) return [] store_list = [] for s in json.loads(body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country'] entry[cm.city_e] = cm.extract_city(data['city'])[0] entry[cm.name_e] = cm.reformat_addr(s['title']) m = re.search(ur'(.+?)-\s*<', s['key']) addr_list = [entry[cm.name_e]] if m is not None: m1 = re.search(ur'-+', m.group(1)) if m1 is not None: tmp = [m.group(1)[:m1.start()], m.group(1)[m1.end():]] else: tmp = [m.group(1)] if len(tmp) > 1: entry[cm.tel] = cm.extract_tel(tmp[1]) m1 = re.search(ur'\d{4,}', tmp[0]) if m1 is not None: entry[cm.zip_code] = m1.group() addr_list.append(tmp[0].strip()) entry[cm.addr_e] = ', '.join(addr_list) gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) cm.dump( '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry) return store_list
def fetch_stores(data):
    """Page through a country's JSON store listing and record every store.

    The first response for a country also fills region_map[country_code]
    with a RegionId -> Name mapping, which is used to set the province.

    :param data: crawl context; 'data_url', 'country_code', 'brand_id',
        'brandname_e' and 'brandname_c' are read.
    :return: list of the entries found. If any page fails to download, an
        empty tuple is returned instead, although stores from earlier pages
        have already been inserted.
    """
    url = data['data_url']
    param = {'output': 'json', 'country': data['country_code'], 'brand': 'dkny'}
    page = 0
    tot_page = -1  # unknown until the first response arrives
    store_list = []
    while True:
        page += 1
        if tot_page != -1 and page > tot_page:
            break
        param['p'] = page
        try:
            body = cm.get_data(url, param)
        except Exception:
            cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
            return ()

        raw = json.loads(body)
        tot_page = raw['Stores']['TotalPages']
        if data['country_code'] not in region_map:
            # Build the region (state) list.
            region_map[data['country_code']] = dict((item['RegionId'], item['Name']) for item in raw['Regions'])

        for s in raw['Stores']['Items']:
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.country_e] = data['country_code'].upper()
            entry[cm.city_e] = cm.extract_city(s['City'])[0]
            entry[cm.name_e] = cm.html2plain(s['Name']).strip()
            entry[cm.addr_e] = cm.reformat_addr(s['Address'])
            entry[cm.tel] = s['Phone'].strip() if s['Phone'] else ''
            entry[cm.fax] = s['Fax'].strip() if s['Fax'] else ''
            entry[cm.email] = s['Email'].strip() if s['Email'] else ''
            entry[cm.lat] = s['Latitude'] if s['Latitude'] else ''
            entry[cm.lng] = s['Longitude'] if s['Longitude'] else ''

            region_id = s['RegionId']
            if region_id in region_map[data['country_code']]:
                entry[cm.province_e] = cm.html2plain(region_map[data['country_code']][region_id]).strip().upper()

            gs.field_sense(entry)
            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            if ret[2] is not None and entry[cm.city_e] == '':
                entry[cm.city_e] = ret[2]
            gs.field_sense(entry)

            cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                              entry[cm.name_e], entry[cm.addr_e],
                                                              entry[cm.country_e],
                                                              entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)

    # The original fell off the end and returned None on success.
    return store_list
def fetch_stores(data): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) code = data['country_code'] if gs.look_up(code, 1) is None: entry[cm.country_e] = cm.html2plain(data['country']).strip().upper() else: entry[cm.country_e] = code entry[cm.name_e] = data['store_name'] entry[cm.city_e] = cm.extract_city(data['city'])[0] entry[cm.lat] = data['lat'] if data['lat'] is not None else '' entry[cm.lng] = data['lng'] if data['lng'] is not None else '' m = re.search(ur'data-boutique\s*=\s*"%s"' % data['store_id'], data['content']) sub = data['content'][m.end():] m1 = re.search(ur'<li class="isDistributeur[^<>]+>(.+?)</li>', sub) if m1 is not None: entry[cm.store_class] = cm.reformat_addr(m1.group(1)) m1 = re.search(ur'<li class="place-title[^<>]+>(.+?)</li>', sub, re.S) if m1 is not None: entry[cm.addr_e] = cm.reformat_addr(m1.group(1)) m1 = re.search(ur'<li class="contacts[^<>]+>(.+?)</li>', sub, re.S) if m1 is not None: m2 = re.search(ur'<a class="popupLaunch" href="([^"]+)"', m1.group(1)) if m2: entry = fetch_details(data, m2.group(1), entry) m2 = re.search(ur'<p>(.+?)</p>', m1.group(1), re.S) if m2: ct_list = tuple(tmp.strip() for tmp in cm.reformat_addr(m2.group(1)).split(',')) entry[cm.tel] = cm.extract_tel(ct_list[0]) if len(ct_list) > 1: entry[cm.email] = ct_list[1].strip() gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') return tuple(entry)
def fetch_stores(data): url = data['store_url'] try: body = cm.get_data(url, {'country': data['country'], 'city': data['city']}) except Exception: print 'Error occured: %s' % url dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']} cm.dump(dump_data) return [] raw = json.loads(body) store_list = [] for item in raw['items']: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country'].strip().upper() tmp = cm.extract_city(data['city'])[0] if entry[cm.country_e] == 'USA': entry[cm.province_e] = tmp else: entry[cm.city_e] = tmp gs.field_sense(entry) addr = cm.reformat_addr(item['address'].replace(u'\\', '')) addr_list = [tmp.strip() for tmp in addr.split(',')] tel = cm.extract_tel(addr_list[-1]) if tel !='': entry[cm.tel]=tel del addr_list[-1] entry[cm.addr_e]=', '.join(addr_list) entry[cm.store_type] = item['shop_type'] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) db.insert_record(entry, 'stores') store_list.append(entry) return store_list
def fetch_details(data): url = data[cm.url] try: html = cm.get_data(url) except Exception: print 'Error occured: %s' % url dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']} cm.dump(dump_data) return [] entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.name_e] = data[cm.name_e] start = html.find(ur'<div class="field-address">') if start == -1: return [] sub, start, end = cm.extract_closure(html[start:], ur'<div\b', ur'</div>') if end == 0: return [] m1 = re.search(ur'<div class="locality">(.+?)</div>', sub) if m1 is not None: entry[cm.city_e] = cm.extract_city(m1.group(1))[0] m1 = re.search(ur'<div class="postal-code">(.+?)</div>', sub) if m1 is not None: entry[cm.zip_code] = m1.group(1).strip() entry[cm.country_e] = data[cm.country_e] pat = re.compile(ur'<[^<>]+?>', re.S) entry[cm.addr_e] = cm.reformat_addr(re.sub(pat, u'\r\n', sub)) m1 = re.search(ur'<div class="field-telephone"><a href=".+?" class="tel">(.+?)</a></div>', html) if m1 is not None: entry[cm.tel] = m1.group(1).strip() m1 = re.search(ur'<div class="field-opening-hours">\s*<p>(.+?)</p>\s*</div>', html, re.S) if m1 is not None: entry[cm.hours] = cm.reformat_addr(m1.group(1)) m1 = re.search(ur'"coordinates":\[(-?\d+\.\d{4,})\s*,\s*(-?\d+\.\d{4,})\]', html) if m1 is not None: lat = string.atof(m1.group(1)) lng = string.atof(m1.group(2)) cm.update_entry(entry, {cm.lat: lat, cm.lng: lng}) entry[cm.continent_e] = data[cm.continent_e] gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) db.insert_record(entry, 'stores') return [entry]
def fetch_stores(data):
    """Fetch the points of sale of one country and record each of them.

    :param data: dict providing ``host``, ``country_url`` (a format string
        taking the country id), ``country_id``, ``brand_id``,
        ``brandname_e`` and ``brandname_c``.
    :return: list of the entries inserted; ``[]`` if the request fails.
    """
    url = data['host'] + data['country_url'] % data['country_id']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []

    collected = []
    for s in json.loads(body)['rawPos']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

        # address1..address4, keeping only the non-empty lines.
        lines = [cm.html2plain(s['address%d' % idx]).strip() for idx in xrange(1, 5)]
        entry[cm.addr_e] = ', '.join([line for line in lines if line != ''])

        entry[cm.city_e] = cm.extract_city(s['city']['name'])[0]
        entry[cm.country_e] = s['country']['countryCode']
        entry[cm.email] = s['email']
        entry[cm.fax] = s['fax']
        if s['latitude'] != '':
            entry[cm.lat] = string.atof(s['latitude'])
        if s['longitude'] != '':
            entry[cm.lng] = string.atof(s['longitude'])
        entry[cm.hours] = cm.reformat_addr(s['openingSchedule'])

        phones = []
        for phone_key in ['phone1', 'phone2']:
            number = s[phone_key].strip()
            if number != '':
                phones.append(number)
        entry[cm.tel] = ', '.join(phones)
        entry[cm.zip_code] = s['postalCode']
        entry[cm.name_e] = s['shopName']

        gs.field_sense(entry)
        sensed = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Province and city are only taken from the address when still empty.
        if entry[cm.province_e] == '' and sensed[1] is not None:
            entry[cm.province_e] = sensed[1]
        if entry[cm.city_e] == '' and sensed[2] is not None:
            entry[cm.city_e] = sensed[2]
        gs.field_sense(entry)

        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e],
                                                            entry[cm.country_e], entry[cm.continent_e]),
                log_name)
        db.insert_record(entry, 'stores')
        collected.append(entry)
    return collected
def fetch_stores(data): url = data['store_url'] param = {'myid': data['key'], 'idioma': 'in'} try: body = cm.get_data(url, param) except Exception: cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name) return [] store_list = [] for s in json.loads(body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country'] entry[cm.city_e] = cm.extract_city(data['city'])[0] entry[cm.name_e] = cm.reformat_addr(s['title']) m = re.search(ur'(.+?)-\s*<', s['key']) addr_list = [entry[cm.name_e]] if m is not None: m1 = re.search(ur'-+', m.group(1)) if m1 is not None: tmp = [m.group(1)[:m1.start()], m.group(1)[m1.end():]] else: tmp = [m.group(1)] if len(tmp) > 1: entry[cm.tel] = cm.extract_tel(tmp[1]) m1 = re.search(ur'\d{4,}', tmp[0]) if m1 is not None: entry[cm.zip_code] = m1.group() addr_list.append(tmp[0].strip()) entry[cm.addr_e] = ', '.join(addr_list) gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry) return store_list
def fetch_store_details(data): # http://maps.oasis-stores.com/index-v2.php?coutnryISO=GB&brand=oasis&lat=51.42014&lng=-0.20954 url = data['store_url'] code = data['country_code'] city = data['city_e'] try: html = cm.get_data(url, {'latitude': data['lat'], 'longitude': data['lng'], 'brand': 'oasis'}) except Exception: print 'Error occured: %s' % url dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']} cm.dump(dump_data) return [] raw = json.loads(html) entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.name_e] = raw['name'] addr_list = [] for i in xrange(1, 4): tmp = cm.html2plain(raw['address%d' % i]).strip() if tmp!='': addr_list.append(tmp) entry[cm.addr_e] = ', '.join(addr_list) state = raw['countryRegion'] if state is not None and state.strip() != '': entry[cm.province_e] = state.strip().upper() state = raw['state'] if state is not None and state.strip() != '': entry[cm.province_e] = state.strip().upper() state = raw['county'] if state is not None and state.strip() != '': entry[cm.province_e] = state.strip().upper() entry[cm.zip_code] = raw['postcode'] entry[cm.country_e] = data['country_e'] entry[cm.city_e] = cm.extract_city(data['city_e'])[0] entry[cm.lat] = string.atof(data['lat']) entry[cm.lng] = string.atof(data['lng']) entry[cm.tel] = raw['phone'] entry[cm.email] = raw['email'] tmp = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'] entry[cm.hours] = ', '.join([raw[d + '_open_times'] for d in tmp]) gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) db.insert_record(entry, 'stores') return [entry]
def fetch_stores(data): url = data['home_url'] try: body = cm.post_data(url, {'lz_sf': data['province'], 'lz_sx': data['city']}) except Exception: cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']), 'samsonite_log.txt') dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']} cm.dump(dump_data) return [] start = body.find(u'搜索结果') if start == -1: cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']), 'samsonite_log.txt') return [] body = body[start + 4:] store_list = [] for m in re.findall(ur'</script>\s*(\S+)\s*</span>', body, re.S): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.name_e] = m.strip() entry[cm.addr_e] = m.strip() entry[cm.city_c] = data['city'] ret = gs.look_up(data['city'], 3) if ret is not None: entry[cm.city_e] = cm.extract_city(ret['name_e'])[0] if ret['province'] != '': entry[cm.province_e] = ret['province']['name_e'] entry[cm.province_c] = data['province'] ret = gs.look_up(data['province'], 2) if ret is not None: entry[cm.province_e] = ret['name_e'] entry[cm.country_e] = u'CHINA' gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), 'benetton_log.txt', False) db.insert_record(entry, 'stores') store_list.append(entry)
def get_store_details(data): url = data['url'] try: html = cm.post_data(url, {'country': data['country_id'], 'city': data['city_id'], 'recordid': data['store_id']}) except Exception: print 'Error occured: %s' % url dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id} cm.dump(dump_data) return [] entry = cm.init_store_entry(brand_id, brandname_e, brandname_c) info = json.loads(html)['elements'] addr = cm.reformat_addr(info['address'].replace('\\', '').replace('<p>', ',').replace('</p>', ',')) # 第一行为商店名称 terms = addr.split(',') if len(terms) > 0: entry[cm.name_e] = cm.reformat_addr(terms[0]) entry[cm.addr_e] = addr gmap_url = info['gmap'] m = re.findall(ur'(-?\d+\.\d+),(-?\d+\.\d+)', gmap_url) if len(m) > 0: cm.update_entry(entry, {cm.lat: string.atof(m[0][0]), cm.lng: string.atof(m[0][1])}) entry[cm.url] = info['shareurl'].replace('\\', '') entry[cm.hours] = info['openingtimes'] entry[cm.comments] = info['other'] # Geo country = data['country'] city = data['city'] cm.update_entry(entry, {cm.country_e: country, cm.city_e: city}) entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % ( brandname_e, brand_id, entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) db.insert_record(entry, 'stores') return entry
def fetch_stores(data):
    """Fetch the stores of one city and record each of them.

    :param data: dict providing ``url``, ``city_id``, ``country_code``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries inserted; ``[]`` if the request fails.
    """
    url = data['url']
    param = {'action': 'getStoresByCity', 'idCity': data['city_id'], 'filter': 'clothing;lacoste%20l!ve'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    raw = json.loads(body)['root']['DATA']['stores']
    store_list = []
    for s in [tmp['store'] for tmp in raw]:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = s['name'].strip()
        entry[cm.country_e] = data['country_code']
        entry[cm.addr_e] = cm.html2plain(s['address']).strip()
        entry[cm.store_type] = s['category'].strip()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        if s['email'] is not None:
            entry[cm.email] = s['email'].strip()
        if s['fax'] is not None:
            entry[cm.fax] = s['fax'].strip()
        if s['infoHours'] is not None:
            entry[cm.hours] = s['infoHours'].strip()
        if s['latitude'] is not None and s['latitude'].strip() != '':
            entry[cm.lat] = string.atof(s['latitude'])
        if s['longitude'] is not None and s['longitude'].strip() != '':
            # Was assigned to cm.lat, which overwrote the latitude and left
            # the longitude field unset.
            entry[cm.lng] = string.atof(s['longitude'])
        if s['phone'] is not None:
            entry[cm.tel] = s['phone'].strip()
        if s['postCode'] is not None:
            entry[cm.zip_code] = s['postCode'].strip()

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Only fill province/city from the address when still empty.
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e],
                                                            entry[cm.country_e], entry[cm.continent_e]),
                log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    return store_list
def fetch_cities(data): url = data['location_url'] try: body = cm.get_data(url, {'lang': data['lang'], 'country': data['country_id'], 'region': data['region_id']}) except Exception: cm.dump('Error in fetching cities: %s, %s' % (url, data['region']), 'tudor_log.txt') dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']} cm.dump(dump_data) return [] results = [] for item in pq(body.encode('utf-8'))('city[id!=""]'): d = data.copy() d['city_id'] = string.atoi(item.attrib['id']) tmp = item.attrib['name'] d['city_name'] = cm.extract_city(re.sub(ur'市$', '', re.sub(ur'省$', '', tmp).strip()).strip())[0] results.append(d) return results
def fetch_stores(data): # country=Greece&city=ATHENS&adutl=+01&kids=+02&undercolor=+06&togetmap=mapdata url = data['data_url'] param = {'country': data['country'], 'city': data['city'], 'adutl': ' 01', 'kids': ' 02', 'undercolor': ' 06', 'togetmap': 'mapdata'} try: body = cm.get_data(url, param) except Exception: cm.dump('Error in fetching stores: %s, %s' % (url, param), 'benetton_log.txt', False) dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']} cm.dump(dump_data) return [] store_list = [] for m in re.findall(ur'<marker (.+?)>', body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) m1 = re.search(ur'name=\\"(.+?)\\"', m) if m1 is not None: entry[cm.name_e] = cm.html2plain(m1.group(1).strip().replace(u'\\', '')) m1 = re.search(ur'address=\\"(.+?)\\"', m) if m1 is not None: addr = cm.reformat_addr(cm.html2plain(m1.group(1)).replace(u'\\', '')) tel = cm.extract_tel(addr) if tel != '': entry[cm.tel] = tel addr = addr.replace(tel, '') entry[cm.addr_e] = cm.reformat_addr(addr) m1 = re.search(ur'lat=\\"(.+?)\\"', m) if m1 is not None: entry[cm.lat] = string.atof(m1.group(1)) m1 = re.search(ur'lng=\\"(.+?)\\"', m) if m1 is not None: entry[cm.lng] = string.atof(m1.group(1)) entry[cm.country_e] = data['country'].strip().upper() entry[cm.city_e] = cm.extract_city(data['city'])[0] gs.field_sense(entry) cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), 'benetton_log.txt', False) db.insert_record(entry, 'stores') store_list.append(entry)
def fetch_stores(data):
    """Parse the ``var retailers = [...]`` array embedded in a page into entries.

    Fetches ``data['url']``, cuts out the JavaScript array that follows
    ``var retailers =``, quotes its bare object keys so it parses as JSON and
    fills name, address, country, city and province for each element.

    Returns ``[]`` when the page cannot be fetched or the array cannot be
    located.

    NOTE(review): as visible here the function ends after the province is
    set; entries are never appended to ``store_list`` and nothing is returned
    on the success path. The body looks truncated -- confirm against the
    original source.
    """
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    m = re.search(ur'var\s+retailers\s*=\s*', body)
    if m is None:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    # The array is assumed to end at the first ']' after the assignment.
    end = body.find(u']', m.end())
    if end == -1:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    # NOTE(review): 'pat' is not used in the visible part of the function.
    pat = re.compile(ur'[\{,]([a-zA-Z_\d]+):')
    store_list = []
    # Wrap bare keys ({key: / ,key:) in double quotes before JSON decoding.
    for s in json.loads(
            re.sub(re.compile(ur'([\{,])([a-zA-Z_\d]+):'), ur'\1"\2":', body[m.end():end + 1])):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        # Name: the non-empty values among 'name' and 'name_line_2'.
        name_list = []
        for tmp in ['name', 'name_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(
                    s[tmp]).strip() != '':
                name_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.name_e] = ', '.join(name_list)
        # Address: the non-empty values among 'address' and 'address_line_2'.
        addr_list = []
        for tmp in ['address', 'address_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(
                    s[tmp]).strip() != '':
                addr_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.addr_e] = ', '.join(addr_list)
        entry[cm.country_e] = s['country'].strip().upper()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        region = cm.html2plain(s['region'])
        # A region containing digits, '&' or ';' is not used as a province.
        if re.search(
                ur'\d+', region) is None and '&' not in region and ';' not in region:
            entry[cm.province_e] = region.strip().upper()
def fetch_stores(data):
    """Fetch the distributors and shops of one country and record them.

    :param data: dict providing ``host``, ``country_id``, ``country``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries inserted (distributors first, then shops);
        ``[]`` if the request or the JSON decoding fails.
    """
    url = '%s/en/shops/%s.json' % (data['host'], urllib.quote(data['country_id'].encode('utf-8')))
    try:
        body = cm.get_data(url)
        raw = json.loads(body)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, 'unode50_log.txt')
        cm.dump({'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']})
        return []

    # Tag every item with the list it came from, then process them together.
    tagged = []
    for group_key, label in (('distributors', 'distributor'), ('shops', 'shop')):
        for item in raw[group_key]:
            item['store_class'] = label
            tagged.append(item)

    collected = []
    for item in tagged:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.store_class] = item['store_class']
        entry[cm.country_e] = data['country']
        entry[cm.name_e] = item['title']
        if item['lat'] is not None:
            entry[cm.lat] = string.atof(str(item['lat']))
        if item['lng'] is not None:
            entry[cm.lng] = string.atof(str(item['lng']))
        entry[cm.addr_e] = item['address']
        entry[cm.city_e] = cm.extract_city(item['city'])[0]
        entry[cm.tel] = item['phone']
        entry[cm.zip_code] = item['postal_code']
        gs.field_sense(entry)

        cm.dump('(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                entry[cm.name_e], entry[cm.addr_e],
                                                                entry[cm.city_e], entry[cm.country_e],
                                                                entry[cm.continent_e]),
                'unode50_log.txt')
        db.insert_record(entry, 'stores')
        collected.append(entry)
    return collected
def fetch_stores(data): url = data['host'] + data['store_url'] param = {'CC': data['country_code'], 'City': data['city']} try: body = cm.get_data(url, param) except Exception: cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name) return [] store_list = [] # pat_tel = re.compile(ur'tel:\s*', re.I) # pat_fax = re.compile(ur'fax:\s*', re.I) # pat_email = re.compile(ur'email:\s*', re.I) pat_tel = re.compile(ur'tel:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S) pat_fax = re.compile(ur'fax:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S) pat_email = re.compile(ur'email:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S) for m in re.finditer(ur'<div class="store-info">', body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country_code'] entry[cm.city_e] = cm.extract_city(data['city'])[0] sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0] m1 = re.search(ur'<h2 class="store-name[^"]*">(.+?)</h2>', sub) if m1 is not None: entry[cm.name_e] = cm.reformat_addr(m1.group(1)) entry[cm.store_class] = entry[cm.name_e] m1 = re.search(ur'<dt class="address"', sub) if m1 is not None: tmp = cm.reformat_addr( cm.extract_closure(sub[m1.end():], ur'<dd>', ur'</dd>')[0]) entry[cm.addr_e] = tmp if len(tmp) > 1: m1 = re.search(ur'[\d\-]{4,}', tmp.split(',')[-2]) if m1 is not None and len(re.findall(ur'\d', m1.group())) >= 4: entry[cm.zip_code] = m1.group().strip()
def fetch_stores(data):
    """Parse the ``var retailers = [...]`` array embedded in a page into entries.

    Fetches ``data['url']``, cuts out the JavaScript array that follows
    ``var retailers =``, quotes its bare object keys so it parses as JSON and
    fills name, address, country, city and province for each element.

    Returns ``[]`` when the page cannot be fetched or the array cannot be
    located.

    NOTE(review): as visible here the function ends after the province is
    set; entries are never appended to ``store_list`` and nothing is returned
    on the success path. The body looks truncated -- confirm against the
    original source.
    """
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    m = re.search(ur'var\s+retailers\s*=\s*', body)
    if m is None:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    # The array is assumed to end at the first ']' after the assignment.
    end = body.find(u']', m.end())
    if end == -1:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    # NOTE(review): 'pat' is not used in the visible part of the function.
    pat = re.compile(ur'[\{,]([a-zA-Z_\d]+):')
    store_list = []
    # Wrap bare keys ({key: / ,key:) in double quotes before JSON decoding.
    for s in json.loads(re.sub(re.compile(ur'([\{,])([a-zA-Z_\d]+):'), ur'\1"\2":', body[m.end():end + 1])):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        # Name: the non-empty values among 'name' and 'name_line_2'.
        name_list = []
        for tmp in ['name', 'name_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(s[tmp]).strip() != '':
                name_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.name_e] = ', '.join(name_list)
        # Address: the non-empty values among 'address' and 'address_line_2'.
        addr_list = []
        for tmp in ['address', 'address_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(s[tmp]).strip() != '':
                addr_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.addr_e] = ', '.join(addr_list)
        entry[cm.country_e] = s['country'].strip().upper()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        region = cm.html2plain(s['region'])
        # A region containing digits, '&' or ';' is not used as a province.
        if re.search(ur'\d+', region) is None and '&' not in region and ';' not in region:
            entry[cm.province_e] = region.strip().upper()
def fetch_stores(data): url = data['host'] + data['store_url'] param = {'CC': data['country_code'], 'City': data['city']} try: body = cm.get_data(url, param) except Exception: cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name) return [] store_list = [] # pat_tel = re.compile(ur'tel:\s*', re.I) # pat_fax = re.compile(ur'fax:\s*', re.I) # pat_email = re.compile(ur'email:\s*', re.I) pat_tel = re.compile(ur'tel:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S) pat_fax = re.compile(ur'fax:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S) pat_email = re.compile(ur'email:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S) for m in re.finditer(ur'<div class="store-info">', body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country_code'] entry[cm.city_e] = cm.extract_city(data['city'])[0] sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0] m1 = re.search(ur'<h2 class="store-name[^"]*">(.+?)</h2>', sub) if m1 is not None: entry[cm.name_e] = cm.reformat_addr(m1.group(1)) entry[cm.store_class] = entry[cm.name_e] m1 = re.search(ur'<dt class="address"', sub) if m1 is not None: tmp = cm.reformat_addr(cm.extract_closure(sub[m1.end():], ur'<dd>', ur'</dd>')[0]) entry[cm.addr_e] = tmp if len(tmp) > 1: m1 = re.search(ur'[\d\-]{4,}', tmp.split(',')[-2]) if m1 is not None and len(re.findall(ur'\d', m1.group())) >= 4: entry[cm.zip_code] = m1.group().strip()
def fetch_stores(data): url = data['url'] if 'page' in data: page = data['page'] else: page = 1 tot = -1 tot_page = -1 store_ids = set([]) store_list = [] page_size = 400 f = open('err_log_%s.log' % data['brandname_e'], 'w') while True: msg = 'Fetching page %d...' % page print msg f.write('%s\n' % msg) try: # html = cm.get_data(url, {'brand': 'adidas', 'geoengine': 'google', 'method': 'get', # 'category': 'store', 'latlng': '51.729663,5.310298,100', # 'page': '%d' % page, 'pagesize': page_size, # 'fields': 'name,street1,street2,addressline,buildingname,postal_code,city,' # 'state,store_owner,country,storetype,longitude_google,' # 'latitude_google,store_owner,state,performance,brand_store,' # 'factory_outlet,originals,neo_label,y3,slvr,children,woman,' # 'footwear,football,basketball,outdoor,porsche_design,miadidas,' # 'miteam,stella_mccartney,eyewear,micoach,opening_ceremony', # 'format': 'json', 'storetype': ''}) html = cm.get_data( url, { 'brand': 'adidas', 'geoengine': 'google', 'method': 'get', 'category': 'store', 'latlng': '31.22434895,121.47675279999999, 10000', 'page': '%d' % page, 'pagesize': page_size, 'fields': 'name,street1,street2,addressline,buildingname,postal_code,city,' 'state,store_owner,country,storetype,longitude_google,' 'latitude_google,store_owner,state,performance,brand_store,' 'factory_outlet,originals,neo_label,y3,slvr,children,woman,' 'footwear,football,basketball,outdoor,porsche_design,miadidas,' 'miteam,stella_mccartney,eyewear,micoach,opening_ceremony', 'format': 'json', 'storetype': '' }) except Exception: msg = 'Error occured: %s' % url print msg f.write('%s\n' % msg) dump_data = { 'level': 0, 'time': cm.format_time(), 'data': { 'url': url }, 'brand_id': data['brand_id'] } cm.dump(dump_data) page += 1 if 'page' in data: break else: if page > tot_page: break else: continue try: start = html.find('{') if start != -1: html = html[start:] # 去掉控制字符 pat = re.compile(u'[\r\n]') html = re.sub(pat, ' ', html) pat = re.compile(ur'\\.') html = 
re.sub(pat, ' ', html) raw = json.loads(html)['wsResponse'] if tot == -1: tot = string.atoi(raw['results']) tot_page = (tot - 1) / page_size + 1 raw = raw['result'] def addr_func(addr_list, addr_map, key): if key in addr_map: addr_list.append(addr_map[key].strip()) for s in raw: try: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) if s['id'] in store_ids: continue store_ids.add(s['id']) entry[cm.name_e] = s['name'] addr_list = [] map(lambda key: addr_func(addr_list, s, key), ['addressline', 'buildingname', 'street1', 'street2']) entry[cm.addr_e] = ', '.join(addr_list) entry[cm.city_e] = cm.extract_city(s['city'])[0] entry[cm.country_e] = s['country'].strip().upper() if 'storetype' in s: entry[cm.store_type] = s['storetype'] entry[cm.lat] = string.atof(s['latitude_google']) entry[cm.lng] = string.atof(s['longitude_google']) entry[cm.store_class] = 'adidas' gs.field_sense(entry) msg = '(%s / %d) Found store: %s, %s (%s, %s, %s)' % ( data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e]) print msg f.write('%s\n' % msg.encode('utf-8')) store_list.append(entry) db.insert_record(entry, 'stores') except Exception, e: msg = 'Error processing. Reason: %s, content: %s' % ( str(e), s) print msg f.write('%s\n' % msg.encode('utf-8')) except Exception, e: msg = 'Error processing page %d, reason: %s' % (page, str(e)) print msg f.write('%s\n' % msg) finally:
param = {'lang': 'EN_US', 'geo_id': data['city_id']} try: body = cm.get_data(url, param) except Exception, e: cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name) return [] store_list = [] raw = json.loads(body) if 'storeList' not in raw: return [] for s in raw['storeList']: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.city_e] = cm.extract_city(data['city'])[0] entry[cm.province_e] = data['state'].upper() entry[cm.country_e] = data['country'].upper() entry[cm.store_class] = s['type']['name'] entry[cm.store_type] = ', '.join(type_map[item['name']] for item in s['categories']) entry[cm.name_e] = s['name'].strip() loc = s['location'] entry[cm.addr_e] = cm.reformat_addr(loc['address']) if 'phone' in loc and loc['phone'] is not None: entry[cm.tel] = loc['phone'].strip() if 'fax' in loc and loc['fax'] is not None: entry[cm.fax] = loc['fax'].strip() if 'postalCode' in loc and loc['postalCode'] is not None: entry[cm.zip_code] = loc['postalCode'].strip()
entry[cm.province_e] = cm.html2plain(val).strip().upper() val = s['description'] if val: entry[cm.comments] = cm.html2plain(val).strip().upper() addr_list = [] for key in ('street', 'street2'): if not s[key]: continue term = cm.reformat_addr(s[key]) if term != '': addr_list.append(term) entry[cm.addr_e] = ', '.join(addr_list) entry[cm.store_type] = ', '.join(item['code'] for item in s['categories']) entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) if '???' not in entry[cm.addr_e] and '???' not in entry[cm.name_e]: cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') return entry else:
re.S): entry[common.hours] = common.reformat_addr(m1) break m1 = re.findall(ur'href="/(.+?)" title="View on map"', sub_html) if len(m1) > 0: entry[common.url] = host + '/' + m1[0] lat, lng = get_coordinates(entry[common.url]) common.update_entry(entry, {common.lat: lat, common.lng: lng}) # geo city_e = cities[city_id]['name'].strip() country_e = cities[city_id]['country']['name'].strip().upper() continent_e = cities[city_id]['country']['continent'].strip().upper() common.update_entry(entry, {common.city_e: common.extract_city(city_e)[0], common.country_e: country_e, common.continent_e: continent_e}) gs.field_sense(entry) # ret = common.geo_translate(country_e.strip()) # if len(ret) > 0: # common.update_entry(entry, {common.continent_c: ret[common.continent_c], # common.continent_e: ret[common.continent_e], # common.country_c: ret[common.country_c], # common.country_e: ret[common.country_e]}) # common.update_entry(entry, {common.brandname_c: brandname_c, common.brandname_e: brandname_e}) # common.chn_check(entry) print '%s Found store: %s, %s (%s, %s)' % ( brandname_e, entry[common.name_e], entry[common.addr_e], entry[common.country_e], entry[common.continent_e]) db.insert_record(entry, 'stores')
try: entry[cm.lng] = string.atof(store.attrib['data-longitude']) except (ValueError, KeyError, TypeError) as e: cm.dump('Error in fetching lng: %s' % str(e), log_name) if entry[cm.lat] == 0 and entry[cm.lng] == 0: entry[cm.lat], entry[cm.lng] = '', '' item = pq(store) tmp = item('h1') entry[cm.name_e] = cm.html2plain( tmp[0].text).strip() if len(tmp) > 0 and tmp[0].text else '' tmp = item('dd.location') tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else '' entry[cm.city_e] = cm.extract_city(tmp)[0] tmp = item('dd.street') tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else '' entry[cm.addr_e] = cm.reformat_addr(tmp) tmp = item('dd.phone') tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else '' entry[cm.tel] = tmp.strip() tmp = item('dd.hours') tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else '' entry[cm.hours] = tmp.strip() tmp = item('dd.products') tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else ''
city = '' country = '' for m1 in re.findall( ur'<span itemprop="streetAddress">(.*?)</span>', m, re.S): if len(m1.strip()) > 0: street_addr = cm.reformat_addr(m1) break for m1 in re.findall(ur'<span itemprop="postalCode">(.*?)</span>', m): if len(m1.strip()) > 0: zip_code = m1 break for m1 in re.findall( ur'<span itemprop="addressLocality">(.*?)</span>', m): if len(m1.strip()) > 0: city = cm.extract_city(m1)[0] break for m1 in re.findall( ur'<span itemprop="addressCountry">(.*?)</span>', m): if len(m1.strip()) > 0: country = m1 break entry[cm.zip_code] = zip_code # 没有上述标签的情况 if street_addr == '': tmp = cm.reformat_addr(m) terms = tmp.split(',') ret = gs.look_up(terms[-1], 1) if ret is not None: # t2 = cm.geo_translate(terms[-1]) # if len(t2) != 0:
def fetch_stores(data): url = data['url'] try: body = cm.post_data( url, { 'searchtype': 'normal', 'reiter_selected': 'reiter1', 'country_id': data['country_code'], 'city_id': data['city'] }) except Exception: print 'Error occured: %s' % url dump_data = { 'level': 0, 'time': cm.format_time(), 'data': { 'url': url }, 'brand_id': data['brand_id'] } cm.dump(dump_data) return [] store_list = [] while True: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) m = re.search(ur'<h4>\s*(.+?)\s*</h4>', body) if m is None: break entry[cm.store_class] = m.group(1) end = body.find(u'</div>', m.end()) sub = body[m.end():end] body = body[end:] if ('Country' in m.group(1) and 'Language' in m.group(1)) \ or 'href' in m.group(1) or 'products' in m.group(1): continue tmp = cm.reformat_addr(sub).split(',') addr_list = [] for term in tmp: if u'Show on map' in term: continue elif u'電話' in term or u'Phone' in term: entry[cm.tel] = term.replace(u'電話', '').replace(u'Phone', '').strip() else: addr_list.append(term) entry[cm.addr_e] = ', '.join(addr_list) m = re.search(re.compile(ur'<h4>(products|產品)</h4>', re.I), body) if m is not None: end = body.find(ur'</div>', m.end()) entry[cm.store_type] = cm.reformat_addr(body[m.end():end]) # tmp = re.compile(ur'<h4>products</h4>', re.I) # m = re.search(tmp, body[end:]) # if mis # prodstart = body.find(, end) # if prodstart!=-1: # prodstart += len(u'<h4>產品</h4>') # prodend = body.find(u'</div>', prodstart) # entry[cm.store_type] = cm.reformat_addr(body[prodstart:prodend]) entry[cm.country_e] = data['country_code'] entry[cm.city_e] = cm.extract_city(data['city'])[0] gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % ( data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) store_list.append(entry) db.insert_record(entry, 'stores') return store_list
return () store_list = [] body = re.sub(ur'GetLocalLevisCallback\(', '', body)[:-1] for s in json.loads(body)['d']['results']: try: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) uid = s['__metadata']['uri'] if uid in store_map: cm.dump(u'%s already exists.' % uid, log_name) continue entry[cm.country_e] = cm.html2plain(s['CountryRegion']).strip().upper() entry[cm.native_id] = uid entry[cm.city_e] = cm.extract_city(s['Locality'])[0] entry[cm.addr_e] = cm.reformat_addr(s['AddressLine']) entry[cm.zip_code] = s['PostalCode'] entry[cm.tel] = s['Phone'] entry[cm.name_e] = cm.html2plain(s['BranchName']).strip() if s['BranchName'] else '' try: entry[cm.lat] = string.atof(s['Latitude']) if s['Latitude'] != '' else '' except (ValueError, KeyError, TypeError) as e: cm.dump('Error in fetching lat: %s' % str(e), log_name) try: entry[cm.lng] = string.atof(s['Longitude']) if s['Longitude'] != '' else '' except (ValueError, KeyError, TypeError) as e: cm.dump('Error in fetching lng: %s' % str(e), log_name)
while True: entry = cm.init_store_entry(brand_id, brandname_e, brandname_c) sub, start, end = cm.extract_closure(html, ur'\{', ur'\}') if end == 0: break js = json.loads(sub) start = end html = html[start:] raw = js['StoreLocator'] entry[cm.name_e] = raw['name'] addr1 = raw['adress1'] addr2 = raw['adress2'] entry[cm.addr_e] = cm.reformat_addr(', '.join([addr1, addr2])) entry[cm.zip_code] = raw['postcode'] entry[cm.city_e] = cm.extract_city(raw['city'])[0] entry[cm.province_e] = raw['region'] entry[cm.tel] = raw['phone'] entry[cm.fax] = raw['fax'] entry[cm.email] = raw['email'] entry[cm.hours] = cm.reformat_addr(raw['opening']) entry[cm.lat] = string.atof(raw['latitude']) entry[cm.lng] = string.atof(raw['longitude']) entry[cm.url] = raw['link'].replace('\\', '') raw = js['Country'] entry[cm.country_e] = raw['name'] raw = js['StoreLocatorType'] entry[cm.store_type] = raw['name'] gs.field_sense(entry)
url = data['url'] param = {'operation': 'coSearch', 'numResults': 999999, 'mnlt': -89, 'mxlt': 89, 'mnln': -179, 'mxln': 179, 'token': 'LEVI', 'heavy': 'true'} try: body = cm.get_data(url, param) except Exception, e: cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name) return () store_list = [] for s in json.loads(body)['RESULTS']: s = s['store'] entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = s['countryCode'] entry[cm.city_e] = cm.extract_city(s['city'])[0] entry[cm.province_e] = s['stateCode'] addr_list = [] if 'address1' in s: val = s['address1'] val = cm.html2plain(val).strip() if val else '' if val != '': addr_list.append(val) if 'address2' in s: val = s['address2'] val = cm.html2plain(val).strip() if val else '' if val != '': addr_list.append(val) entry[cm.addr_e] = ', '.join(addr_list)
try: try: entry[cm.lat] = string.atof(str(s['location'][0])) entry[cm.lng] = string.atof(str(s['location'][1])) except (KeyError, IndexError, ValueError, TypeError): pass s = s['content'] try: entry[cm.name_e] = cm.html2plain(s['title']).strip() except (KeyError, TypeError): pass tmp_list = s['analytics_label'].split('-') entry[cm.country_e] = tmp_list[0] entry[cm.city_e] = cm.extract_city(tmp_list[1])[0] try: entry[cm.addr_e] = cm.reformat_addr(s['address']).strip() except (KeyError, TypeError): pass try: entry[cm.fax] = s['fax'].strip() except (KeyError, TypeError): pass try: entry[cm.tel] = s['phone'].strip() except (KeyError, TypeError): pass try:
entry[common.hours] = common.reformat_addr(m1) break m1 = re.findall(ur'href="/(.+?)" title="View on map"', sub_html) if len(m1) > 0: entry[common.url] = host + '/' + m1[0] lat, lng = get_coordinates(entry[common.url]) common.update_entry(entry, {common.lat: lat, common.lng: lng}) # geo city_e = cities[city_id]['name'].strip() country_e = cities[city_id]['country']['name'].strip().upper() continent_e = cities[city_id]['country']['continent'].strip().upper() common.update_entry( entry, { common.city_e: common.extract_city(city_e)[0], common.country_e: country_e, common.continent_e: continent_e }) gs.field_sense(entry) # ret = common.geo_translate(country_e.strip()) # if len(ret) > 0: # common.update_entry(entry, {common.continent_c: ret[common.continent_c], # common.continent_e: ret[common.continent_e], # common.country_c: ret[common.country_c], # common.country_e: ret[common.country_e]}) # common.update_entry(entry, {common.brandname_c: brandname_c, common.brandname_e: brandname_e}) # common.chn_check(entry) print '%s Found store: %s, %s (%s, %s)' % (
def fetch_stores(data): print '(%s/%d) Found city: %s' % (data['brandname_e'], data['brand_id'], data['city_e']) url = data['url'] try: html = cm.get_data(url) except Exception: print 'Error occured: %s' % url dump_data = { 'level': 0, 'time': cm.format_time(), 'data': { 'url': url }, 'brand_id': data['brand_id'] } cm.dump(dump_data) return [] # 处理重定向 m = re.search('<h2>Object moved to <a href="(.+?)">', html) if m is not None: data['url'] = data['host'] + m.group(1) return fetch_countries(data) m = re.search('var\s+data\s*=\s*', html) if m is None: return [] sub, start, end = cm.extract_closure(html[m.end():], r'\[', r'\]') if end == 0: return [] store_list = [] for s in json.loads(sub): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) name = s['Name'] if cm.is_chinese(name): entry[cm.name_c] = name else: entry[cm.name_e] = name entry[cm.addr_e] = cm.html2plain(s['Street']) entry[cm.city_e] = cm.extract_city(data['city_e'])[0] entry[cm.country_e] = data['country_e'] entry[cm.province_e] = data['province_e'] pat = re.compile(ur'tel[\.: ]*', re.I) entry[cm.tel] = re.sub(pat, '', s['Phone']).strip() pat = re.compile(ur'fax[\.: ]*', re.I) entry[cm.fax] = re.sub(pat, '', s['Fax']).strip() entry[cm.email] = s['Email'].strip() entry[cm.url] = s['Website'].strip() coord = s['LatLng'] if coord is not None and len(coord) >= 2: if coord[0] is not None: entry[cm.lat] = string.atof(coord[0]) if coord[1] is not None: entry[cm.lng] = string.atof(coord[1]) gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) print '(%s/%d) Found store: %s, %s (%s, %s)' % ( data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) store_list.append(entry) db.insert_record(entry, 'stores') 
return store_list
store_map[val] = entry val = store.getiterator("name")[0].text entry[cm.name_e] = cm.html2plain(val).strip() if val else "" addr_list = [] for idx in xrange(1, 3): val = store.getiterator("address%d" % idx)[0].text if val: val = cm.reformat_addr(val) if val != "": addr_list.append(val) entry[cm.addr_e] = ", ".join(addr_list) val = store.getiterator("city")[0].text entry[cm.city_e] = cm.extract_city(val)[0] if val else "" val = store.getiterator("province")[0].text entry[cm.province_e] = cm.html2plain(val).strip().upper() if val else "" if entry[cm.province_e] == "": val = store.getiterator("state")[0].text entry[cm.province_e] = cm.html2plain(val).strip().upper() if val else "" val = store.getiterator("country")[0].text entry[cm.country_e] = val.strip().upper() if val else "" val = store.getiterator("email")[0].text entry[cm.email] = val if val else "" val = store.getiterator("phone")[0].text entry[cm.tel] = val if val else "" val = store.getiterator("postalcode")[0].text entry[cm.zip_code] = val if val else ""
data['brandname_c']) entry[cm.country_e] = data['country_code'] sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0] m1 = re.search(ur'<div class="fn org">(.+?)</div>', sub, re.S) entry[cm.name_e] = cm.reformat_addr(m1.group(1)) if m1 else '' m1 = re.search(ur'<div class="adr">', sub, re.S) if m1: addr_sub = cm.extract_closure(sub[m1.start():], ur'<div\b', ur'</div>')[0] entry[cm.addr_e] = cm.reformat_addr(addr_sub) m2 = re.search(ur'<span class="locality">([^<>?]+?),*\s*</span>', addr_sub) city = cm.html2plain(m2.group(1)).strip().upper() if m2 else '' entry[cm.city_e] = cm.extract_city(city if city != ',' else '')[0] m2 = re.search(ur'<span\s+class="region"\s+title="([^"]+)"[^<>]*>', addr_sub) entry[cm.province_e] = cm.html2plain( m2.group(1)).strip().upper() if m2 else '' m2 = re.search(ur'<span\s+class="postal-code"[^<>]*>([^<>]+)', addr_sub) entry[cm.zip_code] = m2.group(1).strip() if m2 else '' m1 = re.search(ur'<div class="tel">([^<>]+)</div>', sub, re.S) entry[cm.tel] = m1.group(1).strip() if m1 else '' m1 = re.search(ur'<div class="hours_wrapper">', sub) if m1: hours_sub = cm.extract_closure(sub[m1.start():], ur'<div\b', ur'</div>')[0]
pat = re.compile(ur',\s*"terms":\s*\{.+?\}', re.S) tmp = re.sub(pat, '', tmp) try: raw = json.loads(tmp)['stores'] except ValueError, e: print e store_list = [] for s in raw: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.name_e] = s['name'] entry[cm.country_e] = data['country_code'] addr = s['address'] m = re.search(ur'<span class=\"locality\">([^<>]+?)</span>', addr) if m is not None: entry[cm.city_e] = cm.extract_city(m.group(1))[0] m = re.search(ur'<span class=\"region\">([^<>]+?)</span>', addr) if m is not None: entry[cm.province_e] = m.group(1).strip().upper() m = re.search(ur'<span class=\"postal-code\">([^<>]+?)</span>', addr) if m is not None: entry[cm.zip_code] = m.group(1).strip() entry[cm.addr_e] = cm.reformat_addr(addr) entry[cm.tel] = s['phone'] if s['lat'] is not None and s['lat'] != '': entry[cm.lat] = string.atof(s['lat']) if s['lng'] is not None and s['lng'] != '': entry[cm.lng] = string.atof(s['lng']) entry[cm.store_type] = s['type'] if s['event_link'] is not None:
def fetch_stores(data): param = { 'action': 'getStoresFromAjax', 'country': data['country_code'], 'region': data['city'], 'collection': '' } url = data['url'] try: body = cm.post_data(url, param) except Exception: cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name) return [] store_list = [] for m1 in re.finditer(ur'<div class="shop-type-container">', body): sub = cm.extract_closure(body[m1.start():], ur'<div\b', ur'</div>')[0] store_class = '' m2 = re.search(ur'<div class="shop-type-title">(.+?)</div>', sub, re.S) if m2 is not None: store_class = cm.reformat_addr(m2.group(1)) for m2 in re.finditer(ur'<div class="shop"', sub): store_sub = cm.extract_closure(sub[m2.start():], ur'<div\b', ur'</div>')[0] entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.store_class] = store_class entry[cm.country_e] = data['country_code'] entry[cm.city_e] = cm.extract_city(data['city'])[0] m3 = re.search( ur'loadStore\((\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\)', store_sub) if m3 is not None: data['store_id'] = string.atoi(m3.group(1)) entry[cm.lat] = string.atof(m3.group(2)) entry[cm.lng] = string.atof(m3.group(3)) entry[cm.store_type] = ', '.join(get_detail(data)) m3 = re.search( ur'<div class="shop-name shop-details shop-main-name">([^<>]+)</div>', store_sub) if m3 is not None: entry[cm.name_e] = m3.group(1).strip() addr_list = [] m3 = re.search( ur'<div class="shop-street shop-details">([^<>]+)</div>', store_sub) if m3 is not None: addr_list.append(cm.reformat_addr(m3.group(1))) m3 = re.search( ur'<div class="shop-city shop-details">([^<>]+)</div>', store_sub) if m3 is not None: tmp = cm.reformat_addr(m3.group(1)) m3 = re.search(ur'(\d{4,})', tmp) if m3 is not None: entry[cm.zip_code] = m3.group(1).strip() addr_list.append(tmp) entry[cm.addr_e] = ', '.join(addr_list) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] 
gs.field_sense(entry) cm.dump( '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry)
def fetch_stores(data): url = data['store_url'] try: body = cm.post_data( url, { 'continent': data['continent'], 'country': data['country'], 'city': data['city'], 'send': 1, 'page': 0 }) except Exception: print 'Error occured: %s' % url dump_data = { 'level': 0, 'time': cm.format_time(), 'data': { 'url': url }, 'brand_id': data['brand_id'] } cm.dump(dump_data) return [] store_list = [] for m in re.finditer(ur'<div class="shop">', body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) sub, start, end = cm.extract_closure(body[m.end():], ur'<div\b', ur'</div>') if end == 0: continue m1 = re.search(ur'<h3>\s*(.+?)\s*</h3>', sub, re.S) if m1 is not None: entry[cm.name_e] = m1.group(1) m1 = re.search(ur'<p[^>]*>(.+?)</p>', sub, re.S) if m1 is not None: entry[cm.store_type] = re.sub(re.compile(ur'\s*\+\s*', re.S), ', ', m1.group(1).strip()) addr_sub, start, end = cm.extract_closure(sub, ur'<ul\b', ur'</ul>') if end != 0: tmp = re.findall(ur'<li>\s*(.+?)\s*</li>', addr_sub) addr_list = [] if len(tmp) >= 3: entry[cm.tel] = tmp[-1].strip() del tmp[-1] for term in tmp: term = cm.html2plain(term).strip() if term != '': addr_list.append(term) entry[cm.addr_e] = ', '.join(addr_list) start = sub.lower().find(ur'opening hours') if start != -1: opening_sub, start, end = cm.extract_closure( sub[start:], ur'<ul\b', ur'</ul>') tmp = re.findall(ur'<li>\s*(.+?)\s*</li>', opening_sub) opening_list = [] for term in tmp: term = cm.html2plain(term).strip() if term != '': opening_list.append(term) entry[cm.hours] = ', '.join(opening_list) cm.update_entry( entry, { cm.continent_e: data['continent'].strip().upper(), cm.country_e: data['country'].strip().upper() }) entry[cm.city_e] = cm.extract_city(data['city'])[0] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e]) if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] 
gs.field_sense(entry) print '(%s / %d) Found store: %s, %s (%s, %s)' % ( data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]) store_list.append(entry) db.insert_record(entry, 'stores')