gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) if entry[cm.country_e] == '' or entry[cm.city_e] == '': ret = None location_valid = True if entry[cm.lat] != '' and entry[cm.lng] != '': ret = gs.geocode(latlng='%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is None: location_valid = False ret = gs.geocode( '%s, %s, %s' % (entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e])) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper()
entry[cm.lng] = data['lng'] gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) if entry[cm.country_e] == '' or entry[cm.city_e] == '': ret = None if entry[cm.lat] != '' and entry[cm.lng] != '': ret = gs.geocode(latlng='%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is None: ret = gs.geocode(entry[cm.addr_e]) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']:
gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) if entry[cm.country_e] == '' or entry[cm.city_e] == '': ret = None location_valid = True if entry[cm.lat] != '' and entry[cm.lng] != '': ret = gs.geocode(latlng='%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is None: location_valid = False ret = gs.geocode(', '.join((entry[cm.addr_e], entry[cm.country_e]))) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper()
entry = fetch_contact_info(data, entry, s['id']) gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) if entry[cm.country_e] == '' or entry[cm.city_e] == '': ret = None if entry[cm.lat] != '' and entry[cm.lng] != '': ret = gs.geocode(latlng='%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is None: ret = gs.geocode(', '.join((entry[cm.addr_e], s['city']))) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']:
sub = cm.extract_closure(body[start:], ur'<div\b', ur'</div>')[0] m = re.search(ur'<div class="box-adress-store">(.+?)</div>', sub, re.S) if m is None: cm.dump('Error in fetching stores: %s' % url, log_name) return [] entry[cm.addr_e] = cm.reformat_addr(m.group(1)) m = re.search(ur'<h4>(.+?)</h4>', sub) if m is not None and 't:' in m.group(1).lower(): entry[cm.tel] = cm.extract_tel(m.group(1)) m = re.search(ur'<div class="box-open-store">(.+?)</div>', body, re.S) if m is not None: entry[cm.hours] = cm.reformat_addr(m.group(1)) ret = None if entry[cm.lat] != '' and entry[cm.lng] != '': ret = gs.geocode(latlng='%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is None: ret = gs.geocode(', '.join((entry[cm.addr_e], data['zone']))) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']: country = v['long_name'].strip().upper()
def fetch_stores(data): url = data['url'] try: body = cm.get_data(url) except Exception: cm.dump('Error in fetching stores: %s' % url, log_name) return [] store_list = [] for m in re.finditer(ur'<div class="searchResult[^"]*"', body): if 'intro' in m.group(): continue sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0] m1 = re.search(ur'<div id=[^<>]+>(.+?)</div>', sub) if m1 is None: continue entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.country_e] = data['country'] entry[cm.city_e] = data['city'] addr_list = [ tmp.strip() for tmp in cm.reformat_addr(m1.group(1)).split(',') ] tel = cm.extract_tel(addr_list[-1]) if tel != '': entry[cm.tel] = tel del addr_list[-1] else: m1 = re.search(ur'Tel:([^<>]+)', sub) if m1 is not None: entry[cm.tel] = cm.extract_tel(m1.group(1)) entry[cm.addr_e] = ', '.join(addr_list) m1 = re.search(ur"show_map\('(-?\d+\.\d+)'\s*,\s*'(-?\d+\.\d+)'", sub) if m1 is not None: entry[cm.lat] = string.atof(m1.group(1)) entry[cm.lng] = string.atof(m1.group(2)) start = sub.find(ur'Opening hours:') if start != -1: entry[cm.hours] = cm.extract_closure(sub[start:], ur'<p>', ur'</p>')[0].strip() ret = None if entry[cm.lat] != '' and entry[cm.lng] != '': ret = gs.geocode(latlng='%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is None: tmp = [tmp1.strip() for tmp1 in entry[cm.addr_e].split(',')] if 'Max Mara' in tmp[0]: del tmp[0] if len(tmp) > 0: ret = gs.geocode(', '.join(tmp)) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']: country = v['long_name'].strip().upper() elif 'postal_code' in v['types']: zip_code = v['long_name'].strip() entry[cm.country_e] = country entry[cm.province_e] = province entry[cm.city_e] = city entry[cm.zip_code] = zip_code gs.field_sense(entry) cm.dump( '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry)
def fetch_stores(data): url = data['url'] try: body = cm.get_data(url) except Exception: cm.dump('Error in fetching stores: %s' % url, log_name) return [] store_list = [] for m in re.finditer(ur'<div\s+class\s*=\s*"storeItem"', body): entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) sub = cm.extract_closure(body[m.end():], ur'<div\b', ur'</div>')[0] m1 = re.search(ur'<div class="bubbleInfo">(.+?)</div>', sub) if m1 is not None: entry[cm.addr_e] = cm.reformat_addr(m1.group(1)) m1 = re.search(ur'lat="(-?\d+\.\d+)"', sub) if m1 is not None: entry[cm.lat] = string.atof(m1.group(1)) m1 = re.search(ur'lng="(-?\d+\.\d+)"', sub) if m1 is not None: entry[cm.lng] = string.atof(m1.group(1)) m1 = re.search(ur'<span>\s*Tel:\s*([^<>]+)</span>', sub) if m1 is not None: entry[cm.tel] = m1.group(1).strip() m1 = re.search(ur'http://maps\.google\.com/maps\?q=([^&"]+)', sub) if m1 is None: continue ret = gs.geocode(latlng=m1.group(1)) if ret is None: tmp = [tmp1.strip() for tmp1 in entry[cm.addr_e].split(',')] if 'MAX' in tmp[0]: del tmp[0] if cm.extract_tel(tmp[-1]) != '': del tmp[-1] if len(tmp) > 0: ret = gs.geocode(', '.join(tmp)) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']: country = v['long_name'].strip().upper() elif 'postal_code' in v['types']: zip_code = v['long_name'].strip() entry[cm.country_e] = country entry[cm.province_e] = province entry[cm.city_e] = city entry[cm.zip_code] = zip_code gs.field_sense(entry) cm.dump( '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry) else: cm.dump( 'Error in fetching stores: latlng=%s, addr=%s' % (m1.group(1), entry[cm.addr_e]), log_name) continue
type_list = [] for item in pq(body)('#map-panel ul li'): if item.text: val = cm.html2plain(item.text).strip() if val != '': type_list.append(val) entry[cm.store_type] = ', '.join(type_list) tmp = pq(body)('#map-panel iframe[src!=""]') if len(tmp) > 0: # map_url = tmp[0].attrib['src'] m = re.search(ur'daddr=([^&]+)', tmp[0].attrib['src']) if m: map_url = 'http://maps.googleapis.com/maps/api/geocode/json?address=%s&sensor=false' % m.group( 1) ret = gs.geocode(url=map_url) if ret: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']: country = v['long_name'].strip().upper() elif 'postal_code' in v['types']: zip_code = v['long_name'].strip()
def fetch_stores(data): url = data['url'] try: body = cm.get_data(url) except Exception: cm.dump('Error in fetching stores: %s' % url, log_name) return [] m = re.search(ur'var\s+geoShops\s*=', body) if m is None: cm.dump('Error in fetching stores: %s' % url, log_name) return [] tmp = cm.extract_closure(body[m.end():], ur'\[', ur'\]')[0] raw = json.loads( re.sub(ur'(?<!")(city|address|lat|lng)(?!")', ur'"\1"', tmp)) store_list = [] for s in raw: entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c']) entry[cm.city_e] = s['city'].strip().upper() if s['lat'] is not None and s['lat'] != '': entry[cm.lat] = string.atof(s['lat']) if s['lng'] is not None and s['lng'] != '': entry[cm.lng] = string.atof(s['lng']) addr = cm.reformat_addr(s['address']) pat = re.compile(ur'ph[\.:](.*)$', re.I) m = re.search(pat, addr) if m is not None: entry[cm.tel] = m.group(1).strip() entry[cm.addr_e] = re.sub(pat, '', addr).strip() addr1 = re.sub(ur'[\u2e80-\u9fff]+', '', '%s, %s' % (addr, s['city'])).strip() ret = gs.geocode(addr1, '%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is None: ret = gs.geocode(addr1) if ret is None: ret = gs.geocode(latlng='%f,%f' % (entry[cm.lat], entry[cm.lng])) if ret is not None: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']: country = v['long_name'].strip().upper() elif 'postal_code' in v['types']: zip_code = v['long_name'].strip() entry[cm.country_e] = country entry[cm.province_e] = province entry[cm.city_e] = city entry[cm.zip_code] = zip_code else: ret = gs.addr_sense(addr1) if ret[0] is not None: entry[cm.country_e] = ret[0] if ret[1] is not None: entry[cm.province_e] = ret[1] if ret[2] is not None: entry[cm.city_e] = ret[2] gs.field_sense(entry) cm.dump( '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'], entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]), log_name) db.insert_record(entry, 'stores') store_list.append(entry) return store_list
entry[cm.city_e], entry[cm.city_c] = city_e, city_c entry[cm.name_e] = name entry[cm.addr_e] = name gs.field_sense(entry) ret = gs.addr_sense(entry[cm.addr_e]) if ret[0] is not None and entry[cm.country_e] == '': entry[cm.country_e] = ret[0] if ret[1] is not None and entry[cm.province_e] == '': entry[cm.province_e] = ret[1] if ret[2] is not None and entry[cm.city_e] == '': entry[cm.city_e] = ret[2] gs.field_sense(entry) if entry[cm.country_e] == '' or entry[cm.city_e] == '': ret = gs.geocode(', '.join( (entry[cm.name_e], entry[cm.city_c], entry[cm.country_c]))) if not ret: ret = gs.geocode(', '.join( (entry[cm.city_c], entry[cm.country_c]))) if ret: city = '' province = '' country = '' zip_code = '' tmp = ret[0]['address_components'] for v in tmp: if 'locality' in v['types']: city = v['long_name'].strip().upper() elif 'administrative_area_level_1' in v['types']: province = v['long_name'].strip().upper() elif 'country' in v['types']: