def fetch(level=1, data=None, user='******', passwd=''):
    """Reload every store of the brand into the 'stores' table.

    Deletes the rows of ``brand_id`` from 'stores', downloads the JSON
    document at the module-level ``url`` and inserts one record per item of
    ``js['data']['list']``.

    Args:
        level: not used by this function.
        data: not used by this function.
        user: user name passed to ``db.connect_db``.
        passwd: password passed to ``db.connect_db``.

    Returns:
        The list of inserted store entries, or ``[]`` when the download
        raised (the failure is printed and handed to ``cm.dump``).
    """
    db = cm.StoresDb()
    db.connect_db(user=user, passwd=passwd)
    # try/finally: the connection used to stay open on the early return
    # below and on any exception raised while parsing or inserting.
    try:
        # NOTE(review): the old rows are removed before the download is
        # attempted, so a failed download leaves the brand without rows --
        # confirm this is intended.
        db.execute(u'DELETE FROM %s WHERE brand_id=%d' % ('stores', brand_id))

        try:
            html = cm.get_data(url)
        except Exception:
            print('Error occured: %s' % url)
            dump_data = {
                'level': 1,
                'time': cm.format_time(),
                'data': {'url': url},
                'brand_id': brand_id,
            }
            cm.dump(dump_data)
            return []

        js = json.loads(html)
        store_list = []
        for s in js['data']['list']:
            entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
            # float() replaces string.atof, deprecated since Python 2.0.
            cm.update_entry(entry, {
                cm.lat: float(s['geo']['lat']),
                cm.lng: float(s['geo']['lng']),
            })

            contact = s['contact']
            entry[cm.name_e] = contact['title']
            entry[cm.addr_e] = cm.reformat_addr(contact['address'])
            entry[cm.tel] = contact['phone']
            entry[cm.fax] = contact['fax']
            entry[cm.hours] = cm.reformat_addr(contact['hours'])
            entry[cm.store_type] = contact['selling']
            entry[cm.url] = host + s['link']

            gs.update_city_map(s['city'], s['country'], s['continent'])
            cm.update_entry(entry, {
                cm.continent_e: s['continent'],
                cm.country_e: s['country'],
                cm.city_e: s['city'],
            })
            gs.field_sense(entry)

            print('(%s / %d) Found store: %s, %s (%s, %s)' % (
                brandname_e, brand_id, entry[cm.name_e], entry[cm.addr_e],
                entry[cm.country_e], entry[cm.continent_e]))
            db.insert_record(entry, 'stores')
            store_list.append(entry)
    finally:
        db.disconnect_db()

    # Only reached on success, after the connection is closed -- the same
    # order as before.
    gs.commit_maps(1)
    gs.commit_maps(3)
    return store_list
def _store_entry_from_item(item):
    """Build one store entry from an item of the downloaded store list."""
    entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)

    geo = item['geo']
    # float() replaces string.atof, deprecated since Python 2.0.
    cm.update_entry(entry, {cm.lat: float(geo['lat']),
                            cm.lng: float(geo['lng'])})

    contact = item['contact']
    entry[cm.name_e] = contact['title']
    entry[cm.addr_e] = cm.reformat_addr(contact['address'])
    entry[cm.tel] = contact['phone']
    entry[cm.fax] = contact['fax']
    entry[cm.hours] = cm.reformat_addr(contact['hours'])
    entry[cm.store_type] = contact['selling']
    entry[cm.url] = host + item['link']

    city, country, continent = item['city'], item['country'], item['continent']
    gs.update_city_map(city, country, continent)
    cm.update_entry(entry, {cm.continent_e: continent,
                            cm.country_e: country,
                            cm.city_e: city})
    gs.field_sense(entry)
    return entry


def fetch(level=1, data=None, user='******', passwd=''):
    """Replace the brand's rows in 'stores' with freshly downloaded stores.

    The rows of ``brand_id`` are deleted, the JSON at the module-level
    ``url`` is downloaded, and every item of ``js['data']['list']`` is
    converted to an entry and inserted.

    Args:
        level: not used by this function.
        data: not used by this function.
        user: user name for ``db.connect_db``.
        passwd: password for ``db.connect_db``.

    Returns:
        List of the inserted entries; ``[]`` if the download raised, in
        which case the failure is printed and passed to ``cm.dump``.
    """
    db = cm.StoresDb()
    db.connect_db(user=user, passwd=passwd)
    try:
        # NOTE(review): deletion happens before the download, so a failed
        # download leaves no rows for the brand -- confirm this is wanted.
        db.execute(u'DELETE FROM %s WHERE brand_id=%d' % ('stores', brand_id))

        try:
            html = cm.get_data(url)
        except Exception:
            print('Error occured: %s' % url)
            cm.dump({'level': 1,
                     'time': cm.format_time(),
                     'data': {'url': url},
                     'brand_id': brand_id})
            return []

        store_list = []
        for item in json.loads(html)['data']['list']:
            entry = _store_entry_from_item(item)
            print('(%s / %d) Found store: %s, %s (%s, %s)' % (
                brandname_e, brand_id, entry[cm.name_e], entry[cm.addr_e],
                entry[cm.country_e], entry[cm.continent_e]))
            db.insert_record(entry, 'stores')
            store_list.append(entry)
    finally:
        # Previously skipped on the early return above and on any exception
        # raised while parsing or inserting, leaking the connection.
        db.disconnect_db()

    gs.commit_maps(1)
    gs.commit_maps(3)
    return store_list
def fetch_stores(url, type, data):
    """Walk the donnakaran.com store locator, one level per call.

    The function calls itself to descend: level 0 goes to level 1 for the
    country code 'us' and straight to level 2 otherwise; level 1 goes to
    level 2; level 2 goes to level 3.

    Args:
        url: page to read for this level. Ignored at level 0, which always
            starts from 'http://www.donnakaran.com/store'.
        type: level selector -- 0: countries, 1: US regions, 2: cities,
            3: the stores of one city.
        data: dict of context collected so far. Not read at level 0;
            level 2 reads 'code' and 'region'; level 3 reads 'city_name',
            'country_name' and, when present, 'province_name'.

    Returns:
        Levels 0-2: a list with the non-None results of the deeper calls
        concatenated. Level 3: whatever ``get_stores(url, data)`` returns.
        Any other ``type``: None.
    """
    if type == 0:
        url = 'http://www.donnakaran.com/store'
        opt = ['param param-country',
               r'<option\s+value="/store\?country=(\w+)\s*".*?>([\w\s]+)</option>',
               'http://www.donnakaran.com/store?country=%s']
        countries = get_district(url, opt)
        stores = []
        for c in countries:
            url = 'http://www.donnakaran.com/store/formpartial?' + \
                  urllib.urlencode({'country': c['code']})
            print('Fetching for %s...' % c['name'])
            if c['code'] == 'us':
                # Only the US has an intermediate region level.
                col = fetch_stores(url, 1, {'country_name': c['name'],
                                            'code': c['code']})
            else:
                col = fetch_stores(url, 2, {'country_name': c['name'],
                                            'code': c['code'],
                                            'region': 0})
            if col is not None:
                stores.extend(col)
        return stores

    elif type == 1:
        # Get the list of US states.
        # NOTE(review): the pattern contains a literal '®' where a raw query
        # string would read '&reg' ('country=us&region=...'). It is kept
        # untouched because it may match the markup as get_district sees it
        # -- confirm.
        opt = ['param param-region',
               r'<option value="/store/list\?country=us®ion=(\d+).*?"\s*>([\w\s]+)</option>']
        states = get_district(url, opt)
        stores = []
        for s in states:
            url = 'http://www.donnakaran.com/store/formpartial?' + \
                  urllib.urlencode({'country': 'us', 'region': s['code'], 'p': 1})
            print('Fetching for %s...' % s['name'])
            # Copy so each state gets its own context dict.
            d = dict(data)
            d['region'] = s['code']
            d['province_name'] = s['name'].strip().upper()
            col = fetch_stores(url, 2, d)
            if col is not None:
                stores.extend(col)
        # This return was missing: the branch fell through and yielded None,
        # so the level-0 caller dropped every US store from its result.
        return stores

    elif type == 2:
        # Get the list of cities.
        opt = ['param param-city',
               r'<option\s+value="([^\s]+)"\s*>([\w\s]+)</option>',
               'http://www.donnakaran.com%s']
        cities = get_district(url, opt)
        stores = []
        country_code = data['code']
        region = data['region']
        for c in cities:
            # The store list of a single city is requested as JSON.
            url = 'http://www.donnakaran.com/store/listpartial?' + \
                  urllib.urlencode({'output': 'json', 'country': country_code,
                                    'region': region, 'city': c['name'],
                                    'zip': '', 'brand': __brand__, 'p': 1})
            print('\tFetching for %s...' % c['name'])
            d = dict(data)
            d['city_name'] = c['name']
            col = fetch_stores(url, 3, d)
            if col is not None:
                stores.extend(col)
        return stores

    elif type == 3:
        # Get the stores of one city.
        if 'province_name' in data:
            gs.update_city_map(data['city_name'], data['country_name'],
                               province_name=data['province_name'])
        else:
            gs.update_city_map(data['city_name'], data['country_name'])
        return get_stores(url, data)

    # Unknown level: callers already treat None as "nothing to add".
    return None
def _extend_if_any(stores, col):
    """Append the result of a deeper level to stores, skipping None."""
    if col is not None:
        stores.extend(col)


def _fetch_all_countries():
    """Level 0: list the countries and collect the stores of each one."""
    opt = [
        'param param-country',
        r'<option\s+value="/store\?country=(\w+)\s*".*?>([\w\s]+)</option>',
        'http://www.donnakaran.com/store?country=%s',
    ]
    stores = []
    for country in get_district('http://www.donnakaran.com/store', opt):
        code = country['code']
        name = country['name']
        url = 'http://www.donnakaran.com/store/formpartial?' + \
              urllib.urlencode({'country': code})
        print('Fetching for %s...' % name)
        if code == 'us':
            # Only the US has an intermediate region level.
            col = fetch_stores(url, 1, {'country_name': name, 'code': code})
        else:
            col = fetch_stores(url, 2, {'country_name': name, 'code': code,
                                        'region': 0})
        _extend_if_any(stores, col)
    return stores


def _fetch_us_regions(url, data):
    """Level 1: list the US states and collect the stores of each one."""
    # NOTE(review): the pattern contains a literal '®' where a raw query
    # string would read '&reg' ('country=us&region=...'). It is kept
    # untouched because it may match the markup as get_district sees it --
    # confirm.
    opt = [
        'param param-region',
        r'<option value="/store/list\?country=us®ion=(\d+).*?"\s*>([\w\s]+)</option>',
    ]
    stores = []
    for state in get_district(url, opt):
        state_url = 'http://www.donnakaran.com/store/formpartial?' + \
                    urllib.urlencode({'country': 'us', 'region': state['code'],
                                      'p': 1})
        print('Fetching for %s...' % state['name'])
        # Copy so each state gets its own context dict.
        context = dict(data)
        context['region'] = state['code']
        context['province_name'] = state['name'].strip().upper()
        _extend_if_any(stores, fetch_stores(state_url, 2, context))
    return stores


def _fetch_cities(url, data):
    """Level 2: list the cities and collect the stores of each one."""
    opt = [
        'param param-city',
        r'<option\s+value="([^\s]+)"\s*>([\w\s]+)</option>',
        'http://www.donnakaran.com%s',
    ]
    cities = get_district(url, opt)
    stores = []
    country_code = data['code']
    region = data['region']
    for city in cities:
        # The store list of a single city is requested as JSON.
        city_url = 'http://www.donnakaran.com/store/listpartial?' + \
                   urllib.urlencode({'output': 'json', 'country': country_code,
                                     'region': region, 'city': city['name'],
                                     'zip': '', 'brand': __brand__, 'p': 1})
        print('\tFetching for %s...' % city['name'])
        context = dict(data)
        context['city_name'] = city['name']
        _extend_if_any(stores, fetch_stores(city_url, 3, context))
    return stores


def _fetch_city_stores(url, data):
    """Level 3: register the city with gs, then read its stores."""
    if 'province_name' in data:
        gs.update_city_map(data['city_name'], data['country_name'],
                           province_name=data['province_name'])
    else:
        gs.update_city_map(data['city_name'], data['country_name'])
    return get_stores(url, data)


def fetch_stores(url, type, data):
    """Collect stores from the donnakaran.com store locator at one level.

    Levels chain through recursive calls: 0 -> 1 (country code 'us' only)
    -> 2 -> 3.

    Args:
        url: page to read for this level; ignored at level 0, which always
            starts from 'http://www.donnakaran.com/store'.
        type: level selector -- 0: countries, 1: US regions, 2: cities,
            3: the stores of one city.
        data: dict of context collected so far. Not read at level 0;
            level 2 reads 'code' and 'region'; level 3 reads 'city_name',
            'country_name' and, when present, 'province_name'.

    Returns:
        Levels 0-2: a list with the non-None results of the deeper calls
        concatenated. Level 1 used to fall through and return None, which
        made level 0 drop all US stores; it now returns its list like the
        other levels. Level 3: whatever ``get_stores(url, data)`` returns.
        Any other ``type``: None.
    """
    if type == 0:
        return _fetch_all_countries()
    if type == 1:
        return _fetch_us_regions(url, data)
    if type == 2:
        return _fetch_cities(url, data)
    if type == 3:
        return _fetch_city_stores(url, data)
    # Unknown level: callers already treat None as "nothing to add".
    return None