예제 #1
0
def fetch_store_detail(s, data, isOfficial=False):
    """
    Build a store entry from one raw store record.

    :param s: raw store record; the keys read here are 'name', 'city',
        'address', 'email', 'phone', 'fax', 'lat' and 'lng'.
    :param data: crawl context; the keys read here are 'brand_id',
        'brandname_e', 'brandname_c', 'country' and 'city'.
    :param isOfficial: when true the store class is 'Official Retailer',
        otherwise 'Retailer'.
    :return: the populated entry.
    """
    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

    entry[cm.name_e] = cm.html2plain(s['name']).strip()
    entry[cm.country_e] = data['country']

    # Prefer the record's own city; fall back to the context city when blank.
    city_text = cm.html2plain(s['city']).strip().upper()
    if not city_text:
        city_text = data['city']
    entry[cm.city_e] = cm.extract_city(city_text)[0]

    entry[cm.addr_e] = cm.html2plain(s['address']).strip()
    entry[cm.email] = s['email'].strip()
    entry[cm.tel] = s['phone'].strip()
    entry[cm.fax] = s['fax'].strip()
    if isOfficial:
        entry[cm.store_class] = 'Official Retailer'
    else:
        entry[cm.store_class] = 'Retailer'

    # Coordinates are best-effort: a missing or malformed value is logged
    # and the entry keeps whatever it already held for that field.
    try:
        if s['lat'] != '':
            entry[cm.lat] = string.atof(s['lat'])
        else:
            entry[cm.lat] = ''
    except (ValueError, KeyError, TypeError) as err:
        cm.dump('Error in fetching lat: %s' % str(err), log_name)
    try:
        if s['lng'] != '':
            entry[cm.lng] = string.atof(s['lng'])
        else:
            entry[cm.lng] = ''
    except (ValueError, KeyError, TypeError) as err:
        cm.dump('Error in fetching lng: %s' % str(err), log_name)

    gs.field_sense(entry)
    # Only fill province / city from the address when they are still empty.
    sensed = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    province, city = sensed[1], sensed[2]
    if province is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = province
    if city is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = city
    gs.field_sense(entry)

    return entry
예제 #2
0
def fetch_stores(data):
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    store_list = []
    for m in re.finditer(ur'<item id="\d+">', body):
        sub = cm.extract_closure(body[m.start():], ur'<item\b', ur'</item>')[0]
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        m1 = re.search(ur'<country>([^<>]+)</country>', sub)
        if m1 is not None:
            tmp = m1.group(1).split('/')
            for v in tmp:
                ret = gs.look_up(v.strip().upper(), 1)
                if ret is not None:
                    entry[cm.country_e] = ret['name_e']
                    break
        m1 = re.search(ur'<city>([^<>]+)</city>', sub)
        if m1 is not None:
            val = cm.reformat_addr(m1.group(1))
            if entry[cm.country_e] == 'UNITED STATES':
                tmp_list = tuple(tmp.strip() for tmp in cm.reformat_addr(val).strip(','))
                if len(tmp_list) == 2:
                    if re.search('[A-Z]{2}', tmp_list[1]):
                        entry[cm.province_e] = tmp_list[1]
            entry[cm.city_e] = cm.extract_city(m1.group(1))[0]
        m1 = re.search(ur'<brands>([^<>]+)</brands>', sub)
        if m1 is not None:
            tmp = m1.group(1).split('/')
            brand_list = []
            for v in tmp:
                if v.strip() != '':
                    brand_list.append(v)
            entry[cm.store_type] = ', '.join(brand_map[key] for key in brand_list)
        m1 = re.search(ur'<name>([^<>]+)</name>', sub)
        if m1 is not None:
            entry[cm.name_e] = m1.group(1).strip()
        m1 = re.search(ur'<address>([^<>]+)</address>', sub)
        if m1 is not None:
            entry[cm.addr_e] = cm.reformat_addr(m1.group(1))
        m1 = re.search(ur'<tel>([^<>]+)</tel>', sub)
        if m1 is not None:
            entry[cm.tel] = m1.group(1).strip()
        m1 = re.search(ur'sll=(-?\d+\.\d+),(-?\d+\.\d+)', sub)
        if m1 is not None:
            entry[cm.lat] = string.atof(m1.group(1))
            entry[cm.lng] = string.atof(m1.group(2))
        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None:
            entry[cm.province_e] = ret[1]
            gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
예제 #3
0
파일: oasis.py 프로젝트: haizi-zh/firenze
def fetch_store_details(data):
    """
    Fetch the JSON detail record of one store and save it.

    :param data: crawl context; the keys read here are 'store_url', 'lat',
        'lng', 'brand_id', 'brandname_e', 'brandname_c', 'country_e' and
        'city_e'.
    :return: a one-element list holding the new entry, or an empty list when
        the download fails.
    """
    # http://maps.oasis-stores.com/index-v2.php?coutnryISO=GB&brand=oasis&lat=51.42014&lng=-0.20954
    url = data['store_url']

    try:
        html = cm.get_data(url, {
            'latitude': data['lat'],
            'longitude': data['lng'],
            'brand': 'oasis'
        })
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    raw = json.loads(html)
    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                data['brandname_c'])
    entry[cm.name_e] = raw['name']

    # Join the non-empty address1..address3 lines.
    addr_list = []
    for i in xrange(1, 4):
        line = cm.html2plain(raw['address%d' % i]).strip()
        if line != '':
            addr_list.append(line)
    entry[cm.addr_e] = ', '.join(addr_list)

    # Later keys override earlier ones, so 'county' has the highest priority,
    # then 'state', then 'countryRegion'.
    for key in ('countryRegion', 'state', 'county'):
        region = raw[key]
        if region is not None and region.strip() != '':
            entry[cm.province_e] = region.strip().upper()

    entry[cm.zip_code] = raw['postcode']
    entry[cm.country_e] = data['country_e']
    entry[cm.city_e] = cm.extract_city(data['city_e'])[0]
    # Coordinates are taken from the crawl context, not from the response.
    entry[cm.lat] = string.atof(data['lat'])
    entry[cm.lng] = string.atof(data['lng'])
    entry[cm.tel] = raw['phone']
    entry[cm.email] = raw['email']
    week = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
    entry[cm.hours] = ', '.join([raw[d + '_open_times'] for d in week])
    gs.field_sense(entry)
    print '(%s / %d) Found store: %s, %s (%s, %s)' % (
        data['brandname_e'], data['brand_id'], entry[cm.name_e],
        entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
    db.insert_record(entry, 'stores')

    return [entry]
예제 #4
0
파일: kipling.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """
    Fetch the stores of one country/city pair and save each of them.

    :param data: crawl context; the keys read here are 'store_url',
        'country', 'city', 'brand_id', 'brandname_e' and 'brandname_c'.
    :return: list of the entries found; an empty list when the download fails.
    """
    url = data['store_url']
    query = {
        'country': data['country'],
        'city': data['city']
    }
    try:
        body = cm.get_data(url, query)
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        })
        return []

    store_list = []
    for item in json.loads(body)['items']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.country_e] = data['country'].strip().upper()
        # For 'USA' the value passed as city is stored as the province.
        place = cm.extract_city(data['city'])[0]
        target = cm.province_e if entry[cm.country_e] == 'USA' else cm.city_e
        entry[target] = place
        gs.field_sense(entry)

        # Backslashes are removed before the address is reformatted.
        addr = cm.reformat_addr(item['address'].replace(u'\\', ''))
        parts = [piece.strip() for piece in addr.split(',')]
        # A trailing segment that yields a phone number is moved to the tel field.
        tel = cm.extract_tel(parts[-1])
        if tel != '':
            entry[cm.tel] = tel
            parts.pop()
        entry[cm.addr_e] = ', '.join(parts)
        entry[cm.store_type] = item['shop_type']

        gs.field_sense(entry)
        # Fill in only those geographic fields that are still empty.
        sensed = gs.addr_sense(entry[cm.addr_e])
        for idx, field in enumerate((cm.country_e, cm.province_e, cm.city_e)):
            if sensed[idx] is not None and entry[field] == '':
                entry[field] = sensed[idx]
        gs.field_sense(entry)

        print '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        db.insert_record(entry, 'stores')
        store_list.append(entry)

    return store_list
예제 #5
0
파일: dunhill.py 프로젝트: haizi-zh/firenze
    def get_detailed_store(html, store_cat):
        store_list = []
        start = 0
        while True:
            sub_html, start, end = common.extract_closure(html, ur"<li\b", ur"</li>")
            if end == 0:
                break

            # 得到单个门店的页面代码
            html = html[end:]
            entry = common.init_store_entry(brand_id, brandname_e, brandname_c)

            m = re.findall(ur'<div class="store-title -h3a">(.+?)</div>', sub_html)
            if len(m) > 0:
                entry[common.name_e] = common.reformat_addr(m[0])
            m = re.findall(ur'<div class="store-address">(.+?)</div>', sub_html, re.S)
            if len(m) > 0:
                addr = common.reformat_addr(m[0])
                # 最后一行是否为电话号码?
                terms = addr.split(", ")
                tel = common.extract_tel(terms[-1])
                if tel != "":
                    addr = ", ".join(terms[:-1])
                    entry[common.tel] = tel
                entry[common.addr_e] = addr

            # 获得门店类型
            # store_type = [store_cat]
            type_html, type_start, type_end = common.extract_closure(sub_html, ur'<ul class="service-list">', ur"</ul>")
            if type_end != 0:
                store_type = [m for m in re.findall(ur'<li class="service-item">(.+?)</li>', type_html)]
                store_type.insert(0, store_cat)
                entry[common.store_type] = ", ".join(store_type)
            else:
                entry[common.store_type] = store_cat

            # 获得经纬度
            m = re.findall(ur'data-latitude="(-?\d+\.\d+)"', sub_html)
            if len(m) > 0:
                entry[common.lat] = string.atof(m[0])
            m = re.findall(ur'data-longitude="(-?\d+\.\d+)"', sub_html)
            if len(m) > 0:
                entry[common.lng] = string.atof(m[0])

            entry[common.city_e] = common.extract_city(data[common.city_e])[0]
            entry[common.country_e] = common.reformat_addr(data[common.country_e]).strip().upper()
            gs.field_sense(entry)

            print "%s: Found store: %s, %s (%s, %s, %s)" % (
                brandname_e,
                entry[common.name_e],
                entry[common.addr_e],
                entry[common.city_e],
                entry[common.country_e],
                entry[common.continent_e],
            )
            db.insert_record(entry, "stores")
            store_list.append(entry)

        return store_list
예제 #6
0
파일: hublot.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """
    Post a per-country query to ``data['url']`` and save every store returned.

    :param data: crawl context; the keys read here are 'url', 'country_code',
        'brand_id', 'brandname_e', 'brandname_c' and ``cm.country_e``.
    :return: list of the entries found; an empty list when the request fails.
    """
    url = data['url']
    try:
        body = cm.post_data(url, {'rsp': 'json', 'country': data['country_code']})
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    raw = json.loads(body)
    days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    store_list = []
    for s in raw['stores']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = cm.html2plain(s['name']).strip()

        addr_list = [cm.reformat_addr(s[key]) for key in ('address1', 'address2')
                     if s[key].strip() != '']
        entry[cm.addr_e] = ' '.join(addr_list)

        # The province is not read from s['region']; it is filled in from
        # gs.addr_sense further down when still empty.
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        entry[cm.zip_code] = s['zip'].strip()
        entry[cm.country_e] = data[cm.country_e]
        entry[cm.lat] = string.atof(s['lat'])
        entry[cm.lng] = string.atof(s['lng'])
        entry[cm.tel] = s['phone'].strip()
        entry[cm.fax] = s['fax'].strip()
        entry[cm.email] = s['emailaddress'].strip()
        entry[cm.url] = s['website'].strip()

        if s.get('openingHours') is not None:
            # Items look like i:<day>;s:<len>:"<text>" (presumably a
            # PHP-serialized array -- TODO confirm); <day> indexes `days`.
            opening = []
            for m in re.finditer(ur'i:(\d);s:\d+:\\?"([^\\"]+?)\\?"', s['openingHours']):
                day_idx = string.atoi(m.group(1))
                if day_idx >= len(days):
                    # (\d) also matches 7-9, which name no weekday.
                    continue
                opening.append('%s: %s' % (days[day_idx], m.group(2).strip()))
            entry[cm.hours] = ', '.join(opening)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        print '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'],
                                                              entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e],
                                                              entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    # Hand the collected entries back, matching the [] returned on failure.
    return store_list
예제 #7
0
파일: liujo.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    param = {'action': 'getStoresFromAjax', 'country': data['country_code'],
             'region': data['city'], 'collection': ''}
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for m1 in re.finditer(ur'<div class="shop-type-container">', body):
        sub = cm.extract_closure(body[m1.start():], ur'<div\b', ur'</div>')[0]
        store_class = ''
        m2 = re.search(ur'<div class="shop-type-title">(.+?)</div>', sub, re.S)
        if m2 is not None:
            store_class = cm.reformat_addr(m2.group(1))

        for m2 in re.finditer(ur'<div class="shop"', sub):
            store_sub = cm.extract_closure(sub[m2.start():], ur'<div\b', ur'</div>')[0]
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.store_class] = store_class
            entry[cm.country_e] = data['country_code']
            entry[cm.city_e] = cm.extract_city(data['city'])[0]

            m3 = re.search(ur'loadStore\((\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\)', store_sub)
            if m3 is not None:
                data['store_id'] = string.atoi(m3.group(1))
                entry[cm.lat] = string.atof(m3.group(2))
                entry[cm.lng] = string.atof(m3.group(3))
                entry[cm.store_type] = ', '.join(get_detail(data))

            m3 = re.search(ur'<div class="shop-name shop-details shop-main-name">([^<>]+)</div>', store_sub)
            if m3 is not None:
                entry[cm.name_e] = m3.group(1).strip()
            addr_list = []
            m3 = re.search(ur'<div class="shop-street shop-details">([^<>]+)</div>', store_sub)
            if m3 is not None:
                addr_list.append(cm.reformat_addr(m3.group(1)))
            m3 = re.search(ur'<div class="shop-city shop-details">([^<>]+)</div>', store_sub)
            if m3 is not None:
                tmp = cm.reformat_addr(m3.group(1))
                m3 = re.search(ur'(\d{4,})', tmp)
                if m3 is not None:
                    entry[cm.zip_code] = m3.group(1).strip()
                addr_list.append(tmp)
            entry[cm.addr_e] = ', '.join(addr_list)

            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            gs.field_sense(entry)
            cm.dump('(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                    entry[cm.name_e], entry[cm.addr_e],
                                                                    entry[cm.city_e],
                                                                    entry[cm.country_e],
                                                                    entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)
예제 #8
0
def fetch_stores(data):
    url = data['url']
    param = {'country_id': data['country_code'], 'city': '', 'label_id': '', 'lang': 'en'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    start = body.find(ur'<stores>')
    if start == -1:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []
    body = cm.extract_closure(body[start:], ur'<stores>', ur'</stores>')[0]

    store_list=[]
    for m in re.findall(ur'<store\b[^<>]+>(.+?)</store>', body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.country_e] = data['country_code']
        m1 = re.search(ur'<name>(.+?)</name>', m)
        if m1 is not None:
            entry[cm.name_e] = cm.reformat_addr(m1.group(1).strip())
        m1 = re.search(ur'<address>(.+?)</address>', m)
        if m1 is not None:
            entry[cm.addr_e] = cm.reformat_addr(m1.group(1).strip())
        m1 = re.search(ur'<city>(.+)</city>', m)
        if m1 is not None:
            entry[cm.city_e] = cm.extract_city(m1.group(1))[0]
        m1 = re.search(ur'<zip>(.+?)</zip>', m)
        if m1 is not None:
            entry[cm.zip_code] = m1.group(1).strip()
        m1 = re.search(ur'<tel>(.+?)</tel>', m)
        if m1 is not None:
            entry[cm.tel] = m1.group(1).strip()
        m1 = re.search(ur'<fax>(.+?)</fax>', m)
        if m1 is not None:
            entry[cm.fax] = m1.group(1).strip()
        m1 = re.search(ur'<email>(.+?)</email>', m)
        if m1 is not None:
            entry[cm.email] = m1.group(1).strip()
        m1 = re.search(ur'<link>(.+?)</link>', m)
        if m1 is not None:
            entry[cm.url] = m1.group(1).strip()

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
예제 #9
0
def fetch_stores(data):
    """
    Fetch the JSON store list of one country and save every store in it.

    :param data: crawl context; the keys read here are "host", "country_url",
        "country_id", "brand_id", "brandname_e" and "brandname_c".
    :return: list of the entries found; an empty list when the download fails.
    """
    url = data["host"] + data["country_url"] % data["country_id"]
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump("Error in fetching countries: %s" % url, log_name)
        return []

    store_list = []
    for s in json.loads(body)["rawPos"]:
        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])

        # Join the non-empty address1..address4 lines.
        address_lines = []
        for idx in xrange(1, 5):
            line = cm.html2plain(s["address%d" % idx]).strip()
            if line != "":
                address_lines.append(line)
        entry[cm.addr_e] = ", ".join(address_lines)

        entry[cm.city_e] = cm.extract_city(s["city"]["name"])[0]
        entry[cm.country_e] = s["country"]["countryCode"]
        entry[cm.email] = s["email"]
        entry[cm.fax] = s["fax"]
        # Empty coordinate strings are skipped.
        if s["latitude"] != "":
            entry[cm.lat] = string.atof(s["latitude"])
        if s["longitude"] != "":
            entry[cm.lng] = string.atof(s["longitude"])
        entry[cm.hours] = cm.reformat_addr(s["openingSchedule"])

        phones = [s[key].strip() for key in ("phone1", "phone2") if s[key].strip() != ""]
        entry[cm.tel] = ", ".join(phones)
        entry[cm.zip_code] = s["postalCode"]
        entry[cm.name_e] = s["shopName"]

        gs.field_sense(entry)
        # Fill province / city from the address only when still empty.
        sensed = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        for idx, field in ((1, cm.province_e), (2, cm.city_e)):
            if sensed[idx] is not None and entry[field] == "":
                entry[field] = sensed[idx]
        gs.field_sense(entry)

        summary = "(%s / %d) Found store: %s, %s (%s, %s)" % (
            data["brandname_e"],
            data["brand_id"],
            entry[cm.name_e],
            entry[cm.addr_e],
            entry[cm.country_e],
            entry[cm.continent_e],
        )
        cm.dump(summary, log_name)
        db.insert_record(entry, "stores")
        store_list.append(entry)

    return store_list
예제 #10
0
def fetch_uk(body, data):
    """
    Parse the stores listed inside the ``<div class="fableft">`` block of *body*.

    :param body: page markup.
    :param data: crawl context; the keys read here are "name" (used as the
        country), "brand_id", "brandname_e" and "brandname_c".
    :return: list of the entries found; an empty list when the block cannot
        be located.
    """
    start = body.find(u'<div class="fableft">')
    if start == -1:
        print "Error in finding %s stores" % data["name"]
        return []
    body, start, end = cm.extract_closure(body[start:], ur"<div\b", ur"</div>")
    if end == 0:
        print "Error in finding %s stores" % data["name"]
        return []

    store_list = []
    for m in re.findall(ur"<div>\s*(.+?)\s*</div>", body, re.S):
        addr_list = re.findall(ur"<p>\s*(.+?)\s*</p>", m)
        if not addr_list:
            # A <div> without any <p> line carries no store data.
            continue

        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])
        entry[cm.country_e] = data["name"]

        # A trailing line that yields a phone number is moved to the tel field.
        tel = cm.extract_tel(addr_list[-1])
        if tel != "":
            entry[cm.tel] = tel
            del addr_list[-1]

        if data["name"] == "AUSTRALIA":
            country, province, city = gs.addr_sense(", ".join(addr_list), data["name"])
            if city is not None:
                entry[cm.city_e] = city
            if province is not None:
                entry[cm.province_e] = province
        else:
            # Otherwise the last line is stored as the zip code and the line
            # before it as the city; guard both against short address blocks.
            if len(addr_list) >= 2:
                city = addr_list[-2].strip().upper()
                entry[cm.city_e] = city
                ret = gs.look_up(city, 3)
                if ret is not None and ret["country"]["name_e"] == gs.look_up("UK", 1)["name_e"]:
                    entry[cm.city_e] = ret["name_e"]
            if addr_list:
                entry[cm.zip_code] = addr_list[-1].strip().upper()
        entry[cm.addr_e] = ", ".join(addr_list)
        entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == "":
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == "":
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print "(%s / %d) Found store: %s, %s (%s, %s, %s)" % (
            data["brandname_e"],
            data["brand_id"],
            entry[cm.name_e],
            entry[cm.addr_e],
            entry[cm.city_e],
            entry[cm.country_e],
            entry[cm.continent_e],
        )

        db.insert_record(entry, "stores")
        store_list.append(entry)

    # Hand the collected entries back, matching the [] returned on failure.
    return store_list
예제 #11
0
    def get_detailed_store(html, store_cat):
        store_list = []
        start = 0
        while True:
            sub_html, start, end = common.extract_closure(html, ur'<li\b', ur'</li>')
            if end == 0:
                break

            # 得到单个门店的页面代码
            html = html[end:]
            entry = common.init_store_entry(brand_id, brandname_e, brandname_c)

            m = re.findall(ur'<div class="store-title -h3a">(.+?)</div>', sub_html)
            if len(m) > 0:
                entry[common.name_e] = common.reformat_addr(m[0])
            m = re.findall(ur'<div class="store-address">(.+?)</div>', sub_html, re.S)
            if len(m) > 0:
                addr = common.reformat_addr(m[0])
                # 最后一行是否为电话号码?
                terms = addr.split(', ')
                tel = common.extract_tel(terms[-1])
                if tel != '':
                    addr = ', '.join(terms[:-1])
                    entry[common.tel] = tel
                entry[common.addr_e] = addr

            # 获得门店类型
            # store_type = [store_cat]
            type_html, type_start, type_end = common.extract_closure(sub_html, ur'<ul class="service-list">',
                                                                     ur'</ul>')
            if type_end != 0:
                store_type = [m for m in re.findall(ur'<li class="service-item">(.+?)</li>',
                                                    type_html)]
                store_type.insert(0, store_cat)
                entry[common.store_type] = ', '.join(store_type)
            else:
                entry[common.store_type] = store_cat

            # 获得经纬度
            m = re.findall(ur'data-latitude="(-?\d+\.\d+)"', sub_html)
            if len(m) > 0:
                entry[common.lat] = string.atof(m[0])
            m = re.findall(ur'data-longitude="(-?\d+\.\d+)"', sub_html)
            if len(m) > 0:
                entry[common.lng] = string.atof(m[0])

            entry[common.city_e] = common.extract_city(data[common.city_e])[0]
            entry[common.country_e] = common.reformat_addr(data[common.country_e]).strip().upper()
            gs.field_sense(entry)

            print '%s: Found store: %s, %s (%s, %s, %s)' % (
                brandname_e, entry[common.name_e], entry[common.addr_e], entry[common.city_e], entry[common.country_e],
                entry[common.continent_e])
            db.insert_record(entry, 'stores')
            store_list.append(entry)

        return store_list
예제 #12
0
파일: ysl.py 프로젝트: haizi-zh/firenze
    def f(m):
        store_name = m[0].strip()
        addr_str = m[1].strip()

        spl = addr_str.split('<br/>')
        store_type = cm.html2plain(spl[0].strip())

        store_addr = spl[1].strip()
        hour_idx = 2
        store_tel = ''
        for i in xrange(2, len(spl)):
            # If this is not a phone number:
            tel = cm.extract_tel(spl[i])
            if tel == '':
                store_addr += ', ' + spl[i]
                hour_idx = i + 1
            else:
                store_tel = spl[i].strip()
                hour_idx = i + 1
                break

        if hour_idx < len(spl):
            store_hour = cm.html2plain(', '.join(spl[hour_idx:])).strip()
        else:
            store_hour = ''

        # store_addr = cm.reformat_addr('\r\n'.join([val.strip() for val in spl[1:-3]]))
        store_addr = cm.reformat_addr(store_addr)

        store_entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
        cm.update_entry(store_entry,
                        {cm.continent_e: opt[cm.continent_e].strip().upper(), cm.city_e: opt[cm.city_e].strip().upper(),
                         cm.country_e: opt[cm.country_e].strip().upper(),
                         cm.name_e: cm.name_e, cm.addr_e: store_addr, cm.store_type: store_type, cm.hours: store_hour,
                         cm.tel: store_tel})
        if opt.has_key(cm.province_e):
            store_entry[cm.province_e] = opt[cm.province_e]
        else:
            store_entry[cm.province_e] = ''
        store_entry[cm.city_e] = cm.extract_city(store_entry[cm.city_e])[0]

        gs.field_sense(store_entry)
        ret = gs.addr_sense(store_entry[cm.addr_e], store_entry[cm.country_e])
        if ret[1] is not None and store_entry[cm.province_e] == '':
            store_entry[cm.province_e] = ret[1]
        if ret[2] is not None and store_entry[cm.city_e] == '':
            store_entry[cm.city_e] = ret[2]
        gs.field_sense(store_entry)

        print '%s Found store: %s, %s (%s, %s)' % (
            brandname_e, store_entry[cm.name_e], store_entry[cm.addr_e], store_entry[cm.country_e],
            store_entry[cm.continent_e])
        db.insert_record(store_entry, 'stores')

        return store_entry
예제 #13
0
파일: y3.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """
    Fetch the detailed store information listed at ``data['url']``.

    Uses the module-level ``brand_id``, ``brandname_e`` and ``brandname_c``.

    :rtype : [entries]
    :param data: crawl context; the keys read here are 'url',
        ``cm.continent_e``, ``cm.country_e`` and ``cm.city_e``.
    :return: list of the entries found; an empty list when the download fails
        or the page has no ``<ul class="store-list">``.
    """
    # Bind the URL up front: the error branch below reports it, and the
    # original referenced a name `url` that this function never defined.
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id}
        cm.dump(dump_data)
        return []

    entries = []
    start = html.find(u'<ul class="store-list">')
    if start == -1:
        return entries
    start += len(u'<ul class="store-list">')
    end = html.find(u'</ul>', start)
    if end == -1:
        # Unterminated list: use the rest of the page rather than letting the
        # -1 silently cut off the last character.
        end = len(html)
    html = html[start:end]

    for m1 in re.findall(ur'<li class="(.*?)">(.*?)</li>', html, re.S):
        entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
        # The <li> class attribute is stored as the store type.
        entry[cm.store_type] = m1[0]
        sub_html = m1[1]
        m2 = re.findall(ur'<h3 class="store-name">(.*?)</h3>', sub_html)
        if len(m2) > 0:
            entry[cm.name_e] = cm.reformat_addr(m2[0])
        m2 = re.findall(ur'<p class="store-address">(.*?)</p>', sub_html, re.S)
        if len(m2) > 0:
            entry[cm.addr_e] = cm.reformat_addr(m2[0])

        cm.update_entry(entry, {cm.continent_e: data[cm.continent_e].strip().upper(),
                                cm.country_e: data[cm.country_e].strip().upper(),
                                cm.city_e: data[cm.city_e].strip().upper()})

        gs.field_sense(entry)
        # Fill in only those geographic fields that are still empty.
        ret = gs.addr_sense(entry[cm.addr_e])
        if ret[0] is not None and entry[cm.country_e] == '':
            entry[cm.country_e] = ret[0]
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

        print '%s: Found store: %s, %s (%s, %s)' % (
            brandname_e, entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
            entry[cm.continent_e])
        db.insert_record(entry, 'stores')
        entries.append(entry)

    # Hand the collected entries back, matching the [] returned on failure.
    return entries
0
파일: lacoste.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """
    Fetch the stores of one city and save every one of them.

    :param data: crawl context; the keys read here are 'url', 'city_id',
        'country_code', 'brand_id', 'brandname_e' and 'brandname_c'.
    :return: list of the entries found; an empty list when the download fails.
    """
    url = data['url']
    param = {
        'action': 'getStoresByCity',
        'idCity': data['city_id'],
        'filter': 'clothing;lacoste%20l!ve'
    }
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    raw = json.loads(body)['root']['DATA']['stores']
    store_list = []
    for s in [tmp['store'] for tmp in raw]:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.name_e] = s['name'].strip()
        entry[cm.country_e] = data['country_code']
        entry[cm.addr_e] = cm.html2plain(s['address']).strip()
        entry[cm.store_type] = s['category'].strip()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        # The optional fields below may be null in the response.
        if s['email'] is not None:
            entry[cm.email] = s['email'].strip()
        if s['fax'] is not None:
            entry[cm.fax] = s['fax'].strip()
        if s['infoHours'] is not None:
            entry[cm.hours] = s['infoHours'].strip()
        if s['latitude'] is not None and s['latitude'].strip() != '':
            entry[cm.lat] = string.atof(s['latitude'])
        if s['longitude'] is not None and s['longitude'].strip() != '':
            # Longitude goes into the lng field; the original wrote it to
            # cm.lat, clobbering the latitude and leaving lng unset.
            entry[cm.lng] = string.atof(s['longitude'])
        if s['phone'] is not None:
            entry[cm.tel] = s['phone'].strip()
        if s['postCode'] is not None:
            entry[cm.zip_code] = s['postCode'].strip()

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump(
            '(%s / %d) Found store: %s, %s (%s, %s)' %
            (data['brandname_e'], data['brand_id'], entry[cm.name_e],
             entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
            log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    return store_list
예제 #15
0
파일: unode50.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """Fetch all distributors and shops listed for one country.

    Downloads ``<host>/en/shops/<country_id>.json``, converts every item of
    its ``distributors`` and ``shops`` arrays into a store entry, inserts the
    entry into the ``stores`` table and collects it.

    :param data: dict providing ``host``, ``country_id``, ``country``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of store entries; ``[]`` when the download or the JSON
        decoding raises.
    """
    log_file = 'unode50_log.txt'
    host = data['host']
    quoted_country = urllib.quote(data['country_id'].encode('utf-8'))
    url = '%s/en/shops/%s.json' % (host, quoted_country)
    try:
        raw = json.loads(cm.get_data(url))
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_file)
        cm.dump({'level': 0, 'time': cm.format_time(), 'data': {'url': url},
                 'brand_id': data['brand_id']})
        return []

    # Tag every raw item with its class; distributors are queued before shops.
    tagged = []
    for key, label in (('distributors', 'distributor'), ('shops', 'shop')):
        for item in raw[key]:
            item['store_class'] = label
            tagged.append(item)

    results = []
    for item in tagged:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.store_class] = item['store_class']
        entry[cm.country_e] = data['country']
        entry[cm.name_e] = item['title']
        # Coordinates are optional; they are stringified before conversion.
        for src, dst in (('lat', cm.lat), ('lng', cm.lng)):
            if item[src] is not None:
                entry[dst] = string.atof(str(item[src]))
        entry[cm.addr_e] = item['address']
        entry[cm.city_e] = cm.extract_city(item['city'])[0]
        entry[cm.tel] = item['phone']
        entry[cm.zip_code] = item['postal_code']
        gs.field_sense(entry)

        message = '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e],
            entry[cm.continent_e])
        cm.dump(message, log_file)
        db.insert_record(entry, 'stores')
        results.append(entry)

    return results
예제 #16
0
def fetch_uk(body, data):
    """Parse the store list out of a country page and store every entry.

    The stores are read from the ``<div class="fableft">`` container: each
    inner ``<div>`` is one store and each ``<p>`` inside it one address line.
    When the last line yields a phone number (``cm.extract_tel``) it is moved
    to the ``tel`` field.

    :param body: page markup (unicode).
    :param data: dict providing ``name`` (the country name), ``brand_id``,
        ``brandname_e`` and ``brandname_c``.
    :return: list of the entries inserted into the ``stores`` table; ``[]``
        when the container cannot be located.
    """
    start = body.find(u'<div class="fableft">')
    if start == -1:
        print 'Error in finding %s stores' % data['name']
        return []
    body, start, end = cm.extract_closure(body[start:], ur'<div\b', ur'</div>')
    if end == 0:
        print 'Error in finding %s stores' % data['name']
        return []

    store_list = []
    for m in re.findall(ur'<div>\s*(.+?)\s*</div>', body, re.S):
        addr_list = re.findall(ur'<p>\s*(.+?)\s*</p>', m)
        if not addr_list:
            # A <div> without any <p> line carries no address; indexing it
            # below would raise IndexError and abort the whole page.
            continue

        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.country_e] = data['name']

        tel = cm.extract_tel(addr_list[-1])
        if tel != '':
            entry[cm.tel] = tel
            del addr_list[-1]

        if data['name'] == 'AUSTRALIA':
            country, province, city = gs.addr_sense(', '.join(addr_list), data['name'])
            if city is not None:
                entry[cm.city_e] = city
            if province is not None:
                entry[cm.province_e] = province
        else:
            # Remaining lines are read as "..., <city>, <postcode>".
            city = addr_list[-2].strip().upper()
            entry[cm.city_e] = city
            ret = gs.look_up(city, 3)
            if ret is not None and ret['country']['name_e'] == gs.look_up('UK', 1)['name_e']:
                entry[cm.city_e] = ret['name_e']
            entry[cm.zip_code] = addr_list[-1].strip().upper()
        entry[cm.addr_e] = ', '.join(addr_list)
        entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Only fill province/city from the address when still empty.
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'],
                                                              entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e],
                                                              entry[cm.country_e], entry[cm.continent_e])

        db.insert_record(entry, 'stores')
        store_list.append(entry)

    # Previously the function fell off the end and returned None even though
    # both error paths return a list.
    return store_list
예제 #17
0
def fetch_stores(data):
    url = data['store_url']
    param = {'myid': data['key'], 'idioma': 'in'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for s in json.loads(body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.country_e] = data['country']
        entry[cm.city_e] = cm.extract_city(data['city'])[0]
        entry[cm.name_e] = cm.reformat_addr(s['title'])

        m = re.search(ur'(.+?)-\s*<', s['key'])
        addr_list = [entry[cm.name_e]]
        if m is not None:
            m1 = re.search(ur'-+', m.group(1))
            if m1 is not None:
                tmp = [m.group(1)[:m1.start()], m.group(1)[m1.end():]]
            else:
                tmp = [m.group(1)]
            if len(tmp) > 1:
                entry[cm.tel] = cm.extract_tel(tmp[1])
            m1 = re.search(ur'\d{4,}', tmp[0])
            if m1 is not None:
                entry[cm.zip_code] = m1.group()
            addr_list.append(tmp[0].strip())
        entry[cm.addr_e] = ', '.join(addr_list)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e])
        if ret[0] is not None and entry[cm.country_e] == '':
            entry[cm.country_e] = ret[0]
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump(
            '(%s / %d) Found store: %s, %s (%s, %s)' %
            (data['brandname_e'], data['brand_id'], entry[cm.name_e],
             entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
            log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)

    return store_list
예제 #18
0
파일: dkny.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """Fetch every store of one country, following the paged JSON listing.

    Pages are requested with ``p=1, 2, ...`` until the ``TotalPages`` value
    reported by the response has been reached. The first response seen for a
    country also fills the module-level ``region_map`` cache
    (``RegionId`` -> ``Name``), which is used to set the province of a store.

    :param data: dict providing ``data_url``, ``country_code``, ``brand_id``,
        ``brandname_e`` and ``brandname_c``.
    :return: tuple of the entries inserted into the ``stores`` table; ``()``
        as soon as one page request raises (entries collected so far are
        already in the database but are not returned).
    """
    url = data['data_url']
    param = {'output': 'json', 'country': data['country_code'], 'brand': 'dkny'}
    page = 0
    tot_page = -1  # unknown until the first response has been parsed
    store_list = []
    while True:
        page += 1
        if tot_page != -1 and page > tot_page:
            break
        param['p'] = page
        try:
            body = cm.get_data(url, param)
        except Exception:
            cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
            return ()

        raw = json.loads(body)
        tot_page = raw['Stores']['TotalPages']
        if data['country_code'] not in region_map:
            # Build the region (state) lookup table for this country.
            region_map[data['country_code']] = dict((item['RegionId'], item['Name']) for item in raw['Regions'])

        for s in raw['Stores']['Items']:
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.country_e] = data['country_code'].upper()
            entry[cm.city_e] = cm.extract_city(s['City'])[0]
            entry[cm.name_e] = cm.html2plain(s['Name']).strip()
            entry[cm.addr_e] = cm.reformat_addr(s['Address'])
            # Falsy values (None, empty string, 0) are stored as ''.
            entry[cm.tel] = s['Phone'].strip() if s['Phone'] else ''
            entry[cm.fax] = s['Fax'].strip() if s['Fax'] else ''
            entry[cm.email] = s['Email'].strip() if s['Email'] else ''
            entry[cm.lat] = s['Latitude'] if s['Latitude'] else ''
            entry[cm.lng] = s['Longitude'] if s['Longitude'] else ''
            region_id = s['RegionId']
            if region_id in region_map[data['country_code']]:
                entry[cm.province_e] = cm.html2plain(region_map[data['country_code']][region_id]).strip().upper()

            gs.field_sense(entry)
            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            # Only fill province/city from the address when still empty.
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            if ret[2] is not None and entry[cm.city_e] == '':
                entry[cm.city_e] = ret[2]
            gs.field_sense(entry)

            cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                                entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)

    # Previously the collected stores were dropped (implicit None); a tuple is
    # returned to match the type of the error path above.
    return tuple(store_list)
예제 #19
0
def fetch_stores(data):
    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
    code = data['country_code']
    if gs.look_up(code, 1) is None:
        entry[cm.country_e] = cm.html2plain(data['country']).strip().upper()
    else:
        entry[cm.country_e] = code
    entry[cm.name_e] = data['store_name']
    entry[cm.city_e] = cm.extract_city(data['city'])[0]
    entry[cm.lat] = data['lat'] if data['lat'] is not None else ''
    entry[cm.lng] = data['lng'] if data['lng'] is not None else ''

    m = re.search(ur'data-boutique\s*=\s*"%s"' % data['store_id'], data['content'])
    sub = data['content'][m.end():]

    m1 = re.search(ur'<li class="isDistributeur[^<>]+>(.+?)</li>', sub)
    if m1 is not None:
        entry[cm.store_class] = cm.reformat_addr(m1.group(1))

    m1 = re.search(ur'<li class="place-title[^<>]+>(.+?)</li>', sub, re.S)
    if m1 is not None:
        entry[cm.addr_e] = cm.reformat_addr(m1.group(1))

    m1 = re.search(ur'<li class="contacts[^<>]+>(.+?)</li>', sub, re.S)
    if m1 is not None:
        m2 = re.search(ur'<a class="popupLaunch" href="([^"]+)"', m1.group(1))
        if m2:
            entry = fetch_details(data, m2.group(1), entry)

        m2 = re.search(ur'<p>(.+?)</p>', m1.group(1), re.S)
        if m2:
            ct_list = tuple(tmp.strip() for tmp in cm.reformat_addr(m2.group(1)).split(','))
            entry[cm.tel] = cm.extract_tel(ct_list[0])
            if len(ct_list) > 1:
                entry[cm.email] = ct_list[1].strip()

    gs.field_sense(entry)
    ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    if ret[1] is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = ret[1]
    if ret[2] is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = ret[2]
    gs.field_sense(entry)

    cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                        entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                        entry[cm.continent_e]), log_name)
    db.insert_record(entry, 'stores')

    return tuple(entry)
예제 #20
0
파일: kipling.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """Fetch the stores of one country/city pair from the JSON endpoint.

    The ``address`` of every item is split on commas; when the last piece
    yields a phone number (``cm.extract_tel``) it is moved to the ``tel``
    field and removed from the address.

    :param data: dict providing ``store_url``, ``country``, ``city``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of entries inserted into the ``stores`` table; ``[]`` when
        the request raises.
    """
    url = data['store_url']
    try:
        body = cm.get_data(url, {'country': data['country'], 'city': data['city']})
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']})
        return []

    results = []
    for item in json.loads(body)['items']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.country_e] = data['country'].strip().upper()
        place = cm.extract_city(data['city'])[0]
        # For the USA the "city" value is stored as the province instead.
        target = cm.province_e if entry[cm.country_e] == 'USA' else cm.city_e
        entry[target] = place
        gs.field_sense(entry)

        addr = cm.reformat_addr(item['address'].replace(u'\\', ''))
        pieces = [piece.strip() for piece in addr.split(',')]
        phone = cm.extract_tel(pieces[-1])
        if phone != '':
            entry[cm.tel] = phone
            pieces.pop()
        entry[cm.addr_e] = ', '.join(pieces)
        entry[cm.store_type] = item['shop_type']

        gs.field_sense(entry)
        sensed = gs.addr_sense(entry[cm.addr_e])
        # Fill country / province / city from the address only when empty.
        for idx, field in enumerate((cm.country_e, cm.province_e, cm.city_e)):
            if sensed[idx] is not None and entry[field] == '':
                entry[field] = sensed[idx]
        gs.field_sense(entry)

        print '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        db.insert_record(entry, 'stores')
        results.append(entry)

    return results
예제 #21
0
def fetch_details(data):
    """Download one store page and turn it into a stored entry.

    City, postal code and the address text are taken from the
    ``<div class="field-address">`` block; phone, opening hours and
    coordinates are searched in the whole page.

    :param data: dict providing ``cm.url``, ``cm.name_e``, ``cm.country_e``,
        ``cm.continent_e``, ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: ``[entry]`` after inserting it into the ``stores`` table; ``[]``
        when the request raises or the address block cannot be located.
    """
    url = data[cm.url]
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']})
        return []

    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
    entry[cm.name_e] = data[cm.name_e]

    anchor = html.find(ur'<div class="field-address">')
    if anchor == -1:
        return []
    block, _, closure_end = cm.extract_closure(html[anchor:], ur'<div\b', ur'</div>')
    if closure_end == 0:
        return []

    locality = re.search(ur'<div  class="locality">(.+?)</div>', block)
    if locality is not None:
        entry[cm.city_e] = cm.extract_city(locality.group(1))[0]
    postal = re.search(ur'<div  class="postal-code">(.+?)</div>', block)
    if postal is not None:
        entry[cm.zip_code] = postal.group(1).strip()
    entry[cm.country_e] = data[cm.country_e]
    # Replace every tag by a line break before normalising the address text.
    tag_pat = re.compile(ur'<[^<>]+?>', re.S)
    entry[cm.addr_e] = cm.reformat_addr(tag_pat.sub(u'\r\n', block))

    phone = re.search(ur'<div class="field-telephone"><a href=".+?" class="tel">(.+?)</a></div>', html)
    if phone is not None:
        entry[cm.tel] = phone.group(1).strip()

    hours = re.search(ur'<div class="field-opening-hours">\s*<p>(.+?)</p>\s*</div>', html, re.S)
    if hours is not None:
        entry[cm.hours] = cm.reformat_addr(hours.group(1))

    coords = re.search(ur'"coordinates":\[(-?\d+\.\d{4,})\s*,\s*(-?\d+\.\d{4,})\]', html)
    if coords is not None:
        # First captured number is stored as latitude, second as longitude.
        cm.update_entry(entry, {cm.lat: string.atof(coords.group(1)),
                                cm.lng: string.atof(coords.group(2))})

    entry[cm.continent_e] = data[cm.continent_e]
    gs.field_sense(entry)
    print '(%s / %d) Found store: %s, %s (%s, %s)' % (
        data['brandname_e'], data['brand_id'], entry[cm.name_e],
        entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
    db.insert_record(entry, 'stores')
    return [entry]
예제 #22
0
def fetch_stores(data):
    """Fetch the stores of one country from the ``rawPos`` JSON listing.

    The request URL is ``host`` + ``country_url`` formatted with
    ``country_id``. The address is assembled from the non-empty
    ``address1`` .. ``address4`` fields and the phone from the non-empty
    ``phone1`` / ``phone2`` fields.

    :param data: dict providing ``host``, ``country_url``, ``country_id``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of entries inserted into the ``stores`` table; ``[]`` when
        the request raises.
    """
    url = data['host'] + data['country_url'] % data['country_id']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []

    records = json.loads(body)['rawPos']
    results = []
    for item in records:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

        lines = [cm.html2plain(item['address%d' % idx]).strip() for idx in xrange(1, 5)]
        entry[cm.addr_e] = ', '.join([line for line in lines if line != ''])
        entry[cm.city_e] = cm.extract_city(item['city']['name'])[0]
        entry[cm.country_e] = item['country']['countryCode']
        entry[cm.email] = item['email']
        entry[cm.fax] = item['fax']
        # Coordinates arrive as strings; empty ones are left unset.
        for key, field in (('latitude', cm.lat), ('longitude', cm.lng)):
            if item[key] != '':
                entry[field] = string.atof(item[key])
        entry[cm.hours] = cm.reformat_addr(item['openingSchedule'])
        phones = [item[key].strip() for key in ('phone1', 'phone2')
                  if item[key].strip() != '']
        entry[cm.tel] = ', '.join(phones)
        entry[cm.zip_code] = item['postalCode']
        entry[cm.name_e] = item['shopName']

        gs.field_sense(entry)
        sensed = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Only fill province/city from the address when still empty.
        for idx, field in ((1, cm.province_e), (2, cm.city_e)):
            if sensed[idx] is not None and entry[field] == '':
                entry[field] = sensed[idx]
        gs.field_sense(entry)

        message = '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        cm.dump(message, log_name)
        db.insert_record(entry, 'stores')
        results.append(entry)

    return results
예제 #23
0
파일: mango.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    url = data['store_url']
    param = {'myid': data['key'], 'idioma': 'in'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for s in json.loads(body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.country_e] = data['country']
        entry[cm.city_e] = cm.extract_city(data['city'])[0]
        entry[cm.name_e] = cm.reformat_addr(s['title'])

        m = re.search(ur'(.+?)-\s*<', s['key'])
        addr_list = [entry[cm.name_e]]
        if m is not None:
            m1 = re.search(ur'-+', m.group(1))
            if m1 is not None:
                tmp = [m.group(1)[:m1.start()], m.group(1)[m1.end():]]
            else:
                tmp = [m.group(1)]
            if len(tmp) > 1:
                entry[cm.tel] = cm.extract_tel(tmp[1])
            m1 = re.search(ur'\d{4,}', tmp[0])
            if m1 is not None:
                entry[cm.zip_code] = m1.group()
            addr_list.append(tmp[0].strip())
        entry[cm.addr_e] = ', '.join(addr_list)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e])
        if ret[0] is not None and entry[cm.country_e] == '':
            entry[cm.country_e] = ret[0]
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)

    return store_list
예제 #24
0
파일: oasis.py 프로젝트: haizi-zh/firenze
def fetch_store_details(data):
    """Fetch the details of one store by its coordinates and store the entry.

    Sample request:
    http://maps.oasis-stores.com/index-v2.php?coutnryISO=GB&brand=oasis&lat=51.42014&lng=-0.20954

    :param data: dict providing ``store_url``, ``lat``, ``lng`` (numeric
        strings), ``country_e``, ``city_e``, ``brand_id``, ``brandname_e``
        and ``brandname_c``.
    :return: ``[entry]`` after inserting it into the ``stores`` table; ``[]``
        when the request raises.
    """
    url = data['store_url']

    try:
        html = cm.get_data(url, {'latitude': data['lat'], 'longitude': data['lng'], 'brand': 'oasis'})
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    raw = json.loads(html)
    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
    entry[cm.name_e] = raw['name']

    # Join the non-empty address1..address3 lines.
    addr_list = []
    for i in xrange(1, 4):
        tmp = cm.html2plain(raw['address%d' % i]).strip()
        if tmp != '':
            addr_list.append(tmp)
    entry[cm.addr_e] = ', '.join(addr_list)

    # Later keys take precedence: a non-empty 'county' overrides 'state',
    # which overrides 'countryRegion'.
    for key in ('countryRegion', 'state', 'county'):
        state = raw[key]
        if state is not None and state.strip() != '':
            entry[cm.province_e] = state.strip().upper()

    entry[cm.zip_code] = raw['postcode']
    entry[cm.country_e] = data['country_e']
    entry[cm.city_e] = cm.extract_city(data['city_e'])[0]
    entry[cm.lat] = string.atof(data['lat'])
    entry[cm.lng] = string.atof(data['lng'])
    entry[cm.tel] = raw['phone']
    entry[cm.email] = raw['email']
    days = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
    entry[cm.hours] = ', '.join([raw[d + '_open_times'] for d in days])
    gs.field_sense(entry)
    print '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                      entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                      entry[cm.continent_e])
    db.insert_record(entry, 'stores')

    return [entry]
예제 #25
0
def fetch_stores(data):
    url = data['home_url']
    try:
        body = cm.post_data(url, {'lz_sf': data['province'], 'lz_sx': data['city']})
    except Exception:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    start = body.find(u'搜索结果')
    if start == -1:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        return []

    body = body[start + 4:]

    store_list = []
    for m in re.findall(ur'</script>\s*(\S+)\s*</span>', body, re.S):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = m.strip()
        entry[cm.addr_e] = m.strip()
        entry[cm.city_c] = data['city']
        ret = gs.look_up(data['city'], 3)
        if ret is not None:
            entry[cm.city_e] = cm.extract_city(ret['name_e'])[0]
            if ret['province'] != '':
                entry[cm.province_e] = ret['province']['name_e']
        entry[cm.province_c] = data['province']
        ret = gs.look_up(data['province'], 2)
        if ret is not None:
            entry[cm.province_e] = ret['name_e']
        entry[cm.country_e] = u'CHINA'

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), 'benetton_log.txt', False)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
예제 #26
0
파일: cerruti.py 프로젝트: haizi-zh/firenze
def get_store_details(data):
    """Post the store lookup form and turn the JSON answer into an entry.

    Brand information comes from the module-level ``brand_id``,
    ``brandname_e`` and ``brandname_c``.

    :param data: dict providing ``url``, ``country_id``, ``city_id``,
        ``store_id``, ``country`` and ``city``.
    :return: the entry inserted into the ``stores`` table; ``[]`` when the
        request raises.
    """
    url = data['url']
    try:
        html = cm.post_data(url, {'country': data['country_id'], 'city': data['city_id'], 'recordid': data['store_id']})
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id})
        return []

    entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
    info = json.loads(html)['elements']

    # Drop backslashes and turn paragraph tags into comma separators.
    raw_addr = info['address']
    for old, new in (('\\', ''), ('<p>', ','), ('</p>', ',')):
        raw_addr = raw_addr.replace(old, new)
    addr = cm.reformat_addr(raw_addr)
    # The first comma-separated term is the store name.
    terms = addr.split(',')
    if terms:
        entry[cm.name_e] = cm.reformat_addr(terms[0])
    entry[cm.addr_e] = addr

    # The first "<number>,<number>" pair in the map URL gives lat, lng.
    coords = re.findall(ur'(-?\d+\.\d+),(-?\d+\.\d+)', info['gmap'])
    if coords:
        first_lat, first_lng = coords[0][0], coords[0][1]
        cm.update_entry(entry, {cm.lat: string.atof(first_lat), cm.lng: string.atof(first_lng)})

    entry[cm.url] = info['shareurl'].replace('\\', '')
    entry[cm.hours] = info['openingtimes']
    entry[cm.comments] = info['other']

    # Geo
    cm.update_entry(entry, {cm.country_e: data['country'], cm.city_e: data['city']})
    entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

    gs.field_sense(entry)
    sensed = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    # Only fill province/city from the address when still empty.
    for idx, field in ((1, cm.province_e), (2, cm.city_e)):
        if sensed[idx] is not None and entry[field] == '':
            entry[field] = sensed[idx]
    gs.field_sense(entry)

    print '(%s / %d) Found store: %s, %s (%s, %s)' % (
        brandname_e, brand_id, entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
        entry[cm.continent_e])

    db.insert_record(entry, 'stores')
    return entry
예제 #27
0
파일: lacoste.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """Fetch the stores of one city via the ``getStoresByCity`` action.

    :param data: dict providing ``url``, ``city_id``, ``country_code``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of entries inserted into the ``stores`` table; ``[]`` when
        the request raises.
    """
    url = data['url']
    param = {'action': 'getStoresByCity', 'idCity': data['city_id'],
             'filter': 'clothing;lacoste%20l!ve'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    raw = json.loads(body)['root']['DATA']['stores']
    store_list = []
    for s in [tmp['store'] for tmp in raw]:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = s['name'].strip()
        entry[cm.country_e] = data['country_code']
        entry[cm.addr_e] = cm.html2plain(s['address']).strip()
        entry[cm.store_type] = s['category'].strip()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        # The remaining fields may be null in the response; skip those.
        if s['email'] is not None:
            entry[cm.email] = s['email'].strip()
        if s['fax'] is not None:
            entry[cm.fax] = s['fax'].strip()
        if s['infoHours'] is not None:
            entry[cm.hours] = s['infoHours'].strip()
        if s['latitude'] is not None and s['latitude'].strip() != '':
            entry[cm.lat] = string.atof(s['latitude'])
        if s['longitude'] is not None and s['longitude'].strip() != '':
            # Fixed: the longitude used to be written to cm.lat, overwriting
            # the latitude and leaving cm.lng unset.
            entry[cm.lng] = string.atof(s['longitude'])
        if s['phone'] is not None:
            entry[cm.tel] = s['phone'].strip()
        if s['postCode'] is not None:
            entry[cm.zip_code] = s['postCode'].strip()

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Only fill province/city from the address when still empty.
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    return store_list
예제 #28
0
파일: tudor.py 프로젝트: haizi-zh/firenze
def fetch_cities(data):
    url = data['location_url']
    try:
        body = cm.get_data(url, {'lang': data['lang'], 'country': data['country_id'], 'region': data['region_id']})
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, data['region']), 'tudor_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    results = []
    for item in pq(body.encode('utf-8'))('city[id!=""]'):
        d = data.copy()
        d['city_id'] = string.atoi(item.attrib['id'])
        tmp = item.attrib['name']
        d['city_name'] = cm.extract_city(re.sub(ur'市$', '', re.sub(ur'省$', '', tmp).strip()).strip())[0]
        results.append(d)
    return results
예제 #29
0
def fetch_stores(data):
    # country=Greece&city=ATHENS&adutl=+01&kids=+02&undercolor=+06&togetmap=mapdata
    url = data['data_url']
    param = {'country': data['country'], 'city': data['city'], 'adutl': ' 01', 'kids': ' 02',
             'undercolor': ' 06', 'togetmap': 'mapdata'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), 'benetton_log.txt', False)
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.findall(ur'<marker (.+?)>', body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        m1 = re.search(ur'name=\\"(.+?)\\"', m)
        if m1 is not None:
            entry[cm.name_e] = cm.html2plain(m1.group(1).strip().replace(u'\\', ''))
        m1 = re.search(ur'address=\\"(.+?)\\"', m)
        if m1 is not None:
            addr = cm.reformat_addr(cm.html2plain(m1.group(1)).replace(u'\\', ''))
            tel = cm.extract_tel(addr)
            if tel != '':
                entry[cm.tel] = tel
                addr = addr.replace(tel, '')
            entry[cm.addr_e] = cm.reformat_addr(addr)

        m1 = re.search(ur'lat=\\"(.+?)\\"', m)
        if m1 is not None:
            entry[cm.lat] = string.atof(m1.group(1))

        m1 = re.search(ur'lng=\\"(.+?)\\"', m)
        if m1 is not None:
            entry[cm.lng] = string.atof(m1.group(1))

        entry[cm.country_e] = data['country'].strip().upper()
        entry[cm.city_e] = cm.extract_city(data['city'])[0]
        gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), 'benetton_log.txt', False)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
예제 #30
0
def fetch_stores(data):
    """Parse the ``var retailers = [...]`` JavaScript array of a page.

    :param data: dict providing ``url``, ``brand_id``, ``brandname_e`` and
        ``brandname_c``.
    :return: ``[]`` when the request raises or the array cannot be located.

    NOTE(review): the visible body ends inside the loop -- the entry is never
    inserted, appended to ``store_list`` or returned. This looks like a
    truncated listing; confirm against the full source file.
    """
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    m = re.search(ur'var\s+retailers\s*=\s*', body)
    if m is None:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    # The array literal is taken to end at the first ']' after the assignment.
    end = body.find(u']', m.end())
    if end == -1:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    # NOTE(review): 'pat' is compiled but not used in the visible code; the
    # substitution below compiles its own pattern (with two groups).
    pat = re.compile(ur'[\{,]([a-zA-Z_\d]+):')

    store_list = []
    # Quote the bare JavaScript object keys so that the literal parses as JSON.
    for s in json.loads(
            re.sub(re.compile(ur'([\{,])([a-zA-Z_\d]+):'), ur'\1"\2":',
                   body[m.end():end + 1])):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        # Name: the non-empty 'name' / 'name_line_2' values joined by ', '.
        name_list = []
        for tmp in ['name', 'name_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(
                    s[tmp]).strip() != '':
                name_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.name_e] = ', '.join(name_list)
        # Address: the non-empty 'address' / 'address_line_2' values.
        addr_list = []
        for tmp in ['address', 'address_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(
                    s[tmp]).strip() != '':
                addr_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.addr_e] = ', '.join(addr_list)
        entry[cm.country_e] = s['country'].strip().upper()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        region = cm.html2plain(s['region'])
        # Use the region as province only when it contains no digits and no
        # '&' or ';' characters.
        if re.search(
                ur'\d+',
                region) is None and '&' not in region and ';' not in region:
            entry[cm.province_e] = region.strip().upper()
예제 #31
0
파일: unode50.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """Fetch every distributor and shop of one country and store them.

    The JSON document at ``<host>/en/shops/<country_id>.json`` is downloaded.
    Each item of its ``distributors`` and ``shops`` lists (in that order) is
    turned into a store entry, passed through ``gs.field_sense``, logged and
    inserted into the ``stores`` table.

    :param data: dict providing ``host``, ``country_id``, ``country``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries that were inserted; ``[]`` when the download
        or the JSON decoding fails.
    """
    url = '%s/en/shops/%s.json' % (data['host'], urllib.quote(data['country_id'].encode('utf-8')))
    try:
        body = cm.get_data(url)
        raw = json.loads(body)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, 'unode50_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    for key, store_class in (('distributors', 'distributor'), ('shops', 'shop')):
        # Tolerate a missing or null list instead of aborting the whole country.
        for s in raw.get(key) or []:
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.store_class] = store_class
            entry[cm.country_e] = data['country']
            entry[cm.name_e] = s['title']

            # A malformed coordinate is logged and skipped so that one bad
            # record does not discard every other store of the country.
            for coord_key, field in (('lat', cm.lat), ('lng', cm.lng)):
                try:
                    if s[coord_key] is not None:
                        entry[field] = string.atof(str(s[coord_key]))
                except (ValueError, KeyError, TypeError) as e:
                    cm.dump('Error in fetching %s: %s' % (coord_key, str(e)), 'unode50_log.txt')

            entry[cm.addr_e] = s['address']
            entry[cm.city_e] = cm.extract_city(s['city'])[0]
            entry[cm.tel] = s['phone']
            entry[cm.zip_code] = s['postal_code']
            gs.field_sense(entry)
            cm.dump('(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e],
                                                                entry[cm.country_e],
                                                                entry[cm.continent_e]), 'unode50_log.txt')
            db.insert_record(entry, 'stores')
            store_list.append(entry)

    return store_list
예제 #32
0
def fetch_stores(data):
    """Scrape the ``store-info`` blocks of one city from the store-locator page.

    The page at ``data['host'] + data['store_url']`` is requested with the
    query parameters ``CC`` (``data['country_code']``) and ``City``
    (``data['city']``). Every ``<div class="store-info">`` block found in the
    response is turned into a store entry.

    Returns an empty list (after logging to ``log_name``) when the request
    fails.

    NOTE(review): this excerpt stops inside the per-store loop; the remaining
    field handling and the final return are not visible here.
    """
    url = data['host'] + data['store_url']
    param = {'CC': data['country_code'], 'City': data['city']}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    # pat_tel = re.compile(ur'tel:\s*', re.I)
    # pat_fax = re.compile(ur'fax:\s*', re.I)
    # pat_email = re.compile(ur'email:\s*', re.I)

    # Each pattern lazily captures the text after its label, up to the next
    # 'tel' / 'fax' / 'email' (case-insensitive) or the end of the text.
    # NOTE(review): none of the three is used in the visible part of the loop.
    pat_tel = re.compile(ur'tel:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S)
    pat_fax = re.compile(ur'fax:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S)
    pat_email = re.compile(ur'email:\s*(.+?)(?=(?:tel|fax|email|$))',
                           re.I | re.S)

    for m in re.finditer(ur'<div class="store-info">', body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.country_e] = data['country_code']
        entry[cm.city_e] = cm.extract_city(data['city'])[0]

        # Isolate this store's <div>...</div> block, starting at the match.
        sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0]
        m1 = re.search(ur'<h2 class="store-name[^"]*">(.+?)</h2>', sub)
        if m1 is not None:
            entry[cm.name_e] = cm.reformat_addr(m1.group(1))
            # The store name doubles as the store class.
            entry[cm.store_class] = entry[cm.name_e]
        m1 = re.search(ur'<dt class="address"', sub)
        if m1 is not None:
            tmp = cm.reformat_addr(
                cm.extract_closure(sub[m1.end():], ur'<dd>', ur'</dd>')[0])
            entry[cm.addr_e] = tmp
            # Zip code: a run of 4+ digits/hyphens holding at least four digits,
            # looked up in the second-to-last comma-separated address part.
            # NOTE(review): len(tmp) is the character count of the address, so
            # an address without any comma still reaches [-2] and raises
            # IndexError -- confirm whether a part-count check was intended.
            if len(tmp) > 1:
                m1 = re.search(ur'[\d\-]{4,}', tmp.split(',')[-2])
                if m1 is not None and len(re.findall(ur'\d', m1.group())) >= 4:
                    entry[cm.zip_code] = m1.group().strip()
예제 #33
0
파일: iwc.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    """Parse the ``var retailers = [...]`` array embedded in a page into store entries.

    The page at ``data['url']`` is downloaded with ``cm.get_data``. The
    JavaScript array literal following ``var retailers =`` is cut out, its bare
    object keys are wrapped in double quotes so that ``json.loads`` accepts it,
    and one entry is filled per array element.

    Returns an empty list (after logging to ``log_name``) when the download
    fails, when the ``var retailers =`` assignment is not found, or when no
    ``]`` follows it.

    NOTE(review): this excerpt stops partway through the loop body; the
    remaining field handling and the final return are not visible here.
    """
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    # Locate the JavaScript assignment that introduces the store array.
    m = re.search(ur'var\s+retailers\s*=\s*', body)
    if m is None:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    # The array is taken to end at the first ']' after the assignment.
    end = body.find(u']', m.end())
    if end == -1:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
    # NOTE(review): `pat` is not used in the visible part of this function; the
    # re.sub() call below compiles its own, two-group variant of the pattern.
    pat = re.compile(ur'[\{,]([a-zA-Z_\d]+):')

    store_list = []
    # Turn `{key:` / `,key:` into `{"key":` / `,"key":` before decoding as JSON.
    for s in json.loads(re.sub(re.compile(ur'([\{,])([a-zA-Z_\d]+):'), ur'\1"\2":', body[m.end():end + 1])):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        # Store name: the non-empty name lines joined with ', '.
        name_list = []
        for tmp in ['name', 'name_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(s[tmp]).strip() != '':
                name_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.name_e] = ', '.join(name_list)
        # Address: built the same way from the two address lines.
        addr_list = []
        for tmp in ['address', 'address_line_2']:
            if tmp in s and s[tmp] is not None and cm.html2plain(s[tmp]).strip() != '':
                addr_list.append(cm.html2plain(s[tmp]).strip())
        entry[cm.addr_e] = ', '.join(addr_list)
        entry[cm.country_e] = s['country'].strip().upper()
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        region = cm.html2plain(s['region'])
        # The region is used as the province only when it contains no digit,
        # no '&' and no ';'.
        if re.search(ur'\d+', region) is None and '&' not in region and ';' not in region:
            entry[cm.province_e] = region.strip().upper()
예제 #34
0
def fetch_stores(data):
    """Scrape the ``store-info`` blocks of one city from the store-locator page.

    The page at ``data['host'] + data['store_url']`` is requested with the
    query parameters ``CC`` (``data['country_code']``) and ``City``
    (``data['city']``). Every ``<div class="store-info">`` block found in the
    response is turned into a store entry.

    Returns an empty list (after logging to ``log_name``) when the request
    fails.

    NOTE(review): this excerpt stops inside the per-store loop; the remaining
    field handling and the final return are not visible here.
    """
    url = data['host'] + data['store_url']
    param = {'CC': data['country_code'], 'City': data['city']}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    # pat_tel = re.compile(ur'tel:\s*', re.I)
    # pat_fax = re.compile(ur'fax:\s*', re.I)
    # pat_email = re.compile(ur'email:\s*', re.I)

    # Each pattern lazily captures the text after its label, up to the next
    # 'tel' / 'fax' / 'email' (case-insensitive) or the end of the text.
    # NOTE(review): none of the three is used in the visible part of the loop.
    pat_tel = re.compile(ur'tel:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S)
    pat_fax = re.compile(ur'fax:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S)
    pat_email = re.compile(ur'email:\s*(.+?)(?=(?:tel|fax|email|$))', re.I | re.S)

    for m in re.finditer(ur'<div class="store-info">', body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.country_e] = data['country_code']
        entry[cm.city_e] = cm.extract_city(data['city'])[0]

        # Isolate this store's <div>...</div> block, starting at the match.
        sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0]
        m1 = re.search(ur'<h2 class="store-name[^"]*">(.+?)</h2>', sub)
        if m1 is not None:
            entry[cm.name_e] = cm.reformat_addr(m1.group(1))
            # The store name doubles as the store class.
            entry[cm.store_class] = entry[cm.name_e]
        m1 = re.search(ur'<dt class="address"', sub)
        if m1 is not None:
            tmp = cm.reformat_addr(cm.extract_closure(sub[m1.end():], ur'<dd>', ur'</dd>')[0])
            entry[cm.addr_e] = tmp
            # Zip code: a run of 4+ digits/hyphens holding at least four digits,
            # looked up in the second-to-last comma-separated address part.
            # NOTE(review): len(tmp) is the character count of the address, so
            # an address without any comma still reaches [-2] and raises
            # IndexError -- confirm whether a part-count check was intended.
            if len(tmp) > 1:
                m1 = re.search(ur'[\d\-]{4,}', tmp.split(',')[-2])
                if m1 is not None and len(re.findall(ur'\d', m1.group())) >= 4:
                    entry[cm.zip_code] = m1.group().strip()
예제 #35
0
def fetch_stores(data):
    """Page through the JSON store service at ``data['url']`` and store each result.

    Starts at ``data['page']`` when that key is present, otherwise at page 1,
    and requests ``page_size`` stores per page. Every store whose ``id`` has
    not been seen yet is converted to an entry, passed through
    ``gs.field_sense``, appended to ``store_list`` and inserted into the
    ``stores`` table. Progress and errors are printed and also written to
    ``err_log_<brandname_e>.log``.

    NOTE(review): this excerpt ends at a bare ``finally:``; the page
    advancement, loop termination, closing of the log file and the return
    value are not visible here.
    """
    url = data['url']

    if 'page' in data:
        page = data['page']
    else:
        page = 1

    # tot / tot_page stay at -1 until the first page has been parsed.
    tot = -1
    tot_page = -1
    store_ids = set([])
    store_list = []
    page_size = 400
    # NOTE(review): no matching close() is visible in this excerpt.
    f = open('err_log_%s.log' % data['brandname_e'], 'w')

    while True:
        msg = 'Fetching page %d...' % page
        print msg
        f.write('%s\n' % msg)
        try:
            # html = cm.get_data(url, {'brand': 'adidas', 'geoengine': 'google', 'method': 'get',
            #                          'category': 'store', 'latlng': '51.729663,5.310298,100',
            #                          'page': '%d' % page, 'pagesize': page_size,
            #                          'fields': 'name,street1,street2,addressline,buildingname,postal_code,city,'
            #                                    'state,store_owner,country,storetype,longitude_google,'
            #                                    'latitude_google,store_owner,state,performance,brand_store,'
            #                                    'factory_outlet,originals,neo_label,y3,slvr,children,woman,'
            #                                    'footwear,football,basketball,outdoor,porsche_design,miadidas,'
            #                                    'miteam,stella_mccartney,eyewear,micoach,opening_ceremony',
            #                          'format': 'json', 'storetype': ''})
            html = cm.get_data(
                url, {
                    'brand':
                    'adidas',
                    'geoengine':
                    'google',
                    'method':
                    'get',
                    'category':
                    'store',
                    'latlng':
                    '31.22434895,121.47675279999999, 10000',
                    'page':
                    '%d' % page,
                    'pagesize':
                    page_size,
                    'fields':
                    'name,street1,street2,addressline,buildingname,postal_code,city,'
                    'state,store_owner,country,storetype,longitude_google,'
                    'latitude_google,store_owner,state,performance,brand_store,'
                    'factory_outlet,originals,neo_label,y3,slvr,children,woman,'
                    'footwear,football,basketball,outdoor,porsche_design,miadidas,'
                    'miteam,stella_mccartney,eyewear,micoach,opening_ceremony',
                    'format':
                    'json',
                    'storetype':
                    ''
                })
        except Exception:
            msg = 'Error occured: %s' % url
            print msg
            f.write('%s\n' % msg)
            dump_data = {
                'level': 0,
                'time': cm.format_time(),
                'data': {
                    'url': url
                },
                'brand_id': data['brand_id']
            }
            cm.dump(dump_data)
            page += 1
            # A caller-supplied page means a single-page run: stop after the
            # failure. Otherwise move on to the next page unless it is past
            # tot_page (which is still -1 if no page has been parsed yet).
            if 'page' in data:
                break
            else:
                if page > tot_page:
                    break
                else:
                    continue

        try:
            # Drop anything that precedes the first '{' of the response.
            start = html.find('{')
            if start != -1:
                html = html[start:]
                # Strip control characters
            pat = re.compile(u'[\r\n]')
            html = re.sub(pat, ' ', html)
            # Replace every backslash together with the character after it by
            # a space before decoding.
            pat = re.compile(ur'\\.')
            html = re.sub(pat, ' ', html)
            raw = json.loads(html)['wsResponse']
            if tot == -1:
                tot = string.atoi(raw['results'])
                # Number of pages, rounded up (integer division under Python 2).
                tot_page = (tot - 1) / page_size + 1
            raw = raw['result']

            def addr_func(addr_list, addr_map, key):
                # Append the stripped value of `key` when the store has it.
                if key in addr_map:
                    addr_list.append(addr_map[key].strip())

            for s in raw:
                try:
                    entry = cm.init_store_entry(data['brand_id'],
                                                data['brandname_e'],
                                                data['brandname_c'])
                    # Skip stores that were already handled.
                    if s['id'] in store_ids:
                        continue
                    store_ids.add(s['id'])
                    entry[cm.name_e] = s['name']

                    addr_list = []
                    # map() is used only for its side effect on addr_list; this
                    # relies on Python 2's eager map().
                    map(lambda key: addr_func(addr_list, s, key),
                        ['addressline', 'buildingname', 'street1', 'street2'])
                    entry[cm.addr_e] = ', '.join(addr_list)

                    entry[cm.city_e] = cm.extract_city(s['city'])[0]
                    entry[cm.country_e] = s['country'].strip().upper()
                    if 'storetype' in s:
                        entry[cm.store_type] = s['storetype']
                    entry[cm.lat] = string.atof(s['latitude_google'])
                    entry[cm.lng] = string.atof(s['longitude_google'])
                    entry[cm.store_class] = 'adidas'

                    gs.field_sense(entry)
                    msg = '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
                        data['brandname_e'], data['brand_id'],
                        entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e],
                        entry[cm.country_e], entry[cm.continent_e])
                    print msg
                    f.write('%s\n' % msg.encode('utf-8'))
                    store_list.append(entry)
                    db.insert_record(entry, 'stores')
                except Exception, e:
                    # A failing store is reported and skipped; the page goes on.
                    msg = 'Error processing. Reason: %s, content: %s' % (
                        str(e), s)
                    print msg
                    f.write('%s\n' % msg.encode('utf-8'))
        except Exception, e:
            msg = 'Error processing page %d, reason: %s' % (page, str(e))
            print msg
            f.write('%s\n' % msg)
        finally:
예제 #36
0
파일: bulgari.py 프로젝트: haizi-zh/firenze
    param = {'lang': 'EN_US', 'geo_id': data['city_id']}

    try:
        body = cm.get_data(url, param)
    except Exception, e:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    raw = json.loads(body)
    if 'storeList' not in raw:
        return []
    for s in raw['storeList']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.city_e] = cm.extract_city(data['city'])[0]
        entry[cm.province_e] = data['state'].upper()
        entry[cm.country_e] = data['country'].upper()
        entry[cm.store_class] = s['type']['name']
        entry[cm.store_type] = ', '.join(type_map[item['name']]
                                         for item in s['categories'])
        entry[cm.name_e] = s['name'].strip()

        loc = s['location']
        entry[cm.addr_e] = cm.reformat_addr(loc['address'])
        if 'phone' in loc and loc['phone'] is not None:
            entry[cm.tel] = loc['phone'].strip()
        if 'fax' in loc and loc['fax'] is not None:
            entry[cm.fax] = loc['fax'].strip()
        if 'postalCode' in loc and loc['postalCode'] is not None:
            entry[cm.zip_code] = loc['postalCode'].strip()
예제 #37
0
파일: nike.py 프로젝트: haizi-zh/firenze
        entry[cm.province_e] = cm.html2plain(val).strip().upper()
    val = s['description']
    if val:
        entry[cm.comments] = cm.html2plain(val).strip().upper()
    addr_list = []
    for key in ('street', 'street2'):
        if not s[key]:
            continue

        term = cm.reformat_addr(s[key])
        if term != '':
            addr_list.append(term)
    entry[cm.addr_e] = ', '.join(addr_list)
    entry[cm.store_type] = ', '.join(item['code'] for item in s['categories'])

    entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]
    gs.field_sense(entry)
    ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    if ret[1] is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = ret[1]
    if ret[2] is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = ret[2]
    gs.field_sense(entry)

    if '???' not in entry[cm.addr_e] and '???' not in entry[cm.name_e]:
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        return entry
    else:
예제 #38
0
                             re.S):
            entry[common.hours] = common.reformat_addr(m1)
            break

        m1 = re.findall(ur'href="/(.+?)" title="View on map"', sub_html)
        if len(m1) > 0:
            entry[common.url] = host + '/' + m1[0]
            lat, lng = get_coordinates(entry[common.url])
            common.update_entry(entry, {common.lat: lat, common.lng: lng})

        # geo
        city_e = cities[city_id]['name'].strip()
        country_e = cities[city_id]['country']['name'].strip().upper()
        continent_e = cities[city_id]['country']['continent'].strip().upper()
        common.update_entry(entry,
                            {common.city_e: common.extract_city(city_e)[0], common.country_e: country_e, common.continent_e: continent_e})
        gs.field_sense(entry)

        # ret = common.geo_translate(country_e.strip())
        # if len(ret) > 0:
        #     common.update_entry(entry, {common.continent_c: ret[common.continent_c],
        #                                 common.continent_e: ret[common.continent_e],
        #                                 common.country_c: ret[common.country_c],
        #                                 common.country_e: ret[common.country_e]})
        # common.update_entry(entry, {common.brandname_c: brandname_c, common.brandname_e: brandname_e})
        # common.chn_check(entry)

        print '%s Found store: %s, %s (%s, %s)' % (
            brandname_e, entry[common.name_e], entry[common.addr_e], entry[common.country_e],
            entry[common.continent_e])
        db.insert_record(entry, 'stores')
예제 #39
0
            try:
                entry[cm.lng] = string.atof(store.attrib['data-longitude'])
            except (ValueError, KeyError, TypeError) as e:
                cm.dump('Error in fetching lng: %s' % str(e), log_name)

            if entry[cm.lat] == 0 and entry[cm.lng] == 0:
                entry[cm.lat], entry[cm.lng] = '', ''

            item = pq(store)
            tmp = item('h1')
            entry[cm.name_e] = cm.html2plain(
                tmp[0].text).strip() if len(tmp) > 0 and tmp[0].text else ''

            tmp = item('dd.location')
            tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else ''
            entry[cm.city_e] = cm.extract_city(tmp)[0]

            tmp = item('dd.street')
            tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else ''
            entry[cm.addr_e] = cm.reformat_addr(tmp)

            tmp = item('dd.phone')
            tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else ''
            entry[cm.tel] = tmp.strip()

            tmp = item('dd.hours')
            tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else ''
            entry[cm.hours] = tmp.strip()

            tmp = item('dd.products')
            tmp = tmp[0].text if len(tmp) > 0 and tmp[0].text else ''
예제 #40
0
 city = ''
 country = ''
 for m1 in re.findall(
         ur'<span itemprop="streetAddress">(.*?)</span>', m, re.S):
     if len(m1.strip()) > 0:
         street_addr = cm.reformat_addr(m1)
     break
 for m1 in re.findall(ur'<span itemprop="postalCode">(.*?)</span>',
                      m):
     if len(m1.strip()) > 0:
         zip_code = m1
     break
 for m1 in re.findall(
         ur'<span itemprop="addressLocality">(.*?)</span>', m):
     if len(m1.strip()) > 0:
         city = cm.extract_city(m1)[0]
     break
 for m1 in re.findall(
         ur'<span itemprop="addressCountry">(.*?)</span>', m):
     if len(m1.strip()) > 0:
         country = m1
     break
 entry[cm.zip_code] = zip_code
 # 没有上述标签的情况
 if street_addr == '':
     tmp = cm.reformat_addr(m)
     terms = tmp.split(',')
     ret = gs.look_up(terms[-1], 1)
     if ret is not None:
         # t2 = cm.geo_translate(terms[-1])
         # if len(t2) != 0:
예제 #41
0
def fetch_stores(data):
    """Post a store search for one city and store every store block found.

    The search form at ``data['url']`` is posted with the country code and
    city taken from ``data``. The response is consumed from front to back:
    each ``<h4>`` heading opens a block that runs to the next ``</div>``.
    Headings that contain both 'Country' and 'Language', or 'href', or
    'products' are skipped. For the others the heading becomes the store
    class and the block text is split on commas into phone and address parts.

    :param data: dict providing ``url``, ``country_code``, ``city``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries that were inserted; ``[]`` when the request
        fails.
    """
    url = data['url']
    try:
        body = cm.post_data(
            url, {
                'searchtype': 'normal',
                'reiter_selected': 'reiter1',
                'country_id': data['country_code'],
                'city_id': data['city']
            })
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    # Compiled once; both patterns are reused on every loop iteration.
    pat_heading = re.compile(ur'<h4>\s*(.+?)\s*</h4>')
    pat_products = re.compile(ur'<h4>(products|產品)</h4>', re.I)

    store_list = []
    while True:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        m = pat_heading.search(body)
        if m is None:
            break
        heading = m.group(1)
        entry[cm.store_class] = heading

        # The block runs to the next </div>. str.find() returns -1 when there
        # is none; fall back to the end of the text so that -1 is never used as
        # a slice bound (which would silently drop the last character).
        end = body.find(u'</div>', m.end())
        if end == -1:
            end = len(body)
        sub = body[m.end():end]
        # Consume the processed part; the next search continues after it.
        body = body[end:]

        if ('Country' in heading and 'Language' in heading) \
            or 'href' in heading or 'products' in heading:
            continue

        addr_list = []
        for term in cm.reformat_addr(sub).split(','):
            if u'Show on map' in term:
                continue
            elif u'電話' in term or u'Phone' in term:
                entry[cm.tel] = term.replace(u'電話', '').replace(u'Phone',
                                                                '').strip()
            else:
                addr_list.append(term)
        entry[cm.addr_e] = ', '.join(addr_list)

        # The first "products" heading in the remaining text supplies the
        # store type.
        m = pat_products.search(body)
        if m is not None:
            end = body.find(u'</div>', m.end())
            if end == -1:
                end = len(body)
            entry[cm.store_type] = cm.reformat_addr(body[m.end():end])

        entry[cm.country_e] = data['country_code']
        entry[cm.city_e] = cm.extract_city(data['city'])[0]
        gs.field_sense(entry)
        # NOTE(review): name_e is never assigned in this function, so the
        # message shows whatever cm.init_store_entry put there.
        print '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    return store_list
예제 #42
0
        return ()

    store_list = []
    body = re.sub(ur'GetLocalLevisCallback\(', '', body)[:-1]
    for s in json.loads(body)['d']['results']:
        try:
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

            uid = s['__metadata']['uri']
            if uid in store_map:
                cm.dump(u'%s already exists.' % uid, log_name)
                continue

            entry[cm.country_e] = cm.html2plain(s['CountryRegion']).strip().upper()
            entry[cm.native_id] = uid
            entry[cm.city_e] = cm.extract_city(s['Locality'])[0]
            entry[cm.addr_e] = cm.reformat_addr(s['AddressLine'])

            entry[cm.zip_code] = s['PostalCode']
            entry[cm.tel] = s['Phone']
            entry[cm.name_e] = cm.html2plain(s['BranchName']).strip() if s['BranchName'] else ''

            try:
                entry[cm.lat] = string.atof(s['Latitude']) if s['Latitude'] != '' else ''
            except (ValueError, KeyError, TypeError) as e:
                cm.dump('Error in fetching lat: %s' % str(e), log_name)
            try:
                entry[cm.lng] = string.atof(s['Longitude']) if s['Longitude'] != '' else ''
            except (ValueError, KeyError, TypeError) as e:
                cm.dump('Error in fetching lng: %s' % str(e), log_name)
예제 #43
0
    while True:
        entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
        sub, start, end = cm.extract_closure(html, ur'\{', ur'\}')
        if end == 0:
            break
        js = json.loads(sub)
        start = end
        html = html[start:]

        raw = js['StoreLocator']
        entry[cm.name_e] = raw['name']
        addr1 = raw['adress1']
        addr2 = raw['adress2']
        entry[cm.addr_e] = cm.reformat_addr(', '.join([addr1, addr2]))
        entry[cm.zip_code] = raw['postcode']
        entry[cm.city_e] = cm.extract_city(raw['city'])[0]
        entry[cm.province_e] = raw['region']
        entry[cm.tel] = raw['phone']
        entry[cm.fax] = raw['fax']
        entry[cm.email] = raw['email']
        entry[cm.hours] = cm.reformat_addr(raw['opening'])
        entry[cm.lat] = string.atof(raw['latitude'])
        entry[cm.lng] = string.atof(raw['longitude'])
        entry[cm.url] = raw['link'].replace('\\', '')

        raw = js['Country']
        entry[cm.country_e] = raw['name']
        raw = js['StoreLocatorType']
        entry[cm.store_type] = raw['name']
        gs.field_sense(entry)
예제 #44
0
    url = data['url']
    param = {'operation': 'coSearch', 'numResults': 999999, 'mnlt': -89, 'mxlt': 89, 'mnln': -179, 'mxln': 179,
             'token': 'LEVI', 'heavy': 'true'}
    try:
        body = cm.get_data(url, param)
    except Exception, e:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()

    store_list = []
    for s in json.loads(body)['RESULTS']:
        s = s['store']
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

        entry[cm.country_e] = s['countryCode']
        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        entry[cm.province_e] = s['stateCode']

        addr_list = []
        if 'address1' in s:
            val = s['address1']
            val = cm.html2plain(val).strip() if val else ''
            if val != '':
                addr_list.append(val)
        if 'address2' in s:
            val = s['address2']
            val = cm.html2plain(val).strip() if val else ''
            if val != '':
                addr_list.append(val)
        entry[cm.addr_e] = ', '.join(addr_list)
예제 #45
0
        try:
            try:
                entry[cm.lat] = string.atof(str(s['location'][0]))
                entry[cm.lng] = string.atof(str(s['location'][1]))
            except (KeyError, IndexError, ValueError, TypeError):
                pass

            s = s['content']
            try:
                entry[cm.name_e] = cm.html2plain(s['title']).strip()
            except (KeyError, TypeError):
                pass

            tmp_list = s['analytics_label'].split('-')
            entry[cm.country_e] = tmp_list[0]
            entry[cm.city_e] = cm.extract_city(tmp_list[1])[0]

            try:
                entry[cm.addr_e] = cm.reformat_addr(s['address']).strip()
            except (KeyError, TypeError):
                pass

            try:
                entry[cm.fax] = s['fax'].strip()
            except (KeyError, TypeError):
                pass
            try:
                entry[cm.tel] = s['phone'].strip()
            except (KeyError, TypeError):
                pass
            try:
예제 #46
0
            entry[common.hours] = common.reformat_addr(m1)
            break

        m1 = re.findall(ur'href="/(.+?)" title="View on map"', sub_html)
        if len(m1) > 0:
            entry[common.url] = host + '/' + m1[0]
            lat, lng = get_coordinates(entry[common.url])
            common.update_entry(entry, {common.lat: lat, common.lng: lng})

        # geo
        city_e = cities[city_id]['name'].strip()
        country_e = cities[city_id]['country']['name'].strip().upper()
        continent_e = cities[city_id]['country']['continent'].strip().upper()
        common.update_entry(
            entry, {
                common.city_e: common.extract_city(city_e)[0],
                common.country_e: country_e,
                common.continent_e: continent_e
            })
        gs.field_sense(entry)

        # ret = common.geo_translate(country_e.strip())
        # if len(ret) > 0:
        #     common.update_entry(entry, {common.continent_c: ret[common.continent_c],
        #                                 common.continent_e: ret[common.continent_e],
        #                                 common.country_c: ret[common.country_c],
        #                                 common.country_e: ret[common.country_e]})
        # common.update_entry(entry, {common.brandname_c: brandname_c, common.brandname_e: brandname_e})
        # common.chn_check(entry)

        print '%s Found store: %s, %s (%s, %s)' % (
예제 #47
0
파일: oris.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    print '(%s/%d) Found city: %s' % (data['brandname_e'], data['brand_id'],
                                      data['city_e'])
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    # 处理重定向
    m = re.search('<h2>Object moved to <a href="(.+?)">', html)
    if m is not None:
        data['url'] = data['host'] + m.group(1)
        return fetch_countries(data)

    m = re.search('var\s+data\s*=\s*', html)
    if m is None:
        return []
    sub, start, end = cm.extract_closure(html[m.end():], r'\[', r'\]')
    if end == 0:
        return []

    store_list = []
    for s in json.loads(sub):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        name = s['Name']
        if cm.is_chinese(name):
            entry[cm.name_c] = name
        else:
            entry[cm.name_e] = name
        entry[cm.addr_e] = cm.html2plain(s['Street'])
        entry[cm.city_e] = cm.extract_city(data['city_e'])[0]
        entry[cm.country_e] = data['country_e']
        entry[cm.province_e] = data['province_e']
        pat = re.compile(ur'tel[\.: ]*', re.I)
        entry[cm.tel] = re.sub(pat, '', s['Phone']).strip()
        pat = re.compile(ur'fax[\.: ]*', re.I)
        entry[cm.fax] = re.sub(pat, '', s['Fax']).strip()
        entry[cm.email] = s['Email'].strip()
        entry[cm.url] = s['Website'].strip()
        coord = s['LatLng']
        if coord is not None and len(coord) >= 2:
            if coord[0] is not None:
                entry[cm.lat] = string.atof(coord[0])
            if coord[1] is not None:
                entry[cm.lng] = string.atof(coord[1])

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print '(%s/%d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    return store_list
예제 #48
0
        store_map[val] = entry

        val = store.getiterator("name")[0].text
        entry[cm.name_e] = cm.html2plain(val).strip() if val else ""

        addr_list = []
        for idx in xrange(1, 3):
            val = store.getiterator("address%d" % idx)[0].text
            if val:
                val = cm.reformat_addr(val)
                if val != "":
                    addr_list.append(val)
        entry[cm.addr_e] = ", ".join(addr_list)

        val = store.getiterator("city")[0].text
        entry[cm.city_e] = cm.extract_city(val)[0] if val else ""
        val = store.getiterator("province")[0].text
        entry[cm.province_e] = cm.html2plain(val).strip().upper() if val else ""
        if entry[cm.province_e] == "":
            val = store.getiterator("state")[0].text
            entry[cm.province_e] = cm.html2plain(val).strip().upper() if val else ""
        val = store.getiterator("country")[0].text
        entry[cm.country_e] = val.strip().upper() if val else ""

        val = store.getiterator("email")[0].text
        entry[cm.email] = val if val else ""
        val = store.getiterator("phone")[0].text
        entry[cm.tel] = val if val else ""
        val = store.getiterator("postalcode")[0].text
        entry[cm.zip_code] = val if val else ""
예제 #49
0
                                    data['brandname_c'])
        entry[cm.country_e] = data['country_code']
        sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0]

        m1 = re.search(ur'<div class="fn org">(.+?)</div>', sub, re.S)
        entry[cm.name_e] = cm.reformat_addr(m1.group(1)) if m1 else ''

        m1 = re.search(ur'<div class="adr">', sub, re.S)
        if m1:
            addr_sub = cm.extract_closure(sub[m1.start():], ur'<div\b',
                                          ur'</div>')[0]
            entry[cm.addr_e] = cm.reformat_addr(addr_sub)
            m2 = re.search(ur'<span class="locality">([^<>?]+?),*\s*</span>',
                           addr_sub)
            city = cm.html2plain(m2.group(1)).strip().upper() if m2 else ''
            entry[cm.city_e] = cm.extract_city(city if city != ',' else '')[0]
            m2 = re.search(ur'<span\s+class="region"\s+title="([^"]+)"[^<>]*>',
                           addr_sub)
            entry[cm.province_e] = cm.html2plain(
                m2.group(1)).strip().upper() if m2 else ''
            m2 = re.search(ur'<span\s+class="postal-code"[^<>]*>([^<>]+)',
                           addr_sub)
            entry[cm.zip_code] = m2.group(1).strip() if m2 else ''

        m1 = re.search(ur'<div class="tel">([^<>]+)</div>', sub, re.S)
        entry[cm.tel] = m1.group(1).strip() if m1 else ''

        m1 = re.search(ur'<div class="hours_wrapper">', sub)
        if m1:
            hours_sub = cm.extract_closure(sub[m1.start():], ur'<div\b',
                                           ur'</div>')[0]
예제 #50
0
    pat = re.compile(ur',\s*"terms":\s*\{.+?\}', re.S)
    tmp = re.sub(pat, '', tmp)

    try:
        raw = json.loads(tmp)['stores']
    except ValueError, e:
        print e
    store_list = []
    for s in raw:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = s['name']
        entry[cm.country_e] = data['country_code']
        addr = s['address']
        m = re.search(ur'<span class=\"locality\">([^<>]+?)</span>', addr)
        if m is not None:
            entry[cm.city_e] = cm.extract_city(m.group(1))[0]
        m = re.search(ur'<span class=\"region\">([^<>]+?)</span>', addr)
        if m is not None:
            entry[cm.province_e] = m.group(1).strip().upper()
        m = re.search(ur'<span class=\"postal-code\">([^<>]+?)</span>', addr)
        if m is not None:
            entry[cm.zip_code] = m.group(1).strip()
        entry[cm.addr_e] = cm.reformat_addr(addr)

        entry[cm.tel] = s['phone']
        if s['lat'] is not None and s['lat'] != '':
            entry[cm.lat] = string.atof(s['lat'])
        if s['lng'] is not None and s['lng'] != '':
            entry[cm.lng] = string.atof(s['lng'])
        entry[cm.store_type] = s['type']
        if s['event_link'] is not None:
예제 #51
0
def fetch_stores(data):
    param = {
        'action': 'getStoresFromAjax',
        'country': data['country_code'],
        'region': data['city'],
        'collection': ''
    }
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for m1 in re.finditer(ur'<div class="shop-type-container">', body):
        sub = cm.extract_closure(body[m1.start():], ur'<div\b', ur'</div>')[0]
        store_class = ''
        m2 = re.search(ur'<div class="shop-type-title">(.+?)</div>', sub, re.S)
        if m2 is not None:
            store_class = cm.reformat_addr(m2.group(1))

        for m2 in re.finditer(ur'<div class="shop"', sub):
            store_sub = cm.extract_closure(sub[m2.start():], ur'<div\b',
                                           ur'</div>')[0]
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                        data['brandname_c'])
            entry[cm.store_class] = store_class
            entry[cm.country_e] = data['country_code']
            entry[cm.city_e] = cm.extract_city(data['city'])[0]

            m3 = re.search(
                ur'loadStore\((\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\)',
                store_sub)
            if m3 is not None:
                data['store_id'] = string.atoi(m3.group(1))
                entry[cm.lat] = string.atof(m3.group(2))
                entry[cm.lng] = string.atof(m3.group(3))
                entry[cm.store_type] = ', '.join(get_detail(data))

            m3 = re.search(
                ur'<div class="shop-name shop-details shop-main-name">([^<>]+)</div>',
                store_sub)
            if m3 is not None:
                entry[cm.name_e] = m3.group(1).strip()
            addr_list = []
            m3 = re.search(
                ur'<div class="shop-street shop-details">([^<>]+)</div>',
                store_sub)
            if m3 is not None:
                addr_list.append(cm.reformat_addr(m3.group(1)))
            m3 = re.search(
                ur'<div class="shop-city shop-details">([^<>]+)</div>',
                store_sub)
            if m3 is not None:
                tmp = cm.reformat_addr(m3.group(1))
                m3 = re.search(ur'(\d{4,})', tmp)
                if m3 is not None:
                    entry[cm.zip_code] = m3.group(1).strip()
                addr_list.append(tmp)
            entry[cm.addr_e] = ', '.join(addr_list)

            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            gs.field_sense(entry)
            cm.dump(
                '(%s / %d) Found store: %s, %s (%s, %s, %s)' %
                (data['brandname_e'], data['brand_id'], entry[cm.name_e],
                 entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e],
                 entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)
예제 #52
0
파일: escada.py 프로젝트: haizi-zh/firenze
def fetch_stores(data):
    # Fetch the shop listing for one continent/country/city and record each
    # shop found.
    #
    # `data` is a dict; this code reads 'store_url', 'continent', 'country',
    # 'city', 'brand_id', 'brandname_e' and 'brandname_c' from it.
    #
    # Every parsed shop is printed, appended to `store_list` and inserted
    # into the 'stores' table. If the POST request fails, an error record is
    # dumped and an empty list is returned.
    url = data['store_url']
    try:
        # The form is posted with the location filters plus fixed
        # 'send' = 1 and 'page' = 0 fields.
        body = cm.post_data(
            url, {
                'continent': data['continent'],
                'country': data['country'],
                'city': data['city'],
                'send': 1,
                'page': 0
            })
    except Exception:
        print 'Error occured: %s' % url
        # Structured error record: level, timestamp, failing url, brand id.
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    store_list = []
    # One entry per <div class="shop"> marker in the response.
    for m in re.finditer(ur'<div class="shop">', body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])

        # Search for a <div>...</div> closure starting right after the
        # marker, i.e. inside the shop div.
        sub, start, end = cm.extract_closure(body[m.end():], ur'<div\b',
                                             ur'</div>')
        # NOTE(review): end == 0 presumably means no closure was found --
        # confirm against cm.extract_closure.
        if end == 0:
            continue
        # Store name: text of the first <h3>.
        m1 = re.search(ur'<h3>\s*(.+?)\s*</h3>', sub, re.S)
        if m1 is not None:
            entry[cm.name_e] = m1.group(1)

        # Store type: text of the first <p>, with '+' separators (and the
        # whitespace around them) replaced by ', '.
        m1 = re.search(ur'<p[^>]*>(.+?)</p>', sub, re.S)
        if m1 is not None:
            entry[cm.store_type] = re.sub(re.compile(ur'\s*\+\s*', re.S), ', ',
                                          m1.group(1).strip())

        # Address: the <li> items of the first <ul> in the shop block.
        addr_sub, start, end = cm.extract_closure(sub, ur'<ul\b', ur'</ul>')
        if end != 0:
            tmp = re.findall(ur'<li>\s*(.+?)\s*</li>', addr_sub)
            addr_list = []

            # With three or more items, the last one is taken as the phone
            # number and removed from the address lines.
            if len(tmp) >= 3:
                entry[cm.tel] = tmp[-1].strip()
                del tmp[-1]

            # Keep only the items that are non-empty after HTML clean-up.
            for term in tmp:
                term = cm.html2plain(term).strip()
                if term != '':
                    addr_list.append(term)
            entry[cm.addr_e] = ', '.join(addr_list)

        # Opening hours: the <ul> searched for from the (case-insensitive)
        # 'opening hours' label onwards.
        start = sub.lower().find(ur'opening hours')
        if start != -1:
            opening_sub, start, end = cm.extract_closure(
                sub[start:], ur'<ul\b', ur'</ul>')
            tmp = re.findall(ur'<li>\s*(.+?)\s*</li>', opening_sub)
            opening_list = []
            for term in tmp:
                term = cm.html2plain(term).strip()
                if term != '':
                    opening_list.append(term)
            entry[cm.hours] = ', '.join(opening_list)

        # Continent, country and city come from the query, not the page.
        cm.update_entry(
            entry, {
                cm.continent_e: data['continent'].strip().upper(),
                cm.country_e: data['country'].strip().upper()
            })
        entry[cm.city_e] = cm.extract_city(data['city'])[0]

        # field_sense runs both before and after the address-derived
        # province/city are filled in; those only fill fields that are
        # still empty.
        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')