Beispiel #1
0
def fetch_uk(body, data):
    """Parse the store blocks found in ``body`` and save every store.

    The ``<div class="fableft">`` container is located first; every inner
    ``<div>`` is then treated as one store whose ``<p>`` lines make up the
    address (the last line may be a telephone number).  Each entry is
    inserted into the ``stores`` table and collected.

    :param body: HTML text of the store page.
    :param data: dict providing ``name`` (upper-case country name),
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries that were inserted; an empty list when the
        store container cannot be located.
    """
    start = body.find(u'<div class="fableft">')
    if start == -1:
        print 'Error in finding %s stores' % data['name']
        return []
    body, start, end = cm.extract_closure(body[start:], ur'<div\b', ur'</div>')
    if end == 0:
        print 'Error in finding %s stores' % data['name']
        return []

    store_list = []
    for m in re.findall(ur'<div>\s*(.+?)\s*</div>', body, re.S):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.country_e] = data['name']

        addr_list = re.findall(ur'<p>\s*(.+?)\s*</p>', m)
        if not addr_list:
            # A block without any <p> line has no address to work with;
            # indexing addr_list[-1] below would raise IndexError.
            print 'Error in parsing %s store entry' % data['name']
            continue

        # The last address line may actually be the telephone number.
        tel = cm.extract_tel(addr_list[-1])
        if tel != '':
            entry[cm.tel] = tel
            del addr_list[-1]

        if data['name'] == 'AUSTRALIA':
            _, province, city = gs.addr_sense(', '.join(addr_list),
                                              data['name'])
            if city is not None:
                entry[cm.city_e] = city
            if province is not None:
                entry[cm.province_e] = province
        else:
            if len(addr_list) < 2:
                # The city (second to last line) and the postcode (last
                # line) are both required in this branch.
                print 'Error in parsing %s store entry' % data['name']
                continue
            city = addr_list[-2].strip().upper()
            entry[cm.city_e] = city
            # Replace the raw city by its canonical name when the lookup
            # places it in the UK.
            ret = gs.look_up(city, 3)
            if ret is not None and ret['country']['name_e'] == gs.look_up(
                    'UK', 1)['name_e']:
                entry[cm.city_e] = ret['name_e']
            entry[cm.zip_code] = addr_list[-1].strip().upper()
        entry[cm.addr_e] = ', '.join(addr_list)
        entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

        # Only fill province / city from the address when still empty.
        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e],
            entry[cm.continent_e])

        db.insert_record(entry, 'stores')
        store_list.append(entry)

    # Return the collected stores, matching the list type of the error paths.
    return store_list
Beispiel #2
0
def fetch_store_detail(s, data, isOfficial=False):
    """Build a store entry from one raw store record.

    :param s: mapping with the raw fields read here: ``name``, ``city``,
        ``address``, ``email``, ``phone``, ``fax``, ``lat`` and ``lng``.
    :param data: dict providing ``brand_id``, ``brandname_e``,
        ``brandname_c``, ``country`` and the fallback ``city``.
    :param isOfficial: selects the store class 'Official Retailer' when
        true, 'Retailer' otherwise.
    :return: the populated entry; nothing is written to the database here.
    """
    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                data['brandname_c'])

    entry[cm.name_e] = cm.html2plain(s['name']).strip()
    entry[cm.country_e] = data['country']

    # Prefer the city named by the record; fall back to the caller's city.
    city = cm.html2plain(s['city']).strip().upper()
    if not city:
        city = data['city']
    entry[cm.city_e] = cm.extract_city(city)[0]

    entry[cm.addr_e] = cm.html2plain(s['address']).strip()
    entry[cm.email] = s['email'].strip()
    entry[cm.tel] = s['phone'].strip()
    entry[cm.fax] = s['fax'].strip()
    if isOfficial:
        entry[cm.store_class] = 'Official Retailer'
    else:
        entry[cm.store_class] = 'Retailer'

    # Coordinates are optional: an empty string is stored as-is, and a
    # missing or malformed value is logged and leaves the field untouched.
    coord_fields = (
        ('lat', cm.lat, 'Error in fetching lat: %s'),
        ('lng', cm.lng, 'Error in fetching lng: %s'),
    )
    for src_key, dst_key, err_msg in coord_fields:
        try:
            if s[src_key] != '':
                entry[dst_key] = string.atof(s[src_key])
            else:
                entry[dst_key] = ''
        except (ValueError, KeyError, TypeError) as e:
            cm.dump(err_msg % str(e), log_name)

    # Fill province / city from the address only when they are still empty.
    gs.field_sense(entry)
    guess = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    for idx, key in ((1, cm.province_e), (2, cm.city_e)):
        if guess[idx] is not None and entry[key] == '':
            entry[key] = guess[idx]
    gs.field_sense(entry)

    return entry
Beispiel #3
0
def fetch_stores(data):
    """Fetch the JSON store list for one country / city and save each store.

    :param data: dict providing ``store_url``, ``country``, ``city``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the entries inserted into the ``stores`` table; an
        empty list when the request fails.
    """
    url = data['store_url']
    try:
        body = cm.get_data(url, {
            'country': data['country'],
            'city': data['city']
        })
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        })
        return []

    items = json.loads(body)['items']
    found = []

    for item in items:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.country_e] = data['country'].strip().upper()
        # For the USA the requested "city" is stored as the province.
        region = cm.extract_city(data['city'])[0]
        if entry[cm.country_e] == 'USA':
            entry[cm.province_e] = region
        else:
            entry[cm.city_e] = region
        gs.field_sense(entry)

        cleaned = cm.reformat_addr(item['address'].replace(u'\\', ''))
        parts = [piece.strip() for piece in cleaned.split(',')]
        # The trailing component may be a telephone number.
        phone = cm.extract_tel(parts[-1])
        if phone != '':
            entry[cm.tel] = phone
            parts.pop()
        entry[cm.addr_e] = ', '.join(parts)
        entry[cm.store_type] = item['shop_type']

        # Fill country / province / city from the address when still empty.
        gs.field_sense(entry)
        guess = gs.addr_sense(entry[cm.addr_e])
        for idx, key in enumerate((cm.country_e, cm.province_e, cm.city_e)):
            if guess[idx] is not None and entry[key] == '':
                entry[key] = guess[idx]
        gs.field_sense(entry)

        print '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        db.insert_record(entry, 'stores')
        found.append(entry)

    return found
Beispiel #4
0
def fetch_store_details(data):
    url = data['host'] + data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching store details: %s' % url, log_name)
        return []

    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                data['brandname_c'])
    start = body.find(ur'<h3>available in store</h3>')
    if start != -1:
        type_sub = cm.extract_closure(body[start:], ur'<ul\b', ur'</ul>')[0]
        entry[cm.store_type] = ', '.join(
            cm.html2plain(tmp).strip()
            for tmp in re.findall(ur'<li[^<>]*>(.+?)</li>', type_sub, re.S))

    start = body.find(ur"<div class='gmap_info_box'")
    if start == -1:
        cm.dump('Error in fetching store details: %s' % url, log_name)
        return []
    body = cm.extract_closure(body[start:], ur'<div\b', ur'</div>')[0]

    raw = json.loads(cm.extract_closure(body, ur'\{', ur'\}')[0])['table']
    entry[cm.name_e] = cm.html2plain(raw['name'])
    entry[cm.city_e] = data['city'].strip().upper()
    entry[cm.country_e] = data['country'].strip().upper()
    # entry[cm.store_type] = data['store_type']
    entry[cm.addr_e] = cm.reformat_addr(raw['address'])
    m = re.search(re.compile(ur'phone:(.*?)fax:(.*?)', re.I | re.S),
                  raw['phone'])
    if m is not None:
        entry[cm.tel] = m.group(1).strip()
        entry[cm.fax] = m.group(2).strip()
    else:
        m = re.search(re.compile(ur'phone:(.*?)', re.I | re.S), raw['phone'])
        if m is not None:
            entry[cm.tel] = m.group(1).strip()
        m = re.search(re.compile(ur'fax:(.*?)', re.I | re.S), raw['phone'])
        if m is not None:
            entry[cm.fax] = m.group(1).strip()
    entry[cm.hours] = raw['hours']
    if raw['lat'] is not None and raw['lat'] != '':
        entry[cm.lat] = string.atof(raw['lat'])
    if raw['lng'] is not None and raw['lng'] != '':
        entry[cm.lat] = string.atof(raw['lng'])
    gs.field_sense(entry)
    ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    if ret[1] is not None:
        entry[cm.province_e] = ret[1]
        gs.field_sense(entry)

    cm.dump(
        '(%s / %d) Found store: %s, %s (%s, %s)' %
        (data['brandname_e'], data['brand_id'], entry[cm.name_e],
         entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
        log_name)
    db.insert_record(entry, 'stores')
    return [entry]
Beispiel #5
0
def fetch_stores(data):
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    store_list = []
    for m1 in re.finditer(ur'<lignecountry\s+titre\s*=\s*"([^"]+)"', body):
        country = m1.group(1).strip().upper()
        if country == 'U.S.A.':
            country = 'US'
        sub_country = cm.extract_closure(body[m1.start():], ur'<lignecountry\b', ur'</lignecountry>')[0]
        for m2 in re.finditer(ur'<lignecity\s+titre\s*=\s*"([^"]+)"', sub_country):
            city = m2.group(1).strip().upper()
            sub_city = cm.extract_closure(sub_country[m2.start():], ur'<lignecity\b', ur'</lignecity>')[0]
            m3 = re.search(ur'<!\[CDATA\[(.+?)\]\]>', sub_city, re.S)
            if m3 is None:
                continue
            sub_city = m3.group(1)
            store_subs = re.split(ur'<\s*h2\s*>\s*LANVIN BOUTIQUE\s*<\s*/h2\s*>', sub_city)
            for s in store_subs:
                if s.strip() == '':
                    continue
                m4 = re.search(ur'<p>(.+?)</p>', s, re.S)
                if m4 is None:
                    continue
                entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
                entry[cm.country_e] = country
                entry[cm.city_e] = city
                s = m4.group(1)
                m4 = re.search(ur'(.+?)\n\s*\n', s, re.S)
                if m4 is not None:
                    entry[cm.addr_e] = cm.reformat_addr(m4.group(1))
                m4 = re.search(ur'Phone:(.+?)\n\s*\n', s, re.S)
                if m4 is not None:
                    entry[cm.tel] = cm.reformat_addr(m4.group(1).strip())
                m4 = re.search(ur'Boutique Hours:(.+?)\n\s*\n', s, re.S)
                if m4 is not None:
                    entry[cm.hours] = cm.reformat_addr(m4.group(1).strip())
                m4 = re.search(ur'Products available:(.+?)\n\s*\n', s, re.S)
                if m4 is not None:
                    entry[cm.store_type] = m4.group(1).strip()
                m4 = re.search(ur'Email:\s*<a href="mailto:([^"]+)">', s)
                if m4 is not None:
                    entry[cm.email] = m4.group(1).strip()
                gs.field_sense(entry)
                ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
                if ret[1] is not None and entry[cm.province_e] == '':
                    entry[cm.province_e] = ret[1]
                gs.field_sense(entry)
                cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                    entry[cm.name_e], entry[cm.addr_e],
                                                                    entry[cm.country_e],
                                                                    entry[cm.continent_e]), log_name)
                db.insert_record(entry, 'stores')
                store_list.append(entry)
Beispiel #6
0
def fetch_stores(data):
    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                data['brandname_c'])
    code = data['country_code']
    if gs.look_up(code, 1) is None:
        entry[cm.country_e] = cm.html2plain(data['country']).strip().upper()
    else:
        entry[cm.country_e] = code
    entry[cm.name_e] = data['store_name']
    entry[cm.city_e] = cm.extract_city(data['city'])[0]
    entry[cm.lat] = data['lat'] if data['lat'] is not None else ''
    entry[cm.lng] = data['lng'] if data['lng'] is not None else ''

    m = re.search(ur'data-boutique\s*=\s*"%s"' % data['store_id'],
                  data['content'])
    sub = data['content'][m.end():]

    m1 = re.search(ur'<li class="isDistributeur[^<>]+>(.+?)</li>', sub)
    if m1 is not None:
        entry[cm.store_class] = cm.reformat_addr(m1.group(1))

    m1 = re.search(ur'<li class="place-title[^<>]+>(.+?)</li>', sub, re.S)
    if m1 is not None:
        entry[cm.addr_e] = cm.reformat_addr(m1.group(1))

    m1 = re.search(ur'<li class="contacts[^<>]+>(.+?)</li>', sub, re.S)
    if m1 is not None:
        m2 = re.search(ur'<a class="popupLaunch" href="([^"]+)"', m1.group(1))
        if m2:
            entry = fetch_details(data, m2.group(1), entry)

        m2 = re.search(ur'<p>(.+?)</p>', m1.group(1), re.S)
        if m2:
            ct_list = tuple(
                tmp.strip()
                for tmp in cm.reformat_addr(m2.group(1)).split(','))
            entry[cm.tel] = cm.extract_tel(ct_list[0])
            if len(ct_list) > 1:
                entry[cm.email] = ct_list[1].strip()

    gs.field_sense(entry)
    ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    if ret[1] is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = ret[1]
    if ret[2] is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = ret[2]
    gs.field_sense(entry)

    cm.dump(
        '(%s / %d) Found store: %s, %s (%s, %s)' %
        (data['brandname_e'], data['brand_id'], entry[cm.name_e],
         entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
        log_name)
    db.insert_record(entry, 'stores')

    return tuple(entry)
Beispiel #7
0
def fetch_stores(data):
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error occured in fetching stores: %s' % url, 'canali_log.txt')
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(ur'<div class="storeInfo">', body):
        sub = cm.extract_closure(body[m.start():], ur'<div\b', ur'</div>')[0]
        m1 = re.search(ur'<span itemprop="streetAddress">(.+?)</span>', sub)
        if m1 is None:
            cm.dump('Error: failed to find the address: %s' % url,
                    'canali_log.txt')
            continue
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.addr_e] = cm.reformat_addr(m1.group(1))
        entry[cm.country_e] = data['country']
        entry[cm.city_e] = data['city']
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None:
            entry[cm.province_e] = ret[1]

        m1 = re.search(ur'<span itemprop="telephone">(.+?)</span>', sub)
        if m1 is not None:
            entry[cm.tel] = m1.group(1).strip()

        m1 = re.search(ur'data-latitude="(.+?)"', sub)
        if m1 is not None:
            entry[cm.lat] = string.atof(m1.group(1))
        m1 = re.search(ur'data-longitude="(.+?)"', sub)
        if m1 is not None:
            entry[cm.lng] = string.atof(m1.group(1))

        gs.field_sense(entry)
        cm.dump(
            '(%s / %d) Found store: %s, %s (%s, %s)' %
            (data['brandname_e'], data['brand_id'], entry[cm.name_e],
             entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
            'canali_log.txt')
        db.insert_record(entry, 'stores')
        store_list.append(entry)
Beispiel #8
0
def fetch_stores(data):
    url = data['store_url']
    param = {'myid': data['key'], 'idioma': 'in'}
    try:
        body = cm.get_data(url, param)
    except Exception:
        cm.dump('Error in fetching countries: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for s in json.loads(body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.country_e] = data['country']
        entry[cm.city_e] = cm.extract_city(data['city'])[0]
        entry[cm.name_e] = cm.reformat_addr(s['title'])

        m = re.search(ur'(.+?)-\s*<', s['key'])
        addr_list = [entry[cm.name_e]]
        if m is not None:
            m1 = re.search(ur'-+', m.group(1))
            if m1 is not None:
                tmp = [m.group(1)[:m1.start()], m.group(1)[m1.end():]]
            else:
                tmp = [m.group(1)]
            if len(tmp) > 1:
                entry[cm.tel] = cm.extract_tel(tmp[1])
            m1 = re.search(ur'\d{4,}', tmp[0])
            if m1 is not None:
                entry[cm.zip_code] = m1.group()
            addr_list.append(tmp[0].strip())
        entry[cm.addr_e] = ', '.join(addr_list)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e])
        if ret[0] is not None and entry[cm.country_e] == '':
            entry[cm.country_e] = ret[0]
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump(
            '(%s / %d) Found store: %s, %s (%s, %s)' %
            (data['brandname_e'], data['brand_id'], entry[cm.name_e],
             entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
            log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)

    return store_list
Beispiel #9
0
def fetch_hk(data):
    loc_list = ('Hong Kong', 'Kowloon', 'Macau', 'New Territories')
    url = 'http://levi.com.hk/hk/storelocator'
    store_list = []
    for loc in loc_list:
        param = {'loc': loc}
        try:
            body = cm.get_data(url, param)
        except Exception, e:
            cm.dump('Error in fetching stores: %s' % param, log_name)
            continue

        start = body.find(ur'<div id="addWrapper">')
        if start == -1:
            cm.dump('Error in fetching stores: %s' % param, log_name)
            continue
        sub = cm.extract_closure(body[start:], ur'<ul>', ur'</ul>')[0]
        for s in re.findall(ur'<li>(.+?)</li>', sub, re.S):
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.country_e] = 'MACAU' if loc == 'Macau' else 'HONG KONG'
            entry[cm.city_e] = entry[cm.country_e]

            m = re.search(ur'<div id="addStore">([^<>]+)', s)
            entry[cm.addr_e] = cm.html2plain(m.group(1)) if m else ''

            m = re.search(ur'<div id="addAddress">([^<>]+)', s)
            tmp = cm.html2plain(m.group(1))
            pat = re.compile(ur'business hours?\s*[:\.]?\s*', re.I)
            if re.search(pat, tmp):
                entry[cm.hours] = re.sub(pat, '', tmp).strip()

            m = re.search(ur'<div id="addPhone">([^<>]+)', s)
            tmp = cm.html2plain(m.group(1))
            pat = re.compile(ur'(tel|phone|telephone)?\s*[:\.]?\s*', re.I)
            if re.search(pat, tmp):
                entry[cm.tel] = re.sub(pat, '', tmp).strip()

            gs.field_sense(entry)
            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            if ret[2] is not None and entry[cm.city_e] == '':
                entry[cm.city_e] = ret[2]
            gs.field_sense(entry)

            cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                                entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)
Beispiel #10
0
    def func(item):
        """Turn one store element into an entry and save it.

        ``item`` is called with selectors ('h6', 'p', 'a.track_map[href]').
        The names ``data``, ``url``, ``db`` and ``logger`` come from the
        enclosing scope.

        :return: the inserted entry, or None when its fingerprint is
            already present in ``data['store_list']``.
        """
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.name_e] = cm.html2plain(item('h6')[0].text).strip()

        # Address: comma separated; the last part may be a phone number.
        raw_addr = unicode(pq(item('p')[0]))
        parts = [piece.strip()
                 for piece in cm.reformat_addr(raw_addr).split(',')]
        phone = cm.extract_tel(parts[-1])
        if phone != '':
            entry[cm.tel] = phone
            parts.pop()
        entry[cm.addr_e] = ', '.join(parts)

        # Fingerprint: MD5 over the page URL plus the map link when there
        # is one, otherwise plus the address.
        map_links = item('a.track_map[href]')
        digest = hashlib.md5()
        digest.update(url)
        if len(map_links) == 0:
            digest.update(entry[cm.addr_e])
        else:
            href = map_links[0].attrib['href']
            digest.update(href)
            query = re.search(r'q=([^;]+?)&', cm.html2plain(href))
            if query:
                entry['geo_query_param'] = query.group(1).replace('+', ' ')
        entry[cm.native_id] = digest.hexdigest()
        if entry[cm.native_id] in data['store_list']:
            return None

        entry[cm.country_e] = data['country']
        # Fill country / province / city from the address when still empty.
        gs.field_sense(entry)
        guess = gs.addr_sense(entry[cm.addr_e])
        for idx, key in enumerate((cm.country_e, cm.province_e, cm.city_e)):
            if guess[idx] is not None and entry[key] == '':
                entry[key] = guess[idx]
        gs.field_sense(entry)

        summary = '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        logger.info(summary)
        cm.insert_record(db, entry, data['table'])
        return entry
Beispiel #11
0
def fetch_stores(data):
    url = data['home_url']
    try:
        body = cm.post_data(url, {'lz_sf': data['province'], 'lz_sx': data['city']})
    except Exception:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    start = body.find(u'搜索结果')
    if start == -1:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        return []

    body = body[start + 4:]

    store_list = []
    for m in re.findall(ur'</script>\s*(\S+)\s*</span>', body, re.S):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = m.strip()
        entry[cm.addr_e] = m.strip()
        entry[cm.city_c] = data['city']
        ret = gs.look_up(data['city'], 3)
        if ret is not None:
            entry[cm.city_e] = cm.extract_city(ret['name_e'])[0]
            if ret['province'] != '':
                entry[cm.province_e] = ret['province']['name_e']
        entry[cm.province_c] = data['province']
        ret = gs.look_up(data['province'], 2)
        if ret is not None:
            entry[cm.province_e] = ret['name_e']
        entry[cm.country_e] = u'CHINA'

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), 'benetton_log.txt', False)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
Beispiel #12
0
def fetch_stores_cn(data):
    vals = ['Storenew%d.html' % idx for idx in xrange(1, 10)]
    vals.append('Store.html')
    store_list = []
    for url in ('http://www.hushpuppies.com.cn/%s' % term for term in vals):
        try:
            body = cm.get_data(url)
        except Exception, e:
            cm.dump('Error in fetching stores: %s' % url, log_name)
            return ()

        city_map = dict((m[0].strip(), m[1].strip()) for m in re.findall(
            ur'<a rel="([^"]+)" href="#"[^<>]*>([^<>]+)', body))

        start = body.find(ur'<div id="all-list-wrap" style="float:left">')
        if start == -1:
            cm.dump('Error in fetching stores: %s' % url, log_name)
            return ()
        sub = cm.extract_closure(body[start:], ur'<div\b', ur'</div>')[0]

        for m in re.findall(ur'<ul id="([^"]+)"[^<>]*>(.+?)</ul>', sub, re.S):
            city = city_map[m[0].strip()]
            for store in re.findall(ur'<li><a>([^<>]+)', m[1]):
                entry = cm.init_store_entry(data['brand_id'],
                                            data['brandname_e'],
                                            data['brandname_c'])
                entry[cm.country_e] = u'CHINA'
                entry[cm.city_e] = city
                entry[cm.addr_e] = store.strip()

                gs.field_sense(entry)
                ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
                if ret[1] is not None and entry[cm.province_e] == '':
                    entry[cm.province_e] = ret[1]
                if ret[2] is not None and entry[cm.city_e] == '':
                    entry[cm.city_e] = ret[2]
                gs.field_sense(entry)

                cm.dump(
                    '(%s / %d) Found store: %s, %s (%s, %s)' %
                    (data['brandname_e'], data['brand_id'], entry[cm.name_e],
                     entry[cm.addr_e], entry[cm.country_e],
                     entry[cm.continent_e]), log_name)
                db.insert_record(entry, 'stores')
                store_list.append(entry)
Beispiel #13
0
def fetch_stores(db, data, logger):
    """Scrape the store list page and save every store found on it.

    The page pairs each country link (``#store_list>li>a``) with a store
    list (``#store_list>li>ul``), and inside a list each ``a.marker-store``
    with a ``span.store-infos``; both pairings are asserted to line up.

    :param db: database handle passed through to ``cm.insert_record``.
    :param data: dict providing ``brand_id``, ``brandname_e`` and
        ``brandname_c``.
    :param logger: logger receiving one info line per store.
    :return: tuple of the entries that were inserted.
    """
    page = pq(url='http://www.paulandjoe.com/en/ozcms/stores/list/?country_id=&postcode=')

    found = []

    country_links = page('#store_list>li>a')
    country_lists = page('#store_list>li>ul')
    assert (len(country_links) == len(country_lists))
    for link, listing in zip(country_links, country_lists):
        country = link.text.strip().upper()
        listing_q = pq(listing)
        markers = listing_q('a.marker-store')
        infos = listing_q('span.store-infos')
        assert (len(markers) == len(infos))
        for marker, info in zip(markers, infos):
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

            # Coordinates are stored only when both are present and truthy.
            lat = marker.attrib['data-latitude']
            lat = float(lat) if lat else None
            lng = marker.attrib['data-longitude']
            lng = float(lng) if lng else None
            if lat and lng:
                entry[cm.lat] = lat
                entry[cm.lng] = lng

            entry[cm.name_e] = marker.text
            entry[cm.addr_e] = cm.reformat_addr(str(pq(info)))
            entry[cm.country_e] = country

            # Fill country / province / city from the address when empty.
            gs.field_sense(entry)
            guess = gs.addr_sense(entry[cm.addr_e])
            for idx, key in enumerate((cm.country_e, cm.province_e, cm.city_e)):
                if guess[idx] is not None and entry[key] == '':
                    entry[key] = guess[idx]
            gs.field_sense(entry)

            summary = '(%s/%d) Found store: %s, %s (%s, %s)' % (
                data['brandname_e'], data['brand_id'], entry[cm.name_e],
                entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
            logger.info(summary)
            found.append(entry)
            cm.insert_record(db, entry, 'spider_stores.stores')

    return tuple(found)
Beispiel #14
0
def fetch_cn(data):
    """Read the store finder XML and save every ``Table`` record in it.

    :param data: dict providing ``brand_id``, ``brandname_e`` and
        ``brandname_c``.
    :return: tuple of the entries inserted into the ``stores`` table.
    """
    url = 'http://www.lee.com.cn/xml/storefinder.xml'

    # XML child tag -> entry field; an empty element is stored as ''.
    field_map = (
        ('shop_name', cm.name_e),
        ('city', cm.city_e),
        ('province', cm.province_e),
        ('district', cm.district_e),
        ('address', cm.addr_e),
        ('tel', cm.tel),
    )

    found = []
    for node in pq(url=url)('NewDataSet Table'):
        store = pq(node)
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.country_e] = 'CHINA'
        for tag, key in field_map:
            element = store(tag)[0]
            entry[key] = element.text or ''

        # Fill country / province / city from the address when still empty.
        gs.field_sense(entry)
        guess = gs.addr_sense(entry[cm.addr_e])
        for idx, key in enumerate((cm.country_e, cm.province_e, cm.city_e)):
            if guess[idx] is not None and entry[key] == '':
                entry[key] = guess[idx]
        gs.field_sense(entry)

        summary = '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e],
            entry[cm.continent_e])
        cm.dump(summary, log_name)
        db.insert_record(entry, 'stores')
        found.append(entry)

    return tuple(found)
Beispiel #15
0
def fetch_stores(data):
    url = data['store_url']
    try:
        html = cm.get_data(url, {'nazione': data['country_e'],
                                 'citta': data['city_e'],
                                 'tipo': 'tutti'})
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(ur'<marker\b', html):
        sub, start, end = cm.extract_closure(html[m.start():], ur'<marker\b', ur'</marker>')
        if end == 0:
            continue

        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        m1 = re.search(ur'name\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.name_e] = cm.html2plain(m1.group(1)).strip()

        addr_list = []
        m1 = re.search(ur'address\s*=\s*"(.+?)"', sub)
        if m1 is not None and cm.html2plain(m1.group(1)).strip() != '':
            addr_list.append(cm.html2plain(m1.group(1)).strip())
        m1 = re.search(ur'addr2\s*=\s*"(.+?)"', sub)
        if m1 is not None and cm.html2plain(m1.group(1)).strip() != '':
            addr_list.append(cm.html2plain(m1.group(1)).strip())
        entry[cm.addr_e] = ', '.join(addr_list)

        m1 = re.search(ur'city\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.city_e] = cm.html2plain(m1.group(1)).strip().upper()

        m1 = re.search(ur'country\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.country_e] = cm.html2plain(m1.group(1)).strip().upper()

        m1 = re.search(ur'zipcode\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.name_e] = cm.html2plain(m1.group(1)).strip()

        m1 = re.search(ur'phone\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.tel] = cm.html2plain(m1.group(1)).strip()

        m1 = re.search(ur'email\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.email] = cm.html2plain(m1.group(1)).strip()

        m1 = re.search(ur'website\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.url] = cm.html2plain(m1.group(1)).strip()

        m1 = re.search(ur'lat\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.lat] = string.atof(m1.group(1))

        m1 = re.search(ur'lng\s*=\s*"(.+?)"', sub)
        if m1 is not None:
            entry[cm.lng] = string.atof(m1.group(1))

        entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]
        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e])
        if ret[0] is not None and entry[cm.country_e] == '':
            entry[cm.country_e] = ret[0]
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        print '(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                          entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                          entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')
Beispiel #16
0
def fetch_stores(data):
    param = {
        'action': 'getStoresFromAjax',
        'country': data['country_code'],
        'region': data['city'],
        'collection': ''
    }
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for m1 in re.finditer(ur'<div class="shop-type-container">', body):
        sub = cm.extract_closure(body[m1.start():], ur'<div\b', ur'</div>')[0]
        store_class = ''
        m2 = re.search(ur'<div class="shop-type-title">(.+?)</div>', sub, re.S)
        if m2 is not None:
            store_class = cm.reformat_addr(m2.group(1))

        for m2 in re.finditer(ur'<div class="shop"', sub):
            store_sub = cm.extract_closure(sub[m2.start():], ur'<div\b',
                                           ur'</div>')[0]
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                        data['brandname_c'])
            entry[cm.store_class] = store_class
            entry[cm.country_e] = data['country_code']
            entry[cm.city_e] = cm.extract_city(data['city'])[0]

            m3 = re.search(
                ur'loadStore\((\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\)',
                store_sub)
            if m3 is not None:
                data['store_id'] = string.atoi(m3.group(1))
                entry[cm.lat] = string.atof(m3.group(2))
                entry[cm.lng] = string.atof(m3.group(3))
                entry[cm.store_type] = ', '.join(get_detail(data))

            m3 = re.search(
                ur'<div class="shop-name shop-details shop-main-name">([^<>]+)</div>',
                store_sub)
            if m3 is not None:
                entry[cm.name_e] = m3.group(1).strip()
            addr_list = []
            m3 = re.search(
                ur'<div class="shop-street shop-details">([^<>]+)</div>',
                store_sub)
            if m3 is not None:
                addr_list.append(cm.reformat_addr(m3.group(1)))
            m3 = re.search(
                ur'<div class="shop-city shop-details">([^<>]+)</div>',
                store_sub)
            if m3 is not None:
                tmp = cm.reformat_addr(m3.group(1))
                m3 = re.search(ur'(\d{4,})', tmp)
                if m3 is not None:
                    entry[cm.zip_code] = m3.group(1).strip()
                addr_list.append(tmp)
            entry[cm.addr_e] = ', '.join(addr_list)

            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            gs.field_sense(entry)
            cm.dump(
                '(%s / %d) Found store: %s, %s (%s, %s, %s)' %
                (data['brandname_e'], data['brand_id'], entry[cm.name_e],
                 entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e],
                 entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)
Beispiel #17
0
def get_store_details(data):
    """Download one store's detail record and save it as a store entry.

    Posts ``country_id`` / ``city_id`` / ``store_id`` from ``data`` to
    ``data['url']``, reads the ``elements`` object of the JSON reply, fills
    a fresh entry (name, address, coordinates, url, hours, comments,
    country, city), runs the geo helpers, prints a summary line and inserts
    the entry into the ``stores`` table.

    :param data: dict providing ``url``, ``country_id``, ``city_id``,
        ``store_id``, ``country`` and ``city``.
    :return: the store entry on success; ``[]`` if the request fails.
    """
    url = data['url']
    try:
        html = cm.post_data(
            url, {
                'country': data['country_id'],
                'city': data['city_id'],
                'recordid': data['store_id']
            })
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({
            'level': 1,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': brand_id
        })
        return []

    entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
    info = json.loads(html)['elements']

    # Drop backslashes and turn paragraph tags into comma separators.
    raw_addr = info['address'].replace('\\', '')
    raw_addr = raw_addr.replace('<p>', ',').replace('</p>', ',')
    addr = cm.reformat_addr(raw_addr)
    # The first line is the store name.
    terms = addr.split(',')
    if terms:
        entry[cm.name_e] = cm.reformat_addr(terms[0])
    entry[cm.addr_e] = addr

    # The first "<float>,<float>" pair in the map URL is the lat/lng.
    coord = re.search(ur'(-?\d+\.\d+),(-?\d+\.\d+)', info['gmap'])
    if coord is not None:
        cm.update_entry(entry, {
            cm.lat: string.atof(coord.group(1)),
            cm.lng: string.atof(coord.group(2))
        })

    entry[cm.url] = info['shareurl'].replace('\\', '')
    entry[cm.hours] = info['openingtimes']
    entry[cm.comments] = info['other']

    # Geo
    cm.update_entry(entry, {
        cm.country_e: data['country'],
        cm.city_e: data['city']
    })
    entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

    gs.field_sense(entry)
    sensed = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    # Only fill province/city that are still empty.
    if sensed[1] is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = sensed[1]
    if sensed[2] is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = sensed[2]
    gs.field_sense(entry)

    print '(%s / %d) Found store: %s, %s (%s, %s)' % (
        brandname_e, brand_id, entry[cm.name_e], entry[cm.addr_e],
        entry[cm.country_e], entry[cm.continent_e])

    db.insert_record(entry, 'stores')
    return entry
Beispiel #18
0
def fetch_stores(data):
    """Fetch the JSON store list at ``data['url']`` and save every store.

    Each element of the decoded JSON array becomes one store entry: name,
    address, country, city (plus province for US "city, state" values),
    zip, phone, e-mail, link, store class, opening hours, product lines and
    coordinates. Entries are logged, inserted into the ``stores`` table and
    collected.

    :param data: dict providing ``url``, ``brand_id``, ``brandname_e`` and
        ``brandname_c``.
    :return: list of store entries; ``[]`` if the request fails.
    """
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    raw = json.loads(body)
    store_list = []
    for s in raw:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = s['storename']
        entry[cm.addr_e] = cm.reformat_addr(', '.join([s['building'].replace(u'operated by ', u''),
                                                       s['street'].strip()]))

        # Normalise the country once. It may be null in the feed, and the
        # city branch below used to call .strip() on it unconditionally.
        country = s['country'].strip() if s['country'] is not None else u''
        if s['country'] is not None:
            entry[cm.country_e] = country.upper()
        if s['city'] is not None:
            if country == u'US':
                # US cities are split on the comma: "<city>, <state>".
                parts = s['city'].split(',')
                entry[cm.city_e] = parts[0].strip().upper()
                if len(parts) > 1:
                    entry[cm.province_e] = parts[1].strip().upper()
            else:
                entry[cm.city_e] = s['city'].strip().upper()
            entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

        if s['zip'] is not None:
            entry[cm.zip_code] = s['zip'].strip()
        if s['phone'] is not None:
            entry[cm.tel] = s['phone'].strip()
        if s['storeemail'] is not None:
            entry[cm.email] = s['storeemail'].strip()
        # A "link" containing '@' is not stored as a URL.
        if s['storelink'] is not None and u'@' not in s['storelink']:
            entry[cm.url] = s['storelink'].strip()
        if s['storetype'] is not None:
            entry[cm.store_class] = s['storetype'].strip()

        hours = ['%s: %s' % (day, s[day])
                 for day in ['monday', 'tuesday', 'wednesday', 'thursday',
                             'friday', 'saturday', 'sunday']
                 if s[day] is not None]
        entry[cm.hours] = ', '.join(hours)

        # A product line is carried when its flag is the string '1'.
        styles = [line for line in ['menswear', 'womenswear', 'kidswear']
                  if s[line] == '1']
        entry[cm.store_type] = ', '.join(styles)

        if s['latitude'] is not None and s['latitude'].strip() != '':
            entry[cm.lat] = string.atof(s['latitude'])
        if s['longitude'] is not None and s['longitude'].strip() != '':
            entry[cm.lng] = string.atof(s['longitude'])

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
            # Re-run only when the province was just filled in.
            gs.field_sense(entry)

        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
    return store_list
Beispiel #19
0
def fetch_stores(data):
    """Page through the JSON store locator for one country and save stores.

    Requests ``data['data_url']`` with an increasing ``p`` parameter until
    the page number exceeds ``Stores.TotalPages`` from the latest response.
    The first response seen for a country also fills the module-level
    ``region_map`` with that country's ``RegionId -> Name`` table, which is
    used to set each store's province.

    :param data: dict providing ``data_url``, ``country_code``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of store entries. An empty tuple is returned if any page
        request fails (kept for compatibility with existing callers).
    """
    url = data['data_url']
    param = {
        'output': 'json',
        'country': data['country_code'],
        'brand': 'dkny'
    }
    page = 0
    tot_page = -1  # unknown until the first response arrives
    store_list = []
    while True:
        page += 1
        if tot_page != -1 and page > tot_page:
            break
        param['p'] = page
        try:
            body = cm.get_data(url, param)
        except Exception:
            cm.dump('Error in fetching stores: %s, %s' % (url, param),
                    log_name)
            return ()

        raw = json.loads(body)
        tot_page = raw['Stores']['TotalPages']
        if data['country_code'] not in region_map:
            # Build the region (state) lookup table for this country.
            region_map[data['country_code']] = dict(
                (item['RegionId'], item['Name']) for item in raw['Regions'])
        regions = region_map[data['country_code']]

        for s in raw['Stores']['Items']:
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                        data['brandname_c'])
            entry[cm.country_e] = data['country_code'].upper()
            entry[cm.city_e] = cm.extract_city(s['City'])[0]
            entry[cm.name_e] = cm.html2plain(s['Name']).strip()
            entry[cm.addr_e] = cm.reformat_addr(s['Address'])
            # Optional fields: falsy values are stored as ''.
            entry[cm.tel] = s['Phone'].strip() if s['Phone'] else ''
            entry[cm.fax] = s['Fax'].strip() if s['Fax'] else ''
            entry[cm.email] = s['Email'].strip() if s['Email'] else ''
            entry[cm.lat] = s['Latitude'] if s['Latitude'] else ''
            entry[cm.lng] = s['Longitude'] if s['Longitude'] else ''
            region_id = s['RegionId']
            if region_id in regions:
                entry[cm.province_e] = cm.html2plain(
                    regions[region_id]).strip().upper()

            gs.field_sense(entry)
            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            if ret[2] is not None and entry[cm.city_e] == '':
                entry[cm.city_e] = ret[2]
            gs.field_sense(entry)

            cm.dump(
                '(%s / %d) Found store: %s, %s (%s, %s)' %
                (data['brandname_e'], data['brand_id'], entry[cm.name_e],
                 entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
                log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)

    # Previously nothing was returned after the last page, so callers got
    # None instead of the collected entries.
    return store_list
Beispiel #20
0
    def f(m):
        store_name = m[0].strip()
        addr_str = m[1].strip()

        spl = addr_str.split('<br/>')
        store_type = cm.html2plain(spl[0].strip())

        store_addr = spl[1].strip()
        hour_idx = 2
        store_tel = ''
        for i in xrange(2, len(spl)):
            # If this is not a phone number:
            tel = cm.extract_tel(spl[i])
            if tel == '':
                store_addr += ', ' + spl[i]
                hour_idx = i + 1
            else:
                store_tel = spl[i].strip()
                hour_idx = i + 1
                break

        if hour_idx < len(spl):
            store_hour = cm.html2plain(', '.join(spl[hour_idx:])).strip()
        else:
            store_hour = ''

        # store_addr = cm.reformat_addr('\r\n'.join([val.strip() for val in spl[1:-3]]))
        store_addr = cm.reformat_addr(store_addr)

        store_entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
        cm.update_entry(
            store_entry, {
                cm.continent_e: opt[cm.continent_e].strip().upper(),
                cm.city_e: opt[cm.city_e].strip().upper(),
                cm.country_e: opt[cm.country_e].strip().upper(),
                cm.name_e: cm.name_e,
                cm.addr_e: store_addr,
                cm.store_type: store_type,
                cm.hours: store_hour,
                cm.tel: store_tel
            })
        if opt.has_key(cm.province_e):
            store_entry[cm.province_e] = opt[cm.province_e]
        else:
            store_entry[cm.province_e] = ''
        store_entry[cm.city_e] = cm.extract_city(store_entry[cm.city_e])[0]

        gs.field_sense(store_entry)
        ret = gs.addr_sense(store_entry[cm.addr_e], store_entry[cm.country_e])
        if ret[1] is not None and store_entry[cm.province_e] == '':
            store_entry[cm.province_e] = ret[1]
        if ret[2] is not None and store_entry[cm.city_e] == '':
            store_entry[cm.city_e] = ret[2]
        gs.field_sense(store_entry)

        print '%s Found store: %s, %s (%s, %s)' % (
            brandname_e, store_entry[cm.name_e], store_entry[cm.addr_e],
            store_entry[cm.country_e], store_entry[cm.continent_e])
        db.insert_record(store_entry, 'stores')

        return store_entry
Beispiel #21
0
    store_list = []
    for item in country_sub:
        body = item['content']
        country = item['name']
        for m in re.findall(ur'<div>\s*(.+?)\s*</div>', body, re.S):
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                        data['brandname_c'])
            entry[cm.country_e] = country

            addr_list = re.findall(ur'<p>\s*(.+?)\s*</p>', m)
            tel = cm.extract_tel(addr_list[-1])
            if tel != '':
                entry[cm.tel] = tel
                del addr_list[-1]

            ret = gs.addr_sense(', '.join(addr_list))
            if ret[2] is not None:
                entry[cm.city_e] = ret[2]
            if ret[1] is not None:
                entry[cm.province_e] = ret[1]
            entry[cm.addr_e] = ', '.join(addr_list)
            entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

            gs.field_sense(entry)
            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            if ret[2] is not None and entry[cm.city_e] == '':
                entry[cm.city_e] = ret[2]
            gs.field_sense(entry)
Beispiel #22
0
        val = s['website']
        entry[cm.url] = cm.html2plain(val).strip() if val else ''

        try:
            val = s['lat']
            entry[cm.lat] = string.atof(val) if val and val != '' else ''
        except (ValueError, KeyError, TypeError) as e:
            cm.dump('Error in fetching lat: %s' % str(e), log_name)
        try:
            val = s['lng']
            entry[cm.lng] = string.atof(val) if val and val != '' else ''
        except (ValueError, KeyError, TypeError) as e:
            cm.dump('Error in fetching lng: %s' % str(e), log_name)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e])
        if ret[0] is not None and entry[cm.country_e] == '':
            entry[cm.country_e] = ret[0]
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        if entry[cm.country_e] == '' or entry[cm.city_e] == '':
            ret = None
            location_valid = True
            if entry[cm.lat] != '' and entry[cm.lng] != '':
                ret = gs.geocode(latlng='%f,%f' %
                                 (entry[cm.lat], entry[cm.lng]))
            if ret is None:
Beispiel #23
0
def fetch_stores(data):
    """Fetch the JSON store list of one country and save every store.

    Posts ``rsp=json`` and the country code to ``data['url']`` and builds
    one entry per element of ``stores`` in the reply: name, address, city,
    zip, coordinates, phone, fax, e-mail, website and opening hours.
    Entries are printed, inserted into the ``stores`` table and collected.

    :param data: dict providing ``url``, ``country_code``, ``brand_id``,
        ``brandname_e``, ``brandname_c`` and the ``cm.country_e`` key.
    :return: list of store entries; ``[]`` if the request fails.
    """
    url = data['url']
    try:
        body = cm.post_data(url, {
            'rsp': 'json',
            'country': data['country_code']
        })
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    raw = json.loads(body)
    days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    store_list = []
    for s in raw['stores']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.name_e] = cm.html2plain(s['name']).strip()

        addr_list = []
        for key in ['address1', 'address2']:
            if s[key].strip() != '':
                addr_list.append(cm.reformat_addr(s[key]))
        entry[cm.addr_e] = ' '.join(addr_list)

        # r=s['region'].strip().upper()
        # m = re.search(ur'\b([A-Z]{2})\b', r)
        # if data[cm.country_e]=='UNITED STATES' and m is not None:
        #     # United States
        #     ret = gs.look_up(m.group(1), 2)
        #     if ret is not None:
        #         r = ret['name_e']
        # entry[cm.province_e] = r

        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        entry[cm.zip_code] = s['zip'].strip()
        entry[cm.country_e] = data[cm.country_e]
        entry[cm.lat] = string.atof(s['lat'])
        entry[cm.lng] = string.atof(s['lng'])
        entry[cm.tel] = s['phone'].strip()
        entry[cm.fax] = s['fax'].strip()
        entry[cm.email] = s['emailaddress'].strip()
        entry[cm.url] = s['website'].strip()

        opening = []
        if 'openingHours' in s and s['openingHours'] is not None:
            # The pattern pulls i:<digit>;s:<len>:"<text>" pairs out of
            # the string; the digit indexes ``days``.
            for m in re.finditer(ur'i:(\d);s:\d+:\\?"([^\\"]+?)\\?"',
                                 s['openingHours']):
                day_idx = string.atoi(m.group(1))
                if day_idx >= len(days):
                    # The pattern admits 7-9; skip them instead of
                    # raising IndexError.
                    continue
                opening.append(
                    '%s: %s' % (days[day_idx], m.group(2).strip()))
            entry[cm.hours] = ', '.join(opening)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        print '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e],
            entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    # Previously the success path returned None while the failure path
    # returned a list.
    return store_list
Beispiel #24
0
def fetch_stores(data):
    url = data['url']
    try:
        body = cm.get_data(url, hdr={'Accept': 'application/json'})
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    body = cm.extract_closure(body, ur'\{', ur'\}')[0]
    raw = json.loads(
        body)[u'storesCompleteResponse'][u'storesComplete'][u'storeComplete']
    if not isinstance(raw, list):
        raw = [raw]

    store_list = []
    for s in raw:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        if 'name' in s and s['name'] is not None:
            tmp = s['name']
            if isinstance(tmp, str) or isinstance(tmp, unicode):
                entry[cm.name_e] = cm.html2plain(s['name'])

        if 'address' in s and s['address'] is not None:
            tmp = s['address']
            if 'addressLine' in tmp and tmp['addressLine'] is not None:
                tmp = tmp['addressLine']
                if isinstance(tmp, list):
                    for i in xrange(len(tmp)):
                        tmp[i] = unicode(tmp[i])
                    entry[cm.addr_e] = ', '.join(tmp)
                else:
                    entry[cm.addr_e] = unicode(tmp)

        entry[cm.country_e] = data['country_code']
        if 'latitude' in s and s[
                'latitude'] is not None and s['latitude'] != '':
            entry[cm.lat] = string.atof(s['latitude'])
        if 'longitude' in s and s[
                'longitude'] is not None and s['longitude'] != '':
            entry[cm.lng] = string.atof(s['longitude'])
        if 'openingHours' in s and s['openingHours'] is not None:
            tmp = s['openingHours']
            if tmp is not None and 'openingHour' in tmp:
                tmp = tmp['openingHour']
                if tmp is not None and isinstance(tmp, list):
                    entry[cm.hours] = ', '.join(tmp)
        if 'phone' in s and s['phone'] is not None:
            entry[cm.tel] = s['phone']
        if 'region' in s and s['region'] is not None and 'name' in s['region']:
            tmp = s['region']['name']
            if tmp is not None:
                entry[cm.province_e] = tmp.strip().upper()
        if 'city' in s and s['city'] is not None:
            entry[cm.city_e] = cm.extract_city(s['city'])[0]

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        cm.dump(
            '(%s / %d) Found store: %s, %s (%s, %s)' %
            (data['brandname_e'], data['brand_id'], entry[cm.name_e],
             entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
            log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)

    return store_list
Beispiel #25
0
def fetch_dior_beauty(data):
    url = data['url']
    store_list = []

    with open('city_lite.dat', 'r') as f:
        sub = f.readlines()
    city_map = json.loads(sub[0])
    country = 'CHINA'
    for city in city_map[country]:
        param = {'cityName': city}
        cm.dump('Searching at %s, %s' % (city, country), log_name)
        try:
            body = cm.post_data(url, param)
        except Exception, e:
            cm.dump('Error in fetching states: %s, %s' % (url, param), log_name)
            continue

        m = re.search(ur'var\s+Json\s*=', body)
        if not m:
            continue
        sub = cm.extract_closure(body[m.end():], ur'\{', ur'\}')[0]
        for store in json.loads(sub)['content']['items']:
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.country_e] = country
            entry[cm.comments] = 'BEAUTY'

            addr_list = []
            val = store['addressLine1']
            if val:
                addr_list.append(cm.html2plain(val).strip())
            val = store['addressLine2']
            if val:
                addr_list.append(cm.html2plain(val).strip())
            entry[cm.addr_e] = ', '.join(addr_list)

            val = store['name']
            entry[cm.name_e] = cm.html2plain(val).strip() if val else ''
            val = store['type']
            entry[cm.store_class] = cm.html2plain(val).strip() if val else ''
            val = store['url']
            entry[cm.url] = cm.html2plain(val).strip() if val else ''
            val = store['city']
            entry[cm.city_e] = cm.html2plain(val).strip().upper() if val and val != '' else ''
            val = store['zipcode']
            entry[cm.zip_code] = cm.html2plain(val).strip() if val else ''
            val = store['phone']
            entry[cm.tel] = cm.html2plain(val).strip() if val else ''
            val = store['fax']
            entry[cm.fax] = cm.html2plain(val).strip() if val else ''

            coords = store['coords']
            if coords:
                try:
                    entry[cm.lat] = string.atof(str(coords['lat']))
                except (ValueError, KeyError, TypeError) as e:
                    cm.dump('Error in fetching lat: %s' % str(e), log_name)
                try:
                    entry[cm.lng] = string.atof(str(coords['lng']))
                except (ValueError, KeyError, TypeError) as e:
                    cm.dump('Error in fetching lng: %s' % str(e), log_name)

            gs.field_sense(entry)
            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            if ret[2] is not None and entry[cm.city_e] == '':
                entry[cm.city_e] = ret[2]
            gs.field_sense(entry)

            uid = u'%s|%s|%s|%s|%s,%s' % (
                entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e], entry[cm.country_e], unicode(entry[cm.lat]),
                unicode(entry[cm.lng]))
            if uid in store_map:
                cm.dump(u'%s already exists.' % uid)
                continue
            else:
                store_map[uid] = entry
                cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                    entry[cm.name_e], entry[cm.addr_e],
                                                                    entry[cm.country_e],
                                                                    entry[cm.continent_e]), log_name)
                db.insert_record(entry, 'stores')
                store_list.append(entry)
Beispiel #26
0
def fetch_stores(data):
    """
    Fetch the detailed information of the stores listed on one page.

    Downloads ``data['url']``, takes the contents of the
    ``<ul class="store-list">`` element and builds one entry per ``<li>``:
    the ``class`` attribute becomes the store type, the ``store-name``
    heading the name and the ``store-address`` paragraph the address.
    Continent, country and city are copied from ``data``.

    :rtype : [entries]
    :param data: dict providing ``url`` and the ``cm.continent_e``,
        ``cm.country_e`` and ``cm.city_e`` keys.
    :return: list of store entries; ``[]`` if the request fails or the
        page has no store list.
    """
    # Bind the URL locally: the error handler below used to read a name
    # ``url`` that was never assigned in this function.
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': brand_id
        }
        cm.dump(dump_data)
        return []

    entries = []
    start = html.find(u'<ul class="store-list">')
    if start == -1:
        return entries
    start += len(u'<ul class="store-list">')
    end = html.find(u'</ul>', start)
    if end == -1:
        # No closing tag: keep the rest of the page rather than slicing to
        # -1, which would silently drop the last character.
        end = len(html)
    html = html[start:end]

    for m1 in re.findall(ur'<li class="(.*?)">(.*?)</li>', html, re.S):
        store = cm.init_store_entry(brand_id, brandname_e, brandname_c)
        store[cm.store_type] = m1[0]
        sub_html = m1[1]
        m2 = re.findall(ur'<h3 class="store-name">(.*?)</h3>', sub_html)
        if len(m2) > 0:
            store[cm.name_e] = cm.reformat_addr(m2[0])
        m2 = re.findall(ur'<p class="store-address">(.*?)</p>', sub_html, re.S)
        if len(m2) > 0:
            store[cm.addr_e] = cm.reformat_addr(m2[0])

        cm.update_entry(
            store, {
                cm.continent_e: data[cm.continent_e].strip().upper(),
                cm.country_e: data[cm.country_e].strip().upper(),
                cm.city_e: data[cm.city_e].strip().upper()
            })

        gs.field_sense(store)
        ret = gs.addr_sense(store[cm.addr_e])
        # Only fill country/province/city that are still empty.
        if ret[0] is not None and store[cm.country_e] == '':
            store[cm.country_e] = ret[0]
        if ret[1] is not None and store[cm.province_e] == '':
            store[cm.province_e] = ret[1]
        if ret[2] is not None and store[cm.city_e] == '':
            store[cm.city_e] = ret[2]
        gs.field_sense(store)
        store[cm.city_e] = cm.extract_city(store[cm.city_e])[0]

        print '%s: Found store: %s, %s (%s, %s)' % (
            brandname_e, store[cm.name_e], store[cm.addr_e],
            store[cm.country_e], store[cm.continent_e])
        db.insert_record(store, 'stores')
        entries.append(store)

    # Previously the collected entries were never returned.
    return entries
Beispiel #27
0
            start = sub.find(ur'<dt>')
            m2 = re.search(ur'<dd>(.+?)</dd>', sub[m1.end():start], re.S)
            if m2 is not None:
                entry[cm.hours] = m2.strip()
        m1 = re.search(ur'<dt>Store Carries</dt>', sub)
        if m1 is not None:
            entry[cm.store_type] = cm.reformat_addr(
                cm.extract_closure(sub[m1.end():], ur'<dd>', ur'</dd>')[0])
        m1 = re.search(ur'<ul class="store-links">', sub)
        if m1 is not None:
            m2 = re.search(ur'<a href="([^"]+)"', sub[m1.end():])
            if m2 is not None:
                entry[cm.url] = m2.group(1)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump(
            '(%s / %d) Found store: %s, %s (%s, %s)' %
            (data['brandname_e'], data['brand_id'], entry[cm.name_e],
             entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e]),
            log_name)
        # db.insert_record(entry, 'stores')
        store_list.append(entry)

    return store_list
Beispiel #28
0
def fetch_stores(data):
    """Fetch the dealers of one city and insert or update them as stores.

    Requests ``data['data_url']`` with language, country, region and city
    ids, then iterates over every ``dealer`` element of the response.

    A dealer id not yet in the module-level ``id_set`` produces a new
    entry (name, address, phone, fax, coordinates) that is inserted into
    the ``stores`` table, remembered in ``id_set`` and returned. A dealer
    id already in ``id_set`` is only touched when ``country_code`` is
    ``'CN'``: the remembered entry gets the ``*_c`` name/address/province/
    city fields from this response and its database row is replaced.
    Such updated entries are not added to the returned list.

    :param data: dict providing ``data_url``, ``lang``, ``country_id``,
        ``region_id``, ``city_id``, ``country_code``, ``region_name``,
        ``city_name``, ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of newly created entries; ``[]`` if the request fails.
    """
    url = data['data_url']
    param = {'lang': data['lang'], 'country': data['country_id'], 'region': data['region_id'],
             'city': data['city_id']}
    try:
        body = cm.get_data(url, param)
    except Exception:
        # The failure is dumped twice: a text line to tudor_log.txt and a
        # structured record through cm.dump's default target.
        cm.dump('Error in fetching stores: %s, %s' % (url, param), 'tudor_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    # ``pq`` is presumably pyquery's PyQuery (imported outside this view):
    # each matched <dealer> element is re-wrapped so it can be queried.
    for store in (pq(tmp) for tmp in pq(body.encode('utf-8'))('dealer')):
        try:
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.country_e] = data['country_code']
            entry[cm.province_e] = data['region_name'].replace('PROVINCE', '').strip()
            entry[cm.city_e] = data['city_name']

            store_id = store[0].attrib['id']
            if store_id in id_set:
                # Known dealer: only the CN pass adds anything, and the
                # freshly initialised entry above is discarded.
                if data['country_code'] == 'CN':
                    entry = id_set[store_id]

                    entry[cm.name_c] = cm.reformat_addr(store('name')[0].text).strip()
                    tmp = store('address')
                    entry[cm.addr_c] = cm.reformat_addr(tmp[0].text) if len(tmp) > 0 and tmp[0].text else ''
                    entry[cm.province_c] = data['region_name']
                    entry[cm.city_c] = data['city_name']

                    # Replace the stored row: delete by native id, then
                    # re-insert the augmented entry.
                    # NOTE(review): this SQL is built by string formatting
                    # with a scraped id; switch to a parameterised query if
                    # db.execute supports one.
                    db.execute(u'DELETE FROM stores WHERE brand_id=%d AND native_id="%s"' % (
                        data['brand_id'], entry[cm.native_id]))
                    db.insert_record(entry, 'stores')
            else:
                entry[cm.native_id] = store_id

                entry[cm.name_e] = cm.reformat_addr(store('name')[0].text).strip()
                # Optional child elements: missing or empty ones become ''.
                tmp = store('address')
                entry[cm.addr_e] = cm.reformat_addr(tmp[0].text) if len(tmp) > 0 and tmp[0].text else ''
                tmp = store('phone1')
                entry[cm.tel] = tmp[0].text.strip() if len(tmp) > 0 and tmp[0].text else ''

                tmp = store('fax1')
                entry[cm.fax] = tmp[0].text.strip() if len(tmp) > 0 and tmp[0].text else ''

                # A bad coordinate is logged and the field is left as
                # init_store_entry set it.
                tmp = store('latitude')
                try:
                    entry[cm.lat] = string.atof(tmp[0].text) if len(tmp) > 0 and tmp[0].text else ''
                except (ValueError, KeyError, TypeError) as e:
                    cm.dump('Error in fetching lat: %s' % str(e), log_name)
                tmp = store('longitude')
                try:
                    entry[cm.lng] = string.atof(tmp[0].text) if len(tmp) > 0 and tmp[0].text else ''
                except (ValueError, KeyError, TypeError) as e:
                    cm.dump('Error in fetching lng: %s' % str(e), log_name)

                gs.field_sense(entry)
                ret = gs.addr_sense(entry[cm.addr_e])
                # Only fill country/province/city that are still empty.
                if ret[0] is not None and entry[cm.country_e] == '':
                    entry[cm.country_e] = ret[0]
                if ret[1] is not None and entry[cm.province_e] == '':
                    entry[cm.province_e] = ret[1]
                if ret[2] is not None and entry[cm.city_e] == '':
                    entry[cm.city_e] = ret[2]
                gs.field_sense(entry)
                cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                    entry[cm.name_e], entry[cm.addr_e],
                                                                    entry[cm.country_e],
                                                                    entry[cm.continent_e]), 'tudor_log.txt')
                db.insert_record(entry, 'stores')
                # Remember the entry so a later pass can find it by id.
                id_set[store_id] = entry
                store_list.append(entry)
        except (IndexError, TypeError) as e:
            # A malformed dealer element (e.g. no <name>) skips only that
            # dealer; the traceback is printed for diagnosis.
            print traceback.format_exc()
            continue

    return store_list
Beispiel #29
0
def fetch_store_details(data):
    """Fetch one store detail page and build a store entry from it.

    Downloads ``data['url']``, extracts address, phone, opening hours,
    store type and map coordinates from the HTML with regular expressions,
    inserts the resulting entry into the ``stores`` table and returns it.

    Args:
        data: dict that is read for the keys ``url``, ``brand_id``,
            ``brandname_e``, ``brandname_c`` and ``country``.

    Returns:
        A one-element list containing the entry, or an empty list when the
        page could not be downloaded.
    """
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        # Best-effort scraper: record the failing URL and move on.
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                data['brandname_c'])

    # The country handed in by the caller takes precedence over whatever
    # is later guessed from the address text.
    ret = gs.look_up(data['country'], 1)
    if ret is not None:
        entry[cm.country_e] = ret['name_e']

    m = re.search(ur'<span class="type">Address</span>\s*<p>(.+?)</p>', body,
                  re.S)
    if m is not None:
        addr = cm.reformat_addr(m.group(1))
        country, province, city = gs.addr_sense(addr)
        if country is not None and entry[cm.country_e] == '':
            entry[cm.country_e] = country
        if province is not None:
            entry[cm.province_e] = province
        if city is not None:
            entry[cm.city_e] = city
        entry[cm.addr_e] = addr

    m = re.search(ur'<span class="type">Phone</span>\s*<p>(.+?)</p>', body,
                  re.S)
    if m is not None:
        entry[cm.tel] = m.group(1)

    m = re.search(ur'<span class="type">Opening hours</span>\s*<p>(.+?)</p>',
                  body, re.S)
    if m is not None:
        entry[cm.hours] = cm.reformat_addr(m.group(1))

    m = re.search(ur'<span class="type">You can find</span>\s*<p>(.+?)</p>',
                  body, re.S)
    if m is not None:
        entry[cm.store_type] = cm.reformat_addr(m.group(1))

    m = re.search(ur'google.maps.LatLng\(\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)',
                  body, re.S)
    # A page without an embedded map used to raise AttributeError here and
    # the whole store was lost. Now the coordinates are simply left as
    # init_store_entry set them.
    if m is not None:
        # float() is the non-deprecated equivalent of string.atof().
        entry[cm.lat] = float(m.group(1))
        entry[cm.lng] = float(m.group(2))

    gs.field_sense(entry)
    print '(%s / %d) Found store: %s, %s (%s, %s)' % (
        data['brandname_e'], data['brand_id'], entry[cm.name_e],
        entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
    db.insert_record(entry, 'stores')
    return [entry]
Beispiel #30
0
def fetch_store_details(data):
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching store details: %s' % url, 'debeers_log.txt')
        return []

    start = body.find(u'<div class="store-details">')
    if start == -1:
        cm.dump('Error in fetching store details: %s' % url, 'debeers_log.txt')
    body = cm.extract_closure(body[start:], ur'<div\b', ur'</div>')[0]

    entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

    m = re.search(ur'<h2 class="store-name">(.+?)</h2>', body)
    if m is not None:
        entry[cm.name_e] = m.group(1).strip()

    m_addr = re.search(ur'<div class="store-address">(.+?)</div>', body, re.S)
    if m_addr is not None:
        addr = m_addr.group(1).strip()
        pat_tel = re.compile(ur'<p class="store-phone">(.+?)</p>', re.S)
        pat_fax = re.compile(ur'<p class="store-fax">(.+?)</p>', re.S)
        pat_email = re.compile(ur'<p class="store-email">(.+?)</p>', re.S)

        m = re.search(pat_tel, addr)
        if m is not None:
            entry[cm.tel] = m.group(1).strip()

        m = re.search(pat_fax, addr)
        if m is not None:
            entry[cm.fax] = m.group(1).strip()

        m = re.search(pat_email, addr)
        if m is not None:
            entry[cm.email] = m.group(1).strip()

        addr = re.sub(pat_tel, '', addr)
        addr = re.sub(pat_fax, '', addr)
        addr = re.sub(pat_email, '', addr)
        addr = re.sub(u'<h3>.+?</h3>', '', addr)
        addr = cm.reformat_addr(addr)
        entry[cm.addr_e] = addr
        country, province, city = gs.addr_sense(addr)
        if country is not None:
            entry[cm.country_e] = country
        if province is not None:
            entry[cm.province_e] = province
        if city is not None:
            entry[cm.city_e] = city

    m = re.search(ur'<div class="store-hours">(.+?)</div>', body, re.S)
    if m is not None:
        entry[cm.hours] = cm.reformat_addr(m.group(1))

    gs.field_sense(entry)
    cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                        entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                        entry[cm.continent_e]), 'benetton_log.txt', False)
    db.insert_record(entry, 'stores')

    return [entry]