Example #1
0
def fetch_cities(data):
    """
    城市列表
    :param data:
    """
    html = data['html']

    store_list = []
    while True:
        m = re.search(ur'<li class="expanded"><a href=".*?">(.+?)</a><br\s*?/>', html)
        if m is None:
            break
        html = html[m.start():]

        sub, start, end = cm.extract_closure(html, ur'<li\b', '</li>')
        html = html[end:]

        d = data.copy()
        d['html'] = sub[len(m.group(0)):-len('</li>')]
        terms = m.group(1).strip().upper().split(' ')
        if len(terms) > 1 and cm.is_chinese(terms[-1]):
            d['city_c'] = terms[-1].strip()
            terms = terms[:-1]
        d['city_e'] = ' '.join(terms)
        if d['country_e'] == 'USA':
            m1 = re.search(ur'([A-Z]{2})\s*-\s*(.+)', d['city_e'])
            if m1:
                d['city_e'] = m1.group(2).strip()
                d['province_e'] = m1.group(1).strip()
        print 'Processing %s' % d['city_e']
        store_list.extend(fetch_stores(d))

    return store_list
Example #2
0
def fetch_cities(data):
    """
    城市列表
    :param data:
    """
    html = data['html']

    store_list = []
    while True:
        m = re.search(
            ur'<li class="expanded"><a href=".*?">(.+?)</a><br\s*?/>', html)
        if m is None:
            break
        html = html[m.start():]

        sub, start, end = cm.extract_closure(html, ur'<li\b', '</li>')
        html = html[end:]

        d = data.copy()
        d['html'] = sub[len(m.group(0)):-len('</li>')]
        terms = m.group(1).strip().upper().split(' ')
        if len(terms) > 1 and cm.is_chinese(terms[-1]):
            d['city_c'] = terms[-1].strip()
            terms = terms[:-1]
        d['city_e'] = ' '.join(terms)
        if d['country_e'] == 'USA':
            m1 = re.search(ur'([A-Z]{2})\s*-\s*(.+)', d['city_e'])
            if m1:
                d['city_e'] = m1.group(2).strip()
                d['province_e'] = m1.group(1).strip()
        print 'Processing %s' % d['city_e']
        store_list.extend(fetch_stores(d))

    return store_list
Example #3
0
def fetch_stores(data):
    """
    Get store information.

    POSTs ``country_id`` and ``retail_type`` (with an empty ``retail_city``)
    to ``data["post_url"]``, decodes the reply with ``unicode_escape`` and
    parses it as JSON.  One store entry is built per returned item, run
    through ``gs.field_sense``, passed to ``db.insert_record(entry, "stores")``
    and collected.

    :param data: dict providing ``post_url``, ``country_id``, ``retail_type``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of store entries; an empty list when the request or the
        decoding raises (the failure is reported through ``cm.dump``).
    """
    url = data["post_url"]
    try:
        # Sample request bodies noted by the original author:
        #   country_id=108&retail_city=&retail_type=retail
        #   country_id=99&retail_city=&retail_type=service
        payload = {"country_id": data["country_id"], "retail_city": "", "retail_type": data["retail_type"]}
        raw = cm.post_data(url, payload)
        js = json.loads(raw.decode("unicode_escape"))
    except Exception:
        print "Error occured in getting country list: %s" % url
        cm.dump({"level": 1, "time": cm.format_time(), "data": {"url": url}, "brand_id": data["brand_id"]})
        return []

    found = []
    for item in js:
        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])

        # The name goes into the Chinese or the English field depending on
        # what cm.is_chinese reports.
        name = item["retail_name"].strip()
        entry[cm.name_c if cm.is_chinese(name) else cm.name_e] = name

        entry[cm.addr_e] = item["retail_gmap"].strip()
        entry[cm.zip_code] = item["retail_zipcode"].strip()
        entry[cm.city_e] = item["retail_city"].strip().upper()

        # E-mail and website may be null in the response.
        email = item["retail_email"]
        if email is not None:
            entry[cm.email] = email.strip()
        website = item["retail_website"]
        if website is not None:
            entry[cm.url] = website.strip()

        entry[cm.store_class] = "Retail" if data["retail_type"] == "retail" else "Service Center"
        entry[cm.country_e] = item["country_name"].strip().upper()
        entry[cm.continent_e] = item["continent_name"].strip().upper()

        gs.field_sense(entry)
        summary = (
            data["brandname_e"],
            data["brand_id"],
            entry[cm.name_e],
            entry[cm.addr_e],
            entry[cm.country_e],
            entry[cm.continent_e],
        )
        print "(%s / %d) Found store: %s, %s (%s, %s)" % summary
        found.append(entry)
        db.insert_record(entry, "stores")

    return found
Example #4
0
def fetch_stores(data):
    """
    Get store information.

    POSTs ``country_id`` and ``retail_type`` (with an empty ``retail_city``)
    to ``data['post_url']``, decodes the reply with ``unicode_escape`` and
    parses it as JSON.  One store entry is built per returned item, run
    through ``gs.field_sense``, passed to ``db.insert_record(entry, 'stores')``
    and collected.

    :param data: dict providing ``post_url``, ``country_id``, ``retail_type``,
        ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of store entries; an empty list when the request or the
        decoding raises (the failure is reported through ``cm.dump``).
    """
    url = data['post_url']
    try:
        # Sample request bodies noted by the original author:
        #   country_id=108&retail_city=&retail_type=retail
        #   country_id=99&retail_city=&retail_type=service
        payload = {'country_id': data['country_id'], 'retail_city': '',
                   'retail_type': data['retail_type']}
        raw = cm.post_data(url, payload)
        js = json.loads(raw.decode('unicode_escape'))
    except Exception:
        print 'Error occured in getting country list: %s' % url
        cm.dump({'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']})
        return []

    found = []
    for item in js:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])

        # The name goes into the Chinese or the English field depending on
        # what cm.is_chinese reports.
        name = item['retail_name'].strip()
        entry[cm.name_c if cm.is_chinese(name) else cm.name_e] = name

        entry[cm.addr_e] = item['retail_gmap'].strip()
        entry[cm.zip_code] = item['retail_zipcode'].strip()
        entry[cm.city_e] = item['retail_city'].strip().upper()

        # E-mail and website may be null in the response.
        email = item['retail_email']
        if email is not None:
            entry[cm.email] = email.strip()
        website = item['retail_website']
        if website is not None:
            entry[cm.url] = website.strip()

        entry[cm.store_class] = 'Retail' if data['retail_type'] == 'retail' else 'Service Center'
        entry[cm.country_e] = item['country_name'].strip().upper()
        entry[cm.continent_e] = item['continent_name'].strip().upper()

        gs.field_sense(entry)
        summary = (data['brandname_e'], data['brand_id'],
                   entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                   entry[cm.continent_e])
        print '(%s / %d) Found store: %s, %s (%s, %s)' % summary
        found.append(entry)
        db.insert_record(entry, 'stores')

    return found
Example #5
0
def fetch_stores(data):
    print '(%s/%d) Found city: %s' % (data['brandname_e'], data['brand_id'],
                                      data['city_e'])
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    # 处理重定向
    m = re.search('<h2>Object moved to <a href="(.+?)">', html)
    if m is not None:
        data['url'] = data['host'] + m.group(1)
        return fetch_countries(data)

    m = re.search('var\s+data\s*=\s*', html)
    if m is None:
        return []
    sub, start, end = cm.extract_closure(html[m.end():], r'\[', r'\]')
    if end == 0:
        return []

    store_list = []
    for s in json.loads(sub):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        name = s['Name']
        if cm.is_chinese(name):
            entry[cm.name_c] = name
        else:
            entry[cm.name_e] = name
        entry[cm.addr_e] = cm.html2plain(s['Street'])
        entry[cm.city_e] = cm.extract_city(data['city_e'])[0]
        entry[cm.country_e] = data['country_e']
        entry[cm.province_e] = data['province_e']
        pat = re.compile(ur'tel[\.: ]*', re.I)
        entry[cm.tel] = re.sub(pat, '', s['Phone']).strip()
        pat = re.compile(ur'fax[\.: ]*', re.I)
        entry[cm.fax] = re.sub(pat, '', s['Fax']).strip()
        entry[cm.email] = s['Email'].strip()
        entry[cm.url] = s['Website'].strip()
        coord = s['LatLng']
        if coord is not None and len(coord) >= 2:
            if coord[0] is not None:
                entry[cm.lat] = string.atof(coord[0])
            if coord[1] is not None:
                entry[cm.lng] = string.atof(coord[1])

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print '(%s/%d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    return store_list
Example #6
0
     tmp = cm.reformat_addr(m)
     terms = tmp.split(',')
     ret = gs.look_up(terms[-1], 1)
     if ret is not None:
         # t2 = cm.geo_translate(terms[-1])
         # if len(t2) != 0:
         # This is a country:
         # split the trailing country term off the address
         street_addr = ', '.join(terms[:-1])
         entry[cm.addr_e] = cm.reformat_addr(street_addr)
         entry[cm.country_c] = ret['name_c']
         entry[cm.country_e] = ret['name_e']
         entry[cm.continent_c] = ret['continent']['name_c']
         entry[cm.continent_e] = ret['continent']['name_e']
     else:
         if cm.is_chinese(tmp):
             entry[cm.addr_c] = tmp
         else:
             entry[cm.addr_e] = tmp
 else:
     street_addr = ', '.join([street_addr, zip_code, city])
     entry[cm.addr_e] = cm.reformat_addr(street_addr)
     ret = gs.look_up(country, 1)
     if ret is None:
         # t2 = cm.geo_translate(country)
         # if len(t2) == 0:
         entry[cm.country_c] = country
     else:
         entry[cm.country_c] = ret['name_c']
         entry[cm.country_e] = ret['name_e']
         entry[cm.continent_c] = ret['continent']['name_c']
Example #7
0
def fetch(level=1, data=None, user='******', passwd=''):
    """
    Rebuild this brand's store records from the store-locator JSON service.

    Deletes the existing ``stores`` rows for ``brand_id``, requests the first
    batch from ``url_init`` and keeps requesting ``url_more`` (with a growing
    ``offset``) for as long as the response carries a truthy ``more`` marker.
    Every store item is converted into an entry, inserted into ``stores`` and
    collected.  The database connection is closed on every exit path,
    including the early return and any exception.

    :param level: not used by this function.
    :param data: ignored; the request parameters are rebuilt locally.
    :param user: database user name passed to ``connect_db``.
    :param passwd: database password passed to ``connect_db``.
    :return: list of store entries; an empty list if the initial request
        fails (the failure is reported through ``common.dump``).
    """
    db = common.StoresDb()
    db.connect_db(user=user, passwd=passwd)
    try:
        db.execute(u'DELETE FROM %s WHERE brand_id=%d' % ('stores', brand_id))

        # Request parameters; s/w/n/e presumably describe a bounding box
        # (south/west/north/east) -- TODO confirm against the service.
        data = {
            's': -89,
            'w': -179,
            'n': 89,
            'e': 179,
            'chinese': 0,
            'repair': 1,
            'store': 1
        }
        try:
            html = common.get_data(url_init, data)
        except Exception:
            print 'Error occured in getting the list of countries: %s' % url_init
            dump_data = {
                'level': 1,
                'time': common.format_time(),
                'data': {
                    'data': url_init
                },
                'brand_id': brand_id
            }
            common.dump(dump_data)
            return []

        store_list = []

        store_map = json.loads(html)
        tot = 0
        while True:
            # 'lists' is either a {'uid': entry} dictionary or a plain list.
            tmp = store_map['lists']
            # Whether the response announces more results.
            flag = False
            if isinstance(tmp, dict):
                raw_stores = tmp
                for k in tmp:
                    if 'more' in tmp[k]:
                        flag = tmp[k]['more']
                        break
            else:
                # Normalise the list form to a dict keyed by 'nid', pulling
                # the 'more' marker out on the way.
                raw_stores = {}
                for item in tmp:
                    if 'more' in item:
                        flag = item['more']
                    else:
                        raw_stores[item['nid']] = item

            # Parse raw_stores
            for k in raw_stores:
                s = raw_stores[k]
                if 'more' in s:
                    flag = s['more']
                    continue

                entry = common.init_store_entry(brand_id, brandname_e,
                                                brandname_c)

                if s['country'] is not None:
                    country_c = s['country'].strip().upper()
                    ret = gs.look_up(country_c, 1)
                    if ret is not None:
                        entry[common.country_e] = ret['name_e']
                        entry[common.country_c] = ret['name_c']
                    elif common.is_chinese(country_c):
                        entry[common.country_c] = country_c
                    else:
                        entry[common.country_e] = country_c

                if s['address'] is not None:
                    addr = common.reformat_addr(s['address'])
                    if common.is_chinese(addr):
                        entry[common.addr_c] = addr
                    else:
                        entry[common.addr_e] = addr

                city = s['city']
                if city is not None:
                    city = city.strip().upper()
                    ret = gs.look_up(city, 3)
                    if ret is not None:
                        entry[common.city_c] = ret['name_c']
                        entry[common.city_e] = ret['name_e']
                    elif common.is_chinese(city):
                        entry[common.city_c] = city
                    else:
                        entry[common.city_e] = city

                entry[common.city_e] = common.extract_city(
                    entry[common.city_e])[0]

                if s['email'] is not None:
                    entry[common.email] = s['email']
                if s['fax'] is not None:
                    entry[common.fax] = s['fax']
                if s['latitude'] is not None:
                    entry[common.lat] = string.atof(s['latitude'])
                if s['longitude'] is not None:
                    entry[common.lng] = string.atof(s['longitude'])
                if s['phone'] is not None:
                    entry[common.tel] = s['phone']
                if s['postal_code'] is not None:
                    entry[common.zip_code] = s['postal_code']

                if s['title'] is not None:
                    name = s['title']
                    if common.is_chinese(name):
                        entry[common.name_c] = name
                    else:
                        entry[common.name_e] = name

                if s['operating_hours'] is not None:
                    entry[common.hours] = s['operating_hours']
                if s['url'] is not None:
                    entry[common.url] = host + s['url']

                gs.field_sense(entry)

                print '%s: Found store: %s, %s (%s, %s)' % (
                    brandname_e, entry[common.name_e], entry[common.addr_e],
                    entry[common.country_e], entry[common.continent_e])
                db.insert_record(entry, 'stores')
                store_list.append(entry)

            if not flag:
                tot += len(store_map['lists'])
                break
            # One element of 'lists' is the 'more' marker, not a store.
            tot += len(store_map['lists']) - 1
            data['offset'] = tot
            store_map = json.loads(common.get_data(url_more, data))

        print 'Found a total of %d stores.' % tot
        return store_list
    finally:
        # Release the connection on every exit path.
        db.disconnect_db()
Example #8
0
def fetch(level=1, data=None, user='******', passwd=''):
    """
    Rebuild this brand's store records from the store-locator JSON service.

    Deletes the existing ``stores`` rows for ``brand_id``, requests the first
    batch from ``url_init`` and keeps requesting ``url_more`` (with a growing
    ``offset``) for as long as the response carries a truthy ``more`` marker.
    Every store item is converted into an entry, inserted into ``stores`` and
    collected.  The database connection is closed on every exit path,
    including the early return and any exception.

    :param level: not used by this function.
    :param data: ignored; the request parameters are rebuilt locally.
    :param user: database user name passed to ``connect_db``.
    :param passwd: database password passed to ``connect_db``.
    :return: list of store entries; an empty list if the initial request
        fails (the failure is reported through ``common.dump``).
    """
    db = common.StoresDb()
    db.connect_db(user=user, passwd=passwd)
    try:
        db.execute(u'DELETE FROM %s WHERE brand_id=%d' % ('stores', brand_id))

        # Request parameters; s/w/n/e presumably describe a bounding box
        # (south/west/north/east) -- TODO confirm against the service.
        data = {'s': -89, 'w': -179, 'n': 89, 'e': 179, 'chinese': 0, 'repair': 1, 'store': 1}
        try:
            html = common.get_data(url_init, data)
        except Exception:
            print 'Error occured in getting the list of countries: %s' % url_init
            dump_data = {'level': 1, 'time': common.format_time(), 'data': {'data': url_init}, 'brand_id': brand_id}
            common.dump(dump_data)
            return []

        store_list = []

        store_map = json.loads(html)
        tot = 0
        while True:
            # 'lists' is either a {'uid': entry} dictionary or a plain list.
            tmp = store_map['lists']
            # Whether the response announces more results.
            flag = False
            if isinstance(tmp, dict):
                raw_stores = tmp
                for k in tmp:
                    if 'more' in tmp[k]:
                        flag = tmp[k]['more']
                        break
            else:
                # Normalise the list form to a dict keyed by 'nid', pulling
                # the 'more' marker out on the way.
                raw_stores = {}
                for item in tmp:
                    if 'more' in item:
                        flag = item['more']
                    else:
                        raw_stores[item['nid']] = item

            # Parse raw_stores
            for k in raw_stores:
                s = raw_stores[k]
                if 'more' in s:
                    flag = s['more']
                    continue

                entry = common.init_store_entry(brand_id, brandname_e, brandname_c)

                if s['country'] is not None:
                    country_c = s['country'].strip().upper()
                    ret = gs.look_up(country_c, 1)
                    if ret is not None:
                        entry[common.country_e] = ret['name_e']
                        entry[common.country_c] = ret['name_c']
                    elif common.is_chinese(country_c):
                        entry[common.country_c] = country_c
                    else:
                        entry[common.country_e] = country_c

                if s['address'] is not None:
                    addr = common.reformat_addr(s['address'])
                    if common.is_chinese(addr):
                        entry[common.addr_c] = addr
                    else:
                        entry[common.addr_e] = addr

                city = s['city']
                if city is not None:
                    city = city.strip().upper()
                    ret = gs.look_up(city, 3)
                    if ret is not None:
                        entry[common.city_c] = ret['name_c']
                        entry[common.city_e] = ret['name_e']
                    elif common.is_chinese(city):
                        entry[common.city_c] = city
                    else:
                        entry[common.city_e] = city

                entry[common.city_e] = common.extract_city(entry[common.city_e])[0]

                if s['email'] is not None:
                    entry[common.email] = s['email']
                if s['fax'] is not None:
                    entry[common.fax] = s['fax']
                if s['latitude'] is not None:
                    entry[common.lat] = string.atof(s['latitude'])
                if s['longitude'] is not None:
                    entry[common.lng] = string.atof(s['longitude'])
                if s['phone'] is not None:
                    entry[common.tel] = s['phone']
                if s['postal_code'] is not None:
                    entry[common.zip_code] = s['postal_code']

                if s['title'] is not None:
                    name = s['title']
                    if common.is_chinese(name):
                        entry[common.name_c] = name
                    else:
                        entry[common.name_e] = name

                if s['operating_hours'] is not None:
                    entry[common.hours] = s['operating_hours']
                if s['url'] is not None:
                    entry[common.url] = host + s['url']

                gs.field_sense(entry)

                print '%s: Found store: %s, %s (%s, %s)' % (
                    brandname_e, entry[common.name_e], entry[common.addr_e], entry[common.country_e],
                    entry[common.continent_e])
                db.insert_record(entry, 'stores')
                store_list.append(entry)

            if not flag:
                tot += len(store_map['lists'])
                break
            # One element of 'lists' is the 'more' marker, not a store.
            tot += len(store_map['lists']) - 1
            data['offset'] = tot
            store_map = json.loads(common.get_data(url_more, data))

        print 'Found a total of %d stores.' % tot
        return store_list
    finally:
        # Release the connection on every exit path.
        db.disconnect_db()
Example #9
0
def fetch_stores(data):
    print '(%s/%d) Found city: %s' % (data['brandname_e'], data['brand_id'], data['city_e'])
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    # 处理重定向
    m = re.search('<h2>Object moved to <a href="(.+?)">', html)
    if m is not None:
        data['url'] = data['host'] + m.group(1)
        return fetch_countries(data)

    m = re.search('var\s+data\s*=\s*', html)
    if m is None:
        return []
    sub, start, end = cm.extract_closure(html[m.end():], r'\[', r'\]')
    if end == 0:
        return []

    store_list = []
    for s in json.loads(sub):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        name = s['Name']
        if cm.is_chinese(name):
            entry[cm.name_c] = name
        else:
            entry[cm.name_e] = name
        entry[cm.addr_e] = cm.html2plain(s['Street'])
        entry[cm.city_e] = cm.extract_city(data['city_e'])[0]
        entry[cm.country_e] = data['country_e']
        entry[cm.province_e] = data['province_e']
        pat = re.compile(ur'tel[\.: ]*', re.I)
        entry[cm.tel] = re.sub(pat, '', s['Phone']).strip()
        pat = re.compile(ur'fax[\.: ]*', re.I)
        entry[cm.fax] = re.sub(pat, '', s['Fax']).strip()
        entry[cm.email] = s['Email'].strip()
        entry[cm.url] = s['Website'].strip()
        coord = s['LatLng']
        if coord is not None and len(coord) >= 2:
            if coord[0] is not None:
                entry[cm.lat] = string.atof(coord[0])
            if coord[1] is not None:
                entry[cm.lng] = string.atof(coord[1])

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)

        print '(%s/%d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                        entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                        entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    return store_list
Example #10
0
     tmp = cm.reformat_addr(m)
     terms = tmp.split(',')
     ret = gs.look_up(terms[-1], 1)
     if ret is not None:
     # t2 = cm.geo_translate(terms[-1])
     # if len(t2) != 0:
         # This is a country:
         # split the trailing country term off the address
         street_addr = ', '.join(terms[:-1])
         entry[cm.addr_e] = cm.reformat_addr(street_addr)
         entry[cm.country_c] = ret['name_c']
         entry[cm.country_e] = ret['name_e']
         entry[cm.continent_c] = ret['continent']['name_c']
         entry[cm.continent_e] = ret['continent']['name_e']
     else:
         if cm.is_chinese(tmp):
             entry[cm.addr_c] = tmp
         else:
             entry[cm.addr_e] = tmp
 else:
     street_addr = ', '.join([street_addr, zip_code, city])
     entry[cm.addr_e] = cm.reformat_addr(street_addr)
     ret = gs.look_up(country, 1)
     if ret is None:
     # t2 = cm.geo_translate(country)
     # if len(t2) == 0:
         entry[cm.country_c] = country
     else:
         entry[cm.country_c] = ret['name_c']
         entry[cm.country_e] = ret['name_e']
         entry[cm.continent_c] = ret['continent']['name_c']