Exemple #1
0
def get_cities(data):
    """Build the list of city entries for the country described by ``data``.

    ``data["country_code"]`` is sent through ``common.post_data`` to the
    module-level ``url``; every ``<option value="...">`` found inside the
    ``<select id="city" name="city">`` element of the response becomes one
    entry.

    :param data: dict providing at least ``country_code`` and ``country_e``.
    :return: list of dicts with the keys ``city_e``, ``country_e`` and
        ``country_code``; ``[]`` when the request fails or the ``<select>``
        element is not present.
    """
    try:
        d = {"country": data["country_code"], "city": "", "service": -1}
        html = common.post_data(url, d)
    except Exception:
        print "Error occured in getting the list of countries: %s" % url
        dump_data = {"level": 1, "time": common.format_time(), "data": {"data": url}, "brand_id": brand_id}
        common.dump(dump_data)
        return []

    start = html.find(u'<select id="city" name="city">')
    if start == -1:
        return []
    end = html.find(u"</select>", start)
    html = html[start:end]

    country_code = data["country_code"]
    city_list = []
    for m in re.findall(ur'<option value="(.+?)">', html):
        lowered = m.lower()
        # NOTE(review): the skipped values look like address fragments that
        # leak into the city drop-down -- confirm against the site.
        if country_code == "GB" and "2 davies street" in lowered:
            continue
        if country_code == "RO" and "13 september street" in lowered:
            continue
        if "b1603daq" in lowered:
            continue
        city_list.append({"city_e": m, "country_e": data["country_e"], "country_code": country_code})
    # The error paths return a list, so the success path must as well.
    return city_list
Exemple #2
0
def fetch_stores(data):
    """Request the store data located at ``data['url']``.

    :param data: dict providing the key ``url``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        falls off the end and returns ``None``.

    NOTE(review): the response ``body`` is never used in the code shown --
    the definition looks truncated; confirm against the full source.
    """
    target = data['url']
    try:
        body = cm.post_data(target)
    except Exception:
        message = 'Error in fetching stores: %s' % target
        cm.dump(message, log_name)
        return []
Exemple #3
0
def fetch_cities(data):
    """
    Get the list of cities for the country ``data['country_id']``.

    The ``<option>`` elements of the response are extracted with a regular
    expression; the option whose value is ``0`` is skipped.

    :param data: dict providing ``post_url`` and ``country_id`` (and
        ``brand_id``, which is read when the request fails).
    :return: list of copies of ``data`` extended with ``city_id`` (int) and
        ``city_e`` (stripped, upper-cased option text); ``[]`` when the
        request fails.
    """
    url = data['post_url']
    try:
        html = cm.post_data(
            url, {
                'pid': data['country_id'],
                'lang': 'en',
                'action': 'popola_select_city'
            })
    except Exception:
        print 'Error occured in getting city list: %s' % url
        dump_data = {
            'level': 2,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    city_list = []
    for m in re.findall(ur'<option value="(\d+)".*?>(.+?)</option>', html):
        if m[0] != '0':
            d = data.copy()
            # The regex only captures digits, so int() replaces the
            # deprecated string.atoi() without changing the result.
            d['city_id'] = int(m[0])
            d['city_e'] = m[1].strip().upper()
            city_list.append(d)
    # The error path returns a list, so the success path must as well.
    return city_list
Exemple #4
0
def fetch_stores(data):
    """Fetch the stores of one country and insert them into the database.

    The response of ``cm.post_data`` is parsed as JSON; each element of its
    ``stores`` list is turned into a store entry, completed through
    ``gs.field_sense`` / ``gs.addr_sense``, printed and written with
    ``db.insert_record(entry, 'stores')``.

    :param data: dict providing ``url``, ``country_code``, ``brand_id``,
        ``brandname_e``, ``brandname_c`` and the key ``cm.country_e``.
    :return: list of the store entries created; ``[]`` when the request
        fails.
    """
    url = data['url']
    try:
        body = cm.post_data(url, {'rsp': 'json', 'country': data['country_code']})
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    raw = json.loads(body)
    # Weekday labels, indexed by the digit captured from 'openingHours'.
    days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    store_list = []
    for s in raw['stores']:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = cm.html2plain(s['name']).strip()

        # Join the non-empty address lines into a single address string.
        addr_list = []
        for key in ['address1', 'address2']:
            if s[key].strip() != '':
                addr_list.append(cm.reformat_addr(s[key]))
        entry[cm.addr_e] = ' '.join(addr_list)

        # r=s['region'].strip().upper()
        # m = re.search(ur'\b([A-Z]{2})\b', r)
        # if data[cm.country_e]=='UNITED STATES' and m is not None:
        #     # United States
        #     ret = gs.look_up(m.group(1), 2)
        #     if ret is not None:
        #         r = ret['name_e']
        # entry[cm.province_e] = r

        entry[cm.city_e] = cm.extract_city(s['city'])[0]
        entry[cm.zip_code] = s['zip'].strip()
        entry[cm.country_e] = data[cm.country_e]
        entry[cm.lat] = float(s['lat'])
        entry[cm.lng] = float(s['lng'])
        entry[cm.tel] = s['phone'].strip()
        entry[cm.fax] = s['fax'].strip()
        entry[cm.email] = s['emailaddress'].strip()
        entry[cm.url] = s['website'].strip()

        opening = []
        if 'openingHours' in s and s['openingHours'] is not None:
            # NOTE(review): the pattern looks like it targets a PHP-serialized
            # array of (day index, hours text) pairs -- confirm.
            for m in re.finditer(ur'i:(\d);s:\d+:\\?"([^\\"]+?)\\?"', s['openingHours']):
                opening.append('%s: %s' % (days[int(m.group(1))], m.group(2).strip()))
            entry[cm.hours] = ', '.join(opening)

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        # Only fill province / city from the address when still empty.
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        print '(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'],
                                                              entry[cm.name_e], entry[cm.addr_e], entry[cm.city_e],
                                                              entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')
    # The error path returns a list, so the success path must as well.
    return store_list
Exemple #5
0
def fetch_cities(data):
    """Collect the cities offered for the country ``data['country_id']``.

    The request goes through ``cm.post_data`` to the module-level ``url``;
    cities are read from the ``<option>`` elements inside
    ``<select name="city_id" id="city_id">``.

    :param data: dict providing ``country_id`` and ``country``.
    :return: list of dicts with the keys ``city`` (stripped, upper-cased),
        ``city_id`` (int), ``country`` and ``country_id``; ``[]`` when the
        request fails or the ``<select>`` element is missing.
    """
    country_id = data['country_id']
    try:
        html = cm.post_data(url, {'country_id': country_id})
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 2,
            'time': cm.format_time(),
            'data': data,
            'brand_id': brand_id
        }
        cm.dump(dump_data)
        return []

    marker = '<select name="city_id" id="city_id">'
    start = html.find(marker)
    if start == -1:
        return []
    # Skip past the opening tag so only the options are scanned.
    start += len(marker)
    end = html.find('</select>', start)
    html = html[start:end]

    city_list = []
    for m in re.findall(ur'<option\s.*?value="(\d+).*?">(.*?)</option>', html):
        city_list.append({
            'city': m[1].strip().upper(),
            'city_id': int(m[0]),
            'country': data['country'],
            'country_id': data['country_id'],
        })
    # The error paths return a list, so the success path must as well.
    return city_list
Exemple #6
0
def get_cities(data):
    """Return the city entries available for ``data['country_code']``.

    The country code is submitted through ``common.post_data`` to the
    module-level ``url``. The ``value`` of each ``<option>`` inside the
    ``<select id="city" name="city">`` element yields one entry, except for
    a few explicitly excluded values.

    :param data: dict providing at least ``country_code`` and ``country_e``.
    :return: list of dicts with the keys ``city_e``, ``country_e`` and
        ``country_code``; ``[]`` when the request fails or the ``<select>``
        element is not present.
    """
    try:
        d = {'country': data['country_code'], 'city': '', 'service': -1}
        html = common.post_data(url, d)
    except Exception:
        print 'Error occured in getting the list of countries: %s' % url
        dump_data = {'level': 1, 'time': common.format_time(), 'data': {'data': url}, 'brand_id': brand_id}
        common.dump(dump_data)
        return []

    start = html.find(u'<select id="city" name="city">')
    if start == -1:
        return []
    end = html.find(u'</select>', start)
    html = html[start:end]

    code = data['country_code']
    city_list = []
    for m in re.findall(ur'<option value="(.+?)">', html):
        value = m.lower()
        # NOTE(review): these exclusions look like street addresses / a
        # postcode mixed into the city list -- confirm against the site.
        excluded = (
            (code == 'GB' and '2 davies street' in value)
            or (code == 'RO' and '13 september street' in value)
            or 'b1603daq' in value
        )
        if excluded:
            continue
        city_list.append({'city_e': m, 'country_e': data['country_e'], 'country_code': code})
    # The error paths return a list, so the success path must as well.
    return city_list
Exemple #7
0
def fetch_cities(data):
    """Return one copy of ``data`` per city listed in the JSON response.

    :param data: dict providing ``sel_url``, ``continent`` and ``country``
        (and ``brand_id``, which is read when the request fails).
    :return: list of copies of ``data``, each with ``city`` set to one
        element of the response's ``city`` list; ``[]`` when the request
        fails.
    """
    url = data['sel_url']
    try:
        form = {
            'continent': data['continent'],
            'country': data['country'],
            'city': '',
            'page': 0
        }
        body = cm.post_data(url, form)
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({
            'level': 0,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        })
        return []

    cities = []
    for name in json.loads(body)['city']:
        item = data.copy()
        item['city'] = name
        cities.append(item)
    return cities
Exemple #8
0
def fetch_store_list(data):
    url = data['data_url']
    param = {
        'storelocator': 1,
        'dofilter': 1,
        'L': 0,
        'map_sw': '-90.0%2C-180.0',
        'map_ne': '90.0%2C180.0',
        'slst': 'c',
        'storetype': 1
    }
    # storelocator=1&dofilter=1&L=0&map_sw=-90.0%2C-180.0&map_ne=90.0%2C180.0&slst=c&storetype=1
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    results = []
    for m in re.findall(ur'"id"\s*:\s*"([^"]+)"', body):
        tmp = m.split(',')
        for val in tmp:
            d = data.copy()
            d['store_id'] = string.atoi(val)
            results.append(d)
Exemple #9
0
def fetch_store_list(data):
    """Return one copy of ``data`` per store detail link found in the response.

    For each ``<td class="ftd"`` occurrence, the text up to the next
    ``</tr>`` is searched for the link inside ``<td class="ltd">``.

    :param data: dict providing ``url``, ``country_code``, ``city`` and
        ``host`` (and ``brand_id``, which is read when the request fails).
    :return: list of copies of ``data`` whose ``url`` is replaced by
        ``data['host']`` plus the link target; ``[]`` when the request fails.
    """
    url = data['url']
    try:
        body = cm.post_data(url, {
            'cCode': data['country_code'],
            'city': data['city'],
            'postsearch': 1
        })
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    results = []
    for m in re.finditer(ur'<td class\s*=\s*"ftd"', body):
        end = body.find('</tr>', m.start())
        sub = body[m.start():end]
        m1 = re.search(ur'<td class="ltd"><a href="(.+?)">', sub)
        if m1 is None:
            print 'Cannot find details: %s / %s' % (data['country_code'],
                                                    data['city'])
            continue
        d = data.copy()
        d['url'] = data['host'] + m1.group(1)
        results.append(d)
    # The error path returns a list, so the success path must as well.
    return results
Exemple #10
0
def get_countries(data):
    """
    Return the list of countries.

    :rtype : [{'country_id':**, 'country':**, 'url':**}, ...]
    :param data: dict providing ``url``.
    :return: one dict per country link found in the response, with
        ``country_id`` (int), ``country`` (upper-cased name) and ``url``;
        ``[]`` when the request fails.
    """
    url = data['url']
    try:
        # NOTE(review): 'recordit' may be a typo for 'recordid' -- confirm
        # against the site before changing, it is sent as-is.
        html = cm.post_data(url, {'country': -1, 'city': -1, 'recordit': -1})
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': brand_id
        }
        cm.dump(dump_data)
        return []

    ret = []
    # The pattern matches closing tags written as '<\/a><\/li>', i.e. with
    # a literal backslash before each slash.
    for m in re.findall(
            ur'<li>\s*?<a href=.+?country-(\d+).+?">(.+?)<\\/a><\\/li>', html,
            re.S):
        country_id = int(m[0].strip())
        # Drop literal backslash-r / backslash-n sequences from the name.
        country = m[1].replace(r'\r', '').replace(r'\n', '').strip().upper()
        ret.append({'country_id': country_id, 'country': country, 'url': url})
    # The docstring and the error path promise a list.
    return ret
Exemple #11
0
def get_store_list(data):
    """
    Get the list of stores in a city.

    :param data: dict providing ``url``, ``country_id`` and ``city_id``.
    :return: list of copies of ``data`` extended with ``store_id`` (int), one
        per ``store-<digits>`` link found in the response; ``[]`` when the
        request fails.
    """
    url = data['url']
    try:
        html = cm.post_data(url, {
            'country': data['country_id'],
            'city': data['city_id'],
            'recordid': -1
        })
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 1,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': brand_id
        }
        cm.dump(dump_data)
        return []

    ret = []
    for m in re.findall(ur'<a href=.+?store-(\d+).+?">', html, re.S):
        entry = dict(data)
        entry['store_id'] = int(m.strip())
        ret.append(entry)
    # The docstring and the error path promise a list.
    return ret
Exemple #12
0
def fetch_cities(data, logger):
    """
    Get the list of cities for the country ``data['country_id']``.

    The response is parsed with ``pq`` and every ``<option>`` whose value is
    not ``"0"`` yields one city.

    :param data: dict providing ``post_url`` and ``country_id`` (and
        ``brand_id``, which is read when the request or parsing fails).
    :param logger: not used by the code in this function.
    :return: list of copies of ``data`` extended with ``city_id`` (int) and
        ``city_e`` (upper-cased, stripped option text); ``[]`` on failure.
    """
    url = data['post_url']
    try:
        form = {
            'pid': data['country_id'],
            'lang': 'en',
            'action': 'popola_select_city'
        }
        html = cm.post_data(url, form)
        body = pq(html)
    except Exception:
        print 'Error occured in getting city list: %s' % url
        cm.dump({
            'level': 2,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        })
        return []

    cities = []
    for option in body('option[value!="0"]'):
        city = data.copy()
        city['city_id'] = int(option.attrib['value'])
        city['city_e'] = option.text.upper().strip()
        cities.append(city)
    return cities
Exemple #13
0
def fetch_cities(data):
    """Return one copy of ``data`` per city option following the 'city' placeholder.

    The response is searched for the placeholder
    ``<option value="0" ...>city</option>``; the ``value`` of every option
    between it and the next ``</select>`` becomes a city.

    :param data: dict providing ``url`` and ``country_code`` (and
        ``brand_id``, which is read when the request fails).
    :return: list of copies of ``data`` extended with ``city``; ``[]`` when
        the request fails or the placeholder option is not found.
    """
    url = data['url']
    try:
        body = cm.post_data(
            url, {
                'searchtype': 'normal',
                'reiter_selected': 'reiter1',
                'country_id': data['country_code'],
                'city_id': 0
            })
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    m = re.search(ur'<option value="0"[^>]*>city</option>', body)
    if m is None:
        return []
    end = body.find(u'</select>', m.end())

    city_list = []
    for c in re.findall(ur'<option value="(.+?)"[^>]*>.+?</option>',
                        body[m.end():end]):
        d = data.copy()
        d['city'] = c
        city_list.append(d)
    # The error paths return a list, so the success path must as well.
    return city_list
Exemple #14
0
def fetch_stores(data):
    """Fetch the raw store data from ``data['url']``.

    :param data: dict providing the key ``url``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        reaches the end of the function and returns ``None``.

    NOTE(review): ``body`` is not used after the request in the code shown --
    the definition looks truncated; confirm against the full source.
    """
    address = data['url']
    try:
        body = cm.post_data(address)
    except Exception:
        cm.dump('Error in fetching stores: %s' % address, log_name)
        return []
Exemple #15
0
def fetch_stores(data):
    param = {'action': 'getStoresFromAjax', 'country': data['country_code'],
             'region': data['city'], 'collection': ''}
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for m1 in re.finditer(ur'<div class="shop-type-container">', body):
        sub = cm.extract_closure(body[m1.start():], ur'<div\b', ur'</div>')[0]
        store_class = ''
        m2 = re.search(ur'<div class="shop-type-title">(.+?)</div>', sub, re.S)
        if m2 is not None:
            store_class = cm.reformat_addr(m2.group(1))

        for m2 in re.finditer(ur'<div class="shop"', sub):
            store_sub = cm.extract_closure(sub[m2.start():], ur'<div\b', ur'</div>')[0]
            entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
            entry[cm.store_class] = store_class
            entry[cm.country_e] = data['country_code']
            entry[cm.city_e] = cm.extract_city(data['city'])[0]

            m3 = re.search(ur'loadStore\((\d+)\s*,\s*(-?\d+\.\d+)\s*,\s*(-?\d+\.\d+)\)', store_sub)
            if m3 is not None:
                data['store_id'] = string.atoi(m3.group(1))
                entry[cm.lat] = string.atof(m3.group(2))
                entry[cm.lng] = string.atof(m3.group(3))
                entry[cm.store_type] = ', '.join(get_detail(data))

            m3 = re.search(ur'<div class="shop-name shop-details shop-main-name">([^<>]+)</div>', store_sub)
            if m3 is not None:
                entry[cm.name_e] = m3.group(1).strip()
            addr_list = []
            m3 = re.search(ur'<div class="shop-street shop-details">([^<>]+)</div>', store_sub)
            if m3 is not None:
                addr_list.append(cm.reformat_addr(m3.group(1)))
            m3 = re.search(ur'<div class="shop-city shop-details">([^<>]+)</div>', store_sub)
            if m3 is not None:
                tmp = cm.reformat_addr(m3.group(1))
                m3 = re.search(ur'(\d{4,})', tmp)
                if m3 is not None:
                    entry[cm.zip_code] = m3.group(1).strip()
                addr_list.append(tmp)
            entry[cm.addr_e] = ', '.join(addr_list)

            ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
            if ret[1] is not None and entry[cm.province_e] == '':
                entry[cm.province_e] = ret[1]
            gs.field_sense(entry)
            cm.dump('(%s / %d) Found store: %s, %s (%s, %s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                    entry[cm.name_e], entry[cm.addr_e],
                                                                    entry[cm.city_e],
                                                                    entry[cm.country_e],
                                                                    entry[cm.continent_e]), log_name)
            db.insert_record(entry, 'stores')
            store_list.append(entry)
Exemple #16
0
def fetch_stores_eu(data):
    """Request the stores for ``data['city_code']`` from ``data['data_url_eu']``.

    :param data: dict providing ``data_url_eu`` and ``city_code``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['data_url_eu']
    query = {'query': data['city_code']}
    try:
        body = cm.post_data(endpoint, query)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (endpoint, query), log_name)
        return ()
Exemple #17
0
def fetch_stores_eu(data):
    """Send the city code in ``data`` to the URL stored under ``data_url_eu``.

    :param data: dict providing ``data_url_eu`` and ``city_code``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): the response ``body`` is not processed in the code shown --
    the definition looks truncated; confirm against the full source.
    """
    target = data['data_url_eu']
    form = {'query': data['city_code']}
    try:
        body = cm.post_data(target, form)
    except Exception:
        message = 'Error in fetching stores: %s, %s' % (target, form)
        cm.dump(message, log_name)
        return ()
Exemple #18
0
def fetch_store_list(data):
    """Request the store list for the zone ``data['zone_id']``.

    :param data: dict providing ``data_url`` and ``zone_id``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['data_url']
    form = {'zone': data['zone_id']}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching store list: %s, %s' % (endpoint, form), log_name)
        return []
Exemple #19
0
def fetch_cities(data):
    """Request the cities of the country ``data['country_id']``.

    :param data: dict providing ``url``, ``country_id`` and ``search_type``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['url']
    form = {'ref': 'com', 'fld_Coun_Id': data['country_id'], 'search': data['search_type']}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (endpoint, form), log_name)
        return ()
Exemple #20
0
def fetch_countries(data):
    """Request the countries below the node ``data['continent_id']``.

    :param data: dict providing ``data_url`` and ``continent_id``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['data_url']
    form = {'node_id': data['continent_id'], 'location_id': 0}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching countries: %s, %s' % (endpoint, form), log_name)
        return ()
Exemple #21
0
def fetch_stores(data):
    """Request the stores of the country ``data['country_id']``.

    :param data: dict providing ``data_url`` and ``country_id``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['data_url']
    form = {'countryId': data['country_id']}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (endpoint, form), log_name)
        return []
Exemple #22
0
def fetch_states(data):
    """Request the states below the node ``data['country_id']``.

    :param data: dict providing ``data_url`` and ``country_id``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['data_url']
    form = {'node_id': data['country_id'], 'location_id': 1}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching states: %s, %s' % (endpoint, form), log_name)
        return ()
Exemple #23
0
def fetch_stores(data):
    """Request the stores matching the search input ``data['country']``.

    :param data: dict providing ``url`` and ``country``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['url']
    form = {'searchinput': data['country']}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (endpoint, form), log_name)
        return []
Exemple #24
0
def fetch_stores(data):
    """Request the stores of the city ``data['city_id']``.

    :param data: dict providing ``data_url`` and ``city_id``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['data_url']
    form = {'action': 'yoox_storelocator_change_city', 'city_id': data['city_id'], 'dataType': 'JSON'}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (endpoint, form), log_name)
        return []
Exemple #25
0
def fetch_stores(db, data, logger):
    """
    Get store information for the city ``data['city_id']``.

    The response is parsed with ``pq``; every ``a[href]`` element becomes one
    store entry, which is logged and written with
    ``cm.insert_record(db, entry, 'spider_stores.stores')``.

    :param db: handle passed through to ``cm.insert_record``.
    :param data: dict providing ``post_url``, ``city_id``, ``brand_id``,
        ``brandname_e``, ``brandname_c``, ``country_e`` and ``city_e``.
    :param logger: receives an error when the response is blank and an info
        line for every store found.
    :return: list of the store entries created; ``[]`` when the response is
        blank or the request/parsing raises.
    """
    url = data['post_url']
    try:
        form = {
            'pid': data['city_id'],
            'lang': 'en',
            'action': 'popola_box_DX'
        }
        html = cm.post_data(url, form)
        if not html.strip():
            logger.error(
                u'Failed to fetch stores for city {0}'.format(data['city_id']))
            return []
        body = pq(html)
    except Exception:
        print 'Error occured in getting city list: %s' % url
        cm.dump({
            'level': 2,
            'time': cm.format_time(),
            'data': {'url': url},
            'brand_id': data['brand_id']
        })
        return []

    store_list = []
    for anchor in body('a[href]'):
        item = pq(anchor)
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.url] = item[0].attrib['href']
        entry[cm.name_e] = item('h3.titleShop')[0].text.strip()

        # The address block is split on commas; when the last piece yields a
        # phone number it is stored as such and dropped from the address.
        raw_addr = unicode(item('div.txtBoxSingleStore p.lineHeight14'))
        terms = cm.reformat_addr(raw_addr).split(',')
        tel = cm.extract_tel(terms[-1])
        if tel != '':
            terms = terms[:-1]
            entry[cm.tel] = tel
        entry[cm.addr_e] = u', '.join([part.strip() for part in terms])
        entry['country_e'] = data['country_e']
        entry['city_e'] = data['city_e']
        gs.field_sense(entry)

        summary = '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        logger.info(summary)
        store_list.append(entry)
        cm.insert_record(db, entry, 'spider_stores.stores')

    return store_list
Exemple #26
0
def fetch_stores(data):
    """Request the store data at ``data['url']`` with an empty payload and explicit headers.

    :param data: dict providing ``url``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    url = data['url']
    headers = {'Content-Type': 'application/json; charset=utf-8',
               'Content-Length': 0, 'Connection': 'keep-alive',
               'Pragma': 'no-cache', 'Cache-Control': 'no-cache'}
    try:
        body = cm.post_data(url, {}, headers)
    except Exception as e:
        cm.dump('Error in fetching stores: %s, %s' % (url, e), log_name)
        return []
Exemple #27
0
def fetch_store_list(data):
    """Ask ``data['data_url']`` for the stores of the zone ``data['zone_id']``.

    :param data: dict providing ``data_url`` and ``zone_id``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): the response ``body`` is not processed in the code shown --
    the definition looks truncated; confirm against the full source.
    """
    target = data['data_url']
    payload = {'zone': data['zone_id']}
    try:
        body = cm.post_data(target, payload)
    except Exception:
        message = 'Error in fetching store list: %s, %s' % (target, payload)
        cm.dump(message, log_name)
        return []
Exemple #28
0
def fetch_stores_beauty(data):
    """Request the beauty stores for ``data['country_code']`` / ``data['city']``.

    :param data: dict providing ``lst_url``, ``country_code`` and ``city``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['lst_url']
    form = {'chkCat[0]': 'FRG', 'chkCat[1]': 'MKP', 'chkCat[2]': 'PRE', 'chkCat[3]': 'EXC', 'div': 'fnb',
            'lstCountry': data['country_code'], 'lstCity': data['city']}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching countries: %s' % endpoint, log_name)
        return ()
Exemple #29
0
def fetch_stores(data):
    """
    Get store information for the country ``data["country_id"]``.

    The response is decoded with ``unicode_escape`` and parsed as JSON; each
    element becomes one store entry, which is printed and written with
    ``db.insert_record(entry, "stores")``.

    :param data: dict providing ``post_url``, ``country_id``,
        ``retail_type``, ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of the store entries created; ``[]`` when the request or
        the decoding/parsing raises.
    """
    url = data["post_url"]
    try:
        form = {"country_id": data["country_id"], "retail_city": "", "retail_type": data["retail_type"]}
        raw = cm.post_data(url, form)
        js = json.loads(raw.decode("unicode_escape"))
    except Exception:
        print "Error occured in getting country list: %s" % url
        cm.dump({"level": 1, "time": cm.format_time(), "data": {"url": url}, "brand_id": data["brand_id"]})
        return []

    # country_id=108&retail_city=&retail_type=retail
    # country_id=99&retail_city=&retail_type=service
    store_list = []
    for record in js:
        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])

        # The name goes to the Chinese or the English field depending on
        # what cm.is_chinese reports.
        name = record["retail_name"].strip()
        name_field = cm.name_c if cm.is_chinese(name) else cm.name_e
        entry[name_field] = name

        entry[cm.addr_e] = record["retail_gmap"].strip()
        entry[cm.zip_code] = record["retail_zipcode"].strip()
        entry[cm.city_e] = record["retail_city"].strip().upper()
        if record["retail_email"] is not None:
            entry[cm.email] = record["retail_email"].strip()
        if record["retail_website"] is not None:
            entry[cm.url] = record["retail_website"].strip()
        entry[cm.store_class] = "Retail" if data["retail_type"] == "retail" else "Service Center"
        entry[cm.country_e] = record["country_name"].strip().upper()
        entry[cm.continent_e] = record["continent_name"].strip().upper()

        gs.field_sense(entry)
        summary = (
            data["brandname_e"],
            data["brand_id"],
            entry[cm.name_e],
            entry[cm.addr_e],
            entry[cm.country_e],
            entry[cm.continent_e],
        )
        print "(%s / %d) Found store: %s, %s (%s, %s)" % summary
        store_list.append(entry)
        db.insert_record(entry, "stores")

    return store_list
Exemple #30
0
def fetch_stores(data):
    """Request the stores within 0.25 degrees of the city coordinates in ``data``.

    The parameters are URL-encoded into the query string of ``data['url']``
    before the URL is passed to ``cm.post_data``.

    :param data: dict providing ``url``, ``city_lat`` and ``city_lng``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    url = data['url']
    lat = data['city_lat']
    lng = data['city_lng']
    param = {'json': 1, 'lat': lat, 'lng': lng, 'latLow': lat - 0.25, 'latHigh': lat + 0.25, 'lngLow': lng - 0.25,
             'lngHigh': lng + 0.25, 'includeResellers': 'true'}
    try:
        query = urllib.urlencode(param)
        body = cm.post_data('%s?%s' % (url, query))
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()
Exemple #31
0
def fetch_stores(data):
    """
    Get store information for the city ``data['city_id']``.

    Every ``<a href="...">`` in the response starts one store block (closed
    by ``</a>``); name, address and telephone are extracted with regular
    expressions. Each entry is printed and written with
    ``db.insert_record(entry, 'stores')``.

    :param data: dict providing ``post_url``, ``city_id``, ``brand_id``,
        ``brandname_e``, ``brandname_c``, ``country_e`` and ``city_e``.
    :return: list of the store entries created; ``[]`` when the request
        fails.
    """
    url = data['post_url']
    try:
        html = cm.post_data(url, {
            'pid': data['city_id'],
            'lang': 'en',
            'action': 'popola_box_DX'
        })
    except Exception:
        print 'Error occured in getting city list: %s' % url
        dump_data = {
            'level': 2,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(ur'<a href="(.+?)".*?>', html):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'],
                                    data['brandname_c'])
        entry[cm.url] = m.group(1)
        store_html, start, end = cm.extract_closure(html[m.start():],
                                                    ur'<a href', ur'</a>')
        # NOTE(review): end == 0 presumably means no closed <a> block was
        # found -- confirm against cm.extract_closure.
        if end == 0:
            continue
        m1 = re.findall(ur'<h3 class="titleShop">(.+?)</h3>', store_html, re.S)
        if len(m1) > 0:
            entry[cm.name_e] = m1[0].strip()
        m1 = re.findall(ur'<p\b.*?>(.+?)(?:</p>|</div>)', store_html, re.S)
        if len(m1) > 0:
            # When the last comma-separated piece yields a phone number it
            # is stored as such and dropped from the address.
            terms = cm.reformat_addr(m1[0]).split(',')
            tel = cm.extract_tel(terms[-1])
            if tel != '':
                terms = terms[:-1]
                entry[cm.tel] = tel
            entry[cm.addr_e] = ', '.join([v.strip() for v in terms])

        entry['country_e'] = data['country_e']
        entry['city_e'] = data['city_e']
        gs.field_sense(entry)

        print '(%s / %d) Found store: %s, %s (%s, %s)' % (
            data['brandname_e'], data['brand_id'], entry[cm.name_e],
            entry[cm.addr_e], entry[cm.country_e], entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')
    # The error path returns a list, so the success path must as well.
    return store_list
Exemple #32
0
def fetch_stores(data):
    url = data['store_url']
    param = {'store_country': data['country_code'], 'store_city': data['city_code']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []

    store_list = []
    for s in re.findall(ur'<marker\b([^<>]+)/\s*>', body):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        m = re.search(ur'store_name="([^"]+)"', s)
        if m is not None:
            entry[cm.name_e] = cm.reformat_addr(m.group(1))
        entry[cm.country_e] = data['country_code']
        entry[cm.city_e] = data['city']
        addr_list = []
        for key in ['store_mall_name', 'store_address', 'store_zip_code']:
            m = re.search(ur'%s="([^"]+)"' % key, s)
            if m is not None:
                tmp = cm.reformat_addr(m.group(1))
                if tmp != '':
                    addr_list.append(tmp)
        entry[cm.addr_e] = ', '.join(addr_list)
        m = re.search(ur'store_zip_code="([^"]+)"', s)
        if m is not None:
            entry[cm.zip_code] = m.group(1).strip()
        m = re.search(ur'store_telephone="([^"]+)"', s)
        if m is not None:
            entry[cm.tel] = m.group(1).strip()
        m = re.search(ur'store_fax="([^"]+)"', s)
        if m is not None:
            entry[cm.fax] = m.group(1).strip()
        m = re.search(ur'store_email="([^"]+)"', s)
        if m is not None:
            entry[cm.email] = m.group(1).strip()
        m = re.search(ur'store_latitude="([^"]+)"', s)
        if m is not None:
            entry[cm.lat] = string.atof(m.group(1).strip())
        m = re.search(ur'store_longitude="([^"]+)"', s)
        if m is not None:
            entry[cm.lng] = string.atof(m.group(1).strip())

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        gs.field_sense(entry)

        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
Exemple #33
0
def fetch_stores(data):
    """Request the stores for the country/state/city ids in ``data``.

    A ``state_id`` of ``None`` is sent as ``0``.

    :param data: dict providing ``data_url``, ``state_id``, ``country_id``
        and ``city_id``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    url = data['data_url']
    raw_state = data['state_id']
    state_id = 0 if raw_state is None else raw_state
    param = {'request': 'setStores', 'c': data['country_id'], 's': state_id, 'l': data['city_id'], 'ca': ''}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []
Exemple #34
0
def fetch_cities(data):
    """Request the cities of a country, or of a state when ``state_id`` is set.

    :param data: dict providing ``data_url``, ``state_id`` and
        ``country_id``.
    :return: ``[]`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    url = data['data_url']
    state_id = data['state_id']
    if state_id is not None:
        param = {'request': 'stateChange', 'c': data['country_id'], 's': state_id}
    else:
        param = {'request': 'countryChange', 'c': data['country_id']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return []
Exemple #35
0
def fetch_cities(data):
    """Ask ``data['url']`` for the cities of the country ``data['country_id']``.

    :param data: dict providing ``url``, ``country_id`` and ``search_type``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): the response ``body`` is not processed in the code shown --
    the definition looks truncated; confirm against the full source.
    """
    target = data['url']
    payload = {
        'ref': 'com',
        'fld_Coun_Id': data['country_id'],
        'search': data['search_type']
    }
    try:
        body = cm.post_data(target, payload)
    except Exception:
        message = 'Error in fetching cities: %s, %s' % (target, payload)
        cm.dump(message, log_name)
        return ()
Exemple #36
0
def fetch_stores(data):
    """Request the store-finder search results for ``data['country_code']``.

    The zip and city fields are sent with the fixed values ``'POSTCODE'``
    and ``'Town'``.

    :param data: dict providing ``data_url`` and ``country_code``.
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    endpoint = data['data_url']
    form = {'tx_iostorefinder_pi1[__referrer][extensionName]': 'IoStorefinder',
            'tx_iostorefinder_pi1[__referrer][controllerName]': 'Store',
            'tx_iostorefinder_pi1[__referrer][actionName]': 'search',
            'tx_iostorefinder_pi1[countryid]': data['country_code'],
            'tx_iostorefinder_pi1[zip]': 'POSTCODE', 'tx_iostorefinder_pi1[city]': 'Town'}
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (endpoint, form), log_name)
        return ()
Exemple #37
0
def get_stores(data):
    # data[StoreLocator][pays]=BO
    url = data['url']
    try:
        html = cm.post_data(url, {'data[StoreLocator][pays]': data['country_code'],
                                  'data[StoreLocator][ville]': '',
                                  'data[StoreLocator][etat]': 0})
    except Exception, e:
        print 'Error occured: %s, %s' % (url, str(e))
        dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id}
        cm.dump(dump_data)
        return []
Exemple #38
0
def fetch_cities(data):
    """Request the cities below a state, or below a country when ``no_region`` is set.

    :param data: dict providing ``data_url`` and ``no_region``, plus
        ``country_id`` (when ``no_region`` is truthy) or ``state_id``
        (otherwise).
    :return: ``()`` when ``cm.post_data`` raises; otherwise the code shown
        returns ``None``.

    NOTE(review): ``body`` is never used in the code shown -- the definition
    looks truncated; confirm against the full source.
    """
    url = data['data_url']
    if not data['no_region']:
        param = {'node_id': data['state_id'], 'location_id': 2}
    else:
        param = {'node_id': data['country_id'], 'location_id': 2, 'no_region': 1}

    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return ()
Exemple #39
0
def fetch_cities(data):
    """Post the worldwide-retailers form for one country.

    Sends ``DestinationURL='Worldwide-retailers'`` and ``(country)`` (from
    ``data['country_code']``) to ``data['data_url']``.

    :param data: mapping providing ``data_url`` and ``country_code``.
    :return: an empty tuple when the request raises. On success the response
        is only bound to a local here, so the call yields ``None``.
    """
    endpoint = data['data_url']
    form = {'DestinationURL': 'Worldwide-retailers', '(country)': data['country_code']}
    try:
        body = cm.post_data(endpoint, form)
        # Disabled: follow the URL announced by a META HTTP-EQUIV="Location"
        # tag in the response.
        # m = re.search(ur'META HTTP-EQUIV="Location"\s+Content="([^"]+)"', body)
        # if not m:
        #     raise IOError()
        # body = cm.get_data(m.group(1))
    except Exception:
        failure = 'Error in fetching cities: %s, %s' % (endpoint, form)
        cm.dump(failure, log_name)
        return ()
Exemple #40
0
def fetch_stores(data):
    """Post an empty form to the store endpoint.

    ``cm.post_data`` is called with ``data['url']``, an empty form and a dict
    of header fields (JSON content type, zero content length, keep-alive,
    no-cache) as its third argument.

    :param data: mapping providing ``url``.
    :return: an empty list when the request raises; the exception is part of
        the logged message. On success the response is only bound to a local
        here, so the call yields ``None``.
    """
    endpoint = data['url']
    header_fields = {
        'Content-Type': 'application/json; charset=utf-8',
        'Content-Length': 0,
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
    }
    try:
        body = cm.post_data(endpoint, {}, header_fields)
    except Exception as err:
        failure = 'Error in fetching stores: %s, %s' % (endpoint, err)
        cm.dump(failure, log_name)
        return []
Exemple #41
0
def get_detail(data):
    param = {'action': 'loadStoreFromAjax', 'id': data['store_id']}
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()

    m = re.search(ur'<div class="lines">(.+?)</div>', body, re.S)
    if m is None:
        return ()
    return tuple(term.strip() for term in re.findall(ur'<li>(.+?)</li>', m.group(1), re.S))
Exemple #42
0
def fetch_stores(data):
    """Fetch, parse and record the stores of one city.

    Posts ``data['city_e']`` with all five ``paulandjoe_*`` fields set to 0
    to ``data['post_shops']``. Every ``<ul>`` of the response is treated as
    one store: the text of its first ``li.first`` item is the name, the
    remaining non-empty items form the address, and the last of them is moved
    to the phone field when ``cm.extract_tel`` returns a number for it.

    Each parsed entry is printed, inserted into ``stores`` through
    ``db.insert_record`` and collected in the result.

    :param data: mapping providing ``post_shops``, ``city_e``, ``brand_id``,
        ``brandname_e``, ``brandname_c`` and the keys named by
        ``cm.country_e`` and ``cm.city_e``.
    :return: list of the store entries found; an empty list when the request
        raises. A store whose markup raises ``IndexError``/``TypeError`` is
        logged and skipped; any other error stops the scan and the entries
        collected so far are returned.
    """
    url = data['post_shops']
    param = {'city': data['city_e'], 'paulandjoe_women': 0, 'paulandjoe_man': 0,
             'paulandjoe_sister': 0, 'paulandjoe_little': 0, 'paulandjoe_beauty': 0}
    try:
        html = cm.post_data(url, param)
    except Exception:
        print('Error occured: %s' % url)
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    try:
        for store in (pq(tmp) for tmp in pq(html)('ul')):
            try:
                entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
                entry[cm.name_e] = cm.html2plain(store('li.first')[0].text).strip()
                entry[cm.country_e] = data[cm.country_e]
                entry[cm.city_e] = data[cm.city_e]

                # Address lines: every non-empty item that is not the name.
                addr_list = [term
                             for term in (cm.reformat_addr(unicode(pq(tmp)))
                                          for tmp in store('li[class!="first"]'))
                             if term != '']
                # The last line is the phone number when one can be extracted.
                tel = cm.extract_tel(addr_list[-1])
                if tel != '':
                    entry[cm.tel] = tel
                    del addr_list[-1]
                entry[cm.addr_e] = ', '.join(addr_list)

                gs.field_sense(entry)
                # Fill in only the geo fields that are still empty.
                ret = gs.addr_sense(entry[cm.addr_e])
                if ret[0] is not None and entry[cm.country_e] == '':
                    entry[cm.country_e] = ret[0]
                if ret[1] is not None and entry[cm.province_e] == '':
                    entry[cm.province_e] = ret[1]
                if ret[2] is not None and entry[cm.city_e] == '':
                    entry[cm.city_e] = ret[2]
                gs.field_sense(entry)
                print('(%s/%d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                                entry[cm.name_e], entry[cm.addr_e],
                                                                entry[cm.country_e],
                                                                entry[cm.continent_e]))
                store_list.append(entry)
                db.insert_record(entry, 'stores')
            except (IndexError, TypeError):
                # E.g. a <ul> without a li.first item or without address
                # lines: log it and move on to the next store.
                cm.dump(u'Error in parsing %s, %s' % (url, param), log_name)
                print(traceback.format_exc())
    except Exception:
        # Best effort: report the failure and keep what was collected.
        print(traceback.format_exc())
    return store_list
Exemple #43
0
def parse_store(data, body=None):
    if body is None:
        url = data['url']
        try:
            body = cm.post_data(url)
        except Exception:
            cm.dump('Error in fetching stores: %s' % url, log_name)
            return []

    start = body.find(ur'jQuery.extend(Drupal.settings,')
    latlng_map = {}
    if start != -1:
        for item in json.loads(cm.extract_closure(body[start:], ur'\{', ur'\}')[0])['getlocations']['key_1']['latlons']:
            latlng_map[cm.reformat_addr(item[3])] = {'lat': string.atof(item[0]), 'lng': string.atof(item[1])}
Exemple #44
0
def fetch_stores(data):
    """Submit the store search form for one city.

    The form posted to ``data['store_url']`` enables six ``univers[...]``
    filters, selects the location through ``continent``, ``pays`` and
    ``ville`` (from ``continent_id``, ``country_id`` and ``city_id``), and
    adds ``region`` (from ``state_id``) unless ``data['no_region']`` is
    truthy.

    :param data: mapping providing ``store_url``, ``continent_id``,
        ``country_id``, ``city_id``, ``no_region`` and, when a region
        applies, ``state_id``.
    :return: an empty tuple when the request raises. On success the response
        is only bound to a local here, so the call yields ``None``.
    """
    endpoint = data['store_url']
    form = {
        'univers[mode_femme]': 'pla_women',
        'univers[mode_homme]': 'pla_dior_men',
        'univers[baby_dior]': 'pla_baby_dior',
        'univers[horlogerie]': 'pla_watches',
        'univers[joaillerie]': 'pla_fine_jewelry',
        'univers[dior_phone]': 'pla_dior_phone',
        'continent': data['continent_id'],
        'pays': data['country_id'],
        'ville': data['city_id'],
        'node_id': '581',
        'search': 'SEARCH',
    }
    has_region = not data['no_region']
    if has_region:
        form['region'] = data['state_id']
    try:
        body = cm.post_data(endpoint, form)
    except Exception:
        failure = 'Error in fetching stores: %s, %s' % (endpoint, form)
        cm.dump(failure, log_name)
        return ()
Exemple #45
0
def get_detail(data):
    param = {'action': 'loadStoreFromAjax', 'id': data['store_id']}
    url = data['url']
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
        return ()

    m = re.search(ur'<div class="lines">(.+?)</div>', body, re.S)
    if m is None:
        return ()
    return tuple(term.strip()
                 for term in re.findall(ur'<li>(.+?)</li>', m.group(1), re.S))
Exemple #46
0
def fetch_stores(data):
    """
    Fetch the stores of one country and record them.

    Posts ``country_id``, an empty ``retail_city`` and ``retail_type`` to
    ``data['post_url']``. The response is decoded with ``unicode_escape`` and
    parsed as a JSON sequence of store records; each record becomes a store
    entry that is printed, inserted into ``stores`` through
    ``db.insert_record`` and collected.

    :param data: mapping providing ``post_url``, ``country_id``,
        ``retail_type``, ``brand_id``, ``brandname_e`` and ``brandname_c``.
    :return: list of store entries; an empty list when fetching or parsing
        the response raises.
    """
    url = data['post_url']
    try:
        raw = cm.post_data(url, {'country_id': data['country_id'], 'retail_city': '',
                                 'retail_type': data['retail_type']})
        js = json.loads(raw.decode('unicode_escape'))
    except Exception:
        # This request is for stores, so the message names the store list.
        print('Error occured in getting store list: %s' % url)
        dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    # Sample request bodies:
    # country_id=108&retail_city=&retail_type=retail
    # country_id=99&retail_city=&retail_type=service
    # The class depends only on the request, so it is decided once.
    store_class = 'Retail' if data['retail_type'] == 'retail' else 'Service Center'

    store_list = []
    for s in js:
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        # The name goes to the Chinese or the English field depending on
        # what cm.is_chinese reports for it.
        name = s['retail_name'].strip()
        entry[cm.name_c if cm.is_chinese(name) else cm.name_e] = name
        entry[cm.addr_e] = s['retail_gmap'].strip()
        entry[cm.zip_code] = s['retail_zipcode'].strip()
        entry[cm.city_e] = s['retail_city'].strip().upper()
        # E-mail and website are skipped when the record has None for them.
        if s['retail_email'] is not None:
            entry[cm.email] = s['retail_email'].strip()
        if s['retail_website'] is not None:
            entry[cm.url] = s['retail_website'].strip()
        entry[cm.store_class] = store_class
        entry[cm.country_e] = s['country_name'].strip().upper()
        entry[cm.continent_e] = s['continent_name'].strip().upper()

        gs.field_sense(entry)
        print('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                          entry[cm.name_e], entry[cm.addr_e],
                                                          entry[cm.country_e],
                                                          entry[cm.continent_e]))
        store_list.append(entry)
        db.insert_record(entry, 'stores')

    return store_list
Exemple #47
0
def fetch_cities(data):
    """List the cities (regions) of one country.

    Posts ``action='getRegionsFromAjax'`` with ``data['country_code']`` to
    ``data['url']`` and parses the response as JSON.

    :param data: mapping providing ``country_code`` and ``url``; it must
        support ``copy()``.
    :return: one copy of ``data`` per item of the parsed response, each with
        ``'city'`` set to that item; an empty list when the request raises.
    """
    query = {'action': 'getRegionsFromAjax', 'country': data['country_code']}
    endpoint = data['url']
    try:
        body = cm.post_data(endpoint, query)
    except Exception:
        failure = 'Error in fetching cities: %s, %s' % (endpoint, query)
        cm.dump(failure, log_name)
        return []

    def with_city(city):
        # Fresh copy per city so the caller's mapping is left untouched.
        tagged = data.copy()
        tagged['city'] = city
        return tagged

    return [with_city(city) for city in json.loads(body)]
Exemple #48
0
def fetch_cities(data):
    """Expand one country record into one record per city.

    The cities come from posting ``action='getRegionsFromAjax'`` and
    ``country=data['country_code']`` to ``data['url']``; the response is
    parsed as JSON.

    :param data: mapping providing ``country_code`` and ``url``; it must
        support ``copy()``.
    :return: list of copies of ``data``, each with ``'city'`` set to one item
        of the parsed response; an empty list when the request raises.
    """
    form = {'action': 'getRegionsFromAjax', 'country': data['country_code']}
    target = data['url']
    try:
        body = cm.post_data(target, form)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (target, form), log_name)
        return []

    expanded = []
    for name in json.loads(body):
        record = data.copy()
        record['city'] = name
        expanded.append(record)
    return expanded
Exemple #49
0
def fetch_cities(data):
    url = data['data_url']
    param = {'country': data['country_code']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return []

    results = []
    for m in re.findall(ur'<option value="([^"]+)">([^<>]+)', body):
        d = data.copy()
        d['city'] = m[1].strip().upper()
        d['city_code'] = m[0]
        results.append(d)
Exemple #50
0
def fetch_cities(data):
    """Request the retailer listing of one country.

    The fields ``DestinationURL`` (fixed to ``'Worldwide-retailers'``) and
    ``(country)`` (from ``data['country_code']``) are posted to
    ``data['data_url']``.

    :param data: mapping providing ``data_url`` and ``country_code``.
    :return: an empty tuple when the request raises. On success the response
        is only bound to a local here, so the call yields ``None``.
    """
    target = data['data_url']
    fields = {'DestinationURL': 'Worldwide-retailers', '(country)': data['country_code']}
    try:
        body = cm.post_data(target, fields)
        # Disabled: follow the URL announced by a META HTTP-EQUIV="Location"
        # tag in the response.
        # m = re.search(ur'META HTTP-EQUIV="Location"\s+Content="([^"]+)"', body)
        # if not m:
        #     raise IOError()
        # body = cm.get_data(m.group(1))
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (target, fields), log_name)
        return ()
Exemple #51
0
def fetch_cities(data):
    url = data['data_url']
    param = {'country': data['country_code']}
    try:
        body = cm.post_data(url, param)
    except Exception:
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return []

    results = []
    for m in re.findall(ur'<option value="([^"]+)">([^<>]+)', body):
        d = data.copy()
        d['city'] = m[1].strip().upper()
        d['city_code'] = m[0]
        results.append(d)
Exemple #52
0
def fetch_stores(data):
    url = data['home_url']
    try:
        body = cm.post_data(url, {'lz_sf': data['province'], 'lz_sx': data['city']})
    except Exception:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    start = body.find(u'搜索结果')
    if start == -1:
        cm.dump('Error in fetching stores: %s, %s, %s' % (url, data['province'], data['city']),
                'samsonite_log.txt')
        return []

    body = body[start + 4:]

    store_list = []
    for m in re.findall(ur'</script>\s*(\S+)\s*</span>', body, re.S):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        entry[cm.name_e] = m.strip()
        entry[cm.addr_e] = m.strip()
        entry[cm.city_c] = data['city']
        ret = gs.look_up(data['city'], 3)
        if ret is not None:
            entry[cm.city_e] = cm.extract_city(ret['name_e'])[0]
            if ret['province'] != '':
                entry[cm.province_e] = ret['province']['name_e']
        entry[cm.province_c] = data['province']
        ret = gs.look_up(data['province'], 2)
        if ret is not None:
            entry[cm.province_e] = ret['name_e']
        entry[cm.country_e] = u'CHINA'

        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None and entry[cm.province_e] == '':
            entry[cm.province_e] = ret[1]
        if ret[2] is not None and entry[cm.city_e] == '':
            entry[cm.city_e] = ret[2]
        gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), 'benetton_log.txt', False)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
Exemple #53
0
def get_store_details(data):
    url = data['url']
    try:
        html = cm.post_data(url, {'country': data['country_id'], 'city': data['city_id'], 'recordid': data['store_id']})
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id}
        cm.dump(dump_data)
        return []

    entry = cm.init_store_entry(brand_id, brandname_e, brandname_c)
    info = json.loads(html)['elements']
    addr = cm.reformat_addr(info['address'].replace('\\', '').replace('<p>', ',').replace('</p>', ','))
    # 第一行为商店名称
    terms = addr.split(',')
    if len(terms) > 0:
        entry[cm.name_e] = cm.reformat_addr(terms[0])
    entry[cm.addr_e] = addr

    gmap_url = info['gmap']
    m = re.findall(ur'(-?\d+\.\d+),(-?\d+\.\d+)', gmap_url)
    if len(m) > 0:
        cm.update_entry(entry, {cm.lat: string.atof(m[0][0]), cm.lng: string.atof(m[0][1])})

    entry[cm.url] = info['shareurl'].replace('\\', '')
    entry[cm.hours] = info['openingtimes']
    entry[cm.comments] = info['other']

    # Geo
    country = data['country']
    city = data['city']
    cm.update_entry(entry, {cm.country_e: country, cm.city_e: city})
    entry[cm.city_e] = cm.extract_city(entry[cm.city_e])[0]

    gs.field_sense(entry)
    ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
    if ret[1] is not None and entry[cm.province_e] == '':
        entry[cm.province_e] = ret[1]
    if ret[2] is not None and entry[cm.city_e] == '':
        entry[cm.city_e] = ret[2]
    gs.field_sense(entry)

    print '(%s / %d) Found store: %s, %s (%s, %s)' % (
        brandname_e, brand_id, entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
        entry[cm.continent_e])

    db.insert_record(entry, 'stores')
    return entry
Exemple #54
0
def fetch_cities(data):
    """Request the locations below a country or below one of its states.

    When ``data['state_id']`` is ``None`` a ``countryChange`` request is sent
    for ``data['country_id']``; otherwise a ``stateChange`` request is sent
    for that country and state. The request goes to ``data['data_url']``.

    :param data: mapping providing ``data_url``, ``state_id`` and
        ``country_id``.
    :return: an empty list when the request raises. On success the response
        is only bound to a local here, so the call yields ``None``.
    """
    url = data['data_url']
    state_id = data['state_id']
    if state_id is None:
        param = {'request': 'countryChange', 'c': data['country_id']}
    else:
        param = {
            'request': 'stateChange',
            'c': data['country_id'],
            's': state_id
        }
    try:
        body = cm.post_data(url, param)
    except Exception:
        # This request is for cities, so the message says so, matching the
        # other fetch_cities variants.
        cm.dump('Error in fetching cities: %s, %s' % (url, param), log_name)
        return []
Exemple #55
0
def fetch_stores(data):
    """
    获得商店信息
    :param data:
    """
    url = data["post_url"]
    try:
        html = cm.post_data(url, {"pid": data["city_id"], "lang": "en", "action": "popola_box_DX"})
    except Exception:
        print "Error occured in getting city list: %s" % url
        dump_data = {"level": 2, "time": cm.format_time(), "data": {"url": url}, "brand_id": data["brand_id"]}
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(ur'<a href="(.+?)".*?>', html):
        entry = cm.init_store_entry(data["brand_id"], data["brandname_e"], data["brandname_c"])
        entry[cm.url] = m.group(1)
        store_html, start, end = cm.extract_closure(html[m.start() :], ur"<a href", ur"</a>")
        if end == 0:
            continue
        m1 = re.findall(ur'<h3 class="titleShop">(.+?)</h3>', store_html, re.S)
        if len(m1) > 0:
            entry[cm.name_e] = m1[0].strip()
        m1 = re.findall(ur"<p\b.*?>(.+?)(?:</p>|</div>)", store_html, re.S)
        if len(m1) > 0:
            terms = cm.reformat_addr(m1[0]).split(",")
            tel = cm.extract_tel(terms[-1])
            if tel != "":
                terms = terms[:-1]
                entry[cm.tel] = tel
            entry[cm.addr_e] = ", ".join([v.strip() for v in terms])

        entry["country_e"] = data["country_e"]
        entry["city_e"] = data["city_e"]
        gs.field_sense(entry)

        print "(%s / %d) Found store: %s, %s (%s, %s)" % (
            data["brandname_e"],
            data["brand_id"],
            entry[cm.name_e],
            entry[cm.addr_e],
            entry[cm.country_e],
            entry[cm.continent_e],
        )
        store_list.append(entry)
        db.insert_record(entry, "stores")