Exemple #1
0
    def test_gp_quadratic_kernel(self):
        """Check that a GP with a quadratic kernel predicts every test point."""
        train_x, train_y, test_x, test_y = get_data()

        # One bound per feature column plus one extra entry.
        n_bounds = np.shape(train_x)[1] + 1
        quadratic = {
            'type': 'quadratic',
            'slope': 1.,
            'degree': 1.,
            'scaling': 1.,
            'bounds': ((1e-5, None), ) * n_bounds,
            'scaling_bounds': ((0., None), ),
        }

        # Build the model with hyperparameter optimisation switched on.
        model = GaussianProcess(train_fp=train_x,
                                train_target=train_y,
                                kernel_dict={'k1': quadratic},
                                regularization=1e-3,
                                optimize_hyperparameters=True,
                                scale_data=True)
        result = model.predict(test_fp=test_x,
                               test_target=test_y,
                               get_validation_error=True,
                               get_training_error=True)

        # The only hard check: one prediction per test row.
        self.assertEqual(len(result['prediction']), len(test_x))
        rmse = result['validation_error']['rmse_average']
        print('quadratic prediction:', rmse)
Exemple #2
0
def fetch_stores(data):
    """Request the store listing described by ``data``.

    :param data: mapping providing ``'url'`` (the endpoint) and ``'m'``
        (forwarded to ``cm.get_data`` under the key ``'m'``).
    :return: an empty tuple when the request raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['url']
    try:
        # NOTE(review): the response is not used below; the remaining
        # processing appears to be missing from this definition.
        body = cm.get_data(url, {'m': data['m']})
    except Exception:  # the exception object was bound but never used
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return ()
Exemple #3
0
def fetch_cities(data):
    url = data['home_url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching geo info: %s' % url, 'samsonite_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    pat = re.compile(ur"if \(currlan == 'ZHT'\)\s*\{.+?\}", re.S)
    body = re.sub(pat, '', body)

    m = re.search(ur'dsy.add\("0",\[(.+?)\]', body)
    if m is None:
        cm.dump('Error in fetching geo info: %s' % url, 'samsonite_log.txt')
        return []
    province_list = [m1 for m1 in re.findall(ur'"(.+?)"', m.group(1))]

    city_list = []
    for m in re.findall(ur'dsy.add\("0_(\d+)",\[(.+?)\]', body):
        for m1 in re.findall(ur'"(.+?)"', m[1]):
            c = data.copy()
            c['province'] = province_list[string.atoi(m[0])]
            c['city'] = m1
            city_list.append(c)
Exemple #4
0
    def test_pareto(self):
        """Smoke test: run a short search with each fitness function."""
        features, targets, _, _ = get_data()
        features = features[:, :20]  # keep the first 20 feature columns

        def run_search(fit_func):
            # Build a fresh GA, check the initial population shape, search,
            # then check that population and fitness still hold 10 entries.
            ga = GeneticAlgorithm(population_size=10,
                                  fit_func=fit_func,
                                  features=features,
                                  targets=targets,
                                  population=None,
                                  fitness_parameters=2)
            self.assertEqual(np.shape(ga.population), (10, 20))

            ga.search(50)
            self.assertTrue(len(ga.population) == 10)
            self.assertTrue(len(ga.fitness) == 10)

        run_search(minimize_error_descriptors)
        run_search(minimize_error_time)
Exemple #5
0
def fetch_continents(data):
    """List the continents offered by the selector on ``data['store_url']``.

    :param data: mapping with ``'store_url'`` and ``'brand_id'``.
    :return: list of copies of ``data``, each with ``'continent'`` set to one
        option value; an empty list when the download fails or the
        ``continent`` select element is absent.
    """
    url = data['store_url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    start = html.find(u'<select id="continent" name="continent"')
    if start == -1:
        return []
    # Only the extracted text is needed; the offsets were never used.
    sub = cm.extract_closure(html[start:], ur'<select\b', ur'</select')[0]

    continent_list = []
    for m in re.findall(ur'<option value="(.+?)">.+?</option>', sub):
        d = data.copy()
        d['continent'] = m
        continent_list.append(d)
    # The collected list used to be built and then discarded.
    return continent_list
Exemple #6
0
def fetch_stores(data):
    """Parse the map markers on ``data['url']`` and save each store.

    :param data: mapping with ``'url'``, ``'brand_id'``, ``'brandname_e'``
        and ``'brandname_c'``.
    :return: list of the entries passed to ``db.insert_record``; an empty
        list when the page cannot be downloaded.
    """
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    # Each match is (marker HTML, latitude, longitude).
    for m in re.findall(ur'var markerContent\s*?=\s*?"(.+?)".+?'
                        ur'createMarker\(.+?new google.maps.LatLng\((-?\d+\.\d+),(-?\d+\.\d+)\)', html, re.S):
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        # float() replaces the deprecated string.atof with the same result.
        lat, lng = float(m[1]), float(m[2])
        cm.update_entry(entry, {cm.lat: lat, cm.lng: lng})

        sub = m[0].strip()
        # The bold element holds the store name; markers without it are skipped.
        m1 = re.search(ur'<b>(.+?)</b>', sub)
        if m1 is None:
            continue
        entry[cm.name_c] = m1.group(1)
        sub = sub.replace(m1.group(0), '')
        # Cut the phone number out, keeping the '<' that terminated the match.
        m1 = re.search(ur'聯系電話(?::|:)(.+?)<', sub)
        if m1 is not None:
            entry[cm.tel] = m1.group(1)
            sub = sub.replace(m1.group(0), '<')
        # Whatever remains once images are removed is stored as the address.
        sub = re.sub(ur'<img\b.*?/>', '', sub)
        entry[cm.addr_c] = cm.reformat_addr(sub)

        # NOTE(review): the message prints addr_e although addr_c was filled
        # in above - confirm which field is intended.
        print '(%s/%d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                        entry[cm.name_c], entry[cm.addr_e], entry[cm.country_e],
                                                        entry[cm.continent_e])
        store_list.append(entry)
        db.insert_record(entry, 'stores')
    # The collected list used to be built and then discarded.
    return store_list
Exemple #7
0
def fetch_continents(data):
    """Download the continent page at ``data['url']``.

    :param data: mapping providing ``'url'``.
    :return: an empty list when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['url']
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching continents: %s' % url, log_name)
        return []
Exemple #8
0
def fetch_contact_info(data, s, store_id):
    """Download the detail page of one store.

    The address requested is ``<data["shop_url"]>/<store_id>/detail``.

    :param data: mapping providing ``"shop_url"``.
    :param s: not used in the visible code.
    :param store_id: identifier inserted into the detail URL.
    :return: an empty tuple when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = "%s/%s/detail" % (data["shop_url"], store_id)
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump("Error in fetching stores: %s" % url, log_name)
        return ()
Exemple #9
0
def get_store_list(data):
    """
    Return the list of stores, each carrying its country information.
    :rtype : [{'name':'store name', 'url':'http://...', 'city':'NEW YORK', 'country':'AUSTRALIA'}, ...]
    :param data: mapping providing ``'url'``, the page to parse.
    """
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 1, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': brand_id}
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(ur'<ul>\s+?<h3 class="country-name">(.+?)</h3>', html, re.S):
        sub, start, end = cm.extract_closure(html[m.start():], ur'<ul>', ur'</ul>')
        if end == 0:
            continue
        # Offsets and names of the country headings split the list by country.
        splits = [[m1.start(), m1.group(1)] for m1 in re.finditer(ur'<h3 class="country-name">(.+?)</h3>', sub)]
        # Closing sentinel: the last section is sliced up to index -1.
        splits.append([-1, ''])
        for i in xrange(len(splits) - 1):
            # Search within a single country's section.
            sub1 = sub[splits[i][0]:splits[i + 1][0]]
            country = splits[i][1].upper()
            for m1 in re.findall(ur'<li>\s*?<a href="(http://us.christianlouboutin.com/us_en/storelocator/\S+?)">'
                                 ur'(.+?)</a>,(.+?)</li>', sub1):
                store_list.append({'name': m1[1].strip(), 'url': m1[0], 'city': m1[2].strip().upper(),
                                   'country': country})
    # The documented result used to be built and then discarded.
    return store_list
Exemple #10
0
def fetch_store_list(data):
    """Download the store-list page at ``data['url']``.

    :param data: mapping providing ``'url'``.
    :return: an empty list when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['url']
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        # NOTE(review): the message mentions "countries" although this
        # function is named for the store list - confirm the wording.
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
Exemple #11
0
def fetch_countries(data):
    """Build one record per country linked from the page at ``data['url']``.

    An "Object moved" response is followed by storing the new address in
    ``data['url']`` and calling this function again.

    :param data: mapping with ``'url'``, ``'host'`` and ``'brand_id'``.
    :return: list of copies of ``data`` with ``'country_e'``,
        ``'province_e'`` and ``'url'`` filled in; an empty list when the
        download fails or the country chooser is not found.
    """
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']})
        return []

    # Handle the redirect page.
    moved = re.search('<h2>Object moved to <a href="(.+?)">', html)
    if moved is not None:
        data['url'] = data['host'] + moved.group(1)
        return fetch_countries(data)

    chooser = re.search('<span class="country">Choose a country</span>', html)
    if chooser is None:
        return []
    sub, start, end = cm.extract_closure(html[chooser.end():], r'<ul\b', r'</ul>')
    if end == 0:
        return []

    country_list = []
    for href, label in re.findall('<li><a .*?href="(.+?)">(.+?)</a></li>', sub):
        d = data.copy()
        country_e = cm.html2plain(label).strip().upper()
        # Prefer the name returned by the lookup when there is one.
        known = gs.look_up(country_e, 1)
        if known is not None:
            country_e = known['name_e']
        d['country_e'] = country_e
        d['province_e'] = ''
        d['url'] = data['host'] + href
        country_list.append(d)
    return country_list
Exemple #12
0
def tv(identifier):
  """Redirect (HTTP 302) to the stream link resolved for channel ``identifier``.

  The fixed fallback address is used when the resolved link is empty.
  """
  channel_key = str(identifier).decode('UTF-8')
  channel = television.channels_by_id[channel_key]
  resolved = get_data(television.create_link(channel[u'cmd']))
  # Any falsy link is replaced by the fallback stream.
  link = resolved[u'cmd'] or 'http://149.13.0.80/nrj128.m3u'
  return redirect(link, code=302)
Exemple #13
0
def fetch_store_details(data):
    """Download the store-detail page at ``data["url"]``.

    The request is sent with an empty ``X-Requested-With`` header.

    :param data: mapping providing ``"url"``.
    :return: an empty tuple when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data["url"]
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url, hdr={"X-Requested-With": ""})
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump("Error in fetching store details: %s" % url, log_name)
        return ()
Exemple #14
0
def fetch_details(data, detail_url, entry):
    """Download ``detail_url`` on behalf of a copy of ``entry``.

    :param data: not used in the visible code.
    :param detail_url: address of the detail page.
    :param entry: mapping that is copied, so the caller's object is not the
        one returned.
    :return: the unmodified copy of ``entry`` when the download raises;
        otherwise the visible code falls through and returns ``None``.
    """
    entry = entry.copy()
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(detail_url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        # NOTE(review): the message mentions "countries" although this
        # function fetches details - confirm the wording.
        cm.dump('Error in fetching countries: %s' % detail_url, log_name)
        return entry
Exemple #15
0
def fetch_stores_au(data):
    """Download the stockist search page of hushpuppies.com.au.

    :param data: not used in the visible code; the address is fixed.
    :return: an empty tuple when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = 'http://www.hushpuppies.com.au/stockists/index/search/'
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return ()
Exemple #16
0
def fetch_cities(data):
    """Load the ``'places'`` list for the country ``data['country_id']``.

    The address is ``data['url']`` with ``data['country_id']`` substituted
    through ``%`` formatting.

    :param data: mapping providing ``'url'`` (a format string) and
        ``'country_id'``.
    :return: an empty tuple when downloading, JSON decoding or the
        ``'places'`` lookup raises; otherwise the visible code falls through
        and returns ``None``.
    """
    url = data['url'] % data['country_id']
    try:
        # NOTE(review): the decoded list is not used in the visible code.
        raw = json.loads(cm.get_data(url))['places']
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching cities: %s' % url, log_name)
        return ()
Exemple #17
0
def fetch_store_details(data):
    """Download the store-detail page at ``data['url']``.

    :param data: mapping providing ``'url'``.
    :return: an empty tuple when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['url']
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching store details: %s' % url, log_name)
        return ()
Exemple #18
0
def fetch_continents(data):
    """Load the top-level ``'places'`` list.

    The address is ``data['url']`` with ``0`` substituted through ``%``
    formatting.

    :param data: mapping providing ``'url'`` (a format string).
    :return: an empty tuple when downloading, JSON decoding or the
        ``'places'`` lookup raises; otherwise the visible code falls through
        and returns ``None``.
    """
    url = data['url'] % 0
    try:
        # NOTE(review): the decoded list is not used in the visible code.
        raw = json.loads(cm.get_data(url))['places']
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching continents: %s' % url, log_name)
        return ()
Exemple #19
0
def fetch_cities(data):
    """Download the city page at ``data['host'] + data['url']``.

    :param data: mapping providing ``'host'`` and ``'url'``.
    :return: an empty list when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['host'] + data['url']
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching cities: %s' % url, log_name)
        return []
Exemple #20
0
def fetch_stores_au(data):
    """Request the hushpuppies.com.au stockist search page.

    :param data: not used in the visible code; the address is hard-coded.
    :return: an empty tuple if the request raises; otherwise the visible code
        falls through and returns ``None``.
    """
    url = 'http://www.hushpuppies.com.au/stockists/index/search/'
    try:
        # NOTE(review): nothing visible consumes the response text.
        body = cm.get_data(url)
    except Exception:  # the ", e" binding was unused and Python-2-only
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return ()
Exemple #21
0
def fetch_stores(data):
    """Download ``data['url']`` and decode the response as JSON.

    :param data: mapping providing ``'url'``.
    :return: an empty tuple when downloading or JSON decoding raises;
        otherwise the visible code falls through and returns ``None``.
    """
    url = data['url']
    try:
        # NOTE(review): the decoded value is not used in the visible code.
        raw = json.loads(cm.get_data(url))
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return ()
Exemple #22
0
def fetch_countries(data):
    """Build one record per option of the country selector on ``data['url']``.

    :param data: mapping with ``'url'`` and ``'brand_id'``.
    :return: list of copies of ``data`` carrying ``'country_code'`` plus the
        country/continent name fields; an empty list when the download fails
        or the selector cannot be extracted.
    """
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    start = html.find('<select name="country" id="inp-country"')
    if start == -1:
        return []
    sub, start, end = cm.extract_closure(html[start:], ur'<select\b', ur'</select>')
    if end == 0:
        return []
    country_list = []
    # Options are (two-letter code, displayed name).
    for m in re.findall(ur'<option value="([A-Z]{2})">(.*?)</option>', sub):
        d = data.copy()
        d['country_code'] = m[0]
        d[cm.country_c] = m[1].strip()
        # Blank defaults, overwritten below when the code is recognised.
        for key in [cm.country_e, cm.continent_e, cm.continent_c]:
            d[key] = ''
        ret = gs.look_up(d['country_code'], 1)
        if ret is not None:
            d[cm.country_e] = ret['name_e']
            d[cm.country_c] = ret['name_c']
            d[cm.continent_c] = ret['continent']['name_c']
            d[cm.continent_e] = ret['continent']['name_e']

        country_list.append(d)
    # The collected list used to be built and then discarded.
    return country_list
Exemple #23
0
def fetch_cities(data):
    """Ask the site's AJAX proxy for the entries of one country.

    The request goes to ``data['host'] + '/ajax/esiajaxProxy.asp'`` with
    ``data['country_code']`` sent as ``cid``.

    :param data: mapping with ``'host'``, ``'country_code'`` and
        ``'brand_id'``.
    :return: list of copies of ``data``, each with ``'city'`` set to one
        ``data-value`` attribute found in the response; an empty list when
        the request fails.
    """
    url = data['host'] + '/ajax/esiajaxProxy.asp'
    try:
        body = cm.get_data(
            url, {
                'c': 'FF_StoreLocator2',
                'm': 'getCountiesAjax',
                'ws': 'ch-ch',
                'pid': 178,
                'cid': data['country_code'],
                'CT': 0
            })
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    results = []
    for m in re.findall(ur'<li><a href="" data-value="(.+?)">', body):
        d = data.copy()
        d['city'] = m
        results.append(d)
    # The collected list used to be built and then discarded.
    return results
Exemple #24
0
def fetch_stores(data):
    """Fetch ``data['url']`` and parse the reply as JSON.

    :param data: mapping providing ``'url'``.
    :return: an empty tuple if fetching or JSON parsing raises; otherwise the
        visible code falls through and returns ``None``.
    """
    url = data['url']
    try:
        # NOTE(review): nothing visible consumes the parsed value.
        raw = json.loads(cm.get_data(url))
    except Exception:  # the ", e" binding was unused and Python-2-only
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return ()
Exemple #25
0
def currency_update(param_dict):
    """
    更新货币的汇率信息
    @param param_dict:
    """
    db = RoseVisionDb()
    db.conn(getattr(gs, 'DATABASE')['DB_SPEC'])
    rs = db.query_match(['iso_code', 'currency'],
                        'region_info').fetch_row(maxrows=0)
    db.start_transaction()
    try:
        for code, currency in rs:
            print str.format('Fetching for currency data for {0}...', currency)
            data = cm.get_data(url=str.format(
                'http://download.finance.yahoo.com/d/quotes.csv?s={0}CNY=X'
                '&f=sl1d1t1ba&e=.json', currency))
            rdr = csv.reader(StringIO(data['body']))
            line_data = [val for val in rdr][0]
            timestamp = datetime.datetime.strptime(
                str.format('{0} {1}', line_data[2], line_data[3]),
                '%m/%d/%Y %I:%M%p')
            db.update(
                {
                    'rate': line_data[1],
                    'update_time': timestamp.strftime('%Y-%m-%d %H:%M:%S')
                }, 'region_info', str.format('iso_code="{0}"', code))
        db.commit()
    except:
        db.rollback()
        raise
Exemple #26
0
    def func(data, level):
        """
        Expand one node of the walk into its children.

        :param data: mapping for the current node; ``'url'`` is read at level
            1 and ``'content'`` at every other level.
        :param level: depth of the current node; level 4 yields store details.
        :return: siblings, a list of ``{'func': ..., 'data': ...}`` mappings.
        """
        if level == 4:
            # Deepest level: turn the carried content into store records.
            stores = get_store_details(data['content'], data)
            return [{'func': None, 'data': store} for store in stores]

        # Only the first level downloads; deeper levels reuse carried content.
        content = cm.get_data(data['url']) if level == 1 else data['content']
        entries = get_entries(content, pattern[level - 1])

        # Field that receives the entry name at each level.
        field_for_level = {
            1: cm.continent_e,
            2: cm.country_e,
            3: cm.city_e,
        }

        def siblings_data(ent):
            # Copy the node data, record the entry under this level's field
            # (if any) and replace 'content' with the entry's own content.
            child = dict(data)
            if level in field_for_level:
                child[field_for_level[level]] = ent
            child['content'] = entries[ent]
            return child

        siblings = []
        for ent in entries:
            siblings.append({
                'func': lambda data: func(data, level + 1),
                'data': siblings_data(ent)
            })
        return siblings
Exemple #27
0
def fetch_countries(data):
    """Download the country page at ``data['country_url']``.

    ``{'display_country': 'CN'}`` is passed along with the request.

    :param data: mapping providing ``'country_url'``.
    :return: an empty tuple when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['country_url']
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url, {'display_country': 'CN'})
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return ()
Exemple #28
0
    def run(cls, logger=None, **kwargs):
        """
        Update the exchange-rate information of every currency.

        Currencies whose quote cannot be fetched or parsed (``ValueError`` or
        ``IOError``) are skipped; any other error propagates.

        @param logger: logger used for progress messages; ``get_logger()`` is
            used when it is omitted.
        @param kwargs: accepted but not read by this method.
        """
        # The former test "'logger' in kwargs" could never be true because
        # `logger` is a named parameter, so a supplied logger was ignored.
        if logger is None:
            logger = get_logger()
        logger.info('Update currency STARTED!!!!')

        with RoseVisionDb(getattr(gs, 'DATABASE')['DB_SPEC']) as db:
            for currency in db.query_match('currency', 'currency_info').fetch_row(maxrows=0):
                currency = currency[0]
                try:
                    logger.debug(str.format('Fetching for currency data for {0}...', currency))
                    data = cm.get_data(url=str.format('http://download.finance.yahoo.com/d/quotes.csv?s={0}CNY=X'
                                                      '&f=sl1d1t1ba&e=.json', currency))
                    # First CSV row, columns 1-3: rate, date, time.
                    rate, d, t = [val for val in csv.reader(StringIO(data['body']))][0][1:4]
                    rate = float(rate)
                    timestamp = datetime.strptime(' '.join((d, t)), '%m/%d/%Y %I:%M%p').strftime('%Y-%m-%d %H:%M:%S')
                    db.update({'rate': rate, 'update_time': timestamp}, 'currency_info',
                              str.format('currency="{0}"', currency))
                except (ValueError, IOError):
                    # Best effort: leave this currency untouched and go on.
                    continue
        logger.info('Update currency ENDED!!!!')
Exemple #29
0
    def test_gp_linear_kernel(self):
        """Check that a GP with linear plus constant kernels predicts every test point."""
        train_x, train_y, test_x, test_y = get_data()

        # Kernel setup: a linear kernel and a constant kernel.
        linear = {
            'type': 'linear',
            'scaling': 1.,
            'scaling_bounds': ((0., None), ),
        }
        constant = {'type': 'constant', 'const': 1.}

        model = GaussianProcess(train_fp=train_x,
                                train_target=train_y,
                                kernel_dict={'k1': linear, 'c1': constant},
                                regularization=1e-3,
                                optimize_hyperparameters=True,
                                scale_data=True)
        result = model.predict(test_fp=test_x,
                               test_target=test_y,
                               get_validation_error=True,
                               get_training_error=True)

        # The only hard check: one prediction per test row.
        self.assertEqual(len(result['prediction']), len(test_x))
        rmse = result['validation_error']['rmse_average']
        print('linear prediction:', rmse)
Exemple #30
0
def _validate(**kwargs):
    """
    Validate the status-changes and trips feeds and collect the outcome.

    Each result is a tuple
    ``(record_type, version, datasource, valid, errors, removed)``.
    Record types with no data contribute nothing.
    """
    results = []
    version = kwargs["version"]

    for record_type in (mds.STATUS_CHANGES, mds.TRIPS):
        datasource = common.get_data(record_type, **kwargs)
        if len(datasource) == 0:
            continue

        found = {record["version"] for record in datasource}

        # Mixed versions inside one feed are reported as a single error.
        if len(found) > 1:
            expected = mds.Version(found.pop())
            unexpected = mds.Version(found.pop())
            mismatch = mds.versions.UnexpectedVersionError(expected, unexpected)
            results.append((record_type, expected, datasource, [], [mismatch], []))
            continue

        # NOTE(review): `version` is rebound here, so a version detected for
        # the first record type is reused for the next one - confirm intended.
        version = mds.Version(version or found.pop())

        try:
            valid, errors, removed = validate(record_type, datasource, version)
        except mds.versions.UnexpectedVersionError as unexpected_version:
            outcome = (record_type, version, datasource, [], [unexpected_version], [])
        else:
            outcome = (record_type, version, datasource, valid, errors, removed)
        results.append(outcome)

    return results
Exemple #31
0
def query():
    """Classify the documents listed in ``FLAGS.query_file`` and print each result."""
    tf.logging.set_verbosity(FLAGS.verbosity)

    classes = get_data(FLAGS.data_dir, classes_only=True)
    FLAGS.output_dim = len(classes)

    # One query document per line of the file.
    queries = np.loadtxt(FLAGS.query_file, dtype=str, delimiter='\n')
    wants_lengths = FLAGS.model == 'rnn'
    _, x_query, _, query_lengths, _, _ = process_vocabulary(
        None, queries, FLAGS, reuse=True, sequence_lengths=wants_lengths)

    # Map the model name given on the command line to its builder.
    builders = {
        'perceptron': bag_of_words_perceptron_model,
        'mlp': bag_of_words_MLP_model,
        'rnn': rnn_model,
    }
    model = builders.get(FLAGS.model)
    if model is None:
        raise ValueError('unknown model')

    classifications = predict(x_query, query_lengths, model, FLAGS)
    template = 'The model classifies "{}" as a member of the class {}.'
    for position, text in enumerate(queries):
        label = classes['class'][classifications[position]]
        print(template.format(text, label))
Exemple #32
0
def fetch_stores(data):
    """Collect the store links listed on the page at ``data[cm.url]``.

    :param data: mapping with ``cm.url``, ``'host'`` and ``'brand_id'``.
    :return: list of copies of ``data`` whose ``cm.url`` points at the store
        page and whose ``cm.name_e`` holds the link text; an empty list when
        the download fails.
    """
    # Each store starts at an <h2 property="dc:title" heading.
    url = data[cm.url]
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    store_list = []
    for m in re.finditer(ur'<h2 property="dc:title"', html):
        # The heading block runs up to the next </header>.
        end = html.find('</header>', m.start())
        if end == -1:
            continue
        sub = html[m.start():end]
        m1 = re.search(ur'<a href="(.+?)">(.+?)</a></h2>', sub)
        if m1 is None:
            print 'Error: no more details for %s' % url
            continue
        d = data.copy()
        d[cm.url] = data['host'] + m1.group(1)
        d[cm.name_e] = cm.html2plain(m1.group(2)).strip()
        store_list.append(d)
    # The collected list used to be built and then discarded.
    return store_list
Exemple #33
0
def fetch_cities(data):
    url = data['home_url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching cities: %s' % url, 'canali_log.txt')
        dump_data = {'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']}
        cm.dump(dump_data)
        return []

    start = body.find(u'<nav class="countrySelector">')
    if start == -1:
        cm.dump('Error occured in fetching country list: %s' % url, 'canali_log.txt')
    body = cm.extract_closure(body[start:], ur'<nav\b', ur'</nav>')[0]

    results = []
    for m in re.finditer(ur'<li><a href=".+?">(.+?)</a>', body):
        country = m.group(1).strip().upper()
        sub = cm.extract_closure(body[m.end():], ur'<ul\b', ur'</ul>')[0]
        for m1 in re.findall(ur'<li><a class=".+?" href="(.+?)">(.+?)</a></li>', sub):
            d = data.copy()
            d['country'] = country
            d['url'] = data['host'] + m1[0]
            d['city'] = m1[1].strip().upper()
            results.append(d)
Exemple #34
0
def fetch_stores(data):
    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return []

    store_list = []
    for m in re.finditer(ur'<item id="\d+">', body):
        sub = cm.extract_closure(body[m.start():], ur'<item\b', ur'</item>')[0]
        entry = cm.init_store_entry(data['brand_id'], data['brandname_e'], data['brandname_c'])
        m1 = re.search(ur'<country>([^<>]+)</country>', sub)
        if m1 is not None:
            tmp = m1.group(1).split('/')
            for v in tmp:
                ret = gs.look_up(v.strip().upper(), 1)
                if ret is not None:
                    entry[cm.country_e] = ret['name_e']
                    break
        m1 = re.search(ur'<city>([^<>]+)</city>', sub)
        if m1 is not None:
            val = cm.reformat_addr(m1.group(1))
            if entry[cm.country_e] == 'UNITED STATES':
                tmp_list = tuple(tmp.strip() for tmp in cm.reformat_addr(val).strip(','))
                if len(tmp_list) == 2:
                    if re.search('[A-Z]{2}', tmp_list[1]):
                        entry[cm.province_e] = tmp_list[1]
            entry[cm.city_e] = cm.extract_city(m1.group(1))[0]
        m1 = re.search(ur'<brands>([^<>]+)</brands>', sub)
        if m1 is not None:
            tmp = m1.group(1).split('/')
            brand_list = []
            for v in tmp:
                if v.strip() != '':
                    brand_list.append(v)
            entry[cm.store_type] = ', '.join(brand_map[key] for key in brand_list)
        m1 = re.search(ur'<name>([^<>]+)</name>', sub)
        if m1 is not None:
            entry[cm.name_e] = m1.group(1).strip()
        m1 = re.search(ur'<address>([^<>]+)</address>', sub)
        if m1 is not None:
            entry[cm.addr_e] = cm.reformat_addr(m1.group(1))
        m1 = re.search(ur'<tel>([^<>]+)</tel>', sub)
        if m1 is not None:
            entry[cm.tel] = m1.group(1).strip()
        m1 = re.search(ur'sll=(-?\d+\.\d+),(-?\d+\.\d+)', sub)
        if m1 is not None:
            entry[cm.lat] = string.atof(m1.group(1))
            entry[cm.lng] = string.atof(m1.group(2))
        gs.field_sense(entry)
        ret = gs.addr_sense(entry[cm.addr_e], entry[cm.country_e])
        if ret[1] is not None:
            entry[cm.province_e] = ret[1]
            gs.field_sense(entry)
        cm.dump('(%s / %d) Found store: %s, %s (%s, %s)' % (data['brandname_e'], data['brand_id'],
                                                            entry[cm.name_e], entry[cm.addr_e], entry[cm.country_e],
                                                            entry[cm.continent_e]), log_name)
        db.insert_record(entry, 'stores')
        store_list.append(entry)
Exemple #35
0
def fetch_states(data):
    global national_added

    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching states: %s' % url, log_name)
        return []

    national_added = False

    m = re.search(ur'Choose a (state|region|province)', body)
    if m is None:
        d = data.copy()
        d['state'] = ''
        return [d]

    body = cm.extract_closure(body[m.start():], ur'<ul>', ur'</ul>')[0]
    results = []
    for m in re.findall(ur'<a href="([^"]+)">([^<>]+)</a>', body):
        d = data.copy()
        d['url'] = data['host'] + m[0]
        d['state'] = cm.html2plain(m[1]).strip().upper()
        results.append(d)
Exemple #36
0
    def test_load(self):
        """Check that models read back from disk reproduce the original errors."""
        train_x, train_y, test_x, test_y = get_data()

        self.make_test(train_x, train_y, test_x, test_y)

        # Read the pickle file first, then the HDF5 file, and compare each
        # against the errors stored in self.original.
        for extension in ('pkl', 'hdf5'):
            loaded = io.read(filename='test-model', ext=extension)
            pred = loaded.predict(test_fp=test_x,
                                  test_target=test_y,
                                  get_validation_error=True,
                                  get_training_error=True)
            same = np.allclose(pred['validation_error']['rmse_all'],
                               self.original['validation_error']['rmse_all'])
            self.assertTrue(same)

        # Remove both model files once every comparison has passed.
        for template in ('{}/test-model.pkl', '{}/test-model.hdf5'):
            os.remove(template.format(wkdir))
Exemple #37
0
def fetch_countries(data):
    """Download the page at ``data['host'] + 'boutique'``.

    :param data: mapping providing ``'host'``.
    :return: an empty list when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['host'] + 'boutique'
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url)
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
Exemple #38
0
def fetch_store_details(url, data):
    """
    Get the detailed store information (one url may describe several stores).

    NOTE(review): on success the visible code only collects the per-store
    HTML fragments and then falls through (returns ``None``); the step that
    would produce the documented ``[{}]`` is not present here.

    :rtype : [{}]
    :param url: address of the page to download.
    :param data: only used when reporting a failed download.
    """
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s / %s' % (str(data), url)
        cm.dump({
            'level': 2,
            'time': cm.format_time(),
            'data': data,
            'brand_id': brand_id
        })
        return []

    # There may be several stores on the page; split it into fragments.
    opening_tag = ur'<li\s+class\s*=\s*"boutique-info-cadre-\d+"\s*>'
    sub_html = []
    for hit in re.finditer(opening_tag, html):
        fragment_start = hit.end()
        fragment_end = html.find('</li>', fragment_start)
        sub_html.append(html[fragment_start:fragment_end])
Exemple #39
0
def fetch_cities(data):
    url = data['home_url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching geo info: %s' % url, 'samsonite_log.txt')
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    pat = re.compile(ur"if \(currlan == 'ZHT'\)\s*\{.+?\}", re.S)
    body = re.sub(pat, '', body)

    m = re.search(ur'dsy.add\("0",\[(.+?)\]', body)
    if m is None:
        cm.dump('Error in fetching geo info: %s' % url, 'samsonite_log.txt')
        return []
    province_list = [m1 for m1 in re.findall(ur'"(.+?)"', m.group(1))]

    city_list = []
    for m in re.findall(ur'dsy.add\("0_(\d+)",\[(.+?)\]', body):
        for m1 in re.findall(ur'"(.+?)"', m[1]):
            c = data.copy()
            c['province'] = province_list[string.atoi(m[0])]
            c['city'] = m1
            city_list.append(c)
Exemple #40
0
def get_continents(data):
    """
    Return the list of continents linked from the page at ``data['url']``.

    :rtype : [{'name': <link text>, 'url': 'http://....'}, ...]
    :param data: mapping providing ``'url'``.
    :return: one mapping per store-locator link; an empty list when the
        download fails.
    """
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({
            'level': 1,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': brand_id
        })
        return []

    link_pattern = ur'<a href="(http://us.christianlouboutin.com/us_en/storelocator/\S+)">(.+?)</a>'
    continents = []
    for href, label in re.findall(link_pattern, html):
        continents.append({'name': label, 'url': href})
    return continents
Exemple #41
0
def fetch_stores(data):
    """Download the store page at ``data['url']`` using ``data['cookie']``.

    :param data: mapping providing ``'url'`` and ``'cookie'``.
    :return: an empty tuple when the download raises; otherwise the visible
        code falls through and returns ``None``.
    """
    url = data['url']
    try:
        # NOTE(review): the downloaded text is not used in the visible code.
        body = cm.get_data(url, cookie=data['cookie'])
    except Exception:  # dropped the unused, Python-2-only ", e" binding
        cm.dump('Error in fetching stores: %s' % url, log_name)
        return ()
Exemple #42
0
def fetch(level=1, data=None, user='******', passwd=''):
    """Rebuild the stored store list of one brand.

    Existing rows of the brand are deleted from ``stores``, the root page is
    downloaded into ``data['html']`` and ``fetch_countries`` does the rest.

    :param level: not read by this function.
    :param data: brand description with ``'url'``, ``'brand_id'``,
        ``'brandname_e'`` and ``'brandname_c'``; defaults to the MIDO record
        below.
    :param user: database user name.
    :param passwd: database password.
    :return: whatever ``fetch_countries(data)`` returns; an empty list when
        the root page cannot be downloaded.
    """
    # Walk from the root node, where level == 1.
    if data is None:
        data = {
            'url': 'http://www.mido.cn/zh/retailer_li/POS',
            'brand_id': 10260,
            'brandname_e': u'MIDO',
            'brandname_c': u'美度'
        }

    global db
    db = cm.StoresDb()
    db.connect_db(user=user, passwd=passwd)
    try:
        db.execute(u'DELETE FROM %s WHERE brand_id=%d' %
                   ('stores', data['brand_id']))

        url = data['url']
        try:
            data['html'] = cm.get_data(url)
        except Exception:
            print 'Error occured: %s' % url
            dump_data = {
                'level': 0,
                'time': cm.format_time(),
                'data': {
                    'url': url
                },
                'brand_id': data['brand_id']
            }
            cm.dump(dump_data)
            return []

        # The result used to be assigned and then discarded.
        store_list = fetch_countries(data)
        return store_list
    finally:
        # Close the connection on every exit path; the early return above
        # used to leave it open.
        db.disconnect_db()
Exemple #43
0
def fetch_countries(data):
    """
    Download the page at data["url"].

    :param data: dict providing the "url" entry.
    :return: an empty list when the download fails (the failure is logged
             through cm.dump); nothing is returned explicitly otherwise.
    """
    url = data["url"]
    try:
        page = cm.get_data(url)
    except Exception:
        cm.dump("Error in fetching countries: %s" % url, log_name)
        return []
Exemple #44
0
def fetch_cities_beauty(data):
    """
    Download the page at "<data['city_url']>/<data['country_code']>".

    :param data: dict providing the 'city_url' and 'country_code' entries.
    :return: an empty tuple when the download fails (the failure is logged
             through cm.dump); nothing is returned explicitly otherwise.
    """
    url = '%s/%s' % (data['city_url'], data['country_code'])
    try:
        page = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return ()
Exemple #45
0
    def test_read_write(self):
        """Test writing a GA population to file and reading it back.

        A first search writes its state to 'gaWrite.json'; a second
        GeneticAlgorithm is then seeded with the population read from that
        file and must start from the same population before searching on.
        """
        train_features, train_targets, _, _ = get_data()
        # Only the first 20 feature columns are used.
        train_features = train_features[:, :20]

        ga1 = GeneticAlgorithm(population_size=10,
                               fit_func=minimize_error,
                               features=train_features,
                               targets=train_targets,
                               population=None)
        self.assertEqual(np.shape(ga1.population), (10, 20))

        ga1.search(2, writefile='gaWrite.json')
        # assertEqual reports the actual length on failure, unlike
        # assertTrue(len(...) == 10).
        self.assertEqual(len(ga1.population), 10)
        self.assertEqual(len(ga1.fitness), 10)

        old_pop, _ = read_data('gaWrite.json')

        ga2 = GeneticAlgorithm(population_size=10,
                               fit_func=minimize_error,
                               features=train_features,
                               targets=train_targets,
                               population=old_pop)
        self.assertTrue(np.allclose(ga2.population, ga1.population))

        ga2.search(50)
        self.assertEqual(len(ga2.population), 10)
        self.assertEqual(len(ga2.fitness), 10)
Exemple #46
0
def fetch_countries(data):
    """
    Build one crawl context per country listed on the page at data['url'].

    :param data: dict providing 'url', 'host' and 'brand_id'. When the page
                 is an "Object moved" redirect, data['url'] is overwritten
                 with the redirect target before retrying.
    :return: list of copies of data, each extended with 'country_e',
             'province_e' (always '') and the country's 'url'; an empty list
             when the download fails or the country list is not found.
    """
    url = data['url']
    try:
        html = cm.get_data(url)
    except Exception:
        print 'Error occured: %s' % url
        cm.dump({'level': 0, 'time': cm.format_time(), 'data': {'url': url}, 'brand_id': data['brand_id']})
        return []

    # Handle redirects: retry against the address the page points to.
    redirect = re.search('<h2>Object moved to <a href="(.+?)">', html)
    if redirect is not None:
        data['url'] = data['host'] + redirect.group(1)
        return fetch_countries(data)

    heading = re.search('<span class="country">Choose a country</span>', html)
    if heading is None:
        return []
    # Only the first <ul> after the heading is scanned for country links.
    sub, _, end = cm.extract_closure(html[heading.end():], r'<ul\b', r'</ul>')
    if end == 0:
        return []

    countries = []
    for href, label in re.findall('<li><a .*?href="(.+?)">(.+?)</a></li>', sub):
        name = cm.html2plain(label).strip().upper()
        # Prefer the name returned by gs.look_up when the lookup succeeds.
        known = gs.look_up(name, 1)
        if known is not None:
            name = known['name_e']
        entry = data.copy()
        entry['country_e'] = name
        entry['province_e'] = ''
        entry['url'] = data['host'] + href
        countries.append(entry)
    return countries
Exemple #47
0
def get_frag_countries(url):
    # 获得国家代码
    """
    获得国家的名字和代码
    :rtype : [{'id':**, 'country':**}, ...]
    :param url:
    :return:
    """
    try:
        html = common.get_data(url)
    except Exception:
        print 'Error occured: %s' % url_fragrance
        dump_data = {'level': 1, 'time': common.format_time(), 'data': {'url': url_fragrance},
                     'brand_id': brand_id}
        common.dump(dump_data)
        return [], False

    start = html.find('<select name="country" id="id_country">')
    if start == -1:
        return [], False
    sub, s, e = common.extract_closure(html[start:], ur'<select\b', ur'</select>')
    if e == 0:
        return [], False
    return [{'id': string.atoi(m[0]), 'country': m[1].strip().upper()}
            for m in re.findall(ur'<option value="(\d+)".*?>(.+?)</option>', sub)]
Exemple #48
0
def fetch_states(data):
    global national_added

    url = data['url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching states: %s' % url, log_name)
        return []

    national_added = False

    m = re.search(ur'Choose a (state|region|province)', body)
    if m is None:
        d = data.copy()
        d['state'] = ''
        return [d]

    body = cm.extract_closure(body[m.start():], ur'<ul>', ur'</ul>')[0]
    results = []
    for m in re.findall(ur'<a href="([^"]+)">([^<>]+)</a>', body):
        d = data.copy()
        d['url'] = data['host'] + m[0]
        d['state'] = cm.html2plain(m[1]).strip().upper()
        results.append(d)
Exemple #49
0
    def parse_data2(self):
        """Collect baidu.com headline links from the page into self.news.

        For the '互联网' plate the content is first re-fetched from the news
        widget endpoint (stored in self.content) and the items are <div>
        elements whose class contains "item"; for every other plate the
        existing self.content is parsed and the items are <li class="item">
        elements. Each anchor under an item's <h3> whose href contains
        'baidu.com' is appended to self.news as a {'标题': ..., '链接': ...}
        dict. Anchors lacking an href or direct text are skipped.
        """
        if self.name == '互联网':
            url = 'http://news.baidu.com/widget?'
            params = {
                'id': 'AllOtherData',
                'channel': self.__dict[self.name],
                # Current time in whole milliseconds, as a string.
                't': str(int(time.time() * 1000))
            }
            self.content = get_data(url=url, params=params, plate=self.name)
            item_xpath = '//div[contains(@class,"item")]'
        else:
            item_xpath = '//li[@class="item"]'

        tree = etree.HTML(self.content)
        for li in tree.xpath(item_xpath):
            for ia in li.xpath('h3//a'):
                hrefs = ia.xpath('@href')
                titles = ia.xpath('text()')
                # An anchor without href or text would otherwise raise
                # IndexError and abort the whole parse.
                if not hrefs or not titles:
                    continue
                href = hrefs[0]
                if 'baidu.com' in href:
                    self.news.append({
                        '标题': titles[0],
                        '链接': href
                    })
        logger.info('%s:%s', self.name, self.news)
Exemple #50
0
def fetch_cities(data):
    url = data['home_url']
    try:
        body = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching cities: %s' % url, 'unode50_log.txt')
        dump_data = {
            'level': 0,
            'time': cm.format_time(),
            'data': {
                'url': url
            },
            'brand_id': data['brand_id']
        }
        cm.dump(dump_data)
        return []

    m = re.search(ur'countries\s*=\s*\{', body)
    if m is None:
        cm.dump('Error in fetching cities: %s' % url, 'unode50_log.txt')
        return []
    body = cm.extract_closure(body[m.start():], ur'\{', ur'\}')[0]
    raw = json.loads(body)
    results = []
    for key in raw:
        d = data.copy()
        d['country'] = raw[key]['name'].strip().upper()
        d['country_id'] = key
        results.append(d)
    return results
Exemple #51
0
def fetch_countries(data):
    """
    Download the page at data['url'].

    :param data: dict providing the 'url' entry.
    :return: an empty list when the download fails (the failure is logged
             through cm.dump); nothing is returned explicitly otherwise.
    """
    url = data['url']
    try:
        page = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching countries: %s' % url, log_name)
        return []
Exemple #52
0
def get_frag_countries(url):
    # 获得国家代码
    """
    获得国家的名字和代码
    :rtype : [{'id':**, 'country':**}, ...]
    :param url:
    :return:
    """
    try:
        html = common.get_data(url)
    except Exception:
        print 'Error occured: %s' % url_fragrance
        dump_data = {
            'level': 1,
            'time': common.format_time(),
            'data': {
                'url': url_fragrance
            },
            'brand_id': brand_id
        }
        common.dump(dump_data)
        return [], False

    start = html.find('<select name="country" id="id_country">')
    if start == -1:
        return [], False
    sub, s, e = common.extract_closure(html[start:], ur'<select\b',
                                       ur'</select>')
    if e == 0:
        return [], False
    return [{
        'id': string.atoi(m[0]),
        'country': m[1].strip().upper()
    } for m in re.findall(ur'<option value="(\d+)".*?>(.+?)</option>', sub)]
Exemple #53
0
def fetch_countries_eu(data):
    """
    Download the page at data['url_eu'].

    :param data: dict providing the 'url_eu' entry.
    :return: an empty tuple when the download fails (the failure is logged
             through cm.dump); nothing is returned explicitly otherwise.
    """
    url = data['url_eu']
    try:
        page = cm.get_data(url)
    except Exception:
        cm.dump('Error in fetching EU countries: %s' % url, log_name)
        return ()
Exemple #54
0
    def test_gp_addative_kernel(self):
        """Test Gaussian process predictions with an additive kernel.

        The kernel combines a linear kernel on features 0-1, a gaussian
        kernel on features 2-3 and a constant term.
        """
        train_features, train_targets, test_features, test_targets = get_data()

        # Sub-kernels that make up the additive kernel.
        linear_part = {'type': 'linear', 'features': [0, 1], 'scaling': 1.}
        gaussian_part = {
            'type': 'gaussian',
            'features': [2, 3],
            'width': 1.,
            'scaling': 1.
        }
        constant_part = {'type': 'constant', 'const': 1.}
        kdict = {'k1': linear_part, 'k2': gaussian_part, 'c1': constant_part}

        gp = GaussianProcess(
            train_fp=train_features,
            train_target=train_targets,
            kernel_dict=kdict,
            regularization=1e-3,
            optimize_hyperparameters=True,
            scale_data=True,
        )
        pred = gp.predict(
            test_fp=test_features,
            test_target=test_targets,
            get_validation_error=True,
            get_training_error=True,
        )

        # One prediction is expected per test sample.
        self.assertEqual(len(pred['prediction']), len(test_features))
        rmse = pred['validation_error']['rmse_average']
        print('addition prediction:', rmse)
Exemple #55
0
def fetch_stores(data):
    """
    Query the store service for one city and decode the JSON response.

    Increments the module-level counter tot_processed and logs it.

    NOTE(review): in the code visible here next_val is never updated and
    neither store_list nor the decoded response is used, so the loop only
    exits through the error returns; confirm against the full function.

    :param data: dict providing 'store_url', 'type_key', 'country_id',
                 'city_lat' and 'city_lng'.
    :return: an empty list when the request or the JSON decoding fails.
    """
    global tot_processed

    store_list = []
    tot_processed += 1
    cm.dump('Processint city #%d' % tot_processed, log_name)

    next_val = 0
    while next_val != -1:
        url = data['store_url']
        param = {
            'location_form[services]': data['type_key'],
            'location_form[countries]': data['country_id'],
            'startNextValues': next_val,
            'location_form[latitude]': data['city_lat'],
            'location_form[longitude]': data['city_lng'],
        }
        # Download and decoding failures are reported identically.
        try:
            body = cm.get_data(url, param)
            raw = json.loads(body)
        except Exception:
            cm.dump('Error in fetching stores: %s, %s' % (url, param), log_name)
            return []
Exemple #56
0
    def func(data, level):
        """
        Expand one node of the crawl tree into its children.

        :param data: context dict of the current node; 'url' is read at
                     level 1, 'content' at every other level.
        :param level: depth of the current node; level 4 yields store details.
        :return: siblings, a list of {'func': ..., 'data': ...} dicts. At
                 level 4 'func' is None; otherwise it is a callable that
                 expands the child at level + 1.
        """
        # Level 4: extract the store details from the content.
        if level == 4:
            return [{'func': None, 'data': store}
                    for store in get_store_details(data['content'], data)]

        # Only the first level downloads; deeper levels reuse the content
        # handed down by their parent.
        content = cm.get_data(data['url']) if level == 1 else data['content']
        entries = get_entries(content, pattern[level - 1])

        def siblings_data(ent):
            # Every new level adds one field to a copy of data and replaces
            # its 'content' field; the copy seeds the new sibling.
            child = dict(data)
            if level == 1:
                child[cm.continent_e] = ent
            elif level == 2:
                child[cm.country_e] = ent
            elif level == 3:
                child[cm.city_e] = ent
            child['content'] = entries[ent]
            return child

        siblings = []
        for ent in entries:
            siblings.append({'func': lambda data: func(data, level + 1), 'data': siblings_data(ent)})
        return siblings