Ejemplo n.º 1
0
def parse_index(ex, type_, content, conf):
    """Scan an index page for announcements and store the unseen ones.

    Args:
        ex: Exchange object; its ``abbr`` and ``_id`` attributes are read.
        type_: Value stored under the ``type_`` key of each new record.
        content: Raw bytes of the index page.
        conf: Mapping with ``encoding`` (codec name), ``detail`` (regex whose
            groups line up with ``fields``), ``fields`` (key names for the
            groups) and optionally ``relative`` (prefix for non-absolute
            URLs).

    Each regex match becomes a record. Records whose URL is already known to
    ``Announcement`` are skipped; the others get their detail page fetched
    and are upserted. The record must end up with ``url``, ``published_at``
    and ``title`` keys, since all three are read below.
    """
    encoding = conf['encoding']
    page = content.decode(encoding, 'ignore')
    detail_re = re.compile(conf['detail'], re.DOTALL)

    for groups in detail_re.findall(page):
        # Pair every captured group with its field name, dropping simple
        # attribute-less tags and all whitespace from the captured text.
        d = {}
        for key, value in zip(conf['fields'], groups):
            d[key] = re.sub(r'(</?[a-zA-Z]+>|\s+)', '', value.strip())

        if 'relative' in conf and not d['url'].startswith('http'):
            d['url'] = conf['relative'] + d['url']

        already_stored = Announcement.query_one({'url': d['url']})
        if already_stored:
            continue

        # Two exchanges publish dates in a shape that needs fixing up before
        # it can be handed to parse_datetime.
        abbr = ex.abbr
        published = d['published_at']
        if abbr == '中港邮币卡':
            # Remaining tags act as separators between the date parts.
            published = re.sub('<[^>]*>', '-', published)
        if abbr == '三点零':
            # Drop remaining tags, then move the first two characters to the
            # end, after a slash.
            bare = re.sub('<[^>]*>', '', published)
            published = bare[2:] + '/' + bare[:2]
        # NOTE(review): the 8-hour shift presumably converts UTC+8 local
        # time to UTC -- confirm.
        d['published_at'] = parse_datetime(published) - timedelta(hours=8)

        d['exchange'] = ex._id
        d['type_'] = type_

        raw_detail = session.get(d['url'], timeout=(5, 10)).content
        # Every occurrence of the source encoding name in the page text is
        # replaced by 'utf-8' (presumably to fix the charset declaration).
        d['html'] = raw_detail.decode(encoding, 'ignore').replace(
            encoding, 'utf-8')

        log.info('[{exchange}]{published_at}: {title}'.format(**d))
        Announcement(d).upsert()
Ejemplo n.º 2
0
Archivo: parse.py Proyecto: maocis/ybk
def parse():
    """Render the admin parse page, optionally focused on one announcement.

    Reads the optional ``url`` query argument. Counts the announcements whose
    ``type_`` is 'offer' or 'result', both in total and restricted to those
    flagged ``parsed``. When ``url`` is given, loads the announcement with
    that URL and the collections whose ``from_url`` equals it, and rewrites a
    few collection attributes into display strings.

    Returns:
        The result of rendering ``admin/parse.html``.

    NOTE: the template context is ``**locals()``, so every local name in this
    function is part of the template's contract. ``announcement`` and
    ``colls`` are only bound when ``url`` is truthy, and ``coll`` only when at
    least one collection matched.
    """
    # Not used in this function; it reaches the template through locals().
    nav = 'parse'
    url = request.args.get('url')
    num_parsed = Announcement.count({
        'parsed': True,
        'type_': {
            '$in': ['offer', 'result']
        }
    })
    num_total = Announcement.count({'type_': {'$in': ['offer', 'result']}})
    if url:
        announcement = Announcement.query_one({'url': url})
        colls = list(Collection.query({'from_url': url}))
        # The loaded objects are modified in place for display; no save call
        # is made in this function.
        for coll in colls:
            if coll.offers_at:
                # Compact YYYYMMDD string kept on a separate attribute.
                coll.offers_at2 = coll.offers_at.strftime('%Y%m%d')
            if coll.offer_cash_ratio:
                # Overwritten with a percentage string, e.g. 0.5 -> '50%'.
                coll.offer_cash_ratio = '{:2.0f}%'.format(
                    coll.offer_cash_ratio * 100)
            if coll.offer_price:
                # Stringified, with a trailing '.0' dropped ('12.0' -> '12').
                coll.offer_price = str(coll.offer_price)
                if coll.offer_price.endswith('.0'):
                    coll.offer_price = coll.offer_price[:-2]
    # True when every offer/result announcement is flagged as parsed.
    all_done = num_parsed == num_total
    return render_template('admin/parse.html', **locals())
Ejemplo n.º 3
0
def parse_index(ex, type_, content, conf):
    """Turn the entries of an index page into stored announcements.

    Args:
        ex: Exchange object; ``ex.abbr`` selects date fix-ups and ``ex._id``
            is stored on each record.
        type_: Stored as-is under the record's ``type_`` key.
        content: Index page as bytes.
        conf: Scraper settings: ``encoding``, ``detail`` (regex with one
            group per entry of ``fields``), ``fields`` and, optionally,
            ``relative`` (prepended to URLs not starting with 'http').

    Entries whose URL is already present in ``Announcement`` are skipped.
    For every other entry the detail page is downloaded, attached as
    ``html`` and the record is upserted. The code reads the ``url``,
    ``published_at`` and ``title`` keys, so ``fields`` has to provide them.
    """
    codec = conf['encoding']
    listing = content.decode(codec, 'ignore')
    entry_pattern = re.compile(conf['detail'], re.DOTALL)
    # Attribute-less tags and any whitespace are removed from captured text.
    noise = re.compile(r'(</?[a-zA-Z]+>|\s+)')
    any_tag = re.compile('<[^>]*>')

    for captured in entry_pattern.findall(listing):
        d = dict(
            (field, noise.sub('', raw.strip()))
            for field, raw in zip(conf['fields'], captured)
        )

        if 'relative' in conf and not d['url'].startswith('http'):
            d['url'] = conf['relative'] + d['url']
        if Announcement.query_one({'url': d['url']}):
            # Already stored: nothing to fetch.
            continue

        exchange_abbr = ex.abbr
        stamp = d['published_at']
        if exchange_abbr == '中港邮币卡':
            # Tags left in the date become '-' separators.
            stamp = any_tag.sub('-', stamp)
        if exchange_abbr == '三点零':
            # Tags are dropped and the leading two characters are moved
            # behind the rest, joined by '/'.
            stamp = any_tag.sub('', stamp)
            stamp = stamp[2:] + '/' + stamp[:2]
        # NOTE(review): minus eight hours looks like a UTC+8 -> UTC
        # conversion -- confirm.
        d['published_at'] = parse_datetime(stamp) - timedelta(hours=8)
        d['exchange'] = ex._id
        d['type_'] = type_

        response = session.get(d['url'], timeout=(5, 10))
        detail_text = response.content.decode(codec, 'ignore')
        # The source encoding name is swapped for 'utf-8' wherever it occurs
        # in the page text (presumably for the charset declaration).
        d['html'] = detail_text.replace(codec, 'utf-8')

        log.info('[{exchange}]{published_at}: {title}'.format(**d))
        Announcement(d).upsert()
Ejemplo n.º 4
0
Archivo: parse.py Proyecto: sopnic/ybk
def parse_findone():
    """Redirect to the parse page for one not-yet-parsed announcement.

    Queries a single announcement with ``parsed`` False and ``type_`` of
    "offer" or "result", ordered by ``published_at`` with direction -1
    (newest first, assuming MongoDB-style sort semantics).

    Returns:
        A redirect to ``admin.parse`` carrying that announcement's URL, or a
        redirect to ``admin.parse`` without arguments when the query finds
        nothing.
    """
    pending_filter = {"parsed": False, "type_": {"$in": ["offer", "result"]}}
    candidate = Announcement.query_one(pending_filter, sort=[("published_at", -1)])
    if not candidate:
        return redirect(url_for("admin.parse"))
    return redirect(url_for("admin.parse", url=candidate.url))
Ejemplo n.º 5
0
Archivo: parse.py Proyecto: maocis/ybk
def parse_findone():
    """Send the admin to the next announcement that still needs parsing.

    Looks up one announcement that is not flagged ``parsed`` and whose
    ``type_`` is 'offer' or 'result', sorted on ``published_at`` with
    direction -1 (presumably newest first, as in MongoDB sort specs).

    Returns:
        A redirect to ``admin.parse``; the ``url`` argument is included only
        when an announcement was found.
    """
    wanted_types = ['offer', 'result']
    unparsed = Announcement.query_one(
        {'parsed': False, 'type_': {'$in': wanted_types}},
        sort=[('published_at', -1)],
    )
    # With no match the endpoint is built without any arguments.
    target_args = {'url': unparsed.url} if unparsed else {}
    return redirect(url_for('admin.parse', **target_args))
Ejemplo n.º 6
0
Archivo: parse.py Proyecto: sopnic/ybk
def parse():
    """Render the admin parse page, optionally focused on one announcement.

    Reads the optional ``url`` query argument. Counts the announcements whose
    ``type_`` is "offer" or "result", both in total and restricted to those
    flagged ``parsed``. When ``url`` is given, loads the announcement with
    that URL and the collections whose ``from_url`` equals it, and rewrites a
    few collection attributes into display strings.

    Returns:
        The result of rendering ``admin/parse.html``.

    NOTE: the template context is ``**locals()``, so every local name in this
    function is part of the template's contract. ``announcement`` and
    ``colls`` are only bound when ``url`` is truthy, and ``coll`` only when at
    least one collection matched.
    """
    # Not used in this function; it reaches the template through locals().
    nav = "parse"
    url = request.args.get("url")
    num_parsed = Announcement.count({"parsed": True, "type_": {"$in": ["offer", "result"]}})
    num_total = Announcement.count({"type_": {"$in": ["offer", "result"]}})
    if url:
        announcement = Announcement.query_one({"url": url})
        colls = list(Collection.query({"from_url": url}))
        # The loaded objects are modified in place for display; no save call
        # is made in this function.
        for coll in colls:
            if coll.offers_at:
                # Compact YYYYMMDD string kept on a separate attribute.
                coll.offers_at2 = coll.offers_at.strftime("%Y%m%d")
            if coll.offer_cash_ratio:
                # Overwritten with a percentage string, e.g. 0.5 -> "50%".
                coll.offer_cash_ratio = "{:2.0f}%".format(coll.offer_cash_ratio * 100)
            if coll.offer_price:
                # Stringified, with a trailing ".0" dropped ("12.0" -> "12").
                coll.offer_price = str(coll.offer_price)
                if coll.offer_price.endswith(".0"):
                    coll.offer_price = coll.offer_price[:-2]
    # True when every offer/result announcement is flagged as parsed.
    all_done = num_parsed == num_total
    return render_template("admin/parse.html", **locals())