Code Example #1
File: crawler.py Project: maocis/ybk
def parse_index(ex, type_, content, conf):
    # Decode the index page with the exchange-specific encoding.
    text = content.decode(conf['encoding'], 'ignore')
    for values in re.compile(conf['detail'], re.DOTALL).findall(text):
        # Map each captured group onto its field name, stripping simple
        # HTML tags and whitespace.
        d = {
            key: re.sub(r'(</?[a-zA-Z]+>|\s+)', '', value.strip())
            for key, value in zip(conf['fields'], values)
        }
        # Resolve relative detail-page URLs against the configured base.
        if 'relative' in conf and not d['url'].startswith('http'):
            d['url'] = conf['relative'] + d['url']
        # Skip announcements that are already stored.
        if Announcement.query_one({'url': d['url']}):
            continue

        # Per-exchange fixups for the published_at field.
        if ex.abbr == '中港邮币卡':
            d['published_at'] = re.sub('<[^>]*>', '-', d['published_at'])
        if ex.abbr == '三点零':
            pa = d['published_at']
            pa = re.sub('<[^>]*>', '', pa)
            d['published_at'] = pa[2:] + '/' + pa[:2]
        # Treat the parsed time as UTC+8 and convert it to UTC.
        d['published_at'] = parse_datetime(d['published_at']) \
            - timedelta(hours=8)
        d['exchange'] = ex._id
        d['type_'] = type_
        # Fetch the detail page, store its decoded HTML, and rewrite the
        # original encoding name (e.g. the charset declaration) to utf-8.
        content = session.get(d['url'], timeout=(5, 10)).content
        d['html'] = content.decode(conf['encoding'], 'ignore')
        d['html'] = d['html'].replace(conf['encoding'], 'utf-8')
        log.info('[{exchange}]{published_at}: {title}'.format(**d))
        Announcement(d).upsert()
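
A minimal sketch of how parse_index might be driven for one exchange, assuming `session` is a shared requests.Session and that each exchange object carries a mapping from announcement type to a conf dict with the keys the function reads ('encoding', 'detail', 'fields', and optionally 'relative'). The `index_confs` attribute and the 'url' key used below are hypothetical illustrations, not taken from the project.

import requests

session = requests.Session()

def crawl_exchange(ex):
    # index_confs is a hypothetical mapping: announcement type -> conf dict;
    # each conf is assumed to also hold the index-page URL under 'url'.
    for type_, conf in ex.index_confs.items():
        resp = session.get(conf['url'], timeout=(5, 10))
        parse_index(ex, type_, resp.content, conf)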
Code Example #2
File: cli.py Project: maocis/ybk
def do_cron(parser, args):
    setup_config(args)
    # A lock file guards against overlapping cron runs.
    lockfile = '/tmp/ybk.cron.lock'
    path = pathlib.Path(lockfile)

    class doing(object):
        # Context manager: create the lock file on entry; on exit, log any
        # exception, remove the lock file, and suppress the exception so the
        # remaining steps still run.

        def __enter__(self):
            path.open('w').write('')

        def __exit__(self, type, value, traceback):
            if value:
                crawl_log.exception('出错啦')  # "something went wrong"
            path.unlink()
            return True

    if not path.exists():
        with doing():
            crawl_all()

        now = datetime.utcnow() + timedelta(hours=8)  # current time in UTC+8
        with doing():
            if 9 <= now.hour <= 20:
                realtime_all()

        with doing():
            if now.hour == 6 and now.minute < 5:
                history_all()

        with doing():
            if 9 <= now.hour <= 20:
                # Generate historical profit records for all users
                ProfitLog.ensure_all_profits()

        # Update the status of all trading accounts
        if now.hour == 22 and 30 <= now.minute <= 35:
            trade_account_all()
    else:
        crawl_log.info('已有cron在跑, 直接退出')  # a cron is already running, exit
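
The heart of do_cron is the lock-file-plus-exception-swallowing context manager; below is a standalone sketch of the same pattern under illustrative names (StepGuard, the example lock path, and the demo step are not from the project).

import logging
import pathlib

logging.basicConfig()
log = logging.getLogger(__name__)

class StepGuard(object):
    """Create a lock file while a step runs; log and swallow any exception."""

    def __init__(self, lockfile='/tmp/example.lock'):
        self.path = pathlib.Path(lockfile)

    def __enter__(self):
        self.path.touch()                   # mark the job as running

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_value:
            log.exception('step failed')    # record the error, keep going
        self.path.unlink()                  # release the lock
        return True                         # suppress the exception

if not pathlib.Path('/tmp/example.lock').exists():
    with StepGuard():
        raise RuntimeError('boom')          # gets logged, then swallowed
    print('later steps still run')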