Exemple #1
0
    logger = get_logger()
    url = BASE_URL + 'vol.' + str(num)
    logger.info('url:%s', url)
    request = urllib.request.Request(url)
    request = fill_request(request)
    html = get_html(request)
    return html


def fill_request(request):
    """Add every entry of the module-level HEADERS mapping to *request*.

    Each key/value pair is passed to ``request.add_header``. The same
    request object is returned so the call can be used in an assignment.
    """
    # NOTE(review): HEADERS is defined outside this view; assumed to be a dict.
    for header_name, header_value in HEADERS.items():
        request.add_header(header_name, header_value)
    return request


if __name__ == '__main__':
    # Script entry point: fetch each volume in turn, starting at 1, and hand
    # the returned HTML to insert_journal_src.
    logger = get_logger()
    first_day = datetime.date(2012, 10, 8)
    elapsed = datetime.date.today() - first_day + datetime.timedelta(days=2)
    vol = elapsed.days
    logger.info('range from %d to %d', 1, vol)
    # range() excludes its upper bound, so the last volume fetched is vol - 1.
    for i in range(1, vol):
        logger.info('begin get vol %d', i)
        html = get_one_journal(i)
        if html is not None:
            insert_journal_src(i, html)
        else:
            # A None result is treated as a failed fetch; nothing is inserted.
            logger.error('get html error')
        # Pause after every volume, whether or not the fetch succeeded.
        time.sleep(5)
Exemple #2
0
#! /usr/bin/env python3
# -*- coding: utf-8 -*-


import datetime
from loggingutil import get_logger
from gethtml import get_one_journal
from dbutil import insert_journal_src
from extractdata import get_one_journal_data


if __name__ == '__main__':
    # Script entry point: fetch the volume whose number corresponds to
    # today's date, pass its HTML to insert_journal_src, then run
    # get_one_journal_data for that volume.
    start_day = datetime.date(2012, 10, 8)
    today = datetime.date.today()
    # Volume number = days elapsed since start_day, plus one (so start_day
    # itself maps to volume 1).
    vol = (today - start_day + datetime.timedelta(days=1)).days
    html = get_one_journal(vol)
    if html is None:
        # The bulk-download script treats a None result from
        # get_one_journal as a failed fetch and skips the insert; do the
        # same here instead of handing None to insert_journal_src.
        logger = get_logger()
        logger.error('get html error: vol:%d', vol)
    else:
        res = insert_journal_src(vol, html)
        # Explicit equality is kept on purpose: insert_journal_src's return
        # contract is not visible here, and `not res` would additionally
        # treat a None return as a failure.
        if res == False:  # noqa: E712
            logger = get_logger()
            logger.error('persist data error: vol:%d', vol)
        else:
            get_one_journal_data(vol)