예제 #1
0
def store2pg(ps=None, url=None, flag=None):
    """Persist a crawled page to PostgreSQL.

    Args:
        ps: page source (raw HTML), passed through to ``tran2pg``.
        url: URL of the page.
        flag: crawl flag, passed through to ``tran2pg``.

    Returns:
        The uid produced by ``tran2pg`` on success, ``False`` on any failure.
    """
    # BUG FIX: in the original, ``sess`` was first assigned inside ``try``
    # AFTER tran2pg().  If tran2pg() or session() raised, ``sess`` was unbound
    # and both sess.rollback() (except) and sess.close() (finally) raised
    # NameError, hiding the real exception.  Use a sentinel and guard instead.
    sess = None
    try:
        uid, _obj = tran2pg(ps=ps, url=url, flag=flag)
        sess = session()
        sess.add(_obj)
        sess.commit()
        return uid
    except Exception:
        if sess is not None:
            sess.rollback()
        traceback.print_exc()
        return False
    finally:
        if sess is not None:
            sess.close()
예제 #2
0
def store2pg_parse(url=None,
                   author=None,
                   public_time=None,
                   page_source=None,
                   content=None,
                   website_name=None,
                   channel_name=None,
                   title=None,
                   topic=None,
                   tag=None,
                   meta_keywords=None,
                   pic=None,
                   flag=None):
    """Persist a parsed article record to PostgreSQL.

    All keyword arguments are passed through to ``tran2pg_parse``, which
    builds the ORM object to store.

    Args:
        url: article URL.
        author: article author.
        public_time: publication time.
        page_source: raw HTML of the page.
        content: extracted article text.
        website_name: source site name.
        channel_name: source channel/section name.
        title: article title.
        topic: article topic.
        tag: article tag(s).
        meta_keywords: page meta keywords.
        pic: article image reference.
        flag: crawl flag.

    Returns:
        The uid produced by ``tran2pg_parse`` on success, ``False`` on failure.
    """
    # BUG FIX: ``sess = session()`` was inside ``try``; if session() raised,
    # ``sess`` was unbound and sess.rollback()/sess.close() raised NameError,
    # masking the real error.  Guard with a None sentinel.  Also removed the
    # dead ``pass`` after ``finally`` (every path already returns).
    sess = None
    try:
        sess = session()
        uid, _obj = tran2pg_parse(url=url,
                                  author=author,
                                  public_time=public_time,
                                  page_source=page_source,
                                  content=content,
                                  website_name=website_name,
                                  channel_name=channel_name,
                                  title=title,
                                  topic=topic,
                                  tag=tag,
                                  meta_keywords=meta_keywords,
                                  flag=flag,
                                  pic=pic)
        sess.add(_obj)
        sess.commit()
        print('完成入库')
        return uid
    except Exception as e:
        print(e)
        if sess is not None:
            sess.rollback()
        return False
    finally:
        if sess is not None:
            sess.close()
예제 #3
0
"""

import __init__
import requests
import re
import time
import random
from mrq.task import Task
from mrq.job import queue_job
from mrq.context import log, traceback
from retrying import retry

from __dber.pg_client import session, session1
from __dber.pg_orm_dongfangcaifu import Jz_dongfangcaifu_PageSource, Jz_dongfangcaifu_content
from __utils.base import get_date_str, get_header, get_now_str, str_simhash, get_proxy_redis
# Module-level DB sessions shared by all tasks in this module.
# NOTE(review): long-lived global sessions are risky with forked/threaded
# workers (MRQ) — presumably each worker imports its own copy; confirm.
sess = session()
sess1 = session1()


class Start(Task):
    def run(self, params):
        #初始化的时候,选用页面长度为30
        #每日增量,只要一页就可以
        url_s1 = ('http://finance.eastmoney.com/news/cjjsp_%s.html', '经济时评')
        url_s2 = ('http://finance.eastmoney.com/news/cgnjj_%s.html', '国内经济')
        url_s3 = ('http://finance.eastmoney.com/news/cgjjj_%s.html', '国际经济')
        #        for i in range(1,4):
        for i in range(1, 26):
            url1 = url_s1[0] % str(i)
            log.info('入队列 jz_cj_pagesource')
            queue_job('main_dongfangcaifu.Crawler1', {