예제 #1
0
def feed_revisit(pool, dbi=None):
    """Queue recently fetched articles that are due for a re-download.

    For every row that need_revisit() flags, the meta dict is rebuilt in the
    style of Base Ctlr :: dispatch_rss_2_0 and the download is queued on the
    pool with the controller's dispatch_response as the handler.

    Args:
        pool: work queue; log_stats() and put() are called on it.
        dbi: optional DB handle to reuse. When None, a DB() is created here
            and disconnected before returning, even if the loop raises.

    @see db.list_revisits()
    @startpoint
    """
    import json
    import importlib

    from lib import db, DB, logger
    from lib.util.dt import to_timestamp

    owns_dbi = dbi is None
    _dbi = DB() if owns_dbi else dbi

    # One controller instance per dotted class path
    ctlr_cache = {}

    # Column positions within the rows yielded by db.list_recent_fetches()
    (i_created_on, i_last_seen_on, i_pub_ts, i_feed_url,
     i_canonical_url, i_title, i_meta, i_ctlr) = range(8)

    try:
        # Pass the resolved handle (_dbi), not the possibly-None argument,
        # so the handle created above is the one actually used.
        for x in db.list_recent_fetches(revisit_max_m(), dbi=_dbi):
            expired = need_revisit(x[i_created_on], x[i_last_seen_on])
            if not expired:
                continue

            ctlr_path = x[i_ctlr]
            if ctlr_path not in ctlr_cache:
                # "pkg.module.ClassName" -> import the module, instantiate
                (ns, cn) = ctlr_path.rsplit('.', 1)
                module = importlib.import_module(ns)
                ctlr_cache[ctlr_path] = getattr(module, cn)()

            ctlr = ctlr_cache[ctlr_path]
            meta = json.loads(x[i_meta])

            meta['feed_url'] = x[i_feed_url]
            meta['pub_date'] = to_timestamp(x[i_pub_ts])
            meta['title'] = x[i_title]

            logger.info('Revisiting %s, expired for %d min',
                        x[i_canonical_url],
                        expired,
                        extra={'classname': feed_revisit})
            pool.log_stats('with_revisit')
            pool.put("http://" + x[i_canonical_url],
                     ctlr.dispatch_response,
                     category="revisit",
                     meta=meta)
    finally:
        # Only release a handle this function created itself
        if owns_dbi:
            _dbi.disconnect()
예제 #2
0
def cleanup(dbi=None):
    """Run the `_article` and `_ctlr_feed` steps against one DB handle.

    Args:
        dbi: optional DB handle to reuse. When None, a DB() is created here
            and disconnected before returning, even if a step raises.
    """
    from lib import DB

    owns_dbi = dbi is None
    _dbi = DB() if owns_dbi else dbi

    try:
        _article(_dbi)
        _ctlr_feed(_dbi)
    finally:
        # Only release a handle this function created itself
        if owns_dbi:
            _dbi.disconnect()
예제 #3
0
def cleanup(dbi = None):
  """Run the `_article` and `_ctlr_feed` steps against one DB handle.

  Args:
    dbi: optional DB handle to reuse. When None, a DB() is created here
      and disconnected before returning, even if a step raises.
  """
  from lib import DB

  owns_dbi = dbi is None
  _dbi = DB() if owns_dbi else dbi

  try:
    _article(_dbi)
    _ctlr_feed(_dbi)
  finally:
    # Only release a handle this function created itself
    if owns_dbi:
      _dbi.disconnect()
예제 #4
0
 def setup_database(self):
     """Create the storage connection, a cursor, and exception aliases.

     The 'readonly' entry of ``connections`` is used when
     ``self.DB_READONLY`` is truthy, otherwise the 'default' entry.
     """
     alias = 'readonly' if self.DB_READONLY else 'default'
     self.con = DB.StorageDB(connections[alias])
     self.cur = self.con.cursor()

     # Re-export the exception names as instance attributes
     (self.db_IntegrityError,
      self.db_ProgrammingError,
      self.db_OperationalError,
      self.db_DataError) = (IntegrityError,
                            ProgrammingError,
                            OperationalError,
                            DataError)
예제 #5
0
def feed_catchup(pool, dbi=None):
    """Placeholder step: currently only manages the DB handle.

    Args:
        pool: not used by the current body.
        dbi: optional DB handle. When None, a DB() is created and then
            disconnected again right away.
    """
    from lib import DB

    owns_dbi = dbi is None
    _dbi = DB() if owns_dbi else dbi

    if owns_dbi:
        _dbi.disconnect()
예제 #6
0
def feed_revisit(pool, dbi=None):
    """Queue recently fetched articles that are due for a re-download.

    For every row that need_revisit() flags, the meta dict is rebuilt in the
    style of Base Ctlr :: dispatch_rss_2_0 and the download is queued on the
    pool with the controller's dispatch_response as the handler.

    Args:
        pool: work queue; log_stats() and put() are called on it.
        dbi: optional DB handle to reuse. When None, a DB() is created here
            and disconnected before returning, even if the loop raises.

    @see db.list_revisits()
    @startpoint
    """
    import json
    import importlib

    from lib import db, DB, logger
    from lib.util.dt import to_timestamp

    owns_dbi = dbi is None
    _dbi = DB() if owns_dbi else dbi

    # One controller instance per dotted class path
    ctlr_cache = {}

    # Column positions within the rows yielded by db.list_recent_fetches()
    (
        i_created_on,
        i_last_seen_on,
        i_pub_ts,
        i_feed_url,
        i_canonical_url,
        i_title,
        i_meta,
        i_ctlr,
    ) = range(8)

    try:
        # Pass the resolved handle (_dbi), not the possibly-None argument,
        # so the handle created above is the one actually used.
        for x in db.list_recent_fetches(revisit_max_m(), dbi=_dbi):
            expired = need_revisit(x[i_created_on], x[i_last_seen_on])
            if not expired:
                continue

            ctlr_path = x[i_ctlr]
            if ctlr_path not in ctlr_cache:
                # "pkg.module.ClassName" -> import the module, instantiate
                (ns, cn) = ctlr_path.rsplit(".", 1)
                module = importlib.import_module(ns)
                ctlr_cache[ctlr_path] = getattr(module, cn)()

            ctlr = ctlr_cache[ctlr_path]
            meta = json.loads(x[i_meta])

            meta["feed_url"] = x[i_feed_url]
            meta["pub_date"] = to_timestamp(x[i_pub_ts])
            meta["title"] = x[i_title]

            logger.info(
                "Revisiting %s, expired for %d min",
                x[i_canonical_url],
                expired,
                extra={"classname": feed_revisit},
            )
            pool.log_stats("with_revisit")
            pool.put(
                "http://" + x[i_canonical_url],
                ctlr.dispatch_response,
                category="revisit",
                meta=meta,
            )
    finally:
        # Only release a handle this function created itself
        if owns_dbi:
            _dbi.disconnect()
예제 #7
0
def fetch(payload, dbi=None):
    """Download payload['url'] and record the outcome via db.save_fetch.

    On a plain success the URL that was finally read is written to
    payload['url_read'] and the response body to payload['src']. When
    get_portal() returns something for the final URL, break_portal() is
    given the payload and the response instead.

    Args:
        payload: dict; 'url' and 'category' are read, 'src', 'url_read'
            and (on failure) 'category' / 'exception' are written.
        dbi: optional DB handle to reuse. When None, a DB() is created here
            and disconnected once the fetch has been recorded.

    Returns:
        The same payload dict, updated in place.

    Raises:
        Exception: the error caught during the download is re-raised after
            the failed fetch has been recorded, so callers skip parsing.
    """
    from lib import db, DB, logger
    from lib.util.text import to_unicode

    extra = {'classname': 'util.net.fetch()'}

    try:
        uo = urlopen(payload['url'], timeout=HTTP_TIMEOUT)
        try:
            if uo.code != 200:
                raise IOError("HTTP response code=%d from %s" % (uo.code, uo.url))

            portal = get_portal(uo.url)
            if portal:
                break_portal(portal, payload, uo)
            else:
                payload['src'] = uo.read()
                payload['url_read'] = uo.url
        finally:
            # Release the HTTP response whether or not reading succeeded
            uo.close()
    except Exception as e:
        # Fetch failed; keep the details so save_fetch can record them
        payload['src'] = 'error ' + unicode(e)
        payload['category'] = 'error'
        payload['exception'] = e

    if 'url_read' not in payload:
        payload['url_read'] = payload['url']

    owns_dbi = dbi is None
    _dbi = DB() if owns_dbi else dbi

    try:
        db.save_fetch(payload['url'],
                      to_unicode(payload['src']),
                      payload['category'],
                      dbi=_dbi)
    except Exception as e:
        # Recording is best-effort: log and carry on
        logger.warning('DB save_fetch failed for url %s' % payload['url'],
                       extra=extra)
        logger.debug(e, extra=extra)
    finally:
        # Only release a handle this function created itself
        if owns_dbi:
            _dbi.disconnect()

    if 'error' == payload['category']:
        # raise the exception to skip the parsing process
        logger.info("failed fetching %s" % payload['url'], extra=extra)
        raise payload['exception']

    return payload
예제 #8
0
파일: fetch.py 프로젝트: clifflu/news-diff
def feed_fetch(pool, ctlr_list, dbi = None):
  """Fetch new articles through Ctlr_Base_RSS controllers.

  Imports ``ctlr.<pkg>`` for every name in ctlr_list and calls feed() on a
  fresh instance of each class in that module's ``Ctlrs``.

  Args:
    pool: passed through to each controller's feed().
    ctlr_list: iterable of package names under ``ctlr``.
    dbi: optional DB handle to reuse. When None, a DB() is created here
      and disconnected before returning, even if a controller raises.
  """
  import importlib
  from lib import DB

  owns_dbi = dbi is None
  _dbi = DB() if owns_dbi else dbi

  try:
    for pkg in ctlr_list:
      module = importlib.import_module('ctlr.%s' % pkg)
      for ctlr in module.Ctlrs:
        ctlr().feed(pool, dbi = _dbi)
  finally:
    # Only release a handle this function created itself
    if owns_dbi:
      _dbi.disconnect()
예제 #9
0
def feed_fetch(pool, ctlr_list, dbi=None):
    """Fetch new articles through Ctlr_Base_RSS controllers.

    Imports ``ctlr.<pkg>`` for every name in ctlr_list and calls feed() on a
    fresh instance of each class in that module's ``Ctlrs``.

    Args:
        pool: passed through to each controller's feed().
        ctlr_list: iterable of package names under ``ctlr``.
        dbi: optional DB handle to reuse. When None, a DB() is created here
            and disconnected before returning, even if a controller raises.
    """
    import importlib
    from lib import DB

    owns_dbi = dbi is None
    _dbi = DB() if owns_dbi else dbi

    try:
        for pkg in ctlr_list:
            module = importlib.import_module('ctlr.%s' % pkg)
            for ctlr in module.Ctlrs:
                ctlr().feed(pool, dbi=_dbi)
    finally:
        # Only release a handle this function created itself
        if owns_dbi:
            _dbi.disconnect()
예제 #10
0
파일: net.py 프로젝트: dehao/news-diff
def fetch(payload, dbi = None):
  """Download payload['url'] and record the outcome via db.save_fetch.

  On a plain success the URL that was finally read is written to
  payload['url_read'] and the response body to payload['src']. When
  get_portal() returns something for the final URL, break_portal() is
  given the payload and the response instead.

  Args:
    payload: dict; 'url' and 'category' are read, 'src', 'url_read'
      and (on failure) 'category' / 'exception' are written.
    dbi: optional DB handle to reuse. When None, a DB() is created here
      and disconnected once the fetch has been recorded.

  Returns:
    The same payload dict, updated in place.

  Raises:
    Exception: the error caught during the download is re-raised after
      the failed fetch has been recorded, so callers skip parsing.
  """
  from lib import db, DB, logger
  from lib.util.text import to_unicode

  extra = {'classname': 'util.net.fetch()'}

  try:
    uo = urllib.urlopen(payload['url'])
    try:
      if uo.code != 200:
        raise IOError("HTTP response code=%d from %s" % (uo.code, uo.url))

      portal = get_portal(uo.url)
      if portal:
        break_portal(portal, payload, uo)
      else:
        payload['src'] = uo.read()
        payload['url_read'] = uo.url
    finally:
      # Release the HTTP response whether or not reading succeeded
      uo.close()
  except Exception as e:
    # Fetch failed; keep the details so save_fetch can record them
    payload['src'] = 'error ' + unicode(e)
    payload['category'] = 'error'
    payload['exception'] = e

  if 'url_read' not in payload:
    payload['url_read'] = payload['url']

  owns_dbi = dbi is None
  _dbi = DB() if owns_dbi else dbi

  try:
    db.save_fetch(payload['url'], to_unicode(payload['src']), payload['category'], dbi = _dbi)
  except Exception as e:
    # Recording is best-effort: log and carry on
    logger.warning('DB save_fetch failed for url %s' % payload['url'], extra=extra)
    logger.debug(e, extra=extra)
  finally:
    # Only release a handle this function created itself
    if owns_dbi:
      _dbi.disconnect()

  if 'error' == payload['category']:
    # raise the exception to skip the parsing process
    logger.warning("failed fetching %s" % payload['url'], extra=extra)
    raise payload['exception']

  return payload
예제 #11
0
    def __init__(self, pool):
        """Initialise the thread, log the start-up, keep the pool and a DB()."""
        from lib import DB
        from lib import logger

        Thread.__init__(self)

        # The logger call is tagged with this instance's class
        log_context = {'classname': self.__class__}
        logger.info('initiated', extra=log_context)

        self.pool = pool
        # Each instance gets its own DB() handle
        self.dbi = DB()
예제 #12
0
 def __init__(self, callback, db=None):
     """Wire up the streamer, the DB-backed helpers, strategies and logging.

     Args:
         callback: handed to Streamer.Streamer.
         db: handle shared by the Indicator, Signal and Strategy helpers.
             When omitted, a new DB.DB() is built per instance. The former
             ``db=DB.DB()`` default was evaluated once, when the method
             was defined, so every instance shared that single object.
     """
     if db is None:
         db = DB.DB()
     self.streamer = Streamer.Streamer(callback)
     self.db = db
     self.ind = Indicator.Indicator(self.db)
     self.sig = Signal.Signal(self.db)
     self.strat = Strategy.Strategy(self.db)
     self.runAsyncTasks = True
     self.pw = PyWhale.PyWhale()
     self.pw.verbose = False
     # NOTE(review): self.strategies is not assigned in this method;
     # presumably it is defined on the class -- confirm.
     for strategy in Helper.getAvailableStrategies():
         self.strategies[strategy] = False
     self.log = logging.getLogger(__name__)
     self.fh = logging.FileHandler('log/trader.log')
     self.fh.setLevel(logging.DEBUG)
     self.sh = logging.StreamHandler(sys.stderr)
     self.sh.setLevel(logging.DEBUG)
     self.log.addHandler(self.sh)
     self.log.addHandler(self.fh)
     # NOTE(review): the same two handlers are also passed to basicConfig
     # (root logger); check whether records end up being emitted twice.
     logging.basicConfig(level=logging.DEBUG, handlers=[self.fh, self.sh])
예제 #13
0
파일: main.py 프로젝트: panickat/btc_note
from lib import User, DB
"""
Pendiente de optimizar:
    1. En un lapso de un minuto usar live_usd last_usd contenidos en el objeto User (sin usar request)
    2. preguntar si hacer request al inicializar un Usuario, si ha pasado un lapso(minuto) 
    desde la ultima vez que se llamo a request

- la proxima ves que se llame a User se puede omitir la db
"""

# Module-level DB object, handed to the User created below
db = DB()
# User named "cris", constructed with that DB object
user = User(name="cris", db=db)
 def __init__(self, db=None):
     """Store the DB handle and build the Indicator and Signal helpers on it.

     Args:
         db: handle passed to Indicator.Indicator and Signal.Signal. When
             omitted, a new DB.DB() is built per instance. The former
             ``db=DB.DB()`` default was evaluated once, when the method
             was defined, so every instance shared that single object.
     """
     if db is None:
         db = DB.DB()
     self.db = db
     self.ind = Indicator.Indicator(self.db)
     self.sig = Signal.Signal(self.db)
예제 #15
0
 def __init__(self, db=None):
     """Store the DB handle and build the Indicator helper on it.

     Args:
         db: handle passed to Indicator.Indicator. When omitted, a new
             DB.DB() is built per instance. The former ``db=DB.DB()``
             default was evaluated once, when the method was defined, so
             every instance shared that single object.
     """
     if db is None:
         db = DB.DB()
     self.db = db
     self.ind = Indicator.Indicator(db)
#!/usr/bin/python3
from lib import DB
from datetime import datetime
db_conn = DB.DbConn()

# Insert statement with placeholders; the values are supplied separately
# to execute() rather than formatted into the SQL text.
sql = """ INSERT INTO student
            (name, address_line_1, address_line_2, postcode, when_created)
            VALUES
            (%s, %s, %s, %s, %s)"""

# Read the pipe-delimited file line by line; the context manager closes the
# file even if an insert fails part-way through.
with open("student.txt", "r") as lines:
    for line in lines:
        line = line.strip().strip("|")
        if not line:
            # Blank line (e.g. a trailing newline): nothing to insert
            continue

        lineData = line.split('|')
        name = lineData[0]
        addressLine1 = lineData[1]
        addressLine2 = lineData[2]
        postCode = lineData[3]
        print('Saving: ' + name)

        sqlValue = (name, addressLine1, addressLine2, postCode, datetime.now())
        db_conn.get_cursor().execute(sql, sqlValue)

db_conn.commit().close_cursor().close_db_connection()