Ejemplo n.º 1
0
 def run(self):
     """Validate the spider settings and start the periodic worker loop.

     Reads ``DELAY``, ``LIMIT`` and ``DEPTH`` from ``self.settings``. Each one
     must exist and be a positive integer. If a setting is missing or invalid,
     a critical message naming that setting is logged and the method returns
     without scheduling anything.

     When all three settings are valid, ``worker`` is registered through
     ``ioloop.PeriodicCallback`` with ``DELAY`` as the interval, and the
     current IO loop is started.

     Returns:
         None. Missing or invalid settings are logged rather than raised.
     """
     checked = {}
     for name in ("DELAY", "LIMIT", "DEPTH"):
         try:
             value = getattr(self.settings, name)
         except AttributeError:
             # Name the setting that is actually missing rather than always
             # blaming DELAY.
             spider_log.critical("settings.%s not found" % name)
             return
         # `or`, not `and`: the value is rejected when it is not an int OR
         # when it is not positive. With `and`, zero and negative integers
         # slipped through, and non-integers were compared against 0.
         if not isinstance(value, int) or value <= 0:
             spider_log.critical("settings.%s should be a positive integer" % name)
             return
         checked[name] = value

     # LIMIT and DEPTH are only validated here; this method does not use them.
     # The loop is started outside the try block above so that an
     # AttributeError raised while it runs is not mistaken for a missing
     # setting.
     # NOTE(review): `ioloop` is presumably tornado.ioloop (import not visible
     # here); if so, the PeriodicCallback interval is in milliseconds - confirm.
     io_loop = ioloop.IOLoop.current()
     ioloop.PeriodicCallback(worker, checked["DELAY"]).start()
     io_loop.start()
Ejemplo n.º 2
0
# -*- coding: utf-8 -*-
import datetime
import sys

import MySQLdb

import settings
from spider.log import spider_log

# Fetch the database configuration from the settings module. If it is
# missing, log a critical message and exit the process.
try:
    db_setting = settings.DATABASE
except AttributeError:
    spider_log.critical("settings.DATABASE not found!")
    sys.exit(-1)

# Unpack the individual connection parameters; keys are looked up in the
# order listed.
host, port, user, password, database_name = (
    db_setting[key]
    for key in ("host", "port", "user", "password", "database_name")
)

# Module-level database connection, opened when this module is imported.
spider_session = MySQLdb.connect(
    host=host,
    port=port,
    user=user,
    passwd=password,
    db=database_name,
    charset="utf8"
)
# Module-level list used to store URL hashes.
hash_list = []


class SpiderResult(object):
    """A simple class to store spider result

    """