Beispiel #1
0
 def __init__(self, root, local_items):
     """Initialise the resource and attach the configured database handle.

     :param root: stored unchanged on ``self.root``.
     :param local_items: stored unchanged on ``self.local_items``.

     ``database_type`` and ``lock`` are read from module scope, not passed in.
     """
     resource.Resource.__init__(self)
     self.root = root
     self.local_items = local_items
     # Pick the global_values key that matches the configured backend.
     db_key = 'mysql_db' if database_type == 'mysql' else 'sqlite_db'
     self.db = glv.get_value(key=db_key)
     self.lock = lock
Beispiel #2
0
 def __init__(self, root, local_items):
     """Initialise the resource with a database handle, logger and calculator.

     :param root: stored unchanged on ``self.root``.
     :param local_items: stored unchanged on ``self.local_items``.

     ``database_type`` and ``lock`` are read from module scope, not passed in.
     """
     resource.Resource.__init__(self)
     self.root = root
     self.local_items = local_items
     # Pick the global_values key that matches the configured backend.
     db_key = 'mysql_db' if database_type == 'mysql' else 'sqlite_db'
     self.db = glv.get_value(key=db_key)
     self.logger = Logger(namespace='- STATUS -')
     # The calculator receives the module-level lock.
     self.RC = RuntimeCalculator(lock=lock)
Beispiel #3
0
 def __init__(self, lock, host='localhost', port='6800'):
     """Initialise credentials, endpoint URLs, locks and the time-key tables.

     :param lock: object kept as ``self.db_lock``; a separate
         ``threading.Lock`` is created for ``self.ts_lock``.
     :param host: host part of the schedule.json / listprojects.json URLs.
     :param port: port part of the same URLs.
     """
     settings = Config()
     self.highest_level = glv.get_value(key='top_level')
     self.db = glv.get_value(key='mysql_db')
     self.user_name = settings.get('auth_username', '')
     self.user_password = settings.get('auth_password', '')

     # Local time at construction, rendered as space-separated fields.
     now = time.localtime()
     self.start_time = time.strftime("%Y %m %d %H %M %S", now)

     # Both endpoints are built from the same host/port pair.
     address = (host, port)
     self.schedule_post_url = 'http://{}:{}/schedule.json'.format(*address)
     self.listproject_url = 'http://{}:{}/listprojects.json'.format(*address)

     self.projects = None
     self.spider_task_dic = {}
     self.db_lock = lock
     self.ts_lock = threading.Lock()

     # One-letter alias -> long key name.
     self._keys_dic = {
         "y": "year",
         "m": "month",
         "d": "day",
         "w": "week",
         "H": "hour",
         "M": "minute",
         "S": "second",
     }
     # Long key names together with their one-letter aliases.
     self._keys_set = {
         "year", "month", "day", "week", "hour", "minute", "second",
         "y", "m", "d", "w", "H", "M", "S",
     }
     # Every key split into a list of its characters.
     self._keys_set_lis = [list(key) for key in self._keys_set]

     # NOTE(review): presumably percentages -- confirm against the code that reads them.
     self.CPU_THRESHOLD = 93
     self.MEMORY_THRESHOLD = 96
     self.schedule_logger = Logger(namespace='- Scheduler -')
 def __init__(self, lock, addr='localhost', port='6800'):
     """Load collector/terminator settings and build the server URLs.

     :param lock: stored unchanged on ``self.lock``.
     :param addr: host part of the server base URL.
     :param port: port part of the server base URL.
     :raises ValueError: if ``observation_times`` or ``strict_degree`` cannot
         be converted by ``int``.
     """
     settings = Config()
     self.lock = lock
     self.highest_level = glv.get_value(key='top_level')
     self.user_name = settings.get('auth_username', '')
     self.user_password = settings.get('auth_password', '')
     self.clear_at_start = settings.get('clear_up_database_when_start', 'yes')

     raw_observation_times = settings.get('observation_times', '20')
     self.observation_times = int(raw_observation_times)
     self.strict_mode = settings.get('strict_mode', 'no')
     # Strictness of strict mode: must be greater than zero; the smaller
     # the value, the stricter.
     raw_strict_degree = settings.get('strict_degree', '4')
     self.strict_degree = int(raw_strict_degree)

     self.db = glv.get_value(key='mysql_db')
     self.mysql_host = settings.get('mysql_host', '127.0.0.1')
     self.mysql_port = settings.get('mysql_port', '3306')
     self.mysql_user = settings.get('mysql_user', 'root')
     self.mysql_db = settings.get('mysql_db', 'scrapydartdb')

     self.runtime_log = Logger(namespace='- Runtime Collector -')
     self.terminator_log = Logger(namespace='- TERMINATOR -')

     # Interval between collections: one minute.
     self.sep_time = 60
     self.terminator_scan_sep = 20

     # Despite its name, ``server_port`` holds the full base URL.
     base_url = 'http://{}:{}/'.format(addr, port)
     self.server_port = base_url
     self.jobs_url = base_url + 'listjobs.json'
Beispiel #5
0
 def __init__(self, lock, host='127.0.0.1', port='6800'):
     """Initialise credentials, endpoint URLs, locks and thresholds.

     :param lock: object kept as ``self.db_lock``; a separate
         ``threading.Lock`` is created for ``self.ts_lock``.
     :param host: host part of every URL built here.
     :param port: port part of every URL built here.
     """
     settings = Config()
     self.db = glv.get_value(key='sqlite_db')
     self.user_name = settings.get('auth_username', '')
     self.user_password = settings.get('auth_password', '')

     # Local time at construction, rendered as space-separated fields.
     now = time.localtime()
     self.start_time = time.strftime("%Y %m %d %H %M %S", now)

     # All three URLs are built from the same host/port pair.
     address = (host, port)
     self.server_port = 'http://{}:{}/'.format(*address)
     self.schedule_post_url = 'http://{}:{}/schedule.json'.format(*address)
     self.listproject_url = 'http://{}:{}/listprojects.json'.format(*address)

     self.spider_task_dic = {}
     self.projects = None
     self.db_lock = lock
     self.ts_lock = threading.Lock()

     # NOTE(review): presumably percentages -- confirm against the code that reads them.
     self.CPU_THRESHOLD = 93
     self.MEMORY_THRESHOLD = 96
     self.schedule_logger = Logger(namespace='- Scheduler -')
Beispiel #6
0
import sys
from datetime import datetime
from multiprocessing import cpu_count

from twisted.internet import reactor, defer, protocol, error
from twisted.application.service import Service
from twisted.python import log

from scrapydartx.utils import get_crawl_args, native_stringify_dict
from scrapydartx import __version__
from .interfaces import IPoller, IEnvironment
from .scrapydart_extend import run_extend, set_default_config
from scrapydartx import global_values as glv
# Object stored under the key 'lock' in the project's global_values module
# (presumably a lock shared between modules -- confirm where it is set).
lock = glv.get_value(key='lock')


class Launcher(Service):
    """Twisted ``Service`` that keeps process bookkeeping and, on start,
    calls ``_wait_for_project`` for each of its ``max_proc`` slots."""

    name = 'launcher'

    def __init__(self, config, app):
        """Read launcher settings from *config* and remember *app*.

        :param config: object providing ``getint`` and ``get``; also handed
            to ``_get_max_proc``.
        :param app: stored unchanged on ``self.app``.
        """
        # Bookkeeping containers start empty.
        self.processes = dict()
        self.finished = list()
        keep_count = config.getint('finished_to_keep', 100)
        self.finished_to_keep = keep_count
        self.max_proc = self._get_max_proc(config)
        runner_path = config.get('runner', 'scrapydartx.runner')
        self.runner = runner_path
        self.app = app

    def startService(self):
        """Call ``_wait_for_project`` once per slot index below ``max_proc``."""
        slot_total = self.max_proc
        for slot_index in range(slot_total):
            self._wait_for_project(slot_index)
Beispiel #7
0
from twisted.logger import Logger
from twisted.web import resource, static
from twisted.application.service import IServiceCollection
from scrapy.utils.misc import load_object
from .interfaces import IPoller, IEggStorage, ISpiderScheduler
from six.moves.urllib.parse import urlparse

from .config import Config
from .auth import decorator_auth
from .webservice import CustomResource
from .webtools import str_to_bytes, make_table, microsec_trunc, features, host_information, make_urls
from .websource import files
from scrapydartx import global_values as glv

# Object stored under the key 'lock' in global_values
# (presumably a lock shared between modules -- confirm where it is set).
lock = glv.get_value(key='lock')
config = Config()
# Backend selector; 'sqlite' is passed as the second argument to config.get.
database_type = config.get('database_type', 'sqlite')
# Bind RuntimeCalculator and db_connector to the implementation that matches
# the configured backend; any value other than 'mysql' takes the sqlite branch.
if database_type == 'mysql':
    db_connector = glv.get_value(key='mysql_db')
    from .mysql_runtime_monitor import RuntimeCalculator
else:
    from .sqlite_runtime_monitor import RuntimeCalculator
    db_connector = glv.get_value(key='sqlite_db')

# Unpack the 13 entries of ``files`` into named constants; this raises
# ValueError at import time if ``files`` does not hold exactly 13 items.
HEADER_HTML, FOOTERS_HTML, INDEX_HTML, JOBS_HTML, FEATURE_HTML, \
DOCUMENTS_HTML, STYLE_CSS, RESET_CSS, JQUERY_JS, MAIN_JS, \
MODERN_JS, VELOCITY_MIN_JS, SVG = files


class Root(resource.Resource):
Beispiel #8
0
    from cStringIO import StringIO as BytesIO
except ImportError:
    from io import BytesIO

from twisted.logger import Logger
from twisted.python import log
from collections import Counter

from .config import Config
from .utils import get_spider_list, JsonResource, UtilsCache, native_stringify_dict
from twisted.web import resource
from .auth import decorator_auth
from .webtools import get_invokes, get_ps, run_time_stats, get_psn, microsec_trunc
from .webtools import valid_date, valid_index, valid_args, valid_params, spider_dict
from scrapydartx import global_values as glv
# Object stored under the key 'lock' in global_values
# (presumably a lock shared between modules -- confirm where it is set).
lock = glv.get_value('lock')

config = Config()
# Backend selector; 'sqlite' is passed as the second argument to config.get.
database_type = config.get('database_type', 'sqlite')
# Import the schedule model and fetch the connector for the configured
# backend; any value other than 'mysql' takes the sqlite branch.
# NOTE(review): the two branches bind different names -- the mysql branch
# defines ``SpiderScheduleModel`` while the sqlite branch defines
# ``SLM_SpiderScheduleModel``. Confirm the code below handles both names.
if database_type == 'mysql':
    from .mysql_models import SpiderScheduleModel
    database_connector = glv.get_value(key='mysql_db')
else:
    from .sqlite_model import SpiderScheduleModel as SLM_SpiderScheduleModel
    database_connector = glv.get_value(key='sqlite_db')


class WsResource(JsonResource):
    """``JsonResource`` subclass that keeps a reference to ``root``."""

    def __init__(self, root):
        """Run the ``JsonResource`` initialiser and store *root*.

        :param root: stored unchanged on ``self.root``.
        """
        # Explicit base-class call rather than super(), matching the other
        # resource classes in this code base.
        JsonResource.__init__(self)
        self.root = root