예제 #1
0
    def __init__(
            self,
            url: str,
            method: str = 'GET',
            *,
            callback=None,
            metadata: dict = None,
            request_config: dict = None,
            request_session=None,
            res_type: str = 'text',
            **kwargs):
        """
        Store the parameters describing a single request.

        :param url: target URL, stored unchanged on ``self.url``.
        :param method: HTTP method name; upper-cased and checked against
            ``self.METHOD``.
        :param callback: stored on ``self.callback``.
        :param metadata: dict stored on ``self.metadata``; a new empty dict
            is used when ``None`` is given.
        :param request_config: dict stored on ``self.request_config``;
            ``self.REQUEST_CONFIG`` is used when ``None`` is given.
        :param request_session: stored on ``self.request_session``.
        :param res_type: stored on ``self.res_type``.
        :param kwargs: any extra keyword arguments, kept on ``self.kwargs``.
        :raises ValueError: if the upper-cased method is not in
            ``self.METHOD``.
        """
        # TODO: cookie
        verb = method.upper()
        self.url = url
        self.method = verb
        if verb not in self.METHOD:
            raise ValueError('%s method is not supported' % verb)

        self.callback = callback
        self.metadata = {} if metadata is None else metadata
        self.request_session = request_session
        self.request_config = (
            self.REQUEST_CONFIG if request_config is None else request_config
        )
        self.res_type = res_type
        self.kwargs = kwargs

        self.close_request_session = False
        self.logger = get_logger(name=self.name)
        # Retry budget comes from the active config; 3 when 'RETRIES' is absent.
        self.retry_times = self.request_config.get('RETRIES', 3)
예제 #2
0
    def __init__(self, url: str, method: str = 'GET', *,
                 callback=None,
                 headers: dict = None,
                 load_js: bool = False,
                 metadata: dict = None,
                 request_config: dict = None,
                 request_session=None,
                 res_type: str = 'text',
                 **kwargs
                 ):
        """
        Store the parameters describing a single request.

        :param url: target URL, stored unchanged on ``self.url``.
        :param method: HTTP method name; must be a member of ``self.METHOD``
            (it is not case-normalised here).
        :param callback: stored on ``self.callback``.
        :param headers: dict stored on ``self.headers``; a fresh empty dict
            is created per instance when omitted or ``None``.
        :param load_js: flag stored on ``self.load_js``.
        :param metadata: dict stored on ``self.metadata``; a fresh empty dict
            is created per instance when omitted or ``None``.
        :param request_config: dict stored on ``self.request_config``; when
            omitted, ``None`` or empty, ``self.REQUEST_CONFIG`` is used.
        :param request_session: stored on ``self.request_session``.
        :param res_type: stored on ``self.res_type``.
        :param kwargs: any extra keyword arguments, kept on ``self.kwargs``.
        :raises ValueError: if ``method`` is not in ``self.METHOD``.
        """
        self.url = url
        self.method = method
        if self.method not in self.METHOD:
            raise ValueError('{} method is not supported ~~~'.format(self.method))

        self.callback = callback
        # Defaults are None rather than {} so that instances never share
        # (and accidentally mutate) one dict object created at def-time.
        self.headers = headers if headers is not None else {}
        self.load_js = load_js
        self.metadata = metadata if metadata is not None else {}
        self.request_session = request_session

        # A missing or empty config falls back to the class-level one.
        if not request_config:
            self.request_config = self.REQUEST_CONFIG
        else:
            self.request_config = request_config

        self.res_type = res_type
        self.kwargs = kwargs
        self.close_request_session = False
        self.logger = get_logger(name=self.name)
        # Retry budget comes from the active config; 15 when 'RETRIES' is absent.
        self.retry_times = self.request_config.get('RETRIES', 15)
예제 #3
0
 def __init__(self, loop=None):
     """
     Validate ``start_urls`` and attach a logger and an event loop.

     :param loop: event loop to use; when falsy, the result of
         ``asyncio.get_event_loop()`` is used instead.
     :raises ValueError: if ``self.start_urls`` is empty or not a list.
     """
     seeds = self.start_urls
     if not (seeds and isinstance(seeds, list)):
         raise ValueError(
             "Spider must have a param named start_urls, eg: start_urls = ['https://www.github.com']"
         )
     self.logger = get_logger(name=self.name)
     if loop:
         self.loop = loop
     else:
         self.loop = asyncio.get_event_loop()
예제 #4
0
파일: spider.py 프로젝트: lfykid/aspider
 def __init__(self, loop=None):
     """
     Validate ``start_urls`` and prepare the spider's async primitives.

     :param loop: event loop to use; when falsy, a new loop is created.
         Either way the chosen loop is set as the current event loop.
     :raises ValueError: if ``self.start_urls`` is empty or not a list.
     """
     seeds = self.start_urls
     if not (seeds and isinstance(seeds, list)):
         raise ValueError(
             "Spider must have a param named start_urls, eg: start_urls = ['https://www.github.com']"
         )
     self.logger = get_logger(name=self.name)

     event_loop = loop if loop else asyncio.new_event_loop()
     self.loop = event_loop
     asyncio.set_event_loop(event_loop)

     self.request_queue = asyncio.Queue()
     # Semaphore size comes from an optional `concurrency` attribute (default 3).
     limit = getattr(self, 'concurrency', 3)
     self.sem = asyncio.Semaphore(limit)
예제 #5
0
    def __init__(self, middleware, loop=None):
        """
        Validate ``start_urls`` and prepare the spider's async primitives.

        :param middleware: a single middleware object, a list of middleware
            objects (combined left-to-right with ``+``), or a falsy value
            (including an empty list), in which case ``Middleware()`` is used.
        :param loop: event loop to use; when falsy, a new loop is created.
            Either way the chosen loop is set as the current event loop.
        :raises ValueError: if ``self.start_urls`` is empty or not a list.
        """
        if not self.start_urls or not isinstance(self.start_urls, list):
            raise ValueError("Spider must have a param start_urls")

        self.logger = get_logger(name=self.name)
        self.loop = loop or asyncio.new_event_loop()
        asyncio.set_event_loop(self.loop)

        # customize middleware
        # reduce() without an initial value raises TypeError on an empty
        # sequence, so an empty list is routed to the default branch instead.
        if isinstance(middleware, list) and middleware:
            self.middleware = reduce(lambda x, y: x + y, middleware)
        else:
            self.middleware = middleware or Middleware()

        # async queue
        self.request_queue = asyncio.Queue()

        # semaphore sized by an optional `concurrency` attribute (default 3)
        self.sem = asyncio.Semaphore(getattr(self, 'concurrency', 3))
예제 #6
0
#!/usr/bin/env python

import os

from importlib import util

from aspider.utils import get_logger

# Module-level logger, created under the name 'settings'.
log = get_logger('settings')


class SettingsWrapper(object):
    """
    SettingsWrapper returns a spider config
    """
    def __init__(self, settings_name='settings.py'):
        """
        Remember the settings file name and load settings immediately.

        :param settings_name: stored on ``self.settings_name``.
        """
        self.settings_name = settings_name
        # Start from an empty mapping, then let _load_settings() run.
        self.my_settings = {}
        self._load_settings()

    def __call__(self):
        return self.my_settings

    def settings(self):
        return self.my_settings

    def load_with_file(self, file_path):
        file_name = os.path.basename(file_path)
        if file_name[-3:] != '.py':
            log.error("module name must be python file, such as : example.py")
        module_spec = util.spec_from_file_location(file_name, file_path)