def load_module(self, fullname):
        '''Return the project module after running the project code in it.

        The module kept on ``self.mod`` is reused when there is one;
        otherwise a new module named ``self.name`` is created and stored
        there.  A dedicated logger (exposed as both ``logging`` and
        ``logger``), a ``log_buffer`` list and the ``__file__`` /
        ``__loader__`` / ``__project__`` / ``__package__`` attributes are set
        before the code returned by ``self.get_code(fullname)`` is executed
        in the module namespace.

        If the namespace has no ``__handler_cls__`` afterwards, it is set to
        a class from the namespace that subclasses ``BaseHandler`` (the
        namespace's own ``BaseHandler`` if present, else
        ``base_handler.BaseHandler``); when several classes qualify, the last
        one in namespace iteration order is kept.

        :param fullname: name forwarded to ``self.get_code``.
        :return: the populated module object.
        '''
        module = self.mod
        if module is None:
            module = imp.new_module(self.name)
            self.mod = module

        # logger inject: the handler is handed the same list that is
        # published as ``log_buffer`` on the module
        records = []
        script_logger = logging.Logger(self.name)
        module.logging = script_logger
        module.logger = script_logger
        buffer_handler = SaveLogHandler(records)
        buffer_handler.setFormatter(LogFormatter(color=False))
        script_logger.addHandler(buffer_handler)
        module.log_buffer = records

        # module metadata
        module.__file__ = '<%s>' % self.name
        module.__loader__ = self
        module.__project__ = self.project
        module.__package__ = ''

        namespace = module.__dict__
        six.exec_(self.get_code(fullname), namespace)
        # drop the source lines cached by linecache
        linecache.clearcache()

        if '__handler_cls__' not in namespace:
            base_cls = namespace.get('BaseHandler', base_handler.BaseHandler)
            members = list(six.itervalues(namespace))
            subclasses = [
                member for member in members
                if inspect.isclass(member)
                and member is not base_cls
                and issubclass(member, base_cls)
            ]
            if subclasses:
                # the last match wins
                namespace['__handler_cls__'] = subclasses[-1]

        return module
Esempio n. 2
0
    def load_module(self, fullname):
        '''Execute the project code inside the loader's module and return it.

        Reuses ``self.mod`` when it is set, otherwise creates a module named
        ``self.name`` and remembers it on ``self.mod``.  Module metadata and
        a per-module logger are installed first, then the code obtained from
        ``self.get_code(fullname)`` runs in the module namespace.

        When the namespace ends up without ``__handler_cls__``, every class
        in it that is a subclass of ``BaseHandler`` (taken from the namespace,
        falling back to ``base_handler.BaseHandler``) is assigned to that key
        in turn, so the last such class is the one that remains.

        :param fullname: name forwarded to ``self.get_code``.
        :return: the populated module object.
        '''
        if self.mod is not None:
            target = self.mod
        else:
            target = imp.new_module(self.name)
            self.mod = target

        target.__file__ = '<%s>' % self.name
        target.__loader__ = self
        target.__project__ = self.project
        target.__package__ = ''

        # logger inject
        captured = []
        project_logger = logging.Logger(self.name)
        target.logging = project_logger
        target.logger = project_logger
        capture_handler = SaveLogHandler(captured)
        capture_handler.setFormatter(LogFormatter(color=False))
        project_logger.addHandler(capture_handler)
        target.log_buffer = captured

        compiled = self.get_code(fullname)
        six.exec_(compiled, target.__dict__)
        # forget the source lines linecache has cached so far
        linecache.clearcache()

        if '__handler_cls__' in target.__dict__:
            return target

        parent_cls = target.__dict__.get('BaseHandler', base_handler.BaseHandler)
        # iterate over a copy: the namespace is written to inside the loop
        for candidate in list(six.itervalues(target.__dict__)):
            if not inspect.isclass(candidate):
                continue
            if candidate is parent_cls or not issubclass(candidate, parent_cls):
                continue
            target.__dict__['__handler_cls__'] = candidate

        return target
Esempio n. 3
0
    def build_module(project, env=None):
        '''Load a project's script as a module and instantiate its handler.

        ``project`` must contain the keys 'name' and 'script'.  ``env`` is
        copied (an empty dict is used when it is None) and gets a 'debug'
        entry that is true when the project's 'status' is 'DEBUG' or missing.
        With ``env['enable_stdout_capture']`` true or absent the module
        logger gets a SaveLogHandler bound to ``module.log_buffer`` with an
        uncolored formatter; otherwise a ``logging.StreamHandler`` with a
        colored formatter.

        Returns a dict with the keys 'loader', 'module', 'class',
        'instance', 'exception' (None), 'exception_log' (empty string),
        'info' (the project itself) and 'load_time' (``time.time()``).

        Raises AssertionError when a required project key is missing or no
        handler class can be found in the module.
        '''
        from pyspider.libs import base_handler
        assert 'name' in project, 'need name of project'
        assert 'script' in project, 'need script of project'

        # fix for old non-package version scripts
        parent_dir = os.path.join(os.path.dirname(__file__), "..")
        if parent_dir not in sys.path:
            sys.path.insert(1, parent_dir)

        # work on a private copy so the caller's mapping is left untouched
        env = {} if env is None else dict(env)
        env['debug'] = project.get('status', 'DEBUG') == 'DEBUG'

        loader = ProjectLoader(project)
        module = loader.load_module(project['name'])

        # logger inject
        buffer = []
        module.log_buffer = buffer
        script_logger = logging.Logger(project['name'])
        module.logging = script_logger
        module.logger = script_logger
        if env.get('enable_stdout_capture', True):
            log_handler, colored = SaveLogHandler(buffer), False
        else:
            log_handler, colored = logging.StreamHandler(), True
        log_handler.setFormatter(LogFormatter(color=colored))
        script_logger.addHandler(log_handler)

        namespace = module.__dict__
        if '__handler_cls__' not in namespace:
            base_cls = namespace.get('BaseHandler', base_handler.BaseHandler)
            members = list(six.itervalues(namespace))
            subclasses = [
                member for member in members
                if inspect.isclass(member)
                and member is not base_cls
                and issubclass(member, base_cls)
            ]
            if subclasses:
                # the last match wins
                namespace['__handler_cls__'] = subclasses[-1]
        handler_cls = namespace.get('__handler_cls__')
        assert handler_cls is not None, "need BaseHandler in project module"

        instance = handler_cls()
        instance.__env__ = env
        instance.project_name = project['name']
        instance.project = project

        return {
            'loader': loader,
            'module': module,
            'class': handler_cls,
            'instance': instance,
            'exception': None,
            'exception_log': '',
            'info': project,
            'load_time': time.time(),
        }
Esempio n. 4
0
    def load_module(self, fullname):
        '''Create (or reuse) the project module and run the project code in it.

        The module stored on ``self.mod`` is reused when present; otherwise a
        new module named ``self.name`` is created and stored there.  Before
        the code returned by ``self.get_code(fullname)`` is executed in the
        module namespace, the module receives a dedicated logger (as both
        ``logging`` and ``logger``), a ``log_buffer`` list and the
        ``__file__`` / ``__loader__`` / ``__project__`` / ``__package__``
        attributes.

        If the namespace has no ``__handler_cls__`` after execution, every
        class in it that subclasses ``BaseHandler`` (the namespace's own
        ``BaseHandler`` if present, else ``base_handler.BaseHandler``) is
        assigned to that key in turn, so the last match is kept.

        :param fullname: name forwarded to ``self.get_code``.
        :return: the populated module object.
        '''
        if self.mod is None:
            mod = self.mod = imp.new_module(self.name)
        else:
            mod = self.mod

        # logger inject: SaveLogHandler is handed the list that is also
        # published as ``mod.log_buffer``
        log_buffer = []
        mod.logging = mod.logger = logging.Logger(self.name)
        mod.logger.addHandler(SaveLogHandler(log_buffer))
        mod.log_buffer = log_buffer
        mod.__file__ = '<%s>' % self.name
        mod.__loader__ = self
        mod.__project__ = self.project
        mod.__package__ = ''

        code = self.get_code(fullname)
        # Call form of exec: the ``exec code in ns`` statement is a
        # SyntaxError on Python 3, while Python 2 treats this two-element
        # form exactly like ``exec code in mod.__dict__``.
        exec(code, mod.__dict__)
        # drop the source lines cached by linecache
        linecache.clearcache()

        if '__handler_cls__' not in mod.__dict__:
            BaseHandler = mod.__dict__.get('BaseHandler',
                                           base_handler.BaseHandler)
            # Iterate over a snapshot: on Python 3 ``values()`` is a live
            # view, and inserting '__handler_cls__' while iterating it
            # raises RuntimeError.
            for each in list(mod.__dict__.values()):
                if inspect.isclass(each) and each is not BaseHandler \
                        and issubclass(each, BaseHandler):
                    mod.__dict__['__handler_cls__'] = each

        return mod
Esempio n. 5
0
    def build_module(project, env=None):
        '''Turn a project description into a loaded module plus handler.

        Requires the keys 'name' and 'script' in ``project``.  A copy of
        ``env`` (or an empty dict when ``env`` is None) is extended with
        'debug', which is true when ``project`` has no 'status' or its
        'status' equals 'DEBUG'; the copy is attached to the handler instance
        as ``__env__``.  The module logger writes through a SaveLogHandler
        bound to ``module.log_buffer`` unless ``env`` sets
        'enable_stdout_capture' to a false value, in which case a
        ``logging.StreamHandler`` with a colored formatter is used.

        The result is a dict holding the loader, the module, the handler
        class and its instance, ``None`` / ``''`` for the exception fields,
        the project under 'info' and the current time under 'load_time'.

        Raises AssertionError when a required project key is missing or the
        module provides no handler class.
        '''
        from pyspider.libs import base_handler
        assert 'name' in project, 'need name of project'
        assert 'script' in project, 'need script of project'

        # fix for old non-package version scripts
        legacy_root = os.path.join(os.path.dirname(__file__), "..")
        if legacy_root not in sys.path:
            sys.path.insert(1, legacy_root)

        # never mutate the mapping handed in by the caller
        settings = dict({} if env is None else env)
        settings['debug'] = project.get('status', 'DEBUG') == 'DEBUG'

        loader = ProjectLoader(project)
        module = loader.load_module(project['name'])

        # logger inject
        module.log_buffer = []
        project_logger = logging.Logger(project['name'])
        module.logging = project_logger
        module.logger = project_logger
        capture = settings.get('enable_stdout_capture', True)
        if not capture:
            sink = logging.StreamHandler()
            sink.setFormatter(LogFormatter(color=True))
        else:
            sink = SaveLogHandler(module.log_buffer)
            sink.setFormatter(LogFormatter(color=False))
        project_logger.addHandler(sink)

        if '__handler_cls__' not in module.__dict__:
            parent_cls = module.__dict__.get('BaseHandler',
                                             base_handler.BaseHandler)
            # iterate over a copy: the namespace is written to inside the loop
            for candidate in list(six.itervalues(module.__dict__)):
                if not inspect.isclass(candidate):
                    continue
                if candidate is parent_cls \
                        or not issubclass(candidate, parent_cls):
                    continue
                module.__dict__['__handler_cls__'] = candidate
        handler_cls = module.__dict__.get('__handler_cls__')
        assert handler_cls is not None, "need BaseHandler in project module"

        instance = handler_cls()
        instance.__env__ = settings
        instance.project_name = project['name']
        instance.project = project

        built = {
            'loader': loader,
            'module': module,
            'class': handler_cls,
            'instance': instance,
            'exception': None,
            'exception_log': '',
            'info': project,
            'load_time': time.time(),
        }
        return built