Example #1
0
        def load_project(_projectname, spider_path, models_path):
            """
            Load a project's spider and models scripts from disk into the database.

            An existing Project with this name gets its ``script`` and ``models``
            fields overwritten; otherwise a new Project is created and saved.

            :param _projectname: name of the project to update or create
            :param spider_path: path of the spider script file (decoded as UTF-8)
            :param models_path: path of the models script file (decoded as UTF-8)
            :return: None
            :raises CommandError: on any failure; the message embeds the traceback
            """
            try:
                with open(spider_path, 'rb') as fp:
                    spider_script = fp.read().decode('utf8')
                with open(models_path, 'rb') as fp:
                    models_script = fp.read().decode('utf8')
                project = Project.objects(name=_projectname).first()
                if project:
                    # Write both fields in one update so a failure cannot leave
                    # the stored script and models out of step with each other.
                    project.update(script=spider_script, models=models_script)
                else:
                    project = Project(name=_projectname,
                                      info="",
                                      script=spider_script,
                                      models=models_script,
                                      generator_interval="60",
                                      downloader_interval="60",
                                      downloader_dispatch=1)
                    project.save()

            except Exception:
                reason = traceback.format_exc()
                raise CommandError('Failed to load project %s !, Reason: %s' % (spider_path, reason))
Example #2
0
    def _load_project(self, project_name, spider_path, models_path):
        """
        Load a project's spider and models scripts from disk into the database.

        An existing Project with this name gets its ``script`` and ``models``
        fields overwritten; otherwise a new Project is created and saved.

        :param project_name: name of the project to update or create
        :param spider_path: path of the spider script file (decoded as UTF-8)
        :param models_path: path of the models script file (decoded as UTF-8)
        :return: dict with "status" (bool) and "message" (str); failures are
                 reported through the dict (with the traceback in the
                 message) instead of being raised
        """
        try:
            with open(spider_path, 'rb') as fp:
                spider_script = fp.read().decode('utf8')
            with open(models_path, 'rb') as fp:
                models_script = fp.read().decode('utf8')
            project = Project.objects(name=project_name).first()
            if project:
                # Write both fields in one update so a failure cannot leave
                # the stored script and models out of step with each other.
                project.update(script=spider_script, models=models_script)
            else:
                project = Project(name=project_name,
                                  info="",
                                  script=spider_script,
                                  models=models_script,
                                  generator_interval="60",
                                  downloader_interval="60",
                                  downloader_dispatch=1)
                project.save()
            message = 'Successfully load project %s !' % (project_name)
            return {"status": True, "message": message}

        except Exception:
            reason = traceback.format_exc()
            message = 'Failed to load project %s !, Reason: %s' % (spider_path,
                                                                   reason)
            return {"status": False, "message": message}
Example #3
0
    def query_projects_by_name(name):
        """
        Look up projects by name.

        :param name: a project name, or "--all" to select every project
        :return: the Project query result, or None when the name is empty
        """
        name = smart_unicode(name)
        if not name:
            return None

        # "--all" means no filter at all; any other value filters on the name.
        criteria = {} if name == "--all" else {"name": name}
        return Project.objects(**criteria)
Example #4
0
    def handle(*args, **options):
        """
        Dump the results of a project as JSON or CSV.

        :param args: positional arguments (unused)
        :param options: must provide "type" (iterable of "json" / "csv"; the
                        last entry wins) and "projectname" (iterable of
                        project names)
        :return: the dump of the first listed project, or None when no dump
                 type or no project name was given
        :raises CommandError: for an unsupported type or an unknown project
        """
        dump_type = None
        for requested in options["type"]:
            if requested not in ("json", "csv"):
                raise CommandError("Bad Type Parameter: {0}".format(requested))
            dump_type = requested

        for _projectname in options['projectname']:
            project = Project.objects(name=_projectname).first()
            if not project:
                # Report the requested name: there is no project object to
                # read a name from when the lookup found nothing.
                raise CommandError("Project does not exist!:{0}".format(
                    _projectname))
            if dump_type == "json":
                return ResultDump(project).dump_as_json()
            if dump_type == "csv":
                return ResultDump(project).dump_as_csv()
Example #5
0
    def run_processor(self, project_name, task):
        """
        Run the processor on one task of a project.

        The project must exist and have status 2 (the status the error
        message refers to as DEBUG).

        :param project_name: project name; surrounding whitespace is ignored
        :param task: task handed to ``Processor``
        :return: dict with "status" and "message", plus "task" (the processor
                 result) on success; errors are reported through the dict
        """
        try:
            project_name = project_name.strip()
            project = Project.objects(name=project_name).first()

            if project is None:
                return {
                    "status": False,
                    "message": 'Project %s does not exist!' % (project_name),
                }

            if project.status != 2:
                return {
                    "status": False,
                    "message": 'Project %s status is not Debug, please set status to DEBUG.' % (
                        project_name),
                }

            from collector.utils import Processor
            outcome = Processor(task=task).run_processor()
            return {
                "status": True,
                "message": 'Successfully run processor %s !' % (project_name),
                "task": outcome,
            }

        except Exception:
            details = traceback.format_exc()
            return {
                "status": False,
                "message": 'Failed to run project %s !, Reason: %s' % (
                    project_name, details),
            }
Example #6
0
    def init_project(self, project_name):
        """
        Write a project's stored scripts into the execute path.

        Creates ``settings.EXECUTE_PATH`` if it is missing, then writes an
        empty ``__init__.py`` plus ``<name>_spider.py`` and
        ``<name>_models.py`` holding the project's ``script`` and ``models``
        fields encoded as UTF-8.

        :param project_name: name of the project to initialise
        :return: dict with "status" (bool) and "message" (str); failures
                 while writing are reported through the dict, not raised
        """
        if not os.path.exists(settings.EXECUTE_PATH):
            os.mkdir(settings.EXECUTE_PATH)
        project = Project.objects(name=project_name).first()

        try:
            if project is None:
                message = 'Project %s does not exist!' % (project_name)
                return {"status": False, "message": message}

            project_name = project.name
            spider_script = project.script
            models_script = project.models
            _spider_path = os.path.join(settings.EXECUTE_PATH,
                                        "%s_spider.py" % (project_name))
            _models_path = os.path.join(settings.EXECUTE_PATH,
                                        "%s_models.py" % (project_name))
            execute_init = os.path.join(settings.EXECUTE_PATH, "__init__.py")

            # The payload is UTF-8 encoded bytes, so the files are opened in
            # binary mode: a text-mode handle rejects bytes on Python 3.
            with open(execute_init, 'wb') as fp:
                fp.write(b"")
            with open(_spider_path, 'wb') as fp:
                fp.write(spider_script.encode('utf8'))
            with open(_models_path, 'wb') as fp:
                fp.write(models_script.encode('utf8'))

            message = 'Successfully init project %s !' % (project_name)
            return {"status": True, "message": message}

        except Exception:
            reason = traceback.format_exc()
            message = 'Failed to Init project %s !, Reason: %s' % (
                project_name, reason)
            return {"status": False, "message": message}
Example #7
0
 def __init__(self, project_id):
     """
     Set up the generator for one project.

     Looks the project up by id, loads its spider through ``InitSpider``
     and creates the ``Storage`` bound to it.

     :param project_id: id of the Project document
     """
     project = Project.objects(id=project_id).first()
     self.project = project

     spider_loader = InitSpider()
     spider_loader.load_spider(project)

     self.storage = Storage(project)
Example #8
0
 def __init__(self, task=None, _id=None, project_id=None):
     """
     Set up the processor from either a task dict or a stored task id.

     :param task: task dict (debug use); its "project" entry supplies the
                  project id
     :param _id: id of a stored task (normal run); needs ``project_id``
     :param project_id: id of the Project document
     :raises TypeError: when neither a task dict nor both ids are given
     """
     if isinstance(task, dict):
         # Debug status: the task itself names its project.
         project_id = task.get("project")
         package_args = {"task": task}
     elif _id and project_id:
         # Run status: the task is looked up by its id.
         package_args = {"_id": _id}
     else:
         raise TypeError("Bad Parameters.")

     self.project = Project.objects(id=project_id).first()
     self.storage = Storage(self.project)
     self.task = self.storage.package_task(**package_args)
Example #9
0
    def _filter_processor_projects():
        """
        Collect the projects whose status is ``Project.STATUS_ON``.

        :return: list of projects, ordered by ascending priority
        """
        enabled = Project.objects(status=Project.STATUS_ON)
        return [project for project in enabled.order_by('+priority')]
Example #10
0
 def dump_task_as_json_by_task_id(self, name, task_id):
     """
     Fetch one task of a project by task id.

     :param name: project name
     :param task_id: id of the task to fetch
     :return: result of ``query_task_by_id``, or None if no project has
              that name
     """
     name = smart_unicode(name)
     if not Project.objects(name=name).first():
         return None
     return self.query_task_by_id(name, task_id)
Example #11
0
 def dump_result_as_json_by_name(self, name, page, rows):
     """
     Fetch a page of results for a project.

     :param name: project name
     :param page: passed through to ``query_result_by_name``
     :param rows: passed through to ``query_result_by_name``
     :return: result of ``query_result_by_name``, or None if no project
              has that name
     """
     name = smart_unicode(name)
     if not Project.objects(name=name).first():
         return None
     return self.query_result_by_name(name, page, rows)
Example #12
0
    def _filter_generator_projects():
        """
        Collect the switched-on projects whose generator is due to run.

        A project is due when it has no ``last_generator_time`` yet, or when
        that time plus ``generator_interval`` seconds has passed. Every
        project returned has its ``last_generator_time`` set to the current
        time.

        :return: list of due projects, ordered by ascending priority
        """
        due = []
        candidates = Project.objects(
            status=Project.STATUS_ON).order_by('+priority')
        for project in candidates:
            now = datetime.datetime.now()
            last = project.last_generator_time
            interval = int(project.generator_interval)
            # Skip only projects that have run before and are still waiting.
            if last and last + datetime.timedelta(seconds=interval) > now:
                continue
            due.append(project)
            project.update(last_generator_time=now)

        return due
Example #13
0
    def edit_project_settings(self, data):
        """
        Edit Project Settings

        Reads the project name from ``data["project"]`` and applies every
        other recognised, non-empty entry to that project in one update:
        group, timeout, status, priority, info, script, interval
        (generator_interval), ip_limit (downloader_interval) and number
        (downloader_dispatch). ``update_datetime`` is always refreshed.

        :param data: dict-like request payload
        :return: dict with "status", "project", "message" and "code":
                 2001 on success, 4002 for a missing or unknown project,
                 4003 (plus "reason") for a value that cannot be converted,
                 5001 for any other failure
        """
        name = (data.get("project") or "").strip()
        # An absent or blank name cannot identify a project.
        project = Project.objects(name=name).first() if name else None
        if project is None:
            return {
                "status": False,
                "project": name,
                "message": "Bad Parameters",
                "code": 4002,
            }

        def _clean(value):
            # Only text carries surrounding whitespace; numbers pass through.
            return value.strip() if hasattr(value, "strip") else value

        def _seconds_text(value):
            # Interval fields are validated as integers but stored as text.
            return str(int(value))

        # (request key, Project field, converter)
        simple_fields = (
            ("group", "group", str),
            ("timeout", "timeout", int),
            ("status", "status", int),
            ("priority", "priority", int),
            ("info", "info", str),
            ("interval", "generator_interval", _seconds_text),
            ("ip_limit", "downloader_interval", _seconds_text),
            ("number", "downloader_dispatch", int),
        )

        try:
            # Convert everything first and write once, so a bad value cannot
            # leave the project with only some of the requested changes.
            changes = {}
            for key, field, convert in simple_fields:
                if data.get(key, False):
                    changes[field] = convert(_clean(data.get(key)))

            if data.get("script", False):
                script = _clean(data.get("script"))
                # Python 2 unicode is encoded to UTF-8; a native str is
                # stored unchanged (str() of bytes would mangle it on 3).
                if not isinstance(script, str):
                    script = script.encode('utf8')
                changes["script"] = script

            changes["update_datetime"] = datetime.datetime.now()
            project.update(**changes)

        except ValueError:
            return {
                "status": False,
                "project": name,
                "message": "Bad Parameters",
                "reason": traceback.format_exc(),
                "code": 4003,
            }
        except Exception:
            return {
                "status": False,
                "project": name,
                "message": "Internal Server Error",
                "code": 5001
            }

        return {
            "status": True,
            "project": name,
            "message": "Operation Succeeded",
            "code": 2001
        }