Code example #1
# ConnectionError / InvalidURL come from requests, which backs ScrapydAPI
from requests.exceptions import ConnectionError, InvalidURL
from scrapyd_api import ScrapydAPI


def delete_version(project, client, version):
    url = 'http://{ip}:{port}'.format(ip=client.ip, port=client.port)
    try:
        scrapyd = ScrapydAPI(url)
        result = scrapyd.delete_version(project.name, version)
        return bool(result)
    except (ConnectionError, InvalidURL):
        return False
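Example #1 follows the basic python-scrapyd-api pattern that recurs throughout this page. A minimal standalone sketch, assuming a scrapyd daemon on its default port 6800; the project name and version below are hypothetical placeholders:

from scrapyd_api import ScrapydAPI

scrapyd = ScrapydAPI('http://localhost:6800')  # scrapyd's default port
print(scrapyd.list_versions('myproject'))      # e.g. ['1588923223']
# delete_version() returns True when scrapyd reports status 'ok'
print(scrapyd.delete_version('myproject', '1588923223'))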
Code example #2
File: views.py Project: jinuoA/spider
def remove_depody_spider(request, client_id, project, version_name):
    if request.method == 'POST':
        node = Node.objects.get(id=client_id)
        scrapyd = ScrapydAPI(scrapyd_url(node.node_ip, node.node_port))
        try:
            # delete_version() returns a bool; JsonResponse needs a dict
            # (or safe=False), so wrap the result
            deleted = scrapyd.delete_version(project, version_name)
            return JsonResponse({'deleted': deleted})
        except ConnectionError:
            return JsonResponse({'message': 'Connect Error'}, status=500)
    return JsonResponse({'message': 'Method Not Allowed'}, status=405)
Code example #3
File: crawler.py Project: Alexoner/sloth
def delete_version(project, version, url=DEFAULT_URL):
    scrapyd = ScrapydAPI(url)
    return scrapyd.delete_version(project, version)
Code example #4
class Schedular:
    def __init__(self):
        self._scrapyd = None
        try:
            self._scrapyd = ScrapydAPI('http://{}:{}'.format(
                config['Scrapyd']['host'], config['Scrapyd']['port']))
        except KeyError as e:
            logger.error("{}: No such key exists - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to create a scrapyd object - {}".format(
                class_fullname(e), str(e)))

    def addversion(self,
                   project,
                   version,
                   egg_filename='pricewatch_bot-0.0.1-py3.7.egg'):
        """ Scrapyd API: addversion - https://scrapyd.readthedocs.io/en/stable/api.html#addversion-json
        """
        if not self._scrapyd:
            logger.error(
                "No scrapyd object find. Unable to add a new version.")
            return None
        num_of_spiders = None
        try:
            with open(os.path.join(settings.APP_DIST_DIRPATH, egg_filename),
                      'rb') as egg:
                num_of_spiders = self._scrapyd.add_version(
                    project, version, egg)
        except FileNotFoundError as e:
            logger.error("{}: {}".format(class_fullname(e), str(e)))
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to add a version - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info(
                "version '{}' for project '{}' added/updated - {} spider(s)".
                format(project, version, num_of_spiders))
            # call API to create a version
            response = requests.post(
                'http://{}:{}/api/schedule/version/'.format(
                    config['PriceWatchWeb']['host'],
                    config['PriceWatchWeb']['port']),
                json={
                    'project': project,
                    'version': version,
                    'status': settings.SCHEDULES_VERSION_STATUS_ADDED,
                    'added_at': str(datetime.now()),
                    'deleted_at': None,
                })
            if not response.ok:
                logger.error(
                    "{} HTTP Error: Failed to add a version - {} - {}".format(
                        response.status_code, response.reason, response.text))
        finally:
            return num_of_spiders

    def schedule(self, project, spider, **kwargs):
        if not self._scrapyd:
            logger.error("No scrapyd object find. Unable to schedule a job.")
            return None
        jobid = None  # initialized so the finally block can't hit an unbound name
        _jobid = str(uuid.uuid4())
        kwargs['jobid'] = _jobid  # a scrapyd parameter
        kwargs['job_id'] = _jobid  # passed through to the spider
        try:
            _s = None  # scrapy settings as a dict, e.g. {'DOWNLOAD_DELAY': 2}
            jobid = self._scrapyd.schedule(project,
                                           spider,
                                           settings=_s,
                                           **kwargs)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to schedule a job - {}".format(
                class_fullname(e), str(e)))
        else:
            if jobid != _jobid:
                logger.error(
                    "Invalid jobid [requested vs returned] [{} vs {}]".format(
                        _jobid, jobid))
            else:
                logger.info(
                    "new scheduled job '{}' for project '{}', spider '{}' has been set"
                    .format(jobid, project, spider))
                # call API to create a job
                response = requests.post(
                    'http://{}:{}/api/schedule/job/'.format(
                        config['PriceWatchWeb']['host'],
                        config['PriceWatchWeb']['port']),
                    json={
                        'job_id': jobid,
                        'project': project,
                        'spider': spider,
                        'version': kwargs.pop('_version', None),
                        'settings': _s,
                        'other_params': kwargs,
                        'status': settings.SCHEDULES_JOB_STATUS_PENDING,
                    })
                if not response.ok:
                    logger.error(
                        "{} HTTP Error: Failed to add a new job - {} - {}".
                        format(response.status_code, response.reason,
                               response.text))
        finally:
            return jobid

    def listjobs(self, project):
        if not self._scrapyd:
            logger.error("No scrapyd object find. Unable to list jobs.")
            return None
        jobs = None
        try:
            jobs = self._scrapyd.list_jobs(project)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to list jobs - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info("list of jobs for project '{}' - {}".format(
                project, str(jobs)))
            self._store_jobs(project, jobs)
        finally:
            return jobs

    def _store_jobs(self, project, jobs):
        """ parse jobs and store information into db
        """
        if all(_j in jobs for _j in ['running', 'finished']):
            for x in jobs['running']:
                # call API to update a running job
                response = requests.put(
                    'http://{}:{}/api/schedule/job/{}/'.format(
                        config['PriceWatchWeb']['host'],
                        config['PriceWatchWeb']['port'], x['id']),
                    json={
                        'job_id': x['id'],
                        'project': project,
                        'spider': x['spider'],
                        'start_time': x['start_time'],
                        'status': settings.SCHEDULES_JOB_STATUS_RUNNING,
                    })
                if not response.ok:
                    logger.error(
                        "{} HTTP Error: Failed to update a running job - {} - {}"
                        .format(response.status_code, response.reason,
                                response.text))
            for x in jobs['finished']:
                # call API to update a finished job
                response = requests.put(
                    'http://{}:{}/api/schedule/job/{}/'.format(
                        config['PriceWatchWeb']['host'],
                        config['PriceWatchWeb']['port'], x['id']),
                    json={
                        'job_id': x['id'],
                        'project': project,
                        'spider': x['spider'],
                        'start_time': x['start_time'],
                        'end_time': x['end_time'],
                        'status': settings.SCHEDULES_JOB_STATUS_FINISHED,
                    })
                if not response.ok:
                    logger.error(
                        "{} HTTP Error: Failed to update a finished job - {} - {}"
                        .format(response.status_code, response.reason,
                                response.text))

    def delversion(self, project, version):
        """ delversion
        """
        if not self._scrapyd:
            logger.error("No scrapyd object find. Unable to delete version.")
            return False
        deleted = False
        try:
            deleted = self._scrapyd.delete_version(project, version)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to delete version - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info(
                "successfully deleted project '{}' version '{}'".format(
                    project, version))
            # update deleted version
            response = requests.put(
                'http://{}:{}/api/schedule/version/'.format(
                    config['PriceWatchWeb']['host'],
                    config['PriceWatchWeb']['port']),
                json={
                    'project': project,
                    'version': version,
                    'status': settings.SCHEDULES_VERSION_STATUS_DELETED,
                    'deleted_at': str(datetime.now()),
                })
            if not response.ok:
                logger.error(
                    "{} HTTP Error: Failed to update a deleted version - {} - {}"
                    .format(response.status_code, response.reason,
                            response.text))
        finally:
            return deleted

    def delproject(self, project):
        """ delproject
        """
        if not self._scrapyd:
            logger.error("No scrapyd object find. Unable to delete version.")
            return False
        deleted = False
        try:
            deleted = self._scrapyd.delete_project(project)
        except ScrapydResponseError as e:
            logger.error("{}: Response error - {}".format(
                class_fullname(e), str(e)))
        except Exception as e:
            logger.error("{}: Failed to delete project - {}".format(
                class_fullname(e), str(e)))
        else:
            logger.info("successfully deleted project '{}'".format(project))
            # update deleted project
            response = requests.put(
                'http://{}:{}/api/schedule/version/'.format(
                    config['PriceWatchWeb']['host'],
                    config['PriceWatchWeb']['port']),
                json={
                    'project': project,
                    'status': settings.SCHEDULES_VERSION_STATUS_DELETED,
                    'deleted_at': str(datetime.now()),
                })
            if not response.ok:
                logger.error(
                    "{} HTTP Error: Failed to update deleted project - {} - {}"
                    .format(response.status_code, response.reason,
                            response.text))
        finally:
            return deleted

    def close(self):
        # guard against the constructor having failed to build the client
        if self._scrapyd:
            self._scrapyd.client.close()
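A hypothetical driver for the class above, assuming the config entries and egg file the excerpt expects are in place; the project, spider, and version names are placeholders:

scheduler = Schedular()
spiders = scheduler.addversion('pricewatch', 'r100')  # spider count, or None on failure
if spiders:
    # '_version' is popped inside schedule() and forwarded to the web API
    scheduler.schedule('pricewatch', 'amazon', _version='r100')
    scheduler.listjobs('pricewatch')
scheduler.close()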
Code example #5
class Scrapyd_Control(object):
    def __init__(self):
        scrapyd_url = input('Enter the scrapyd address: ')
        project = input('Enter the project name: ')
        self.project = project
        self.scrapyd = ScrapydAPI(scrapyd_url)

    # start a spider
    def schedule(self):
        spider = input('Enter the spider name: ')
        return {
            'project': self.project,
            'spider': spider,
            'jobid': self.scrapyd.schedule(self.project, spider)
        }

    start, run = schedule, schedule

    # cancel a spider
    def cancel(self):
        jobid = input('Paste the jobid of the spider to cancel: ')
        return self.scrapyd.cancel(self.project, jobid)

    # list projects
    def listprojects(self):
        return self.scrapyd.list_projects()

    # list spiders
    def listspiders(self):
        return self.scrapyd.list_spiders(self.project)

    # list all jobs
    def listjobs(self):
        return self.scrapyd.list_jobs(self.project)

    # check a job's status
    def jobstatus(self):
        jobid = input('Paste the jobid to check: ')
        return self.scrapyd.job_status(self.project, jobid)

    # list versions
    def listversions(self):
        return self.scrapyd.list_versions(self.project)

    # delete a version
    def delversion(self):
        version_name = input('Paste the version to delete: ')
        yes = input("Really delete version {}? Type 'yes' to confirm, or press Enter to skip\n".format(version_name))
        if yes == 'yes':
            return self.scrapyd.delete_version(self.project, version_name)

    # delete the project
    def delproject(self):
        yes = input("Really delete project {}? Type 'yes' to confirm, or press Enter to skip\n".format(self.project))
        if yes == 'yes':
            return self.scrapyd.delete_project(self.project)

    # list all commands
    def help(self):
        print("""
        start a spider        schedule|start|run
        cancel a spider       cancel
        list projects         listprojects
        list spiders          listspiders
        list all jobs         listjobs
        check a job's status  jobstatus
        list versions         listversions
        delete a version      delversion
        delete the project    delproject
        list all commands     help
        """)