def test_get_target_version(self, mock_lsh):
    get_target('mytarget', auth_required=False)
    get_target_conf('mytargetconf', auth_required=False)
    get_version()
    mock_lsh.return_value.get_target.assert_called_once_with(
        'mytarget', auth_required=False)
    mock_lsh.return_value.get_target_conf.assert_called_once_with(
        'mytargetconf', auth_required=False)
    mock_lsh.return_value.get_version.assert_called_once_with()
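
# For context, a minimal self-contained demonstration of the
# assert_called_once_with() pattern used above, with plain unittest.mock
# and no shub objects involved (names here are illustrative, not shub's):
from unittest import mock

client_factory = mock.MagicMock()
client = client_factory()   # same shape as mock_lsh.return_value
client.get_target('mytarget', auth_required=False)
client.get_target.assert_called_once_with('mytarget', auth_required=False)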
def get_job_specs(job):
    """
    Parse job identifier into valid job id and corresponding API key.

    With projects default=10 and external=20 defined in config:
    * 1/1 -> 10/1/1
    * 2/2/2 -> 2/2/2
    * external/2/2 -> 20/2/2

    It also accepts job URLs from Scrapinghub.
    """
    match = re.match(r'^((\w+)/)?(\d+/\d+)$', job)
    if not match:
        job_url_re = r'^https?://[^/]+/p/((\d+)/)job/(\d+/\d+).*'
        match = re.match(job_url_re, job)
    if not match:
        raise BadParameterException(
            "Job ID {} is invalid. Format should be spiderid/jobid (inside a "
            "project) or target/spiderid/jobid, where target can be either a "
            "project ID or an identifier defined in scrapinghub.yml."
            "".format(job),
            param_hint='job_id',
        )
    # XXX: Lazy import due to circular dependency
    from shub.config import get_target_conf
    targetconf = get_target_conf(match.group(2) or 'default')
    return ("{}/{}".format(targetconf.project_id, match.group(3)),
            targetconf.apikey)
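
# Hedged usage sketch of the two patterns above, exercised directly with
# re.match; the job IDs follow the docstring examples and the URL is made up:
import re

short_re = r'^((\w+)/)?(\d+/\d+)$'
url_re = r'^https?://[^/]+/p/((\d+)/)job/(\d+/\d+).*'

m = re.match(short_re, 'external/2/2')
assert (m.group(2), m.group(3)) == ('external', '2/2')

m = re.match(short_re, '1/1')   # no target prefix: group(2) stays empty
assert (m.group(2), m.group(3)) == (None, '1/1')

m = re.match(url_re, 'https://app.scrapinghub.com/p/20/job/2/2')
assert (m.group(2), m.group(3)) == ('20', '2/2')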
def cli(target_or_key, keys, force):
    # target_or_key contains a target or just another job key
    if "/" in target_or_key:
        keys = (target_or_key,) + keys
        target = "default"
    else:
        target = target_or_key

    targetconf = get_target_conf(target)
    project_id = targetconf.project_id
    client = get_scrapinghub_client_from_config(targetconf)
    project = client.get_project(project_id)

    try:
        job_keys = [validate_job_key(project_id, key) for key in keys]
    except (BadParameterException, SubcommandException) as err:
        click.echo('Error during keys validation: %s' % str(err))
        exit(1)

    if not force:
        jobs_str = ", ".join([str(job) for job in job_keys])
        click.confirm(
            'Do you want to cancel these %s jobs? \n\n%s \n\nconfirm?'
            % (len(job_keys), jobs_str),
            abort=True,
        )

    try:
        output = project.jobs.cancel(
            keys=[str(job) for job in job_keys]
        )
    except (ValueError, ScrapinghubAPIError) as err:
        raise ShubException(str(err))

    click.echo(output)
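
# A hedged sketch of the same cancel call made directly against the
# python-scrapinghub client, as used in the command above; the API key,
# project ID and job keys are made up:
from scrapinghub import ScrapinghubClient

client = ScrapinghubClient('YOUR_APIKEY')   # hypothetical API key
project = client.get_project(12345)         # hypothetical project ID
print(project.jobs.cancel(keys=['12345/1/1', '12345/1/2']))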
def main(target, requirements_file):
    targetconf = get_target_conf(target)
    requirements_full_path = os.path.abspath(requirements_file)
    eggs_tmp_dir = _mk_and_cd_eggs_tmpdir()
    _download_egg_files(eggs_tmp_dir, requirements_full_path)
    decompress_egg_files()
    build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint,
                          targetconf.apikey)
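
# Hedged usage sketch (target name and requirements path are made up):
# downloads every egg listed in the file, then deploys them to the project
# configured as "default" in scrapinghub.yml.
main('default', 'requirements.txt')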
def main(target):
    targetconf = get_target_conf(target)
    url = urljoin(targetconf.endpoint, 'migrate-eggs.zip')
    params = {'project': targetconf.project_id}
    auth = (targetconf.apikey, '')
    response = requests.get(url, auth=auth, params=params, stream=True)
    with zipfile.ZipFile(BytesIO(response.content), 'r') as mfile:
        Migrator(mfile).start()
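
# The request above spelled out with literal values (endpoint and project
# ID are made up). A (user, password) tuple makes requests send HTTP Basic
# auth, with the API key as username and an empty password. Note that
# accessing response.content reads the full body, so stream=True in the
# function above only defers the download, it does not chunk it.
import requests

resp = requests.get(
    'https://app.scrapinghub.com/api/migrate-eggs.zip',
    params={'project': 12345},      # appended as ?project=12345
    auth=('YOUR_APIKEY', ''),       # hypothetical API key
)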
def cli(spider, argument, set):
    try:
        target, spider = spider.rsplit("/", 1)
    except ValueError:
        target = "default"
    targetconf = get_target_conf(target)
    job_key = schedule_spider(targetconf.project_id, targetconf.endpoint,
                              targetconf.apikey, spider, argument, set)
    watch_url = urljoin(
        targetconf.endpoint,
        "../p/{}/job/{}/{}".format(*job_key.split("/")),
    )
    short_key = job_key.split("/", 1)[1] if target == "default" else job_key
    click.echo("Spider {} scheduled, job ID: {}".format(spider, job_key))
    click.echo("Watch the log on the command line:\n shub log -f {}"
               "".format(short_key))
    click.echo("or print items as they are being scraped:\n shub items -f "
               "{}".format(short_key))
    click.echo("or watch it running in Scrapinghub's web interface:\n {}"
               "".format(watch_url))
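
# How the watch URL comes out in practice: urljoin() resolves the "../"
# against the API endpoint, dropping the endpoint's last path segment
# (the endpoint and job key below are made-up examples):
from urllib.parse import urljoin

print(urljoin('https://app.scrapinghub.com/api/', '../p/12345/job/2/1'))
# -> https://app.scrapinghub.com/p/12345/job/2/1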
def main(target, from_url=None, git_branch=None, from_pypi=None):
    targetconf = get_target_conf(target)

    if from_pypi:
        _fetch_from_pypi(from_pypi)
        decompress_egg_files()
        utils.build_and_deploy_eggs(targetconf.project_id, targetconf.endpoint,
                                    targetconf.apikey)
        return

    if from_url:
        _checkout(from_url, git_branch)

    if not os.path.isfile("setup.py"):
        error = "No setup.py -- are you running from a valid Python project?"
        raise NotFoundException(error)

    utils.build_and_deploy_egg(targetconf.project_id, targetconf.endpoint,
                               targetconf.apikey)
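
# Hedged usage sketches for the three deploy paths above (target name,
# repository URL and package name are made up):
main('default')                                  # egg from the local setup.py
main('default', from_url='https://github.com/user/repo.git',
     git_branch='main')                          # checkout, then build egg
main('default', from_pypi='my-scrapy-project')   # latest release from PyPI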
def cli(target):
    targetconf = get_target_conf(target)
    destfile = 'eggs-%s.zip' % targetconf.project_id
    fetch_eggs(targetconf.project_id, targetconf.endpoint, targetconf.apikey,
               destfile)
def cli(source_project, new_project, copy_main):
    source = get_target_conf(source_project)
    target = get_target_conf(new_project)
    copy_eggs(source.project_id, source.endpoint, source.apikey,
              target.project_id, target.endpoint, target.apikey,
              copy_main)
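
# A hedged sketch of exercising a click command like this one in tests with
# click's CliRunner; the target names are made up and the argument/option
# wiring (decorators not shown above) is an assumption:
from click.testing import CliRunner

runner = CliRunner()
result = runner.invoke(cli, ['source-target', 'dest-target'])
print(result.exit_code, result.output)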