def test_decode_uri_basic_usage():
    """decode_uri handles raw JSON strings and base64 data: URIs.

    A JSON mime (or no mime) yields the decoded JSON value; an unknown
    mime yields the raw decoded bytes.
    """
    assert decode_uri('{"spider": "hello"}') == {'spider': 'hello'}
    json_uri = 'data:application/json;charset=utf8;base64,ImhlbGxvIHdvcmxkIg=='
    assert decode_uri(json_uri) == u'hello world'
    assert decode_uri('data:;base64,ImhlbGxvIHdvcmxkIg==') == 'hello world'
    custom_mime_uri = 'data:custom-mime;charset=utf8;base64,ImhlbGxvIHdvcmxkIg=='
    assert decode_uri(custom_mime_uri) == b'"hello world"'
def main():
    """Job entry point: build environment from JOB_DATA, initialize
    logging and settings, then run the job.

    Bootstrap failures (before user code is reachable) are reported via
    _fatalerror(); later failures are logged. All errors are re-raised.
    """
    try:
        from sh_scrapy.env import get_args_and_env, decode_uri
        job = decode_uri(envvar='JOB_DATA')
        args, env = get_args_and_env(job)
        os.environ.update(env)
        from sh_scrapy.log import initialize_logging
        from sh_scrapy.settings import populate_settings
        loghdlr = initialize_logging()
    except:  # deliberately broad: logging may not be usable yet, then re-raise
        _fatalerror()
        raise
    # user code will be imported beyond this point --------------
    try:
        settings = populate_settings(job['spider'])
        loghdlr.setLevel(settings['LOG_LEVEL'])
    except Exception:
        logging.exception('Settings initialization failed')
        raise
    try:
        _run(args, settings)
    except Exception:
        logging.exception('Script initialization failed')
        raise
def kumo_settings():
    """Return the Dash project settings embedded in SHUB_SETTINGS.

    Reads the SHUB_SETTINGS environment variable (a URI understood by
    sh_scrapy.env.decode_uri) and returns its 'project_settings' mapping.

    Returns:
        dict: the project settings, or an empty dict when SHUB_SETTINGS
        is unset or empty (i.e. not running in Dash).
    """
    # Hoist the environ lookup so it is done once, not twice.
    shub_settings = os.environ.get('SHUB_SETTINGS')
    if shub_settings:
        from sh_scrapy.env import decode_uri
        return decode_uri(shub_settings).get('project_settings', {})
    logging.info(
        "Couldn't find Dash project settings, probably not running in Dash")
    return {}
def _launch():
    """Bootstrap a job from JOB_DATA and hand control to user code.

    Decodes JOB_DATA, exports the job environment, initializes logging,
    then runs the user spider via _run_usercode(). Any bootstrap failure
    is reported with _fatalerror() and re-raised.
    """
    try:
        from scrapy.exceptions import ScrapyDeprecationWarning
        warnings.filterwarnings(
            'ignore', category=ScrapyDeprecationWarning, module='^sh_scrapy')
        from sh_scrapy.env import get_args_and_env, decode_uri
        job = decode_uri(envvar='JOB_DATA')
        assert job, 'JOB_DATA must be set'
        args, env = get_args_and_env(job)
        os.environ.update(env)
        # Removed leftover debug statement `print args, env`: Python-2-only
        # syntax and it leaked job arguments to stdout (the sibling
        # SHUB_JOB_DATA variant of _launch has no such print).
        from sh_scrapy.log import initialize_logging
        from sh_scrapy.settings import populate_settings  # NOQA
        loghdlr = initialize_logging()
    except:  # deliberately broad: logging may not be usable yet, then re-raise
        _fatalerror()
        raise
    _run_usercode(job['spider'], args, _get_apisettings, loghdlr)
def _launch():
    """Bootstrap a job from SHUB_JOB_DATA and hand control to user code.

    Decodes SHUB_JOB_DATA, exports the job environment, initializes
    logging and the runtime environment, then runs the user spider via
    _run_usercode(). Bootstrap failures are reported with _fatalerror()
    and re-raised.
    """
    try:
        from scrapy.exceptions import ScrapyDeprecationWarning
        warnings.filterwarnings(
            'ignore', category=ScrapyDeprecationWarning, module='^sh_scrapy')
        from sh_scrapy.env import (
            get_args_and_env, decode_uri, setup_environment)
        job = decode_uri(envvar='SHUB_JOB_DATA')
        assert job, 'SHUB_JOB_DATA must be set'
        args, env = get_args_and_env(job)
        os.environ.update(env)
        from sh_scrapy.log import initialize_logging
        from sh_scrapy.settings import populate_settings  # NOQA
        loghdlr = initialize_logging()
        setup_environment()
    except:  # deliberately broad: logging may not be usable yet, then re-raise
        _fatalerror()
        raise
    _run_usercode(job['spider'], args, _get_apisettings, loghdlr)
def test_decode_uri_from_file():
    """decode_uri reads JSON from a plain path and from a file:// URI."""
    expected = {'hello': 'world'}
    with tempfile.NamedTemporaryFile() as temp:
        temp.write('{"hello":"world"}'.encode('utf-8'))
        temp.flush()
        assert decode_uri(temp.name) == expected
        assert decode_uri('file://' + temp.name) == expected
def test_decode_uri_var_or_env_is_needed():
    """Calling decode_uri with neither a URI nor an envvar raises ValueError."""
    with pytest.raises(ValueError):
        decode_uri()
def test_decode_uri_from_env():
    """decode_uri falls back to the named environment variable."""
    assert decode_uri(None, envvar='TEST_VAR') == {'spider': 'hello'}
def _get_apisettings():
    """Decode API settings from the SHUB_SETTINGS environment variable.

    Returns an empty dict when the variable is unset or decodes to a
    falsy value.
    """
    from sh_scrapy.env import decode_uri
    settings = decode_uri(envvar='SHUB_SETTINGS')
    return settings or {}
def _get_apisettings():
    """Decode API settings from the JOB_SETTINGS environment variable.

    Returns an empty dict when the variable is unset or decodes to a
    falsy value.
    """
    from sh_scrapy.env import decode_uri
    settings = decode_uri(envvar='JOB_SETTINGS')
    return settings or {}