def test_update_settings_check_unicode_in_py2_key_value():
    """Entries stored via setdict() are readable by raw and native-str key."""
    # a dict entry is duplicated as unicode doesn't match native str value
    key, value = '\xf1e\xf1e\xf1e', '\xf1e\xf1e'
    settings = EntrypointSettings()
    settings.setdict({key: value}, 10)
    assert settings[key] == value
    assert settings[to_native_str(key)] == to_native_str(value)
def _update_settings(o, d):
    """Update ``o`` with the entries of ``d`` after native-str conversion.

    Every key of ``d`` is passed through ``to_native_str``; a value is
    converted as well when ``is_string`` accepts it. The converted entries
    are written back into ``d`` (an entry whose key changes is added next to
    the original one) and ``d`` is then merged into ``o`` via ``o.update``.
    """
    # We need to convert settings to string since the S3 download handler
    # doesn't work if the AWS keys are passed as unicode. Other code may also
    # depend on settings being str. TODO: we should test this
    #
    # Iterate over a snapshot of the items: inserting a converted key while
    # iterating the live dict raises "dictionary changed size during
    # iteration" on Python 3.
    for k, v in list(d.items()):
        d[to_native_str(k)] = to_native_str(v) if is_string(v) else v
    o.update(d)
Esempio n. 3
0
def test_update_settings_check_unicode_in_py2_key_value():
    """setdict() keeps the value reachable under raw and converted keys."""
    # a dict entry is duplicated as unicode doesn't match native str value
    raw_key = '\xf1e\xf1e\xf1e'
    raw_value = '\xf1e\xf1e'
    entrypoint_settings = EntrypointSettings()
    entrypoint_settings.setdict({raw_key: raw_value}, 10)
    assert entrypoint_settings[raw_key] == raw_value
    converted_key = to_native_str(raw_key)
    stored = entrypoint_settings[converted_key]
    assert stored == to_native_str(raw_value)
def _make_scrapy_args(arg, args_dict):
    """Build a flat ``[arg, "key=value", arg, "key=value", ...]`` list.

    Returns an empty list when ``args_dict`` is falsy. Entries are emitted
    in sorted item order; keys go through ``to_native_str`` and so do the
    values that ``is_string`` accepts.
    """
    if not args_dict:
        return []
    result = []
    for name, value in sorted(dict(args_dict).items()):
        key = to_native_str(name)
        if is_string(value):
            value = to_native_str(value)
        result.extend([arg, "{}={}".format(key, value)])
    return result
def _job_args_and_env(msg):
    """Return ``(command, environment)`` taken from a job message.

    ``msg['job_env']`` is used only when it is a dict (otherwise an empty
    environment is assumed); ``msg['job_cmd']`` is used as-is when it is a
    list and is otherwise wrapped as ``[str(job_cmd)]``. Environment keys go
    through ``to_native_str`` and so do the values ``is_string`` accepts.
    """
    raw_env = msg.get('job_env')
    raw_env = raw_env if isinstance(raw_env, dict) else {}
    command = msg.get('job_cmd')
    if not isinstance(command, list):
        command = [str(command)]
    native_env = {}
    for name, value in sorted(dict(raw_env).items()):
        key = to_native_str(name)
        native_env[key] = to_native_str(value) if is_string(value) else value
    return command, native_env
Esempio n. 6
0
def _make_scrapy_args(arg, args_dict):
    """Turn ``args_dict`` into repeated ``arg, "key=value"`` list entries.

    A falsy ``args_dict`` yields ``[]``. Items are processed in sorted
    order; keys, and the values accepted by ``is_string``, are passed
    through ``to_native_str`` before formatting.
    """
    if not args_dict:
        return []
    pairs = [
        "{}={}".format(to_native_str(key),
                       to_native_str(val) if is_string(val) else val)
        for key, val in sorted(dict(args_dict).items())
    ]
    # Interleave the option flag with every formatted pair.
    return [item for pair in pairs for item in (arg, pair)]
Esempio n. 7
0
def _job_args_and_env(msg):
    """Extract the command list and environment dict from ``msg``.

    A non-dict ``job_env`` is replaced by an empty dict and a non-list
    ``job_cmd`` becomes ``[str(job_cmd)]``. The returned environment has its
    keys, and the values accepted by ``is_string``, passed through
    ``to_native_str``.
    """
    environment = msg.get('job_env')
    if not isinstance(environment, dict):
        environment = {}
    command = msg.get('job_cmd')
    command = command if isinstance(command, list) else [str(command)]
    converted = dict(
        (to_native_str(key), to_native_str(val) if is_string(val) else val)
        for key, val in sorted(dict(environment).items())
    )
    return command, converted
    def write(self, data):
        """Buffer ``data`` and log every complete line it finishes.

        ``data`` is converted with ``to_native_str`` using ``self.encoding``
        and appended to ``self.buf``. Each newline-terminated line is sent to
        ``self._logprefixed``; the trailing partial line stays in ``self.buf``
        for the next call.
        """
        text = to_native_str(data, self.encoding)
        chunks = (self.buf + text).split('\n')
        # split() always returns at least one element: the unfinished tail.
        self.buf = chunks.pop()
        for line in chunks:
            self._logprefixed(line)
    def write(self, data):
        """Append ``data`` to the line buffer and log the completed lines.

        The converted text (``to_native_str`` with ``self.encoding``) is
        joined to ``self.buf``; everything up to the last newline is logged
        line by line through ``self._logprefixed`` and the remainder is kept
        in ``self.buf``.
        """
        incoming = to_native_str(data, self.encoding)
        complete, newline, remainder = (self.buf + incoming).rpartition('\n')
        self.buf = remainder
        if newline:
            for entry in complete.split('\n'):
                self._logprefixed(entry)
def test_get_args_and_env_run_script():
    """Check get_args_and_env() output for a message carrying ``job_cmd``."""
    msg = {'key': '1/2/3', 'job_cmd': ['custom.py', 'arg1'],
           'auth': 'authstring'}
    result = get_args_and_env(msg)
    # 'hex_codec' is the codec's canonical name; the bare 'hex' alias is not
    # registered on Python 3.2/3.3.
    expected_auth = codecs.encode(to_bytes('1/2/3:authstring'), 'hex_codec')
    assert len(result) == 2
    assert result[0] == ['custom.py', 'arg1']
    assert result[1] == {
        'SHUB_JOBAUTH': to_native_str(expected_auth),
        'SHUB_JOBKEY': '1/2/3',
        'SHUB_JOBNAME': 'custom.py',
        'SHUB_JOB_TAGS': ''}
    # Optional fields must show up in the environment once provided.
    add_fields = {'tags': ['tagA', 'tagB'], 'api_url': 'some-api-url'}
    msg.update(add_fields)
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    assert result1[1]['SHUB_APIURL'] == 'some-api-url'
    assert result1[1]['SHUB_JOB_TAGS'] == 'tagA,tagB'
    def _get_log_item(self, ev):
        """Get HubStorage log item for the given Twisted event, or None if no
        document should be inserted

        The level comes from ``ev['logLevel']`` for events whose ``system``
        is ``'scrapy'`` and from ``ev['isError']`` otherwise. The text is
        taken from ``message``, replaced by the ``failure`` traceback when
        present, prefixed with ``why`` when present, and finally replaced by
        ``format % ev`` when a format is given. Returns a dict with
        ``message`` and ``level`` keys, or None when the event is below the
        handler's current level or yields no text at all.
        """
        if ev['system'] == 'scrapy':
            level = ev['logLevel']
        elif ev['isError']:
            level = logging.ERROR
        else:
            level = logging.INFO

        # It's important to access level through handler instance,
        # min log level can change at any moment.
        if level < self._hs_loghdlr.level:
            return None

        msg = ev.get('message')
        if msg:
            msg = to_native_str(msg[0])

        failure = ev.get('failure', None)
        if failure:
            msg = failure.getTraceback()

        why = ev.get('why', None)
        if why:
            msg = "%s\n%s" % (why, msg)

        fmt = ev.get('format')
        if fmt:
            try:
                msg = fmt % ev
            except Exception:
                # A broken format must not break logging, but
                # KeyboardInterrupt/SystemExit still have to propagate.
                msg = "UNABLE TO FORMAT LOG MESSAGE: fmt=%r ev=%r" % (fmt, ev)
                level = logging.ERROR

        try:
            # to replicate typical scrapy log appearance
            msg = msg.replace('\n', '\n\t')
        except AttributeError:
            # Neither message, failure nor format produced any text (msg is
            # still None or an empty sequence): there is nothing to insert.
            return None
        return {'message': msg, 'level': level}
Esempio n. 12
0
def test_get_args_and_env_run_script():
    """A ``job_cmd`` message yields that command plus the SHUB_* job env."""
    msg = dict(key='1/2/3', job_cmd=['custom.py', 'arg1'], auth='authstring')
    result = get_args_and_env(msg)
    expected_auth = codecs.encode(to_bytes('1/2/3:authstring'), 'hex_codec')
    assert len(result) == 2
    command, environment = result
    assert command == ['custom.py', 'arg1']
    expected_env = {
        'SHUB_JOBAUTH': to_native_str(expected_auth),
        'SHUB_JOBKEY': '1/2/3',
        'SHUB_JOBNAME': 'custom.py',
        'SHUB_JOB_TAGS': '',
    }
    assert environment == expected_env

    # Tags and API url are optional and only appear once they are supplied.
    msg.update(tags=['tagA', 'tagB'], api_url='some-api-url')
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    extended_env = result1[1]
    assert extended_env['SHUB_APIURL'] == 'some-api-url'
    assert extended_env['SHUB_JOB_TAGS'] == 'tagA,tagB'
    def _get_log_item(self, ev):
        """Get HubStorage log item for the given Twisted event, or None if no
        document should be inserted

        Level: ``ev['logLevel']`` when ``ev['system']`` is ``'scrapy'``,
        otherwise ERROR or INFO depending on ``ev['isError']``. Text: first
        element of ``message``, overridden by the ``failure`` traceback,
        prefixed with ``why``, and overridden again by ``format % ev`` when
        those keys are set. The result is ``{'message': ..., 'level': ...}``;
        None is returned for events below the handler's current level and
        for events that yield no text.
        """
        if ev['system'] == 'scrapy':
            level = ev['logLevel']
        elif ev['isError']:
            level = logging.ERROR
        else:
            level = logging.INFO

        # It's important to access level through handler instance,
        # min log level can change at any moment.
        if level < self._hs_loghdlr.level:
            return None

        msg = ev.get('message')
        if msg:
            msg = to_native_str(msg[0])

        failure = ev.get('failure', None)
        if failure:
            msg = failure.getTraceback()

        why = ev.get('why', None)
        if why:
            msg = "%s\n%s" % (why, msg)

        fmt = ev.get('format')
        if fmt:
            try:
                msg = fmt % ev
            except Exception:
                # Formatting problems are reported as an ERROR entry instead
                # of raising; KeyboardInterrupt/SystemExit are not swallowed.
                msg = "UNABLE TO FORMAT LOG MESSAGE: fmt=%r ev=%r" % (fmt, ev)
                level = logging.ERROR

        try:
            # to replicate typical scrapy log appearance
            msg = msg.replace('\n', '\n\t')
        except AttributeError:
            # msg is still None or an empty sequence, i.e. the event carried
            # no usable text: skip it instead of crashing the observer.
            return None
        return {'message': msg, 'level': level}
Esempio n. 14
0
def test_get_args_and_env_run_spider():
    """A spider message yields a ``scrapy crawl`` command and the job env."""
    spider_args = {'arg1': 'val1', 'arg2': 'val2'}
    settings = {'SETTING1': 'VAL1', 'SETTING2': 'VAL2'}
    msg = {
        'key': '1/2/3',
        'spider': 'test',
        'spider_type': 'auto',
        'auth': 'auths',
        'spider_args': spider_args,
        'settings': settings,
    }
    result = get_args_and_env(msg)
    expected_auth = codecs.encode(to_bytes('1/2/3:auths'), 'hex_codec')
    assert len(result) == 2
    command, environment = result

    expected_command = ['scrapy', 'crawl', 'test']
    expected_command += ['-a', 'arg1=val1', '-a', 'arg2=val2']
    expected_command += ['-s', 'SETTING1=VAL1', '-s', 'SETTING2=VAL2']
    assert command == expected_command

    expected_env = {
        'SCRAPY_JOB': '1/2/3',
        'SCRAPY_PROJECT_ID': '1',
        'SCRAPY_SPIDER': 'test',
        'SHUB_JOBAUTH': to_native_str(expected_auth),
        'SHUB_JOBKEY': '1/2/3',
        'SHUB_JOBNAME': 'test',
        'SHUB_JOB_TAGS': '',
        'SHUB_SPIDER_TYPE': 'auto',
    }
    assert environment == expected_env

    # Tags and API url are optional and only appear once they are supplied.
    msg.update(tags=['tagA', 'tagB'], api_url='some-api-url')
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    extended_env = result1[1]
    assert extended_env['SHUB_APIURL'] == 'some-api-url'
    assert extended_env['SHUB_JOB_TAGS'] == 'tagA,tagB'
def test_get_args_and_env_run_spider():
    """Check get_args_and_env() output for a message describing a spider."""
    msg = {'key': '1/2/3', 'spider': 'test', 'spider_type': 'auto',
           'auth': 'auths', 'spider_args': {'arg1': 'val1', 'arg2': 'val2'},
           'settings': {'SETTING1': 'VAL1', 'SETTING2': 'VAL2'}}
    result = get_args_and_env(msg)
    # 'hex_codec' is the codec's canonical name; the bare 'hex' alias is not
    # registered on Python 3.2/3.3.
    expected_auth = codecs.encode(to_bytes('1/2/3:auths'), 'hex_codec')
    assert len(result) == 2
    assert result[0] == ['scrapy', 'crawl', 'test', '-a', 'arg1=val1',
                         '-a', 'arg2=val2', '-s', 'SETTING1=VAL1', '-s',
                         'SETTING2=VAL2']
    assert result[1] == {'SCRAPY_JOB': '1/2/3',
                         'SCRAPY_PROJECT_ID': '1',
                         'SCRAPY_SPIDER': 'test',
                         'SHUB_JOBAUTH': to_native_str(expected_auth),
                         'SHUB_JOBKEY': '1/2/3',
                         'SHUB_JOBNAME': 'test',
                         'SHUB_JOB_TAGS': '',
                         'SHUB_SPIDER_TYPE': 'auto'}
    # Optional fields must show up in the environment once provided.
    add_fields = {'tags': ['tagA', 'tagB'], 'api_url': 'some-api-url'}
    msg.update(add_fields)
    result1 = get_args_and_env(msg)
    assert len(result1) == 2
    assert result1[1]['SHUB_APIURL'] == 'some-api-url'
    assert result1[1]['SHUB_JOB_TAGS'] == 'tagA,tagB'
 def auth(self):
     """Return the decoded content of the ``SHUB_JOBAUTH`` variable.

     The value is read from ``os.environ`` (``KeyError`` when unset),
     decoded with the ``hex_codec`` codec and passed to ``to_native_str``.
     """
     encoded = os.environ['SHUB_JOBAUTH']
     raw = decode(encoded, 'hex_codec')
     return to_native_str(raw)
def test_jobauth():
    """_jobauth() returns the hex-encoded ``key:auth`` pair of the message."""
    msg = {'key': '1/2/3', 'auth': 'authstring'}
    # 'hex_codec' is the codec's canonical name; the bare 'hex' alias is not
    # registered on Python 3.2/3.3.
    expected = codecs.encode(to_bytes('1/2/3:authstring'), 'hex_codec')
    assert _jobauth(msg) == to_native_str(expected)
 def set(self, name, value, priority='project'):
     """Store a setting after passing it through ``to_native_str``.

     ``name`` is always converted; ``value`` only when ``is_string``
     accepts it. The result is handed to the parent class' ``set`` with
     the given ``priority``.
     """
     parent_set = super(EntrypointSettings, self).set
     native_name = to_native_str(name)
     native_value = to_native_str(value) if is_string(value) else value
     parent_set(native_name, native_value, priority=priority)
def _jobauth(msg):
    """Return ``"<key>:<auth>"`` from ``msg``, hex-encoded.

    The pair is converted with ``to_bytes``, encoded with ``hex_codec`` and
    passed through ``to_native_str``.
    """
    credentials = '{0[key]}:{0[auth]}'.format(msg)
    hexed = codecs.encode(to_bytes(credentials), 'hex_codec')
    return to_native_str(hexed)
Esempio n. 20
0
def test_jobauth():
    """_jobauth() must equal the hex encoding of ``"<key>:<auth>"``."""
    message = dict(key='1/2/3', auth='authstring')
    hexed = codecs.encode(to_bytes('1/2/3:authstring'), 'hex_codec')
    actual = _jobauth(message)
    assert actual == to_native_str(hexed)
 def writelines(self, lines):
     """Log every entry of ``lines`` through ``self._logprefixed``.

     Each entry is first converted with ``to_native_str`` using
     ``self.encoding``.
     """
     for raw_line in lines:
         self._logprefixed(to_native_str(raw_line, self.encoding))
Esempio n. 22
0
# -*- coding: utf-8 -*-
import codecs
import os
import shutil
import tempfile

import pytest

# Scratch directory created at import time; the session-scoped
# ``clean_shub_fifo_path`` fixture in this module removes it again.
TEMP_DIR = tempfile.mkdtemp()
SHUB_FIFO_PATH = os.path.join(TEMP_DIR, 'scrapinghub')
os.environ['SHUB_FIFO_PATH'] = SHUB_FIFO_PATH

# NOTE(review): this import comes after SHUB_FIFO_PATH is exported,
# presumably because sh_scrapy reads the variable at import time -- confirm
# before reordering.
from sh_scrapy.compat import to_native_str, to_bytes

# "1/2/3:authstr" run through to_bytes, hex-encoded and passed to
# to_native_str.
TEST_AUTH = to_native_str(codecs.encode(to_bytes('1/2/3:authstr'),
                                        'hex_codec'))


@pytest.fixture(scope='session', autouse=True)
def clean_shub_fifo_path():
    """Remove the ``TEMP_DIR`` tree once the test session is over."""
    # TEMP_DIR is only read here, so no ``global`` declaration is needed.
    try:
        yield
    finally:
        shutil.rmtree(TEMP_DIR)


@pytest.fixture(autouse=True)
def set_jobkeyenvironment(monkeypatch):
    """Set ``SHUB_JOBKEY`` and ``SCRAPY_JOB`` to ``1/2/3`` for each test."""
    for variable in ('SHUB_JOBKEY', 'SCRAPY_JOB'):
        monkeypatch.setenv(variable, '1/2/3')
 def set(self, name, value, priority='project'):
     """Convert ``name`` (and string values) before delegating to the parent.

     ``name`` goes through ``to_native_str``; ``value`` does too when
     ``is_string`` accepts it. The parent class' ``set`` receives both
     together with ``priority``.
     """
     delegate = super(EntrypointSettings, self).set
     key = to_native_str(name)
     if is_string(value):
         value = to_native_str(value)
     delegate(key, value, priority=priority)
Esempio n. 24
0
def _jobauth(msg):
    """Hex-encode the ``"<key>:<auth>"`` pair taken from ``msg``.

    The encoded bytes are passed through ``to_native_str`` before being
    returned.
    """
    pair = '{0[key]}:{0[auth]}'.format(msg)
    pair_bytes = to_bytes(pair)
    return to_native_str(codecs.encode(pair_bytes, 'hex_codec'))
Esempio n. 25
0
 def auth(self):
     """Decode ``SHUB_JOBAUTH`` from the environment and return it.

     Raises ``KeyError`` when the variable is missing. The value is decoded
     with ``hex_codec`` and the result passed through ``to_native_str``.
     """
     hex_value = os.environ['SHUB_JOBAUTH']
     decoded = decode(hex_value, 'hex_codec')
     return to_native_str(decoded)
import os
import sys
import mock
import pytest
import codecs
from sh_scrapy.hsref import _HubstorageRef
from sh_scrapy.compat import to_native_str, to_bytes

# Hex-encoded "1/2/3:authstr". 'hex_codec' is the codec's canonical name;
# the bare 'hex' alias is not registered on Python 3.2/3.3.
TEST_AUTH = to_native_str(
    codecs.encode(to_bytes('1/2/3:authstr'), 'hex_codec'))


def test_init_disabled():
    """A freshly built ``_HubstorageRef`` reports falsy state and no jobkey."""
    ref = _HubstorageRef()
    # Attributes are checked in a fixed order, with the ``jobkey`` probe in
    # between.
    for attr in ('_client', '_project', '_job', 'enabled'):
        assert not getattr(ref, attr)
    assert not hasattr(ref, 'jobkey')
    for attr in ('_projectid', '_spiderid', '_jobcounter'):
        assert not getattr(ref, attr)


@pytest.fixture
def hsref():
    """Return a ``_HubstorageRef`` built while ``SHUB_JOBKEY`` is ``1/2/3``."""
    # The environment patch is only active while the object is constructed.
    with mock.patch.dict(os.environ, {'SHUB_JOBKEY': '1/2/3'}):
        return _HubstorageRef()


@pytest.fixture
def hsc_class(monkeypatch):
 def writelines(self, lines):
     """Send each entry of ``lines`` to ``self._logprefixed``.

     Entries are converted lazily, one at a time, with ``to_native_str``
     and ``self.encoding`` right before they are logged.
     """
     converted = (to_native_str(item, self.encoding) for item in lines)
     for text in converted:
         self._logprefixed(text)