Exemple #1
0
class GitHelper(object):
    """Mixin that gives a test case a scratch directory and git helpers.

    It relies on ``addCleanup`` and ``fail`` being supplied by the class it
    is mixed into.
    """

    # Default repository directory, relative to the temporary directory.
    repo = 'local/'

    def setUp(self):
        """Create the temporary directory and schedule its removal."""
        tempdir = TempDirectory()
        self.dir = tempdir
        self.addCleanup(tempdir.cleanup)

    def git(self, command, repo=None):
        """Run ``git <command>`` inside *repo* and return its output.

        *command* is split on whitespace to build the argument list. When
        *repo* is not given, ``self.repo`` is used. A non-zero exit status
        fails the test with the captured output (stderr is merged into
        stdout).
        """
        working_dir = self.dir.getpath(repo or self.repo)
        try:
            argv = ['git']
            argv.extend(command.split())
            output = check_output(argv, cwd=working_dir, stderr=STDOUT)
        except CalledProcessError as e:
            self.fail(e.output)
        else:
            return output

    def git_rev_parse(self, label, repo=None):
        """Return the stripped short revision that *label* resolves to."""
        command = 'rev-parse --verify -q --short ' + label
        return self.git(command, repo).strip()

    def check_tags(self, expected, repo=None):
        """Compare *expected* with a mapping of every tag to its revision."""
        tags = self.git('tag', repo).split()
        actual = {tag: self.git_rev_parse(tag, repo) for tag in tags}
        compare(expected, actual=actual)

    def make_repo_with_content(self, repo):
        """Initialise *repo* and commit three small files to it.

        *repo* is used as a prefix for the file names, so it is expected to
        end with a path separator (as the default ``repo`` does).
        """
        repo_path = self.dir.getpath(repo)
        if not os.path.exists(repo_path):
            self.dir.makedir(repo)
        self.git('init', repo)
        for name, content in (('a', 'some content'),
                              ('b', 'other content'),
                              ('c', 'more content')):
            self.dir.write(repo + name, content)
        self.git('add .', repo)
        self.git('commit -m initial', repo)
Exemple #2
0
class TestPrepareTarget(TestCase):
    """Tests for ``prepare_target`` run against a temporary target root."""

    def setUp(self):
        """Point ``workfront.generate.TARGET_ROOT`` at a fresh temp dir."""
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)
        replacer = Replacer()
        replacer('workfront.generate.TARGET_ROOT', self.dir.path)
        self.addCleanup(replacer.restore)
        self.session = Session('test')

    def test_from_scratch(self):
        """An empty target root yields the path and stays empty."""
        target = prepare_target(self.session)
        expected_path = self.dir.getpath('unsupported.py')

        compare(target, expected=expected_path)
        self.dir.compare(expected=[])

    def test_everything(self):
        """An existing target file is left in place with its content."""
        existing = b'yy'
        self.dir.write('unsupported.py', existing)
        target = prepare_target(self.session)
        expected_path = self.dir.getpath('unsupported.py')

        compare(target, expected=expected_path)
        self.dir.compare(expected=['unsupported.py'])
        compare(self.dir.read('unsupported.py'), b"yy")

    def test_dots_in_version(self):
        """Dots in the API version are dropped from the module file name."""
        versioned_session = Session('test', api_version='v4.0')
        target = prepare_target(versioned_session)
        expected_path = self.dir.getpath('v40.py')

        compare(target, expected=expected_path)
        self.dir.compare(expected=[])
class TestPrepareTarget(TestCase):
    """Exercise ``prepare_target`` with TARGET_ROOT redirected to a temp dir."""

    def setUp(self):
        """Create the temp dir, patch TARGET_ROOT and build a session."""
        tempdir = TempDirectory()
        self.dir = tempdir
        self.addCleanup(tempdir.cleanup)
        patcher = Replacer()
        patcher('workfront.generate.TARGET_ROOT', tempdir.path)
        self.addCleanup(patcher.restore)
        self.session = Session('test')

    def test_from_scratch(self):
        """Nothing is written when the target root starts out empty."""
        result = prepare_target(self.session)
        wanted = self.dir.getpath('unsupported.py')

        compare(result, expected=wanted)
        self.dir.compare(expected=[])

    def test_everything(self):
        """A pre-existing target file survives with unchanged content."""
        self.dir.write('unsupported.py', b'yy')
        result = prepare_target(self.session)
        wanted = self.dir.getpath('unsupported.py')

        compare(result, expected=wanted)
        self.dir.compare(expected=['unsupported.py'])
        content = self.dir.read('unsupported.py')
        compare(content, b"yy")

    def test_dots_in_version(self):
        """``api_version='v4.0'`` maps to the file name ``v40.py``."""
        result = prepare_target(Session('test', api_version='v4.0'))
        wanted = self.dir.getpath('v40.py')

        compare(result, expected=wanted)
        self.dir.compare(expected=[])
Exemple #4
0
class GitHelper(object):
    """Test mixin providing a temporary directory and thin git wrappers.

    ``addCleanup`` and ``fail`` must be provided by the class this is mixed
    into.
    """

    # Directory (relative to the temp dir) used when no repo is passed.
    repo = 'local/'

    def setUp(self):
        """Set up ``self.dir`` and register its cleanup."""
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)

    def git(self, command, repo=None):
        """Execute a git command line in *repo* and return what it printed.

        The command string is split on whitespace. If git exits with an
        error, the test fails with git's combined stdout/stderr output.
        """
        target = repo or self.repo
        cwd = self.dir.getpath(target)
        try:
            result = check_output(['git'] + command.split(),
                                  cwd=cwd,
                                  stderr=STDOUT)
        except CalledProcessError as e:
            self.fail(e.output)
        else:
            return result

    def git_rev_parse(self, label, repo=None):
        """Resolve *label* to a short revision, stripped of whitespace."""
        raw = self.git('rev-parse --verify -q --short '+label, repo)
        return raw.strip()

    def check_tags(self, expected, repo=None):
        """Assert that the tags in *repo* map to the *expected* revisions."""
        actual = dict(
            (tag, self.git_rev_parse(tag, repo))
            for tag in self.git('tag', repo).split()
        )
        compare(expected, actual=actual)

    def make_repo_with_content(self, repo):
        """Create *repo* if needed, then commit files ``a``, ``b`` and ``c``."""
        if not os.path.exists(self.dir.getpath(repo)):
            self.dir.makedir(repo)
        self.git('init', repo)
        files = [
            ('a', 'some content'),
            ('b', 'other content'),
            ('c', 'more content'),
        ]
        for suffix, text in files:
            self.dir.write(repo + suffix, text)
        for command in ('add .', 'commit -m initial'):
            self.git(command, repo)
Exemple #5
0
class HomeDirTest(unittest.TestCase):
    """Tests for ``PathHomeDir`` backed by a temporary directory."""

    def setUp(self):
        """Create the temporary directory and the home dir wrapping it."""
        self.temp_dir = TempDirectory(create=True)
        self.home = PathHomeDir(self.temp_dir.path)

    def tearDown(self):
        """Remove the temporary directory."""
        self.temp_dir.cleanup()

    def test_read(self):
        """A file written to the directory is readable through the home."""
        self.temp_dir.write("filename", "contents")
        self.assertEqual(self.home.read("filename"), "contents")

    def test_write(self):
        """Writing creates new files and overwrites existing ones."""
        self.temp_dir.write("existing_file", "existing_contents")
        self.home.write("new_file", "contents")
        self.home.write("existing_file", "new_contents")
        self.assertEqual(self.temp_dir.read("existing_file"), "new_contents")
        self.assertEqual(self.temp_dir.read("new_file"), "contents")

    def test_config_file(self):
        """The config file round-trips and is only user readable/writable."""
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            self.temp_dir.check(".cosmosrc")
            self.assertEqual(self.home.read_config_file(), "new config")
            self.assertIn("Settings saved", outputs.stdout.getvalue())
            file_mode = os.stat(self.temp_dir.getpath(".cosmosrc")).st_mode
            self.assertEqual(file_mode,
                             stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR)

    def test_override_config_file(self):
        """``filename_override`` makes the home read another config file."""
        with collect_outputs():
            other_config = self.temp_dir.write("path/other", "config")
            self.assertEqual(
                self.home.read_config_file(filename_override=other_config),
                "config")

    def test_warn_on_unprotected_config_file(self):
        """A world-accessible config file triggers a warning (not on nt)."""
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            config_path = self.temp_dir.getpath(".cosmosrc")
            # 0o777 is the octal spelling accepted by Python 2.6+ and 3;
            # the bare 0777 form is a syntax error on Python 3.
            os.chmod(config_path, 0o777)
            self.home.read_config_file()
            # The test expects no warning when os.name is 'nt'.
            assert_func = (self.assertNotIn
                           if os.name == 'nt' else self.assertIn)
            assert_func("WARNING", outputs.stderr.getvalue())

    def test_last_cluster(self):
        """The last cluster id round-trips through ``.cosmoslast``."""
        self.home.write_last_cluster("0000000")
        self.temp_dir.check(".cosmoslast")
        self.assertEqual(self.home.read_last_cluster(), "0000000")
Exemple #6
0
def test_allergy_combine_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """Check that combine mode writes both allergies on one JSON line."""
    with open("tests/resources/allergys.csv") as f_csv:
        with open(dir.getpath("allergys.json"), "w") as f_json:
            converter = AllergyToJson(combine=True)
            converter.csv_to_json(f_csv, f_json)

    with open(dir.getpath("allergys.json")) as f_json:
        lines = f_json.readlines()

    # Combined output is expected to be a single JSON document.
    assert len(lines) == 1

    combined = json.loads(lines[0])
    allergies = combined["allergys"]
    assert allergies[0]["onset"] == "20180724"
    assert allergies[1]["onset"] == "20110101"
Exemple #7
0
def test_problem_combine_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """Check that combine mode writes both problems on one JSON line."""
    with open("tests/resources/problems.csv") as f_csv:
        with open(dir.getpath("problems.json"), "w") as f_json:
            converter = ProblemToJson(combine=True)
            converter.csv_to_json(f_csv, f_json)

    with open(dir.getpath("problems.json")) as f_json:
        lines = f_json.readlines()

    # Combined output is expected to be a single JSON document.
    assert len(lines) == 1

    combined = json.loads(lines[0])
    problems = combined["problems"]
    assert problems[0]["annotated_display"] == "This is the annotated display."
    assert problems[1]["annotated_display"] == "This is another annotated display."
class HomeDirTest(unittest.TestCase):
    """Exercise ``PathHomeDir`` against files in a temporary directory."""

    def setUp(self):
        """Create the scratch directory and the home object under test."""
        self.temp_dir = TempDirectory(create=True)
        self.home = PathHomeDir(self.temp_dir.path)

    def tearDown(self):
        """Delete the scratch directory."""
        self.temp_dir.cleanup()

    def test_read(self):
        """``read`` returns the content of a file in the home directory."""
        self.temp_dir.write("filename", "contents")
        self.assertEqual(self.home.read("filename"), "contents")

    def test_write(self):
        """``write`` creates missing files and replaces existing content."""
        self.temp_dir.write("existing_file", "existing_contents")
        self.home.write("new_file", "contents")
        self.home.write("existing_file", "new_contents")
        self.assertEqual(self.temp_dir.read("existing_file"),
                         "new_contents")
        self.assertEqual(self.temp_dir.read("new_file"), "contents")

    def test_config_file(self):
        """Saving settings creates ``.cosmosrc`` with mode rw for the user."""
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            self.temp_dir.check(".cosmosrc")
            self.assertEqual(self.home.read_config_file(), "new config")
            self.assertIn("Settings saved", outputs.stdout.getvalue())
            file_mode = os.stat(self.temp_dir.getpath(".cosmosrc")).st_mode
            self.assertEqual(file_mode, stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR)

    def test_override_config_file(self):
        """An explicit ``filename_override`` is read instead of the default."""
        with collect_outputs():
            other_config = self.temp_dir.write("path/other", "config")
            self.assertEqual(
                self.home.read_config_file(filename_override=other_config),
                "config")

    def test_warn_on_unprotected_config_file(self):
        """Reading a config file with mode 777 warns on stderr (not on nt)."""
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            config_path = self.temp_dir.getpath(".cosmosrc")
            # Use the 0o prefix: the legacy 0777 literal does not parse on
            # Python 3, while 0o777 works on Python 2.6+ as well.
            os.chmod(config_path, 0o777)
            self.home.read_config_file()
            # The test expects no warning when os.name is 'nt'.
            assert_func = (self.assertNotIn if os.name == 'nt' else self.assertIn)
            assert_func("WARNING", outputs.stderr.getvalue())

    def test_last_cluster(self):
        """The last cluster id is stored in and read from ``.cosmoslast``."""
        self.home.write_last_cluster("0000000")
        self.temp_dir.check(".cosmoslast")
        self.assertEqual(self.home.read_last_cluster(), "0000000")
class TestPathSource(TestCase):
    """Schema and interface tests for the ``paths`` source ``Plugin``."""

    def setUp(self):
        """Create a temporary directory that is removed after each test."""
        tempdir = TempDirectory()
        self.dir = tempdir
        self.addCleanup(tempdir.cleanup)

    def _assert_schema_fails(self, message, **config):
        """Validate *config* and expect a schema failure with *message*."""
        with ShouldFailSchemaWith(message):
            Plugin.schema(config)

    def test_abc(self):
        """``Plugin`` is a ``Source`` subclass."""
        self.assertTrue(issubclass(Plugin, Source))

    def test_schema_ok(self):
        """A config listing existing paths validates to an equal dict."""
        first = self.dir.write('foo', b'f')
        second = self.dir.write('bar', b'b')
        expected = dict(type='paths', values=[first, second], repo='config')
        validated = Plugin.schema(
            dict(type='paths', values=[first, second], repo='config'))
        compare(expected, validated)

    def test_schema_wrong_type(self):
        """A ``type`` other than ``paths`` is rejected."""
        self._assert_schema_fails(
            "not a valid value for dictionary value @ data['type']",
            type='bar', values=['/'])

    def test_schema_extra_keys(self):
        """Unknown keys are rejected."""
        self._assert_schema_fails(
            "extra keys not allowed @ data['foo']",
            type='paths', foo='bar')

    def test_name_supplied(self):
        """Supplying a ``name`` is rejected."""
        self._assert_schema_fails(
            "not a valid value for dictionary value @ data['name']",
            type='paths', name='foo')

    def test_no_paths(self):
        """An empty ``values`` list is rejected."""
        message = ("length of value must be at least 1 for dictionary value "
                   "@ data['values']")
        self._assert_schema_fails(message, type='paths', values=[])

    def test_path_not_string(self):
        """A non-string entry in ``values`` is rejected."""
        self._assert_schema_fails(
            "invalid list value @ data['values'][0]",
            type='paths', values=[1])

    def test_path_not_starting_with_slash(self):
        """The entry ``'foo'`` (no leading slash) is rejected."""
        self._assert_schema_fails(
            "invalid list value @ data['values'][0]",
            type='paths', values=['foo'])

    def test_path_not_there(self):
        """A path that does not exist on disk is rejected."""
        missing = self.dir.getpath('bad')
        self._assert_schema_fails(
            "invalid list value @ data['values'][0]",
            type='paths', values=[missing])

    def test_interface(self):
        """Constructor arguments are exposed as plugin attributes."""
        plugin = Plugin('source', name=None, repo='config',
                        values=['/foo/bar'])
        checks = (
            ('type', 'source'),
            ('name', None),
            ('repo', 'config'),
            ('source_paths', ['/foo/bar']),
        )
        for attribute, expected in checks:
            compare(getattr(plugin, attribute), expected)
Exemple #10
0
def setUpModule():
    """Fill in the module-level ``settings`` used by the tests.

    The database URL comes from ``SQLALCHEMY_URL`` when set; otherwise a
    SQLite file in a temporary directory is used and migrated to ``head``
    with alembic. The registration cache uses redis when ``USE_REDIS`` is
    set, and a dbm file in the temporary directory otherwise.
    """
    import os
    scratch = TempDirectory()

    env_url = os.getenv('SQLALCHEMY_URL')
    if not env_url:
        sqlalchemy_url = "sqlite:///%(here)s/sns.sqlite" % {'here': scratch.path}
        settings['sqlalchemy.url'] = sqlalchemy_url
        # alembic is only imported when the schema has to be created locally.
        from alembic.config import Config
        from alembic import command
        alembic_cfg = Config("alembic.ini")
        alembic_cfg.set_main_option('sqlalchemy.url', sqlalchemy_url)
        command.upgrade(alembic_cfg, "head")
    else:
        settings['sqlalchemy.url'] = env_url

    includes = settings['pyramid.includes']
    if not os.getenv('USE_REDIS'):
        from sns.testing import FakeLock
        dbm_file = scratch.getpath('registration.dbm')
        settings['cache.registration.backend'] = "dogpile.cache.dbm"
        settings['cache.registration.arguments.filename'] = dbm_file
        settings['cache.registration.arguments.lock_factory'] = FakeLock
        includes.append('sns.signed_cookie_session')
    else:
        settings['cache.registration.backend'] = 'dogpile.cache.redis'
        includes.append('pyramid_redis_sessions')
Exemple #11
0
class MultiFileMultiRegexTestCase(unittest.TestCase):
    """Tests for ``MultiFileMultiRegex.find_all`` over three small files."""

    def setUp(self):
        """Write three text files and record their full paths."""
        self.d1 = TempDirectory()
        # Remove the scratch directory after every test, pass or fail;
        # previously nothing in this class cleaned it up.
        self.addCleanup(self.d1.cleanup)
        self.d1_path = self.d1.getpath('')
        self.d1.write('foo.txt', 'some nonsense 1')
        self.d1.write('bar.txt', 'some nonsense 1')
        self.d1.write('baz.txt', 'some nonsense 2')
        self.all_files = [self.d1_path + i for i in os.listdir(self.d1_path)]

    def test_2_files_match(self):
        """A literal pattern matches only the two files containing it."""
        mfmr = MultiFileMultiRegex(self.all_files, ['some nonsense 1'])

        match_groups = mfmr.find_all()
        self.assertItemsEqual(
            match_groups.keys(),
            [self.d1_path + i for i in ['foo.txt', 'bar.txt']])
        self.assertItemsEqual(match_groups.values(), [{
            'some nonsense 1': ['some nonsense 1']
        }, {
            'some nonsense 1': ['some nonsense 1']
        }])

    def test_all_files_match(self):
        """A digit pattern matches every file, keyed by the pattern text."""
        # Raw string: same value as before, without the invalid ``\d``
        # escape that newer Python versions warn about.
        pattern = r'some nonsense \d'
        mfmr = MultiFileMultiRegex(self.all_files, [pattern])

        match_groups = mfmr.find_all()
        self.assertItemsEqual(match_groups.keys(), self.all_files)
        self.assertItemsEqual(match_groups.values(), [{
            pattern: ['some nonsense 1']
        }, {
            pattern: ['some nonsense 1']
        }, {
            pattern: ['some nonsense 2']
        }])
class Test_SnapshotArchive_Repository(TestCase):
    """Tests for committing a snapshot archive to a ``BBRepository``."""

    def setUp(self):
        """Create an in-memory repository and commit one snapshot to it."""
        store = MemoryCommitStorage()
        self.repo = BBRepository(store)
        self.tempdir = TempDirectory()
        self.setup_archive_a_snapshot()

    def setup_archive_a_snapshot(self):
        """Write a small archive file and commit it with a tag and WAL range."""
        archive_name = 'somearchive.tgz'
        self.archive_contents = '123'
        self.archive_path = self.tempdir.write(archive_name,
            self.archive_contents)
        self.tag = generate_tag()
        self.first_WAL = '01234'
        self.last_WAL = '45678'
        commit_snapshot_to_repository(self.repo, self.archive_path, self.tag,
            self.first_WAL, self.last_WAL)

    def tearDown(self):
        """Remove the temporary directory."""
        self.tempdir.cleanup()

    def test_can_retrieve_snapshot_contents_with_tag(self):
        """The last commit's contents equal the archived file's contents."""
        commit = list(self.repo)[-1]
        restore_path = self.tempdir.getpath('restorearchive.tgz')
        commit.get_contents_to_filename(restore_path)
        # Read through a context manager so the handle is closed before
        # tearDown removes the directory.
        with open(restore_path, 'rb') as restored:
            restored_contents = restored.read()
        self.assertEqual(self.archive_contents, restored_contents)

    def test_get_first_WAL_file_for_archived_snapshot_with_tag(self):
        """``get_first_WAL`` returns the first WAL recorded for the tag."""
        self.assertEqual(self.first_WAL, get_first_WAL(self.repo, self.tag))

    def test_get_last_WAL_file_for_archived_snapshot_with_tag(self):
        """``get_last_WAL`` returns the last WAL recorded for the tag."""
        self.assertEqual(self.last_WAL, get_last_WAL(self.repo, self.tag))
Exemple #13
0
 def test_with_user(self, repo: Repo, tmpdir: TempDirectory):
     """Cloning with a ``User`` writes its name and email to the git config."""
     # One address is used for both the input and the assertion; the two
     # literals previously disagreed, so the test could never pass.
     email = 'foo@example.com'
     repo.commit_content('a')
     git = Git.clone(repo.path, tmpdir.getpath('clone'),
                     User(name='Foo Bar', email=email))
     config = (git.path / '.git' / 'config').read_text()
     assert 'name = Foo Bar' in config
     assert ('email = ' + email) in config
Exemple #14
0
class BaseTestCase(unittest.TestCase):
    """Base fixture: a temp dir holding ``foo.py`` and ``bar.py``."""
    def setUp(self):
        """Write the two files and remember their names and paths."""
        tempdir = self.d1 = TempDirectory()

        self.first_file_name, self.parse_file_1_name = b'foo.py', b'bar.py'

        # foo.py contains the name of bar.py as its content.
        tempdir.write(self.first_file_name, self.parse_file_1_name)
        self.first_file_path = tempdir.getpath(self.first_file_name)

        tempdir.write(self.parse_file_1_name, b'Some nonsense')
        self.parse_file_1_path = tempdir.getpath(self.parse_file_1_name)

    def tearDown(self):
        """Remove every ``TempDirectory`` that is still registered."""
        TempDirectory.cleanup_all()
Exemple #15
0
 def test_repo(self, repo: Repo, tmpdir: TempDirectory):
     """Clone from a ``Git`` source object and check the cloned commit."""
     repo.commit_content('a')
     origin = Git(repo.path)
     clone = Git.clone(origin, tmpdir.getpath('clone'))
     # Exactly one commit is expected in the clone's log.
     [commit] = clone('log', '--format=%h').split()
     expected_show = ('a commit\n'
                      ' a | 1 +\n'
                      ' 1 file changed, 1 insertion(+)\n')
     shown = clone('show', '--pretty=format:%s', '--stat', commit)
     compare(shown, expected=expected_show)
Exemple #16
0
def test_problem_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """Check the transform's return value and the JSON written for one problem."""
    expected_birth_date = "19500701143000"
    expected_display = "This is the annotated display."

    with open("tests/resources/problem.csv") as f_csv:
        with open(dir.getpath("problem.json"), "w") as f_json:
            converter = ProblemToJson()
            spy = mocker.spy(converter, 'transform')

            converter.csv_to_json(f_csv, f_json)
            # The spy records what ``transform`` returned.
            assert spy.spy_return
            assert spy.spy_return["patient"]["birth_date"] == expected_birth_date
            assert len(spy.spy_return["problems"]) == 1
            assert spy.spy_return["problems"][0]["annotated_display"] == expected_display

    with open(dir.getpath("problem.json")) as f_json:
        lines = f_json.readlines()

    assert len(lines) == 1

    written = json.loads(lines[0])
    assert written["patient"]["birth_date"] == expected_birth_date
    assert written["problems"][0]["annotated_display"] == expected_display
Exemple #17
0
def test_allergy_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """Check the transform's return value and the JSON written for one allergy."""
    expected_birth_date = "19500701143000"
    expected_onset = "20180724"

    with open("tests/resources/allergy.csv") as f_csv:
        with open(dir.getpath("allergy.json"), "w") as f_json:
            converter = AllergyToJson()
            spy = mocker.spy(converter, 'transform')

            converter.csv_to_json(f_csv, f_json)
            # The spy records what ``transform`` returned.
            assert spy.spy_return
            assert spy.spy_return["patient"]["birth_date"] == expected_birth_date
            assert len(spy.spy_return["allergys"]) == 1
            assert spy.spy_return["allergys"][0]["onset"] == expected_onset

    with open(dir.getpath("allergy.json")) as f_json:
        lines = f_json.readlines()

    assert len(lines) == 1

    written = json.loads(lines[0])
    assert written["patient"]["birth_date"] == expected_birth_date
    assert written["allergys"][0]["onset"] == expected_onset
Exemple #18
0
class RealParserWalkerTestCase(BaseTestCase):
    """Test when first_file and parse files are not in the same directory."""
    def setUp(self):
        """Add a second directory holding ``bar.py`` and ``baz.py``."""
        super(RealParserWalkerTestCase, self).setUp()
        second_dir = self.d2 = TempDirectory()
        self.dirname = second_dir.getpath('')

        self.parse_file_2_name = b'baz.py'

        # In the second directory bar.py contains the name of baz.py.
        second_dir.write(self.parse_file_1_name, self.parse_file_2_name)
        second_dir.write(self.parse_file_2_name, b'Some nonsense')
        # Overrides the path set by the base class with the second dir's copy.
        self.parse_file_1_path = second_dir.getpath(self.parse_file_1_name)
        self.parse_file_2_path = second_dir.getpath(self.parse_file_2_name)

        self.pw = ParserWalker(self.first_file_path, self.dirname)

    def test_dif(self):
        """``parse_files`` yields the first file and both parse files."""
        parsed = self.pw.parse_files()
        wanted = [
            self.first_file_path,
            self.parse_file_1_path,
            self.parse_file_2_path,
        ]
        self.assertItemsEqual(list(parsed), wanted)
Exemple #19
0
 def test_minimal(self, repo: Repo, tmpdir: TempDirectory):
     """Clone by path and check the commit, its content and the remotes."""
     commit_hash = repo.commit_content('a')
     clone = Git.clone(repo.path, tmpdir.getpath('clone'))
     # Exactly one commit is expected in the clone's log.
     [commit] = clone('log', '--format=%h').split()
     compare(commit_hash, expected=commit)

     expected_show = ('a commit\n'
                      ' a | 1 +\n'
                      ' 1 file changed, 1 insertion(+)\n')
     shown = clone.git('show', '--pretty=format:%s', '--stat', commit)
     compare(shown, expected=expected_show)

     origin_path = str(repo.path)
     expected_remotes = [
         'origin', origin_path, '(fetch)',
         'origin', origin_path, '(push)',
     ]
     compare(clone('remote', '-v').split(), expected=expected_remotes)
Exemple #20
0
class TestHelpers(object):
    """Mixin with helpers for running ``main`` inside a temporary directory."""

    def setUp(self):
        """Create the temp dir and record a missing path and a search path."""
        self.dir = TempDirectory()
        self.missing = self.dir.getpath('missing')
        self.path = search_path()

    def tearDown(self):
        """Remove the temporary directory."""
        self.dir.cleanup()

    def run_main(self, args=(), output='', return_code=0):
        """Run ``main`` with *args* and check its output and exit code.

        :param args: iterable of command line arguments placed after the
            program name in ``sys.argv``.
        :param output: expected captured output, with every 19-character
            timestamp replaced by ``(ts)``.
        :param return_code: expected ``SystemExit`` code.
        """
        # so we don't leave log handlers lying around...
        # ...level is so that we check the log level is correctly set
        # in setup_logging
        with LogCapture(level=100):
            with Replacer() as r:
                # set argv; list(args) keeps the tuple default usable, since
                # ``['x'] + ()`` raises TypeError
                argv = ['x'] + list(args)
                r.replace('sys.argv', argv)
                r.replace('picky.main.datetime',
                          test_datetime(2001, 1, 2, 3, 4, 5))
                # set PATH env variable
                r.replace('os.environ.PATH', self.path)
                # change to tempdir
                cwd = os.getcwd()
                try:
                    os.chdir(self.dir.path)
                    # get the exit code
                    with ShouldRaise(SystemExit) as s:
                        # capture output
                        with OutputCapture() as actual:
                            main()
                finally:
                    os.chdir(cwd)

        # compare output, with timestamp subbed out (raw string avoids the
        # invalid ``\d`` escape in a normal string literal)
        captured = re.sub(r'[\d\- :]{19}', '(ts)', actual.captured)
        compare(output, captured)

        # compare return code
        compare(return_code, s.raised.code)
class DatabaseHandlerTests(TestCase):
    """Tests for the ``DatabaseHandler`` context manager on a SQLite file."""

    def setUp(self):
        """Create a database with an empty ``notes`` table and capture logs."""
        self.dir = TempDirectory()
        self.db_path = self.dir.getpath('test.db')
        self.conn = sqlite3.connect(self.db_path)
        self.conn.execute('create table notes '
                          '(filename varchar, text varchar)')
        self.conn.commit()
        self.log = LogCapture()

    def tearDown(self):
        """Undo the log capture, close the connection and remove the dir."""
        self.log.uninstall()
        # Close our connection before deleting the directory that holds
        # the database file; it was previously left open.
        self.conn.close()
        self.dir.cleanup()

    def test_normal(self):
        """A committed insert is visible from a separate connection."""
        with DatabaseHandler(self.db_path) as handler:
            handler.conn.execute('insert into notes values (?, ?)',
                                 ('test.txt', 'a note'))
            handler.conn.commit()
        # check the row was inserted and committed
        curs = self.conn.cursor()
        curs.execute('select * from notes')
        self.assertEqual(curs.fetchall(), [('test.txt', 'a note')])
        # check there was no logging
        self.log.check()

    def test_exception(self):
        """An exception inside the block rolls back and logs an error."""
        with ShouldRaise(Exception('foo')):
            with DatabaseHandler(self.db_path) as handler:
                handler.conn.execute('insert into notes values (?, ?)',
                                     ('test.txt', 'a note'))
                raise Exception('foo')
        # check the row not inserted and the transaction was rolled back
        # (queried on the handler's own connection)
        curs = handler.conn.cursor()
        curs.execute('select * from notes')
        self.assertEqual(curs.fetchall(), [])
        # check the error was logged
        self.log.check(('root', 'ERROR', 'Something went wrong'))
Exemple #22
0
def setUpModule():
    """Populate the module-level ``settings`` before the tests run.

    Uses ``SQLALCHEMY_URL`` for the database when set, otherwise a SQLite
    file in a temporary directory that is upgraded to ``head`` via alembic.
    Uses the redis cache backend when ``USE_REDIS`` is set, otherwise a dbm
    file in the temporary directory.
    """
    import os
    tempdir = TempDirectory()

    configured_url = os.getenv('SQLALCHEMY_URL')
    if configured_url:
        settings['sqlalchemy.url'] = configured_url
    else:
        sqlalchemy_url = "sqlite:///%(here)s/sns.sqlite" % {'here': tempdir.path}
        settings['sqlalchemy.url'] = sqlalchemy_url
        # Imported lazily: only needed when the schema is created locally.
        from alembic.config import Config
        from alembic import command
        alembic_cfg = Config("alembic.ini")
        alembic_cfg.set_main_option('sqlalchemy.url', sqlalchemy_url)
        command.upgrade(alembic_cfg, "head")

    use_redis = os.getenv('USE_REDIS')
    if use_redis:
        settings['cache.registration.backend'] = 'dogpile.cache.redis'
        session_include = 'pyramid_redis_sessions'
    else:
        from sns.testing import FakeLock
        settings['cache.registration.backend'] = "dogpile.cache.dbm"
        settings['cache.registration.arguments.filename'] = tempdir.getpath('registration.dbm')
        settings['cache.registration.arguments.lock_factory'] = FakeLock
        session_include = 'sns.signed_cookie_session'
    settings['pyramid.includes'].append(session_include)
class DatabaseHandlerTests(TestCase):
    """Tests for the ``DatabaseHandler`` context manager on a SQLite file."""

    def setUp(self):
        """Create a database with an empty ``notes`` table and capture logs."""
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)
        self.db_path = self.dir.getpath('test.db')
        self.conn = sqlite3.connect(self.db_path)
        # Cleanups run last-in first-out, so the connection is closed
        # before the directory holding the database file is removed.
        self.addCleanup(self.conn.close)
        self.conn.execute('create table notes '
                          '(filename varchar, text varchar)')
        self.conn.commit()
        self.log = LogCapture()
        self.addCleanup(self.log.uninstall)

    def test_normal(self):
        """A committed insert is visible from a separate connection."""
        with DatabaseHandler(self.db_path) as handler:
            handler.conn.execute('insert into notes values (?, ?)',
                                 ('test.txt', 'a note'))
            handler.conn.commit()
        # check the row was inserted and committed
        curs = self.conn.cursor()
        curs.execute('select * from notes')
        self.assertEqual(curs.fetchall(), [('test.txt', 'a note')])
        # check there was no logging
        self.log.check()

    def test_exception(self):
        """An exception inside the block rolls back and logs an error."""
        with ShouldRaise(Exception('foo')):
            with DatabaseHandler(self.db_path) as handler:
                handler.conn.execute('insert into notes values (?, ?)',
                                     ('test.txt', 'a note'))
                raise Exception('foo')
        # check the row not inserted and the transaction was rolled back
        # (queried on the handler's own connection)
        curs = handler.conn.cursor()
        curs.execute('select * from notes')
        self.assertEqual(curs.fetchall(), [])
        # check the error was logged
        self.log.check(('root', 'ERROR', 'Something went wrong'))
class BcolzDailyBarTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        """Slice the environment's trading days down to the test calendar."""
        all_trading_days = TradingEnvironment().trading_days
        first = all_trading_days.get_loc(TEST_CALENDAR_START)
        last = all_trading_days.get_loc(TEST_CALENDAR_STOP)
        # The stop date is included, hence the +1 on the slice end.
        cls.trading_days = all_trading_days[first:last + 1]

    def setUp(self):
        """Build the synthetic writer and a destination path in a temp dir."""
        asset_info = self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(asset_info, self.trading_days)

        tempdir = self.dir_ = TempDirectory()
        tempdir.create()
        self.dest = tempdir.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        self.dir_.cleanup()

    @property
    def assets(self):
        """Index of ``self.asset_info``; the tests iterate it as asset ids."""
        return self.asset_info.index

    def trading_days_between(self, start, end):
        """Return the slice of ``self.trading_days`` selected by *start*/*end*."""
        indexer = self.trading_days.slice_indexer(start, end)
        return self.trading_days[indexer]

    def asset_start(self, asset_id):
        """Return ``self.writer.asset_start(asset_id)``."""
        return self.writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        """Return ``self.writer.asset_end(asset_id)``."""
        return self.writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        """Return the trading days between the asset's start and end."""
        first_day = self.asset_start(asset_id)
        last_day = self.asset_end(asset_id)
        return self.trading_days_between(first_day, last_day)

    def test_write_ohlcv_content(self):
        """Every OHLCV column holds the expected value for each asset/day row."""
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        expected_value = SyntheticDailyBarWriter.expected_value
        for column in SyntheticDailyBarWriter.OHLCV:
            data = table[column][:]
            # Every column except volume is compared against the expected
            # value multiplied by 1000.
            if column == 'volume':
                multiplier = 1
            else:
                multiplier = 1000
            row = 0
            for asset_id in self.assets:
                for date in self.dates_for_asset(asset_id):
                    expected = expected_value(asset_id, date, column) * multiplier
                    self.assertEqual(expected, data[row])
                    row += 1
            # Every stored row must have been visited.
            self.assertEqual(row, len(data))

    def test_write_day_and_id(self):
        """Rows are ordered by asset, then day, with matching id and day."""
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        ids, days = table['id'], table['day']
        row = 0
        for asset_id in self.assets:
            for date in self.dates_for_asset(asset_id):
                self.assertEqual(ids[row], asset_id)
                self.assertEqual(date, seconds_to_timestamp(days[row]))
                row += 1

    def test_write_attrs(self):
        """Check the row ranges, calendar offsets and calendar in the attrs."""
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        attrs = table.attrs

        first_rows = {
            '1': 0,
            '2': 5,  # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        last_rows = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,  # Asset 6 has 9 trading days.
        }
        calendar_offsets = {
            '1': 0,  # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,  # Starts on 6-02, 2nd trading day of month.
            '4': 0,  # Starts on 6-01, 1st trading day of month.
            '5': 9,  # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }

        self.assertEqual(attrs['first_row'], first_rows)
        self.assertEqual(attrs['last_row'], last_rows)
        self.assertEqual(attrs['calendar_offset'], calendar_offsets)

        stored_calendar = DatetimeIndex(attrs['calendar'], tz='UTC')
        assert_index_equal(self.trading_days, stored_calendar)

    def _check_read_results(self, columns, assets, start_date, end_date):
        """Write the table, read *columns* back and compare with the writer.

        Each loaded array is compared with the writer's expected 2D values
        for the trading days between *start_date* and *end_date*.
        """
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        raw_arrays = BcolzDailyBarReader(table).load_raw_arrays(
            columns, start_date, end_date, assets)
        dates = self.trading_days_between(start_date, end_date)
        for column, loaded in zip(columns, raw_arrays):
            expected = self.writer.expected_values_2d(
                dates, assets, column.name)
            assert_array_equal(loaded, expected)

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume, USEquityPricing.high, USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        """Read each column combination over the test query date range."""
        assets = self.assets
        self._check_read_results(
            columns, assets, TEST_QUERY_START, TEST_QUERY_STOP)

    def test_start_on_asset_start(self):
        """
        Load with queries running from each asset's first day to the last
        trading day.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.asset_start(asset)
            query_end = self.trading_days[-1]
            self._check_read_results(
                columns, self.assets, query_start, query_end)

    def test_start_on_asset_end(self):
        """
        Load with queries running from each asset's last day to the last
        trading day.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.asset_end(asset)
            query_end = self.trading_days[-1]
            self._check_read_results(
                columns, self.assets, query_start, query_end)

    def test_end_on_asset_start(self):
        """
        Test loading with queries that begin on the first trading day and
        end on the first day of each asset's lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.trading_days[0]
            query_end = self.asset_start(asset)
            self._check_read_results(
                columns, self.assets, query_start, query_end,
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that begin on the first trading day and
        end on the last day of each asset's lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.trading_days[0]
            query_end = self.asset_end(asset)
            self._check_read_results(
                columns, self.assets, query_start, query_end,
            )

    def test_unadjusted_spot_price(self):
        """
        Test spot reads of the synthetic data for several sid/day/field
        combinations.
        """
        reader = BcolzDailyBarReader(
            self.writer.write(self.dest, self.trading_days, self.assets)
        )
        # (sid, day, field, expected value); the synthetic writer derives
        # each price from the sid and the date.
        cases = [
            (1, '2015-06-01', 'close', 135630.0),  # At beginning
            (1, '2015-06-02', 'close', 135631.0),  # Middle
            (1, '2015-06-05', 'close', 135634.0),  # End
            (2, '2015-06-22', 'close', 235651.0),  # Another sid at beginning
            # Ensure that volume does not have float adjustment applied.
            (1, '2015-06-02', 'volume', 145631),
        ]
        for sid, day, field, expected in cases:
            actual = reader.spot_price(sid, Timestamp(day, tz='UTC'), field)
            self.assertEqual(expected, actual)

    def test_unadjusted_spot_price_no_data(self):
        """
        Test that spot reads on days without data for the sid raise
        NoDataOnDate.
        """
        reader = BcolzDailyBarReader(
            self.writer.write(self.dest, self.trading_days, self.assets)
        )
        missing = [
            (2, '2015-06-08'),  # before
            (4, '2015-06-16'),  # after
        ]
        for sid, day in missing:
            with self.assertRaises(NoDataOnDate):
                reader.spot_price(sid, Timestamp(day, tz='UTC'), 'close')

    def test_unadjusted_spot_price_empty_value(self):
        """
        Test that a stored close of zero is read back as -1.
        """
        reader = BcolzDailyBarReader(
            self.writer.write(self.dest, self.trading_days, self.assets)
        )

        # The sid and day whose stored close gets overwritten with a zero.
        sid = 1
        day = Timestamp('2015-06-02', tz='UTC')
        row = reader.sid_day_index(sid, day)

        # This is a little hacky: poke the zero straight into the reader's
        # close column in lieu of changing the synthetic data set.
        close_column = reader._spot_col('close')
        close_column[row] = 0

        self.assertEqual(-1, reader.spot_price(sid, day, 'close'))
Exemple #25
0
 def test_init_with_user(self, tmpdir: TempDirectory):
     """Initialise a repo with a user and check name/email land in config."""
     # One shared address so the value handed to ``User`` and the value
     # asserted below cannot drift apart.
     email = 'foo.bar@example.com'
     Git(tmpdir.getpath('foo')).init(User(name='Foo Bar', email=email))
     config = tmpdir.read('foo/.git/config')
     assert b'name = Foo Bar' in config
     assert b'email = ' + email.encode('ascii') in config
Exemple #26
0
 def test_init(self, tmpdir: TempDirectory):
     """Initialise a repo in an existing directory and expect a .git dir."""
     tmpdir.makedir('foo')
     repo = Git(tmpdir.getpath('foo'))
     repo.init()
     git_dir = tmpdir.getpath('foo/.git')
     assert os.path.exists(git_dir)
Exemple #27
0
class BcolzMinuteBarTestCase(TestCase):
    """
    Round-trip tests for BcolzMinuteBarWriter and BcolzMinuteBarReader.

    Every test writes minute bars into a fresh temporary directory through
    ``self.writer`` and reads them back through a reader opened on the same
    path.
    """

    # Bar fields, in the order the per-bar assertions read them back.
    _OHLCV_FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """
        Slice the environment's market opens and closes down to the test
        calendar and record the first and last day of that slice.
        """
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        all_market_closes = cls.env.open_and_closes.market_close
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.market_closes = all_market_closes[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """
        Create a temporary ``minute_bars`` directory plus a writer and a
        reader that both point at it.
        """
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the temporary directory created in setUp."""
        self.dir_.cleanup()

    def _assert_bar(self, sid, minute, expected):
        """
        Assert that the reader returns ``expected`` for one bar.

        ``expected`` holds five values in open, high, low, close, volume
        order. Each field is read and checked before the next one is read.
        """
        for field, value in zip(self._OHLCV_FIELDS, expected):
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field))

    def _pad_first_day(self, sid):
        """
        Pad ``sid`` through TEST_CALENDAR_START and return the market open
        of the following calendar entry.

        Also asserts that the sid has no last date before padding and that
        the last date equals TEST_CALENDAR_START afterwards.
        """
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        return self.market_opens[TEST_CALENDAR_START + freq]

    def _assert_nan_prices_zero_volume(self, sid, minutes):
        """
        Assert that a window read over ``minutes`` yields nine NaNs for
        every price field and nine zeros for volume.
        """
        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_write_one_ohlcv(self):
        """
        Test writing a single bar at the first market open.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """
        Test writing two consecutive minutes in one call.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """
        Test writing a single bar at the market open of the second day.
        """
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """
        Test that an all-zero bar reads back as NaN prices and zero volume.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            price = self.reader.get_value(sid, minute, field)
            assert_almost_equal(nan, price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume_price)

    def test_write_on_multiple_days(self):
        """
        Test one write whose two bars fall on different days, each at an
        offset into its session.
        """
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write(sid, data)

        self._assert_bar(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """
        Test that writing the same minute twice raises
        BcolzMinuteOverlappingData.
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            },
            index=[minute])
        self.writer.write(sids[1], data)

        # Both sids are written before either is read back.
        self._assert_bar(sids[0], minute, (15.0, 17.0, 11.0, 15.0, 100.0))
        self._assert_bar(sids[1], minute, (25.0, 27.0, 21.0, 25.0, 200.0))

    def test_pad_data(self):
        """
        Test padding a sid through the first day and then writing a bar on
        the next calendar entry.
        """
        sid = 1
        minute = self._pad_first_day(sid)
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (15.0, 17.0, 11.0, 15.0, 100.0))

    def test_nans(self):
        """
        Test writing nine minutes of NaN prices with zero volume after
        padding, then reading them back as a window.
        """
        sid = 1
        minute = self._pad_first_day(sid)
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        self._assert_nan_prices_zero_volume(sid, minutes)

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.
        """
        sid = 1
        minute = self._pad_first_day(sid)
        minutes = date_range(minute, periods=9, freq='min')

        def nan_run(first_payload):
            # All eleven exponent bits set plus a non-zero mantissa is a
            # NaN; every element carries a different mantissa payload.
            payloads = arange(first_payload, first_payload + 9, dtype=int64)
            return ((0b11111111111 << 52) + payloads).view(float64)

        data = DataFrame(
            data={
                'open': nan_run(1),
                'high': nan_run(11),
                'low': nan_run(21),
                'close': nan_run(31),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        self._assert_nan_prices_zero_volume(sid, minutes)

    def test_write_cols(self):
        """
        Test writing two bars through ``write_cols`` from raw arrays.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.

        Writes three minutes for two sids, the middle minute being NaN
        prices with zero volume, and compares a window read against the
        frames that were written.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write(sids[1], data_2)

        # A fresh reader, opened only after both sids have been written.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        arrays = reader.unadjusted_window(
            columns, minutes[0], minutes[-1], sids)

        data = {sids[0]: data_1, sids[1]: data_2}

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
Exemple #28
0
class BcolzMinuteBarTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        """
        Slice the environment's market opens down to the test calendar and
        record the first and last day of that slice.
        """
        cls.env = TradingEnvironment()
        every_open = cls.env.open_and_closes.market_open
        calendar_slice = every_open.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP,
        )
        cls.market_opens = every_open[calendar_slice]
        calendar_days = cls.market_opens.index
        cls.test_calendar_start = calendar_days[0]
        cls.test_calendar_stop = calendar_days[-1]

    def setUp(self):
        """
        Create a temporary ``minute_bars`` directory plus a writer and a
        reader that both point at it.
        """
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = bars_path = self.dir_.getpath('minute_bars')
        os.makedirs(bars_path)
        writer_args = (
            TEST_CALENDAR_START,
            bars_path,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.writer = BcolzMinuteBarWriter(*writer_args)
        self.reader = BcolzMinuteBarReader(bars_path)

    def tearDown(self):
        """Remove the temporary directory created in setUp."""
        temp_dir = self.dir_
        temp_dir.cleanup()

    def test_write_one_ohlcv(self):
        """
        Test writing a single bar at the first market open and reading
        every field back.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        columns = {
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0],
        }
        self.writer.write(sid, DataFrame(data=columns, index=[minute]))

        # Read back field by field; the expected value is what was written.
        for field in ('open', 'high', 'low', 'close', 'volume'):
            self.assertEqual(
                columns[field][0],
                self.reader.get_value(sid, minute, field),
            )

    def test_write_two_bars(self):
        """
        Test writing two consecutive minutes in one call and reading every
        field of both bars back.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        columns = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0],
        }
        data = DataFrame(data=columns, index=[minute_0, minute_1])
        self.writer.write(sid, data)

        # Row i of every column belongs to the i-th minute.
        fields = ('open', 'high', 'low', 'close', 'volume')
        for row, minute in enumerate((minute_0, minute_1)):
            for field in fields:
                self.assertEqual(
                    columns[field][row],
                    self.reader.get_value(sid, minute, field),
                )

    def test_write_on_second_day(self):
        """
        Test writing a single bar at the market open of the second day.
        """
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        columns = {
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0],
        }
        self.writer.write(sid, DataFrame(data=columns, index=[minute]))

        # Read back field by field; the expected value is what was written.
        for field in ('open', 'high', 'low', 'close', 'volume'):
            self.assertEqual(
                columns[field][0],
                self.reader.get_value(sid, minute, field),
            )

    def test_write_empty(self):
        """
        Test that an all-zero bar reads back as NaN prices and zero volume.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        zero_bar = {
            'open': [0],
            'high': [0],
            'low': [0],
            'close': [0],
            'volume': [0],
        }
        self.writer.write(sid, DataFrame(data=zero_bar, index=[minute]))

        for price_field in ('open', 'high', 'low', 'close'):
            price = self.reader.get_value(sid, minute, price_field)
            assert_almost_equal(nan, price)

        volume = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume)

    def test_write_on_multiple_days(self):
        """
        Test one write whose two bars fall on different days, each at an
        offset into its session.
        """
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                     end=self.test_calendar_start + 3)]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        columns = {
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0],
        }
        self.writer.write(sid, DataFrame(data=columns, index=minutes))

        # Row i of every column belongs to minutes[i].
        fields = ('open', 'high', 'low', 'close', 'volume')
        for row in (0, 1):
            minute = minutes[row]
            for field in fields:
                self.assertEqual(
                    columns[field][row],
                    self.reader.get_value(sid, minute, field),
                )

    def test_no_overwrite(self):
        """
        Test that writing the same minute twice for one sid raises
        BcolzMinuteOverlappingData.
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        bar = {
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0],
        }
        data = DataFrame(data=bar, index=[minute])

        # The first write succeeds; repeating it must be rejected.
        self.writer.write(sid, data)
        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        bars_by_sid = (
            (1, {
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0],
            }),
            (2, {
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0],
            }),
        )

        # Write both sids before reading either back.
        for sid, columns in bars_by_sid:
            self.writer.write(sid, DataFrame(data=columns, index=[minute]))

        fields = ('open', 'high', 'low', 'close', 'volume')
        for sid, columns in bars_by_sid:
            for field in fields:
                self.assertEqual(
                    columns[field][0],
                    self.reader.get_value(sid, minute, field),
                )

    def test_pad_data(self):
        """
        Test padding a sid and then writing a bar after the padded day.

        Before anything is written the writer reports ``NaT`` as the last
        date for the sid.  After ``pad`` the last date is
        ``TEST_CALENDAR_START``, and a bar written at the market open for
        ``TEST_CALENDAR_START + freq`` must read back with the same values.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        # Step one index frequency past the padded day.
        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        # Read the fields back in the order they were written.  assertEqual
        # is used because the assertEquals alias is deprecated.
        expected = (
            ('open', 15.0),
            ('high', 17.0),
            ('low', 11.0),
            ('close', 15.0),
            ('volume', 100.0),
        )
        for field, value in expected:
            self.assertEqual(value, self.reader.get_value(sid, minute, field))

    def test_write_cols(self):
        """
        Test writing two consecutive minutes through ``write_cols``.

        The columns are passed as arrays keyed by field name together with an
        array of ``datetime64[s]`` timestamps; every field of both minutes
        must read back with the value that was written.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        # Expected values are spelled out as literals rather than read from
        # ``cols`` so the check does not depend on the writer leaving its
        # input arrays untouched.
        expected_by_minute = (
            (minute_0, (('open', 10.0), ('high', 20.0), ('low', 30.0),
                        ('close', 40.0), ('volume', 50.0))),
            (minute_1, (('open', 11.0), ('high', 21.0), ('low', 31.0),
                        ('close', 41.0), ('volume', 51.0))),
        )
        for minute, expected in expected_by_minute:
            for field, value in expected:
                # assertEqual replaces the deprecated assertEquals alias.
                self.assertEqual(
                    value, self.reader.get_value(sid, minute, field))

    def test_unadjusted_minutes(self):
        """
        Test reading a window of unadjusted minute bars for two sids.

        Three consecutive minutes are written per sid, the middle one with
        nan prices and zero volume, and the window returned by
        ``unadjusted_window`` is compared column by column, sid by sid.
        """
        first_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            first_minute,
            first_minute + Timedelta('1 min'),
            first_minute + Timedelta('2 min'),
        ]
        sid_a, sid_b = 1, 2

        frame_a = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write(sid_a, frame_a)

        frame_b = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write(sid_b, frame_b)

        # A fresh reader is opened on the destination after both writes.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sid_a, sid_b]
        window_start, window_end = minutes[0], minutes[-1]
        arrays = reader.unadjusted_window(
            columns, window_start, window_end, sids)

        written = {sid_a: frame_a, sid_b: frame_b}

        # arrays is indexed first by column position, then by sid position.
        for col_pos, col in enumerate(columns):
            per_column = arrays[col_pos]
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(written[sid][col], per_column[sid_pos])
Exemple #29
0
class BcolzDailyBarTestCase(TestCase):
    """
    Tests for daily bars written by ``SyntheticDailyBarWriter`` and read back
    through ``BcolzDailyBarReader``.
    """

    @classmethod
    def setUpClass(cls):
        """Restrict the trading calendar to the test window, both ends kept."""
        calendar = TradingEnvironment().trading_days
        first = calendar.get_loc(TEST_CALENDAR_START)
        last = calendar.get_loc(TEST_CALENDAR_STOP)
        cls.trading_days = calendar[first:last + 1]

    def setUp(self):
        """Create the synthetic writer and a temporary output location."""
        self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(
            self.asset_info,
            self.trading_days,
        )

        self.dir_ = tmp = TempDirectory()
        tmp.create()
        self.dest = tmp.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        self.dir_.cleanup()

    @property
    def assets(self):
        """The index of the asset info table."""
        info = self.asset_info
        return info.index

    def trading_days_between(self, start, end):
        """Return the test trading days selected by slicing start..end."""
        window = self.trading_days.slice_indexer(start, end)
        return self.trading_days[window]

    def asset_start(self, asset_id):
        """Return the writer's start date for ``asset_id``."""
        writer = self.writer
        return writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        """Return the writer's end date for ``asset_id``."""
        writer = self.writer
        return writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        """Return the trading days between the asset's start and end."""
        first_day = self.asset_start(asset_id)
        last_day = self.asset_end(asset_id)
        return self.trading_days_between(first_day, last_day)

    def test_write_ohlcv_content(self):
        """
        Every OHLCV value in the written table matches the writer's expected
        value, with all columns except volume scaled by 1000.
        """
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        expected_value = SyntheticDailyBarWriter.expected_value
        for column in SyntheticDailyBarWriter.OHLCV:
            data = result[column][:]
            if column == 'volume':
                multiplier = 1
            else:
                multiplier = 1000
            # Rows are laid out asset by asset, date by date.
            rows = [
                (asset_id, date)
                for asset_id in self.assets
                for date in self.dates_for_asset(asset_id)
            ]
            for idx, (asset_id, date) in enumerate(rows):
                self.assertEqual(
                    expected_value(asset_id, date, column) * multiplier,
                    data[idx],
                )
            # No trailing rows beyond the ones just checked.
            self.assertEqual(len(rows), len(data))

    def test_write_day_and_id(self):
        """The ``id`` and ``day`` columns line up with each asset's dates."""
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        ids = result['id']
        days = result['day']
        row = 0
        for asset_id in self.assets:
            asset_dates = self.dates_for_asset(asset_id)
            for date in asset_dates:
                self.assertEqual(ids[row], asset_id)
                self.assertEqual(date, seconds_to_timestamp(days[row]))
                row += 1

    def test_write_attrs(self):
        """The table attrs hold the per-asset row ranges and the calendar."""
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        attrs = result.attrs

        first_rows = {
            '1': 0,
            '2': 5,   # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        last_rows = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,    # Asset 6 has 9 trading days.
        }
        calendar_offsets = {
            '1': 0,   # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,   # Starts on 6-02, 2nd trading day of month.
            '4': 0,   # Starts on 6-01, 1st trading day of month.
            '5': 9,   # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }

        self.assertEqual(attrs['first_row'], first_rows)
        self.assertEqual(attrs['last_row'], last_rows)
        self.assertEqual(attrs['calendar_offset'], calendar_offsets)

        stored_calendar = DatetimeIndex(attrs['calendar'], tz='UTC')
        assert_index_equal(self.trading_days, stored_calendar)

    def _check_read_results(self, columns, assets, start_date, end_date):
        """
        Write the full table, load ``columns`` for ``assets`` between the two
        dates, and compare each loaded array with the writer's expectation.
        """
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        loaded = reader.load_raw_arrays(columns, start_date, end_date, assets)
        dates = self.trading_days_between(start_date, end_date)
        for column, actual in zip(columns, loaded):
            expected = self.writer.expected_values_2d(
                dates,
                assets,
                column.name,
            )
            assert_array_equal(actual, expected)

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume, USEquityPricing.high, USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        """Read several column combinations over the standard query window."""
        self._check_read_results(
            columns,
            self.assets,
            TEST_QUERY_START,
            TEST_QUERY_STOP,
        )

    def test_start_on_asset_start(self):
        """
        Test loading with queries that start on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.asset_start(asset)
            query_end = self.trading_days[-1]
            self._check_read_results(
                columns,
                self.assets,
                start_date=query_start,
                end_date=query_end,
            )

    def test_start_on_asset_end(self):
        """
        Test loading with queries that start on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.asset_end(asset)
            query_end = self.trading_days[-1]
            self._check_read_results(
                columns,
                self.assets,
                start_date=query_start,
                end_date=query_end,
            )

    def test_end_on_asset_start(self):
        """
        Test loading with queries that end on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.trading_days[0]
            query_end = self.asset_start(asset)
            self._check_read_results(
                columns,
                self.assets,
                start_date=query_start,
                end_date=query_end,
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that end on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            query_start = self.trading_days[0]
            query_end = self.asset_end(asset)
            self._check_read_results(
                columns,
                self.assets,
                start_date=query_start,
                end_date=query_end,
            )
Exemple #30
0
class PluginWithTempDirTests(TestCase):
    """Tests for the git repo plugin, run against a temporary directory."""

    def setUp(self):
        """Create a temporary directory that is cleaned up after the test."""
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)

    def run_actions(self, path=None, **kw):
        """
        Build a git repo plugin and run its actions while capturing logging.

        :param path: Repo path; defaults to the temporary directory.
        :param kw: Extra keyword arguments passed to ``make_git_repo``.
        :return: The ``LogCapture`` that was active during the run.
        """
        with LogCapture() as log:
            plugin = make_git_repo(path=path or self.dir.path, **kw)
            # Swap in a test datetime so the timestamp in the commit message
            # checked by the tests below is predictable.
            with Replacer() as r:
                r.replace("archivist.repos.git.datetime", test_datetime())
                plugin.actions()
        return log

    def git(self, command, repo_path=None):
        """
        Run a git command and return whatever ``run`` returns.

        :param command: Arguments as a single string; it is split on
                        whitespace, so arguments cannot contain spaces.
        :param repo_path: Working directory; defaults to the temp directory.
        """
        return run(["git"] + command.split(), cwd=repo_path or self.dir.path)

    def make_repo_with_content(self, repo=""):
        """
        Initialise a git repo and commit files ``a``, ``b`` and ``c``.

        :param repo: Path prefix relative to the temp directory, written
                     with its trailing slash (e.g. ``"origin/"``); empty
                     means the temp directory itself.
        :return: ``repo``, unchanged.
        """
        repo_path = self.dir.getpath(repo) if repo else None
        self.git("init", repo_path)
        self.dir.write(repo + "a", "some content")
        self.dir.write(repo + "b", "other content")
        self.dir.write(repo + "c", "more content")
        self.git("add .", repo_path)
        self.git("commit -m initial", repo_path)
        return repo

    def make_local_changes(self, repo=""):
        """Modify ``b``, delete ``c`` and add ``d`` without committing."""
        self.dir.write(repo + "b", "changed content")
        os.remove(self.dir.getpath(repo + "c"))
        self.dir.write(repo + "d", "new content")

    def status_log_entry(self, lines, repo_path=None):
        """
        Build the expected INFO log record for a status report.

        :param lines: Message lines; each is formatted with ``repo`` set to
                      the repo path, and the lines are newline-joined.
        :param repo_path: Repo path; defaults to the temp directory.
        :return: A ``(logger name, level, message)`` tuple.
        """
        repo = repo_path or self.dir.path
        return (
            "archivist.repos.git",
            "INFO",
            "\n".join(line.format(repo=repo) for line in lines) + "\n",
        )

    def check_git_log(self, lines, repo_path=None):
        """Compare ``lines`` with the output of ``git log --stat``."""
        compare(
            "\n".join(lines) + "\n",
            self.git("log --pretty=format:%s --stat", repo_path),
        )

    def get_dummy_source(self, name):
        """Return a do-nothing ``Source`` of type ``dummy`` called ``name``."""
        class DummySource(Source):
            schema = Schema({})

            def __init__(self, type, name, repo):
                super(DummySource, self).__init__(type, name, repo)

            def process(self, path):
                pass

        return DummySource("dummy", name, "repo")

    def test_path_for_with_name(self):
        compare(
            self.dir.getpath("dummy/the_name"),
            make_git_repo(path=self.dir.path).path_for(
                self.get_dummy_source("the_name")
            ),
        )
        self.assertTrue(os.path.exists(self.dir.getpath("dummy/the_name")))

    def test_path_for_no_name(self):
        compare(
            self.dir.getpath("dummy"),
            make_git_repo(path=self.dir.path).path_for(
                self.get_dummy_source(name=None)
            ),
        )
        self.assertTrue(os.path.exists(self.dir.getpath("dummy")))

    def test_not_there(self):
        repo_path = self.dir.getpath("var")
        log = self.run_actions(repo_path)
        log.check(("archivist.repos.git", "INFO", "creating git repo at " + repo_path))
        # Asserting on the path string alone would always pass, so check
        # that the .git directory really exists on disk.
        self.assertTrue(os.path.exists(self.dir.getpath("var/.git")))

    def test_there_not_git(self):
        repo_path = self.dir.makedir("var")
        log = self.run_actions(repo_path)
        log.check(("archivist.repos.git", "INFO", "creating git repo at " + repo_path))
        # Asserting on the path string alone would always pass, so check
        # that the .git directory really exists on disk.
        self.assertTrue(os.path.exists(self.dir.getpath("var/.git")))

    def test_no_changes(self):
        self.git("init")
        log = self.run_actions()
        log.check()  # no logging

    def test_just_log_changes(self):
        self.make_repo_with_content()
        self.make_local_changes()
        log = self.run_actions(commit=False)
        log.check(self.status_log_entry(["changes found in git repo at {repo}:", " M b", " D c", "?? d"]))
        self.check_git_log(["initial", " a | 1 +", " b | 1 +", " c | 1 +", " 3 files changed, 3 insertions(+)"])

    def test_commit_changes(self):
        self.make_repo_with_content()
        self.make_local_changes()
        log = self.run_actions(commit=True)
        log.check(
            self.status_log_entry(["changes found in git repo at {repo}:", " M b", " D c", "?? d"]),
            ("archivist.repos.git", "INFO", "changes committed"),
        )
        # Nothing is left uncommitted after the plugin has run.
        compare("", self.git("status --porcelain"))
        self.check_git_log(
            [
                "Recorded by archivist at 2001-01-01 00:00",
                " b | 2 +-",
                " c | 1 -",
                " d | 1 +",
                " 3 files changed, 2 insertions(+), 2 deletions(-)",
                "",
                "initial",
                " a | 1 +",
                " b | 1 +",
                " c | 1 +",
                " 3 files changed, 3 insertions(+)",
            ]
        )

    def test_push_changes(self):
        origin_path = self.dir.makedir("origin")
        self.make_repo_with_content(repo="origin/")
        self.git("config --local --add receive.denyCurrentBranch ignore", origin_path)
        self.git("clone -q " + origin_path + " local")
        self.make_local_changes(repo="local/")

        local_path = self.dir.getpath("local")
        log = self.run_actions(commit=True, push=True, path=local_path)
        log.check(
            self.status_log_entry(
                ["changes found in git repo at {repo}:", " M b", " D c", "?? d"], repo_path=local_path
            ),
            ("archivist.repos.git", "INFO", "changes committed"),
            ("archivist.repos.git", "INFO", "changes pushed"),
        )
        # The pushed commit must be visible in the origin's history.
        self.check_git_log(
            [
                "Recorded by archivist at 2001-01-01 00:00",
                " b | 2 +-",
                " c | 1 -",
                " d | 1 +",
                " 3 files changed, 2 insertions(+), 2 deletions(-)",
                "",
                "initial",
                " a | 1 +",
                " b | 1 +",
                " c | 1 +",
                " 3 files changed, 3 insertions(+)",
            ],
            repo_path=origin_path,
        )

    def test_push_no_changes(self):
        self.git("init")
        log = self.run_actions(commit=True, push=True)
        log.check()  # no logging

    def test_default_repo_config(self):
        # can't test actions due to default path
        GitRepo(**GitRepo.schema(default_repo_config))
class TestJenkinsSource(TestCase):
    """Schema and interface tests for the jenkins source ``Plugin``."""

    def setUp(self):
        """Create a temporary directory that is cleaned up after the test."""
        self.dir = tmp = TempDirectory()
        self.addCleanup(tmp.cleanup)

    def test_abc(self):
        is_source = issubclass(Plugin, Source)
        self.assertTrue(is_source)

    def test_schema_max(self):
        # Every key supplied: the schema returns the config unchanged.
        expected = {
            'type': 'jenkins',
            'name': 'core',
            'repo': 'config',
            'path': self.dir.path,
        }
        supplied = dict(expected)
        compare(expected, actual=Plugin.schema(supplied))

    def test_schema_min(self):
        # No name supplied: the expected result has name 'jenkins'.
        expected = {
            'type': 'jenkins',
            'name': 'jenkins',
            'repo': 'config',
            'path': self.dir.path,
        }
        supplied = {
            'type': 'jenkins',
            'repo': 'config',
            'path': self.dir.path,
        }
        compare(expected, actual=Plugin.schema(supplied))

    def test_schema_wrong_type(self):
        config = {'type': 'bar', 'path': ['/']}
        message = "expected str for dictionary value @ data['path']"
        with ShouldFailSchemaWith(message):
            Plugin.schema(config)

    def test_schema_extra_keys(self):
        config = {'type': 'jenkins', 'foo': 'bar'}
        with ShouldFailSchemaWith("extra keys not allowed @ data['foo']"):
            Plugin.schema(config)

    def test_invalid_name(self):
        config = {'type': 'jenkins', 'name': []}
        message = "expected str for dictionary value @ data['name']"
        with ShouldFailSchemaWith(message):
            Plugin.schema(config)

    def test_no_path(self):
        config = {'type': 'jenkins', 'path': ''}
        message = "'' does not exist for dictionary value @ data['path']"
        with ShouldFailSchemaWith(message):
            Plugin.schema(config)

    def test_path_not_string(self):
        config = {'type': 'jenkins', 'path': 1}
        message = "expected str for dictionary value @ data['path']"
        with ShouldFailSchemaWith(message):
            Plugin.schema(config)

    def test_path_not_there(self):
        # A path inside the temp directory that was never created.
        invalid = self.dir.getpath('foo')
        template = "'%s' does not exist for dictionary value @ data['path']"
        message = template % invalid
        with ShouldFailSchemaWith(message):
            Plugin.schema({'type': 'jenkins', 'path': invalid})

    def test_interface(self):
        plugin = Plugin('source', name='jenkins', repo='config',
                        path='root')
        checks = (
            ('type', 'source'),
            ('name', 'jenkins'),
            ('repo', 'config'),
            ('source_paths', 'root'),
        )
        for attribute, value in checks:
            compare(getattr(plugin, attribute), value)
class BcolzMinuteBarTestCase(TestCase):
    """
    Round-trip tests for minute bars: data written with
    ``BcolzMinuteBarWriter`` is read back field by field through
    ``BcolzMinuteBarReader.get_value``.
    """

    # Field names, in the order every test writes and reads them.
    _FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Slice the market opens down to the test calendar window."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create a writer and a reader over a fresh temporary directory."""
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the temporary directory created in ``setUp``."""
        self.dir_.cleanup()

    def _make_frame(self, index, bars):
        """
        Build an OHLCV DataFrame.

        :param index: Index for the frame, one entry per bar.
        :param bars: Sequence of ``(open, high, low, close, volume)``
                     tuples, one per entry in ``index``.
        """
        columns = {}
        for position, field in enumerate(self._FIELDS):
            columns[field] = [bar[position] for bar in bars]
        return DataFrame(data=columns, index=index)

    def _assert_bar(self, sid, minute, bar):
        """
        Assert that the reader returns ``bar`` for ``sid`` at ``minute``.

        :param bar: Expected ``(open, high, low, close, volume)`` tuple.
        """
        for field, value in zip(self._FIELDS, bar):
            # assertEqual replaces the deprecated assertEquals alias.
            self.assertEqual(value, self.reader.get_value(sid, minute, field))

    def test_write_one_ohlcv(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        bar = (10.0, 20.0, 30.0, 40.0, 50.0)
        self.writer.write(sid, self._make_frame([minute], [bar]))

        self._assert_bar(sid, minute, bar)

    def test_write_two_bars(self):
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        bars = [
            (10.0, 20.0, 30.0, 40.0, 50.0),
            (11.0, 21.0, 31.0, 41.0, 51.0),
        ]
        self.writer.write(sid, self._make_frame([minute_0, minute_1], bars))

        self._assert_bar(sid, minute_0, bars[0])
        self._assert_bar(sid, minute_1, bars[1])

    def test_write_on_second_day(self):
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        bar = (10.0, 20.0, 30.0, 40.0, 50.0)
        self.writer.write(sid, self._make_frame([minute], [bar]))

        self._assert_bar(sid, minute, bar)

    def test_write_empty(self):
        """An all-zero bar reads back as nan prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        self.writer.write(sid, self._make_frame([minute], [(0, 0, 0, 0, 0)]))

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):

        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        # One bar an hour into the first selected day and one two hours
        # into the next.
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        bars = [
            (10.0, 20.0, 30.0, 40.0, 50.0),
            (11.0, 21.0, 31.0, 41.0, 51.0),
        ]
        self.writer.write(sid, self._make_frame(minutes, bars))

        self._assert_bar(sid, minutes[0], bars[0])
        self._assert_bar(sid, minutes[1], bars[1])

    def test_no_overwrite(self):
        """Writing the same minute twice raises an overlapping-data error."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = self._make_frame([minute], [(10.0, 20.0, 30.0, 40.0, 50.0)])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        bars = [
            (15.0, 17.0, 11.0, 15.0, 100.0),
            (25.0, 27.0, 21.0, 25.0, 200.0),
        ]
        self.writer.write(sids[0], self._make_frame([minute], [bars[0]]))
        self.writer.write(sids[1], self._make_frame([minute], [bars[1]]))

        self._assert_bar(sids[0], minute, bars[0])
        self._assert_bar(sids[1], minute, bars[1])

    def test_pad_data(self):
        """
        Test padding a sid and then writing a bar after the padded day.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        # Step one index frequency past the padded day.
        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        bar = (15.0, 17.0, 11.0, 15.0, 100.0)
        self.writer.write(sid, self._make_frame([minute], [bar]))

        self._assert_bar(sid, minute, bar)

    def test_write_cols(self):
        """Test writing two consecutive minutes through ``write_cols``."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        # Expected values are literals so the check does not depend on the
        # writer leaving its input arrays untouched.
        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))
class BcolzDailyBarTestCase(TestCase):
    """
    Round-trip tests for daily bars.

    Data produced by ``SyntheticDailyBarWriter`` is written to a temporary
    destination and then either inspected directly or read back through
    ``BcolzDailyBarReader``.
    """

    @classmethod
    def setUpClass(cls):
        """Restrict the trading calendar to the inclusive test window."""
        calendar = TradingEnvironment().trading_days
        first_loc = calendar.get_loc(TEST_CALENDAR_START)
        last_loc = calendar.get_loc(TEST_CALENDAR_STOP)
        cls.trading_days = calendar[first_loc:last_loc + 1]

    def setUp(self):
        """Build the synthetic writer and a scratch destination path."""
        self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(
            self.asset_info, self.trading_days)

        self.dir_ = scratch = TempDirectory()
        scratch.create()
        self.dest = scratch.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        """Remove the scratch directory."""
        self.dir_.cleanup()

    @property
    def assets(self):
        """Index of the asset info frame (the asset ids under test)."""
        return self.asset_info.index

    def trading_days_between(self, start, end):
        """Return the slice of the test calendar selected by start/end."""
        calendar = self.trading_days
        window = calendar.slice_indexer(start, end)
        return calendar[window]

    def asset_start(self, asset_id):
        """Delegate to the writer for the asset's start."""
        return self.writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        """Delegate to the writer for the asset's end."""
        return self.writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        """Return the test-calendar days between the asset's start and end."""
        return self.trading_days_between(
            self.asset_start(asset_id),
            self.asset_end(asset_id),
        )

    def test_write_ohlcv_content(self):
        """Each OHLCV column holds the expected value for every asset/day."""
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        for column in SyntheticDailyBarWriter.OHLCV:
            stored = result[column][:]
            # Non-volume columns are compared against expected * 1000.
            if column == 'volume':
                scale = 1
            else:
                scale = 1000

            asset_days = (
                (asset_id, day)
                for asset_id in self.assets
                for day in self.dates_for_asset(asset_id)
            )
            checked = 0
            for checked, (asset_id, day) in enumerate(asset_days, 1):
                expected = SyntheticDailyBarWriter.expected_value(
                    asset_id, day, column) * scale
                self.assertEqual(expected, stored[checked - 1])

            # Every stored row must have been visited exactly once.
            self.assertEqual(checked, len(stored))

    def test_write_day_and_id(self):
        """The 'id' and 'day' columns line up with each asset's dates."""
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        ids = result['id']
        days = result['day']
        asset_days = (
            (asset_id, day)
            for asset_id in self.assets
            for day in self.dates_for_asset(asset_id)
        )
        for row, (asset_id, day) in enumerate(asset_days):
            self.assertEqual(ids[row], asset_id)
            self.assertEqual(day, seconds_to_timestamp(days[row]))

    def test_write_attrs(self):
        """The table attrs carry row ranges, calendar offsets and calendar."""
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        attrs = result.attrs

        first_row = {
            '1': 0,
            '2': 5,   # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        last_row = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,  # Asset 6 has 9 trading days.
        }
        calendar_offset = {
            '1': 0,   # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,   # Starts on 6-02, 2nd trading day of month.
            '4': 0,   # Starts on 6-01, 1st trading day of month.
            '5': 9,   # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }

        checks = (
            ('first_row', first_row),
            ('last_row', last_row),
            ('calendar_offset', calendar_offset),
        )
        for attr_name, expected in checks:
            self.assertEqual(attrs[attr_name], expected)

        assert_index_equal(
            self.trading_days,
            DatetimeIndex(attrs['calendar'], tz='UTC'),
        )

    def _check_read_results(self, columns, assets, start_date, end_date):
        """
        Write the synthetic data, load ``columns`` for ``assets`` over the
        given date range and compare each array with the writer's expected
        2-d values.
        """
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        loaded = BcolzDailyBarReader(table).load_raw_arrays(
            columns, start_date, end_date, assets)
        query_days = self.trading_days_between(start_date, end_date)
        for column, actual in zip(columns, loaded):
            expected = self.writer.expected_values_2d(
                query_days, assets, column.name)
            assert_array_equal(actual, expected)

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume, USEquityPricing.high, USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        """Read several column combinations over the standard query window."""
        self._check_read_results(
            columns, self.assets, TEST_QUERY_START, TEST_QUERY_STOP)

    def test_start_on_asset_start(self):
        """
        Queries beginning on the first day of each asset's lifetime and
        running to the end of the calendar.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                self.asset_start(asset),
                self.trading_days[-1],
            )

    def test_start_on_asset_end(self):
        """
        Queries beginning on the last day of each asset's lifetime and
        running to the end of the calendar.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                self.asset_end(asset),
                self.trading_days[-1],
            )

    def test_end_on_asset_start(self):
        """
        Queries running from the start of the calendar to the first day of
        each asset's lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                self.trading_days[0],
                self.asset_start(asset),
            )

    def test_end_on_asset_end(self):
        """
        Queries running from the start of the calendar to the last day of
        each asset's lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                self.trading_days[0],
                self.asset_end(asset),
            )

    def test_unadjusted_spot_price(self):
        """Spot prices match the synthetic values at several sid/day pairs."""
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)

        close_cases = (
            (1, '2015-06-01', 135630.0),  # beginning of sid 1
            (1, '2015-06-02', 135631.0),  # middle of sid 1
            (1, '2015-06-05', 135634.0),  # end of sid 1
            (2, '2015-06-22', 235651.0),  # another sid, at its beginning
        )
        for sid, day, expected in close_cases:
            price = reader.spot_price(sid, Timestamp(day, tz='UTC'), 'close')
            self.assertEqual(expected, price)

        # Ensure that volume does not have float adjustment applied.
        volume = reader.spot_price(
            1, Timestamp('2015-06-02', tz='UTC'), 'volume')
        self.assertEqual(145631, volume)

    def test_unadjusted_spot_price_no_data(self):
        """Asking for a day outside an asset's data raises NoDataOnDate."""
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)

        out_of_range = (
            (2, '2015-06-08'),  # before
            (4, '2015-06-16'),  # after
        )
        for sid, day in out_of_range:
            with self.assertRaises(NoDataOnDate):
                reader.spot_price(sid, Timestamp(day, tz='UTC'), 'close')

    def test_unadjusted_spot_price_empty_value(self):
        """A stored zero close is reported by spot_price as -1."""
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)

        # Pick one sid/day and locate its row so a zero can be written there.
        zero_sid = 1
        zero_day = Timestamp('2015-06-02', tz='UTC')
        zero_ix = reader.sid_day_index(zero_sid, zero_day)

        # Overwrite the synthetic close in place; a little hacky, but it
        # avoids changing the synthetic data set itself.
        reader._spot_col('close')[zero_ix] = 0

        self.assertEqual(-1, reader.spot_price(zero_sid, zero_day, 'close'))
class BaseSupervisorTestCase(TestCase):
    """
    Base class for running supervisor tests.

    Provides helpers to start and stop a ``supervisord`` process from a
    generated configuration file, to run ``supervisorctl`` against that
    configuration, and to run an in-process UDP server standing in for
    Logstash whose received messages can be inspected.
    """
    maxDiff = None
    integration_test = 1

    def __init__(self, *args, **kwargs):
        super(BaseSupervisorTestCase, self).__init__(*args, **kwargs)
        self.supervisor = None
        self.logstash = None
        # store, as it's also used by supervisorctl
        self._config_file_path = None

    def setUp(self):
        """Create the scratch directory that holds the generated config."""
        self.scratch = TempDirectory()

    def tearDown(self):
        """Remove the scratch directory."""
        self.scratch.cleanup()

    def run_supervisor(self, overrides, configuration_string):
        """
        Runs Supervisor.

        The ``supervisord.template`` file next to this module is read,
        ``configuration_string`` is appended to it, and the result is written
        to ``supervisor.conf`` in the scratch directory. ``supervisord`` is
        then started with that file.

        :param overrides: mapping of environment variables applied on top of
            a copy of the current environment for the child process.
        :param configuration_string: text appended to the template.
        """
        environment = os.environ.copy()
        environment.update(overrides)

        working_directory = os.path.dirname(__file__)

        template_path = os.path.join(working_directory, 'supervisord.template')
        with open(template_path) as template:
            configuration = template.read()
        configuration += configuration_string
        self.scratch.write('supervisor.conf', configuration, 'utf-8')

        # store, as it's also used by supervisorctl
        self._config_file_path = self.scratch.getpath('supervisor.conf')

        self.supervisor = subprocess.Popen(
            ['supervisord', '-c', self._config_file_path],
            env=environment,
            cwd=os.path.dirname(working_directory),
        )

    def shutdown_supervisor(self):
        """
        Shuts Supervisor down and blocks until the process has exited.
        """
        self.supervisor.terminate()
        # Need to wait while the process kills off its children and exits so
        # that it doesn't block the port. wait() returns as soon as the
        # process is gone instead of polling in one-second steps.
        self.supervisor.wait()

    def run_supervisorctl(self, args):
        """
        Runs supervisorctl using the test suite's config file.

        :param args: list of extra command-line arguments.
        :returns: the exit status of the ``supervisorctl`` call.
        """
        command = [
            'supervisorctl',
            '-c', self._config_file_path,
        ]
        command += args

        return subprocess.call(command)

    def run_logstash(self):
        """
        Runs a socketserver instance emulating Logstash.

        The server binds to an OS-assigned UDP port and is served from a
        background thread.

        :returns: the server instance.
        """
        self.logstash = socketserver.UDPServer(('0.0.0.0', 0), LogstashHandler)
        server_thread = threading.Thread(target=self.logstash.serve_forever)
        # Daemonize so that a test which fails before shutdown_logstash() is
        # reached cannot keep the interpreter alive at exit.
        server_thread.daemon = True
        server_thread.start()
        return self.logstash

    def shutdown_logstash(self):
        """
        Shuts the socketserver instance down
        """
        self.logstash.shutdown()
        self.logstash.server_close()

    def messages(self, clear_buffer=False, wait_for=None):
        """
        Returns the contents of the logstash message buffer, each entry
        passed through ``strip_volatile``.

        :param clear_buffer: when true, empty the buffer after reading it.
        :param wait_for: when not ``None``, keep polling (every 0.1s) until
            at least this many messages are buffered. There is no timeout.
        """
        handler = self.logstash.RequestHandlerClass
        # Read the buffer before sleeping, so already-buffered messages are
        # returned immediately (including for wait_for=0).
        messages = handler.messages[:]
        if wait_for is not None:
            while len(messages) < wait_for:
                sleep(0.1)
                messages = handler.messages[:]

        parsed_messages = list(map(strip_volatile, messages))
        if clear_buffer:
            self.clear_message_buffer()

        return parsed_messages

    def get_message_buffer(self):
        """
        Returns a copy of the raw logstash message buffer
        """
        return self.logstash.RequestHandlerClass.messages[:]

    def clear_message_buffer(self):
        """
        Clears the logstash message buffer
        """
        self.logstash.RequestHandlerClass.messages = []
Exemple #35
0
class BcolzMinuteBarTestCase(TestCase):
    @classmethod
    def setUpClass(cls):
        """
        Slice the environment's market opens/closes down to the test
        calendar window and remember the first and last session labels.
        """
        cls.env = TradingEnvironment()
        schedule = cls.env.open_and_closes
        opens = schedule.market_open
        closes = schedule.market_close

        window = opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP,
        )
        cls.market_opens = opens[window]
        cls.market_closes = closes[window]

        sessions = cls.market_opens.index
        cls.test_calendar_start = sessions[0]
        cls.test_calendar_stop = sessions[-1]

    def setUp(self):
        """
        Create a scratch ``minute_bars`` directory and a writer/reader pair
        pointed at it.
        """
        self.dir_ = scratch = TempDirectory()
        scratch.create()

        self.dest = scratch.getpath('minute_bars')
        os.makedirs(self.dest)

        writer_args = (
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.writer = BcolzMinuteBarWriter(*writer_args)
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the scratch directory created in ``setUp``."""
        scratch = self.dir_
        scratch.cleanup()

    def test_write_one_ohlcv(self):
        """
        Write a single OHLCV bar at the first market open and check that
        every field reads back unchanged.
        """
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute],
        )
        self.writer.write_sid(sid, data)

        expected = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        for field, value in expected:
            # assertEqual: the assertEquals alias is deprecated and was
            # removed in Python 3.12.
            self.assertEqual(value, self.reader.get_value(sid, minute, field))

    def test_write_two_bars(self):
        """
        Write two consecutive minute bars in one frame and check that every
        field reads back unchanged at both minutes.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1],
        )
        self.writer.write_sid(sid, data)

        expected = (
            (minute_0, (('open', 10.0), ('high', 20.0), ('low', 30.0),
                        ('close', 40.0), ('volume', 50.0))),
            (minute_1, (('open', 11.0), ('high', 21.0), ('low', 31.0),
                        ('close', 41.0), ('volume', 51.0))),
        )
        for minute, fields in expected:
            for field, value in fields:
                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(
                    value,
                    self.reader.get_value(sid, minute, field),
                )

    def test_write_on_second_day(self):
        """
        Write a single bar at the open of the second calendar day and check
        that every field reads back unchanged.
        """
        # Step by the calendar's own frequency. Adding a bare integer to a
        # Timestamp is deprecated in pandas 0.24 and removed in 1.0.
        second_day = self.test_calendar_start + self.market_opens.index.freq
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute],
        )
        self.writer.write_sid(sid, data)

        expected = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        for field, value in expected:
            # assertEqual: the assertEquals alias is deprecated and was
            # removed in Python 3.12.
            self.assertEqual(value, self.reader.get_value(sid, minute, field))

    def test_write_empty(self):
        """
        Write an all-zero bar; the price fields are expected to read back as
        nan and the volume as 0.
        """
        sid = 1
        minute = self.market_opens[self.test_calendar_start]
        zero_bar = {
            'open': [0],
            'high': [0],
            'low': [0],
            'close': [0],
            'volume': [0]
        }
        self.writer.write_sid(sid, DataFrame(data=zero_bar, index=[minute]))

        for price_field in ('open', 'high', 'low', 'close'):
            price = self.reader.get_value(sid, minute, price_field)
            assert_almost_equal(nan, price)

        volume = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume)

    def test_write_on_multiple_days(self):
        """
        Write one frame whose two bars fall on different days (60 minutes
        into one session, 120 minutes into the next selected session) and
        check that every field reads back unchanged at both minutes.
        """
        tds = self.market_opens.index
        # Step by the calendar's own frequency. Adding a bare integer to a
        # Timestamp is deprecated in pandas 0.24 and removed in 1.0.
        freq = tds.freq
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + freq,
            end=self.test_calendar_start + freq * 3,
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes,
        )
        self.writer.write_sid(sid, data)

        expected = (
            (minutes[0], (('open', 10.0), ('high', 20.0), ('low', 30.0),
                          ('close', 40.0), ('volume', 50.0))),
            (minutes[1], (('open', 11.0), ('high', 21.0), ('low', 31.0),
                          ('close', 41.0), ('volume', 51.0))),
        )
        for minute, fields in expected:
            for field, value in fields:
                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(
                    value,
                    self.reader.get_value(sid, minute, field),
                )

    def test_no_overwrite(self):
        """
        Writing the same bar twice for one sid must raise
        ``BcolzMinuteOverlappingData`` on the second write.
        """
        sid = 1
        minute = self.market_opens[TEST_CALENDAR_START]
        bar = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute],
        )

        # First write succeeds; the identical second write is rejected.
        self.writer.write_sid(sid, bar)
        self.assertRaises(
            BcolzMinuteOverlappingData,
            self.writer.write_sid,
            sid,
            bar,
        )

    def test_append_to_same_day(self):
        """
        Test writing data with the same date as existing data in our file.

        A bar is written at the first minute, then a second bar one minute
        later in a separate write; the second bar must read back unchanged.
        """
        sid = 1

        first_minute = self.market_opens[TEST_CALENDAR_START]
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[first_minute],
        )
        self.writer.write_sid(sid, data)

        # Write data in the same day as the previous minute
        second_minute = first_minute + Timedelta(minutes=1)
        new_data = DataFrame(
            data={
                'open': [5.0],
                'high': [10.0],
                'low': [3.0],
                'close': [7.0],
                'volume': [10.0]
            },
            index=[second_minute],
        )
        self.writer.write_sid(sid, new_data)

        expected = (
            ('open', 5.0),
            ('high', 10.0),
            ('low', 3.0),
            ('close', 7.0),
            ('volume', 10.0),
        )
        for field, value in expected:
            # assertEqual: the assertEquals alias is deprecated and was
            # removed in Python 3.12.
            self.assertEqual(
                value,
                self.reader.get_value(sid, second_minute, field),
            )

    def test_append_on_new_day(self):
        """
        Write the same bar at the first minute and again one day later, then
        check the gap between them: the second minute reads nan prices and
        zero volume, and the table length is one full day plus one row.
        """
        sid = 1
        ohlcv = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0]
        }

        first_minute = self.market_opens[TEST_CALENDAR_START]
        for write_minute in (first_minute, first_minute + Timedelta(days=1)):
            frame = DataFrame(data=ohlcv, index=[write_minute])
            self.writer.write_sid(sid, frame)

        # The minute right after the first bar was never written explicitly,
        # so it should hold padding: nan prices and zero volume.
        padded_minute = first_minute + Timedelta(minutes=1)
        for price_field in ('open', 'high', 'low', 'close'):
            price = self.reader.get_value(sid, padded_minute, price_field)
            assert_almost_equal(nan, price)
        padded_volume = self.reader.get_value(sid, padded_minute, 'volume')
        self.assertEqual(0, padded_volume)

        # The first day should contain US_EQUITIES_MINUTES_PER_DAY rows.
        # The second day should contain a single row.
        row_count = len(self.writer._ensure_ctable(sid))
        self.assertEqual(row_count, US_EQUITIES_MINUTES_PER_DAY + 1)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`.)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute],
        )
        self.writer.write_sid(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            },
            index=[minute],
        )
        self.writer.write_sid(sids[1], data)

        # Each sid must read back its own bar, not the other sid's.
        expected = (
            (sids[0], (('open', 15.0), ('high', 17.0), ('low', 11.0),
                       ('close', 15.0), ('volume', 100.0))),
            (sids[1], (('open', 25.0), ('high', 27.0), ('low', 21.0),
                       ('close', 25.0), ('volume', 200.0))),
        )
        for sid, fields in expected:
            for field, value in fields:
                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(
                    value,
                    self.reader.get_value(sid, minute, field),
                )

    def test_pad_data(self):
        """
        Test writing empty data.

        Pads a sid through the first calendar day, writes one bar at the
        next day's open, then pads that day too and checks the table holds
        two days' worth of minutes.
        """
        sid = 1
        # Nothing has been written for this sid yet.
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        day = TEST_CALENDAR_START + freq
        minute = self.market_opens[day]

        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute],
        )
        self.writer.write_sid(sid, data)

        expected = (
            ('open', 15.0),
            ('high', 17.0),
            ('low', 11.0),
            ('close', 15.0),
            ('volume', 100.0),
        )
        for field, value in expected:
            # assertEqual, matching the rest of this method: the
            # assertEquals alias is deprecated and was removed in
            # Python 3.12.
            self.assertEqual(value, self.reader.get_value(sid, minute, field))

        # Check that if we then pad the rest of this day, we end up with
        # 2 days worth of minutes.
        self.writer.pad(sid, day)

        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            self.writer._minutes_per_day * 2,
        )

    def test_nans(self):
        """
        Test writing nan prices.

        After padding the first day, nine minutes of nan OHLC values with
        zero volume are written on the next day and loaded back as raw
        arrays: prices are expected as nan, volume as zeros.
        """
        sid = 1
        # Nothing has been written for this sid yet.
        self.assertIs(self.writer.last_date_in_output_for_sid(sid), NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)
        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            TEST_CALENDAR_START,
        )

        next_day = TEST_CALENDAR_START + self.market_opens.index.freq
        minutes = date_range(
            self.market_opens[next_day], periods=9, freq='min')

        frame_data = {
            'open': full(9, nan),
            'high': full(9, nan),
            'low': full(9, nan),
            'close': full(9, nan),
            'volume': full(9, 0),
        }
        # NOTE(review): the index is passed as a one-element list wrapping
        # the DatetimeIndex, exactly as before; confirm this is intended.
        self.writer.write_sid(sid, DataFrame(data=frame_data, index=[minutes]))

        fields = ['open', 'high', 'low', 'close', 'volume']
        raw_arrays = self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid])
        ohlcv_window = [transpose(raw) for raw in raw_arrays]

        for position, field in enumerate(fields):
            sid_values = ohlcv_window[position][0]
            if field == 'volume':
                assert_array_equal(zeros(9), sid_values)
            else:
                assert_array_equal(full(9, nan), sid_values)

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.

        Each price value is built by adding a different small integer to the
        bit pattern with all eleven exponent bits set and viewing the result
        as float64; all of them are expected to load back as nan.
        """
        sid = 1
        # Nothing has been written for this sid yet.
        self.assertIs(self.writer.last_date_in_output_for_sid(sid), NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)
        self.assertEqual(
            self.writer.last_date_in_output_for_sid(sid),
            TEST_CALENDAR_START,
        )

        next_day = TEST_CALENDAR_START + self.market_opens.index.freq
        minutes = date_range(
            self.market_opens[next_day], periods=9, freq='min')

        exponent_bits = 0b11111111111 << 52

        def payload_nans(first, stop):
            # Distinct non-zero low bits on top of the all-ones exponent.
            payloads = arange(first, stop, dtype=int64)
            return (exponent_bits + payloads).view(float64)

        frame_data = {
            'open': payload_nans(1, 10),
            'high': payload_nans(11, 20),
            'low': payload_nans(21, 30),
            'close': payload_nans(31, 40),
            'volume': full(9, 0),
        }
        # NOTE(review): the index is passed as a one-element list wrapping
        # the DatetimeIndex, exactly as before; confirm this is intended.
        self.writer.write_sid(sid, DataFrame(data=frame_data, index=[minutes]))

        fields = ['open', 'high', 'low', 'close', 'volume']
        raw_arrays = self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid])
        ohlcv_window = [transpose(raw) for raw in raw_arrays]

        for position, field in enumerate(fields):
            sid_values = ohlcv_window[position][0]
            if field == 'volume':
                assert_array_equal(zeros(9), sid_values)
            else:
                assert_array_equal(full(9, nan), sid_values)

    def test_write_cols(self):
        """
        Write two consecutive minutes of OHLCV data through ``write_cols``
        and check that every field reads back unchanged at both minutes.
        """
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        # Expected values are spelled out as literals (rather than read back
        # from ``cols``) so the check does not depend on the writer leaving
        # its input arrays untouched.
        expected = (
            (minute_0, (('open', 10.0), ('high', 20.0), ('low', 30.0),
                        ('close', 40.0), ('volume', 50.0))),
            (minute_1, (('open', 11.0), ('high', 21.0), ('low', 31.0),
                        ('close', 41.0), ('volume', 51.0))),
        )
        for minute, fields in expected:
            for field, value in fields:
                # assertEqual: the assertEquals alias is deprecated and was
                # removed in Python 3.12.
                self.assertEqual(
                    value,
                    self.reader.get_value(sid, minute, field),
                )

    def test_write_cols_mismatch_length(self):
        """``write_cols`` must reject columns of inconsistent length.

        Two timestamps are supplied while the columns carry 2, 3 or 4
        values; the writer is expected to raise
        ``BcolzMinuteWriterColumnMismatch``.
        """
        sid = 1
        first_minute = self.market_opens[self.test_calendar_start]
        minute_index = date_range(first_minute, periods=2, freq='min')
        dts = minute_index.asi8.astype('datetime64[s]')

        # Only 'high' and 'close' match the two timestamps above.
        ragged_cols = {
            'open': array([10.0, 11.0, 12.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0, 33.0, 34.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0, 52.0])
        }

        with self.assertRaises(BcolzMinuteWriterColumnMismatch):
            self.writer.write_cols(sid, dts, ragged_cols)

    def test_unadjusted_minutes(self):
        """
        Round-trip three minutes for two sids through ``load_raw_arrays``.

        The middle minute of each frame is an empty bar (NaN prices, zero
        volume).  After transposing, each loaded array is indexed first by
        sid position and is compared with the column that was written.
        """
        first = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            first,
            first + Timedelta('1 min'),
            first + Timedelta('2 min'),
        ]
        sids = [1, 2]

        frame_for_first_sid = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write_sid(sids[0], frame_for_first_sid)

        frame_for_second_sid = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write_sid(sids[1], frame_for_second_sid)

        written = {
            sids[0]: frame_for_first_sid,
            sids[1]: frame_for_second_sid,
        }

        # A fresh reader is opened on the same destination directory.
        reader = BcolzMinuteBarReader(self.dest)
        columns = ['open', 'high', 'low', 'close', 'volume']
        raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
        arrays = [transpose(block) for block in raw]

        for col_pos, col in enumerate(columns):
            per_sid = arrays[col_pos]
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(written[sid][col], per_sid[sid_pos])

    def test_unadjusted_minutes_early_close(self):
        """
        Test an unadjusted minute window whose days include early closes,
        ensuring that early closes are filtered out.

        Three bars are written per sid: two minutes before the close on
        2015-11-25, one minute before the close on 2015-12-24, and one
        minute after the open on 2015-12-28.  Each bar is then looked up in
        the loaded window at its offset within ``env.market_minutes``.
        """
        thanksgiving_eve_close = self.market_closes[
            Timestamp('2015-11-25', tz='UTC')]
        xmas_eve_close = self.market_closes[
            Timestamp('2015-12-24', tz='UTC')]
        post_xmas_open = self.market_opens[
            Timestamp('2015-12-28', tz='UTC')]

        minutes = [
            thanksgiving_eve_close - Timedelta('2 min'),
            xmas_eve_close - Timedelta('1 min'),
            post_xmas_open + Timedelta('1 min'),
        ]
        sids = [1, 2]

        frame_for_first_sid = DataFrame(
            data={
                'open': [15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            },
            index=minutes)
        self.writer.write_sid(sids[0], frame_for_first_sid)

        frame_for_second_sid = DataFrame(
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            },
            index=minutes)
        self.writer.write_sid(sids[1], frame_for_second_sid)

        written = {
            sids[0]: frame_for_first_sid,
            sids[1]: frame_for_second_sid,
        }

        reader = BcolzMinuteBarReader(self.dest)
        columns = ['open', 'high', 'low', 'close', 'volume']
        raw = reader.load_raw_arrays(columns, minutes[0], minutes[-1], sids)
        arrays = [transpose(block) for block in raw]

        # Offsets of the written minutes relative to the window start,
        # measured in positions of env.market_minutes.
        market_minutes = self.env.market_minutes
        window_start = market_minutes.get_loc(minutes[0])
        minute_locs = [
            market_minutes.get_loc(minute) - window_start
            for minute in minutes
        ]

        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(
                    written[sid].loc[minutes, col],
                    arrays[col_pos][sid_pos][minute_locs])

    def test_adjust_non_trading_minutes(self):
        """Check ``get_value`` lookups at and outside written minutes.

        Values 1..780 are written over the minutes of 2015-06-01 and
        2015-06-02.  Lookups at 20:00 UTC on each day are expected to give
        390 and 780; lookups at midnight on 2015-06-02 and at 20:01 on
        2015-06-02 are expected to give those same values, i.e. the value
        of the preceding written minute.
        """
        sid = 1
        first_day = Timestamp('2015-06-01', tz='UTC')
        last_day = Timestamp('2015-06-02', tz='UTC')

        # Every field gets its own 1..780 ramp.
        fields = ('open', 'high', 'low', 'close', 'volume')
        cols = {field: arange(1, 781) for field in fields}
        dts = array(self.env.minutes_for_days_in_range(first_day, last_day))
        self.writer.write_cols(sid, dts, cols)

        lookups = (
            (Timestamp('2015-06-01 20:00:00', tz='UTC'), 390),
            (Timestamp('2015-06-02 20:00:00', tz='UTC'), 780),
            (Timestamp('2015-06-02', tz='UTC'), 390),
            (Timestamp('2015-06-02 20:01:00', tz='UTC'), 780),
        )
        for dt, expected_open in lookups:
            self.assertEqual(
                self.reader.get_value(sid, dt, 'open'), expected_open)

    def test_adjust_non_trading_minutes_half_days(self):
        """Same lookup check as the full-day variant, starting on a half day.

        Values 1..600 are written over the minutes from 2015-11-27 (marked
        as a half day) through 2015-11-30.  The expected values show 210
        at 18:00 UTC on the 27th and 600 at 21:00 UTC on the 30th; lookups
        at 18:01 on the 27th, midnight on the 30th and 21:01 on the 30th
        are expected to give the value of the preceding written minute.
        """
        sid = 1
        half_day = Timestamp('2015-11-27', tz='UTC')
        last_day = Timestamp('2015-11-30', tz='UTC')

        fields = ('open', 'high', 'low', 'close', 'volume')
        cols = {field: arange(1, 601) for field in fields}
        dts = array(self.env.minutes_for_days_in_range(half_day, last_day))
        self.writer.write_cols(sid, dts, cols)

        lookups = (
            (Timestamp('2015-11-27 18:00:00', tz='UTC'), 210),
            (Timestamp('2015-11-30 21:00:00', tz='UTC'), 600),
            (Timestamp('2015-11-27 18:01:00', tz='UTC'), 210),
            (Timestamp('2015-11-30', tz='UTC'), 210),
            (Timestamp('2015-11-30 21:01:00', tz='UTC'), 600),
        )
        for dt, expected_open in lookups:
            self.assertEqual(
                self.reader.get_value(sid, dt, 'open'), expected_open)

    def test_set_sid_attrs(self):
        """Confirm that attributes set on a sid's file can be read back.

        Only the round trip is checked: each value passed to
        ``writer.set_sid_attrs`` must equal what ``reader.get_sid_attr``
        returns for the same key.  (The ``end_day`` used here precedes
        ``start_day``; the test does not depend on their ordering.)
        """
        sid = 1
        nanos_per_second = int(1e9)
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')

        # Timestamp.value is in nanoseconds; the attrs are stored in seconds.
        attrs = {
            'start_day': start_day.value / nanos_per_second,
            'end_day': end_day.value / nanos_per_second,
            'factor': 100,
        }

        self.writer.set_sid_attrs(sid, **attrs)

        for name in attrs:
            self.assertEqual(self.reader.get_sid_attr(sid, name), attrs[name])
Exemple #36
0
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for ``BcolzMinuteBarWriter`` / ``BcolzMinuteBarReader``.

    Each test writes OHLCV bars for a sid into a scratch directory with
    ``self.writer.write`` and reads individual values back with
    ``self.reader.get_value``.
    """

    # Field order used when a test reads a whole bar back.
    _FIELDS = ('open', 'high', 'low', 'close', 'volume')

    @classmethod
    def setUpClass(cls):
        """Slice the market opens down to the test calendar window."""
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START, end=TEST_CALENDAR_STOP)
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        """Create a scratch ``minute_bars`` directory, a writer and a reader."""
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        """Remove the scratch directory created in ``setUp``."""
        self.dir_.cleanup()

    def _days_after_start(self, count):
        """Return the calendar start advanced by ``count`` index periods.

        Uses the frequency of the market-opens index instead of adding a
        bare integer to a Timestamp, which current pandas no longer allows.
        """
        return self.test_calendar_start + self.market_opens.index.freq * count

    def _assert_bar(self, sid, minute, expected):
        """Assert that each field read for ``sid`` at ``minute`` matches.

        ``expected`` holds one value per entry of ``_FIELDS``, in that order.
        """
        for field, value in zip(self._FIELDS, expected):
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field))

    def test_write_one_ohlcv(self):
        """A single bar written at the first market open reads back intact."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        },
                         index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_two_bars(self):
        """Two consecutive minutes written together read back intact."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(data={
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        },
                         index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute_0, (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minute_1, (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_write_on_second_day(self):
        """A first write that lands on the second calendar day reads back."""
        second_day = self._days_after_start(1)
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        },
                         index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, (10.0, 20.0, 30.0, 40.0, 50.0))

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(data={
            'open': [0],
            'high': [0],
            'low': [0],
            'close': [0],
            'volume': [0]
        },
                         index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))

        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Bars on two different days, written in one call, read back."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(start=self._days_after_start(1),
                                     end=self._days_after_start(3))]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(data={
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        },
                         index=minutes)
        self.writer.write(sid, data)

        self._assert_bar(sid, minutes[0], (10.0, 20.0, 30.0, 40.0, 50.0))
        self._assert_bar(sid, minutes[1], (11.0, 21.0, 31.0, 41.0, 51.0))

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        },
                         index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
class BcolzMinuteBarTestCase(TestCase):

    @classmethod
    def setUpClass(cls):
        """Prepare the market open/close series shared by every test.

        Both series are cut to the TEST_CALENDAR_START..TEST_CALENDAR_STOP
        window, and the first and last days of that window are recorded.
        """
        cls.env = TradingEnvironment()
        schedule = cls.env.open_and_closes
        opens = schedule.market_open
        closes = schedule.market_close

        # One indexer, computed from the opens' index, slices both series.
        window = opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP,
        )
        cls.market_opens = opens[window]
        cls.market_closes = closes[window]

        window_days = cls.market_opens.index
        cls.test_calendar_start = window_days[0]
        cls.test_calendar_stop = window_days[-1]

    def setUp(self):
        """Create a scratch ``minute_bars`` directory with a writer and reader.

        The writer is configured with the class-level market opens and
        closes; the reader is opened on the same destination path.
        """
        self.dir_ = TempDirectory()
        self.dir_.create()

        bars_path = self.dir_.getpath('minute_bars')
        self.dest = bars_path
        os.makedirs(bars_path)

        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            bars_path,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(bars_path)

    def tearDown(self):
        """Remove the scratch directory created in ``setUp``."""
        scratch = self.dir_
        scratch.cleanup()

    def test_write_one_ohlcv(self):
        """A single bar written at the first market open reads back intact."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)

        expected = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        for field, value in expected:
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field))

    def test_write_two_bars(self):
        """Two consecutive minutes written together read back intact."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write_sid(sid, data)

        # (minute, ((field, expected value), ...)) in the order they are read.
        expected_bars = (
            (minute_0, (('open', 10.0), ('high', 20.0), ('low', 30.0),
                        ('close', 40.0), ('volume', 50.0))),
            (minute_1, (('open', 11.0), ('high', 21.0), ('low', 31.0),
                        ('close', 41.0), ('volume', 51.0))),
        )
        for minute, expected in expected_bars:
            for field, value in expected:
                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(
                    value, self.reader.get_value(sid, minute, field))

    def test_write_on_second_day(self):
        """A first write that lands on the second calendar day reads back."""
        # Step by the index frequency rather than adding a bare integer to
        # a Timestamp, which current pandas no longer allows.
        second_day = self.test_calendar_start + self.market_opens.index.freq
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)

        expected = (
            ('open', 10.0),
            ('high', 20.0),
            ('low', 30.0),
            ('close', 40.0),
            ('volume', 50.0),
        )
        for field, value in expected:
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field))

    def test_write_empty(self):
        """An all-zero bar reads back as NaN prices and zero volume."""
        sid = 1
        minute = self.market_opens[self.test_calendar_start]
        empty_bar = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write_sid(sid, empty_bar)

        # Price fields are expected to come back as NaN ...
        for price_field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, price_field))

        # ... while volume stays at zero.
        assert_almost_equal(
            0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Bars on two different days, written in one call, read back.

        The bars sit 60 minutes after the open of the first selected day
        and 120 minutes after the open of the next one.
        """
        tds = self.market_opens.index
        # Step by the index frequency rather than adding bare integers to
        # a Timestamp, which current pandas no longer allows.
        freq = tds.freq
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + freq,
            end=self.test_calendar_start + freq * 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write_sid(sid, data)

        # (minute, ((field, expected value), ...)) in the order they are read.
        expected_bars = (
            (minutes[0], (('open', 10.0), ('high', 20.0), ('low', 30.0),
                          ('close', 40.0), ('volume', 50.0))),
            (minutes[1], (('open', 11.0), ('high', 21.0), ('low', 31.0),
                          ('close', 41.0), ('volume', 51.0))),
        )
        for minute, expected in expected_bars:
            for field, value in expected:
                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(
                    value, self.reader.get_value(sid, minute, field))

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        sid = 1
        first_minute = self.market_opens[TEST_CALENDAR_START]
        bar = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[first_minute])

        # The first write is expected to succeed ...
        self.writer.write_sid(sid, bar)

        # ... and repeating it for the same sid and minute must be refused.
        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write_sid(sid, bar)

    def test_append_to_same_day(self):
        """
        Test writing data with the same date as existing data in our file.

        A bar is written at the first minute, then a second bar one minute
        later in a separate ``write_sid`` call; the second bar is read back.
        """
        sid = 1

        first_minute = self.market_opens[TEST_CALENDAR_START]
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[first_minute])
        self.writer.write_sid(sid, data)

        # Write data in the same day as the previous minute
        second_minute = first_minute + Timedelta(minutes=1)
        new_data = DataFrame(
            data={
                'open': [5.0],
                'high': [10.0],
                'low': [3.0],
                'close': [7.0],
                'volume': [10.0]
            },
            index=[second_minute])
        self.writer.write_sid(sid, new_data)

        expected = (
            ('open', 5.0),
            ('high', 10.0),
            ('low', 3.0),
            ('close', 7.0),
            ('volume', 10.0),
        )
        for field, value in expected:
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(
                value, self.reader.get_value(sid, second_minute, field))

    def test_append_on_new_day(self):
        """Appending a bar one day later leaves the gap as empty bars.

        One bar is written at the first minute and the same values again
        one day later.  The unwritten second minute of the first day is
        then expected to read as NaN prices and zero volume, and the sid's
        table is expected to hold a full first day plus one row.
        """
        sid = 1
        first_minute = self.market_opens[TEST_CALENDAR_START]
        second_minute = first_minute + Timedelta(minutes=1)
        next_day_minute = first_minute + Timedelta(days=1)

        ohlcv = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0]
        }

        # Same bar values on both days; each write is a separate call.
        self.writer.write_sid(
            sid, DataFrame(data=ohlcv, index=[first_minute]))
        self.writer.write_sid(
            sid, DataFrame(data=ohlcv, index=[next_day_minute]))

        # The skipped second minute reads back as an empty bar.
        for price_field in ('open', 'high', 'low', 'close'):
            gap_value = self.reader.get_value(sid, second_minute, price_field)
            assert_almost_equal(nan, gap_value)
        gap_volume = self.reader.get_value(sid, second_minute, 'volume')
        self.assertEqual(0, gap_volume)

        # The first day should contain US_EQUITIES_MINUTES_PER_DAY rows.
        # The second day should contain a single row.
        row_count = len(self.writer._ensure_ctable(sid))
        self.assertEqual(row_count, US_EQUITIES_MINUTES_PER_DAY + 1)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz`)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write_sid(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            },
            index=[minute])
        self.writer.write_sid(sids[1], data)

        # (sid, ((field, expected value), ...)) in the order they are read.
        expected_bars = (
            (sids[0], (('open', 15.0), ('high', 17.0), ('low', 11.0),
                       ('close', 15.0), ('volume', 100.0))),
            (sids[1], (('open', 25.0), ('high', 27.0), ('low', 21.0),
                       ('close', 25.0), ('volume', 200.0))),
        )
        for sid, expected in expected_bars:
            for field, value in expected:
                # assertEqual: the assertEquals alias was removed in
                # Python 3.12.
                self.assertEqual(
                    value, self.reader.get_value(sid, minute, field))

    def test_pad_data(self):
        """
        Test padding a sid's output before and after writing a bar.

        A sid with no output reports ``NaT`` as its last date.  After
        padding through TEST_CALENDAR_START it reports that date.  A bar is
        then written on the following day and read back, and padding through
        that day is expected to leave two full days of rows.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        day = TEST_CALENDAR_START + freq
        minute = self.market_opens[day]

        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)

        expected = (
            ('open', 15.0),
            ('high', 17.0),
            ('low', 11.0),
            ('close', 15.0),
            ('volume', 100.0),
        )
        for field, value in expected:
            # assertEqual: the assertEquals alias was removed in Python 3.12.
            self.assertEqual(
                value, self.reader.get_value(sid, minute, field))

        # Check that if we then pad the rest of this day, we end up with
        # 2 days worth of minutes.
        self.writer.pad(sid, day)

        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            self.writer._minutes_per_day * 2,
        )

    def test_nans(self):
        """
        Test writing bars whose prices are NaN and whose volume is zero.

        After padding through TEST_CALENDAR_START, nine minutes of NaN
        prices and zero volume are written on the following day.  The raw
        arrays loaded for that window are expected to hold NaN for every
        price field and zeros for volume.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            # Pass the DatetimeIndex itself: wrapping it in a list makes
            # current pandas build a one-level MultiIndex instead.
            index=minutes)
        self.writer.write_sid(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = list(map(transpose, self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid],
        )))

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.

        The price columns are built from raw bit patterns: all eleven
        exponent bits set plus a small non-zero mantissa, so every value is
        a NaN with a different payload.  The loaded window is expected to
        compare equal to plain NaN for prices and zeros for volume.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')

        exponent_all_ones = 0b11111111111 << 52

        def payload_nans(first):
            # Nine NaNs whose mantissas are first, first + 1, ..., first + 8.
            bits = exponent_all_ones + arange(first, first + 9, dtype=int64)
            return bits.view(float64)

        data = DataFrame(
            data={
                'open': payload_nans(1),
                'high': payload_nans(11),
                'low': payload_nans(21),
                'close': payload_nans(31),
                'volume': full(9, 0),
            },
            # Pass the DatetimeIndex itself: wrapping it in a list makes
            # current pandas build a one-level MultiIndex instead.
            index=minutes)
        self.writer.write_sid(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = list(map(transpose, self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid],
        )))

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_write_cols(self):
        # Writes two consecutive minutes of OHLCV data through write_cols and
        # reads every field of both minutes back through the reader.
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        # Expected values are spelled out again (instead of being read from
        # ``cols``) so the check does not depend on what the writer does with
        # the arrays it was handed.
        fields = ('open', 'high', 'low', 'close', 'volume')
        expected_by_minute = (
            (minute_0, (10.0, 20.0, 30.0, 40.0, 50.0)),
            (minute_1, (11.0, 21.0, 31.0, 41.0, 51.0)),
        )

        for minute, expected_values in expected_by_minute:
            for field, expected in zip(fields, expected_values):
                # assertEqual, not the deprecated assertEquals alias.
                self.assertEqual(
                    expected,
                    self.reader.get_value(sid, minute, field),
                )

    def test_write_cols_mismatch_length(self):
        # Two timestamps are paired with columns of length 3, 2, 4, 2 and 3;
        # write_cols is expected to reject the mismatch.
        first_minute = self.market_opens[self.test_calendar_start]
        minute_index = date_range(first_minute, periods=2, freq='min')
        dts = minute_index.asi8.astype('datetime64[s]')
        sid = 1
        cols = dict(
            open=array([10.0, 11.0, 12.0]),
            high=array([20.0, 21.0]),
            low=array([30.0, 31.0, 33.0, 34.0]),
            close=array([40.0, 41.0]),
            volume=array([50.0, 51.0, 52.0]),
        )
        self.assertRaises(
            BcolzMinuteWriterColumnMismatch,
            self.writer.write_cols, sid, dts, cols,
        )

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.

        Three consecutive minutes (the middle one empty) are written for two
        sids and compared against what a fresh reader loads for that window.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            start_minute,
            start_minute + Timedelta('1 min'),
            start_minute + Timedelta('2 min'),
        ]
        first_sid, second_sid = 1, 2

        first_frame = DataFrame(
            index=minutes,
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            })
        self.writer.write_sid(first_sid, first_frame)

        second_frame = DataFrame(
            index=minutes,
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            })
        self.writer.write_sid(second_sid, second_frame)

        # Read through a newly constructed reader rather than self.reader.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [first_sid, second_sid]
        raw_arrays = reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )
        # After transposing, each per-column array is indexed by sid first.
        arrays = [transpose(raw) for raw in raw_arrays]

        written = {first_sid: first_frame, second_sid: second_frame}

        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(written[sid][col],
                                    arrays[col_pos][sid_pos])

    def test_unadjusted_minutes_early_close(self):
        """
        Test unadjusted minute window, ensuring that early closes are filtered
        out.

        The three written minutes sit shortly before the close on
        2015-11-25, shortly before the close on 2015-12-24 and shortly after
        the open on 2015-12-28; they are located in the loaded window via
        their offsets in ``self.env.market_minutes``.
        """
        day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC')
        xmas_eve = Timestamp('2015-12-24', tz='UTC')
        market_day_after_xmas = Timestamp('2015-12-28', tz='UTC')

        minutes = [
            self.market_closes[day_before_thanksgiving] - Timedelta('2 min'),
            self.market_closes[xmas_eve] - Timedelta('1 min'),
            self.market_opens[market_day_after_xmas] + Timedelta('1 min'),
        ]
        first_sid, second_sid = 1, 2

        first_frame = DataFrame(
            index=minutes,
            data={
                'open': [15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            })
        self.writer.write_sid(first_sid, first_frame)

        second_frame = DataFrame(
            index=minutes,
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            })
        self.writer.write_sid(second_sid, second_frame)

        # Read through a newly constructed reader rather than self.reader.
        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [first_sid, second_sid]
        raw_arrays = reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )
        arrays = [transpose(raw) for raw in raw_arrays]

        written = {first_sid: first_frame, second_sid: second_frame}

        # Position of each written minute relative to the first one, counted
        # in entries of the environment's market-minute index.
        market_minutes = self.env.market_minutes
        start_minute_loc = market_minutes.get_loc(minutes[0])
        minute_locs = []
        for minute in minutes:
            minute_locs.append(
                market_minutes.get_loc(minute) - start_minute_loc)

        for col_pos, col in enumerate(columns):
            for sid_pos, sid in enumerate(sids):
                assert_almost_equal(written[sid].loc[minutes, col],
                                    arrays[col_pos][sid_pos][minute_locs])

    def test_adjust_non_trading_minutes(self):
        # Every column holds 1..780 across the minutes of 2015-06-01 and
        # 2015-06-02.  The expected values below show the reader resolving a
        # timestamp outside market minutes (midnight, 20:01) to the value of
        # the market minute that precedes it.
        start_day = Timestamp('2015-06-01', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')

        sid = 1
        cols = {}
        for field in ('open', 'high', 'low', 'close', 'volume'):
            cols[field] = arange(1, 781)
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)

        expected_opens = [
            ('2015-06-01 20:00:00', 390),
            ('2015-06-02 20:00:00', 780),
            ('2015-06-02', 390),
            ('2015-06-02 20:01:00', 780),
        ]
        for when, expected in expected_opens:
            observed = self.reader.get_value(
                sid, Timestamp(when, tz='UTC'), 'open')
            self.assertEqual(observed, expected)

    def test_adjust_non_trading_minutes_half_days(self):
        # Same idea as the full-day variant, but the first session
        # (2015-11-27) is a half day: every column holds 1..600 across the
        # minutes of 2015-11-27 and 2015-11-30, and the expected values put
        # the end of the first session at 210.
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-11-30', tz='UTC')

        sid = 1
        cols = {}
        for field in ('open', 'high', 'low', 'close', 'volume'):
            cols[field] = arange(1, 601)
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)

        expected_opens = [
            ('2015-11-27 18:00:00', 210),
            ('2015-11-30 21:00:00', 600),
            ('2015-11-27 18:01:00', 210),
            ('2015-11-30', 210),
            ('2015-11-30 21:01:00', 600),
        ]
        for when, expected in expected_opens:
            observed = self.reader.get_value(
                sid, Timestamp(when, tz='UTC'), 'open')
            self.assertEqual(observed, expected)

    def test_set_sid_attrs(self):
        """Confirm that we can set the attributes of a sid's file correctly.

        Each attribute written with ``set_sid_attrs`` must come back
        unchanged from ``get_sid_attr``.
        """
        sid = 1
        # Timestamp.value is divided by 1e9 to store the days as seconds
        # instead of nanoseconds.
        nanos_per_second = int(1e9)
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')
        attrs = dict(
            start_day=start_day.value / nanos_per_second,
            end_day=end_day.value / nanos_per_second,
            factor=100,
        )

        self.writer.set_sid_attrs(sid, **attrs)

        for attr_name in attrs:
            stored = self.reader.get_sid_attr(sid, attr_name)
            self.assertEqual(stored, attrs[attr_name])
class BaseTester(object):
    """Fixture base that prepares a scratch directory and test-data paths.

    ``setUp`` builds a ``TestsConfiger`` and a ``TempDirectory``.  The other
    helpers copy sample databases / corpora into that directory and publish
    their locations, plus a few in-memory sample rows, as attributes.
    """

    # Read by nose's multiprocess plugin: the fixtures of this context are
    # declared re-entrant, so its tests are dispatched as if the context had
    # no fixtures and the fixtures may run concurrently, typically once per
    # test.  The alternative flag, ``_multiprocess_shared_`` (fixtures run
    # once in the primary nose process), is left disabled.
    _multiprocess_can_split_ = True

    def setUp(self):
        """Create the configer and the temporary directory for one test."""
        create_test_data()

        # Run the garbage collector before starting the new test case.
        gc.collect()

        # Other modes that have been used here: "test", "test+s+", "test+s-",
        # "dev", "dev-", "silent".
        self.mode = "error"

        clear_logger()
        # NOTE(review): the configer is built with "silent" rather than
        # self.mode; the original author's remark says it should be "test".
        self.configer = TestsConfiger(mode="silent")

        self.tempdir = TempDirectory()
        self.path_to_zas_rep_tools = self.configer.path_to_zas_rep_tools

    def tearDown(self):
        """Collect garbage, then remove the temporary directory."""
        scratch = self.tempdir
        # Only the local name is unbound here; the collection that follows
        # runs before the directory is deleted.
        del self
        gc.collect()
        scratch.cleanup()
        del scratch

    @nottest
    def create_all_test_data(self):
        """Run every data-preparation helper of this class, in fixed order."""
        self.prj_folder()
        self.test_dbs()
        self.blogger_corpus()
        self.twitter_corpus()
        self.blogger_lists()

    @nottest
    def test_dbs(self):
        """Publish the test-database names and copy the DBs to the temp dir."""
        self.path_to_testdbs = self.configer.path_to_testdbs

        known_dbs = self.configer.test_dbs
        plain_blogger = known_dbs["plaintext"]["blogger"]
        encrypted_twitter_de = known_dbs["encrypted"]["twitter"]["de"]

        # Plaintext blogger databases: corpus and stats for en / de / test.
        self.db_blogger_plaintext_corp_en = plain_blogger["en"]["corpus"]
        self.db_blogger_plaintext_corp_de = plain_blogger["de"]["corpus"]
        self.db_blogger_plaintext_corp_test = plain_blogger["test"]["corpus"]
        self.db_blogger_plaintext_stats_en = plain_blogger["en"]["stats"]
        self.db_blogger_plaintext_stats_de = plain_blogger["de"]["stats"]
        self.db_blogger_plaintext_stats_test = plain_blogger["test"]["stats"]

        # Encrypted German twitter databases.
        self.db_twitter_encrypted_corp_de = encrypted_twitter_de["corpus"]
        self.db_twitter_encrypted_stats_de = encrypted_twitter_de["stats"]

        # Work on a private copy inside the temporary directory.
        self.tempdir.makedir('TestDBs')
        self.tempdir_testdbs = self.tempdir.getpath('TestDBs')
        source_dir = os.path.join(self.path_to_zas_rep_tools,
                                  self.path_to_testdbs)
        copy_tree(source_dir, self.tempdir_testdbs)

    def blogger_corpus(self):
        """Publish the blogger-corpus subset paths and copy the corpus."""
        self.path_to_test_sets_for_blogger_Corpus = "data/tests_data/Corpora/BloggerCorpus"

        # Subset locations, relative to the corpus root, one pair per format.
        # TXT
        self.txt_blogger_hightrepetativ_set = "txt/HighRepetativSubSet"
        self.txt_blogger_small_fake_set = "txt/SmallFakeSubset"
        # CSV
        self.csv_blogger_hightrepetativ_set = "csv/HighRepetativSubSet"
        self.csv_blogger_small_fake_set = "csv/SmallFakeSubset"
        # XML
        self.xml_blogger_hightrepetativ_set = "xml/HighRepetativSubSet"
        self.xml_blogger_small_fake_set = "xml/SmallFakeSubset"
        # JSON
        self.json_blogger_hightrepetativ_set = "json/HighRepetativSubSet"
        self.json_blogger_small_fake_set = "json/SmallFakeSubset"

        # Work on a private copy inside the temporary directory.
        self.tempdir.makedir('BloggerCorpus')
        self.tempdir_blogger_corp = self.tempdir.getpath('BloggerCorpus')
        source_dir = os.path.join(self.path_to_zas_rep_tools,
                                  self.path_to_test_sets_for_blogger_Corpus)
        copy_tree(source_dir, self.tempdir_blogger_corp)

    def twitter_corpus(self):
        """Publish the twitter-corpus subset paths and copy the corpus."""
        self.path_to_test_sets_for_twitter_Corpus = "data/tests_data/Corpora/TwitterCorpus"
        self.json_twitter_set = "JSON/zas-rep-tool/"
        self.csv_twitter_set = "CSV/zas-rep-tool/"
        self.xml_twitter_set = "XML/zas-rep-tool/"

        # Work on a private copy inside the temporary directory.
        self.tempdir.makedir('TwitterCorpus')
        self.tempdir_twitter_corp = self.tempdir.getpath('TwitterCorpus')
        source_dir = os.path.join(self.path_to_zas_rep_tools,
                                  self.path_to_test_sets_for_twitter_Corpus)
        copy_tree(source_dir, self.tempdir_twitter_corp)

    def blogger_lists(self):
        """Publish three lists of sample blogger rows and the field names."""
        # The same three authors recur in every list:
        # (star_constellation, working_area, age, id, gender).
        capricorn = ('Capricorn', 'Consulting', '46', '324114', 'female')
        pisces = ('Pisces', 'indUnk', '24', '416465', 'male')
        virgo = ('Virgo', 'Non-Profit', '17', '322624', 'female')

        def row(rowid, author, text):
            # Build one sample row; a new dict is created on every call.
            star_constellation, working_area, age, author_id, gender = author
            return {
                'rowid': rowid,
                'star_constellation': star_constellation,
                'text': text,
                'working_area': working_area,
                'age': age,
                'id': author_id,
                'gender': gender,
            }

        self.input_list_fake_blogger_corpus = [
            row('1', capricorn,
                u'Well, the angel won. I went to work today....after alot of internal struggle with the facts. I calculated sick days left this year,'),
            row('2', pisces,
                u"urlLink Drawing Game  It's PICTIONARY. It's very cool."),
            row('3', virgo,
                u'The mango said, "Hi there!!.... \n"Hi there!!.... \n"Hi there!!.... '),
        ]

        self.input_list_blogger_corpus_high_repetativ_subset = [
            row('1', capricorn,
                u'@lovelypig #direct_to_haven 67666 8997 -))) -) -P Neeeeeeeeeeeeeeeeiiiiiiinnnnn!!!!! Bitte nicht \U0001f602\U0001f602\U0001f602 \nTest Version von einem Tweeeeeeeeet=)))))))\nnoch einen Tweeeeeeeeet=))))))) \U0001f605\U0001f605'),
            row('2', pisces,
                u'Einen weiteren Thread eingef\xfcgt!!! juHuuuuuuuu=) \U0001f49b\U0001f49b\U0001f49b\nden vierten Threadddddd!!! wooooowwwwww!!! \u263a\ufe0f \U0001f61c\U0001f61c\U0001f61c\nDas ist einnnneeeen Teeeeest Tweeeets, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen beinhalten sollte. \U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c Das ist einnnneeeen Teeeeest Tweeeets, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen \U0001f61c\U0001f61c\U0001f61c\U0001f61c\nDas ist einnnneeeen Teeeeest Quoted Tweet, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen beinhalten sollte. \U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c Das ist einnnneeeen Teeeeest Tweeeets, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen \U0001f61c\U0001f61c h'),
            row('3', virgo,
                u'Eine Teeeeeest Diskussion wird er\xf6ffnet!!! @zas-rep-tools \nEinen Test Retweet wird gepostet!!!!! Juhuuuuuu=) \U0001f600\U0001f600\U0001f600\U0001f600\nnoooooooch einen Tweeeeeeeeet=)))))))'),
        ]

        self.input_list_blogger_corpus_dirty = [
            row('1', capricorn,
                u'@lovelypig #direct_to_haven 67666 8997 -))) -) -P Neeeeeeeeeeeeeeeeiiiiiiinnnnn!!!!! Bitte nicht @lovelypig \U0001f602\U0001f602\U0001f602 \nTest Version von einem Tweeeeeeeeet=)))))))\nnoch einen Tweeeeeeeeet=))))))) 111111 22222 3. 444 \U0001f605\U0001f605'),
            row('2', virgo,
                u'Eine Teeeeeest Diskussion wird er\xf6ffnet!!! @zas-rep-tools #doit #stay_you \nEinen Test Retweet wird gepostet!!!!! =))))))) #stay_your_self'),
        ]

        self.fieldnames = self.configer.columns_in_doc_table["blogger"]

    def prj_folder(self):
        """Create an empty ``ProjectFolder`` inside the temp dir."""
        self.tempdir.makedir('ProjectFolder')
        self.tempdir_project_folder = self.tempdir.getpath('ProjectFolder')
class Skeleton_S3CommitStorage_Against_Mock(TestCase):
    """Reusable S3CommitStorage tests that run against a mocked bucket.

    ``__test__`` is False, which marks the class as not-a-test for collectors
    that honour that attribute.  The tests read ``self.bucket_prefix``, which
    this class never assigns, so it has to be supplied from outside (for
    example by a concrete subclass).
    """
    __test__ = False

    def setUp(self):
        """Create the scratch directory, the mock bucket and the store."""
        self.tempdir = TempDirectory()
        self.setup_mock_defaults()
        self.store = S3CommitStorage(self.mock_bucket, self.bucket_prefix)

    def setup_mock_defaults(self):
        """Install a ``Bucket``-spec'd mock whose listing starts out empty."""
        self.mock_bucket = Mock(spec=Bucket)
        self.set_bucket_list([])

    def tearDown(self):
        """Remove the scratch directory."""
        self.tempdir.cleanup()

    def prefix_key(self, key):
        """Return ``key`` prepended with the bucket prefix, if there is one."""
        return ''.join([self.bucket_prefix or '', key])

    def set_bucket_list(self, keynames):
        """Make ``mock_bucket.list()`` return keys named after ``keynames``.

        Every name is passed through ``prefix_key`` first.
        """
        prefixed_keynames = [self.prefix_key(key) for key in keynames]
        key_objs = [Key(None, key) for key in prefixed_keynames]
        self.mock_bucket.list.return_value = key_objs

    def test_get_tags_calls_bucket_list__empty(self):
        self.assertEqual([], self.store.get_tags())
        self.mock_bucket.list.assert_called_with(prefix=self.prefix_key(''))

    def test_get_tags_calls_bucket_list_not_empty(self):
        self.set_bucket_list(['tag1_msg1', 'tag2_msg2'])
        self.store.get_tags()
        self.mock_bucket.list.assert_called_with(prefix=self.prefix_key(''))

    def test_get_tags_parses_keys_properly(self):
        self.set_bucket_list(['tag1_msg1', 'tag2_msg2'])
        self.assertEqual(['tag1', 'tag2'], self.store.get_tags())

    def test_get_message_for_tag_calls_bucket_list(self):
        self.set_bucket_list(['tag1_msg1'])
        self.store.get_message_for_tag('tag1')
        prefix = self.prefix_key('tag1_')
        self.mock_bucket.list.assert_called_with(prefix=prefix)

    def test_get_message_for_tag_parses_keyname_properly(self):
        self.set_bucket_list(['tag1_msg1'])
        self.assertEqual('msg1', self.store.get_message_for_tag('tag1'))

    def test_add_commit_calls_new_key_with_expected_format(self):
        filename1 = self.tempdir.write('file1', 'some file contents')
        # Close the handle deterministically instead of leaking it.
        with open(filename1, 'rb') as commit_file:
            self.store.add_commit('tag1', commit_file, 'some_message')
        expected_key_name = self.prefix_key('tag1_some_message')
        self.mock_bucket.new_key.assert_called_with(expected_key_name)

    def test_add_commit_calls_set_contents_from_filename(self):
        commit_contents = 'some file contents'
        filename1 = self.tempdir.write('file1', commit_contents)
        # Close the handle deterministically instead of leaking it.
        with open(filename1, 'rb') as fp1:
            self.store.add_commit('tag1', fp1, 'some_message')
        new_key_mock = self.mock_bucket.new_key.return_value
        # NOTE(review): assert_called_once() is only a real assertion on mock
        # versions that define it; confirm the installed version does.
        new_key_mock.set_contents_from_file.assert_called_once()
        mock_last_called_kwargs = new_key_mock.\
            set_contents_from_file.call_args[1]
        mock_actual_headers = mock_last_called_kwargs['headers']
        # Header names are compared case-insensitively.
        lower_headers = {}
        for name, value in mock_actual_headers.items():
            lower_headers[name.lower()] = value
        expected_headers = {
            'Content-Type': 'application/octet-stream',
            'Content-Encoding': 'gzip',
            'x-amz-meta-fingerprint': md5(commit_contents).hexdigest()}
        for header, expected_value in expected_headers.items():
            self.assertIn(header.lower(), lower_headers)
            self.assertEqual(expected_value, lower_headers[header.lower()])

    def test_delete_commit_calls_get_key(self):
        self.set_bucket_list(['tag1_msg1'])
        self.store.delete_commit('tag1')
        self.mock_bucket.get_key.assert_called_with(
            self.prefix_key('tag1_msg1'))
        self.mock_bucket.get_key.return_value.delete.assert_called_with()

    def test_get_commit_contents_calls_get_contents_to_filename(self):
        self.set_bucket_list(['tag1_msg1'])
        target_file = self.tempdir.getpath('restored_file')
        self.store.get_commit_contents_to_filename('tag1', target_file)
        get_key = self.mock_bucket.get_key.return_value
        # NOTE(review): see the remark on assert_called_once() in
        # test_add_commit_calls_set_contents_from_filename.
        get_key.get_contents_to_filename.assert_called_once()

    def test_if_no_content_encoding_restores_to_target_file(self):
        self.set_bucket_list(['tag1_msg1'])
        target_file = self.tempdir.getpath('restored_file')
        self.store.get_commit_contents_to_filename('tag1', target_file)
        get_key = self.mock_bucket.get_key.return_value
        get_key.get_contents_to_filename.assert_called_with(target_file)

    def test_if_content_encoding_gzip_calls_gunzip_key_to_filename(self):
        self.set_bucket_list(['tag1_msg1'])
        target_file = self.tempdir.getpath('restored_file')
        self.store._gunzip_key_to_filename = Mock()
        # set content-encoding on key
        get_key = self.mock_bucket.get_key.return_value
        get_key.content_encoding = 'gzip'
        self.store.get_commit_contents_to_filename('tag1', target_file)
        get_key.get_contents_to_filename.assert_called_once()
        self.store._gunzip_key_to_filename.assert_called_with(get_key,
            target_file)

    def test_get_commit_contents_raises_Exception_if_file_exists(self):
        self.set_bucket_list(['tag1_msg1'])
        file1 = self.tempdir.write('file1', 'some file contents')
        self.assertRaises(
            FileAlreadyExistsError,
            self.store.get_commit_contents_to_filename, 'tag1', file1)

    def test_dictionary_interface_returns_a_commit_object(self):
        self.set_bucket_list(['tag1_msg1'])
        commit = self.store['tag1']
        self.assertEqual('tag1', commit.tag)
        self.assertEqual('msg1', commit.message)

    def test_dictionary_interface_uses_custom_header_to_get_fingerprint(self):
        self.set_bucket_list(['tag1_msg1'])
        self.mock_bucket.get_key.return_value.get_metadata.return_value = '123'
        commit = self.store['tag1']
        self.assertEqual('tag1', commit.tag)
        self.assertEqual('msg1', commit.message)
        self.assertEqual('123', commit.fingerprint)

    def test_dictionary_interface_raises_Exception_if_unknown_tag(self):

        def will_raise_UnknownTagError():
            self.store['tag1']
        self.assertRaises(UnknownTagError, will_raise_UnknownTagError)

    def test_contains_interface_calls_bucket_list_with_prefix(self):
        self.set_bucket_list(['tag1_msg1'])
        # Evaluated only for its side effect on the mock; the result is not
        # of interest here.
        'tag1' in self.store
        self.mock_bucket.list.assert_called_with(
            prefix=self.prefix_key('tag1_'))

    def test_contains_interface_returns_true_for_tags_in_bucket(self):
        self.set_bucket_list(['tag1_msg1'])
        self.assertTrue('tag1' in self.store)

    def test_contains_interface_returns_false_for_tags_not_in_bucket(self):
        self.assertFalse('tag2' in self.store)

    def test_raises_Exception_if_prefix_has_leading_slash(self):
        self.assertRaises(
            Exception, S3CommitStorage, self.mock_bucket, '/illegal_prefix')
class BaseSupervisorTestCase(TestCase):
    """
    Base class for running supervisor tests

    Provides helpers to start and stop a ``supervisord`` process, to run
    ``supervisorctl`` against it, and to run a UDP server that stands in for
    Logstash and whose received messages can be inspected.
    """
    maxDiff = None

    def __init__(self, *args, **kwargs):
        super(BaseSupervisorTestCase, self).__init__(*args, **kwargs)
        self.supervisor = None
        self.logstash = None
        # store, as it's also used by supervisorctl
        self._config_file_path = None

    def setUp(self):
        self.scratch = TempDirectory()

    def tearDown(self):
        self.scratch.cleanup()

    def run_supervisor(self, overrides, configuration_string):
        """
        Runs Supervisor

        :param overrides: mapping merged over a copy of ``os.environ`` to
            form the environment of the supervisord process.
        :param configuration_string: text appended to the contents of
            ``supervisord.template`` (read from this file's directory) to
            form the configuration file.
        """
        environment = os.environ.copy()
        environment.update(overrides)

        working_directory = os.path.dirname(__file__)

        template_path = os.path.join(working_directory, 'supervisord.template')
        with open(template_path) as template:
            configuration = template.read()
        configuration += configuration_string
        self.scratch.write('supervisor.conf', configuration, 'utf-8')

        # store, as it's also used by supervisorctl
        self._config_file_path = self.scratch.getpath('supervisor.conf')

        self.supervisor = subprocess.Popen(
            ['supervisord', '-c', self._config_file_path],
            env=environment,
            cwd=os.path.dirname(working_directory),
        )

    def shutdown_supervisor(self):
        """
        Shuts Supervisor down
        """
        self.supervisor.terminate()
        # Block until the process has killed off its children and exited, so
        # that it doesn't keep the port occupied.  wait() returns as soon as
        # the process is gone instead of polling in one-second steps.
        self.supervisor.wait()

    def run_supervisorctl(self, args):
        """
        Runs supervisorctl using the test suites config file

        :param args: list of extra command-line arguments.
        :returns: the exit status of the supervisorctl command.
        """
        command = [
            'supervisorctl',
            '-c', self._config_file_path,
        ]
        command += args

        return subprocess.call(command)

    def run_logstash(self):
        """
        Runs a socketserver instance emulating Logstash

        The server binds to an OS-assigned UDP port (port 0) and is served
        from a background thread.

        :returns: the server instance.
        """
        self.logstash = socketserver.UDPServer(('0.0.0.0', 0), LogstashHandler)
        server_thread = threading.Thread(target=self.logstash.serve_forever)
        # A daemon thread cannot keep the interpreter alive when a test
        # fails before shutdown_logstash() is reached.
        server_thread.daemon = True
        server_thread.start()
        return self.logstash

    def shutdown_logstash(self):
        """
        Shuts the socketserver instance down
        """
        self.logstash.shutdown()
        self.logstash.server_close()

    def messages(self, clear_buffer=False, wait_for=None):
        """
        Returns the contents of the logstash message buffer

        :param clear_buffer: when true, the buffer is emptied after it has
            been read.
        :param wait_for: when not ``None``, block (re-reading every 0.1s)
            until the buffer holds at least this many messages.
        :returns: list with ``strip_volatile`` applied to every message.
        """
        # Read the buffer before deciding whether to wait: messages that
        # have already arrived must not cost a sleep, and wait_for=0 must
        # still report what is in the buffer.
        messages = self.get_message_buffer()
        if wait_for is not None:
            while len(messages) < wait_for:
                sleep(0.1)
                messages = self.get_message_buffer()

        parsed_messages = list(map(strip_volatile, messages))
        if clear_buffer:
            self.clear_message_buffer()

        return parsed_messages

    def get_message_buffer(self):
        """
        Returns the raw logstash message buffer

        The result is a copy of the handler class's ``messages`` list.
        """
        return self.logstash.RequestHandlerClass.messages[:]

    def clear_message_buffer(self):
        """
        Clears the logstash message buffer

        The handler class's ``messages`` attribute is rebound to a new,
        empty list.
        """
        self.logstash.RequestHandlerClass.messages = []
class Skeleton_Repository_Operations_With_SpecificCommitStorage(TestCase):
    """
    Storage-independent test skeleton for ``BBRepository``.

    A concrete test case subclasses this, supplies its own ``setUp()`` and
    ``tearDown()``, sets ``self.store`` and calls the ``setup_*`` /
    ``teardown_*`` helpers below from them.
    """
    __test__ = False  # to prevent nose from running this skeleton

    def setUp(self):
        raise Exception('This is a skeleton for test - you need to provide'
                        ' your own setUp() and tearDown()')

    def setup_tempdir(self):
        # call this from your setUp
        self.tempdir = TempDirectory()
        self.file1_contents = 'some contents'
        self.file2_contents = 'some other contents'
        self.filename1 = self.tempdir.write('file1', self.file1_contents)
        self.filename2 = self.tempdir.write('file2', self.file2_contents)

    def teardown_tempdir(self):
        # call this from your tearDown
        self.tempdir.cleanup()

    def setup_repository(self):
        # call this from your setUp after creating your store
        self.repo = BBRepository(self.store)

    def commit_filename1(self, tag, message=None):
        self.repo.create_commit_from_filename(tag, self.filename1, message)

    def commit_filename2(self, tag, message=None):
        self.repo.create_commit_from_filename(tag, self.filename2, message)

    def _read_restored_contents(self, tag):
        # Restores the commit stored under `tag` into a scratch file in the
        # temp directory and returns that file's contents. The file is
        # closed explicitly instead of being left to the garbage collector.
        commit = self.repo[tag]
        restore_file = self.tempdir.getpath('file3')
        commit.get_contents_to_filename(restore_file)
        with open(restore_file, 'rb') as restored:
            return restored.read()

    def _commit_alternating_files(self):
        # Commits file1, file2, file1, file2 under tags a-d (messages A-D)
        # and returns the committed contents in commit order.
        plan = (
            ('a', 'A', self.commit_filename1, self.file1_contents),
            ('b', 'B', self.commit_filename2, self.file2_contents),
            ('c', 'C', self.commit_filename1, self.file1_contents),
            ('d', 'D', self.commit_filename2, self.file2_contents),
        )
        committed = []
        for tag, message, commit, contents in plan:
            commit(tag, message)
            committed.append(contents)
        return committed

    def test_can_commit_filenames_to_repository(self):
        self.commit_filename1('some-tag')

    def test_commit_tag_characters_are_limited(self):
        self.assertRaises(
            Exception, self.commit_filename1, 'illegal tag with spaces')

    def test_commit_tag_must_be_non_empty(self):
        self.assertRaises(Exception, self.commit_filename1, '')

    def test_repo_is_empty_to_start(self):
        self.assertEqual([], [c for c in self.repo])

    def test_can_commit_files_and_list_commits(self):
        self.commit_filename1('some-tag')
        self.assertEqual(['some-tag'], [c.tag for c in self.repo])

    def test_can_commit_and_retrieve_contents(self):
        self.commit_filename1('some-tag')
        self.assertEqual(self.file1_contents,
                         self._read_restored_contents('some-tag'))

    def test_tags_are_unique(self):
        self.commit_filename1('some-tag')
        # same tag, different file contents
        self.assertRaises(
            DuplicateTagError,
            self.repo.create_commit_from_filename,
            'some-tag', self.filename2)

    def test_duplicate_tag_with_identical_contents_okay(self):
        self.commit_filename1('some-tag')
        self.commit_filename1('some-tag')
        self.assertEqual(self.file1_contents,
                         self._read_restored_contents('some-tag'))

    def test_can_get_commit_before_a_given_commit(self):
        self.commit_filename1('a')
        self.commit_filename1('b')
        commit_b = self.repo['b']
        self.assertEqual('a', self.repo.get_commit_before(commit_b).tag)

    def test_commit_before_first_raises_ValueError(self):
        self.commit_filename1('a')
        first_commit = self.repo['a']
        self.assertRaises(
            ValueError, self.repo.get_commit_before, first_commit)

    def test_commits_are_sorted(self):
        self.commit_filename1('c')
        self.commit_filename1('a')
        self.commit_filename1('b')
        self.assertEqual(['a', 'b', 'c'], [c.tag for c in self.repo])

    def test_can_delete_commits_before_a_specified_commit(self):
        self.commit_filename1('a')
        self.commit_filename1('b')
        self.commit_filename1('c')
        self.repo.delete_commits_before(self.repo['c'])
        self.assertEqual(['c'], [c.tag for c in self.repo])

    def test_can_store_and_retrieve_message_with_commit(self):
        message = 'some-extra-data'
        self.commit_filename1('a', message)
        commit = self.repo['a']
        self.assertEqual(message, commit.message)

    def test_message_characters_limited_to_alphanumeric_and_underscore(self):
        self.assertRaises(
            Exception, self.commit_filename1, 'a', 'some illegal message')

    def test_UTC_iso_datetime_is_a_valid_tag(self):
        self.commit_filename1(datetime.utcnow().isoformat())

    def test_UTC_iso_datetime_is_a_valid_message(self):
        self.commit_filename1('a', datetime.utcnow().isoformat())
        # a second commit in the same repository uses a timestamp as the tag
        self.commit_filename1(datetime.utcnow().isoformat())

    def test_empty_repo_has_zero_size(self):
        self.assertEqual(0, self.repo.get_repository_size())

    def get_expected_size_from_contents(self, file_contents):
        """Return the summed length of every item in ``file_contents``."""
        return sum(len(item) for item in file_contents)

    def test_can_get_repo_size_one_commit(self):
        self.commit_filename1('a', 'A')
        self.assertEqual(
            self.get_expected_size_from_contents(self.file1_contents),
            self.repo.get_repository_size())

    def test_can_get_repo_size_many_different_commits(self):
        file_contents = self._commit_alternating_files()
        expected_size = self.get_expected_size_from_contents(file_contents)
        self.assertEqual(expected_size, self.repo.get_repository_size())

    def test_can_get_repo_size_after_delete(self):
        file_contents = self._commit_alternating_files()
        self.repo.delete_commits_before(self.repo['d'])
        # only the last commit ('d') is left after the delete
        file_contents = file_contents[3:]
        expected_size = self.get_expected_size_from_contents(file_contents)
        self.assertEqual(expected_size, self.repo.get_repository_size())

    def test_empty_repo_has_zero_items(self):
        self.assertEqual(0, self.repo.get_number_of_items())

    def test_can_get_number_items_one_commit(self):
        self.commit_filename1('a', 'A')
        file_contents = [self.file1_contents]
        self.assertEqual(len(file_contents), self.repo.get_number_of_items())

    def test_can_get_number_of_items_many_different_commits(self):
        file_contents = self._commit_alternating_files()
        self.assertEqual(len(file_contents), self.repo.get_number_of_items())

    def test_can_get_number_of_items_after_delete(self):
        file_contents = self._commit_alternating_files()
        self.repo.delete_commits_before(self.repo['d'])
        # only the last commit ('d') is left after the delete
        file_contents = file_contents[3:]
        self.assertEqual(len(file_contents), self.repo.get_number_of_items())
class BcolzMinuteBarTestCase(TestCase):
    """
    Round-trip tests: bars written with ``BcolzMinuteBarWriter`` are read
    back field by field through ``BcolzMinuteBarReader``.
    """

    @classmethod
    def setUpClass(cls):
        # Restrict the market opens to the test calendar window.
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        # Every test gets a fresh directory with its own writer and reader.
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _assert_bar(self, sid, minute, open_, high, low, close, volume):
        # Reads each OHLCV field for (sid, minute) back through the reader,
        # in a fixed order, and compares it with the expected value.
        expected = (
            ('open', open_),
            ('high', high),
            ('low', low),
            ('close', close),
            ('volume', volume),
        )
        for field, value in expected:
            actual = self.reader.get_value(sid, minute, field)
            self.assertEqual(
                value, actual,
                '%s: expected %r, got %r' % (field, value, actual))

    def test_write_one_ohlcv(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, 10.0, 20.0, 30.0, 40.0, 50.0)

    def test_write_two_bars(self):
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute_0, 10.0, 20.0, 30.0, 40.0, 50.0)
        self._assert_bar(sid, minute_1, 11.0, 21.0, 31.0, 41.0, 51.0)

    def test_write_on_second_day(self):
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        self._assert_bar(sid, minute, 10.0, 20.0, 30.0, 40.0, 50.0)

    def test_write_empty(self):
        # A bar written as all zeros is expected to read back as NaN prices
        # and a zero volume.
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        for field in ('open', 'high', 'low', 'close'):
            price = self.reader.get_value(sid, minute, field)
            assert_almost_equal(nan, price)

        volume = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume)

    def test_write_on_multiple_days(self):
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write(sid, data)

        self._assert_bar(sid, minutes[0], 10.0, 20.0, 30.0, 40.0, 50.0)
        self._assert_bar(sid, minutes[1], 11.0, 21.0, 31.0, 41.0, 51.0)

    def test_no_overwrite(self):
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        # Writing the same minute for the same sid a second time must fail.
        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
Exemple #43
0
class SendFileResponseTest(TestCase):
    u"""
    Tests ``send_file_response()`` function. Checks that regular files are sent correctly, but
    sending non-regular or non-existent files raises an exception. Also checks that if the request
    has ``HTTP_IF_MODIFIED_SINCE`` header, the file is sent only if it was changed since then.
    Finally checks if ``Last-Modified``, ``Content-Disposition`` and ``Content-Length`` headers are
    set correctly.
    """
    def file_view(request):
        # Plain view function (no ``self``): it is only referenced by ``urls`` below, while the
        # class body is being executed.
        path = request.GET[u'path']
        name = request.GET[u'name']
        content_type = request.GET[u'content-type']
        return send_file_response(request, path, name, content_type)

    urls = patterns(
        u'',
        url(r'^file/$', file_view),
    )

    def setUp(self):
        self.tempdir = TempDirectory()

    def tearDown(self):
        self.tempdir.cleanup()

    def _create_file(self, filename=u'myfile.tmp', content=u'Some text.'):
        # Writes ``content`` to ``filename`` in the temp directory and returns its full path.
        self.tempdir.write(filename, content)
        return self.tempdir.getpath(filename)

    def _request_file(self,
                      path,
                      name=u'filename.bin',
                      content_type=u'text/plain',
                      **kwargs):
        # Requests ``/file/`` through the test client; extra ``kwargs`` are passed on to
        # ``client.get()`` (the tests use them for request headers).
        params = urlencode({
            u'path': path,
            u'name': name,
            u'content-type': content_type
        })
        return self.client.get(u'/file/?%s' % params, **kwargs)

    def _check_response(self, response, klass, status_code):
        self.assertIs(type(response), klass)
        self.assertEqual(response.status_code, status_code)

    def _check_content(self, response, path):
        # The streamed chunks are joined as bytes: a unicode separator would force an implicit
        # ASCII decode on Python 2 and is a TypeError on Python 3.
        with open(path, 'rb') as f:
            content = f.read()
        self.assertEqual(b''.join(response.streaming_content), content)

    def test_regular_file(self):
        path = self._create_file()
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_directory_raises_exception(self):
        with self.assertRaisesMessage(OSError, u'Not a regular file: /'):
            self._request_file(u'/')

    def test_nonexistent_file_raises_exception(self):
        with self.assertRaisesMessage(
                OSError,
                u"[Errno 2] No such file or directory: '/nonexistent.txt'"):
            self._request_file(u'/nonexistent.txt')

    def test_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_if_modified_since_with_modified_file(self):
        u"""
        Checks that if the ``HTTP_IF_MODIFIED_SINCE`` header is later than the file's
        modification time, the response is ``304 Not Modified``.

        NOTE: the method name suggests the opposite scenario; it is kept unchanged.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp + 1000000

        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(
            path,
            HTTP_IF_MODIFIED_SINCE=http_date(if_modified_since_timestamp))
        self._check_response(response, HttpResponseNotModified, 304)

    def test_if_modified_since_with_unmodified_file(self):
        u"""
        Checks that if the ``HTTP_IF_MODIFIED_SINCE`` header is earlier than the file's
        modification time, the file is sent with status 200.

        NOTE: the method name suggests the opposite scenario; it is kept unchanged.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp - 1000000

        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(
            path,
            HTTP_IF_MODIFIED_SINCE=http_date(if_modified_since_timestamp))
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_last_modified_response_header(self):
        modified_timestamp = 1413500000

        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(path)
        self.assertEqual(response[u'Last-Modified'],
                         u'Thu, 16 Oct 2014 22:53:20 GMT')

    def test_content_length_header(self):
        path = self._create_file(content=u'1234567890')
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], u'10')

    def test_content_length_header_for_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], str(len(content)))

    def test_content_disposition_header(self):
        path = self._create_file()
        response = self._request_file(path, u'thefile.txt')
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''thefile.txt")

    def test_content_disposition_header_with_space(self):
        path = self._create_file()
        response = self._request_file(path, u'the file.txt')
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''the%20file.txt")

    def test_content_disposition_header_with_diacritic(self):
        path = self._create_file()
        response = self._request_file(path, u'ľťéŠÝÄÚ.txt')
        self.assertEqual(
            response[u'Content-Disposition'],
            u"attachment; filename*=UTF-8''%C4%BE%C5%A5%C3%A9%C5%A0%C3%9D%C3%84%C3%9A.txt"
        )

    def test_content_disposition_header_with_random_unicode_junk(self):
        path = self._create_file()
        name = random_string(
            20,
            chars=
            u'BacòԉíρsûϻᏧolrѕìtãmeéӽѵ߀ɭpèлuin.Iüà,ɦëǥhƅɢïêgԁSùúâɑfäbƃdkϳɰյƙyáFХ-åɋw'
        )
        response = self._request_file(path, name)
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''%s" % urlquote(name))
Exemple #44
0
class Test_archivepgsql_BasicCommandLineOperation(TestCase):
    """
    Runs the ``archivewal`` command-line script as a subprocess against a
    temporary config file and repository, checking its exit status, its log
    output and the files it leaves in the repository directory.
    """
    ARCHIVEPGSQL_PATH = os.path.join('bbpgsql', 'cmdline_scripts')
    CONFIG_FILE = 'config.ini'
    exe_script = 'archivewal'

    def setUp(self):
        self.setup_environment()
        self.setup_config()
        self.cmd = [self.exe_script, '--dry-run', '--config', self.config_path]

    def setup_environment(self):
        # os.environ.copy() returns a plain dict. A deepcopy of os.environ
        # is another environ mapping whose item assignment still calls
        # putenv(), so the PATH change would leak into this process.
        self.env = os.environ.copy()
        self.env['PATH'] = os.pathsep.join([
            self.env['PATH'],
            self.ARCHIVEPGSQL_PATH])
        self.tempdir = TempDirectory()

    def setup_config(self):
        # Writes the config file and creates an empty WAL file under pg_xlog.
        self.storage_path = self.tempdir.makedir('repo')
        self.config_path = self.tempdir.getpath(self.CONFIG_FILE)
        self.log_file = self.tempdir.getpath('bbpgsql.log')
        self.config_dict = {
            'WAL':  {
                'driver': 'filesystem',
                'path': self.storage_path,
            },
            'General': {
                'pgsql_data_directory': self.tempdir.path,
            },
            'Logging': {
                'logfile': self.log_file,
                'level': 'DEBUG',
            },
        }
        write_config_to_filename(self.config_dict, self.config_path)
        self.pg_xlog_path = 'pg_xlog'
        self.tempdir.makedir(self.pg_xlog_path)
        self.wal_basename = '00001'
        self.wal_filename = os.path.join(self.pg_xlog_path, self.wal_basename)
        self.tempdir.write(self.wal_filename, '')
        print('TEMPDIR %s' % (self.tempdir.listdir(recursive=True),))

    def tearDown(self):
        self.tempdir.cleanup()

    def _run_cmd(self):
        # Runs self.cmd and returns (returncode, combined stdout/stderr).
        # communicate() drains the pipe while waiting; wait() alone can
        # deadlock once the child has filled the pipe buffer.
        proc = Popen(self.cmd, env=self.env, stdout=PIPE, stderr=STDOUT)
        output, _ = proc.communicate()
        print(output)
        return proc.returncode, output

    def test_archivewal_returns_error_with_if_less_than_one_argument(self):
        returncode, _ = self._run_cmd()
        self.assertNotEqual(0, returncode)

    def test_archivewal_logs_error_with_if_less_than_one_argument(self):
        returncode, _ = self._run_cmd()
        self.assertNotEqual(0, returncode)
        with open(self.log_file, 'rb') as log:
            log_output = log.read()
        print('log_output:')
        print(log_output)
        # The log is read as bytes, so it is searched with a bytes literal.
        self.assertTrue(b'ERROR' in log_output,
                        'no ERROR entry found in the log file')

    def test_archivewal_success_with_file(self):
        self.cmd.append(self.wal_filename)
        returncode, _ = self._run_cmd()
        self.assertEqual(0, returncode)

    def test_archivewal_actually_archives_file(self):
        self.cmd.append(self.wal_filename)
        returncode, _ = self._run_cmd()
        self.assertEqual(0, returncode)
        archives = os.listdir(self.storage_path)
        print(archives)
        # Fail with a readable message rather than an IndexError below.
        self.assertTrue(archives, 'no archive was written to the repository')
        self.assertTrue(archives[0].startswith(self.wal_basename + '_'))
class SendFileResponseTest(TestCase):
    u"""
    Tests ``send_file_response()`` function. Checks that regular files are sent correctly, but
    sending non-regular or non-existent files raises an exception. Also checks that if the request
    has ``HTTP_IF_MODIFIED_SINCE`` header, the file is sent only if it was changed since then.
    Finally checks if ``Last-Modified``, ``Content-Disposition`` and ``Content-Length`` headers are
    set correctly.
    """

    def file_view(request):
        # Plain view function (no ``self``): it is only referenced by ``urls`` below, while the
        # class body is being executed.
        path = request.GET[u'path']
        name = request.GET[u'name']
        content_type = request.GET[u'content-type']
        return send_file_response(request, path, name, content_type)

    urls = patterns(u'',
        url(r'^file/$', file_view),
        )

    def setUp(self):
        self.tempdir = TempDirectory()

    def tearDown(self):
        self.tempdir.cleanup()

    def _create_file(self, filename=u'myfile.tmp', content=u'Some text.'):
        # Writes ``content`` to ``filename`` in the temp directory and returns its full path.
        self.tempdir.write(filename, content)
        return self.tempdir.getpath(filename)

    def _request_file(self, path, name=u'filename.bin', content_type=u'text/plain', **kwargs):
        # Requests ``/file/`` through the test client; extra ``kwargs`` are passed on to
        # ``client.get()`` (the tests use them for request headers).
        params = urlencode({u'path': path, u'name': name, u'content-type': content_type})
        return self.client.get(u'/file/?%s' % params, **kwargs)

    def _check_response(self, response, klass, status_code):
        self.assertIs(type(response), klass)
        self.assertEqual(response.status_code, status_code)

    def _check_content(self, response, path):
        # The streamed chunks are joined as bytes: a unicode separator would force an implicit
        # ASCII decode on Python 2 and is a TypeError on Python 3.
        with open(path, 'rb') as f:
            content = f.read()
        self.assertEqual(b''.join(response.streaming_content), content)

    def test_regular_file(self):
        path = self._create_file()
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_directory_raises_exception(self):
        with self.assertRaisesMessage(OSError, u'Not a regular file: /'):
            self._request_file(u'/')

    def test_nonexistent_file_raises_exception(self):
        with self.assertRaisesMessage(OSError, u"[Errno 2] No such file or directory: '/nonexistent.txt'"):
            self._request_file(u'/nonexistent.txt')

    def test_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_if_modified_since_with_modified_file(self):
        u"""
        Checks that if the ``HTTP_IF_MODIFIED_SINCE`` header is later than the file's
        modification time, the response is ``304 Not Modified``.

        NOTE: the method name suggests the opposite scenario; it is kept unchanged.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp + 1000000

        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(path, HTTP_IF_MODIFIED_SINCE=http_date(if_modified_since_timestamp))
        self._check_response(response, HttpResponseNotModified, 304)

    def test_if_modified_since_with_unmodified_file(self):
        u"""
        Checks that if the ``HTTP_IF_MODIFIED_SINCE`` header is earlier than the file's
        modification time, the file is sent with status 200.

        NOTE: the method name suggests the opposite scenario; it is kept unchanged.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp - 1000000

        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(path, HTTP_IF_MODIFIED_SINCE=http_date(if_modified_since_timestamp))
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_last_modified_response_header(self):
        modified_timestamp = 1413500000

        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(path)
        self.assertEqual(response[u'Last-Modified'], u'Thu, 16 Oct 2014 22:53:20 GMT')

    def test_content_length_header(self):
        path = self._create_file(content=u'1234567890')
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], u'10')

    def test_content_length_header_for_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], str(len(content)))

    def test_content_disposition_header(self):
        path = self._create_file()
        response = self._request_file(path, u'thefile.txt')
        self.assertEqual(response[u'Content-Disposition'], u"attachment; filename*=UTF-8''thefile.txt")

    def test_content_disposition_header_with_space(self):
        path = self._create_file()
        response = self._request_file(path, u'the file.txt')
        self.assertEqual(response[u'Content-Disposition'], u"attachment; filename*=UTF-8''the%20file.txt")

    def test_content_disposition_header_with_diacritic(self):
        path = self._create_file()
        response = self._request_file(path, u'ľťéŠÝÄÚ.txt')
        self.assertEqual(response[u'Content-Disposition'], u"attachment; filename*=UTF-8''%C4%BE%C5%A5%C3%A9%C5%A0%C3%9D%C3%84%C3%9A.txt")

    def test_content_disposition_header_with_random_unicode_junk(self):
        path = self._create_file()
        name = random_string(20, chars=u'BacòԉíρsûϻᏧolrѕìtãmeéӽѵ߀ɭpèлuin.Iüà,ɦëǥhƅɢïêgԁSùúâɑfäbƃdkϳɰյƙyáFХ-åɋw')
        response = self._request_file(path, name)
        self.assertEqual(response[u'Content-Disposition'], u"attachment; filename*=UTF-8''%s" % urlquote(name))