class GitHelper(object):
    """Test-helper mixin that runs git commands inside a TempDirectory."""

    # Default repository sub-directory used when no repo is given.
    repo = 'local/'

    def setUp(self):
        # One throwaway directory per test, removed automatically.
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)

    def git(self, command, repo=None):
        """Run ``git <command>`` in ``repo`` (default: self.repo).

        Fails the test with git's combined output if the command errors.
        """
        where = self.dir.getpath(repo or self.repo)
        args = ['git'] + command.split()
        try:
            return check_output(args, cwd=where, stderr=STDOUT)
        except CalledProcessError as err:
            self.fail(err.output)

    def git_rev_parse(self, label, repo=None):
        """Return the abbreviated hash that ``label`` resolves to."""
        return self.git('rev-parse --verify -q --short ' + label, repo).strip()

    def check_tags(self, expected, repo=None):
        """Assert the repo's tags map exactly to ``expected`` {tag: short hash}."""
        actual = {
            tag: self.git_rev_parse(tag, repo)
            for tag in self.git('tag', repo).split()
        }
        compare(expected, actual=actual)

    def make_repo_with_content(self, repo):
        """Initialise ``repo`` if missing, then commit three small files."""
        if not os.path.exists(self.dir.getpath(repo)):
            self.dir.makedir(repo)
            self.git('init', repo)
        for name, content in (('a', 'some content'),
                              ('b', 'other content'),
                              ('c', 'more content')):
            self.dir.write(repo + name, content)
        self.git('add .', repo)
        self.git('commit -m initial', repo)
class TestPrepareTarget(TestCase):
    """Tests for prepare_target(): locating the generated-code target module."""

    def setUp(self):
        # Work in a scratch directory and point the generator's TARGET_ROOT
        # at it so no real files are touched.
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)
        replace = Replacer()
        replace('workfront.generate.TARGET_ROOT', self.dir.path)
        self.addCleanup(replace.restore)
        self.session = Session('test')

    def test_from_scratch(self):
        # With no existing module, the path is returned but nothing is written.
        target = prepare_target(self.session)
        compare(target, expected=self.dir.getpath('unsupported.py'))
        self.dir.compare(expected=[])

    def test_everything(self):
        # An existing module is left in place with its contents untouched.
        self.dir.write('unsupported.py', b'yy')
        target = prepare_target(self.session)
        compare(target, expected=self.dir.getpath('unsupported.py'))
        self.dir.compare(expected=['unsupported.py'])
        compare(self.dir.read('unsupported.py'), b"yy")

    def test_dots_in_version(self):
        # Dots in the api version are dropped from the module filename.
        target = prepare_target(Session('test', api_version='v4.0'))
        compare(target, expected=self.dir.getpath('v40.py'))
        self.dir.compare(expected=[])
class TestPrepareTarget(TestCase):
    """Tests for prepare_target(): resolving the generated-code target file."""

    def setUp(self):
        # Use a throwaway directory and point the generator's TARGET_ROOT at
        # it so the tests never touch real files.
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)
        replace = Replacer()
        replace('workfront.generate.TARGET_ROOT', self.dir.path)
        self.addCleanup(replace.restore)
        self.session = Session('test')

    def test_from_scratch(self):
        # With no existing file, the target path is returned but not created
        # (the directory comparison below shows it stays empty).
        path = prepare_target(self.session)
        compare(path, expected=self.dir.getpath('unsupported.py'))
        self.dir.compare(expected=[])

    def test_everything(self):
        # An existing target file is left with its contents intact.
        self.dir.write('unsupported.py', b'yy')
        path = prepare_target(self.session)
        compare(path, expected=self.dir.getpath('unsupported.py'))
        self.dir.compare(expected=['unsupported.py'])
        compare(self.dir.read('unsupported.py'), b"yy")

    def test_dots_in_version(self):
        # Dots in the api version are dropped from the module filename.
        path = prepare_target(Session('test', api_version='v4.0'))
        compare(path, expected=self.dir.getpath('v40.py'))
        self.dir.compare(expected=[])
class GitHelper(object):
    """Test-helper mixin: runs git commands against a repo in a TempDirectory."""

    # Default repository sub-directory used when no repo is passed explicitly.
    repo = 'local/'

    def setUp(self):
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)

    def git(self, command, repo=None):
        """Run ``git <command>`` in ``repo`` (default: self.repo).

        Fails the test with git's combined output if the command errors.
        """
        repo_path = self.dir.getpath(repo or self.repo)
        try:
            # Capture stderr too so failures carry the full git output.
            return check_output(['git'] + command.split(), cwd=repo_path,
                                stderr=STDOUT)
        except CalledProcessError as e:
            self.fail(e.output)

    def git_rev_parse(self, label, repo=None):
        """Return the abbreviated hash that ``label`` resolves to."""
        return self.git('rev-parse --verify -q --short '+label, repo).strip()

    def check_tags(self, expected, repo=None):
        """Assert the repo's tags map exactly to ``expected`` {tag: short hash}."""
        actual = {}
        for tag in self.git('tag', repo).split():
            actual[tag] = self.git_rev_parse(tag, repo)
        compare(expected, actual=actual)

    def make_repo_with_content(self, repo):
        """Initialise ``repo`` if missing, then commit three small files."""
        if not os.path.exists(self.dir.getpath(repo)):
            self.dir.makedir(repo)
            self.git('init', repo)
        self.dir.write(repo + 'a', 'some content')
        self.dir.write(repo + 'b', 'other content')
        self.dir.write(repo + 'c', 'more content')
        self.git('add .', repo)
        self.git('commit -m initial', repo)
class HomeDirTest(unittest.TestCase):
    """Tests for PathHomeDir: file IO, config-file handling and last-cluster state.

    Fixes applied: the legacy octal literal ``0777`` (a syntax error on
    Python 3) is now ``0o777`` (same value on Python 2.6+), and the
    deprecated ``assertEquals`` alias is replaced with ``assertEqual``.
    """

    def setUp(self):
        self.temp_dir = TempDirectory(create=True)
        self.home = PathHomeDir(self.temp_dir.path)

    def tearDown(self):
        self.temp_dir.cleanup()

    def test_read(self):
        self.temp_dir.write("filename", "contents")
        self.assertEqual(self.home.read("filename"), "contents")

    def test_write(self):
        # Writing must create new files and overwrite existing ones.
        self.temp_dir.write("existing_file", "existing_contents")
        self.home.write("new_file", "contents")
        self.home.write("existing_file", "new_contents")
        self.assertEqual(self.temp_dir.read("existing_file"), "new_contents")
        self.assertEqual(self.temp_dir.read("new_file"), "contents")

    def test_config_file(self):
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            self.temp_dir.check(".cosmosrc")
            self.assertEqual(self.home.read_config_file(), "new config")
            self.assertIn("Settings saved", outputs.stdout.getvalue())
            # Saved config must be a regular file, owner read/write only.
            file_mode = os.stat(self.temp_dir.getpath(".cosmosrc")).st_mode
            self.assertEqual(file_mode,
                             stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR)

    def test_override_config_file(self):
        with collect_outputs():
            other_config = self.temp_dir.write("path/other", "config")
            self.assertEqual(
                self.home.read_config_file(filename_override=other_config),
                "config")

    def test_warn_on_unprotected_config_file(self):
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            config_path = self.temp_dir.getpath(".cosmosrc")
            # 0o777 (was legacy-octal 0777): world-writable triggers a warning.
            os.chmod(config_path, 0o777)
            self.home.read_config_file()
            # Windows has no POSIX permission bits, so no warning is expected.
            assertFunc = (self.assertNotIn if os.name == 'nt'
                          else self.assertIn)
            assertFunc("WARNING", outputs.stderr.getvalue())

    def test_last_cluster(self):
        self.home.write_last_cluster("0000000")
        self.temp_dir.check(".cosmoslast")
        self.assertEqual(self.home.read_last_cluster(), "0000000")
def test_allergy_combine_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """ Tests the allergy combine transform """
    out_path = dir.getpath("allergys.json")
    with open("tests/resources/allergys.csv") as f_csv, open(out_path, "w") as f_json:
        transformer = AllergyToJson(combine=True)
        transformer.csv_to_json(f_csv, f_json)
    with open(out_path) as f_json:
        lines = f_json.readlines()
    # Combining should produce a single JSON document holding both records.
    assert len(lines) == 1
    record = json.loads(lines[0])
    assert record["allergys"][0]["onset"] == "20180724"
    assert record["allergys"][1]["onset"] == "20110101"
def test_problem_combine_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """ Tests the problem combine transform """
    out_path = dir.getpath("problems.json")
    with open("tests/resources/problems.csv") as f_csv, open(out_path, "w") as f_json:
        transformer = ProblemToJson(combine=True)
        transformer.csv_to_json(f_csv, f_json)
    with open(out_path) as f_json:
        lines = f_json.readlines()
    # Combining should produce a single JSON document holding both records.
    assert len(lines) == 1
    record = json.loads(lines[0])
    assert record["problems"][0]["annotated_display"] == "This is the annotated display."
    assert record["problems"][1]["annotated_display"] == "This is another annotated display."
class HomeDirTest(unittest.TestCase):
    """Tests for PathHomeDir: file IO, config-file handling and last-cluster state.

    NOTE(review): the ``0777`` octal literal and the ``assertEquals`` alias
    indicate this module targets Python 2; ``0777`` is a syntax error on
    Python 3.
    """

    def setUp(self):
        self.temp_dir = TempDirectory(create=True)
        self.home = PathHomeDir(self.temp_dir.path)

    def tearDown(self):
        self.temp_dir.cleanup()

    def test_read(self):
        self.temp_dir.write("filename", "contents")
        self.assertEquals(self.home.read("filename"), "contents")

    def test_write(self):
        # Writing must create new files and overwrite existing ones.
        self.temp_dir.write("existing_file", "existing_contents")
        self.home.write("new_file", "contents")
        self.home.write("existing_file", "new_contents")
        self.assertEquals(self.temp_dir.read("existing_file"),
                          "new_contents")
        self.assertEquals(self.temp_dir.read("new_file"), "contents")

    def test_config_file(self):
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            self.temp_dir.check(".cosmosrc")
            self.assertEquals(self.home.read_config_file(), "new config")
            self.assertIn("Settings saved", outputs.stdout.getvalue())
            # Saved config must be a regular file, owner read/write only.
            file_mode = os.stat(self.temp_dir.getpath(".cosmosrc")).st_mode
            self.assertEquals(file_mode,
                              stat.S_IFREG | stat.S_IRUSR | stat.S_IWUSR)

    def test_override_config_file(self):
        with collect_outputs():
            other_config = self.temp_dir.write("path/other", "config")
            self.assertEquals(
                self.home.read_config_file(filename_override=other_config),
                "config")

    def test_warn_on_unprotected_config_file(self):
        with collect_outputs() as outputs:
            self.home.write_config_file("new config")
            config_path = self.temp_dir.getpath(".cosmosrc")
            os.chmod(config_path, 0777)
            self.home.read_config_file()
            # Windows has no POSIX permission bits, so no warning is expected.
            assertFunc = (self.assertNotIn if os.name=='nt' else self.assertIn)
            assertFunc("WARNING", outputs.stderr.getvalue())

    def test_last_cluster(self):
        self.home.write_last_cluster("0000000")
        self.temp_dir.check(".cosmoslast")
        self.assertEquals(self.home.read_last_cluster(), "0000000")
class TestPathSource(TestCase):
    """Schema and interface tests for the 'paths' source plugin."""

    def setUp(self):
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)

    def test_abc(self):
        self.assertTrue(issubclass(Plugin, Source))

    def test_schema_ok(self):
        # Existing absolute paths validate and pass through unchanged.
        path_a = self.dir.write('foo', b'f')
        path_b = self.dir.write('bar', b'b')
        expected = {'type': 'paths', 'values': [path_a, path_b],
                    'repo': 'config'}
        supplied = {'type': 'paths', 'values': [path_a, path_b],
                    'repo': 'config'}
        compare(expected, Plugin.schema(supplied))

    def test_schema_wrong_type(self):
        message = "not a valid value for dictionary value @ data['type']"
        with ShouldFailSchemaWith(message):
            Plugin.schema({'type': 'bar', 'values': ['/']})

    def test_schema_extra_keys(self):
        with ShouldFailSchemaWith("extra keys not allowed @ data['foo']"):
            Plugin.schema({'type': 'paths', 'foo': 'bar'})

    def test_name_supplied(self):
        message = "not a valid value for dictionary value @ data['name']"
        with ShouldFailSchemaWith(message):
            Plugin.schema({'type': 'paths', 'name': 'foo'})

    def test_no_paths(self):
        message = ("length of value must be at least 1 for dictionary value "
                   "@ data['values']")
        with ShouldFailSchemaWith(message):
            Plugin.schema({'type': 'paths', 'values': []})

    def test_path_not_string(self):
        message = "invalid list value @ data['values'][0]"
        with ShouldFailSchemaWith(message):
            Plugin.schema({'type': 'paths', 'values': [1]})

    def test_path_not_starting_with_slash(self):
        message = "invalid list value @ data['values'][0]"
        with ShouldFailSchemaWith(message):
            Plugin.schema({'type': 'paths', 'values': ['foo']})

    def test_path_not_there(self):
        message = "invalid list value @ data['values'][0]"
        with ShouldFailSchemaWith(message):
            Plugin.schema({'type': 'paths',
                           'values': [self.dir.getpath('bad')]})

    def test_interface(self):
        plugin = Plugin('source', name=None, repo='config',
                        values=['/foo/bar'])
        compare(plugin.type, 'source')
        compare(plugin.name, None)
        compare(plugin.repo, 'config')
        compare(plugin.source_paths, ['/foo/bar'])
def setUpModule():
    """Configure module-level test settings.

    Chooses the database (env override or a throwaway SQLite file migrated
    via alembic) and the registration cache / session backends (redis when
    USE_REDIS is set, otherwise dbm with a fake lock).
    """
    import os
    d = TempDirectory()
    if os.getenv('SQLALCHEMY_URL'):
        # Explicit database supplied by the environment (e.g. CI).
        settings['sqlalchemy.url'] = os.getenv('SQLALCHEMY_URL')
    else:
        # Fall back to a throwaway SQLite database in the temp directory
        # and bring its schema up to date with alembic.
        sqlalchemy_url = "sqlite:///%(here)s/sns.sqlite" % dict(here=d.path)
        settings['sqlalchemy.url'] = sqlalchemy_url
        from alembic.config import Config
        from alembic import command
        alembic_cfg = Config("alembic.ini")
        alembic_cfg.set_main_option('sqlalchemy.url', sqlalchemy_url)
        command.upgrade(alembic_cfg, "head")
    if os.getenv('USE_REDIS'):
        settings['cache.registration.backend'] = 'dogpile.cache.redis'
        settings['pyramid.includes'].append('pyramid_redis_sessions')
    else:
        from sns.testing import FakeLock
        settings['cache.registration.backend'] = "dogpile.cache.dbm"
        settings['cache.registration.arguments.filename'] = d.getpath(
            'registration.dbm')
        settings['cache.registration.arguments.lock_factory'] = FakeLock
        settings['pyramid.includes'].append('sns.signed_cookie_session')
class MultiFileMultiRegexTestCase(unittest.TestCase):
    """Tests for MultiFileMultiRegex.find_all over a small fixture tree."""

    def setUp(self):
        # Three files: two share the same content, one differs.
        self.d1 = TempDirectory()
        self.d1_path = self.d1.getpath('')
        self.d1.write('foo.txt', 'some nonsense 1')
        self.d1.write('bar.txt', 'some nonsense 1')
        self.d1.write('baz.txt', 'some nonsense 2')
        self.all_files = [self.d1_path + name
                          for name in os.listdir(self.d1_path)]

    def test_2_files_match(self):
        finder = MultiFileMultiRegex(self.all_files, ['some nonsense 1'])
        match_groups = finder.find_all()
        expected_files = [self.d1_path + name
                          for name in ('foo.txt', 'bar.txt')]
        self.assertItemsEqual(match_groups.keys(), expected_files)
        self.assertItemsEqual(
            match_groups.values(),
            [{'some nonsense 1': ['some nonsense 1']},
             {'some nonsense 1': ['some nonsense 1']}])

    def test_all_files_match(self):
        finder = MultiFileMultiRegex(self.all_files, ['some nonsense \d'])
        match_groups = finder.find_all()
        self.assertItemsEqual(match_groups.keys(), self.all_files)
        self.assertItemsEqual(
            match_groups.values(),
            [{'some nonsense \d': ['some nonsense 1']},
             {'some nonsense \d': ['some nonsense 1']},
             {'some nonsense \d': ['some nonsense 2']}])
class Test_SnapshotArchive_Repository(TestCase):
    """Tests committing and retrieving snapshot archives from a BBRepository."""

    def setUp(self):
        store = MemoryCommitStorage()
        self.repo = BBRepository(store)
        self.tempdir = TempDirectory()
        self.setup_archive_a_snapshot()

    def setup_archive_a_snapshot(self):
        # Commit one archive with known WAL boundaries for the tests to query.
        archive_name = 'somearchive.tgz'
        self.archive_contents = '123'
        self.archive_path = self.tempdir.write(archive_name,
                                               self.archive_contents)
        self.tag = generate_tag()
        self.first_WAL = '01234'
        self.last_WAL = '45678'
        commit_snapshot_to_repository(self.repo, self.archive_path, self.tag,
                                      self.first_WAL, self.last_WAL)

    def tearDown(self):
        self.tempdir.cleanup()

    def test_can_retrieve_snapshot_contents_with_tag(self):
        commit = [i for i in self.repo][-1]
        restore_path = self.tempdir.getpath('restorearchive.tgz')
        commit.get_contents_to_filename(restore_path)
        # Fix: close the restored file deterministically (the original
        # `open(...).read()` leaked the handle to the garbage collector).
        with open(restore_path, 'rb') as restored:
            self.assertEqual(self.archive_contents, restored.read())

    def test_get_first_WAL_file_for_archived_snapshot_with_tag(self):
        self.assertEqual(self.first_WAL, get_first_WAL(self.repo, self.tag))

    def test_get_last_WAL_file_for_archived_snapshot_with_tag(self):
        self.assertEqual(self.last_WAL, get_last_WAL(self.repo, self.tag))
def test_with_user(self, repo: Repo, tmpdir: TempDirectory):
    """Cloning with an explicit User records name and email in the clone's config."""
    repo.commit_content('a')
    # Fix: the User email was a redacted placeholder ('*****@*****.**')
    # that could never satisfy the assertion below; restore the literal
    # the assertion expects.
    git = Git.clone(repo.path, tmpdir.getpath('clone'),
                    User(name='Foo Bar', email='foo@example.com'))
    config = (git.path / '.git' / 'config').read_text()
    assert 'name = Foo Bar' in config
    assert 'email = foo@example.com' in config
class BaseTestCase(unittest.TestCase):
    """Checkout unittest.

    Sets up a chain of two files in one temp directory: the first file's
    *contents* are the name of the next file to parse.
    """

    def setUp(self):
        self.dir = TempDirectory() if False else None  # noqa: placeholder removed below
def test_repo(self, repo: Repo, tmpdir: TempDirectory):
    """A Git instance (not just a path) can be used as the clone source."""
    repo.commit_content('a')
    source = Git(repo.path)
    git = Git.clone(source, tmpdir.getpath('clone'))
    commit, = git('log', '--format=%h').split()
    expected_show = ('a commit\n'
                     ' a | 1 +\n'
                     ' 1 file changed, 1 insertion(+)\n')
    compare(git('show', '--pretty=format:%s', '--stat', commit),
            expected=expected_show)
def test_problem_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """ Tests the problem transform """
    out_path = dir.getpath("problem.json")
    with open("tests/resources/problem.csv") as f_csv, open(out_path, "w") as f_json:
        transformer = ProblemToJson()
        spy = mocker.spy(transformer, 'transform')
        transformer.csv_to_json(f_csv, f_json)
    # The transform should have produced the expected in-memory record.
    assert spy.spy_return
    assert spy.spy_return["patient"]["birth_date"] == "19500701143000"
    assert len(spy.spy_return["problems"]) == 1
    assert spy.spy_return["problems"][0]["annotated_display"] == "This is the annotated display."
    # The serialized output should round-trip to the same values.
    with open(out_path) as f_json:
        lines = f_json.readlines()
    assert len(lines) == 1
    record = json.loads(lines[0])
    assert record["patient"]["birth_date"] == "19500701143000"
    assert record["problems"][0]["annotated_display"] == "This is the annotated display."
def test_allergy_transform(dir: TempDirectory, mocker: MockerFixture) -> None:
    """ Tests the allergy transform """
    out_path = dir.getpath("allergy.json")
    with open("tests/resources/allergy.csv") as f_csv, open(out_path, "w") as f_json:
        transformer = AllergyToJson()
        spy = mocker.spy(transformer, 'transform')
        transformer.csv_to_json(f_csv, f_json)
    # The transform should have produced the expected in-memory record.
    assert spy.spy_return
    assert spy.spy_return["patient"]["birth_date"] == "19500701143000"
    assert len(spy.spy_return["allergys"]) == 1
    assert spy.spy_return["allergys"][0]["onset"] == "20180724"
    # The serialized output should round-trip to the same values.
    with open(out_path) as f_json:
        lines = f_json.readlines()
    assert len(lines) == 1
    record = json.loads(lines[0])
    assert record["patient"]["birth_date"] == "19500701143000"
    assert record["allergys"][0]["onset"] == "20180724"
class RealParserWalkerTestCase(BaseTestCase):
    """Test when first_file and parse files are not in the same directory."""

    def setUp(self):
        super(RealParserWalkerTestCase, self).setUp()
        # Second directory: bar.py here points at baz.py, which ends the chain.
        self.d2 = TempDirectory()
        self.dirname = self.d2.getpath('')
        self.parse_file_2_name = b'baz.py'
        self.d2.write(self.parse_file_1_name, self.parse_file_2_name)
        self.d2.write(self.parse_file_2_name, b'Some nonsense')
        # Re-point file 1's path at the copy in d2 (shadows the base class's).
        self.parse_file_1_path = self.d2.getpath(self.parse_file_1_name)
        self.parse_file_2_path = self.d2.getpath(self.parse_file_2_name)
        self.pw = ParserWalker(self.first_file_path, self.dirname)

    def test_dif(self):
        """Test second condition. Buy one get one free fruit tea."""
        # parse_files should yield the whole chain across both directories.
        expected = self.pw.parse_files()
        self.assertItemsEqual(list(expected), [
            self.first_file_path, self.parse_file_1_path,
            self.parse_file_2_path
        ])
def test_minimal(self, repo: Repo, tmpdir: TempDirectory):
    """A minimal clone preserves the commit and sets up an 'origin' remote."""
    # Fix: renamed local 'hash' -> 'commit_hash'; it shadowed the builtin.
    commit_hash = repo.commit_content('a')
    git = Git.clone(repo.path, tmpdir.getpath('clone'))
    commit, = git('log', '--format=%h').split()
    compare(commit_hash, expected=commit)
    compare(git.git('show', '--pretty=format:%s', '--stat', commit),
            expected=('a commit\n'
                      ' a | 1 +\n'
                      ' 1 file changed, 1 insertion(+)\n'))
    compare(git('remote', '-v').split(),
            expected=[
                'origin', str(repo.path), '(fetch)',
                'origin', str(repo.path), '(push)'
            ])
class TestHelpers(object):
    """Mixin providing a temp working directory and a helper to run main()."""

    def setUp(self):
        self.dir = TempDirectory()
        self.missing = self.dir.getpath('missing')
        self.path = search_path()

    def tearDown(self):
        self.dir.cleanup()

    def run_main(self, args=(), output='', return_code=0):
        """Run main() with patched argv/PATH/clock inside the temp dir and
        assert on its (timestamp-normalised) output and exit code.

        NOTE(review): the default ``args=()`` would raise TypeError at
        ``['x'] + args`` (list + tuple) — callers appear to always pass a
        list; confirm before relying on the default.
        """
        # so we don't leave log handlers lying around...
        # ...level is so that we check the log level is correctly set
        # in setup_logging
        with LogCapture(level=100):
            with Replacer() as r:
                # set argv
                argv = ['x'] + args
                r.replace('sys.argv', argv)
                # freeze the clock so output timestamps are predictable
                r.replace('picky.main.datetime',
                          test_datetime(2001, 1, 2, 3, 4, 5))
                # set PATH env variable
                r.replace('os.environ.PATH', self.path)
                # change to tempdir
                cwd = os.getcwd()
                try:
                    os.chdir(self.dir.path)
                    # get the exit code
                    with ShouldRaise(SystemExit) as s:
                        # capture output
                        with OutputCapture() as actual:
                            main()
                finally:
                    os.chdir(cwd)
                # compare output, with timestamp subbed out
                captured = re.sub('[\d\- :]{19}', '(ts)', actual.captured)
                compare(output, captured)
                # compare return code
                compare(return_code, s.raised.code)
class DatabaseHandlerTests(TestCase):
    """Tests for DatabaseHandler's transaction handling and error logging."""

    def setUp(self):
        self.dir = TempDirectory()
        # Fix: register cleanups with addCleanup instead of tearDown —
        # cleanups still run if a later part of setUp fails, and this
        # matches the addCleanup-based variant of this test case elsewhere.
        # (LIFO order preserves the original teardown order: log first,
        # then directory.)
        self.addCleanup(self.dir.cleanup)
        self.db_path = self.dir.getpath('test.db')
        self.conn = sqlite3.connect(self.db_path)
        self.conn.execute('create table notes '
                          '(filename varchar, text varchar)')
        self.conn.commit()
        self.log = LogCapture()
        self.addCleanup(self.log.uninstall)

    def test_normal(self):
        with DatabaseHandler(self.db_path) as handler:
            handler.conn.execute('insert into notes values (?, ?)',
                                 ('test.txt', 'a note'))
            handler.conn.commit()
        # check the row was inserted and committed
        curs = self.conn.cursor()
        curs.execute('select * from notes')
        self.assertEqual(curs.fetchall(), [('test.txt', 'a note')])
        # check there was no logging
        self.log.check()

    def test_exception(self):
        with ShouldRaise(Exception('foo')):
            with DatabaseHandler(self.db_path) as handler:
                handler.conn.execute('insert into notes values (?, ?)',
                                     ('test.txt', 'a note'))
                raise Exception('foo')
        # check the row not inserted and the transaction was rolled back
        curs = handler.conn.cursor()
        curs.execute('select * from notes')
        self.assertEqual(curs.fetchall(), [])
        # check the error was logged
        self.log.check(('root', 'ERROR', 'Something went wrong'))
def setUpModule():
    """Configure module-level test settings.

    Chooses the database (env override or a throwaway SQLite file migrated
    via alembic) and the registration cache / session backends (redis when
    USE_REDIS is set, otherwise dbm with a fake lock).
    """
    import os
    d = TempDirectory()
    if os.getenv('SQLALCHEMY_URL'):
        # Explicit database supplied by the environment (e.g. CI).
        settings['sqlalchemy.url'] = os.getenv('SQLALCHEMY_URL')
    else:
        # Fall back to a throwaway SQLite database in the temp directory
        # and bring its schema up to date with alembic.
        sqlalchemy_url = "sqlite:///%(here)s/sns.sqlite" % dict(here=d.path)
        settings['sqlalchemy.url'] = sqlalchemy_url
        from alembic.config import Config
        from alembic import command
        alembic_cfg = Config("alembic.ini")
        alembic_cfg.set_main_option('sqlalchemy.url', sqlalchemy_url)
        command.upgrade(alembic_cfg, "head")
    if os.getenv('USE_REDIS'):
        settings['cache.registration.backend'] = 'dogpile.cache.redis'
        settings['pyramid.includes'].append('pyramid_redis_sessions')
    else:
        from sns.testing import FakeLock
        settings['cache.registration.backend'] = "dogpile.cache.dbm"
        settings['cache.registration.arguments.filename'] = d.getpath('registration.dbm')
        settings['cache.registration.arguments.lock_factory'] = FakeLock
        settings['pyramid.includes'].append('sns.signed_cookie_session')
class DatabaseHandlerTests(TestCase):
    """Exercises DatabaseHandler commit and rollback behaviour plus logging."""

    def setUp(self):
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)
        self.db_path = self.dir.getpath('test.db')
        self.conn = sqlite3.connect(self.db_path)
        self.conn.execute('create table notes '
                          '(filename varchar, text varchar)')
        self.conn.commit()
        self.log = LogCapture()
        self.addCleanup(self.log.uninstall)

    def _all_notes(self, connection):
        # Fetch every row currently visible through the given connection.
        cursor = connection.cursor()
        cursor.execute('select * from notes')
        return cursor.fetchall()

    def test_normal(self):
        with DatabaseHandler(self.db_path) as handler:
            handler.conn.execute('insert into notes values (?, ?)',
                                 ('test.txt', 'a note'))
            handler.conn.commit()
        # The committed row is visible through the independent connection,
        # and nothing was logged.
        self.assertEqual(self._all_notes(self.conn),
                         [('test.txt', 'a note')])
        self.log.check()

    def test_exception(self):
        with ShouldRaise(Exception('foo')):
            with DatabaseHandler(self.db_path) as handler:
                handler.conn.execute('insert into notes values (?, ?)',
                                     ('test.txt', 'a note'))
                raise Exception('foo')
        # The transaction was rolled back and the error was logged.
        self.assertEqual(self._all_notes(handler.conn), [])
        self.log.check(('root', 'ERROR', 'Something went wrong'))
class BcolzDailyBarTestCase(TestCase):
    """Round-trip tests for the bcolz daily pricing writer/reader pair:
    write synthetic OHLCV data, then verify contents, attrs and reads."""

    @classmethod
    def setUpClass(cls):
        # Restrict the trading calendar to the test window (stop inclusive).
        all_trading_days = TradingEnvironment().trading_days
        cls.trading_days = all_trading_days[
            all_trading_days.get_loc(TEST_CALENDAR_START):
            all_trading_days.get_loc(TEST_CALENDAR_STOP) + 1]

    def setUp(self):
        self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(
            self.asset_info,
            self.trading_days,
        )
        # Destination bcolz store lives in a per-test temp directory.
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        self.dir_.cleanup()

    @property
    def assets(self):
        # Asset ids come from the synthetic asset info's index.
        return self.asset_info.index

    def trading_days_between(self, start, end):
        """Return the test calendar's days within [start, end]."""
        return self.trading_days[self.trading_days.slice_indexer(start, end)]

    def asset_start(self, asset_id):
        """First trading day of the asset's lifetime (per the writer)."""
        return self.writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        """Last trading day of the asset's lifetime (per the writer)."""
        return self.writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        """All trading days within the asset's lifetime."""
        start, end = self.asset_start(asset_id), self.asset_end(asset_id)
        return self.trading_days_between(start, end)

    def test_write_ohlcv_content(self):
        # Every stored OHLCV value must equal the writer's synthetic value;
        # prices are scaled by 1000, volume is stored unscaled.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        for column in SyntheticDailyBarWriter.OHLCV:
            idx = 0
            data = result[column][:]
            multiplier = 1 if column == 'volume' else 1000
            for asset_id in self.assets:
                for date in self.dates_for_asset(asset_id):
                    self.assertEqual(
                        SyntheticDailyBarWriter.expected_value(
                            asset_id, date, column) * multiplier,
                        data[idx],
                    )
                    idx += 1
            # The column must contain exactly one row per asset/date pair.
            self.assertEqual(idx, len(data))

    def test_write_day_and_id(self):
        # The 'id' and 'day' columns must line up with asset lifetimes,
        # in asset order.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        idx = 0
        ids = result['id']
        days = result['day']
        for asset_id in self.assets:
            for date in self.dates_for_asset(asset_id):
                self.assertEqual(ids[idx], asset_id)
                self.assertEqual(date, seconds_to_timestamp(days[idx]))
                idx += 1

    def test_write_attrs(self):
        # The table attrs index each asset's slice of the flat data columns.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        expected_first_row = {
            '1': 0,
            '2': 5,    # Asset 1 has 5 trading days.
            '3': 12,   # Asset 2 has 7 trading days.
            '4': 33,   # Asset 3 has 21 trading days.
            '5': 44,   # Asset 4 has 11 trading days.
            '6': 49,   # Asset 5 has 5 trading days.
        }
        expected_last_row = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,   # Asset 6 has 9 trading days.
        }
        expected_calendar_offset = {
            '1': 0,    # Starts on 6-01, 1st trading day of month.
            '2': 15,   # Starts on 6-22, 16th trading day of month.
            '3': 1,    # Starts on 6-02, 2nd trading day of month.
            '4': 0,    # Starts on 6-01, 1st trading day of month.
            '5': 9,    # Starts on 6-12, 10th trading day of month.
            '6': 10,   # Starts on 6-15, 11th trading day of month.
        }
        self.assertEqual(result.attrs['first_row'], expected_first_row)
        self.assertEqual(result.attrs['last_row'], expected_last_row)
        self.assertEqual(
            result.attrs['calendar_offset'],
            expected_calendar_offset,
        )
        assert_index_equal(
            self.trading_days,
            DatetimeIndex(result.attrs['calendar'], tz='UTC'),
        )

    def _check_read_results(self, columns, assets, start_date, end_date):
        """Write the synthetic data, read it back over [start_date, end_date],
        and compare against the writer's expected 2d values."""
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        results = reader.load_raw_arrays(columns, start_date, end_date,
                                         assets)
        dates = self.trading_days_between(start_date, end_date)
        for column, result in zip(columns, results):
            assert_array_equal(
                result,
                self.writer.expected_values_2d(
                    dates,
                    assets,
                    column.name,
                ))

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume,
          USEquityPricing.high,
          USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        self._check_read_results(
            columns,
            self.assets,
            TEST_QUERY_START,
            TEST_QUERY_STOP,
        )

    def test_start_on_asset_start(self):
        """
        Test loading with queries that starts on the first day of each
        asset's lifetime.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_start(asset),
                end_date=self.trading_days[-1],
            )

    def test_start_on_asset_end(self):
        """
        Test loading with queries that start on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_end(asset),
                end_date=self.trading_days[-1],
            )

    def test_end_on_asset_start(self):
        """
        Test loading with queries that end on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_start(asset),
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that end on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_end(asset),
            )

    def test_unadjusted_spot_price(self):
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # At beginning
        price = reader.spot_price(1, Timestamp('2015-06-01', tz='UTC'),
                                  'close')
        # Synthetic writes price for date.
        self.assertEqual(135630.0, price)
        # Middle
        price = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                  'close')
        self.assertEqual(135631.0, price)
        # End
        price = reader.spot_price(1, Timestamp('2015-06-05', tz='UTC'),
                                  'close')
        self.assertEqual(135634.0, price)
        # Another sid at beginning.
        price = reader.spot_price(2, Timestamp('2015-06-22', tz='UTC'),
                                  'close')
        self.assertEqual(235651.0, price)
        # Ensure that volume does not have float adjustment applied.
        volume = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                   'volume')
        self.assertEqual(145631, volume)

    def test_unadjusted_spot_price_no_data(self):
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # before: asset 2 does not start trading until 6-22 (see attrs above)
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(2, Timestamp('2015-06-08', tz='UTC'), 'close')
        # after: asset 4's lifetime has already ended by this date
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(4, Timestamp('2015-06-16', tz='UTC'), 'close')

    def test_unadjusted_spot_price_empty_value(self):
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # A sid, day and corresponding index into which to overwrite a zero.
        zero_sid = 1
        zero_day = Timestamp('2015-06-02', tz='UTC')
        zero_ix = reader.sid_day_index(zero_sid, zero_day)
        # Write a zero into the synthetic pricing data at the day and sid,
        # so that a read should now return -1.
        # This a little hacky, in lieu of changing the synthetic data set.
        reader._spot_col('close')[zero_ix] = 0
        close = reader.spot_price(zero_sid, zero_day, 'close')
        self.assertEqual(-1, close)
def test_init_with_user(self, tmpdir: TempDirectory):
    """init() with a User must write name and email into .git/config."""
    # Fix: the User email was a redacted placeholder ('*****@*****.**')
    # that could never satisfy the assertion below; restore the literal
    # the assertion expects.
    Git(tmpdir.getpath('foo')).init(
        User(name='Foo Bar', email='foo@example.com'))
    config = tmpdir.read('foo/.git/config')
    assert b'name = Foo Bar' in config
    assert b'email = foo@example.com' in config
def test_init(self, tmpdir: TempDirectory):
    """init() turns an existing empty directory into a git repository."""
    tmpdir.makedir('foo')
    repo_path = tmpdir.getpath('foo')
    Git(repo_path).init()
    # A .git directory marks the freshly-initialised repository.
    assert os.path.exists(tmpdir.getpath('foo/.git'))
class BcolzMinuteBarTestCase(TestCase):
    """
    Round-trip tests for minute-level OHLCV bars: write with
    BcolzMinuteBarWriter, read back with BcolzMinuteBarReader, over a small
    slice of the trading calendar.
    """

    @classmethod
    def setUpClass(cls):
        # Restrict the full market calendar to the test window
        # [TEST_CALENDAR_START, TEST_CALENDAR_STOP].
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        all_market_closes = cls.env.open_and_closes.market_close
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.market_closes = all_market_closes[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        # Fresh bcolz directory plus a writer/reader pair per test.
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def test_write_one_ohlcv(self):
        # A single bar written at the first market open reads back verbatim.
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(50.0, volume_price)

    def test_write_two_bars(self):
        # Two consecutive minutes keep their own distinct OHLCV values.
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute_0, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute_0, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute_0, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute_0, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute_0, 'volume')
        self.assertEquals(50.0, volume_price)

        open_price = self.reader.get_value(sid, minute_1, 'open')
        self.assertEquals(11.0, open_price)

        high_price = self.reader.get_value(sid, minute_1, 'high')
        self.assertEquals(21.0, high_price)

        low_price = self.reader.get_value(sid, minute_1, 'low')
        self.assertEquals(31.0, low_price)

        close_price = self.reader.get_value(sid, minute_1, 'close')
        self.assertEquals(41.0, close_price)

        volume_price = self.reader.get_value(sid, minute_1, 'volume')
        self.assertEquals(51.0, volume_price)

    def test_write_on_second_day(self):
        # Writing on a later calendar day (not the writer's first day) works.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(50.0, volume_price)

    def test_write_empty(self):
        # Zeros stored for prices read back as NaN; zero volume stays 0.
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        assert_almost_equal(nan, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        assert_almost_equal(nan, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        assert_almost_equal(nan, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        assert_almost_equal(nan, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume_price)

    def test_write_on_multiple_days(self):
        # One frame spanning two separate days writes both bars correctly.
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write(sid, data)

        minute = minutes[0]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(50.0, volume_price)

        minute = minutes[1]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(11.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(21.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(31.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(41.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(51.0, volume_price)

    def test_no_overwrite(self):
        # Re-writing data for an already-written minute must raise.
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            },
            index=[minute])
        self.writer.write(sids[1], data)

        sid = sids[0]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(15.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(17.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(11.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(15.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(100.0, volume_price)

        sid = sids[1]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(25.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(27.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(21.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(25.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(200.0, volume_price)

    def test_pad_data(self):
        """
        Test padding a sid out to a given day before writing real data.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(15.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(17.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(11.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(15.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(100.0, volume_price)

    def test_nans(self):
        """
        Test writing all-NaN pricing data (zero volume) after padding.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        # Each column is built from a distinct NaN bit pattern: an IEEE-754
        # double with all exponent bits set (0b11111111111 << 52) plus a
        # nonzero mantissa is a NaN, so these are nine different NaN payloads.
        data = DataFrame(
            data={
                'open': ((0b11111111111 << 52) + arange(1, 10, dtype=int64)).
                view(float64),
                'high': ((0b11111111111 << 52) + arange(11, 20, dtype=int64)).
                view(float64),
                'low': ((0b11111111111 << 52) + arange(21, 30, dtype=int64)).
                view(float64),
                'close': ((0b11111111111 << 52) + arange(31, 40, dtype=int64)).
                view(float64),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = self.reader.unadjusted_window(
            fields, minutes[0], minutes[-1], [sid])

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_write_cols(self):
        # The column-oriented write path produces the same reads as write().
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        open_price = self.reader.get_value(sid, minute_0, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute_0, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute_0, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute_0, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute_0, 'volume')
        self.assertEquals(50.0, volume_price)

        open_price = self.reader.get_value(sid, minute_1, 'open')
        self.assertEquals(11.0, open_price)

        high_price = self.reader.get_value(sid, minute_1, 'high')
        self.assertEquals(21.0, high_price)

        low_price = self.reader.get_value(sid, minute_1, 'low')
        self.assertEquals(31.0, low_price)

        close_price = self.reader.get_value(sid, minute_1, 'close')
        self.assertEquals(41.0, close_price)

        volume_price = self.reader.get_value(sid, minute_1, 'volume')
        self.assertEquals(51.0, volume_price)

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write(sids[1], data_2)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = reader.unadjusted_window(
            columns, minutes[0], minutes[-1], sids)

        data = {sids[0]: data_1, sids[1]: data_2}

        # unadjusted_window is indexed [column][sid]; compare each pane
        # against the frame that was written for that sid.
        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
class BcolzMinuteBarTestCase(TestCase):
    """
    Round-trip tests for minute-level OHLCV bars via BcolzMinuteBarWriter /
    BcolzMinuteBarReader (variant whose writer takes no market_closes).

    NOTE(review): this class shares its name with an earlier
    BcolzMinuteBarTestCase in this file; at import time the later definition
    shadows the earlier one, so only one of them runs — confirm whether both
    variants are intended to exist.
    """

    @classmethod
    def setUpClass(cls):
        # Restrict the market-open calendar to the test window.
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP)
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        # Fresh bcolz directory plus a writer/reader pair per test.
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def test_write_one_ohlcv(self):
        # A single bar written at the first market open reads back verbatim.
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }, index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(50.0, volume_price)

    def test_write_two_bars(self):
        # Two consecutive minutes keep their own distinct OHLCV values.
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(data={
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }, index=[minute_0, minute_1])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute_0, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute_0, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute_0, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute_0, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute_0, 'volume')
        self.assertEquals(50.0, volume_price)

        open_price = self.reader.get_value(sid, minute_1, 'open')
        self.assertEquals(11.0, open_price)

        high_price = self.reader.get_value(sid, minute_1, 'high')
        self.assertEquals(21.0, high_price)

        low_price = self.reader.get_value(sid, minute_1, 'low')
        self.assertEquals(31.0, low_price)

        close_price = self.reader.get_value(sid, minute_1, 'close')
        self.assertEquals(41.0, close_price)

        volume_price = self.reader.get_value(sid, minute_1, 'volume')
        self.assertEquals(51.0, volume_price)

    def test_write_on_second_day(self):
        # Writing on a later calendar day (not the writer's first day) works.
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }, index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(50.0, volume_price)

    def test_write_empty(self):
        # Zeros stored for prices read back as NaN; zero volume stays 0.
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(data={
            'open': [0],
            'high': [0],
            'low': [0],
            'close': [0],
            'volume': [0]
        }, index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        assert_almost_equal(nan, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        assert_almost_equal(nan, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        assert_almost_equal(nan, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        assert_almost_equal(nan, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume_price)

    def test_write_on_multiple_days(self):
        # One frame spanning two separate days writes both bars correctly.
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(start=self.test_calendar_start + 1,
                                     end=self.test_calendar_start + 3)]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(data={
            'open': [10.0, 11.0],
            'high': [20.0, 21.0],
            'low': [30.0, 31.0],
            'close': [40.0, 41.0],
            'volume': [50.0, 51.0]
        }, index=minutes)
        self.writer.write(sid, data)

        minute = minutes[0]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(50.0, volume_price)

        minute = minutes[1]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(11.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(21.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(31.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(41.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(51.0, volume_price)

    def test_no_overwrite(self):
        # Re-writing data for an already-written minute must raise.
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(data={
            'open': [10.0],
            'high': [20.0],
            'low': [30.0],
            'close': [40.0],
            'volume': [50.0]
        }, index=[minute])
        self.writer.write(sid, data)

        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does not
        show up in the test itself, but is exercised by the act of attempting
        to write two consecutive sids, which would be written to the same
        containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(data={
            'open': [15.0],
            'high': [17.0],
            'low': [11.0],
            'close': [15.0],
            'volume': [100.0]
        }, index=[minute])
        self.writer.write(sids[0], data)

        data = DataFrame(data={
            'open': [25.0],
            'high': [27.0],
            'low': [21.0],
            'close': [25.0],
            'volume': [200.0]
        }, index=[minute])
        self.writer.write(sids[1], data)

        sid = sids[0]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(15.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(17.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(11.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(15.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(100.0, volume_price)

        sid = sids[1]

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(25.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(27.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(21.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(25.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(200.0, volume_price)

    def test_pad_data(self):
        """
        Test padding a sid out to a given day before writing real data.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        data = DataFrame(data={
            'open': [15.0],
            'high': [17.0],
            'low': [11.0],
            'close': [15.0],
            'volume': [100.0]
        }, index=[minute])
        self.writer.write(sid, data)

        open_price = self.reader.get_value(sid, minute, 'open')
        self.assertEquals(15.0, open_price)

        high_price = self.reader.get_value(sid, minute, 'high')
        self.assertEquals(17.0, high_price)

        low_price = self.reader.get_value(sid, minute, 'low')
        self.assertEquals(11.0, low_price)

        close_price = self.reader.get_value(sid, minute, 'close')
        self.assertEquals(15.0, close_price)

        volume_price = self.reader.get_value(sid, minute, 'volume')
        self.assertEquals(100.0, volume_price)

    def test_write_cols(self):
        # The column-oriented write path produces the same reads as write().
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        open_price = self.reader.get_value(sid, minute_0, 'open')
        self.assertEquals(10.0, open_price)

        high_price = self.reader.get_value(sid, minute_0, 'high')
        self.assertEquals(20.0, high_price)

        low_price = self.reader.get_value(sid, minute_0, 'low')
        self.assertEquals(30.0, low_price)

        close_price = self.reader.get_value(sid, minute_0, 'close')
        self.assertEquals(40.0, close_price)

        volume_price = self.reader.get_value(sid, minute_0, 'volume')
        self.assertEquals(50.0, volume_price)

        open_price = self.reader.get_value(sid, minute_1, 'open')
        self.assertEquals(11.0, open_price)

        high_price = self.reader.get_value(sid, minute_1, 'high')
        self.assertEquals(21.0, high_price)

        low_price = self.reader.get_value(sid, minute_1, 'low')
        self.assertEquals(31.0, low_price)

        close_price = self.reader.get_value(sid, minute_1, 'close')
        self.assertEquals(41.0, close_price)

        volume_price = self.reader.get_value(sid, minute_1, 'volume')
        self.assertEquals(51.0, volume_price)

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [
            start_minute,
            start_minute + Timedelta('1 min'),
            start_minute + Timedelta('2 min')
        ]
        sids = [1, 2]
        data_1 = DataFrame(data={
            'open': [15.0, nan, 15.1],
            'high': [17.0, nan, 17.1],
            'low': [11.0, nan, 11.1],
            'close': [14.0, nan, 14.1],
            'volume': [1000, 0, 1001]
        }, index=minutes)
        self.writer.write(sids[0], data_1)

        data_2 = DataFrame(data={
            'open': [25.0, nan, 25.1],
            'high': [27.0, nan, 27.1],
            'low': [21.0, nan, 21.1],
            'close': [24.0, nan, 24.1],
            'volume': [2000, 0, 2001]
        }, index=minutes)
        self.writer.write(sids[1], data_2)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = reader.unadjusted_window(columns, minutes[0], minutes[-1],
                                          sids)

        data = {sids[0]: data_1, sids[1]: data_2}

        # unadjusted_window is indexed [column][sid]; compare each pane
        # against the frame that was written for that sid.
        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])
class BcolzDailyBarTestCase(TestCase):
    """
    Tests for writing synthetic daily bars with SyntheticDailyBarWriter and
    reading them back with BcolzDailyBarReader over a fixed calendar slice.
    """

    @classmethod
    def setUpClass(cls):
        # Inclusive slice of the trading calendar covering the test window.
        all_trading_days = TradingEnvironment().trading_days
        cls.trading_days = all_trading_days[
            all_trading_days.get_loc(TEST_CALENDAR_START):
            all_trading_days.get_loc(TEST_CALENDAR_STOP) + 1
        ]

    def setUp(self):
        # Fresh synthetic writer and output directory per test.
        self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(
            self.asset_info,
            self.trading_days,
        )
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        self.dir_.cleanup()

    @property
    def assets(self):
        # Asset ids come from the synthetic asset-info index.
        return self.asset_info.index

    def trading_days_between(self, start, end):
        # Inclusive slice of the test calendar between two dates.
        return self.trading_days[self.trading_days.slice_indexer(start, end)]

    def asset_start(self, asset_id):
        # First trading day of the asset's lifetime, per the writer.
        return self.writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        # Last trading day of the asset's lifetime, per the writer.
        return self.writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        # All trading days within the asset's lifetime.
        start, end = self.asset_start(asset_id), self.asset_end(asset_id)
        return self.trading_days_between(start, end)

    def test_write_ohlcv_content(self):
        # Every stored value matches the writer's synthetic formula;
        # prices are stored scaled by 1000, volume unscaled.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        for column in SyntheticDailyBarWriter.OHLCV:
            idx = 0
            data = result[column][:]
            multiplier = 1 if column == 'volume' else 1000
            for asset_id in self.assets:
                for date in self.dates_for_asset(asset_id):
                    self.assertEqual(
                        SyntheticDailyBarWriter.expected_value(
                            asset_id,
                            date,
                            column
                        ) * multiplier,
                        data[idx],
                    )
                    idx += 1
            self.assertEqual(idx, len(data))

    def test_write_day_and_id(self):
        # The 'id' and 'day' columns line up with each asset's lifetime.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        idx = 0
        ids = result['id']
        days = result['day']
        for asset_id in self.assets:
            for date in self.dates_for_asset(asset_id):
                self.assertEqual(ids[idx], asset_id)
                self.assertEqual(date, seconds_to_timestamp(days[idx]))
                idx += 1

    def test_write_attrs(self):
        # Table attrs record per-asset row ranges and calendar offsets.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        expected_first_row = {
            '1': 0,
            '2': 5,   # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        expected_last_row = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,  # Asset 6 has 9 trading days.
        }
        expected_calendar_offset = {
            '1': 0,   # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,   # Starts on 6-02, 2nd trading day of month.
            '4': 0,   # Starts on 6-01, 1st trading day of month.
            '5': 9,   # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }
        self.assertEqual(result.attrs['first_row'], expected_first_row)
        self.assertEqual(result.attrs['last_row'], expected_last_row)
        self.assertEqual(
            result.attrs['calendar_offset'],
            expected_calendar_offset,
        )
        assert_index_equal(
            self.trading_days,
            DatetimeIndex(result.attrs['calendar'], tz='UTC'),
        )

    def _check_read_results(self, columns, assets, start_date, end_date):
        # Write the synthetic table, then verify load_raw_arrays returns the
        # writer's expected 2-D values for each requested column.
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        results = reader.load_raw_arrays(columns, start_date, end_date, assets)
        dates = self.trading_days_between(start_date, end_date)
        for column, result in zip(columns, results):
            assert_array_equal(
                result,
                self.writer.expected_values_2d(
                    dates,
                    assets,
                    column.name,
                )
            )

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume,
          USEquityPricing.high,
          USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        self._check_read_results(
            columns,
            self.assets,
            TEST_QUERY_START,
            TEST_QUERY_STOP,
        )

    def test_start_on_asset_start(self):
        """
        Test loading with queries that starts on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_start(asset),
                end_date=self.trading_days[-1],
            )

    def test_start_on_asset_end(self):
        """
        Test loading with queries that start on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_end(asset),
                end_date=self.trading_days[-1],
            )

    def test_end_on_asset_start(self):
        """
        Test loading with queries that end on the first day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_start(asset),
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that end on the last day of each asset's
        lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_end(asset),
            )
class PluginWithTempDirTests(TestCase):
    """Tests for the git repo plugin, exercised against real temporary
    git repositories on disk."""

    def setUp(self):
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)

    def run_actions(self, path=None, **kw):
        """Build a git repo plugin and run its actions under log capture.

        The plugin's ``datetime`` is pinned via testfixtures so commit
        messages are deterministic.  Returns the captured log.
        """
        with LogCapture() as log:
            plugin = make_git_repo(path=path or self.dir.path, **kw)
            with Replacer() as r:
                r.replace("archivist.repos.git.datetime", test_datetime())
                plugin.actions()
        return log

    def git(self, command, repo_path=None):
        """Run a git command, by default in the temp directory root."""
        return run(["git"] + command.split(), cwd=repo_path or self.dir.path)

    def make_repo_with_content(self, repo=""):
        """Initialise a git repo containing three committed files."""
        repo_path = self.dir.getpath(repo) if repo else None
        self.git("init", repo_path)
        self.dir.write(repo + "a", "some content")
        self.dir.write(repo + "b", "other content")
        self.dir.write(repo + "c", "more content")
        self.git("add .", repo_path)
        self.git("commit -m initial", repo_path)
        return repo

    def make_local_changes(self, repo=""):
        """Modify one file, delete one file and add one untracked file."""
        self.dir.write(repo + "b", "changed content")
        os.remove(self.dir.getpath(repo + "c"))
        self.dir.write(repo + "d", "new content")

    def status_log_entry(self, lines, repo_path=None):
        """Build the expected (logger, level, message) tuple for a
        status-report log entry; ``{repo}`` in each line is filled in."""
        return (
            "archivist.repos.git",
            "INFO",
            "\n".join(
                l.format(repo=repo_path or self.dir.path) for l in lines
            ) + "\n",
        )

    def check_git_log(self, lines, repo_path=None):
        """Assert the repo's ``git log`` --stat output matches ``lines``."""
        compare(
            "\n".join(lines) + "\n",
            self.git("log --pretty=format:%s --stat", repo_path),
        )

    def get_dummy_source(self, name):
        """Return a minimal concrete Source for ``path_for()`` tests."""
        class DummySource(Source):
            schema = Schema({})

            def __init__(self, type, name, repo):
                super(DummySource, self).__init__(type, name, repo)

            def process(self, path):
                pass

        return DummySource("dummy", name, "repo")

    def test_path_for_with_name(self):
        compare(
            self.dir.getpath("dummy/the_name"),
            make_git_repo(path=self.dir.path).path_for(
                self.get_dummy_source("the_name")
            ),
        )
        self.assertTrue(os.path.exists(self.dir.getpath("dummy/the_name")))

    def test_path_for_no_name(self):
        compare(
            self.dir.getpath("dummy"),
            make_git_repo(path=self.dir.path).path_for(
                self.get_dummy_source(name=None)
            ),
        )
        self.assertTrue(os.path.exists(self.dir.getpath("dummy")))

    def test_not_there(self):
        repo_path = self.dir.getpath("var")
        log = self.run_actions(repo_path)
        log.check(
            ("archivist.repos.git", "INFO",
             "creating git repo at " + repo_path)
        )
        # BUG FIX: getpath() returns a (always truthy) path string whether
        # or not the path exists; the intent is to assert the repo was
        # actually created on disk.
        self.assertTrue(os.path.exists(self.dir.getpath("var/.git")))

    def test_there_not_git(self):
        repo_path = self.dir.makedir("var")
        log = self.run_actions(repo_path)
        log.check(
            ("archivist.repos.git", "INFO",
             "creating git repo at " + repo_path)
        )
        # BUG FIX: as in test_not_there, assert existence rather than a
        # truthy string.
        self.assertTrue(os.path.exists(self.dir.getpath("var/.git")))

    def test_no_changes(self):
        self.git("init")
        log = self.run_actions()
        log.check()  # no logging

    def test_just_log_changes(self):
        self.make_repo_with_content()
        self.make_local_changes()
        log = self.run_actions(commit=False)
        log.check(
            self.status_log_entry(
                ["changes found in git repo at {repo}:",
                 " M b", " D c", "?? d"]
            )
        )
        # commit=False: the working copy changed but history did not.
        self.check_git_log(
            [
                "initial",
                " a | 1 +",
                " b | 1 +",
                " c | 1 +",
                " 3 files changed, 3 insertions(+)",
            ]
        )

    def test_commit_changes(self):
        self.make_repo_with_content()
        self.make_local_changes()
        log = self.run_actions(commit=True)
        log.check(
            self.status_log_entry(
                ["changes found in git repo at {repo}:",
                 " M b", " D c", "?? d"]
            ),
            ("archivist.repos.git", "INFO", "changes committed"),
        )
        # Working copy is clean after the commit.
        compare("", self.git("status --porcelain"))
        self.check_git_log(
            [
                "Recorded by archivist at 2001-01-01 00:00",
                " b | 2 +-",
                " c | 1 -",
                " d | 1 +",
                " 3 files changed, 2 insertions(+), 2 deletions(-)",
                "",
                "initial",
                " a | 1 +",
                " b | 1 +",
                " c | 1 +",
                " 3 files changed, 3 insertions(+)",
            ]
        )

    def test_push_changes(self):
        # Clone an 'origin' repo locally, change the clone, and check the
        # commit is pushed back to origin.
        origin_path = self.dir.makedir("origin")
        self.make_repo_with_content(repo="origin/")
        self.git("config --local --add receive.denyCurrentBranch ignore",
                 origin_path)
        self.git("clone -q " + origin_path + " local")
        self.make_local_changes(repo="local/")
        local_path = self.dir.getpath("local")
        log = self.run_actions(commit=True, push=True, path=local_path)
        log.check(
            self.status_log_entry(
                ["changes found in git repo at {repo}:",
                 " M b", " D c", "?? d"],
                repo_path=local_path,
            ),
            ("archivist.repos.git", "INFO", "changes committed"),
            ("archivist.repos.git", "INFO", "changes pushed"),
        )
        # The pushed commit must be visible in origin's history.
        self.check_git_log(
            [
                "Recorded by archivist at 2001-01-01 00:00",
                " b | 2 +-",
                " c | 1 -",
                " d | 1 +",
                " 3 files changed, 2 insertions(+), 2 deletions(-)",
                "",
                "initial",
                " a | 1 +",
                " b | 1 +",
                " c | 1 +",
                " 3 files changed, 3 insertions(+)",
            ],
            repo_path=origin_path,
        )

    def test_push_no_changes(self):
        self.git("init")
        log = self.run_actions(commit=True, push=True)
        log.check()  # no logging

    def test_default_repo_config(self):
        # can't test actions due to default path
        GitRepo(**GitRepo.schema(default_repo_config))
class TestJenkinsSource(TestCase):
    """Schema validation and interface tests for the jenkins source
    plugin."""

    def setUp(self):
        self.dir = TempDirectory()
        self.addCleanup(self.dir.cleanup)

    def test_abc(self):
        # The plugin must be usable wherever a Source is expected.
        self.assertTrue(issubclass(Plugin, Source))

    def test_schema_max(self):
        # All keys supplied: the schema passes them through unchanged.
        supplied = {'type': 'jenkins', 'name': 'core', 'repo': 'config',
                    'path': self.dir.path}
        compare(
            {'type': 'jenkins', 'name': 'core', 'repo': 'config',
             'path': self.dir.path},
            actual=Plugin.schema(supplied),
        )

    def test_schema_min(self):
        # Only required keys supplied: name defaults to 'jenkins'.
        compare(
            {'type': 'jenkins', 'name': 'jenkins', 'repo': 'config',
             'path': self.dir.path},
            actual=Plugin.schema(
                {'type': 'jenkins', 'repo': 'config', 'path': self.dir.path}
            ),
        )

    def test_schema_wrong_type(self):
        expected = "expected str for dictionary value @ data['path']"
        with ShouldFailSchemaWith(expected):
            Plugin.schema({'type': 'bar', 'path': ['/']})

    def test_schema_extra_keys(self):
        with ShouldFailSchemaWith("extra keys not allowed @ data['foo']"):
            Plugin.schema({'type': 'jenkins', 'foo': 'bar'})

    def test_invalid_name(self):
        expected = "expected str for dictionary value @ data['name']"
        with ShouldFailSchemaWith(expected):
            Plugin.schema({'type': 'jenkins', 'name': []})

    def test_no_path(self):
        expected = "'' does not exist for dictionary value @ data['path']"
        with ShouldFailSchemaWith(expected):
            Plugin.schema({'type': 'jenkins', 'path': ''})

    def test_path_not_string(self):
        expected = "expected str for dictionary value @ data['path']"
        with ShouldFailSchemaWith(expected):
            Plugin.schema({'type': 'jenkins', 'path': 1})

    def test_path_not_there(self):
        missing = self.dir.getpath('foo')
        expected = (
            "'%s' does not exist for dictionary value @ data['path']" % (
                missing
            )
        )
        with ShouldFailSchemaWith(expected):
            Plugin.schema({'type': 'jenkins', 'path': missing})

    def test_interface(self):
        # Constructor arguments are exposed as plain attributes.
        plugin = Plugin('source', name='jenkins', repo='config', path='root')
        compare(plugin.type, 'source')
        compare(plugin.name, 'jenkins')
        compare(plugin.repo, 'config')
        compare(plugin.source_paths, 'root')
class BcolzMinuteBarTestCase(TestCase): @classmethod def setUpClass(cls): cls.env = TradingEnvironment() all_market_opens = cls.env.open_and_closes.market_open indexer = all_market_opens.index.slice_indexer( start=TEST_CALENDAR_START, end=TEST_CALENDAR_STOP ) cls.market_opens = all_market_opens[indexer] cls.test_calendar_start = cls.market_opens.index[0] cls.test_calendar_stop = cls.market_opens.index[-1] def setUp(self): self.dir_ = TempDirectory() self.dir_.create() self.dest = self.dir_.getpath('minute_bars') os.makedirs(self.dest) self.writer = BcolzMinuteBarWriter( TEST_CALENDAR_START, self.dest, self.market_opens, US_EQUITIES_MINUTES_PER_DAY, ) self.reader = BcolzMinuteBarReader(self.dest) def tearDown(self): self.dir_.cleanup() def test_write_one_ohlcv(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) def test_write_two_bars(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 data = DataFrame( data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=[minute_0, minute_1]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute_0, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute_0, 'high') self.assertEquals(20.0, high_price) low_price = 
self.reader.get_value(sid, minute_0, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, 'volume') self.assertEquals(50.0, volume_price) open_price = self.reader.get_value(sid, minute_1, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price) def test_write_on_second_day(self): second_day = self.test_calendar_start + 1 minute = self.market_opens[second_day] sid = 1 data = DataFrame( data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) def test_write_empty(self): minute = self.market_opens[self.test_calendar_start] sid = 1 data = DataFrame( data={ 'open': [0], 'high': [0], 'low': [0], 'close': [0], 'volume': [0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') assert_almost_equal(nan, open_price) high_price = self.reader.get_value(sid, minute, 'high') assert_almost_equal(nan, high_price) low_price = self.reader.get_value(sid, minute, 'low') assert_almost_equal(nan, 
low_price) close_price = self.reader.get_value(sid, minute, 'close') assert_almost_equal(nan, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') assert_almost_equal(0, volume_price) def test_write_on_multiple_days(self): tds = self.market_opens.index days = tds[tds.slice_indexer( start=self.test_calendar_start + 1, end=self.test_calendar_start + 3 )] minutes = DatetimeIndex([ self.market_opens[days[0]] + timedelta(minutes=60), self.market_opens[days[1]] + timedelta(minutes=120), ]) sid = 1 data = DataFrame( data={ 'open': [10.0, 11.0], 'high': [20.0, 21.0], 'low': [30.0, 31.0], 'close': [40.0, 41.0], 'volume': [50.0, 51.0] }, index=minutes) self.writer.write(sid, data) minute = minutes[0] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(50.0, volume_price) minute = minutes[1] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(51.0, volume_price) def test_no_overwrite(self): minute = self.market_opens[TEST_CALENDAR_START] sid = 1 data = DataFrame( data={ 'open': [10.0], 'high': [20.0], 'low': [30.0], 'close': [40.0], 'volume': [50.0] }, index=[minute]) self.writer.write(sid, data) with self.assertRaises(BcolzMinuteOverlappingData): self.writer.write(sid, data) def 
test_write_multiple_sids(self): """ Test writing multiple sids. Tests both that the data is written to the correct sid, as well as ensuring that the logic for creating the subdirectory path to each sid does not cause issues from attempts to recreate existing paths. (Calling out this coverage, because an assertion of that logic does not show up in the test itself, but is exercised by the act of attempting to write two consecutive sids, which would be written to the same containing directory, `00/00/000001.bcolz` and `00/00/000002.bcolz) Before applying a check to make sure the path writing did not re-attempt directory creation an OSError like the following would occur: ``` OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00' ``` """ minute = self.market_opens[TEST_CALENDAR_START] sids = [1, 2] data = DataFrame( data={ 'open': [15.0], 'high': [17.0], 'low': [11.0], 'close': [15.0], 'volume': [100.0] }, index=[minute]) self.writer.write(sids[0], data) data = DataFrame( data={ 'open': [25.0], 'high': [27.0], 'low': [21.0], 'close': [25.0], 'volume': [200.0] }, index=[minute]) self.writer.write(sids[1], data) sid = sids[0] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(100.0, volume_price) sid = sids[1] open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(25.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(27.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(21.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(25.0, close_price) 
volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(200.0, volume_price) def test_pad_data(self): """ Test writing empty data. """ sid = 1 last_date = self.writer.last_date_in_output_for_sid(sid) self.assertIs(last_date, NaT) self.writer.pad(sid, TEST_CALENDAR_START) last_date = self.writer.last_date_in_output_for_sid(sid) self.assertEqual(last_date, TEST_CALENDAR_START) freq = self.market_opens.index.freq minute = self.market_opens[TEST_CALENDAR_START + freq] data = DataFrame( data={ 'open': [15.0], 'high': [17.0], 'low': [11.0], 'close': [15.0], 'volume': [100.0] }, index=[minute]) self.writer.write(sid, data) open_price = self.reader.get_value(sid, minute, 'open') self.assertEquals(15.0, open_price) high_price = self.reader.get_value(sid, minute, 'high') self.assertEquals(17.0, high_price) low_price = self.reader.get_value(sid, minute, 'low') self.assertEquals(11.0, low_price) close_price = self.reader.get_value(sid, minute, 'close') self.assertEquals(15.0, close_price) volume_price = self.reader.get_value(sid, minute, 'volume') self.assertEquals(100.0, volume_price) def test_write_cols(self): minute_0 = self.market_opens[self.test_calendar_start] minute_1 = minute_0 + timedelta(minutes=1) sid = 1 cols = { 'open': array([10.0, 11.0]), 'high': array([20.0, 21.0]), 'low': array([30.0, 31.0]), 'close': array([40.0, 41.0]), 'volume': array([50.0, 51.0]) } dts = array([minute_0, minute_1], dtype='datetime64[s]') self.writer.write_cols(sid, dts, cols) open_price = self.reader.get_value(sid, minute_0, 'open') self.assertEquals(10.0, open_price) high_price = self.reader.get_value(sid, minute_0, 'high') self.assertEquals(20.0, high_price) low_price = self.reader.get_value(sid, minute_0, 'low') self.assertEquals(30.0, low_price) close_price = self.reader.get_value(sid, minute_0, 'close') self.assertEquals(40.0, close_price) volume_price = self.reader.get_value(sid, minute_0, 'volume') self.assertEquals(50.0, volume_price) open_price = 
self.reader.get_value(sid, minute_1, 'open') self.assertEquals(11.0, open_price) high_price = self.reader.get_value(sid, minute_1, 'high') self.assertEquals(21.0, high_price) low_price = self.reader.get_value(sid, minute_1, 'low') self.assertEquals(31.0, low_price) close_price = self.reader.get_value(sid, minute_1, 'close') self.assertEquals(41.0, close_price) volume_price = self.reader.get_value(sid, minute_1, 'volume') self.assertEquals(51.0, volume_price)
class BcolzDailyBarTestCase(TestCase):
    # Round-trip tests for SyntheticDailyBarWriter output read back via
    # BcolzDailyBarReader.  Expected row indices and offsets below are
    # tied to the EQUITY_INFO fixture and the June 2015 test calendar —
    # do not change one without the other.

    @classmethod
    def setUpClass(cls):
        # Restrict the full trading calendar to the test window
        # (inclusive of TEST_CALENDAR_STOP, hence the +1).
        all_trading_days = TradingEnvironment().trading_days
        cls.trading_days = all_trading_days[
            all_trading_days.get_loc(TEST_CALENDAR_START):
            all_trading_days.get_loc(TEST_CALENDAR_STOP) + 1
        ]

    def setUp(self):
        self.asset_info = EQUITY_INFO
        self.writer = SyntheticDailyBarWriter(
            self.asset_info,
            self.trading_days,
        )
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('daily_equity_pricing.bcolz')

    def tearDown(self):
        self.dir_.cleanup()

    @property
    def assets(self):
        # Asset ids come from the fixture's index.
        return self.asset_info.index

    def trading_days_between(self, start, end):
        # Inclusive slice of the test calendar.
        return self.trading_days[self.trading_days.slice_indexer(start, end)]

    def asset_start(self, asset_id):
        return self.writer.asset_start(asset_id)

    def asset_end(self, asset_id):
        return self.writer.asset_end(asset_id)

    def dates_for_asset(self, asset_id):
        # All trading days in the asset's lifetime, inclusive.
        start, end = self.asset_start(asset_id), self.asset_end(asset_id)
        return self.trading_days_between(start, end)

    def test_write_ohlcv_content(self):
        # Each OHLCV column should contain one entry per (asset, live day),
        # laid out asset-by-asset in calendar order.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        for column in SyntheticDailyBarWriter.OHLCV:
            idx = 0
            data = result[column][:]
            # Prices are scaled by 1000 on disk; volume is stored raw.
            multiplier = 1 if column == 'volume' else 1000
            for asset_id in self.assets:
                for date in self.dates_for_asset(asset_id):
                    self.assertEqual(
                        SyntheticDailyBarWriter.expected_value(
                            asset_id,
                            date,
                            column
                        ) * multiplier,
                        data[idx],
                    )
                    idx += 1
            # No trailing rows beyond the expected layout.
            self.assertEqual(idx, len(data))

    def test_write_day_and_id(self):
        # The 'id' and 'day' columns must line up with the OHLCV layout.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        idx = 0
        ids = result['id']
        days = result['day']
        for asset_id in self.assets:
            for date in self.dates_for_asset(asset_id):
                self.assertEqual(ids[idx], asset_id)
                # 'day' is stored as epoch seconds.
                self.assertEqual(date, seconds_to_timestamp(days[idx]))
                idx += 1

    def test_write_attrs(self):
        # first_row/last_row are inclusive row indices per asset;
        # calendar_offset is the asset's start position within the month.
        result = self.writer.write(self.dest, self.trading_days, self.assets)
        expected_first_row = {
            '1': 0,
            '2': 5,   # Asset 1 has 5 trading days.
            '3': 12,  # Asset 2 has 7 trading days.
            '4': 33,  # Asset 3 has 21 trading days.
            '5': 44,  # Asset 4 has 11 trading days.
            '6': 49,  # Asset 5 has 5 trading days.
        }
        expected_last_row = {
            '1': 4,
            '2': 11,
            '3': 32,
            '4': 43,
            '5': 48,
            '6': 57,  # Asset 6 has 9 trading days.
        }
        expected_calendar_offset = {
            '1': 0,   # Starts on 6-01, 1st trading day of month.
            '2': 15,  # Starts on 6-22, 16th trading day of month.
            '3': 1,   # Starts on 6-02, 2nd trading day of month.
            '4': 0,   # Starts on 6-01, 1st trading day of month.
            '5': 9,   # Starts on 6-12, 10th trading day of month.
            '6': 10,  # Starts on 6-15, 11th trading day of month.
        }
        self.assertEqual(result.attrs['first_row'], expected_first_row)
        self.assertEqual(result.attrs['last_row'], expected_last_row)
        self.assertEqual(
            result.attrs['calendar_offset'],
            expected_calendar_offset,
        )
        assert_index_equal(
            self.trading_days,
            DatetimeIndex(result.attrs['calendar'], tz='UTC'),
        )

    def _check_read_results(self, columns, assets, start_date, end_date):
        # Write the synthetic data, read it back over [start_date,
        # end_date], and compare against the writer's expected 2d values.
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        results = reader.load_raw_arrays(columns, start_date, end_date,
                                         assets)
        dates = self.trading_days_between(start_date, end_date)
        for column, result in zip(columns, results):
            assert_array_equal(
                result,
                self.writer.expected_values_2d(
                    dates,
                    assets,
                    column.name,
                )
            )

    @parameterized.expand([
        ([USEquityPricing.open],),
        ([USEquityPricing.close, USEquityPricing.volume],),
        ([USEquityPricing.volume,
          USEquityPricing.high,
          USEquityPricing.low],),
        (USEquityPricing.columns,),
    ])
    def test_read(self, columns):
        self._check_read_results(
            columns,
            self.assets,
            TEST_QUERY_START,
            TEST_QUERY_STOP,
        )

    def test_start_on_asset_start(self):
        """
        Test loading with queries that start on the first day of each
        asset's lifetime.
        """
        columns = [USEquityPricing.high, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_start(asset),
                end_date=self.trading_days[-1],
            )

    def test_start_on_asset_end(self):
        """
        Test loading with queries that start on the last day of each
        asset's lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.asset_end(asset),
                end_date=self.trading_days[-1],
            )

    def test_end_on_asset_start(self):
        """
        Test loading with queries that end on the first day of each
        asset's lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_start(asset),
            )

    def test_end_on_asset_end(self):
        """
        Test loading with queries that end on the last day of each
        asset's lifetime.
        """
        columns = [USEquityPricing.close, USEquityPricing.volume]
        for asset in self.assets:
            self._check_read_results(
                columns,
                self.assets,
                start_date=self.trading_days[0],
                end_date=self.asset_end(asset),
            )

    def test_unadjusted_spot_price(self):
        # Single-value reads at the start, middle and end of an asset's
        # lifetime, and for a second sid.
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # At beginning
        price = reader.spot_price(1, Timestamp('2015-06-01', tz='UTC'),
                                  'close')
        # Synthetic writes price for date.
        self.assertEqual(135630.0, price)
        # Middle
        price = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                  'close')
        self.assertEqual(135631.0, price)
        # End
        price = reader.spot_price(1, Timestamp('2015-06-05', tz='UTC'),
                                  'close')
        self.assertEqual(135634.0, price)
        # Another sid at beginning.
        price = reader.spot_price(2, Timestamp('2015-06-22', tz='UTC'),
                                  'close')
        self.assertEqual(235651.0, price)
        # Ensure that volume does not have float adjustment applied.
        volume = reader.spot_price(1, Timestamp('2015-06-02', tz='UTC'),
                                   'volume')
        self.assertEqual(145631, volume)

    def test_unadjusted_spot_price_no_data(self):
        # Reads outside an asset's lifetime must raise NoDataOnDate.
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # before
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(2, Timestamp('2015-06-08', tz='UTC'), 'close')
        # after
        with self.assertRaises(NoDataOnDate):
            reader.spot_price(4, Timestamp('2015-06-16', tz='UTC'), 'close')

    def test_unadjusted_spot_price_empty_value(self):
        table = self.writer.write(self.dest, self.trading_days, self.assets)
        reader = BcolzDailyBarReader(table)
        # A sid, day and corresponding index into which to overwrite a zero.
        zero_sid = 1
        zero_day = Timestamp('2015-06-02', tz='UTC')
        zero_ix = reader.sid_day_index(zero_sid, zero_day)
        # Write a zero into the synthetic pricing data at the day and sid,
        # so that a read should now return -1.
        # This a little hacky, in lieu of changing the synthetic data set.
        reader._spot_col('close')[zero_ix] = 0
        close = reader.spot_price(zero_sid, zero_day, 'close')
        self.assertEqual(-1, close)
class BaseSupervisorTestCase(TestCase):
    """
    Base class for running supervisor tests
    """
    maxDiff = None
    # Marker consumed elsewhere; presumably flags these as slow
    # integration tests — TODO confirm against the test runner config.
    integration_test = 1

    def __init__(self, *args, **kwargs):
        super(BaseSupervisorTestCase, self).__init__(*args, **kwargs)
        self.supervisor = None  # Popen handle for the supervisord process
        self.logstash = None    # UDPServer emulating Logstash
        # store, as it's also used by supervisorctl
        self._config_file_path = None

    def setUp(self):
        self.scratch = TempDirectory()

    def tearDown(self):
        self.scratch.cleanup()

    def run_supervisor(self, overrides, configuration_string):
        """
        Runs Supervisor
        """
        # overrides: extra environment variables for the supervisord
        # process; configuration_string: extra config appended to the
        # supervisord.template file next to this test module.
        environment = os.environ.copy()
        environment.update(overrides)
        working_directory = os.path.dirname(__file__)
        template_path = os.path.join(working_directory,
                                     'supervisord.template')
        with open(template_path) as template:
            configuration = template.read()
        configuration += configuration_string
        self.scratch.write('supervisor.conf', configuration, 'utf-8')
        # store, as it's also used by supervisorctl
        self._config_file_path = self.scratch.getpath('supervisor.conf')
        self.supervisor = subprocess.Popen(
            ['supervisord', '-c', self._config_file_path],
            env=environment,
            cwd=os.path.dirname(working_directory),
        )

    def shutdown_supervisor(self):
        """
        Shuts Supervisor down
        """
        self.supervisor.terminate()
        while self.supervisor.poll() is None:
            # need to wait while the process kills off its children and
            # exits so that it doesn't block the port
            sleep(1)

    def run_supervisorctl(self, args):
        """
        Runs supervisorctl using the test suites config file
        """
        command = [
            'supervisorctl',
            '-c',
            self._config_file_path,
        ]
        command += args
        return subprocess.call(command)

    def run_logstash(self):
        """
        Runs a socketserver instance emulating Logstash
        """
        # Port 0 lets the OS pick a free port; the server runs in a
        # daemonless background thread until shutdown_logstash().
        self.logstash = socketserver.UDPServer(('0.0.0.0', 0),
                                               LogstashHandler)
        threading.Thread(target=self.logstash.serve_forever).start()
        return self.logstash

    def shutdown_logstash(self):
        """
        Shuts the socketserver instance down
        """
        self.logstash.shutdown()
        self.logstash.server_close()

    def messages(self, clear_buffer=False, wait_for=None):
        """
        Returns the contents of the logstash message buffer
        """
        # wait_for: if given, poll until at least that many messages have
        # arrived.
        # NOTE(review): this busy-wait has no timeout — if fewer than
        # ``wait_for`` messages ever arrive, it loops forever.
        messages = []
        if wait_for is not None:
            while len(messages) < wait_for:
                sleep(0.1)
                messages = self.logstash.RequestHandlerClass.messages[:]
        else:
            messages = self.logstash.RequestHandlerClass.messages[:]
        # strip_volatile removes per-run fields so captures are comparable.
        parsed_messages = list(map(strip_volatile, messages))
        if clear_buffer:
            self.clear_message_buffer()
        return parsed_messages

    def get_message_buffer(self):
        """
        Returns the raw logstash message buffer
        """
        return self.logstash.RequestHandlerClass.messages[:]

    def clear_message_buffer(self):
        """
        Clears the logstash message buffer
        """
        self.logstash.RequestHandlerClass.messages = []
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for ``BcolzMinuteBarWriter`` / ``BcolzMinuteBarReader``
    using the ``write_sid`` / ``write_cols`` APIs over a calendar window
    bounded by ``TEST_CALENDAR_START`` .. ``TEST_CALENDAR_STOP``.
    """

    @classmethod
    def setUpClass(cls):
        # Restrict the trading calendar to the test window so writer and
        # reader agree on exactly which market minutes exist.
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        all_market_closes = cls.env.open_and_closes.market_close
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP,
        )
        cls.market_opens = all_market_opens[indexer]
        cls.market_closes = all_market_closes[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _check_ohlcv(self, sid, minute, expected):
        """Assert that every OHLCV field for ``sid`` at ``minute`` reads back
        the value in the ``expected`` field -> value mapping.
        """
        for field, value in expected.items():
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                msg='mismatch for field %r at %s' % (field, minute),
            )

    def _check_empty(self, sid, minute):
        """Assert that a zero-filled bar reads back as NaN prices and zero
        volume (the reader's encoding for "no trade in this minute").
        """
        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(nan, self.reader.get_value(sid, minute, field))
        assert_almost_equal(0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_one_ohlcv(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write_sid(sid, data)

        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })

    def test_write_two_bars(self):
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0],
            },
            index=[minute_0, minute_1])
        self.writer.write_sid(sid, data)

        self._check_ohlcv(sid, minute_0, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minute_1, {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_write_on_second_day(self):
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write_sid(sid, data)

        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })

    def test_write_empty(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0],
            },
            index=[minute])
        self.writer.write_sid(sid, data)
        # Zero prices are read back as NaN; zero volume stays zero.
        self._check_empty(sid, minute)

    def test_write_on_multiple_days(self):
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3)]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0],
            },
            index=minutes)
        self.writer.write_sid(sid, data)

        self._check_ohlcv(sid, minutes[0], {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minutes[1], {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_no_overwrite(self):
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write_sid(sid, data)

        # Writing the same minute twice must be rejected.
        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write_sid(sid, data)

    def test_append_to_same_day(self):
        """
        Test writing data with the same date as existing data in our file.
        """
        sid = 1
        first_minute = self.market_opens[TEST_CALENDAR_START]
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[first_minute])
        self.writer.write_sid(sid, data)

        # Write data in the same day as the previous minute
        second_minute = first_minute + Timedelta(minutes=1)
        new_data = DataFrame(
            data={
                'open': [5.0],
                'high': [10.0],
                'low': [3.0],
                'close': [7.0],
                'volume': [10.0],
            },
            index=[second_minute])
        self.writer.write_sid(sid, new_data)

        self._check_ohlcv(sid, second_minute, {
            'open': 5.0, 'high': 10.0, 'low': 3.0,
            'close': 7.0, 'volume': 10.0,
        })

    def test_append_on_new_day(self):
        sid = 1
        ohlcv = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0],
        }
        first_minute = self.market_opens[TEST_CALENDAR_START]
        data = DataFrame(data=ohlcv, index=[first_minute])
        self.writer.write_sid(sid, data)

        next_day_minute = first_minute + Timedelta(days=1)
        new_data = DataFrame(data=ohlcv, index=[next_day_minute])
        self.writer.write_sid(sid, new_data)

        # The second minute should have been padded with zeros
        second_minute = first_minute + Timedelta(minutes=1)
        self._check_empty(sid, second_minute)

        # The first day should contain US_EQUITIES_MINUTES_PER_DAY rows.
        # The second day should contain a single row.
        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            US_EQUITIES_MINUTES_PER_DAY + 1,
        )

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that the data is written to the correct sid, as well as
        ensuring that the logic for creating the subdirectory path to each sid
        does not cause issues from attempts to recreate existing paths.
        (Calling out this coverage, because an assertion of that logic does
        not show up in the test itself, but is exercised by the act of
        attempting to write two consecutive sids, which would be written to
        the same containing directory, `00/00/000001.bcolz` and
        `00/00/000002.bcolz`.)

        Before applying a check to make sure the path writing did not
        re-attempt directory creation an OSError like the following would
        occur:

        ```
        OSError: [Errno 17] File exists: '/tmp/tmpR7yzzT/minute_bars/00/00'
        ```
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0],
            },
            index=[minute])
        self.writer.write_sid(sids[0], data)

        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0],
            },
            index=[minute])
        self.writer.write_sid(sids[1], data)

        self._check_ohlcv(sids[0], minute, {
            'open': 15.0, 'high': 17.0, 'low': 11.0,
            'close': 15.0, 'volume': 100.0,
        })
        self._check_ohlcv(sids[1], minute, {
            'open': 25.0, 'high': 27.0, 'low': 21.0,
            'close': 25.0, 'volume': 200.0,
        })

    def test_pad_data(self):
        """
        Test that padding an empty sid through a day allows a write on the
        following day, and that a second pad fills the day out completely.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        day = TEST_CALENDAR_START + freq
        minute = self.market_opens[day]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0],
            },
            index=[minute])
        self.writer.write_sid(sid, data)

        self._check_ohlcv(sid, minute, {
            'open': 15.0, 'high': 17.0, 'low': 11.0,
            'close': 15.0, 'volume': 100.0,
        })

        # Check that if we then pad the rest of this day, we end up with
        # 2 days worth of minutes.
        self.writer.pad(sid, day)
        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            self.writer._minutes_per_day * 2,
        )

    def test_nans(self):
        """
        Test writing rows whose prices are all NaN (volume zero) and reading
        them back through ``load_raw_arrays``.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write_sid(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = list(map(
            transpose,
            self.reader.load_raw_arrays(
                fields, minutes[0], minutes[-1], [sid],
            ),
        ))

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)

        self.writer.pad(sid, TEST_CALENDAR_START)

        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)

        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                # Each column is built from a distinct NaN bit pattern
                # (all-ones exponent plus a nonzero payload); the reader
                # must treat every payload as plain NaN.
                'open': ((0b11111111111 << 52) +
                         arange(1, 10, dtype=int64)).view(float64),
                'high': ((0b11111111111 << 52) +
                         arange(11, 20, dtype=int64)).view(float64),
                'low': ((0b11111111111 << 52) +
                        arange(21, 30, dtype=int64)).view(float64),
                'close': ((0b11111111111 << 52) +
                          arange(31, 40, dtype=int64)).view(float64),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write_sid(sid, data)

        fields = ['open', 'high', 'low', 'close', 'volume']

        ohlcv_window = list(map(
            transpose,
            self.reader.load_raw_arrays(
                fields, minutes[0], minutes[-1], [sid],
            ),
        ))

        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_write_cols(self):
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0]),
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)

        self._check_ohlcv(sid, minute_0, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minute_1, {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_write_cols_mismatch_length(self):
        dts = date_range(self.market_opens[self.test_calendar_start],
                         periods=2, freq='min').asi8.astype('datetime64[s]')
        sid = 1
        # Deliberately ragged column lengths: the writer must reject them.
        cols = {
            'open': array([10.0, 11.0, 12.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0, 33.0, 34.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0, 52.0]),
        }
        with self.assertRaises(BcolzMinuteWriterColumnMismatch):
            self.writer.write_cols(sid, dts, cols)

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001],
            },
            index=minutes)
        self.writer.write_sid(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001],
            },
            index=minutes)
        self.writer.write_sid(sids[1], data_2)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = list(map(
            transpose,
            reader.load_raw_arrays(
                columns, minutes[0], minutes[-1], sids,
            ),
        ))

        data = {sids[0]: data_1, sids[1]: data_2}

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])

    def test_unadjusted_minutes_early_close(self):
        """
        Test unadjusted minute window, ensuring that early closes are
        filtered out.
        """
        day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC')
        xmas_eve = Timestamp('2015-12-24', tz='UTC')
        market_day_after_xmas = Timestamp('2015-12-28', tz='UTC')

        minutes = [self.market_closes[day_before_thanksgiving] -
                   Timedelta('2 min'),
                   self.market_closes[xmas_eve] - Timedelta('1 min'),
                   self.market_opens[market_day_after_xmas] +
                   Timedelta('1 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            },
            index=minutes)
        self.writer.write_sid(sids[0], data_1)

        data_2 = DataFrame(
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            },
            index=minutes)
        self.writer.write_sid(sids[1], data_2)

        reader = BcolzMinuteBarReader(self.dest)

        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = list(map(
            transpose,
            reader.load_raw_arrays(
                columns, minutes[0], minutes[-1], sids,
            ),
        ))

        data = {sids[0]: data_1, sids[1]: data_2}

        # Map each written minute to its offset within the returned window
        # so the comparison skips the filtered early-close minutes.
        start_minute_loc = self.env.market_minutes.get_loc(minutes[0])
        minute_locs = [self.env.market_minutes.get_loc(minute) -
                       start_minute_loc
                       for minute in minutes]

        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid].loc[minutes, col],
                                    arrays[i][j][minute_locs])

    def test_adjust_non_trading_minutes(self):
        start_day = Timestamp('2015-06-01', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')

        sid = 1
        cols = {
            'open': arange(1, 781),
            'high': arange(1, 781),
            'low': arange(1, 781),
            'close': arange(1, 781),
            'volume': arange(1, 781),
        }
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)

        # Last trading minute of each session reads the session's final
        # value (390 minutes per regular session).
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-01 20:00:00', tz='UTC'), 'open'),
            390)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-02 20:00:00', tz='UTC'), 'open'),
            780)
        # Non-trading minutes read the most recent trading minute's value.
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-02', tz='UTC'), 'open'),
            390)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-02 20:01:00', tz='UTC'), 'open'),
            780)

    def test_adjust_non_trading_minutes_half_days(self):
        # half day
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-11-30', tz='UTC')

        sid = 1
        cols = {
            'open': arange(1, 601),
            'high': arange(1, 601),
            'low': arange(1, 601),
            'close': arange(1, 601),
            'volume': arange(1, 601),
        }
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)

        # 2015-11-27 is a 210-minute half session; 2015-11-30 is full.
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-27 18:00:00', tz='UTC'), 'open'),
            210)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-30 21:00:00', tz='UTC'), 'open'),
            600)
        # Minutes after the early close read the half-session's last value.
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-27 18:01:00', tz='UTC'), 'open'),
            210)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-30', tz='UTC'), 'open'),
            210)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-30 21:01:00', tz='UTC'), 'open'),
            600)

    def test_set_sid_attrs(self):
        """Confirm that we can set the attributes of a sid's file correctly.
        """
        sid = 1
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')
        attrs = {
            # epoch seconds; NOTE(review): true division yields a float on
            # Python 3 — confirm the attr store round-trips it unchanged.
            'start_day': start_day.value / int(1e9),
            'end_day': end_day.value / int(1e9),
            'factor': 100,
        }

        # Write the attributes
        self.writer.set_sid_attrs(sid, **attrs)
        # Read the attributes
        for k, v in attrs.items():
            self.assertEqual(self.reader.get_sid_attr(sid, k), v)
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for ``BcolzMinuteBarWriter`` / ``BcolzMinuteBarReader``
    using the ``write`` API and a writer configured with market opens only.

    NOTE(review): this file defines more than one class with this name; only
    the last definition is visible after import — confirm the duplication is
    intentional.
    """

    @classmethod
    def setUpClass(cls):
        # Restrict the trading calendar to the test window so writer and
        # reader agree on exactly which market minutes exist.
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP,
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _check_ohlcv(self, sid, minute, expected):
        """Assert that every OHLCV field for ``sid`` at ``minute`` reads back
        the value in the ``expected`` field -> value mapping.
        """
        for field, value in expected.items():
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                msg='mismatch for field %r at %s' % (field, minute),
            )

    def test_write_one_ohlcv(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write(sid, data)

        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })

    def test_write_two_bars(self):
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0],
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)

        self._check_ohlcv(sid, minute_0, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minute_1, {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_write_on_second_day(self):
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write(sid, data)

        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })

    def test_write_empty(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0],
            },
            index=[minute])
        self.writer.write(sid, data)

        # Zero prices are read back as NaN; zero volume stays zero.
        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))
        assert_almost_equal(0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3)]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0],
            },
            index=minutes)
        self.writer.write(sid, data)

        self._check_ohlcv(sid, minutes[0], {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minutes[1], {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_no_overwrite(self):
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0],
            },
            index=[minute])
        self.writer.write(sid, data)

        # Writing the same minute twice must be rejected.
        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for the bcolz-backed minute OHLCV bar writer/reader.

    Every test gets a fresh ``BcolzMinuteBarWriter``/``BcolzMinuteBarReader``
    pair rooted in a temporary directory.  The trading calendar is restricted
    in ``setUpClass`` to the ``[TEST_CALENDAR_START, TEST_CALENDAR_STOP]``
    window so the writer only deals with a handful of sessions.

    Fix vs. original: all value checks used the deprecated ``assertEquals``
    alias (removed in Python 3.12); they now go through ``assertEqual`` via
    the ``_check_ohlcv`` helper, which also removes ~15 copies of the same
    read-back boilerplate.
    """

    @classmethod
    def setUpClass(cls):
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        all_market_closes = cls.env.open_and_closes.market_close
        # Restrict the full calendar to the test window.
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.market_closes = all_market_closes[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        # Fresh writer/reader pair against a scratch directory per test.
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            self.market_closes,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _check_ohlcv(self, sid, minute, expected):
        """Assert the reader returns ``expected`` ({field: value}) for
        ``sid`` at ``minute``.
        """
        for field, value in expected.items():
            self.assertEqual(
                value,
                self.reader.get_value(sid, minute, field),
                msg=field,
            )

    def test_write_one_ohlcv(self):
        """A single written bar round-trips through the reader."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)
        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })

    def test_write_two_bars(self):
        """Two consecutive minutes written in one frame both round-trip."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write_sid(sid, data)
        self._check_ohlcv(sid, minute_0, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minute_1, {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_write_on_second_day(self):
        """Writing on the second session of the calendar round-trips."""
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)
        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })

    def test_write_empty(self):
        """Zero-valued input reads back as NaN prices and zero volume."""
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)
        # Zero prices are sentinel values and come back as NaN.
        for field in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, minute, field))
        assert_almost_equal(0, self.reader.get_value(sid, minute, 'volume'))

    def test_write_on_multiple_days(self):
        """Bars spread over two later sessions both round-trip."""
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write_sid(sid, data)
        self._check_ohlcv(sid, minutes[0], {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minutes[1], {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_no_overwrite(self):
        """Writing the same minute twice raises BcolzMinuteOverlappingData."""
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)
        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write_sid(sid, data)

    def test_append_to_same_day(self):
        """
        Test writing data with the same date as existing data in our file.
        """
        sid = 1
        first_minute = self.market_opens[TEST_CALENDAR_START]
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[first_minute])
        self.writer.write_sid(sid, data)
        # Write data in the same day as the previous minute.
        second_minute = first_minute + Timedelta(minutes=1)
        new_data = DataFrame(
            data={
                'open': [5.0],
                'high': [10.0],
                'low': [3.0],
                'close': [7.0],
                'volume': [10.0]
            },
            index=[second_minute])
        self.writer.write_sid(sid, new_data)
        self._check_ohlcv(sid, second_minute, {
            'open': 5.0, 'high': 10.0, 'low': 3.0,
            'close': 7.0, 'volume': 10.0,
        })

    def test_append_on_new_day(self):
        """Appending a later session pads the gap with NaN/zero rows."""
        sid = 1
        ohlcv = {
            'open': [2.0],
            'high': [3.0],
            'low': [1.0],
            'close': [2.0],
            'volume': [10.0]
        }
        first_minute = self.market_opens[TEST_CALENDAR_START]
        data = DataFrame(data=ohlcv, index=[first_minute])
        self.writer.write_sid(sid, data)
        next_day_minute = first_minute + Timedelta(days=1)
        new_data = DataFrame(data=ohlcv, index=[next_day_minute])
        self.writer.write_sid(sid, new_data)
        second_minute = first_minute + Timedelta(minutes=1)
        # The second minute should have been padded with zeros (NaN prices).
        for col in ('open', 'high', 'low', 'close'):
            assert_almost_equal(
                nan, self.reader.get_value(sid, second_minute, col)
            )
        self.assertEqual(
            0, self.reader.get_value(sid, second_minute, 'volume')
        )
        # The first day should contain US_EQUITIES_MINUTES_PER_DAY rows.
        # The second day should contain a single row.
        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            US_EQUITIES_MINUTES_PER_DAY + 1,
        )

    def test_write_multiple_sids(self):
        """
        Test writing multiple sids.

        Tests both that data lands under the correct sid and that creating
        the per-sid subdirectory path does not fail when two consecutive
        sids share a containing directory (`00/00/000001.bcolz` and
        `00/00/000002.bcolz`).  Without the existence check this raised:

            OSError: [Errno 17] File exists: '.../minute_bars/00/00'
        """
        minute = self.market_opens[TEST_CALENDAR_START]
        sids = [1, 2]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write_sid(sids[0], data)
        data = DataFrame(
            data={
                'open': [25.0],
                'high': [27.0],
                'low': [21.0],
                'close': [25.0],
                'volume': [200.0]
            },
            index=[minute])
        self.writer.write_sid(sids[1], data)
        self._check_ohlcv(sids[0], minute, {
            'open': 15.0, 'high': 17.0, 'low': 11.0,
            'close': 15.0, 'volume': 100.0,
        })
        self._check_ohlcv(sids[1], minute, {
            'open': 25.0, 'high': 27.0, 'low': 21.0,
            'close': 25.0, 'volume': 200.0,
        })

    def test_pad_data(self):
        """
        Test padding out an empty day, then writing on the next day.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)
        self.writer.pad(sid, TEST_CALENDAR_START)
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)
        freq = self.market_opens.index.freq
        day = TEST_CALENDAR_START + freq
        minute = self.market_opens[day]
        data = DataFrame(
            data={
                'open': [15.0],
                'high': [17.0],
                'low': [11.0],
                'close': [15.0],
                'volume': [100.0]
            },
            index=[minute])
        self.writer.write_sid(sid, data)
        self._check_ohlcv(sid, minute, {
            'open': 15.0, 'high': 17.0, 'low': 11.0,
            'close': 15.0, 'volume': 100.0,
        })
        # Check that if we then pad the rest of this day, we end up with
        # 2 days worth of minutes.
        self.writer.pad(sid, day)
        self.assertEqual(
            len(self.writer._ensure_ctable(sid)),
            self.writer._minutes_per_day * 2,
        )

    def test_nans(self):
        """
        Test writing rows that are entirely NaN prices / zero volume.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)
        self.writer.pad(sid, TEST_CALENDAR_START)
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)
        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': full(9, nan),
                'high': full(9, nan),
                'low': full(9, nan),
                'close': full(9, nan),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write_sid(sid, data)
        fields = ['open', 'high', 'low', 'close', 'volume']
        ohlcv_window = list(map(transpose, self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid],
        )))
        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_differing_nans(self):
        """
        Also test nans of differing values/construction.

        The bit-fiddled values below all have the NaN exponent pattern but
        distinct payloads; the reader must normalize them all to NaN.
        """
        sid = 1
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertIs(last_date, NaT)
        self.writer.pad(sid, TEST_CALENDAR_START)
        last_date = self.writer.last_date_in_output_for_sid(sid)
        self.assertEqual(last_date, TEST_CALENDAR_START)
        freq = self.market_opens.index.freq
        minute = self.market_opens[TEST_CALENDAR_START + freq]
        minutes = date_range(minute, periods=9, freq='min')
        data = DataFrame(
            data={
                'open': ((0b11111111111 << 52) +
                         arange(1, 10, dtype=int64)).view(float64),
                'high': ((0b11111111111 << 52) +
                         arange(11, 20, dtype=int64)).view(float64),
                'low': ((0b11111111111 << 52) +
                        arange(21, 30, dtype=int64)).view(float64),
                'close': ((0b11111111111 << 52) +
                          arange(31, 40, dtype=int64)).view(float64),
                'volume': full(9, 0),
            },
            index=[minutes])
        self.writer.write_sid(sid, data)
        fields = ['open', 'high', 'low', 'close', 'volume']
        ohlcv_window = list(map(transpose, self.reader.load_raw_arrays(
            fields, minutes[0], minutes[-1], [sid],
        )))
        for i, field in enumerate(fields):
            if field != 'volume':
                assert_array_equal(full(9, nan), ohlcv_window[i][0])
            else:
                assert_array_equal(zeros(9), ohlcv_window[i][0])

    def test_write_cols(self):
        """Columnar writes via write_cols round-trip like frame writes."""
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        cols = {
            'open': array([10.0, 11.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0])
        }
        dts = array([minute_0, minute_1], dtype='datetime64[s]')
        self.writer.write_cols(sid, dts, cols)
        self._check_ohlcv(sid, minute_0, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0,
        })
        self._check_ohlcv(sid, minute_1, {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0,
        })

    def test_write_cols_mismatch_length(self):
        """Columns of differing lengths raise a column-mismatch error."""
        dts = date_range(self.market_opens[self.test_calendar_start],
                         periods=2, freq='min').asi8.astype('datetime64[s]')
        sid = 1
        cols = {
            'open': array([10.0, 11.0, 12.0]),
            'high': array([20.0, 21.0]),
            'low': array([30.0, 31.0, 33.0, 34.0]),
            'close': array([40.0, 41.0]),
            'volume': array([50.0, 51.0, 52.0])
        }
        with self.assertRaises(BcolzMinuteWriterColumnMismatch):
            self.writer.write_cols(sid, dts, cols)

    def test_unadjusted_minutes(self):
        """
        Test unadjusted minutes.
        """
        start_minute = self.market_opens[TEST_CALENDAR_START]
        minutes = [start_minute,
                   start_minute + Timedelta('1 min'),
                   start_minute + Timedelta('2 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, nan, 15.1],
                'high': [17.0, nan, 17.1],
                'low': [11.0, nan, 11.1],
                'close': [14.0, nan, 14.1],
                'volume': [1000, 0, 1001]
            },
            index=minutes)
        self.writer.write_sid(sids[0], data_1)
        data_2 = DataFrame(
            data={
                'open': [25.0, nan, 25.1],
                'high': [27.0, nan, 27.1],
                'low': [21.0, nan, 21.1],
                'close': [24.0, nan, 24.1],
                'volume': [2000, 0, 2001]
            },
            index=minutes)
        self.writer.write_sid(sids[1], data_2)
        reader = BcolzMinuteBarReader(self.dest)
        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = list(map(transpose, reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )))
        data = {sids[0]: data_1, sids[1]: data_2}
        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid][col], arrays[i][j])

    def test_unadjusted_minutes_early_close(self):
        """
        Test unadjusted minute window, ensuring that early closes are
        filtered out.
        """
        day_before_thanksgiving = Timestamp('2015-11-25', tz='UTC')
        xmas_eve = Timestamp('2015-12-24', tz='UTC')
        market_day_after_xmas = Timestamp('2015-12-28', tz='UTC')
        minutes = [self.market_closes[day_before_thanksgiving] -
                   Timedelta('2 min'),
                   self.market_closes[xmas_eve] - Timedelta('1 min'),
                   self.market_opens[market_day_after_xmas] +
                   Timedelta('1 min')]
        sids = [1, 2]
        data_1 = DataFrame(
            data={
                'open': [15.0, 15.1, 15.2],
                'high': [17.0, 17.1, 17.2],
                'low': [11.0, 11.1, 11.3],
                'close': [14.0, 14.1, 14.2],
                'volume': [1000, 1001, 1002],
            },
            index=minutes)
        self.writer.write_sid(sids[0], data_1)
        data_2 = DataFrame(
            data={
                'open': [25.0, 25.1, 25.2],
                'high': [27.0, 27.1, 27.2],
                'low': [21.0, 21.1, 21.2],
                'close': [24.0, 24.1, 24.2],
                'volume': [2000, 2001, 2002],
            },
            index=minutes)
        self.writer.write_sid(sids[1], data_2)
        reader = BcolzMinuteBarReader(self.dest)
        columns = ['open', 'high', 'low', 'close', 'volume']
        sids = [sids[0], sids[1]]
        arrays = list(map(transpose, reader.load_raw_arrays(
            columns, minutes[0], minutes[-1], sids,
        )))
        data = {sids[0]: data_1, sids[1]: data_2}
        start_minute_loc = self.env.market_minutes.get_loc(minutes[0])
        minute_locs = [self.env.market_minutes.get_loc(minute) -
                       start_minute_loc
                       for minute in minutes]
        for i, col in enumerate(columns):
            for j, sid in enumerate(sids):
                assert_almost_equal(data[sid].loc[minutes, col],
                                    arrays[i][j][minute_locs])

    def test_adjust_non_trading_minutes(self):
        """Reads at non-trading minutes forward/back-fill to session edges."""
        start_day = Timestamp('2015-06-01', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')
        sid = 1
        cols = {
            'open': arange(1, 781),
            'high': arange(1, 781),
            'low': arange(1, 781),
            'close': arange(1, 781),
            'volume': arange(1, 781)
        }
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-01 20:00:00', tz='UTC'), 'open'),
            390)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-02 20:00:00', tz='UTC'), 'open'),
            780)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-02', tz='UTC'), 'open'),
            390)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-06-02 20:01:00', tz='UTC'), 'open'),
            780)

    def test_adjust_non_trading_minutes_half_days(self):
        """Same as above, but spanning a half day (early close)."""
        # half day
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-11-30', tz='UTC')
        sid = 1
        cols = {
            'open': arange(1, 601),
            'high': arange(1, 601),
            'low': arange(1, 601),
            'close': arange(1, 601),
            'volume': arange(1, 601)
        }
        dts = array(self.env.minutes_for_days_in_range(start_day, end_day))
        self.writer.write_cols(sid, dts, cols)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-27 18:00:00', tz='UTC'), 'open'),
            210)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-30 21:00:00', tz='UTC'), 'open'),
            600)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-27 18:01:00', tz='UTC'), 'open'),
            210)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-30', tz='UTC'), 'open'),
            210)
        self.assertEqual(
            self.reader.get_value(
                sid, Timestamp('2015-11-30 21:01:00', tz='UTC'), 'open'),
            600)

    def test_set_sid_attrs(self):
        """Confirm that we can set the attributes of a sid's file correctly.
        """
        sid = 1
        start_day = Timestamp('2015-11-27', tz='UTC')
        end_day = Timestamp('2015-06-02', tz='UTC')
        attrs = {
            'start_day': start_day.value / int(1e9),
            'end_day': end_day.value / int(1e9),
            'factor': 100,
        }
        # Write the attributes
        self.writer.set_sid_attrs(sid, **attrs)
        # Read the attributes
        for k, v in attrs.items():
            self.assertEqual(self.reader.get_sid_attr(sid, k), v)
class BaseTester(object):
    """Shared fixture base for the test suite.

    Provides per-test setup/teardown (test data creation, a TempDirectory,
    a TestsConfiger) plus @nottest helpers that stage corpora, databases
    and in-memory fixture lists into the temp directory.
    """
    # "silent"
    # Nose multiprocess plugin flag; see the explanation below.
    _multiprocess_can_split_ = True
    #If a context's fixtures are re-entrant, set _multiprocess_can_split_ = True in the context, and the plugin will dispatch tests in suites bound to that context as if the context had no fixtures. This means that the fixtures will execute concurrently and multiple times, typically once per test.
    #_multiprocess_shared_ = True
    #If a context's fixtures can be shared by tests running in different processes - such as a package-level fixture that starts an external http server or initializes a shared database - then set _multiprocess_shared_ = True in the context. These fixtures will then execute in the primary nose process, and tests in those contexts will be individually dispatched to run in parallel.

    def setUp(self):
        """Create fresh test data, a configurer and a temp directory."""
        create_test_data()
        gc.collect()  ## Start Garbage Collector, before start with the new test case.
        #### MODI######
        #self.mode = "test"
        #self.mode = "test+s+"
        #self.mode = "test+s-"
        #self.mode = "dev"
        #self.mode = "dev-"
        #self.mode = "silent"
        self.mode = "error"
        #### Set TestsConfiger #####
        clear_logger()
        # NOTE(review): configer is created with mode="silent" while
        # self.mode is "error" — the two are intentionally independent
        # per the comment below, but confirm against TestsConfiger usage.
        self.configer = TestsConfiger(mode="silent")  # MODE SHOULD BE "test". !!!
        self.tempdir = TempDirectory()
        self.path_to_zas_rep_tools = self.configer.path_to_zas_rep_tools
        #gc.collect()

    #@classmethod
    def tearDown(self):
        """Dispose of the temp directory after forcing a GC pass."""
        #del self.configer
        # Keep a local handle so cleanup can run after `del self`.
        t = self.tempdir
        #self.tempdir.cleanup()
        del self  # only unbinds the local name; kept from original code
        gc.collect()
        t.cleanup()
        del t
        #del self
        #gc.collect()

    @nottest
    def create_all_test_data(self):
        """Stage every fixture kind (folders, DBs, corpora, lists)."""
        self.prj_folder()
        self.test_dbs()
        self.blogger_corpus()
        self.twitter_corpus()
        self.blogger_lists()

    @nottest
    def test_dbs(self):
        """Resolve test-DB paths from the configurer and copy them into
        the temp directory under 'TestDBs'.  (@nottest: helper, not a test.)
        """
        #####################
        #### Test DBs########
        #######Begin#########
        self.path_to_testdbs = self.configer.path_to_testdbs
        self.db_blogger_plaintext_corp_en = self.configer.test_dbs[
            "plaintext"]["blogger"]["en"]["corpus"]
        self.db_blogger_plaintext_corp_de = self.configer.test_dbs[
            "plaintext"]["blogger"]["de"]["corpus"]
        self.db_blogger_plaintext_corp_test = self.configer.test_dbs[
            "plaintext"]["blogger"]["test"]["corpus"]
        self.db_blogger_plaintext_stats_en = self.configer.test_dbs[
            "plaintext"]["blogger"]["en"]["stats"]
        self.db_blogger_plaintext_stats_de = self.configer.test_dbs[
            "plaintext"]["blogger"]["de"]["stats"]
        self.db_blogger_plaintext_stats_test = self.configer.test_dbs[
            "plaintext"]["blogger"]["test"]["stats"]
        self.db_twitter_encrypted_corp_de = self.configer.test_dbs[
            "encrypted"]["twitter"]["de"]["corpus"]
        self.db_twitter_encrypted_stats_de = self.configer.test_dbs[
            "encrypted"]["twitter"]["de"]["stats"]
        ## TempDir
        self.tempdir.makedir('TestDBs')
        self.tempdir_testdbs = self.tempdir.getpath('TestDBs')
        copy_tree(
            os.path.join(self.path_to_zas_rep_tools, self.path_to_testdbs),
            self.tempdir_testdbs)
        #p(self.tempdir_testdbs)
        #######End###########
        #### Test DBs########
        #####################

    def blogger_corpus(self):
        """Record blogger-corpus sub-set paths and copy the corpus tree
        into the temp directory under 'BloggerCorpus'.
        """
        #####################
        # Test Blogger Corpus#
        #######Begin#########
        self.path_to_test_sets_for_blogger_Corpus = \
            "data/tests_data/Corpora/BloggerCorpus"
        #TXT
        self.txt_blogger_hightrepetativ_set = "txt/HighRepetativSubSet"
        self.txt_blogger_small_fake_set = "txt/SmallFakeSubset"
        #self.txt_blogger_small_sub_set = "txt/SmallSubset"
        #CSV
        self.csv_blogger_hightrepetativ_set = "csv/HighRepetativSubSet"
        self.csv_blogger_small_fake_set = "csv/SmallFakeSubset"
        #self.csv_blogger_small_sub_set = "csv/SmallSubset"
        #XML
        self.xml_blogger_hightrepetativ_set = "xml/HighRepetativSubSet"
        self.xml_blogger_small_fake_set = "xml/SmallFakeSubset"
        #self.xml_blogger_small_sub_set = "xml/SmallSubset"
        #JSON
        self.json_blogger_hightrepetativ_set = "json/HighRepetativSubSet"
        self.json_blogger_small_fake_set = "json/SmallFakeSubset"
        #self.json_blogger_small_sub_set = "json/SmallSubset"
        ## TempDir
        #self.path_to_test_corpora = "data/tests_data/Corpora"
        self.tempdir.makedir('BloggerCorpus')
        self.tempdir_blogger_corp = self.tempdir.getpath('BloggerCorpus')
        copy_tree(
            os.path.join(self.path_to_zas_rep_tools,
                         self.path_to_test_sets_for_blogger_Corpus),
            self.tempdir_blogger_corp)
        #######End###########
        # Test Blogger Corpus#
        #####################

    def twitter_corpus(self):
        """Record twitter-corpus sub-set paths and copy the corpus tree
        into the temp directory under 'TwitterCorpus'.
        """
        #####################
        # Test Twitter Corpus#
        #######Begin#########
        self.path_to_test_sets_for_twitter_Corpus = \
            "data/tests_data/Corpora/TwitterCorpus"
        self.json_twitter_set = "JSON/zas-rep-tool/"
        self.csv_twitter_set = "CSV/zas-rep-tool/"
        self.xml_twitter_set = "XML/zas-rep-tool/"
        ## TempDir
        #self.path_to_test_corpora = "data/tests_data/Corpora"
        self.tempdir.makedir('TwitterCorpus')
        self.tempdir_twitter_corp = self.tempdir.getpath('TwitterCorpus')
        copy_tree(
            os.path.join(self.path_to_zas_rep_tools,
                         self.path_to_test_sets_for_twitter_Corpus),
            self.tempdir_twitter_corp)
        #######End###########
        # Test Twitter Corpus#
        #####################

    def blogger_lists(self):
        """Build in-memory blogger fixture rows (fake, high-repetitive and
        'dirty' variants) and the expected doc-table column names.

        NOTE: the literal text payloads below are test data — their exact
        bytes (escapes, emoji, repetition counts) are significant.
        """
        #####################
        #### Test Blogger ####
        #######Begin#########
        self.input_list_fake_blogger_corpus = [{
            'rowid': '1',
            'star_constellation': 'Capricorn',
            'text': u'Well, the angel won. I went to work today....after alot of internal struggle with the facts. I calculated sick days left this year,',
            'working_area': 'Consulting',
            'age': '46',
            'id': '324114',
            'gender': 'female'
        }, {
            'rowid': '2',
            'star_constellation': 'Pisces',
            'text': u"urlLink Drawing Game It's PICTIONARY. It's very cool.",
            'working_area': 'indUnk',
            'age': '24',
            'id': '416465',
            'gender': 'male'
        }, {
            'rowid': '3',
            'star_constellation': 'Virgo',
            'text': u'The mango said, "Hi there!!.... \n"Hi there!!.... \n"Hi there!!.... ',
            'working_area': 'Non-Profit',
            'age': '17',
            'id': '322624',
            'gender': 'female'
        }]
        self.input_list_blogger_corpus_high_repetativ_subset = [{
            'rowid': '1',
            'star_constellation': 'Capricorn',
            'text': u'@lovelypig #direct_to_haven 67666 8997 -))) -) -P Neeeeeeeeeeeeeeeeiiiiiiinnnnn!!!!! Bitte nicht \U0001f602\U0001f602\U0001f602 \nTest Version von einem Tweeeeeeeeet=)))))))\nnoch einen Tweeeeeeeeet=))))))) \U0001f605\U0001f605',
            'working_area': 'Consulting',
            'age': '46',
            'id': '324114',
            'gender': 'female'
        }, {
            'rowid': '2',
            'star_constellation': 'Pisces',
            'text': u'Einen weiteren Thread eingef\xfcgt!!! juHuuuuuuuu=) \U0001f49b\U0001f49b\U0001f49b\nden vierten Threadddddd!!! wooooowwwwww!!! \u263a\ufe0f \U0001f61c\U0001f61c\U0001f61c\nDas ist einnnneeeen Teeeeest Tweeeets, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen beinhalten sollte. \U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c Das ist einnnneeeen Teeeeest Tweeeets, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen \U0001f61c\U0001f61c\U0001f61c\U0001f61c\nDas ist einnnneeeen Teeeeest Quoted Tweet, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen beinhalten sollte. \U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c\U0001f61c Das ist einnnneeeen Teeeeest Tweeeets, das als "extended" klassifiziert werden sollte!!! Weil es bis 280 Zeichen \U0001f61c\U0001f61c h',
            'working_area': 'indUnk',
            'age': '24',
            'id': '416465',
            'gender': 'male'
        }, {
            'rowid': '3',
            'star_constellation': 'Virgo',
            'text': u'Eine Teeeeeest Diskussion wird er\xf6ffnet!!! @zas-rep-tools \nEinen Test Retweet wird gepostet!!!!! Juhuuuuuu=) \U0001f600\U0001f600\U0001f600\U0001f600\nnoooooooch einen Tweeeeeeeeet=)))))))',
            'working_area': 'Non-Profit',
            'age': '17',
            'id': '322624',
            'gender': 'female'
        }]
        self.input_list_blogger_corpus_dirty = [{
            'rowid': '1',
            'star_constellation': 'Capricorn',
            'text': u'@lovelypig #direct_to_haven 67666 8997 -))) -) -P Neeeeeeeeeeeeeeeeiiiiiiinnnnn!!!!! Bitte nicht @lovelypig \U0001f602\U0001f602\U0001f602 \nTest Version von einem Tweeeeeeeeet=)))))))\nnoch einen Tweeeeeeeeet=))))))) 111111 22222 3. 444 \U0001f605\U0001f605',
            'working_area': 'Consulting',
            'age': '46',
            'id': '324114',
            'gender': 'female'
        }, {
            'rowid': '2',
            'star_constellation': 'Virgo',
            'text': u'Eine Teeeeeest Diskussion wird er\xf6ffnet!!! @zas-rep-tools #doit #stay_you \nEinen Test Retweet wird gepostet!!!!! =))))))) #stay_your_self',
            'working_area': 'Non-Profit',
            'age': '17',
            'id': '322624',
            'gender': 'female'
        }]
        self.fieldnames = self.configer.columns_in_doc_table["blogger"]
        #######End###########
        #### Test Blogger ####
        #####################

    def prj_folder(self):
        """Create the 'ProjectFolder' directory inside the temp directory."""
        #####################
        #### Test PrjFolder #
        #######Begin#########
        ## TempDir
        self.tempdir.makedir('ProjectFolder')
        self.tempdir_project_folder = self.tempdir.getpath('ProjectFolder')
class Skeleton_S3CommitStorage_Against_Mock(TestCase):
    """Skeleton test case exercising S3CommitStorage against a mocked boto
    Bucket.

    ``__test__ = False`` keeps the skeleton itself from being collected;
    concrete subclasses are expected to set ``bucket_prefix`` and flip the
    flag.  Key names follow the ``<prefix><tag>_<message>`` convention.
    """
    __test__ = False

    def setUp(self):
        # NOTE(review): subclasses must define self.bucket_prefix before
        # this runs (it is read when constructing the store).
        self.tempdir = TempDirectory()
        self.setup_mock_defaults()
        self.store = S3CommitStorage(self.mock_bucket, self.bucket_prefix)

    def setup_mock_defaults(self):
        # Mock bucket with an initially empty listing.
        self.mock_bucket = Mock(spec=Bucket)
        self.set_bucket_list([])

    def tearDown(self):
        self.tempdir.cleanup()

    def prefix_key(self, key):
        # Prepend the configured bucket prefix (may be None -> '').
        return ''.join([self.bucket_prefix or '', key])

    def set_bucket_list(self, keynames):
        # Make bucket.list() return Key objects for the given names,
        # each prefixed as they would be in the real bucket.
        prefixed_keynames = [self.prefix_key(key) for key in keynames]
        key_objs = [Key(None, key) for key in prefixed_keynames]
        self.mock_bucket.list.return_value = key_objs

    def test_get_tags_calls_bucket_list__empty(self):
        self.assertEqual([], self.store.get_tags())
        self.mock_bucket.list.assert_called_with(prefix=self.prefix_key(''))

    def test_get_tags_calls_bucket_list_not_empty(self):
        self.set_bucket_list(['tag1_msg1', 'tag2_msg2'])
        self.store.get_tags()
        self.mock_bucket.list.assert_called_with(prefix=self.prefix_key(''))

    def test_get_tags_parses_keys_properly(self):
        # Tag is the portion of the key name before the first underscore.
        self.set_bucket_list(['tag1_msg1', 'tag2_msg2'])
        self.assertEqual(['tag1', 'tag2'], self.store.get_tags())

    def test_get_message_for_tag_calls_bucket_list(self):
        self.set_bucket_list(['tag1_msg1'])
        self.store.get_message_for_tag('tag1')
        prefix = self.prefix_key(''.join(['tag1', '_']))
        self.mock_bucket.list.assert_called_with(prefix=prefix)

    def test_get_message_for_tag_parses_keyname_properly(self):
        self.set_bucket_list(['tag1_msg1'])
        self.assertEqual('msg1', self.store.get_message_for_tag('tag1'))

    def test_add_commit_calls_new_key_with_expected_format(self):
        filename1 = self.tempdir.write('file1', 'some file contents')
        self.store.add_commit('tag1', open(filename1, 'rb'), 'some_message')
        expected_key_name = self.prefix_key(''.join(['tag1', '_',
                                                     'some_message']))
        self.mock_bucket.new_key.assert_called_with(expected_key_name)

    def test_add_commit_calls_set_contents_from_filename(self):
        commit_contents = 'some file contents'
        filename1 = self.tempdir.write('file1', commit_contents)
        fp1 = open(filename1, 'rb')
        self.store.add_commit('tag1', fp1, 'some_message')
        new_key_mock = self.mock_bucket.new_key.return_value
        new_key_mock.set_contents_from_file.assert_called_once()
        # call_args[1] is the kwargs dict of the last call.
        mock_last_called_kwargs = new_key_mock.\
            set_contents_from_file.call_args[1]
        mock_actual_headers = mock_last_called_kwargs['headers']
        # Compare headers case-insensitively.
        lower_headers = {}
        for k in mock_actual_headers:
            lower_headers[k.lower()] = mock_actual_headers[k]
        expected_headers = {
            'Content-Type': 'application/octet-stream',
            'Content-Encoding': 'gzip',
            'x-amz-meta-fingerprint': md5(commit_contents).hexdigest()}
        for header in expected_headers:
            self.assertIn(header.lower(), lower_headers.keys())
            self.assertEqual(expected_headers[header],
                             lower_headers[header.lower()])

    def test_delete_commit_calls_get_key(self):
        self.set_bucket_list(['tag1_msg1'])
        self.store.delete_commit('tag1')
        self.mock_bucket.get_key.assert_called_with(
            self.prefix_key('tag1_msg1'))
        self.mock_bucket.get_key.return_value.delete.assert_called_with()

    def test_get_commit_contents_calls_get_contents_to_filename(self):
        self.set_bucket_list(['tag1_msg1'])
        target_file = self.tempdir.getpath('restored_file')
        self.store.get_commit_contents_to_filename('tag1', target_file)
        get_key = self.mock_bucket.get_key.return_value
        get_key.get_contents_to_filename.assert_called_once()

    def test_if_no_content_encoding_restores_to_target_file(self):
        self.set_bucket_list(['tag1_msg1'])
        target_file = self.tempdir.getpath('restored_file')
        self.store.get_commit_contents_to_filename('tag1', target_file)
        get_key = self.mock_bucket.get_key.return_value
        get_key.get_contents_to_filename.assert_called_with(target_file)

    def test_if_content_encoding_gzip_calls_gunzip_key_to_filename(self):
        self.set_bucket_list(['tag1_msg1'])
        target_file = self.tempdir.getpath('restored_file')
        self.store._gunzip_key_to_filename = Mock()
        # set content-encoding on key
        get_key = self.mock_bucket.get_key.return_value
        get_key.content_encoding = 'gzip'
        self.store.get_commit_contents_to_filename('tag1', target_file)
        get_key.get_contents_to_filename.assert_called_once()
        self.store._gunzip_key_to_filename.assert_called_with(get_key,
                                                              target_file)

    def test_get_commit_contents_raises_Exception_if_file_exists(self):
        self.set_bucket_list(['tag1_msg1'])
        file1 = self.tempdir.write('file1', 'some file contents')

        def will_raise_Exception():
            self.store.get_commit_contents_to_filename('tag1', file1)
        self.assertRaises(FileAlreadyExistsError, will_raise_Exception)

    def test_dictionary_interface_returns_a_commit_object(self):
        self.set_bucket_list(['tag1_msg1'])
        commit = self.store['tag1']
        self.assertEqual('tag1', commit.tag)
        self.assertEqual('msg1', commit.message)

    def test_dictionary_interface_uses_custom_header_to_get_fingerprint(self):
        self.set_bucket_list(['tag1_msg1'])
        self.mock_bucket.get_key.return_value.get_metadata.return_value = '123'
        commit = self.store['tag1']
        self.assertEqual('tag1', commit.tag)
        self.assertEqual('msg1', commit.message)
        self.assertEqual('123', commit.fingerprint)

    def test_dictionary_interface_raises_Exception_if_unknown_tag(self):
        def will_raise_UnknownTagError():
            self.store['tag1']
        self.assertRaises(UnknownTagError, will_raise_UnknownTagError)

    def test_contains_interface_calls_bucket_list_with_prefix(self):
        self.set_bucket_list(['tag1_msg1'])
        'tag1' in self.store
        self.mock_bucket.list.assert_called_with(
            prefix=self.prefix_key('tag1_'))

    def test_contains_interface_returns_true_for_tags_in_bucket(self):
        self.set_bucket_list(['tag1_msg1'])
        self.assertTrue('tag1' in self.store)

    def test_contains_interface_returns_false_for_tags_not_in_bucket(self):
        self.assertFalse('tag2' in self.store)

    def test_raises_Exception_if_prefix_has_leading_slash(self):
        def will_raise_Exception():
            S3CommitStorage(self.mock_bucket, '/illegal_prefix')
        self.assertRaises(Exception, will_raise_Exception)
class BaseSupervisorTestCase(TestCase):
    """
    Common scaffolding for supervisor integration tests: launches
    supervisord against a generated config file, drives supervisorctl,
    and emulates a Logstash endpoint with a UDP socketserver.
    """

    maxDiff = None

    def __init__(self, *args, **kwargs):
        super(BaseSupervisorTestCase, self).__init__(*args, **kwargs)
        self.supervisor = None
        self.logstash = None
        # kept on the instance because supervisorctl reuses the same file
        self._config_file_path = None

    def setUp(self):
        self.scratch = TempDirectory()

    def tearDown(self):
        self.scratch.cleanup()

    def run_supervisor(self, overrides, configuration_string):
        """Start supervisord using the template config plus extra sections."""
        environment = os.environ.copy()
        environment.update(overrides)

        working_directory = os.path.dirname(__file__)
        template_path = os.path.join(working_directory, 'supervisord.template')
        with open(template_path) as template:
            configuration = template.read() + configuration_string

        self.scratch.write('supervisor.conf', configuration, 'utf-8')
        # kept on the instance because supervisorctl reuses the same file
        self._config_file_path = self.scratch.getpath('supervisor.conf')

        self.supervisor = subprocess.Popen(
            ['supervisord', '-c', self._config_file_path],
            env=environment,
            cwd=os.path.dirname(working_directory),
        )

    def shutdown_supervisor(self):
        """Terminate supervisord and wait until it has fully exited."""
        self.supervisor.terminate()
        # wait while the process kills off its children and exits, so
        # that it doesn't block the port for the next test
        while self.supervisor.poll() is None:
            sleep(1)

    def run_supervisorctl(self, args):
        """Invoke supervisorctl against the test suite's config file."""
        return subprocess.call(
            ['supervisorctl', '-c', self._config_file_path] + args)

    def run_logstash(self):
        """Start a UDP socketserver instance emulating Logstash."""
        self.logstash = socketserver.UDPServer(('0.0.0.0', 0), LogstashHandler)
        threading.Thread(target=self.logstash.serve_forever).start()
        return self.logstash

    def shutdown_logstash(self):
        """Stop the emulated Logstash server."""
        self.logstash.shutdown()
        self.logstash.server_close()

    def messages(self, clear_buffer=False, wait_for=None):
        """
        Return the volatility-stripped contents of the logstash message
        buffer, optionally polling until ``wait_for`` messages arrived.
        """
        received = []
        if wait_for is None:
            received = self.logstash.RequestHandlerClass.messages[:]
        else:
            # NOTE(review): polls with no timeout — hangs if messages
            # never arrive; callers appear to rely on the test timeout
            while len(received) < wait_for:
                sleep(0.1)
                received = self.logstash.RequestHandlerClass.messages[:]

        parsed = [strip_volatile(message) for message in received]
        if clear_buffer:
            self.clear_message_buffer()
        return parsed

    def get_message_buffer(self):
        """Return a copy of the raw logstash message buffer."""
        return self.logstash.RequestHandlerClass.messages[:]

    def clear_message_buffer(self):
        """Empty the logstash message buffer."""
        self.logstash.RequestHandlerClass.messages = []
class Skeleton_Repository_Operations_With_SpecificCommitStorage(TestCase):
    """
    Shared test skeleton exercising BBRepository against a concrete
    commit-storage backend.  Concrete subclasses must supply their own
    setUp()/tearDown() that call the setup_*/teardown_* helpers below.
    """
    __test__ = False  # to prevent nose from running this skeleton

    def setUp(self):
        raise Exception('This is a skeleton for test - you need to provide'
                        ' your own setUp() and tearDown()')

    def setup_tempdir(self):
        # call this from your setUp
        self.tempdir = TempDirectory()
        self.file1_contents = 'some contents'
        self.file2_contents = 'some other contents'
        self.filename1 = self.tempdir.write('file1', self.file1_contents)
        self.filename2 = self.tempdir.write('file2', self.file2_contents)

    def teardown_tempdir(self):
        # call this from your tearDown
        self.tempdir.cleanup()

    def setup_repository(self):
        # call this from your setUp after creating your store
        self.repo = BBRepository(self.store)

    def commit_filename1(self, tag, message=None):
        """Commit the first fixture file under the given tag/message."""
        self.repo.create_commit_from_filename(tag, self.filename1, message)

    def commit_filename2(self, tag, message=None):
        """Commit the second fixture file under the given tag/message."""
        self.repo.create_commit_from_filename(tag, self.filename2, message)

    def test_can_commit_filenames_to_repository(self):
        self.commit_filename1('some-tag')

    def test_commit_tag_characters_are_limited(self):
        def will_raise_Exception():
            self.commit_filename1('illegal tag with spaces')
        self.assertRaises(Exception, will_raise_Exception)

    def test_commit_tag_must_be_non_empty(self):
        def will_raise_Exception():
            self.commit_filename1('')
        self.assertRaises(Exception, will_raise_Exception)

    def test_repo_is_empty_to_start(self):
        self.assertEqual([], [c for c in self.repo])

    def test_can_commit_files_and_list_commits(self):
        self.commit_filename1('some-tag')
        self.assertEqual(['some-tag'], [c.tag for c in self.repo])

    def test_can_commit_and_retrieve_contents(self):
        self.commit_filename1('some-tag')
        commit = self.repo['some-tag']
        restore_file = self.tempdir.getpath('file3')
        commit.get_contents_to_filename(restore_file)
        # context manager so the restored file handle is not leaked
        with open(restore_file, 'rb') as f:
            self.assertEqual(self.file1_contents, f.read())

    def test_tags_are_unique(self):
        self.commit_filename1('some-tag')

        def will_raise_DuplicateTagError():
            self.repo.create_commit_from_filename('some-tag', self.filename2)
        self.assertRaises(DuplicateTagError, will_raise_DuplicateTagError)

    def test_duplicate_tag_with_identical_contents_okay(self):
        self.commit_filename1('some-tag')
        self.commit_filename1('some-tag')
        commit = self.repo['some-tag']
        restore_file = self.tempdir.getpath('file3')
        commit.get_contents_to_filename(restore_file)
        # context manager so the restored file handle is not leaked
        with open(restore_file, 'rb') as f:
            self.assertEqual(self.file1_contents, f.read())

    def test_can_get_commit_before_a_given_commit(self):
        self.commit_filename1('a')
        self.commit_filename1('b')
        commit_b = self.repo['b']
        self.assertEqual('a', self.repo.get_commit_before(commit_b).tag)

    def test_commit_before_first_raises_ValueError(self):
        self.commit_filename1('a')

        def will_raise_ValueError():
            self.repo.get_commit_before(self.repo['a'])
        self.assertRaises(ValueError, will_raise_ValueError)

    def test_commits_are_sorted(self):
        self.commit_filename1('c')
        self.commit_filename1('a')
        self.commit_filename1('b')
        self.assertEqual(['a', 'b', 'c'], [c.tag for c in self.repo])

    def test_can_delete_commits_before_a_specified_commit(self):
        self.commit_filename1('a')
        self.commit_filename1('b')
        self.commit_filename1('c')
        self.repo.delete_commits_before(self.repo['c'])
        self.assertEqual(['c'], [c.tag for c in self.repo])

    def test_can_store_and_retrieve_message_with_commit(self):
        message = 'some-extra-data'
        self.commit_filename1('a', message)
        commit = self.repo['a']
        self.assertEqual(message, commit.message)

    def test_message_characters_limited_to_alphanumeric_and_underscore(self):
        def will_raise_Exception():
            self.commit_filename1('a', 'some illegal message')
        self.assertRaises(Exception, will_raise_Exception)

    def test_UTC_iso_datetime_is_a_valid_tag(self):
        self.commit_filename1(datetime.utcnow().isoformat())

    def test_UTC_iso_datetime_is_a_valid_message(self):
        self.commit_filename1('a', datetime.utcnow().isoformat())
        self.commit_filename1(datetime.utcnow().isoformat())

    def test_empty_repo_has_zero_size(self):
        self.assertEqual(0, self.repo.get_repository_size())

    def get_expected_size_from_contents(self, file_contents):
        """Sum the lengths of the items in *file_contents*.

        Note: callers sometimes pass a plain string, in which case this
        sums per-character lengths, i.e. the string's length.
        """
        return sum(len(item) for item in file_contents)

    def test_can_get_repo_size_one_commit(self):
        self.commit_filename1('a', 'A')
        self.assertEqual(
            self.get_expected_size_from_contents(self.file1_contents),
            self.repo.get_repository_size())

    def test_can_get_repo_size_many_different_commits(self):
        file_contents = []
        self.commit_filename1('a', 'A')
        file_contents.append(self.file1_contents)
        self.commit_filename2('b', 'B')
        file_contents.append(self.file2_contents)
        self.commit_filename1('c', 'C')
        file_contents.append(self.file1_contents)
        self.commit_filename2('d', 'D')
        file_contents.append(self.file2_contents)
        expected_size = self.get_expected_size_from_contents(file_contents)
        self.assertEqual(expected_size, self.repo.get_repository_size())

    def test_can_get_repo_size_after_delete(self):
        file_contents = []
        self.commit_filename1('a', 'A')
        file_contents.append(self.file1_contents)
        self.commit_filename2('b', 'B')
        file_contents.append(self.file2_contents)
        self.commit_filename1('c', 'C')
        file_contents.append(self.file1_contents)
        self.commit_filename2('d', 'D')
        file_contents.append(self.file2_contents)
        self.repo.delete_commits_before(self.repo['d'])
        # only the final commit survives the delete
        file_contents = file_contents[3:]
        expected_size = \
            self.get_expected_size_from_contents(file_contents)
        self.assertEqual(expected_size, self.repo.get_repository_size())

    def test_empty_repo_has_zero_items(self):
        self.assertEqual(0, self.repo.get_number_of_items())

    def test_can_get_number_items_one_commit(self):
        file_contents = []
        self.commit_filename1('a', 'A')
        file_contents.append(self.file1_contents)
        items = len(file_contents)
        self.assertEqual(items, self.repo.get_number_of_items())

    def test_can_get_number_of_items_many_different_commits(self):
        file_contents = []
        self.commit_filename1('a', 'A')
        file_contents.append(self.file1_contents)
        self.commit_filename2('b', 'B')
        file_contents.append(self.file2_contents)
        self.commit_filename1('c', 'C')
        file_contents.append(self.file1_contents)
        self.commit_filename2('d', 'D')
        file_contents.append(self.file2_contents)
        expected_size = len(file_contents)
        self.assertEqual(expected_size, self.repo.get_number_of_items())

    def test_can_get_number_of_items_after_delete(self):
        file_contents = []
        self.commit_filename1('a', 'A')
        file_contents.append(self.file1_contents)
        self.commit_filename2('b', 'B')
        file_contents.append(self.file2_contents)
        self.commit_filename1('c', 'C')
        file_contents.append(self.file1_contents)
        self.commit_filename2('d', 'D')
        file_contents.append(self.file2_contents)
        self.repo.delete_commits_before(self.repo['d'])
        # only the final commit survives the delete
        file_contents = file_contents[3:]
        expected_size = len(file_contents)
        self.assertEqual(expected_size, self.repo.get_number_of_items())
class BcolzMinuteBarTestCase(TestCase):
    """Round-trip tests for BcolzMinuteBarWriter/BcolzMinuteBarReader."""

    @classmethod
    def setUpClass(cls):
        cls.env = TradingEnvironment()
        all_market_opens = cls.env.open_and_closes.market_open
        indexer = all_market_opens.index.slice_indexer(
            start=TEST_CALENDAR_START,
            end=TEST_CALENDAR_STOP
        )
        cls.market_opens = all_market_opens[indexer]
        cls.test_calendar_start = cls.market_opens.index[0]
        cls.test_calendar_stop = cls.market_opens.index[-1]

    def setUp(self):
        self.dir_ = TempDirectory()
        self.dir_.create()
        self.dest = self.dir_.getpath('minute_bars')
        os.makedirs(self.dest)
        self.writer = BcolzMinuteBarWriter(
            TEST_CALENDAR_START,
            self.dest,
            self.market_opens,
            US_EQUITIES_MINUTES_PER_DAY,
        )
        self.reader = BcolzMinuteBarReader(self.dest)

    def tearDown(self):
        self.dir_.cleanup()

    def _check_ohlcv(self, sid, minute, expected):
        """Assert that every OHLCV field read back for *minute* matches
        the values in the *expected* dict (keys: open/high/low/close/volume).
        """
        for field in ('open', 'high', 'low', 'close', 'volume'):
            # assertEqual instead of the deprecated assertEquals alias
            self.assertEqual(
                expected[field],
                self.reader.get_value(sid, minute, field))

    def test_write_one_ohlcv(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)
        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0})

    def test_write_two_bars(self):
        minute_0 = self.market_opens[self.test_calendar_start]
        minute_1 = minute_0 + timedelta(minutes=1)
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=[minute_0, minute_1])
        self.writer.write(sid, data)
        self._check_ohlcv(sid, minute_0, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0})
        self._check_ohlcv(sid, minute_1, {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0})

    def test_write_on_second_day(self):
        second_day = self.test_calendar_start + 1
        minute = self.market_opens[second_day]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)
        self._check_ohlcv(sid, minute, {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0})

    def test_write_empty(self):
        minute = self.market_opens[self.test_calendar_start]
        sid = 1
        data = DataFrame(
            data={
                'open': [0],
                'high': [0],
                'low': [0],
                'close': [0],
                'volume': [0]
            },
            index=[minute])
        self.writer.write(sid, data)
        # zero prices read back as NaN; zero volume reads back as 0
        open_price = self.reader.get_value(sid, minute, 'open')
        assert_almost_equal(nan, open_price)
        high_price = self.reader.get_value(sid, minute, 'high')
        assert_almost_equal(nan, high_price)
        low_price = self.reader.get_value(sid, minute, 'low')
        assert_almost_equal(nan, low_price)
        close_price = self.reader.get_value(sid, minute, 'close')
        assert_almost_equal(nan, close_price)
        volume_price = self.reader.get_value(sid, minute, 'volume')
        assert_almost_equal(0, volume_price)

    def test_write_on_multiple_days(self):
        tds = self.market_opens.index
        days = tds[tds.slice_indexer(
            start=self.test_calendar_start + 1,
            end=self.test_calendar_start + 3
        )]
        minutes = DatetimeIndex([
            self.market_opens[days[0]] + timedelta(minutes=60),
            self.market_opens[days[1]] + timedelta(minutes=120),
        ])
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0, 11.0],
                'high': [20.0, 21.0],
                'low': [30.0, 31.0],
                'close': [40.0, 41.0],
                'volume': [50.0, 51.0]
            },
            index=minutes)
        self.writer.write(sid, data)
        self._check_ohlcv(sid, minutes[0], {
            'open': 10.0, 'high': 20.0, 'low': 30.0,
            'close': 40.0, 'volume': 50.0})
        self._check_ohlcv(sid, minutes[1], {
            'open': 11.0, 'high': 21.0, 'low': 31.0,
            'close': 41.0, 'volume': 51.0})

    def test_no_overwrite(self):
        minute = self.market_opens[TEST_CALENDAR_START]
        sid = 1
        data = DataFrame(
            data={
                'open': [10.0],
                'high': [20.0],
                'low': [30.0],
                'close': [40.0],
                'volume': [50.0]
            },
            index=[minute])
        self.writer.write(sid, data)
        with self.assertRaises(BcolzMinuteOverlappingData):
            self.writer.write(sid, data)
class SendFileResponseTest(TestCase):
    u"""
    Tests ``send_file_response()`` function. Checks that regular files are
    sent correctly, but sending non-regular or non-existent files raises an
    exception. Also checks that if the request has ``HTTP_IF_MODIFIED_SINCE``
    header, the file is sent only if it was changed since then. Finally
    checks if ``Last-Modified``, ``Content-Disposition`` and
    ``Content-Length`` headers are set correctly.
    """

    def file_view(request):
        # plain view function referenced by the `urls` pattern below
        path = request.GET[u'path']
        name = request.GET[u'name']
        content_type = request.GET[u'content-type']
        return send_file_response(request, path, name, content_type)

    urls = patterns(
        u'',
        url(r'^file/$', file_view),
    )

    def setUp(self):
        self.tempdir = TempDirectory()

    def tearDown(self):
        self.tempdir.cleanup()

    def _create_file(self, filename=u'myfile.tmp', content=u'Some text.'):
        """Write *content* into the scratch dir and return the full path."""
        self.tempdir.write(filename, content)
        return self.tempdir.getpath(filename)

    def _request_file(self, path, name=u'filename.bin',
                      content_type=u'text/plain', **kwargs):
        """GET the file view for *path*; extra kwargs become request headers."""
        params = urlencode({
            u'path': path,
            u'name': name,
            u'content-type': content_type
        })
        return self.client.get(u'/file/?%s' % params, **kwargs)

    def _check_response(self, response, klass, status_code):
        self.assertIs(type(response), klass)
        self.assertEqual(response.status_code, status_code)

    def _check_content(self, response, path):
        with open(path, 'rb') as f:
            content = f.read()
        self.assertEqual(u''.join(response.streaming_content), content)

    def test_regular_file(self):
        path = self._create_file()
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_directory_raises_exception(self):
        # no need to bind the response: the call itself must raise
        with self.assertRaisesMessage(OSError, u'Not a regular file: /'):
            self._request_file(u'/')

    def test_nonexistent_file_raises_exception(self):
        # no need to bind the response: the call itself must raise
        with self.assertRaisesMessage(
                OSError,
                u"[Errno 2] No such file or directory: '/nonexistent.txt'"):
            self._request_file(u'/nonexistent.txt')

    def test_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_if_modified_since_with_modified_file(self):
        u"""
        Checks that if the request has ``HTTP_IF_MODIFIED_SINCE`` header and
        the file was NOT modified since then, a 304 response is returned.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp + 1000000
        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(
            path,
            HTTP_IF_MODIFIED_SINCE=http_date(if_modified_since_timestamp))
        self._check_response(response, HttpResponseNotModified, 304)

    def test_if_modified_since_with_unmodified_file(self):
        u"""
        Checks that if the request has ``HTTP_IF_MODIFIED_SINCE`` header and
        the file WAS modified since then, the file is sent.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp - 1000000
        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(
            path,
            HTTP_IF_MODIFIED_SINCE=http_date(if_modified_since_timestamp))
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_last_modified_response_header(self):
        modified_timestamp = 1413500000
        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(path)
        self.assertEqual(response[u'Last-Modified'],
                         u'Thu, 16 Oct 2014 22:53:20 GMT')

    def test_content_length_header(self):
        path = self._create_file(content=u'1234567890')
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], u'10')

    def test_content_length_header_for_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], str(len(content)))

    def test_content_disposition_header(self):
        path = self._create_file()
        response = self._request_file(path, u'thefile.txt')
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''thefile.txt")

    def test_content_disposition_header_with_space(self):
        path = self._create_file()
        response = self._request_file(path, u'the file.txt')
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''the%20file.txt")

    def test_content_disposition_header_with_diacritic(self):
        path = self._create_file()
        response = self._request_file(path, u'ľťéŠÝÄÚ.txt')
        self.assertEqual(
            response[u'Content-Disposition'],
            u"attachment; filename*=UTF-8''%C4%BE%C5%A5%C3%A9%C5%A0%C3%9D%C3%84%C3%9A.txt"
        )

    def test_content_disposition_header_with_random_unicode_junk(self):
        path = self._create_file()
        name = random_string(
            20,
            chars=
            u'BacòԉíρsûϻᏧolrѕìtãmeéӽѵ߀ɭpèлuin.Iüà,ɦëǥhƅɢïêgԁSùúâɑfäbƃdkϳɰյƙyáFХ-åɋw'
        )
        response = self._request_file(path, name)
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''%s" % urlquote(name))
class Test_archivepgsql_BasicCommandLineOperation(TestCase):
    """
    End-to-end tests driving the ``archivewal`` command-line script in
    ``--dry-run`` mode against a scratch repository and config file.
    """
    ARCHIVEPGSQL_PATH = os.path.join('bbpgsql', 'cmdline_scripts')
    CONFIG_FILE = 'config.ini'
    exe_script = 'archivewal'

    def setUp(self):
        self.setup_environment()
        self.setup_config()
        self.cmd = [self.exe_script, '--dry-run', '--config', self.config_path]

    def setup_environment(self):
        # extend PATH so the scripts are run straight from the source tree
        self.env = deepcopy(os.environ)
        self.env['PATH'] = ''.join([
            self.env['PATH'],
            ':',
            self.ARCHIVEPGSQL_PATH])
        self.tempdir = TempDirectory()

    def setup_config(self):
        """Write a minimal config file plus a fake pg_xlog WAL segment."""
        self.storage_path = self.tempdir.makedir('repo')
        self.config_path = self.tempdir.getpath(self.CONFIG_FILE)
        self.log_file = self.tempdir.getpath('bbpgsql.log')
        self.config_dict = {
            'WAL': {
                'driver': 'filesystem',
                'path': self.storage_path,
            },
            'General': {
                'pgsql_data_directory': self.tempdir.path,
            },
            'Logging': {
                'logfile': self.log_file,
                'level': 'DEBUG',
            },
        }
        write_config_to_filename(self.config_dict, self.config_path)
        self.pg_xlog_path = 'pg_xlog'
        self.tempdir.makedir(self.pg_xlog_path)
        self.wal_basename = '00001'
        self.wal_filename = os.path.join(self.pg_xlog_path,
                                         self.wal_basename)
        self.tempdir.write(self.wal_filename, '')
        # print() function form: the old print statements were Python 2
        # only and are syntax errors under Python 3
        print('TEMPDIR', self.tempdir.listdir(recursive=True))

    def tearDown(self):
        self.tempdir.cleanup()

    def test_archivewal_returns_error_with_if_less_than_one_argument(self):
        proc = Popen(self.cmd, env=self.env, stdout=PIPE, stderr=STDOUT)
        proc.wait()
        print(proc.stdout.read())
        self.assertNotEqual(0, proc.returncode)

    def test_archivewal_logs_error_with_if_less_than_one_argument(self):
        proc = Popen(self.cmd, env=self.env, stdout=PIPE, stderr=STDOUT)
        proc.wait()
        self.assertNotEqual(0, proc.returncode)
        print(proc.stdout.read())
        # read as text (and close the handle) so the 'ERROR' substring
        # check works on Python 3, where 'rb' would yield bytes
        with open(self.log_file) as f:
            log_output = f.read()
        print('log_output:')
        print(log_output)
        assert 'ERROR' in log_output

    def test_archivewal_success_with_file(self):
        self.cmd.append(self.wal_filename)
        proc = Popen(self.cmd, env=self.env, stdout=PIPE, stderr=STDOUT)
        proc.wait()
        print(proc.stdout.read())
        self.assertEqual(0, proc.returncode)

    def test_archivewal_actually_archives_file(self):
        self.cmd.append(self.wal_filename)
        proc = Popen(self.cmd, env=self.env, stdout=PIPE, stderr=STDOUT)
        proc.wait()
        print(proc.stdout.read())
        self.assertEqual(0, proc.returncode)
        archives = os.listdir(self.storage_path)
        print(archives)
        # the archived file is named '<wal_basename>_<fingerprint...>'
        self.assertTrue(archives[0].startswith(''.join([
            self.wal_basename, '_'])))
class SendFileResponseTest(TestCase):
    u"""
    Exercises the ``send_file_response()`` function: regular files are
    streamed back, non-regular and missing paths raise, the
    ``HTTP_IF_MODIFIED_SINCE`` request header is honoured, and the
    ``Last-Modified``, ``Content-Disposition`` and ``Content-Length``
    response headers come out right.
    """

    def file_view(request):
        # plain view function wired into the `urls` pattern below
        return send_file_response(
            request,
            request.GET[u'path'],
            request.GET[u'name'],
            request.GET[u'content-type'],
        )

    urls = patterns(u'', url(r'^file/$', file_view), )

    def setUp(self):
        self.tempdir = TempDirectory()

    def tearDown(self):
        self.tempdir.cleanup()

    def _create_file(self, filename=u'myfile.tmp', content=u'Some text.'):
        """Write *content* into the scratch dir and return the full path."""
        self.tempdir.write(filename, content)
        return self.tempdir.getpath(filename)

    def _request_file(self, path, name=u'filename.bin',
                      content_type=u'text/plain', **kwargs):
        """GET the file view for *path*; extra kwargs become request headers."""
        query = {u'path': path, u'name': name, u'content-type': content_type}
        return self.client.get(u'/file/?%s' % urlencode(query), **kwargs)

    def _check_response(self, response, klass, status_code):
        self.assertIs(type(response), klass)
        self.assertEqual(response.status_code, status_code)

    def _check_content(self, response, path):
        with open(path, 'rb') as f:
            expected = f.read()
        self.assertEqual(u''.join(response.streaming_content), expected)

    def test_regular_file(self):
        path = self._create_file()
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_directory_raises_exception(self):
        with self.assertRaisesMessage(OSError, u'Not a regular file: /'):
            response = self._request_file(u'/')

    def test_nonexistent_file_raises_exception(self):
        with self.assertRaisesMessage(
                OSError,
                u"[Errno 2] No such file or directory: '/nonexistent.txt'"):
            response = self._request_file(u'/nonexistent.txt')

    def test_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_if_modified_since_with_modified_file(self):
        u"""
        If the request carries ``HTTP_IF_MODIFIED_SINCE`` newer than the
        file's mtime, a 304 Not Modified is returned.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp + 1000000
        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        header = http_date(if_modified_since_timestamp)
        response = self._request_file(path, HTTP_IF_MODIFIED_SINCE=header)
        self._check_response(response, HttpResponseNotModified, 304)

    def test_if_modified_since_with_unmodified_file(self):
        u"""
        If the request carries ``HTTP_IF_MODIFIED_SINCE`` older than the
        file's mtime, the file is sent normally.
        """
        modified_timestamp = 1413500000
        if_modified_since_timestamp = modified_timestamp - 1000000
        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        header = http_date(if_modified_since_timestamp)
        response = self._request_file(path, HTTP_IF_MODIFIED_SINCE=header)
        self._check_response(response, FileResponse, 200)
        self._check_content(response, path)

    def test_last_modified_response_header(self):
        modified_timestamp = 1413500000
        path = self._create_file()
        os.utime(path, (modified_timestamp, modified_timestamp))
        response = self._request_file(path)
        self.assertEqual(response[u'Last-Modified'],
                         u'Thu, 16 Oct 2014 22:53:20 GMT')

    def test_content_length_header(self):
        path = self._create_file(content=u'1234567890')
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], u'10')

    def test_content_length_header_for_random_file(self):
        content = random_string(random.randrange(1000, 2000))
        path = self._create_file(content=content)
        response = self._request_file(path)
        self.assertEqual(response[u'Content-Length'], str(len(content)))

    def test_content_disposition_header(self):
        path = self._create_file()
        response = self._request_file(path, u'thefile.txt')
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''thefile.txt")

    def test_content_disposition_header_with_space(self):
        path = self._create_file()
        response = self._request_file(path, u'the file.txt')
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''the%20file.txt")

    def test_content_disposition_header_with_diacritic(self):
        path = self._create_file()
        response = self._request_file(path, u'ľťéŠÝÄÚ.txt')
        self.assertEqual(
            response[u'Content-Disposition'],
            u"attachment; filename*=UTF-8''%C4%BE%C5%A5%C3%A9%C5%A0%C3%9D%C3%84%C3%9A.txt")

    def test_content_disposition_header_with_random_unicode_junk(self):
        path = self._create_file()
        name = random_string(
            20,
            chars=u'BacòԉíρsûϻᏧolrѕìtãmeéӽѵ߀ɭpèлuin.Iüà,ɦëǥhƅɢïêgԁSùúâɑfäbƃdkϳɰյƙyáFХ-åɋw')
        response = self._request_file(path, name)
        self.assertEqual(response[u'Content-Disposition'],
                         u"attachment; filename*=UTF-8''%s" % urlquote(name))