def test_get_all_date_file_to_task_instances(self):
    """Verify the file mapping produced for a three-day range and two tasks.

    Loads the ``ADay`` and ``BDay`` task classes, requests the mapping for
    ``20140901-20140903`` and compares its sorted keys with the six
    expected output paths (one per task per day).
    """
    a_day_cls = manager.load_a_task_by_name("ADay")
    b_day_cls = manager.load_a_task_by_name("BDay")

    file_map = manager.get_all_date_file_to_task_instances(
        "20140901-20140903",
        [a_day_cls, b_day_cls],
    )

    expected_paths = [
        '/foobar/2014-09-01/a_day.json',
        '/foobar/2014-09-01/b_day.json',
        '/foobar/2014-09-02/a_day.json',
        '/foobar/2014-09-02/b_day.json',
        '/foobar/2014-09-03/a_day.json',
        '/foobar/2014-09-03/b_day.json',
    ]
    actual_paths = sorted(file_map.keys())
    self.assertEqual(expected_paths, actual_paths)
def test_get_all_date_file_to_task_instances(self):
    """Check the keys of the date-file mapping for ``ADay`` and ``BDay``.

    The manager is asked for the range ``20140901-20140903``; the sorted
    keys of the returned mapping must equal the six expected JSON paths.
    """
    first_task_cls = manager.load_a_task_by_name("ADay")
    second_task_cls = manager.load_a_task_by_name("BDay")
    requested_tasks = [first_task_cls, second_task_cls]

    mapping = manager.get_all_date_file_to_task_instances(
        "20140901-20140903", requested_tasks)
    found = sorted(mapping.keys())

    wanted = [
        '/foobar/2014-09-01/a_day.json',
        '/foobar/2014-09-01/b_day.json',
        '/foobar/2014-09-02/a_day.json',
        '/foobar/2014-09-02/b_day.json',
        '/foobar/2014-09-03/a_day.json',
        '/foobar/2014-09-03/b_day.json',
    ]
    self.assertEqual(wanted, found)
def test_Loader(self):
    """Check task lookup by name for both spellings and for a missing task.

    ``"ADay"`` and ``"a_day"`` must resolve to equal results, and an
    unknown name must raise ``AssertionError``.
    """
    by_class_name = manager.load_a_task_by_name("ADay")
    by_snake_name = manager.load_a_task_by_name("a_day")
    self.assertEqual(by_class_name, by_snake_name)

    with self.assertRaises(AssertionError):
        manager.load_a_task_by_name("not_exists_day")

    # NOTE(review): presumably resets the working directory for the rest
    # of the suite -- confirm this call belongs to this test.
    os.chdir(RootDir)
def test_multiple_luiti_tasks(self):
    """Check that a ``DDay`` instance exposes its referenced ``HDay`` task.

    Asserts the ``HDay`` attribute equals the loaded class, that
    ``total_count`` is 12, and that the referenced task instance hashes
    the same as a freshly built ``HDay(day_arrow)``.
    """
    d_day_cls = manager.load_a_task_by_name("DDay")
    h_day_cls = manager.load_a_task_by_name("HDay")
    d_day_instance = d_day_cls(day_arrow)

    self.assertEqual(d_day_instance.HDay, h_day_cls)
    self.assertEqual(d_day_instance.total_count, 12)

    # Task instances are compared through ``hash`` (the original comment
    # notes this is how luigi identifies a unique task).
    referenced_hash = hash(d_day_instance.HDay_task)
    fresh_hash = hash(h_day_cls(day_arrow))
    self.assertEqual(referenced_hash, fresh_hash)
def test_find_dep_on_tasks(self):
    """Check ``manager.find_dep_on_tasks`` on a simple and a chained case."""
    every_task_cls = manager.ld.all_task_classes

    # Simple case: exactly one task (ADay) is reported for BDay.
    b_day_cls = manager.load_a_task_by_name("BDay")
    b_day_dependents = manager.find_dep_on_tasks(b_day_cls, every_task_cls)
    self.assertEqual(len(b_day_dependents), 1)
    self.assertEqual(b_day_dependents[0].__name__, "ADay")

    # Complex case: MultipleDependentDay => HDay => DDay, so both HDay and
    # DDay are reported for MultipleDependentDay.
    multi_cls = manager.load_a_task_by_name("MultipleDependentDay")
    multi_dependents = manager.find_dep_on_tasks(multi_cls, every_task_cls)
    self.assertEqual(len(multi_dependents), 2)

    dependent_names = sorted([task_cls.__name__ for task_cls in multi_dependents])
    self.assertEqual(dependent_names, ["DDay", "HDay"])
def test_Table(self):
    """Check the table data returned by ``Table`` for one task and for all tasks."""
    # TODO add more tests
    from luiti.manager.table import Table

    a_day_cls = manager.load_a_task_by_name("ADay")
    expected_task_info = (
        [
            ['Tasks self dep on', "['BDay', 'CDay']"],
            ['Tasks dep on self', '[]'],
        ],
        ['Task name', 'ADay'],
    )
    self.assertEqual(Table.print_task_info(a_day_cls), expected_task_info)

    from luiti.manager.lazy_data import ld

    # Failure message kept verbatim; it shows a sample of the expected data.
    failure_hint = """Example data is ([[1, 'ADay', 'project_A'], [2, 'BDay', 'project_A'], [3, 'CDay', 'project_A'], [4, 'DDay', 'project_A'], [5, 'FoobarDay', 'project_A'], [6, 'HDay', 'project_B'], [7, 'ImportPackagesDay', 'project_A'], [8, 'MultipleDependentDay', 'project_A'], ['total', 8, '']], ['', 'All Tasks', 'luiti_package'])"""
    all_tasks_rows = Table.print_all_tasks(ld.result)[0]
    self.assertTrue(len(all_tasks_rows) > 6, failure_hint)
def test_ref_tasks(self):
    """Check the referenced-task attributes exposed by an ``ADay`` instance.

    Covers the class references (``BDay``/``CDay``), the per-task counts,
    the aggregated ``total_count`` and that the referenced task instances
    share the parent's ``date_value``.
    """
    a_cls = manager.load_a_task_by_name("ADay")
    b_cls = manager.load_a_task_by_name("BDay")
    c_cls = manager.load_a_task_by_name("CDay")

    a_task = a_cls(day_arrow)

    # Class-level references.
    self.assertEqual(a_task.BDay, b_cls)
    self.assertEqual(a_task.CDay, c_cls)

    # Counts on the task itself and on its referenced task instances.
    self.assertEqual(a_task.count, 1)
    b_task = a_task.BDay_task
    self.assertEqual(b_task.count, 2)
    c_task = a_task.CDay_task
    self.assertEqual(c_task.count, 3)
    self.assertEqual(a_task.total_count, 6)

    # Referenced instances carry the same date as the parent.
    self.assertEqual(a_task.date_value, b_task.date_value)
    self.assertEqual(a_task.date_value, c_task.date_value)
def test_load_all_tasks(self):
    """Check ``load_all_tasks`` matches ``manager.ld.result`` and includes ``HDay``."""
    loaded = manager.load_all_tasks()
    self.assertEqual(manager.ld.result, loaded)

    # HDay must be present because the projects are linked.
    h_day_cls = manager.load_a_task_by_name("HDay")
    is_h_day_loaded = h_day_cls in manager.ld.all_task_classes
    self.assertTrue(is_h_day_loaded, "project B is also loaded.")
def test_egg_zip_python_package(self):
    """Check that the zipped package is reachable from ``ImportPackagesDay``.

    TODO: improve this test; it may currently be of little value.
    """
    task_cls = manager.load_a_task_by_name("ImportPackagesDay")
    library_path = task_cls(day_arrow).egg_library.__path__[0]
    self.assertTrue("zip_package_by_luiti" in library_path)

    # The package and its sub-package must be importable; the bare attribute
    # access below fails if ``subfold`` is missing.
    import zip_package_by_luiti
    import zip_package_by_luiti.subfold
    zip_package_by_luiti.subfold
def test_read_all_required_tasks(self):
    """Check the ordered task class names scheduled for ``BetaReportDay``.

    Builds a ``SensorSchedule`` for ``BetaReportDay`` on ``2014-09-01`` and
    compares the ``task_clsname`` of each entry in
    ``ordered_task_instances_list`` with the expected sequence.
    """
    BetaReportDay = manager.load_a_task_by_name("BetaReportDay")
    ss = SensorSchedule(BetaReportDay, "2014-09-01", False)

    # Build a real list: on Python 3 ``map`` returns an iterator, which
    # never compares equal to a list, so the assertion could not pass.
    result = [task.task_clsname for task in ss.ordered_task_instances_list]

    self.assertEqual(result, [
        'DumpBrowserMapDay',
        'DumpWebLogDay',
        'CleanWebLogDay',
        'CounterVisitorByBrowserDay',
        'CounterVisitorByRegionDay',
        'CounterVisitorDay',
        'BetaReportDay',
    ])
def test_egg_zip_python_package(self):
    """Check that ``ImportPackagesDay`` exposes the zipped library package.

    TODO: improve this test; it may currently be of little value.
    """
    import_packages_cls = manager.load_a_task_by_name("ImportPackagesDay")
    task_instance = import_packages_cls(day_arrow)
    first_path_entry = task_instance.egg_library.__path__[0]
    self.assertTrue("zip_package_by_luiti" in first_path_entry)

    # Import the package and its sub-package, then touch the attribute so a
    # missing ``subfold`` raises here.
    import zip_package_by_luiti
    import zip_package_by_luiti.subfold
    zip_package_by_luiti.subfold
def serialize_and_unserialize_a_task_instance(cls_name, serialize):
    """Round-trip a task instance through ``serialize`` and compare the copy.

    ``cls_name`` is the task class name to load; ``serialize`` must provide
    ``dumps`` and ``loads``.  The assertions use ``self`` from the enclosing
    scope, so this helper is expected to live inside a test method.
    """
    task_cls = manager.load_a_task_by_name(cls_name)
    original = task_cls(day_arrow)
    restored = serialize.loads(serialize.dumps(original))

    # ``package_name`` is already set on the restored instance during load.
    self.assertEqual(restored.package_name, "project_A")
    self.assertEqual(hash(original), hash(restored))

    for ref_name in task_cls._ref_tasks:
        # The referenced class attribute must survive the round trip ...
        self.assertEqual(getattr(original, ref_name),
                         getattr(restored, ref_name))

        # ... and so must the identity (hash) of the referenced instance.
        ref_instance_attr = ref_name + "_task"
        self.assertEqual(hash(getattr(original, ref_instance_attr)),
                         hash(getattr(restored, ref_instance_attr)))
def serialize_and_unserialize_a_task_instance(cls_name, serialize):
    """Dump and reload a task instance, then check the copy matches.

    ``cls_name`` names the task class to load and ``serialize`` is any
    object offering ``dumps``/``loads``.  ``self`` is taken from the
    enclosing scope to run the assertions.
    """
    loaded_cls = manager.load_a_task_by_name(cls_name)
    before = loaded_cls(day_arrow)
    payload = serialize.dumps(before)
    after = serialize.loads(payload)

    # ``package_name`` is already populated on the reloaded instance.
    reloaded_package = after.package_name
    self.assertEqual(reloaded_package, "project_A")
    self.assertEqual(hash(before), hash(after))

    for name in loaded_cls._ref_tasks:
        # Referenced class attribute is unchanged after the round trip.
        self.assertEqual(getattr(before, name), getattr(after, name))

        # Referenced task instance hashes identically on both sides.
        instance_attr = name + "_task"
        hash_before = hash(getattr(before, instance_attr))
        hash_after = hash(getattr(after, instance_attr))
        self.assertEqual(hash_before, hash_after)
def test_run_python_on_distributed_system(self):
    """Check that the packages a task needs can be archived and unpacked.

    Builds a tar archive with ``luigi.hadoop.create_packages_archive``,
    extracts it into a scratch directory and asserts the expected top-level
    package directories are present.  The scratch directory is removed even
    when a step or assertion fails.
    """
    # 1. setup env
    import luigi.hadoop

    tar_dir = "/tmp/luiti_tests/tmp"
    tar_name = "project_A.tar"
    tar_file = tar_dir + "/" + tar_name
    os.system("mkdir -p %s" % tar_dir)

    try:
        DDay = manager.load_a_task_by_name("DDay")
        DDay_task = DDay("2014-09-01")

        # 2. package it
        import luiti
        import etl_utils
        import zip_package_by_luiti

        # mimic luigi.hadoop.create_packages_archive
        new_packages = list(luiti_config.luiti_tasks_packages) + \
            [__import__(DDay_task.__module__, None, None, 'dummy')] + \
            [luigi, luiti, etl_utils, zip_package_by_luiti]
        luigi.hadoop.create_packages_archive(new_packages, tar_file)

        # 3. unpackage it
        # mimic luigi.mrrunner.Runner.extract_packages_archive
        # NOTE(review): the working directory is left at ``tar_dir``, as
        # before -- confirm whether other tests rely on resetting it.
        os.chdir(tar_dir)
        import tarfile
        # ``with`` closes the archive even if an extraction raises.
        with tarfile.open(tar_name) as tar:
            for tarinfo in tar:
                tar.extract(tarinfo)

        # 4. test
        unziped_items = os.listdir('.')
        self.assertTrue("etl_utils" in unziped_items)
        self.assertTrue("luigi" in unziped_items)
        self.assertTrue("luiti" in unziped_items)
        self.assertTrue("project_A" in unziped_items)
        self.assertTrue("project_B" in unziped_items)
        self.assertTrue("zip_package_by_luiti" in unziped_items)
        # ``subfold`` is a sub-package, so it must not be at the top level.
        self.assertTrue("subfold" not in unziped_items)
    finally:
        # 5. clean up, also when a step above failed
        os.system("rm -rf /tmp/luiti_tests")
def test_read_all_required_tasks(self):
    """Check which tasks, in order, are scheduled for ``BetaReportDay``.

    A ``SensorSchedule`` is created for ``BetaReportDay`` on ``2014-09-01``;
    the ``task_clsname`` values from its ``ordered_task_instances_list``
    must match the expected sequence exactly.
    """
    BetaReportDay = manager.load_a_task_by_name("BetaReportDay")
    ss = SensorSchedule(BetaReportDay, "2014-09-01", False)

    # A list comprehension replaces ``map``: on Python 3 ``map`` yields an
    # iterator, and an iterator is never equal to the expected list.
    result = [item.task_clsname for item in ss.ordered_task_instances_list]

    expected = [
        'DumpBrowserMapDay',
        'DumpWebLogDay',
        'CleanWebLogDay',
        'CounterVisitorByBrowserDay',
        'CounterVisitorByRegionDay',
        'CounterVisitorDay',
        'BetaReportDay',
    ]
    self.assertEqual(result, expected)