Beispiel #1
0
 def test_get_all_date_file_to_task_instances(self):
     """The 2014-09-01..03 range maps to one file per day for each of ADay and BDay."""
     task_classes = [
         manager.load_a_task_by_name(task_name)
         for task_name in ("ADay", "BDay")
     ]
     date_file_map = manager.get_all_date_file_to_task_instances(
         "20140901-20140903", task_classes)

     expected_paths = [
         '/foobar/2014-09-01/a_day.json',
         '/foobar/2014-09-01/b_day.json',
         '/foobar/2014-09-02/a_day.json',
         '/foobar/2014-09-02/b_day.json',
         '/foobar/2014-09-03/a_day.json',
         '/foobar/2014-09-03/b_day.json',
     ]
     # Only the keys (file paths) are checked; sort them for a stable comparison.
     self.assertEqual(expected_paths, sorted(date_file_map.keys()))
Beispiel #2
0
 def test_get_all_date_file_to_task_instances(self):
     """Check the file paths produced for ADay and BDay over a three-day range."""
     ADay = manager.load_a_task_by_name("ADay")
     BDay = manager.load_a_task_by_name("BDay")

     date_range = "20140901-20140903"
     result = manager.get_all_date_file_to_task_instances(date_range, [ADay, BDay])

     # One a_day.json and one b_day.json is expected per day in the range.
     expected_files = [
         '/foobar/2014-09-01/a_day.json', '/foobar/2014-09-01/b_day.json',
         '/foobar/2014-09-02/a_day.json', '/foobar/2014-09-02/b_day.json',
         '/foobar/2014-09-03/a_day.json', '/foobar/2014-09-03/b_day.json',
     ]
     actual_files = sorted(result.keys())
     self.assertEqual(expected_files, actual_files)
Beispiel #3
0
    def test_Loader(self):
        """Loading "ADay" and "a_day" gives equal results; an unknown name raises AssertionError."""
        loaded_by_camel_name = manager.load_a_task_by_name("ADay")
        loaded_by_snake_name = manager.load_a_task_by_name("a_day")
        self.assertEqual(loaded_by_camel_name, loaded_by_snake_name)

        with self.assertRaises(AssertionError):
            manager.load_a_task_by_name("not_exists_day")

        # NOTE(review): presumably puts the working directory back to the
        # project root after loading -- confirm against the loader.
        os.chdir(RootDir)
    def test_multiple_luiti_tasks(self):
        """A DDay instance exposes HDay both as a class and as a task instance."""
        DDay = manager.load_a_task_by_name("DDay")
        HDay = manager.load_a_task_by_name("HDay")

        dday_instance = DDay(day_arrow)
        self.assertEqual(dday_instance.HDay, HDay)
        self.assertEqual(dday_instance.total_count, 12)

        # Task identity is compared through ``hash`` (luigi's way of telling
        # tasks apart, per the original author's note).
        referenced_hash = hash(dday_instance.HDay_task)
        fresh_hash = hash(HDay(day_arrow))
        self.assertEqual(referenced_hash, fresh_hash)
Beispiel #5
0
    def test_multiple_luiti_tasks(self):
        """DDay references HDay: check the class link, total_count and the task hash."""
        DDay, HDay = (
            manager.load_a_task_by_name(task_name)
            for task_name in ("DDay", "HDay")
        )

        task = DDay(day_arrow)
        self.assertEqual(task.HDay, HDay)
        self.assertEqual(task.total_count, 12)

        # ``hash`` is what the original author uses to decide that two luigi
        # tasks are the same task.
        self.assertEqual(hash(task.HDay_task), hash(HDay(day_arrow)))
Beispiel #6
0
    def test_Loader(self):
        """The loader accepts "ADay" or "a_day" and rejects unknown task names."""
        self.assertEqual(manager.load_a_task_by_name("ADay"),
                         manager.load_a_task_by_name("a_day"))

        def load_missing_task():
            # Named stand-in for the lookup that is expected to fail.
            return manager.load_a_task_by_name("not_exists_day")

        self.assertRaises(AssertionError, load_missing_task)

        # NOTE(review): looks like this resets the working directory to the
        # project root -- confirm why the loader needs it.
        os.chdir(RootDir)
Beispiel #7
0
    def test_find_dep_on_tasks(self):
        """find_dep_on_tasks returns the task classes that depend on the given task."""
        # Simple case: ADay is the only task that depends on BDay.
        BDay = manager.load_a_task_by_name("BDay")
        dependents_of_bday = manager.find_dep_on_tasks(
            BDay, manager.ld.all_task_classes)
        self.assertEqual(len(dependents_of_bday), 1)
        self.assertEqual(dependents_of_bday[0].__name__, "ADay")

        # Complex case: the chain is MultipleDependentDay => HDay => DDay,
        # so removing MultipleDependentDay also affects HDay and DDay.
        MultipleDependentDay = manager.load_a_task_by_name("MultipleDependentDay")
        dependents_of_multi = manager.find_dep_on_tasks(
            MultipleDependentDay, manager.ld.all_task_classes)
        self.assertEqual(len(dependents_of_multi), 2)
        dependent_names = sorted(
            task_cls.__name__ for task_cls in dependents_of_multi)
        self.assertEqual(dependent_names, ["DDay", "HDay"])
Beispiel #8
0
    def test_find_dep_on_tasks(self):
        """Check direct (BDay) and chained (MultipleDependentDay) dependents."""
        # Direct dependency: only ADay depends on BDay.
        BDay = manager.load_a_task_by_name("BDay")
        found = manager.find_dep_on_tasks(BDay, manager.ld.all_task_classes)
        self.assertEqual(len(found), 1)
        first_dependent = found[0]
        self.assertEqual(first_dependent.__name__, "ADay")

        # Chained dependency:
        #   MultipleDependentDay => HDay => DDay
        #   deleting MultipleDependentDay means deleting HDay and DDay too.
        MultipleDependentDay = manager.load_a_task_by_name("MultipleDependentDay")
        found = manager.find_dep_on_tasks(
            MultipleDependentDay, manager.ld.all_task_classes)
        self.assertEqual(len(found), 2)
        self.assertEqual(sorted([cls.__name__ for cls in found]),
                         ["DDay", "HDay"])
Beispiel #9
0
    def test_Table(self):
        """Check Table.print_task_info for ADay and the size of Table.print_all_tasks."""
        # TODO add more tests
        from luiti.manager.table import Table
        ADay = manager.load_a_task_by_name("ADay")

        task_info = Table.print_task_info(ADay)
        expected_info = (
            [
                ['Tasks self dep on', "['BDay', 'CDay']"],
                ['Tasks dep on self', '[]'],
            ],
            ['Task name', 'ADay'],
        )
        self.assertEqual(task_info, expected_info)

        from luiti.manager.lazy_data import ld
        # Shown only when the assertion fails, as a sample of the expected shape.
        failure_hint = """Example data is ([[1, 'ADay', 'project_A'], [2, 'BDay', 'project_A'], [3, 'CDay', 'project_A'], [4, 'DDay', 'project_A'], [5, 'FoobarDay', 'project_A'], [6, 'HDay', 'project_B'], [7, 'ImportPackagesDay', 'project_A'], [8, 'MultipleDependentDay', 'project_A'], ['total', 8, '']], ['', 'All Tasks', 'luiti_package'])"""
        all_tasks_output = Table.print_all_tasks(ld.result)
        self.assertTrue(len(all_tasks_output[0]) > 6, failure_hint)
    def test_ref_tasks(self):
        """An ADay instance exposes BDay and CDay as classes and as ``*_task`` instances."""
        ADay = manager.load_a_task_by_name("ADay")
        BDay = manager.load_a_task_by_name("BDay")
        CDay = manager.load_a_task_by_name("CDay")

        a_task = ADay(day_arrow)

        # The class-level references resolve to the loaded classes.
        for ref_name, ref_cls in (("BDay", BDay), ("CDay", CDay)):
            self.assertEqual(getattr(a_task, ref_name), ref_cls)

        # Own count, referenced counts, and the total.
        self.assertEqual(a_task.count, 1)
        for ref_task_attr, expected_count in (("BDay_task", 2), ("CDay_task", 3)):
            self.assertEqual(getattr(a_task, ref_task_attr).count, expected_count)
        self.assertEqual(a_task.total_count, 6)

        # Referenced task instances carry the same date value as ADay's.
        for ref_task_attr in ("BDay_task", "CDay_task"):
            self.assertEqual(a_task.date_value,
                             getattr(a_task, ref_task_attr).date_value)
Beispiel #11
0
    def test_load_all_tasks(self):
        """load_all_tasks equals manager.ld.result, and HDay is among all_task_classes."""
        loaded_tasks = manager.load_all_tasks()
        # Equal because the two are linked.
        self.assertEqual(manager.ld.result, loaded_tasks)

        HDay = manager.load_a_task_by_name("HDay")
        hday_is_known = HDay in manager.ld.all_task_classes
        self.assertTrue(hday_is_known, "project B is also loaded.")
Beispiel #12
0
    def test_Table(self):
        """Exercise Table.print_task_info and Table.print_all_tasks."""
        # TODO add more tests
        from luiti.manager.table import Table
        ADay = manager.load_a_task_by_name("ADay")

        expected_first = [['Tasks self dep on', "['BDay', 'CDay']"],
                          ['Tasks dep on self', '[]']]
        expected_second = ['Task name', 'ADay']
        self.assertEqual(Table.print_task_info(ADay),
                         (expected_first, expected_second))

        from luiti.manager.lazy_data import ld
        first_part = Table.print_all_tasks(ld.result)[0]
        # The message documents what the output looked like for the author.
        self.assertTrue(
            len(first_part) > 6,
            """Example data is ([[1, 'ADay', 'project_A'], [2, 'BDay', 'project_A'], [3, 'CDay', 'project_A'], [4, 'DDay', 'project_A'], [5, 'FoobarDay', 'project_A'], [6, 'HDay', 'project_B'], [7, 'ImportPackagesDay', 'project_A'], [8, 'MultipleDependentDay', 'project_A'], ['total', 8, '']], ['', 'All Tasks', 'luiti_package'])""")
Beispiel #13
0
    def test_ref_tasks(self):
        """Check ADay's references to BDay/CDay: classes, counts and date values."""
        ADay, BDay, CDay = [
            manager.load_a_task_by_name(task_name)
            for task_name in ("ADay", "BDay", "CDay")
        ]

        aday = ADay(day_arrow)

        # References to the task classes.
        self.assertEqual(aday.BDay, BDay)
        self.assertEqual(aday.CDay, CDay)

        # Counts: 1 (ADay) + 2 (BDay) + 3 (CDay) == 6 (total).
        self.assertEqual(aday.count, 1)
        self.assertEqual(aday.BDay_task.count, 2)
        self.assertEqual(aday.CDay_task.count, 3)
        self.assertEqual(aday.total_count, 6)

        # The referenced instances share ADay's date value.
        self.assertEqual(aday.date_value, aday.BDay_task.date_value)
        self.assertEqual(aday.date_value, aday.CDay_task.date_value)
Beispiel #14
0
 def test_egg_zip_python_package(self):
     """
     Check that ImportPackagesDay's egg library path mentions
     zip_package_by_luiti and that the package can be imported.

     TODO improve tests; as written this may be meaningless.
     """
     ImportPackagesDay = manager.load_a_task_by_name("ImportPackagesDay")
     task = ImportPackagesDay(day_arrow)
     egg_path = task.egg_library.__path__[0]
     self.assertTrue("zip_package_by_luiti" in egg_path)

     import zip_package_by_luiti
     import zip_package_by_luiti.subfold
     # Bare attribute access: fails loudly if the sub-package is missing.
     zip_package_by_luiti.subfold
Beispiel #15
0
    def test_read_all_required_tasks(self):
        """SensorSchedule lists BetaReportDay's required tasks in the expected order.

        The expected list ends with BetaReportDay itself.
        """
        BetaReportDay = manager.load_a_task_by_name("BetaReportDay")
        ss = SensorSchedule(BetaReportDay, "2014-09-01", False)

        # Build a real list: on Python 3 ``map`` returns an iterator, which
        # never compares equal to a list, so the assertion could not pass.
        result = [
            task_instance.task_clsname
            for task_instance in ss.ordered_task_instances_list
        ]
        self.assertEqual(result, [
            'DumpBrowserMapDay', 'DumpWebLogDay', 'CleanWebLogDay',
            'CounterVisitorByBrowserDay', 'CounterVisitorByRegionDay',
            'CounterVisitorDay', 'BetaReportDay'
        ])
 def test_egg_zip_python_package(self):
     """
     The egg library of an ImportPackagesDay task should live under a path
     containing "zip_package_by_luiti".

     TODO improve tests; right now this may be meaningless.
     """
     ImportPackagesDay = manager.load_a_task_by_name("ImportPackagesDay")
     library_paths = ImportPackagesDay(day_arrow).egg_library.__path__
     first_path = library_paths[0]
     self.assertTrue("zip_package_by_luiti" in first_path)
     import zip_package_by_luiti
     import zip_package_by_luiti.subfold
     # Touch the sub-package so a broken import surfaces as an error here.
     zip_package_by_luiti.subfold
Beispiel #17
0
        def serialize_and_unserialize_a_task_instance(cls_name, serialize):
            """Round-trip a task through ``serialize`` and compare it with the original.

            cls_name  -- task name handed to manager.load_a_task_by_name.
            serialize -- object providing ``dumps`` and ``loads``.
            """
            task_cls = manager.load_a_task_by_name(cls_name)
            original = task_cls(day_arrow)
            restored = serialize.loads(serialize.dumps(original))

            # package_name is already set by the time serialize.loads returns.
            self.assertEqual(restored.package_name, "project_A")

            self.assertEqual(hash(original), hash(restored))

            # Every referenced task must survive the round trip as well.
            for ref_name in task_cls._ref_tasks:
                self.assertEqual(getattr(original, ref_name),
                                 getattr(restored, ref_name))
                ref_task_attr = ref_name + "_task"
                self.assertEqual(hash(getattr(original, ref_task_attr)),
                                 hash(getattr(restored, ref_task_attr)))
        def serialize_and_unserialize_a_task_instance(cls_name, serialize):
            """Dump and reload a task instance, then check it against the original.

            ``serialize`` only needs ``dumps``/``loads``; ``cls_name`` is
            resolved through manager.load_a_task_by_name.
            """
            task_cls = manager.load_a_task_by_name(cls_name)
            before = task_cls(day_arrow)

            dumped = serialize.dumps(before)
            after = serialize.loads(dumped)

            # Already set while serialize.loads runs.
            package_name_after = after.package_name
            self.assertEqual(package_name_after, "project_A")

            self.assertEqual(hash(before), hash(after))

            for ref_name in task_cls._ref_tasks:
                # The referenced class attribute is unchanged ...
                ref_before = getattr(before, ref_name)
                ref_after = getattr(after, ref_name)
                self.assertEqual(ref_before, ref_after)
                # ... and the referenced task instance hashes the same.
                task_hash_before = hash(getattr(before, ref_name + "_task"))
                task_hash_after = hash(getattr(after, ref_name + "_task"))
                self.assertEqual(task_hash_before, task_hash_after)
    def test_run_python_on_distributed_system(self):
        """Pack the task packages into a tar archive, unpack it, check the entries.

        Mimics luigi.hadoop.create_packages_archive followed by
        luigi.mrrunner.Runner.extract_packages_archive.  The scratch
        directory is removed and the working directory restored even when an
        assertion fails.
        """
        # 1. setup env
        import shutil
        import tarfile
        import luigi.hadoop

        work_root = "/tmp/luiti_tests"
        tar_dir = work_root + "/tmp"
        tar_name = "project_A.tar"
        tar_file = tar_dir + "/" + tar_name
        if not os.path.isdir(tar_dir):
            os.makedirs(tar_dir)

        # Remember where we were so later tests do not run inside a directory
        # that this test deletes.  getcwd fails if the cwd is already gone.
        try:
            previous_cwd = os.getcwd()
        except OSError:
            previous_cwd = None

        try:
            DDay = manager.load_a_task_by_name("DDay")
            DDay_task = DDay("2014-09-01")

            # 2. package it
            import luiti
            import etl_utils
            import zip_package_by_luiti
            # mimic luigi.hadoop.create_packages_archive
            new_packages = (
                list(luiti_config.luiti_tasks_packages) +
                [__import__(DDay_task.__module__, None, None, 'dummy')] +
                [luigi, luiti, etl_utils, zip_package_by_luiti]
            )
            luigi.hadoop.create_packages_archive(new_packages, tar_file)

            # 3. unpackage it
            #    mimic luigi.mrrunner.Runner.extract_packages_archive
            os.chdir(tar_dir)
            with tarfile.open(tar_name) as tar:
                for tarinfo in tar:
                    tar.extract(tarinfo)

            # 4. test
            unziped_items = os.listdir('.')
            for expected_item in ("etl_utils", "luigi", "luiti",
                                  "project_A", "project_B",
                                  "zip_package_by_luiti"):
                self.assertIn(expected_item, unziped_items)
            # it's a sub folder, so it must not appear at the top level
            self.assertNotIn("subfold", unziped_items)
        finally:
            # 5. clean up
            if previous_cwd is not None:
                os.chdir(previous_cwd)
            shutil.rmtree(work_root, ignore_errors=True)
Beispiel #20
0
    def test_run_python_on_distributed_system(self):
        """Archive the task packages, extract them, and verify the top-level names.

        Follows the same steps as luigi.hadoop.create_packages_archive and
        luigi.mrrunner.Runner.extract_packages_archive.  Cleanup runs in a
        ``finally`` block so a failing assertion does not leave files behind
        or leave the process inside a deleted directory.
        """
        # 1. setup env
        import shutil
        import tarfile
        import luigi.hadoop

        work_root = "/tmp/luiti_tests"
        tar_dir = work_root + "/tmp"
        tar_name = "project_A.tar"
        tar_file = tar_dir + "/" + tar_name
        if not os.path.isdir(tar_dir):
            os.makedirs(tar_dir)

        # The current directory may itself be missing; then there is nothing
        # to restore afterwards.
        try:
            previous_cwd = os.getcwd()
        except OSError:
            previous_cwd = None

        try:
            DDay = manager.load_a_task_by_name("DDay")
            DDay_task = DDay("2014-09-01")

            # 2. package it
            import luiti
            import etl_utils
            import zip_package_by_luiti
            # mimic luigi.hadoop.create_packages_archive
            new_packages = (
                list(luiti_config.luiti_tasks_packages) +
                [__import__(DDay_task.__module__, None, None, 'dummy')] +
                [luigi, luiti, etl_utils, zip_package_by_luiti]
            )
            luigi.hadoop.create_packages_archive(new_packages, tar_file)

            # 3. unpackage it
            #    mimic luigi.mrrunner.Runner.extract_packages_archive
            os.chdir(tar_dir)
            with tarfile.open(tar_name) as tar:
                for tarinfo in tar:
                    tar.extract(tarinfo)

            # 4. test
            unziped_items = os.listdir('.')
            self.assertIn("etl_utils", unziped_items)
            self.assertIn("luigi", unziped_items)
            self.assertIn("luiti", unziped_items)
            self.assertIn("project_A", unziped_items)
            self.assertIn("project_B", unziped_items)

            self.assertIn("zip_package_by_luiti", unziped_items)
            # it's a sub folder, not a top-level entry
            self.assertNotIn("subfold", unziped_items)
        finally:
            # 5. clean up
            if previous_cwd is not None:
                os.chdir(previous_cwd)
            shutil.rmtree(work_root, ignore_errors=True)
Beispiel #21
0
    def test_load_all_tasks(self):
        """manager.ld.result matches load_all_tasks(); project B's HDay is included."""
        all_tasks = manager.load_all_tasks()
        lazy_result = manager.ld.result
        # The two are linked, so they must compare equal.
        self.assertEqual(lazy_result, all_tasks)

        HDay = manager.load_a_task_by_name("HDay")
        self.assertTrue(
            HDay in manager.ld.all_task_classes,
            "project B is also loaded.",
        )
Beispiel #22
0
    def test_read_all_required_tasks(self):
        """Check the ordered task class names SensorSchedule yields for BetaReportDay."""
        BetaReportDay = manager.load_a_task_by_name("BetaReportDay")
        ss = SensorSchedule(BetaReportDay, "2014-09-01", False)

        # A list comprehension instead of ``map``: on Python 3 ``map`` gives
        # an iterator, and an iterator never equals the expected list.
        result = [item.task_clsname for item in ss.ordered_task_instances_list]
        expected = [
            'DumpBrowserMapDay',
            'DumpWebLogDay',
            'CleanWebLogDay',
            'CounterVisitorByBrowserDay',
            'CounterVisitorByRegionDay',
            'CounterVisitorDay',
            'BetaReportDay',
        ]
        self.assertEqual(result, expected)