Example #1
0
    def test_get_assets(self):
        # Register a concrete step whose sources are two FASTA files plus a
        # sequence of CSV files, then resurrect every stored source asset.
        # Nothing is asserted: the test only fails if one of the calls raises.
        empty_model = core.Model(())  # an empty model is enough for this step
        graph = self.cls_to_test(self.cache_file.name,
                                 empty_model,
                                 force_create=True)

        class Activities(core.Enum):
            DATETIME = 'Give date/time'

        step = self.PythonTime('python')
        variant = graph.id_step_variant(step, (Activities.DATETIME, ))

        # source assets: two single files followed by a sequence of files
        source_fields = [
            core.AssetAttr('reference', rnaseq.FASTAFile, ''),
            core.AssetAttr('otherreference', rnaseq.FASTAFile, ''),
            core.AssetAttr('listoffiles', rnaseq.CSVFileSequence, ''),
        ]
        SrcCls = core.assetfactory('Source', source_fields)
        reference = rnaseq.FASTAFile('foo.fasta')
        other_reference = rnaseq.FASTAFile('bar.fasta')
        csv_files = rnaseq.CSVFileSequence(
            (rnaseq.CSVFile('baz.csv'), rnaseq.CSVFile('baz2.csv')))
        sources = SrcCls(reference, other_reference, csv_files)

        no_targets = core.AssetSet()
        no_params = ()
        concrete = graph.id_stepconcrete(variant.id, sources, no_targets,
                                         no_params)
        for stored in graph.get_srcassets(concrete.id):
            stored.resurrect(rnaseq)
Example #2
0
    def test_nconcrete_steps(self):
        # The count of concrete steps and the per-status counts start out
        # empty, and both reflect a step once it has been registered.
        empty_model = core.Model(())  # an empty model is enough for this step
        graph = self.cls_to_test(self.cache_file.name,
                                 empty_model,
                                 force_create=True)

        class Activities(core.Enum):
            DATETIME = 'Give date/time'

        step = self.PythonTime('python')
        variant = graph.id_step_variant(step, (Activities.DATETIME, ))
        no_sources = core.AssetSet()
        no_targets = core.AssetSet()
        no_params = ()

        # nothing has been registered yet
        self.assertEqual(0, graph.nconcrete_steps)
        self.assertEqual(0, len(graph.nconcrete_steps_status))

        graph.id_stepconcrete(variant.id, no_sources, no_targets, no_params)

        # one step, reported under the "to do" status
        self.assertEqual(1, graph.nconcrete_steps)
        first_status = graph.nconcrete_steps_status[0]
        self.assertEqual(first_status.label, hortator._TASK_TODO)
        self.assertEqual(first_status.count, 1)
Example #3
0
    def test_id_step_variant_noexecutable(self):
        # A step without any executable can be registered as a step variant;
        # registering the very same step a second time returns the same ID
        # and is not flagged as new.
        empty_model = core.Model(())  # an empty model is enough for this step
        graph = self.cls_to_test(self.cache_file.name,
                                 empty_model,
                                 force_create=True)

        class Activities(core.Enum):
            NOODLE = 'Noodle'

        class Foo(core.StepAbstract):
            # minimal step that refuses any executable
            _name = 'foo'
            _default_execpath = None
            activities = (core.DEFAULT_ACTIVITY.MISC, )
            Assets = None

            def __init__(self, executable):
                if executable is None:
                    self._execpath = None
                else:
                    raise ValueError('No executable needed. Should be None.')

            @property
            def version(self):
                return '0.1.0'

            def run(self):
                pass

        step = Foo(None)
        noodle = (Activities.NOODLE, )

        first = graph.id_step_variant(step, noodle)
        self.assertTrue(first.new)
        self.assertTrue(isinstance(first.id, int))

        again = graph.id_step_variant(step, noodle)
        self.assertFalse(again.new)
        self.assertEqual(first.id, again.id)
Example #4
0
 def setUp(self):
     # Prepare a named temporary file and a PersistentTaskGraph stored in
     # it, created with force_create=True from an empty model.
     tmp = tempfile.NamedTemporaryFile()
     self.cache_file = tmp
     empty_model = core.Model(())  # an empty model is enough for this step
     self.cache = hortator.PersistentTaskGraph(tmp.name,
                                               empty_model,
                                               force_create=True)
Example #5
0
    def test_id_step_variant(self):
        # Registering a step backed by an external executable ('gzip') as a
        # step variant must yield an integer ID.
        model = core.Model(
            tuple())  # getting away with an empty model for this step
        cache = self.cls_to_test(self.cache_file.name,
                                 model,
                                 force_create=True)

        class Activities(core.Enum):
            COMPRESS = 'Compress'

        class GZip(core.StepAbstract):
            _name = 'gzip'
            _default_execpath = 'gzip'
            activities = (core.DEFAULT_ACTIVITY.MISC, )
            Assets = None

            def __init__(self, executable=None):
                if executable is None:
                    # Class attributes are not visible as bare names inside
                    # a method body; the default has to be looked up through
                    # the instance (a bare `_default_execpath` would raise
                    # NameError).
                    self._execpath = self._default_execpath
                else:
                    self._execpath = executable

            @property
            def version(self):
                # First line printed by `<executable> --version`.
                # NOTE(review): on Python 3 check_output() returns bytes;
                # splitting bytes on a str `linesep` would fail - confirm
                # the interpreter version this suite targets.
                res = subprocess.check_output([self._execpath, '--version'])
                version = res.split(linesep)[0]
                return version

            def run(self):
                pass

        gzip = GZip('gzip')
        db_id = cache.id_step_variant(gzip, (Activities.COMPRESS, ))
        self.assertIsInstance(db_id.id, int)
Example #6
0
 def test_statuslist(self):
     # The status list exposed by the cache must hold the same pairs as
     # hortator._TASK_STATUS_LIST, each (key, value) item being swapped.
     empty_model = core.Model(())  # an empty model is enough for this step
     graph = self.cls_to_test(self.cache_file.name,
                              empty_model,
                              force_create=True)
     stored = graph.statuslist
     expected = {(value, key)
                 for key, value in hortator._TASK_STATUS_LIST.items()}
     self.assertEqual(expected, set(stored))
Example #7
0
 def test_gettargetsofactivity(self):
     # Follow the number of target assets reported for an activity while
     # step variants and tasks performing it are being registered.
     empty_model = core.Model(())  # an empty model is enough for this step
     graph = self.cls_to_test(self.cache_file.name,
                              empty_model,
                              force_create=True)
     splitter = Split(None)
     no_params = ()
     infile = tempfile.NamedTemporaryFile(mode='w+')
     infile.write('123')
     infile.flush()
     head_tmp = tempfile.NamedTemporaryFile(mode='w+')
     tail_tmp = tempfile.NamedTemporaryFile(mode='w+')
     split_sources = splitter.Assets.Source(core.File(infile.name))
     split_targets = splitter.Assets.Target(core.File(head_tmp.name),
                                            core.File(tail_tmp.name))
     # asking before the step variant is registered raises ValueError
     self.assertRaises(ValueError, graph.get_targetsofactivity,
                       ActivitiesSplit.HEADTAIL)
     # register the step variant
     split_variant = graph.id_step_variant(splitter, splitter.activities)
     # ActivitiesSplit.FOO still raises after the registration
     self.assertRaises(ValueError, graph.get_targetsofactivity,
                       ActivitiesSplit.FOO)
     # no task exists yet, hence no target for the activity
     self.assertEqual(
         0, len(graph.get_targetsofactivity(ActivitiesSplit.HEADTAIL)))
     # a first task brings its two targets
     graph.id_stepconcrete(split_variant.id, split_sources, split_targets,
                           no_params)
     self.assertEqual(
         2, len(graph.get_targetsofactivity(ActivitiesSplit.HEADTAIL)))
     # a second task (new target files, other parameters) brings two more;
     # the temporary-file names are deliberately rebound, as before
     head_tmp = tempfile.NamedTemporaryFile(mode='w+')
     tail_tmp = tempfile.NamedTemporaryFile(mode='w+')
     other_targets = splitter.Assets.Target(core.File(head_tmp.name),
                                            core.File(tail_tmp.name))
     graph.id_stepconcrete(split_variant.id,
                           split_sources,
                           other_targets,
                           parameters=(1, ))
     self.assertEqual(
         4, len(graph.get_targetsofactivity(ActivitiesSplit.HEADTAIL)))
     # concatenate the two heads with another step
     joiner = Cat(None)
     concat_tmp = tempfile.NamedTemporaryFile(mode='w+')
     cat_sources = joiner.Assets.Source(
         core.FileSequence((split_targets.head, other_targets.head)))
     cat_targets = joiner.Assets.Target(core.File(concat_tmp.name))
     # register the step variant
     cat_variant = graph.id_step_variant(joiner, joiner.activities)
     graph.id_stepconcrete(cat_variant.id, cat_sources, cat_targets,
                           no_params)
     self.assertEqual(
         1, len(graph.get_targetsofactivity(ActivitiesSplit.MERGE)))
Example #8
0
 def test_PersistentTaskGraph(self):
     # A cache built with force_create=True reports itself as created and
     # iterates over no step at all.
     empty_model = core.Model(())  # an empty model is enough for this step
     graph = self.cls_to_test(self.cache_file.name,
                              empty_model,
                              force_create=True)
     # creation flag
     self.assertTrue(graph.created)
     # no step in there
     steps = tuple(graph.iter_steps())
     self.assertEqual(0, len(steps))
Example #9
0
 def test_reopen(self):
     # Opening an existing cache file a second time, without force_create,
     # must not report it as created.
     empty_model = core.Model(())  # an empty model is enough for this step
     cache_path = self.cache_file.name
     # first opening creates the cache
     created = self.cls_to_test(cache_path, empty_model, force_create=True)
     # second opening reuses it
     reopened = self.cls_to_test(cache_path, empty_model)
     self.assertFalse(reopened.created)
     # Matching status lists also mean that the inserts made while setting
     # up the DB were committed.
     self.assertEqual(created.statuslist, reopened.statuslist)
Example #10
0
 def test_id_stored_sequence_len1(self):
     # Storing a one-element file sequence: the first call reports a new
     # entry with ID 1, an identical call returns the same ID, and a
     # different file name gives a new, different ID.
     # `rnaseq` is what gets passed in the model position here (the empty
     # core.Model previously built in this test was never used).
     cache = self.cls_to_test(self.cache_file.name,
                              rnaseq,
                              force_create=True)
     db_id = cache.id_stored_sequence(core.FileSequence,
                                      ((core.File, 'hohoho.fastq'), ))
     self.assertTrue(db_id.new)
     self.assertEqual(1, db_id.id)
     # same will give the same id.
     db_id_same = cache.id_stored_sequence(core.FileSequence,
                                           ((core.File, 'hohoho.fastq'), ))
     self.assertFalse(db_id_same.new)
     self.assertEqual(db_id.id, db_id_same.id)
     # different will give a different id.
     db_id_differ = cache.id_stored_sequence(
         core.FileSequence, ((core.File, 'hahaha.fastq'), ))
     self.assertTrue(db_id_differ.new)
     self.assertNotEqual(db_id.id, db_id_differ.id)
Example #11
0
 def test_id_stored_entity(self):
     # Storing a single entity: the first call reports a new entry with
     # ID 1, an identical call returns the same ID, and a different file
     # name gives a new, different ID.
     # `rnaseq` is what gets passed in the model position here (the empty
     # core.Model previously built in this test was never used).
     cache = self.cls_to_test(self.cache_file.name,
                              rnaseq,
                              force_create=True)
     db_id = cache.id_stored_entity(rnaseq.FASTQPossiblyGzipCompressed,
                                    'hohoho.fastq')
     self.assertTrue(db_id.new)
     self.assertEqual(1, db_id.id)
     # same will give the same id.
     db_id_same = cache.id_stored_entity(rnaseq.FASTQPossiblyGzipCompressed,
                                         'hohoho.fastq')
     self.assertFalse(db_id_same.new)
     self.assertEqual(db_id.id, db_id_same.id)
     # different will give a different id.
     db_id_differ = cache.id_stored_entity(
         rnaseq.FASTQPossiblyGzipCompressed, 'hahaha.fastq')
     self.assertTrue(db_id_differ.new)
     self.assertNotEqual(db_id.id, db_id_differ.id)
Example #12
0
 def test_Step(self):
     # A Step built on a "to do" DB entry is not complete and runs with a
     # return code of 0; a Step built on a "done" DB entry is complete.
     infile = tempfile.NamedTemporaryFile(mode='w+')
     infile.write('foobarbaz')
     infile.flush()
     outfile = tempfile.NamedTemporaryFile(suffix='.csv')
     # StoredEntity fields: 'id label classname entityname'
     stored_sources = (hortator.StoredEntity(1, 'file', 'File',
                                             infile.name), )
     stored_targets = (hortator.StoredEntity(1, 'crc', 'CSVFile',
                                             outfile.name), )
     no_params = ()

     # DB entry fields, as listed by the original author:
     # id status steptype_id executable clsname version
     entry = hortator.StepConcrete_DbEntry(1, hortator._TASK_TODO, 1, 1,
                                           None, 'CRCHeadTail', None, ())
     step = hortator.Step(entry, stored_sources, stored_targets, no_params,
                          rnaseq)
     self.assertFalse(step.iscomplete())
     self.assertEqual(0, step.run())

     entry = hortator.StepConcrete_DbEntry(1, hortator._TASK_DONE, 1, 1,
                                           None, 'CRCHeadTail', None, ())
     empty_model = core.Model(())  # an empty model is enough for this step
     step = hortator.Step(entry, stored_sources, stored_targets, no_params,
                          empty_model)
     self.assertTrue(step.iscomplete())
Example #13
0
    def test_gettargetsoftype(self):
        # Check the number of target assets get_targetsoftype() reports for
        # a given asset class name, before and after tasks are registered
        # with id_stepconcrete().
        model = core.Model(
            tuple())  # getting away with an empty model for this step
        cache = self.cls_to_test(self.cache_file.name,
                                 model,
                                 force_create=True)

        # Subclass of Split whose source and targets are declared as CSV
        # files.
        class SplitCSV(Split):
            _name = 'split'
            activities = (core.DEFAULT_ACTIVITY.MISC, )
            version = '0.1'
            _default_execpath = 'None'

            class Assets(core.AssetsStep):
                Source = core.assetfactory('Source', [
                    core.AssetAttr('file', railroadtracks.model.files.CSVFile,
                                   '')
                ])
                Target = core.assetfactory('Target', [
                    core.AssetAttr('head', railroadtracks.model.files.CSVFile,
                                   ''),
                    core.AssetAttr('tail', railroadtracks.model.files.CSVFile,
                                   '')
                ])

            def run(self, assets, parameters=()):
                # Write the first CSV row of the source to the "head" target
                # and all remaining rows to the "tail" target.
                # This method is not called directly anywhere in this test.
                # NOTE(review): files are opened in binary mode ('rb'/'wb');
                # Python 3's csv module expects text-mode files - confirm
                # the interpreter version targeted.
                with open(assets.source.file, 'rb') as fh_in:
                    csv_r = csv.reader(fh_in)
                    with open(assets.target.head, 'wb') as fh_out:
                        csv_w = csv.writer(fh_out)
                        head = next(csv_r)
                        csv_w.writerow(head)
                    with open(assets.target.tail, 'wb') as fh_out:
                        csv_w = csv.writer(fh_out)
                        for row in csv_r:
                            csv_w.writerow(row)
                cmd = None
                # NOTE(review): the return code is always 1, even when both
                # files were written - confirm this is intended.
                returncode = 1
                return cmd, returncode

        # --- plain Split step with core.File assets
        split = Split(None)
        parameters = tuple()
        input_file = tempfile.NamedTemporaryFile(mode='w+')
        input_file.write('123')
        input_file.flush()
        head_file = tempfile.NamedTemporaryFile(mode='w+')
        tail_file = tempfile.NamedTemporaryFile(mode='w+')
        sources = split.Assets.Source(core.File(input_file.name))
        targets = split.Assets.Target(core.File(head_file.name),
                                      core.File(tail_file.name))
        stepvariant_db_id = cache.id_step_variant(split, split.activities)
        # no target of type File before the task is registered...
        res = cache.get_targetsoftype(core.File.__name__)
        self.assertEqual(0, len(res))
        task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                        parameters)
        # ...and two (head and tail) afterwards
        res = cache.get_targetsoftype(core.File.__name__)
        self.assertEqual(2, len(res))

        # --- SplitCSV step with CSVFile assets
        # (input_file, head_file, tail_file, sources, targets and
        # stepvariant_db_id are all rebound in this section)
        splitcsv = SplitCSV(None)
        parameters = tuple()
        input_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
        input_file.write('123')
        input_file.flush()
        head_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
        tail_file = tempfile.NamedTemporaryFile(suffix=".csv", mode='w+')
        sources = splitcsv.Assets.Source(
            railroadtracks.model.files.CSVFile(input_file.name))
        targets = splitcsv.Assets.Target(
            railroadtracks.model.files.CSVFile(head_file.name),
            railroadtracks.model.files.CSVFile(tail_file.name))
        stepvariant_db_id = cache.id_step_variant(splitcsv,
                                                  splitcsv.activities)
        # no target of type CSVFile before the task is registered...
        res = cache.get_targetsoftype(
            railroadtracks.model.files.CSVFile.__name__)
        self.assertEqual(0, len(res))
        task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                        parameters)
        # ...and two afterwards
        res = cache.get_targetsoftype(
            railroadtracks.model.files.CSVFile.__name__)
        self.assertEqual(2, len(res))

        # --- one more task with two new core.File targets
        # NOTE(review): `stepvariant_db_id` and `sources` are still the
        # SplitCSV ones bound in the previous section, while the targets are
        # built with split.Assets.Target - confirm this mix is intended.
        head_file = tempfile.NamedTemporaryFile(mode='w+')
        tail_file = tempfile.NamedTemporaryFile(mode='w+')
        targets = split.Assets.Target(core.File(head_file.name),
                                      core.File(tail_file.name))
        task_id_other = cache.id_stepconcrete(stepvariant_db_id.id,
                                              sources,
                                              targets,
                                              parameters=(1, ))
        # four targets of type File are expected at this point
        res = cache.get_targetsoftype(core.File.__name__)
        self.assertEqual(4, len(res))
Example #14
0
    def test_get_parenttask_of_storedentity(self):
        # Check get_parenttask_of_storedentity() along a small chain of
        # tasks: a split, a second split fed with the tail of the first, a
        # cat fed with the heads of both splits, and a last split fed with
        # the result of the cat.
        model = core.Model(
            tuple())  # getting away with an empty model for this step
        cache = self.cls_to_test(self.cache_file.name,
                                 model,
                                 force_create=True)
        split = Split(None)
        parameters = tuple()
        input_file = tempfile.NamedTemporaryFile(mode='w+')
        input_file.write('123')
        input_file.flush()
        head_file = tempfile.NamedTemporaryFile(mode='w+')
        tail_file = tempfile.NamedTemporaryFile(mode='w+')
        sources = split.Assets.Source(core.File(input_file.name))
        targets = split.Assets.Target(core.File(head_file.name),
                                      core.File(tail_file.name))
        # ensure that step variant is tracked
        stepvariant_db_id = cache.id_step_variant(split, split.activities)
        # create a new task
        task_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                        parameters)
        # NOTE(review): the object returned by id_stepconcrete() is passed
        # to get_srcassets()/get_targetassets() here, not its `.id` -
        # confirm both are accepted.
        for sa in cache.get_srcassets(task_id):
            # all are root nodes
            # (no parent task is expected for the sources of the first task)
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertTrue(res is None)
        for sa in cache.get_targetassets(task_id):
            # the parent of a target is expected to be the task itself
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id.id, res.id)
        # second split: its source is the "tail" target of the first task
        head_file_other = tempfile.NamedTemporaryFile(mode='w+')
        tail_file_other = tempfile.NamedTemporaryFile(mode='w+')
        targets_other = split.Assets.Target(core.File(head_file_other.name),
                                            core.File(tail_file_other.name))
        task_id_other = cache.id_stepconcrete(stepvariant_db_id.id,
                                              split.Assets.Source(
                                                  targets.tail),
                                              targets_other,
                                              parameters=(1, ))
        for sa in cache.get_srcassets(task_id_other):
            # the source was produced by the first task
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id.id, res.id)
        for sa in cache.get_targetassets(task_id_other):
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id_other.id, res.id)

        # --
        # cat: its source is the sequence made of the "head" targets of the
        # two split tasks
        cat = Cat(None)
        sources_cat = cat.Assets.Source(
            core.FileSequence((targets.head, targets_other.head)))
        concat = tempfile.NamedTemporaryFile(mode='w+')
        targets_cat = cat.Assets.Target(core.File(concat.name))
        # ensure that step variant is tracked
        stepvariant_db_id_cat = cache.id_step_variant(cat, cat.activities)
        task_id_cat = cache.id_stepconcrete(stepvariant_db_id_cat.id,
                                            sources_cat, targets_cat,
                                            parameters)
        for sa in cache.get_srcassets(task_id_cat):
            if hasattr(sa, 'iter_storedentities'):
                # members of a stored sequence are paired, in order, with
                # the first and the second split task
                for sa_sub, t in zip(sa.iter_storedentities(),
                                     (task_id, task_id_other)):
                    res = cache.get_parenttask_of_storedentity(sa_sub)
                    self.assertEqual(t.id, res.id)
            else:
                # NOTE(review): for a source that is not a sequence the
                # expected parent is the cat task itself - confirm.
                res = cache.get_parenttask_of_storedentity(sa)
                self.assertEqual(task_id_cat.id, res.id)

        # last split: its source is the result of the cat task
        # (head_file_other, tail_file_other, targets_other and task_id are
        # rebound here)
        head_file_other = tempfile.NamedTemporaryFile(mode='w+')
        tail_file_other = tempfile.NamedTemporaryFile(mode='w+')
        targets_other = split.Assets.Target(core.File(head_file_other.name),
                                            core.File(tail_file_other.name))
        task_id = cache.id_stepconcrete(
            stepvariant_db_id.id, split.Assets.Source(targets_cat.result),
            targets_other, tuple())
        for sa in cache.get_srcassets(task_id):
            res = cache.get_parenttask_of_storedentity(sa)
            self.assertEqual(task_id_cat.id, res.id)
Example #15
0
    def test_id_stepconcrete(self):
        # Check which combinations of step variant, sources, targets,
        # parameters and tag make id_stepconcrete() return the same ID and
        # which make it return a different one.
        model = core.Model(
            tuple())  # getting away with an empty model for this step
        cache = self.cls_to_test(self.cache_file.name,
                                 model,
                                 force_create=True)

        class Activities(core.Enum):
            DATETIME = 'Give date/time'

        PythonTime = self.PythonTime

        python = PythonTime('python')
        stepvariant_db_id = cache.id_step_variant(python,
                                                  (Activities.DATETIME, ))
        # empty sources is a special case
        sources = core.AssetSet()  # source
        targets = core.AssetSet()  # targets
        parameters = tuple()
        db_id = cache.id_stepconcrete(stepvariant_db_id.id, sources, targets,
                                      parameters)
        # identical arguments: same ID
        db_id_same = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                           targets, parameters)
        self.assertEqual(db_id.id, db_id_same.id)
        # identical arguments but an explicit tag: different ID
        db_id_notthesame = cache.id_stepconcrete(stepvariant_db_id.id,
                                                 sources,
                                                 targets,
                                                 parameters,
                                                 tag=2)
        self.assertNotEqual(db_id.id, db_id_notthesame.id)

        # 1-element sources
        sources = railroadtracks.model.aligners.AssetsIndexer.Source(
            rnaseq.FASTAFile('foo.fasta'))

        # different from the empty-sources task, but stable when repeated
        db_id_nothesame = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                                targets, parameters)
        self.assertNotEqual(db_id.id, db_id_nothesame.id)
        db_id_sameagain = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                                targets, parameters)

        self.assertEqual(db_id_sameagain.id, db_id_nothesame.id)
        # same sources with one parameter: yet another ID
        db_id_nothesameagain = cache.id_stepconcrete(stepvariant_db_id.id,
                                                     sources, targets,
                                                     ("%Y", ))
        self.assertNotEqual(db_id.id, db_id_nothesameagain.id)
        self.assertNotEqual(db_id_sameagain.id, db_id_nothesameagain.id)

        # 1-element sources, several parameters
        db_id_2params = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                              targets, ("%Y", "Z"))
        db_id_same2params = cache.id_stepconcrete(stepvariant_db_id.id,
                                                  sources, targets,
                                                  ("%Y", "Z"))
        self.assertEqual(db_id_2params.id, db_id_same2params.id)

        # changing one of the two parameters changes the ID
        db_id_2otherparams = cache.id_stepconcrete(stepvariant_db_id.id,
                                                   sources, targets,
                                                   ("%Y", "W"))
        self.assertNotEqual(db_id_2params.id, db_id_2otherparams.id)

        # 2-elements sources
        SrcCls = core.assetfactory('Source', [
            core.AssetAttr('reference', rnaseq.FASTAFile, ''),
            core.AssetAttr('otherreference', rnaseq.FASTAFile, '')
        ])
        sources = SrcCls(rnaseq.FASTAFile('foo.fasta'),
                         rnaseq.FASTAFile('bar.fasta'))
        db_id_notthesame = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                                 targets, parameters)
        self.assertNotEqual(db_id.id, db_id_notthesame.id)
        db_id_sameagain = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                                targets, parameters)
        self.assertEqual(db_id_sameagain.id, db_id_notthesame.id)

        # 1-element source / 1-element target
        sources = railroadtracks.model.aligners.AssetsIndexer.Source(
            rnaseq.FASTAFile('foo.fasta'))
        targets = railroadtracks.model.aligners.AssetsIndexer.Target(
            rnaseq.FilePattern('foo_idx'))

        # a different step (rnaseq.Anyscript) is registered as the step
        # variant used from here on
        foo_sh = rnaseq.Anyscript()
        stepvariant_db_id = cache.id_step_variant(foo_sh,
                                                  (Activities.DATETIME, ))

        db_id_nothesame = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                                targets, parameters)
        self.assertNotEqual(db_id.id, db_id_nothesame.id)
        db_id_sameagain = cache.id_stepconcrete(stepvariant_db_id.id, sources,
                                                targets, parameters)
        self.assertEqual(db_id_sameagain.id, db_id_nothesame.id)

        # fail if target assets are suddenly different
        # (same step variant, sources and parameters as just above, but
        # another target: a ValueError is expected)
        targets_bar = railroadtracks.model.aligners.AssetsIndexer.Target(
            rnaseq.FilePattern('bar_idx'))
        self.assertRaises(ValueError, cache.id_stepconcrete,
                          stepvariant_db_id.id, sources, targets_bar,
                          parameters)