Esempio n. 1
0
def pythonize_goetia(klass, name):
    """Attach Python conveniences to ``UnitigWalker`` instantiations.

    When ``name`` is an instantiation of the ``UnitigWalker`` template,
    this installs a read/write ``cursor`` property backed by the class's
    ``get_cursor``/``set_cursor`` methods and makes ``__str__`` of the
    left and right ``Walk`` types point at their ``to_string`` method.
    Any other class is left untouched.

    Args:
        klass: The bound class being pythonized.
        name: The class name, matched against the ``UnitigWalker`` template.
    """
    matches_walker, _ = is_template_inst(name, 'UnitigWalker')
    if not matches_walker:
        return

    # Disabled wrapper that would default the ``seed``/``mask`` arguments of
    # the walk methods; kept for reference.
    #
    # def wrap_walk(wrapped):
    #     def _walk(self, seed=None, mask=None):
    #         if mask is None:
    #             mask = std.set[type(self).hash_type]()
    #         if seed is None:
    #             return wrapped(self, mask)
    #         else:
    #             return wrapped(self, seed, mask)
    #     return _walk
    #
    # klass.walk_left = wrap_walk(klass.walk_left)
    # klass.walk_right = wrap_walk(klass.walk_right)
    # klass.walk = wrap_walk(klass.walk)

    # Getter and setter are supplied together instead of chaining .setter().
    klass.cursor = property(
        klass.get_cursor,
        lambda self, seed: self.set_cursor(seed),
    )

    # Both walk directions get the same str() -> to_string() mapping.
    for direction in (gbl.goetia.DIR_LEFT, gbl.goetia.DIR_RIGHT):
        walk_type = klass.Walk[direction]
        walk_type.__str__ = walk_type.to_string
Esempio n. 2
0
def pythonize_goetia(klass, name):
    """Attach Python helpers to sketch classes.

    * ``SourmashSketch``: adds ``Sketch.to_sourmash``.
    * ``UnikmerSketch`` instantiations: add ``Sketch.to_numpy`` and
      ``Sketch.__len__``, and wrap ``Sketch.build`` so the UKHS and the
      storage parameters become optional.

    Args:
        klass: The bound class being pythonized.
        name: The class name used to decide which helpers apply.
    """
    if name == 'SourmashSketch':

        def to_sourmash(self):
            """Convert this sketch into a ``sourmash.MinHash``.

            Returns:
                The new ``MinHash``, or ``None`` (after printing a notice to
                stderr) when the ``sourmash`` package cannot be imported.
            """
            try:
                from sourmash import MinHash
            except ImportError:
                print(
                    'Must install python sourmash to convert to sourmash.MinHash',
                    file=sys.stderr)
                return None

            return MinHash(self.num(),
                           self.ksize(),
                           is_protein=self.is_protein(),
                           dayhoff=self.dayhoff(),
                           hp=self.hp(),
                           track_abundance=self.track_abundance(),
                           seed=self.seed(),
                           mins=self.mins(),
                           max_hash=self.max_hash())

        klass.Sketch.to_sourmash = to_sourmash

    matches_unikmer, _template = utils.is_template_inst(name, 'UnikmerSketch')
    if not matches_unikmer:
        return

    def to_numpy(self) -> np.ndarray:
        """
        Returns:
            numpy.ndarray: Numpy array with the signature vector.
        """
        raw = self.get_sketch_as_buffer()
        n_values = len(self)
        # The return value is discarded, so this presumably sizes the
        # low-level buffer in place -- TODO confirm against the binding.
        raw.reshape((n_values, ))
        return np.frombuffer(raw, dtype=np.uint64, count=n_values)

    def __len__(self) -> int:
        """Return the sketch size as reported by ``get_size()``."""
        return self.get_size()

    klass.Sketch.to_numpy = to_numpy
    klass.Sketch.__len__ = __len__

    def wrap_build(build_func):
        """Wrap ``build_func`` so ``ukhs`` and ``storage_args`` are optional."""

        def wrapped(W, K, ukhs=None, storage_args=None):
            # Load the UKHS for (W, K) when the caller did not pass one.
            if ukhs is None:
                ukhs = klass.ukhs_type.load(W, K)

            # Storage params are only forwarded when arguments were given.
            extra = ()
            if storage_args is not None and len(storage_args) != 0:
                extra = (klass.storage_type.make_params(*storage_args), )

            return build_func(W, K, ukhs.__smartptr__(), *extra)

        return wrapped

    klass.Sketch.build = wrap_build(klass.Sketch.build)
Esempio n. 3
0
def pythonize_goetia(klass, name):
    """Make sequence parser classes iterable from Python.

    ``FastxParser`` instantiations yield single records, while
    ``SplitPairedReader`` instantiations yield ``(left, right)`` tuples.

    Args:
        klass: The bound class being pythonized.
        name: The class name, matched against the parser templates.
    """
    fastx_match, _ = is_template_inst(name, 'FastxParser')
    if fastx_match:

        def __iter__(self):
            """Yield every truthy record until ``is_complete()`` is true."""
            while not self.is_complete():
                record = self.next()
                if not record:
                    continue
                yield record

        klass.__iter__ = __iter__

    split_match, _ = is_template_inst(name, 'SplitPairedReader')
    if split_match:

        def __iter__(self):
            """Yield ``(left, right)`` pairs until ``is_complete()`` is true.

            A falsy side is replaced by ``None``; a truthy side is replaced
            by the result of its ``value()`` call. Pairs where both sides
            end up ``None`` are skipped.
            """
            while not self.is_complete():
                left, right = self.next()
                left = left.value() if left else None
                right = right.value() if right else None
                if left is None and right is None:
                    continue
                yield left, right

        klass.__iter__ = __iter__
Esempio n. 4
0
def pythonize_goetia(klass, name):
    """Attach Python helpers to ``FileProcessor`` instantiations.

    For a matching class this:

    * sets the ``__release_gil__`` flag on ``advance`` and ``process``
      (cppyy's per-function switch for releasing the GIL during the call),
    * wraps ``build`` so ``interval`` and ``verbose`` are keyword arguments
      with defaults, and
    * adds the ``chunked_process`` generator.

    Args:
        klass: The bound class being pythonized.
        name: The class name, matched against the ``FileProcessor`` template.
    """

    is_inst, _ = is_template_inst(name, 'FileProcessor')
    if is_inst:
        klass.advance.__release_gil__ = True
        klass.process.__release_gil__ = True

        def chunked_process(self, file, right_file=None):
            """Process input incrementally, yielding progress after each step.

            Args:
                file: Either a path (``str`` or ``bytes``) to open with a
                    ``FastxParser``, or an already-built parser object.
                right_file: Optional second path; when given together with
                    a path in ``file``, a ``SplitPairedReader`` is built
                    over both files.

            Yields:
                ``(n_sequences, time_elapsed, n_skipped)`` each time
                ``advance`` reports a larger sequence count than on the
                previous step.
            """
            # isinstance (rather than an exact type() match) so that
            # subclasses of str/bytes are also recognised as paths instead
            # of being handed to advance() as if they were a parser.
            if isinstance(file, (str, bytes)):
                from goetia.parsing import FastxParser, SplitPairedReader

                parser_type = FastxParser[type(self).alphabet]

                if right_file is None:
                    parser = parser_type.build(file)
                else:
                    parser = SplitPairedReader[parser_type].build(file,
                                                                  right_file)
            else:
                parser = file

            advancing = True
            prev_n_sequences = 0
            while advancing:
                n_sequences, time_elapsed, advancing = self.advance(parser)
                # Only report steps that actually consumed new sequences.
                if n_sequences > prev_n_sequences:
                    yield n_sequences, time_elapsed, parser.n_skipped()
                prev_n_sequences = n_sequences

        def wrap_build(build_func):
            """Wrap ``build_func`` so ``interval``/``verbose`` have defaults."""

            def wrapped(*args, interval=10000, verbose=False):
                # The underlying builder takes both values positionally,
                # after the caller's own arguments.
                return build_func(*args, interval, verbose)

            return wrapped

        klass.build = wrap_build(klass.build)
        klass.chunked_process = chunked_process
Esempio n. 5
0
def pythonize_goetia(klass, name):
    """Attach Python helpers to ``dBG`` and ``PdBG`` instantiations.

    ``dBG`` classes gain ``add``, ``get_hash``, ``left_degree``,
    ``right_degree``, ``shallow_clone`` and ``hashes``; ``get`` is rebound
    to ``query``; and ``left_extensions``/``right_extensions`` return
    Python lists. ``PdBG`` classes get a ``build`` wrapper that fills in
    the UKHS and the storage parameters.

    Args:
        klass: The bound class being pythonized.
        name: The class name, matched against the ``dBG``/``PdBG`` templates.
    """
    is_dbg, _ = is_template_inst(name, 'dBG')
    if is_dbg:

        def add(self, item):
            """Insert a hash value, a single k-mer, or a longer sequence.

            Args:
                item: An ``int`` or an object of length exactly ``K`` is
                    passed to ``insert``; a longer object is passed to
                    ``insert_sequence``.

            Raises:
                ValueError: If ``item`` is not an ``int`` and is shorter
                    than ``K``.
            """
            is_int = isinstance(item, int)
            if not is_int and len(item) < self.K:
                raise ValueError(
                    'Sequence of length {0} is shorter than K={1}.'.format(
                        len(item), self.K))
            if is_int or len(item) == self.K:
                return self.insert(item)
            return self.insert_sequence(item)

        def wrap_query(func):
            """Return a one-argument method that forwards to ``func``."""

            def _get(self, item):
                return func(self, item)

            return _get

        def shallow_clone(self):
            """Return the result of ``clone()``."""
            return self.clone()

        def hashes(self, sequence):
            """Yield each value produced by the hash iterator over ``sequence``."""
            it = self.get_hash_iter(sequence)
            while not it.done():
                yield it.next()

        def left_degree(self, kmer):
            """Move the cursor to ``kmer`` and return ``in_degree()``."""
            self.set_cursor(kmer)
            return self.in_degree()

        def right_degree(self, kmer):
            """Move the cursor to ``kmer`` and return ``out_degree()``."""
            self.set_cursor(kmer)
            return self.out_degree()

        def wrap_get(func):
            """Return a zero-argument method that forwards to ``func``."""

            def _get(self):
                return func(self)

            return _get

        def wrap_vector_ret(func):
            """Wrap ``func`` so its iterable result is returned as a list."""

            def wrapped(self, *args, **kwargs):
                return list(func(self, *args, **kwargs))

            return wrapped

        klass.add = add
        # Order matters: get_hash must capture the original ``get`` before
        # ``get`` itself is rebound to the ``query`` wrapper.
        klass.get_hash = wrap_get(klass.get)
        klass.get = wrap_query(klass.query)
        klass.left_degree = left_degree
        klass.right_degree = right_degree
        klass.shallow_clone = shallow_clone
        klass.hashes = hashes

        klass.left_extensions = wrap_vector_ret(klass.left_extensions)
        klass.right_extensions = wrap_vector_ret(klass.right_extensions)
        #klass.filter_nodes = wrap_vector_ret(klass.filter_nodes)
        #klass.in_neighbors = wrap_vector_ret(klass.in_neighbors)
        #klass.out_neighbors = wrap_vector_ret(klass.out_neighbors)

    is_pdbg, _ = is_template_inst(name, 'PdBG')
    if is_pdbg:

        def wrap_build(build_func):
            """Wrap ``build_func`` so UKHS and storage params are optional."""

            def wrapped(W, K, *storage_args, ukhs=None):
                if ukhs is None:
                    ukhs = klass.ukhs_type.load(W, K)

                storage_type = klass.base_storage_type
                if not storage_args:
                    # No storage arguments: fall back to the defaults.
                    params = storage_type.default_params
                elif len(storage_args) == 1 and \
                     isinstance(storage_args[0], storage_type.params_type):
                    # A ready-made params object is used as is.
                    params = storage_args[0]
                else:
                    params = storage_type.make_params(*storage_args)
                return build_func(W, K, ukhs, params)

            return wrapped

        klass.build = wrap_build(klass.build)
Esempio n. 6
0
def pythonize_goetia(klass, name):
    """Attach Python helpers to UKHS, hash shifter and hash value classes.

    * ``UKHS`` instantiations gain the static methods ``parse_unikmers``
      and ``load`` (the latter cached in ``UKHS_CACHE``).
    * ``HashShifter`` instantiations gain a ``__getattr__`` fallback that
      resolves missing instance attributes on the class; those with
      ``'Unikmer'`` in their name also gain a static ``build``.
    * ``Hash``/``Canonical``/``Wmer``/``Kmer``/``Shift``/``Partitioned``
      instantiations expose ``value`` as a property and get comparison,
      ``__repr__`` and ``__hash__`` methods based on it.

    Args:
        klass: The bound class being pythonized.
        name: The class name, matched against the templates above.
    """

    ukhs_inst, template = is_template_inst(name, 'UKHS')
    if ukhs_inst:

        def parse_unikmers(W, K):
            """Read the bundled unikmer list for window ``W`` and size ``K``.

            ``W`` is first rounded down to a multiple of 10.

            Args:
                W: Window size; after rounding it must be one of
                    20, 30, ..., 200.
                K: K-mer size; must be one of 7, 8, 9, 10.

            Returns:
                A ``std.vector[std.string]`` holding one stripped line of
                the data file per entry.

            Raises:
                ValueError: If ``W`` or ``K`` is outside the valid set.
            """
            import gzip
            import os

            valid_W = list(range(20, 210, 10))
            valid_K = list(range(7, 11))
            W = W - (W % 10)

            if W not in valid_W:
                raise ValueError('Invalid UKHS window size.')
            if K not in valid_K:
                raise ValueError('Invalid UKHS K.')

            filename = os.path.join(DATA_DIR,
                                    'res_{0}_{1}_4_0.txt.gz'.format(K, W))
            unikmers = std.vector[std.string]()
            with gzip.open(filename, 'rt') as fp:
                for line in fp:
                    unikmers.push_back(line.strip())

            return unikmers

        klass.parse_unikmers = staticmethod(parse_unikmers)

        def load(W, K):
            """Return the UKHS for ``(W, K)``, building it on first use.

            Results are stored in ``UKHS_CACHE`` under
            ``(W, K, klass.__name__)``.
            """
            key = (W, K, klass.__name__)
            if key in UKHS_CACHE:
                return UKHS_CACHE[key]

            ukhs = klass.build(W, K, parse_unikmers(W, K))
            UKHS_CACHE[key] = ukhs
            return ukhs

        klass.load = staticmethod(load)

    shifter_inst, template = is_template_inst(name, 'HashShifter')
    if shifter_inst:

        def __getattr__(self, arg):
            """Resolve attributes missing on the instance via its class.

            Raises:
                AttributeError: If the class has no such attribute, if the
                    resolved object has no ``__name__``, or if that
                    ``__name__`` starts with a double underscore.
            """
            attr = getattr(type(self), arg)
            if attr.__name__.startswith('__'):
                # Raise instead of falling through: an implicit ``None``
                # return would make hasattr() report the attribute as
                # present and hand callers a bogus value.
                raise AttributeError(arg)
            return attr

        klass.__getattr__ = __getattr__

        if 'Unikmer' in name:

            def build(W, K):
                """Construct a shifter using the loaded UKHS for ``(W, K)``."""
                ukhs_type = klass.ukhs_type
                ukhs = ukhs_type.load(W, K)
                shifter = klass(W, K, ukhs.__smartptr__())
                return shifter

            klass.build = staticmethod(build)

        #set_typedef_attrs(klass, ['alphabet', 'hash_type', 'value_type', 'kmer_type'])

    for check_name in [
            'Hash', 'Canonical', 'Wmer', 'Kmer', 'Shift', 'Partitioned'
    ]:

        is_inst, _ = is_template_inst(name, check_name)
        if not is_inst:
            continue

        klass.value = property(klass.value)
        klass.__lt__ = lambda self, other: self.value < other.value
        klass.__le__ = lambda self, other: self.value <= other.value
        klass.__gt__ = lambda self, other: self.value > other.value
        klass.__ge__ = lambda self, other: self.value >= other.value
        klass.__ne__ = lambda self, other: self.value != other.value
        klass.__repr__ = klass.__str__
        klass.__hash__ = lambda self: hash(self.value)

        # Stop after the first match: wrapping ``value`` a second time
        # would build a property around a property, which cannot be called.
        break