def pythonize_goetia(klass, name):
    """Add Python conveniences to ``UnitigWalker`` template instantiations.

    When ``is_template_inst`` reports that ``name`` is a ``UnitigWalker``
    instantiation, ``klass`` gains a read/write ``cursor`` property backed by
    its ``get_cursor`` and ``set_cursor`` methods, and the ``Walk`` types for
    both directions get ``__str__`` aliased to their ``to_string`` method.
    Any other class is left untouched.

    Args:
        klass: The class object being customized.
        name: The class name tested against the ``UnitigWalker`` template.
    """
    matched, _ = is_template_inst(name, 'UnitigWalker')
    if not matched:
        return

    # Disabled wrappers that supplied defaults for the ``seed`` and ``mask``
    # arguments of the walk methods; kept for reference.
    # def wrap_walk(wrapped):
    #     def _walk(self, seed=None, mask=None):
    #         if mask is None:
    #             mask = std.set[type(self).hash_type]()
    #         if seed is None:
    #             return wrapped(self, mask)
    #         else:
    #             return wrapped(self, seed, mask)
    #     return _walk
    # klass.walk_left = wrap_walk(klass.walk_left)
    # klass.walk_right = wrap_walk(klass.walk_right)
    # klass.walk = wrap_walk(klass.walk)

    # ``cursor`` reads through get_cursor() and writes through set_cursor().
    cursor = property(klass.get_cursor)
    klass.cursor = cursor.setter(lambda self, seed: self.set_cursor(seed))

    # Make str(walk) use to_string() for both walk directions.
    for direction in (gbl.goetia.DIR_LEFT, gbl.goetia.DIR_RIGHT):
        klass.Walk[direction].__str__ = klass.Walk[direction].to_string
def pythonize_goetia(klass, name):
    """Add Python conveniences to sketch classes.

    Two independent customizations are applied:

    * If ``name`` equals ``'SourmashSketch'``, ``klass.Sketch`` gains a
      ``to_sourmash`` method.
    * If ``utils.is_template_inst`` reports ``name`` as a ``UnikmerSketch``
      instantiation, ``klass.Sketch`` gains ``to_numpy`` and ``__len__``,
      and ``klass.Sketch.build`` is replaced by a wrapper with optional
      ``ukhs`` and ``storage_args`` arguments.

    Args:
        klass: The class object being customized.
        name: The class name used to decide which customizations apply.
    """
    if name == 'SourmashSketch':

        def to_sourmash(self):
            """Build a ``sourmash.MinHash`` from this sketch's accessors.

            Returns:
                The new ``MinHash``, or ``None`` (after printing a notice to
                stderr) when the ``sourmash`` package cannot be imported.
            """
            try:
                from sourmash import MinHash
            except ImportError:
                print(
                    'Must install python sourmash to convert to sourmash.MinHash',
                    file=sys.stderr)
                return None

            return MinHash(self.num(),
                           self.ksize(),
                           is_protein=self.is_protein(),
                           dayhoff=self.dayhoff(),
                           hp=self.hp(),
                           track_abundance=self.track_abundance(),
                           seed=self.seed(),
                           mins=self.mins(),
                           max_hash=self.max_hash())

        klass.Sketch.to_sourmash = to_sourmash

    is_inst, template = utils.is_template_inst(name, 'UnikmerSketch')
    if is_inst:

        def to_numpy(self) -> np.ndarray:
            """
            Returns:
                numpy.ndarray: Numpy array with the signature vector.
            """
            n_items = len(self)
            raw = self.get_sketch_as_buffer()
            # reshape() is called only for its effect on the buffer object;
            # its return value is not used.
            # NOTE(review): presumably this sizes the low-level view in
            # place -- confirm against the buffer type's documentation.
            raw.reshape((n_items, ))
            return np.frombuffer(raw, dtype=np.uint64, count=n_items)

        def __len__(self) -> int:
            """Return the sketch size as reported by ``get_size()``."""
            return self.get_size()

        klass.Sketch.to_numpy = to_numpy
        klass.Sketch.__len__ = __len__

        def wrap_build(build_func):
            """Wrap ``build_func`` so ``ukhs`` and storage params are optional."""

            def wrapped(W, K, ukhs=None, storage_args=None):
                # Load the UKHS for (W, K) when the caller did not pass one.
                if ukhs is None:
                    ukhs = klass.ukhs_type.load(W, K)

                # No storage arguments: use the three-argument build.
                if (storage_args is None) or (len(storage_args) == 0):
                    return build_func(W, K, ukhs.__smartptr__())

                params = klass.storage_type.make_params(*storage_args)
                return build_func(W, K, ukhs.__smartptr__(), params)

            return wrapped

        klass.Sketch.build = wrap_build(klass.Sketch.build)
def pythonize_goetia(klass, name):
    """Make parser classes iterable from Python.

    ``FastxParser`` instantiations get an ``__iter__`` that yields truthy
    records; ``SplitPairedReader`` instantiations get an ``__iter__`` that
    yields ``(left, right)`` tuples. Both checks always run, so if both
    match, the paired-reader iterator is the one left installed.

    Args:
        klass: The class object being customized.
        name: The class name tested against the two templates.
    """
    is_fastx, _ = is_template_inst(name, 'FastxParser')
    if is_fastx:

        def _iter_records(self):
            """Yield each truthy result of ``next()`` until ``is_complete()``."""
            while not self.is_complete():
                rec = self.next()
                if not rec:
                    continue
                yield rec

        klass.__iter__ = _iter_records

    is_split, _ = is_template_inst(name, 'SplitPairedReader')
    if is_split:

        def _iter_pairs(self):
            """Yield ``(left, right)`` values until ``is_complete()``.

            A falsy side of the pair is replaced with ``None``; a pair whose
            two sides are both ``None`` is skipped.
            """
            while not self.is_complete():
                left_item, right_item = self.next()
                left = left_item.value() if left_item else None
                right = right_item.value() if right_item else None
                if left is None and right is None:
                    continue
                yield left, right

        klass.__iter__ = _iter_pairs
def pythonize_goetia(klass, name):
    """Add Python conveniences to ``FileProcessor`` template instantiations.

    For a matching class this:

    * sets ``__release_gil__ = True`` on ``klass.advance`` and
      ``klass.process`` (NOTE(review): presumably the binding layer's flag
      for dropping the GIL during the call -- confirm);
    * replaces ``klass.build`` with a wrapper that gives ``interval`` and
      ``verbose`` keyword defaults;
    * adds a ``chunked_process`` generator method.

    Args:
        klass: The class object being customized.
        name: The class name tested against the ``FileProcessor`` template.
    """
    is_inst, _ = is_template_inst(name, 'FileProcessor')
    if is_inst:
        klass.advance.__release_gil__ = True
        klass.process.__release_gil__ = True

        def chunked_process(self, file, right_file=None):
            """Process input incrementally, yielding progress after each step.

            Args:
                file: Either a path (``str`` or ``bytes``, including
                    subclasses) from which a parser is built, or an
                    already-constructed parser object that is used as-is.
                right_file: Optional second path. When given together with
                    a path in ``file``, a ``SplitPairedReader`` is built
                    over both. It is not consulted when ``file`` is already
                    a parser.

            Yields:
                ``(n_sequences, time_elapsed, n_skipped)`` each time
                ``advance`` reports a larger sequence count than before.
            """
            # isinstance (rather than an exact type() match) so that str /
            # bytes subclasses are also recognized as paths instead of being
            # mistaken for parser objects.
            if isinstance(file, (str, bytes)):
                from goetia.parsing import FastxParser, SplitPairedReader
                parser_type = FastxParser[type(self).alphabet]
                if right_file is None:
                    parser = parser_type.build(file)
                else:
                    parser = SplitPairedReader[parser_type].build(
                        file, right_file)
            else:
                parser = file

            advancing = True
            prev_n_sequences = 0
            while advancing:
                n_sequences, time_elapsed, advancing = self.advance(parser)
                # Only report steps that actually consumed new sequences.
                if n_sequences > prev_n_sequences:
                    yield n_sequences, time_elapsed, parser.n_skipped()
                prev_n_sequences = n_sequences

        def wrap_build(build_func):
            """Wrap ``build_func`` so ``interval`` / ``verbose`` are optional."""

            def wrapped(*args, interval=10000, verbose=False):
                # The underlying build takes interval and verbose as its
                # trailing positional arguments.
                return build_func(*args, interval, verbose)

            return wrapped

        klass.build = wrap_build(klass.build)
        klass.chunked_process = chunked_process
def pythonize_goetia(klass, name):
    """Add Python conveniences to ``dBG`` and ``PdBG`` template instantiations.

    For ``dBG`` instantiations, several helper methods are attached
    (``add``, ``get_hash``, ``get``, ``left_degree``, ``right_degree``,
    ``shallow_clone``, ``hashes``) and the extension queries are wrapped to
    return Python lists. For ``PdBG`` instantiations, ``klass.build`` is
    wrapped so the UKHS and storage parameters are optional.

    Args:
        klass: The class object being customized.
        name: The class name tested against the two templates.
    """
    is_dbg, _ = is_template_inst(name, 'dBG')
    if is_dbg:

        def add(self, item):
            """Insert a single hash/k-mer or every k-mer of a sequence.

            Args:
                item: An ``int``, or a sized sequence.

            Returns:
                The result of ``insert`` for an ``int`` or an item of
                length exactly ``K``; otherwise the result of
                ``insert_sequence``.

            Raises:
                ValueError: If ``item`` is not an ``int`` and is shorter
                    than ``K``.
            """
            if isinstance(item, int):
                return self.insert(item)

            length = len(item)
            if length < self.K:
                raise ValueError(
                    'Item of length {0} is shorter than K={1}.'.format(
                        length, self.K))
            if length == self.K:
                return self.insert(item)
            return self.insert_sequence(item)

        def wrap_query(func):
            """Return a plain Python method forwarding ``(self, item)``."""

            def _get(self, item):
                return func(self, item)

            return _get

        def shallow_clone(self):
            """Return ``self.clone()``."""
            return self.clone()

        def hashes(self, sequence):
            """Yield each value from ``get_hash_iter(sequence)`` until done."""
            it = self.get_hash_iter(sequence)
            while not it.done():
                yield it.next()

        def left_degree(self, kmer):
            """Move the cursor to ``kmer`` and return ``in_degree()``."""
            self.set_cursor(kmer)
            return self.in_degree()

        def right_degree(self, kmer):
            """Move the cursor to ``kmer`` and return ``out_degree()``."""
            self.set_cursor(kmer)
            return self.out_degree()

        def wrap_get(func):
            """Return a plain Python method forwarding ``(self)`` only."""

            def _get(self):
                return func(self)

            return _get

        def wrap_vector_ret(func):
            """Wrap ``func`` so its iterable result is returned as a list."""

            def wrapped(self, *args, **kwargs):
                return list(func(self, *args, **kwargs))

            return wrapped

        klass.add = add
        # Order matters: capture the original ``get`` as ``get_hash`` before
        # ``get`` is rebound to the ``query`` wrapper on the next line.
        klass.get_hash = wrap_get(klass.get)
        klass.get = wrap_query(klass.query)
        klass.left_degree = left_degree
        klass.right_degree = right_degree
        klass.shallow_clone = shallow_clone
        klass.hashes = hashes
        klass.left_extensions = wrap_vector_ret(klass.left_extensions)
        klass.right_extensions = wrap_vector_ret(klass.right_extensions)
        #klass.filter_nodes = wrap_vector_ret(klass.filter_nodes)
        #klass.in_neighbors = wrap_vector_ret(klass.in_neighbors)
        #klass.out_neighbors = wrap_vector_ret(klass.out_neighbors)

    is_pdbg, _ = is_template_inst(name, 'PdBG')
    if is_pdbg:

        def wrap_build(build_func):
            """Wrap ``build_func`` so UKHS and storage params are optional."""

            def wrapped(W, K, *storage_args, ukhs=None):
                # Load the UKHS for (W, K) when the caller did not pass one.
                if ukhs is None:
                    ukhs = klass.ukhs_type.load(W, K)

                if len(storage_args) == 0:
                    # No storage arguments: use the storage type's defaults.
                    params = klass.base_storage_type.default_params
                elif len(storage_args) == 1 and \
                        isinstance(storage_args[0],
                                   klass.base_storage_type.params_type):
                    # A ready-made params object is passed through as-is.
                    params = storage_args[0]
                else:
                    params = klass.base_storage_type.make_params(*storage_args)

                return build_func(W, K, ukhs, params)

            return wrapped

        klass.build = wrap_build(klass.build)
def pythonize_goetia(klass, name):
    """Add Python conveniences to UKHS, hash shifter and hash value classes.

    * ``UKHS`` instantiations gain the static methods ``parse_unikmers``
      and ``load``.
    * ``HashShifter`` instantiations gain a ``__getattr__`` fallback and,
      when ``'Unikmer'`` occurs in ``name``, a static ``build`` method.
    * Instantiations of ``Hash``, ``Canonical``, ``Wmer``, ``Kmer``,
      ``Shift`` or ``Partitioned`` have ``value`` turned into a property and
      gain ordering, ``__ne__``, ``__repr__`` and ``__hash__`` based on it.

    Args:
        klass: The class object being customized.
        name: The class name tested against the templates above.
    """
    ukhs_inst, template = is_template_inst(name, 'UKHS')
    if ukhs_inst:

        def parse_unikmers(W, K):
            """Read the bundled unikmer list for window ``W`` and size ``K``.

            ``W`` is rounded down to a multiple of 10 before validation.

            Returns:
                A ``std.vector[std.string]`` holding one stripped line of
                the data file per entry.

            Raises:
                ValueError: If the rounded ``W`` is not one of
                    20, 30, ..., 200, or ``K`` is not one of 7..10.
            """
            import gzip
            import os

            W -= W % 10
            if W not in range(20, 210, 10):
                raise ValueError('Invalid UKHS window size.')
            if K not in range(7, 11):
                raise ValueError('Invalid UKHS K.')

            path = os.path.join(DATA_DIR,
                                'res_{0}_{1}_4_0.txt.gz'.format(K, W))
            result = std.vector[std.string]()
            with gzip.open(path, 'rt') as handle:
                for row in handle:
                    result.push_back(row.strip())
            return result

        klass.parse_unikmers = staticmethod(parse_unikmers)

        def load(W, K):
            """Return the UKHS for ``(W, K)``, building it on first use.

            Results are stored in ``UKHS_CACHE`` keyed by the ``W`` and
            ``K`` exactly as passed plus the class name.
            """
            cache_key = (W, K, klass.__name__)
            if cache_key in UKHS_CACHE:
                return UKHS_CACHE[cache_key]

            built = klass.build(W, K, parse_unikmers(W, K))
            UKHS_CACHE[cache_key] = built
            return built

        klass.load = staticmethod(load)

    shifter_inst, template = is_template_inst(name, 'HashShifter')
    if shifter_inst:

        def __getattr__(self, arg):
            """Fall back to the class attribute of the same name.

            The attribute is returned only when its ``__name__`` does not
            start with ``'__'``; otherwise the function falls through and
            returns ``None``.
            """
            found = getattr(type(self), arg)
            if found.__name__.startswith('__'):
                return None
            return found

        klass.__getattr__ = __getattr__

        if 'Unikmer' in name:

            def build(W, K):
                """Construct a shifter using the cached UKHS for ``(W, K)``."""
                ukhs = klass.ukhs_type.load(W, K)
                return klass(W, K, ukhs.__smartptr__())

            klass.build = staticmethod(build)

        #set_typedef_attrs(klass, ['alphabet', 'hash_type', 'value_type', 'kmer_type'])

    value_templates = ('Hash', 'Canonical', 'Wmer', 'Kmer', 'Shift',
                       'Partitioned')
    for check_name in value_templates:
        is_inst, _ = is_template_inst(name, check_name)
        if not is_inst:
            continue

        # NOTE(review): if ``name`` matched more than one entry, ``value``
        # would be wrapped in ``property`` once per match -- confirm the
        # matches are mutually exclusive.
        klass.value = property(klass.value)

        # Comparisons delegate to ``value``. ``__eq__`` is not assigned here.
        comparisons = (
            ('__lt__', lambda self, other: self.value < other.value),
            ('__le__', lambda self, other: self.value <= other.value),
            ('__gt__', lambda self, other: self.value > other.value),
            ('__ge__', lambda self, other: self.value >= other.value),
            ('__ne__', lambda self, other: self.value != other.value),
        )
        for dunder, impl in comparisons:
            setattr(klass, dunder, impl)

        klass.__repr__ = klass.__str__
        klass.__hash__ = lambda self: hash(self.value)