def test_name(self):
    """Round-trip region strings through ``Region.from_string``.

    Full parsing behaviour is covered by the docs for
    ``Region.from_string``; here we only check that the parsed
    region's ``name`` matches the expected canonical string.
    """
    round_trips = (
        ('contig1:50-100', 'contig1:50-100'),
        ('contig1:50-', 'contig1:50-'),
        ('contig1:-100', 'contig1:0-100'),
        ('contig1', 'contig1:0-'),
    )
    for text, expected in round_trips:
        region = Region.from_string(text)
        self.assertEqual(region.name, expected)
def test_101_spam(self):
    """Flood the sampler with duplicate regions; totals must still tally."""
    # 100 copies of a genuine bam region, plus 1000 tiny "spam" regions
    # that are expected to end up as remainders.
    regions = 100 * [get_bam_regions(self.bam)[0]]
    regions.extend([Region.from_string("ref:0-100")] * 1000)  # spam
    self._run_one(
        19, 250, 0,
        exp_batches=106, exp_samples=2000, exp_remains=1000,
        regions=regions)
def test_100_convoluted(self):
    """Mix full regions with small sub-regions that become remainders."""
    # The chunk generator gives a larger overlap in the last chunk, so
    # each of these five regions yields 4 chunks and no remainder...
    full_regions = [get_bam_regions(self.bam)[0]] * 5
    # ...while these seven small sub-regions fall through as remainders.
    small_regions = [Region.from_string("ref:0-1000")] * 7
    self._run_one(
        2, 1300, 0,
        exp_batches=10, exp_samples=20, exp_remains=7,
        regions=full_regions + small_regions)
def test_000_split_gap(self):
    """A gapped alignment is split into one chunk per covered block.

    The gapped bam contains:
        @SQ SN:ref LN:30
        seq1 0 ref 1  7 10M
        seq2 0 ref 15 13 16M
    i.e. one alignment over [0:10] and one over [14:30], with no
    insertions, so pileup_counts should return exactly two chunks.
    """
    expected_lengths = (10, 16)
    region = Region.from_string('ref:0-30')
    chunks = medaka.features.pileup_counts(region, __gapped_bam__)
    self.assertEqual(
        len(chunks), 2, 'Number of chunks from gapped alignment')
    for want, chunk in zip(expected_lengths, chunks):
        # Check both the pileup (index 0) and positions (index 1).
        for idx in range(2):
            self.assertEqual(want, len(chunk[idx]))
def test_001_cases(self, FastaFile):
    """Stitching should run without raising on known-tricky inputs.

    :param FastaFile: mock standing in for the fasta reader (injected
        by the surrounding ``mock.patch`` decoration — not visible in
        this chunk; verify against the test class).
    """
    class Args:
        """Minimal attribute bag mimicking an argparse namespace."""
        def __init__(self, **kwargs):
            for key, value in kwargs.items():
                setattr(self, key, value)

    files = {1: "utg1190", 2: "scaffold_117"}
    FastaFile.return_value = Args(
        references=['scaffold_117', 'utg1190'],
        lengths=[45079626, 16772114])
    # Use a context manager so the temp file is closed and removed even
    # if stitching fails (the original leaked the open handle).
    with tempfile.NamedTemporaryFile() as temp:
        args = Args(
            draft="", threads=1, output=temp.name, fillgaps=False)
        for fid, region in files.items():
            fname = os.path.join(
                os.path.dirname(__file__), "data",
                "test_stitch_{}.hdf".format(fid))
            args.inputs = fname
            args.regions = [Region.from_string(region)]
            try:
                medaka.stitch.stitch(args)
            except Exception as e:
                # Fixed message grammar: was "raise and Exception".
                self.fail(
                    "Stitching raised an Exception:\n {}".format(e))
def main():
    """Ad-hoc entry point for testing/checking feature generation.

    Parses a bam + region from the command line, then builds pileup
    samples twice per option combination — once via the pure-python
    path (``force_py=True``) and once via the C/hts path — printing
    feature shapes, optional per-position counts, and wall-clock
    timings for each.
    """
    # Entry point for testing/checking
    logging.basicConfig(format='[%(asctime)s - %(name)s] %(message)s', datefmt='%H:%M:%S', level=logging.INFO)
    np.set_printoptions(precision=4, linewidth=100)
    parser = argparse.ArgumentParser('medaka', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('bam', help='alignment file.')
    parser.add_argument('region', help='alignment region to sample.')
    parser.add_argument('--print', action='store_true', help='print counts.')
    parser.add_argument('--dtypes', nargs='+', help='perform a multi-datatype tests.')
    parser.add_argument('--norm', nargs='+', help='additional normalisation tests. (total, fwd_rev)')
    args = parser.parse_args()
    region = Region.from_string(args.region)
    # Base keyword arguments for FeatureEncoder; 'dtypes' and
    # 'normalise' are filled in per iteration below.
    kwargs={
        'log_min': None,
        'max_hp_len': 1,
        'is_compressed': False,
        'consensus_as_ref': False,
        'ref_mode': None,
        'with_depth': False,
    }

    def _print(samples):
        # Dump one tab-separated row per position when --print is given:
        # major pos, minor pos, a constant 0, the feature vector
        # ('-' for non-positive entries), and the feature sum.
        if args.print:
            for p, f in zip(samples.positions, samples.features):
                print('{}\t{}\t0\t{}\t{}'.format(
                    p[0], p[1],
                    '\t'.join('{:.3f}'.format(x) if x>0.0 else '-' for x in f),
                    sum(f)))

    # Always test the default dtype option; add the user's set on top.
    dtype_options = [('',)]
    if args.dtypes is not None:
        dtype_options.append(args.dtypes)
    # Likewise always test no normalisation, plus any requested modes.
    norm_options = [None, ]
    if args.norm is not None:
        norm_options.extend(args.norm)

    for dtypes in dtype_options:
        kwargs['dtypes'] = dtypes
        for norm in norm_options:
            kwargs['normalise'] = norm
            print("###########################################################")
            print(kwargs)
            encoder = FeatureEncoder(**kwargs)

            # py-style
            t0=now()
            samples = encoder.bam_to_sample(args.bam, region, force_py=True)[0]
            t1=now()
            if not samples.is_empty:
                print(samples.features.shape)
                _print(samples)
            else:
                print("Samples is empty")

            print("---------------------")

            # C-style
            t2=now()
            samples = encoder.bam_to_sample(args.bam, region)[0]
            t3=now()
            if not samples.is_empty:
                print(samples.features.shape)
                _print(samples)
            else:
                print("Samples is empty")

            # Compare wall-clock time of the two implementations.
            print("pysam time:", t1 - t0)
            print("hts time:", t3 - t2)