Example #1
    def test_name(self):
        # tests for Region.from_string are in docs
        # here we test round-tripping
        cases = [['contig1:50-100'] * 2, ['contig1:50-'] * 2,
                 ['contig1:-100', 'contig1:0-100'], ['contig1', 'contig1:0-']]
        for orig, parsed in cases:
            a = Region.from_string(orig)
            self.assertEqual(a.name, parsed)
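
The round-trip relies on Region.from_string parsing a samtools-style string and on Region.name rendering one back, with a missing start defaulting to 0 and a missing end left open. A minimal sketch consistent with the four cases above (a hypothetical stand-in, not medaka's actual implementation) could look like this:

# Hypothetical stand-in for medaka.common.Region, sufficient for the cases above.
from collections import namedtuple


class _SketchRegion(namedtuple('Region', ('ref_name', 'start', 'end'))):

    @property
    def name(self):
        # samtools-style string: a missing start renders as 0, a missing end stays open
        start = 0 if self.start is None else self.start
        end = '' if self.end is None else self.end
        return '{}:{}-{}'.format(self.ref_name, start, end)

    @classmethod
    def from_string(cls, region):
        if ':' not in region:
            return cls(region, None, None)
        ref_name, bounds = region.rsplit(':', 1)
        start, end = bounds.split('-')
        return cls(ref_name,
                   int(start) if start else None,
                   int(end) if end else None)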
Example #2
    def test_101_spam(self):
        regions = [get_bam_regions(self.bam)[0]] * 100
        regions += [Region.from_string("ref:0-100")] * 1000  # spam
        self._run_one(19,
                      250,
                      0,
                      exp_batches=106,
                      exp_samples=2000,
                      exp_remains=1000,
                      regions=regions)
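
Assuming the three positional arguments to _run_one are batch size (19), chunk length (250), and chunk overlap (0), the expected counts are consistent with a quick back-of-envelope check; the 20-chunks-per-region figure is inferred from the expected sample count rather than stated in the test:

# Rough, hypothetical arithmetic behind the expectations above.
import math

samples = 100 * 20                 # 100 copies of the BAM region at ~20 chunks each
batches = math.ceil(samples / 19)  # ceil(2000 / 19) == 106
remains = 1000                     # each 100-base "spam" region is shorter than one 250-base chunk
assert (batches, samples, remains) == (106, 2000, 1000)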
Example #3
    def test_100_convoluted(self):
        # the chunk generator will give a larger overlap in the last chunk
        # to give 4 chunks per region and no remainders for this:
        regions = [get_bam_regions(self.bam)[0]] * 5
        # but augment with 7 small subregions that will fall through
        # as remainders:
        regions += [Region.from_string("ref:0-1000")] * 7
        self._run_one(2,
                      1300,
                      0,
                      exp_batches=10,
                      exp_samples=20,
                      exp_remains=7,
                      regions=regions)
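
The comments already spell out the bookkeeping; assuming the positional arguments are again batch size (2), chunk length (1300), and overlap (0), the expectations follow directly:

# Hypothetical restatement of the arithmetic described in the comments above.
samples = 5 * 4         # five copies of the BAM region, four chunks each
batches = samples // 2  # batch size of 2 gives 10 full batches
remains = 7             # the seven 1000-base subregions are shorter than one 1300-base chunk
assert (batches, samples, remains) == (10, 20, 7)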
Example #4
    def test_000_split_gap(self):
        # The gapped bam has:
        # @SQ    SN:ref    LN:30
        # seq1    0    ref    1    7    10M
        # seq2    0    ref    15    13    16M
        # so an alignment from [0:10] and one from [14:30] without insertions
        chunk_lengths = [10, 16]

        region = Region.from_string('ref:0-30')
        results = medaka.features.pileup_counts(region, __gapped_bam__)
        self.assertEqual(len(results), 2, 'Number of chunks from gapped alignment')
        for exp_len, chunk in zip(chunk_lengths, results):
            for i in (0, 1):
                # check both pileup and positions
                self.assertEqual(exp_len, len(chunk[i]))
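
The two chunk lengths follow from the reference intervals covered by the two alignments quoted in the comment (SAM POS is 1-based; the intervals below are 0-based, end-exclusive):

# Illustrative only: covered intervals derived from the (POS, reference span) pairs above.
alignments = [(1, 10), (15, 16)]  # (1-based POS, reference span of the CIGAR: 10M and 16M)
intervals = [(pos - 1, pos - 1 + span) for pos, span in alignments]
assert intervals == [(0, 10), (14, 30)]           # a 4-base gap over ref[10:14]
assert [e - s for s, e in intervals] == [10, 16]  # matches chunk_lengths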
Example #5
    def test_001_cases(self, FastaFile):
        class Args:
            def __init__(self, **kwargs):
                for k, v in kwargs.items():
                    setattr(self, k, v)
        files = {1: "utg1190", 2: "scaffold_117"}
        FastaFile.return_value = Args(references=['scaffold_117', 'utg1190'], lengths=[45079626, 16772114])
        temp = tempfile.NamedTemporaryFile()
        args = Args(draft="", threads=1, output=temp.name, fillgaps=False)

        for fid, region in files.items():
            fname = os.path.join(os.path.dirname(__file__), "data", "test_stitch_{}.hdf".format(fid))
            args.inputs = fname
            args.regions = [Region.from_string(region)]
            try:
                medaka.stitch.stitch(args)
            except Exception as e:
                self.fail("Stitching raised an Exception:\n {}".format(e))
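
The FastaFile parameter implies the method is decorated with a mock.patch of pysam's FastaFile outside this excerpt; the nested Args class is simply an attribute bag used both for the mocked return value and for the stand-in command-line arguments. The standard library's types.SimpleNamespace would do the same job:

# Equivalent attribute container from the standard library (illustrative only).
from types import SimpleNamespace

fake_fasta = SimpleNamespace(references=['scaffold_117', 'utg1190'],
                             lengths=[45079626, 16772114])
assert fake_fasta.references[1] == 'utg1190'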
Example #6
def main():
    # Entry point for testing/checking
    logging.basicConfig(format='[%(asctime)s - %(name)s] %(message)s', datefmt='%H:%M:%S', level=logging.INFO)
    np.set_printoptions(precision=4, linewidth=100)

    parser = argparse.ArgumentParser('medaka', formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('bam', help='alignment file.')
    parser.add_argument('region', help='alignment region to sample.')
    parser.add_argument('--print', action='store_true', help='print counts.')
    parser.add_argument('--dtypes', nargs='+', help='perform multi-datatype tests.')
    parser.add_argument('--norm', nargs='+', help='additional normalisation tests (total, fwd_rev).')

    args = parser.parse_args()
    
    region = Region.from_string(args.region)
    
    kwargs = {
        'log_min': None,
        'max_hp_len': 1,
        'is_compressed': False,
        'consensus_as_ref': False,
        'ref_mode': None,
        'with_depth': False,
    }

    def _print(samples):
        if args.print:
            for p, f in zip(samples.positions, samples.features):
                print('{}\t{}\t0\t{}\t{}'.format(
                    p[0], p[1],
                    '\t'.join('{:.3f}'.format(x) if x > 0.0 else '-' for x in f),
                    sum(f)))
    
    dtype_options = [('',)]
    if args.dtypes is not None:
        dtype_options.append(args.dtypes)
    norm_options = [None, ]
    if args.norm is not None:
        norm_options.extend(args.norm)
    
    for dtypes in dtype_options:
        kwargs['dtypes'] = dtypes
        for norm in norm_options:
            kwargs['normalise'] = norm
    
            print("###########################################################")
            print(kwargs)
            encoder = FeatureEncoder(**kwargs)
        
            # py-style
            t0 = now()
            samples = encoder.bam_to_sample(args.bam, region, force_py=True)[0]
            t1 = now()
            if not samples.is_empty:
                print(samples.features.shape)
                _print(samples)
            else:
                print("Samples is empty")
            print("---------------------")
        
            # C-style
            t2 = now()
            samples = encoder.bam_to_sample(args.bam, region)[0]
            t3 = now()
            if not samples.is_empty:
                print(samples.features.shape)
                _print(samples)
            else:
                print("Samples is empty")
        
            print("pysam time:", t1 - t0)
            print("hts time:", t3 - t2)