Beispiel #1
0
def _make_barcode_map(barcodes, rev_comp_mapping_barcodes):
    barcode_map = {}
    barcode_len = None
    for sample_id, barcode in barcodes.to_series().iteritems():
        if barcode_len is None:
            barcode_len = len(barcode)
        elif len(barcode) != barcode_len:
            raise ValueError('Barcodes of different lengths were detected: '
                             '%d != %d. Variable length barcodes are not '
                             'supported.' % (len(barcode), barcode_len))
        try:
            skbio.DNA(barcode)
        except ValueError as ve:
            if re.match(r'^ValueError\("Invalid characters in sequence[.,'
                        ' \n]*',
                        ve.__repr__()):
                raise ValueError("Invalid characters found in specified "
                                 "barcodes column within metadata file. "
                                 "Please confirm that the column: '%s' "
                                 "contains your per-sample barcodes."
                                 % (barcodes.name))
            else:
                raise

        if rev_comp_mapping_barcodes:
            barcode = str(skbio.DNA(barcode).reverse_complement())

        if barcode in barcode_map:
            raise ValueError('A duplicate barcode was detected. The barcode '
                             '%s was observed for samples %s and %s.'
                             % (barcode, sample_id, barcode_map[barcode]))
        barcode_map[barcode] = sample_id

    return barcode_map, barcode_len
Beispiel #2
0
    def _prepare_sequence_data(self):
        """Load the FASTA fixtures and build the expected alignment.

        Returns
        -------
        tuple
            ``(alignment, sequences, exp)``: the aligned fixture opened as
            ``AlignedDNAFASTAFormat``, the unaligned fixture opened as
            ``DNAFASTAFormat``, and the expected ``skbio.TabularMSA``.
        """
        def _record(residues, seq_id):
            # Every expected record carries an empty description.
            return skbio.DNA(residues,
                             metadata={'id': seq_id, 'description': ''})

        unaligned = DNAFASTAFormat(
            self.get_data_path('unaligned-dna-sequences-1.fasta'), mode='r')
        aligned = AlignedDNAFASTAFormat(
            self.get_data_path('aligned-dna-sequences-1.fasta'), mode='r')

        expected_msa = skbio.TabularMSA([
            _record('AGGGGG-', 'aln-seq-1'),
            _record('AGGGGGG', 'aln-seq-2'),
            _record('AGGGGGG', 'seq1'),
            _record('-GGGGGG', 'seq2'),
        ])

        return aligned, unaligned, expected_msa
def _gen_reads(sequence, f_primer, r_primer, trim_right, trunc_len, trim_left,
               identity, min_length, max_length, read_orientation):
    """Locate the primer-delimited amplicon in ``sequence`` and trim it.

    Matching is attempted in a fixed order: exact match on the sequence,
    exact match on its reverse complement, approximate match on the
    sequence, approximate match on the reverse complement. Forward attempts
    run only for ``read_orientation`` of ``'forward'`` or ``'both'``;
    reverse attempts only for ``'reverse'`` or ``'both'``.

    Returns ``None`` when nothing matches, when the untrimmed match is longer
    than a positive ``max_length``, or when the trimmed result is empty or
    shorter than a positive ``min_length``; otherwise the trimmed match.
    Non-positive ``trim_right``, ``trunc_len`` and ``trim_left`` disable the
    corresponding trimming step.
    """
    fwd = skbio.DNA(f_primer)
    rev = skbio.DNA(r_primer)
    use_forward = read_orientation in ('forward', 'both')
    use_reverse = read_orientation in ('reverse', 'both')

    # Searches are wrapped in lambdas so that neither a search nor a reverse
    # complement is computed unless that attempt is actually made.
    attempts = (
        (use_forward,
         lambda: _exact_match(sequence, fwd, rev)),
        (use_reverse,
         lambda: _exact_match(sequence.reverse_complement(), fwd, rev)),
        (use_forward,
         lambda: _approx_match(sequence, fwd, rev, identity)),
        (use_reverse,
         lambda: _approx_match(sequence.reverse_complement(), fwd, rev,
                               identity)),
    )
    amplicon = None
    for enabled, search in attempts:
        if enabled and not amplicon:
            amplicon = search()

    if not amplicon:
        return None
    # The maximum length applies to the match before any trimming.
    if max_length > 0 and len(amplicon) > max_length:
        return None

    if trim_right > 0:
        amplicon = amplicon[:-trim_right]
    if trunc_len > 0:
        amplicon = amplicon[:trunc_len]
    if trim_left > 0:
        amplicon = amplicon[trim_left:]

    too_short = min_length > 0 and len(amplicon) < min_length
    if too_short or not amplicon:
        return None
    return amplicon
Beispiel #4
0
    def test_sample_from_contig_set(self):
        # Integration test: a stand-in for the random function hands out
        # fixed start positions, and every returned read must map back onto
        # the concatenation of the two contigs at one of those positions.
        first_starts = [0, 5, 10, 15, 9, 12]
        second_starts = [40, 41, 42, 43]

        def mock(start, stop, n):
            # A ``start`` of 0 selects the first set of positions, anything
            # else the second set; ``stop`` is ignored.
            pattern = first_starts if start == 0 else second_starts
            return np.tile(pattern, 100)[:n]

        np.random.seed(1234)
        #             0123456789012345678901234567890123456789
        contigs = [
            skbio.DNA('ATGCAATTGGCCAAATTTGGGCCCAAAATTTTGGGGCCCC'),
            skbio.DNA('CGTACCGGTT'),
        ]
        joined = skbio.DNA.concat(contigs)

        reads = sample_from_contig_set(contigs, 100, 3, mock)

        starts = []
        for read in reads:
            mapped = self.remap(read)
            self.assertIn(mapped, joined)
            starts.append(joined.index(mapped))

        # Both contigs are expected to be fully represented by the observed
        # start positions (the original author noted this could fail only in
        # a rare stochastic scenario, with ~20 reads from the second contig).
        self.assertTrue(set(starts) == {0, 5, 10, 15, 9, 12, 40, 41, 42, 43})
def _gen_reads(sequence, f_primer, r_primer, trunc_len, trim_left, identity,
               min_length, max_length):
    """Locate the primer-delimited amplicon in ``sequence`` and trim it.

    Searches are tried in order until one yields a truthy result: exact match
    on the sequence, exact match on its reverse complement, approximate match
    on the sequence, approximate match on the reverse complement.

    Returns ``None`` when nothing matches, when the untrimmed match exceeds a
    positive ``max_length``, or when the trimmed result is empty or shorter
    than a positive ``min_length``; otherwise the trimmed match.
    """
    fwd = skbio.DNA(f_primer)
    rev = skbio.DNA(r_primer)

    # Lambdas keep each search (and each reverse complement) lazy.
    searches = (
        lambda: _exact_match(sequence, fwd, rev),
        lambda: _exact_match(sequence.reverse_complement(), fwd, rev),
        lambda: _approx_match(sequence, fwd, rev, identity),
        lambda: _approx_match(
            sequence.reverse_complement(), fwd, rev, identity),
    )
    amplicon = None
    for search in searches:
        amplicon = search()
        if amplicon:
            break
    if not amplicon:
        return None

    # The maximum length applies to the match before any trimming.
    if max_length > 0 and len(amplicon) > max_length:
        return None

    if trunc_len > 0:
        amplicon = amplicon[:trunc_len]
    if trim_left > 0:
        amplicon = amplicon[trim_left:]

    too_short = min_length > 0 and len(amplicon) < min_length
    if too_short or not amplicon:
        return None
    return amplicon
Beispiel #6
0
def _denoise_helper(biom_fp, track_fp, hashed_feature_ids):
    """Turn DADA2 output files into a table, sequences and metadata.

    ``biom_fp`` is parsed as a TSV feature table and ``track_fp`` as a
    tab-separated statistics file indexed by its first column. Sample IDs in
    both are rewritten with ``_filepath_to_sample``. When
    ``hashed_feature_ids`` is true, feature IDs (the sequences themselves)
    are replaced by their MD5 hex digests.

    Returns ``(table, rep_sequences, metadata)``.
    """
    _check_featureless_table(biom_fp)
    with open(biom_fp) as biom_fh:
        table = biom.Table.from_tsv(biom_fh, None, None, None)

    stats = pd.read_csv(track_fp, sep='\t', index_col=0)
    stats.index.name = 'sample-id'
    metadata = qiime2.Metadata(stats.rename(index=_filepath_to_sample))

    # DADA2 uses file names as sample IDs; keep only the sample ID part.
    sample_renames = {}
    for old_id in table.ids(axis='sample'):
        sample_renames[old_id] = _filepath_to_sample(old_id)
    table.update_ids(sample_renames, axis='sample', inplace=True)

    # DADA2 feature IDs are the sequences themselves.
    if not hashed_feature_ids:
        plain = (skbio.DNA(seq, metadata={'id': seq})
                 for seq in table.ids(axis='observation'))
        return table, DNAIterator(plain), metadata

    # Replace each sequence ID by the MD5 sum of the sequence.
    digests = {}
    for seq in table.ids(axis='observation'):
        digests[seq] = hashlib.md5(seq.encode('utf-8')).hexdigest()
    table.update_ids(digests, axis='observation', inplace=True)

    hashed = (skbio.DNA(seq, metadata={'id': digest})
              for seq, digest in digests.items())
    return table, DNAIterator(hashed), metadata
    def test_dereplicate_sequences_prefix(self):
        # Dereplicating the 'seqs-1' fixture with derep_prefix=True must give
        # the two features below, with matching counts and FASTA records.
        first_id = '4574b947a0159c0da35a1f30f989681a1d9f64ef'
        second_id = '16a1263bde4f2f99422630d1bb87935c4236d1ba'

        demux = QIIME1DemuxDirFmt(self.get_data_path('seqs-1'), 'r')

        counts = np.array([[2, 2],
                           [2, 0]])
        exp_table = biom.Table(counts, [first_id, second_id],
                               ['s2', 'sample1'])

        with redirected_stdio(stderr=os.devnull):
            obs_table, obs_sequences = dereplicate_sequences(
                sequences=demux, derep_prefix=True)

        # biom.Table equality depends on identifier order, so align the
        # observed table to the expected order first.
        expected_order = exp_table.ids(axis='observation')
        obs_table = obs_table.sort_order(expected_order, axis='observation')
        self.assertEqual(obs_table, exp_table)

        # sequences are reverse-sorted by abundance in output
        obs_seqs = list(skbio.io.read(str(obs_sequences),
                                      constructor=skbio.DNA, format='fasta'))
        exp_seqs = [
            skbio.DNA('AAACGTTACGGTTAACTATACATGCAGAAGACTAATCGG',
                      metadata={'id': first_id, 'description': 's2_1'}),
            skbio.DNA('ACGTACGTACGTACGTACGTACGTACGTACGTGCATGGTGCGACCG',
                      metadata={'id': second_id, 'description': 's2_42'}),
        ]
        self.assertEqual(obs_seqs, exp_seqs)
Beispiel #8
0
 def test_apply_mask_mask_all(self):
     # With every mask entry True, each of the three sequences is expected
     # to come back empty.
     all_true = np.array([True, True, True, True])
     observed = _apply_mask(self.msa1, all_true)

     expected = skbio.TabularMSA(
         [skbio.DNA('', metadata=dict(id=seq_id))
          for seq_id in ('s1', 's2', 's3')],
         minter='id')
     self.assertEqual(observed, expected)
Beispiel #9
0
def denoise(demultiplexed_seqs: SingleLanePerSampleSingleEndFastqDirFmt,
            pos_ref_filepath: str=None,
            neg_ref_filepath: str=None,
            mean_error: float=0.005,
            indel_prob: float=0.01,
            indel_max: int=3,
            trim_length: int=150,
            min_reads: int=0,
            min_size: int=2,
            negate: bool=False,
            jobs_to_start: int=1,
            hashed_feature_ids: bool=True) -> (biom.Table, DNAIterator):
    """Run ``deblur workflow`` on demultiplexed reads and collect the result.

    Parameters
    ----------
    demultiplexed_seqs
        Directory format whose string form is passed as ``--seqs-fp``.
    pos_ref_filepath, neg_ref_filepath : str, optional
        Passed as ``--pos-ref-db`` / ``--neg-ref-db`` when not ``None``.
    mean_error, indel_prob, indel_max, trim_length, min_reads, min_size
        Forwarded to the deblur options of the same name.
    negate : bool
        Adds ``--negate`` to the command when true.
    jobs_to_start : int
        Forwarded as ``--jobs-to-start``.
    hashed_feature_ids : bool
        When true, feature IDs (the sequences) are replaced by their MD5 hex
        digests in the table and in the representative sequences.

    Returns
    -------
    tuple
        ``(table, rep_sequences)``.

    Raises
    ------
    subprocess.CalledProcessError
        If the deblur command exits with a non-zero status.
    """
    with tempfile.TemporaryDirectory() as tmp:
        seqs_fp = str(demultiplexed_seqs)
        cmd = ['deblur', 'workflow',
               '--seqs-fp', seqs_fp,
               '--output-dir', tmp,
               '--mean-error', str(mean_error),
               '--indel-prob', str(indel_prob),
               '--indel-max', str(indel_max),
               '--trim-length', str(trim_length),
               '--min-reads', str(min_reads),
               '--min-size', str(min_size),
               # jobs_to_start used to be accepted but silently ignored.
               '--jobs-to-start', str(jobs_to_start),
               # NOTE(review): '-w' is always passed; presumably deblur's
               # keep-tmp-files switch -- confirm against the deblur CLI.
               '-w']
        if pos_ref_filepath is not None:
            cmd.append('--pos-ref-db')
            cmd.append(pos_ref_filepath)

        if neg_ref_filepath is not None:
            cmd.append('--neg-ref-db')
            cmd.append(neg_ref_filepath)

        if negate:
            cmd.append('--negate')

        subprocess.run(cmd, check=True)

        # code adapted from q2-dada2
        # The table must be loaded before the temporary directory goes away.
        table = biom.load_table(os.path.join(tmp, 'final.biom'))
        # Sample IDs are cut at the first underscore.
        sid_map = {id_: id_.split('_')[0] for id_ in table.ids(axis='sample')}
        table.update_ids(sid_map, axis='sample', inplace=True)

        if hashed_feature_ids:
            # Make feature IDs the md5 sums of the sequences.
            fid_map = {id_: hashlib.md5(id_.encode('utf-8')).hexdigest()
                       for id_ in table.ids(axis='observation')}
            table.update_ids(fid_map, axis='observation', inplace=True)

            rep_sequences = DNAIterator((skbio.DNA(k, metadata={'id': v},
                                                   lowercase='ignore')
                                         for k, v in fid_map.items()))
        else:
            rep_sequences = DNAIterator(
                (skbio.DNA(id_, metadata={'id': id_}, lowercase='ignore')
                 for id_ in table.ids(axis='observation')))

    return (table, rep_sequences)
Beispiel #10
0
def _denoise_helper(biom_fp, track_fp, hashed_feature_ids):
    """Turn DADA2 output files into a table, sequences and stats metadata.

    ``biom_fp`` is parsed as a TSV feature table. ``track_fp`` is read as a
    tab-separated statistics file; percentage-of-input columns are added for
    the 'filtered' and 'non-chimeric' counts (and for 'merged' when that
    column exists), missing values become 0, and the percentages are rounded
    to two decimals. Sample IDs are rewritten with ``_filepath_to_sample``.
    When ``hashed_feature_ids`` is true, feature IDs (the sequences
    themselves) are replaced by their MD5 hex digests.

    Returns ``(table, rep_sequences, metadata)``.
    """
    _check_featureless_table(biom_fp)
    with open(biom_fp) as biom_fh:
        table = biom.Table.from_tsv(biom_fh, None, None, None)

    stats = pd.read_csv(track_fp, sep='\t', index_col=0)
    stats.index.name = 'sample-id'
    stats = stats.rename(index=_filepath_to_sample)

    PASSED_FILTER = 'percentage of input passed filter'
    NON_CHIMERIC = 'percentage of input non-chimeric'
    MERGED = 'percentage of input merged'

    stats[PASSED_FILTER] = stats['filtered'] / stats['input'] * 100
    stats[NON_CHIMERIC] = stats['non-chimeric'] / stats['input'] * 100
    decimals = {PASSED_FILTER: 2, NON_CHIMERIC: 2}

    # The merged columns exist only for paired-end runs; they are placed
    # between 'denoised' and 'non-chimeric'.
    merged_columns = []
    if 'merged' in stats:
        stats[MERGED] = stats['merged'] / stats['input'] * 100
        decimals[MERGED] = 2
        merged_columns = ['merged', MERGED]

    ordered = (['input', 'filtered', PASSED_FILTER, 'denoised']
               + merged_columns
               + ['non-chimeric', NON_CHIMERIC])

    stats = stats[ordered].fillna(0).round(decimals)
    metadata = qiime2.Metadata(stats)

    # DADA2 uses file names as sample IDs; keep only the sample ID part.
    sample_renames = {}
    for old_id in table.ids(axis='sample'):
        sample_renames[old_id] = _filepath_to_sample(old_id)
    table.update_ids(sample_renames, axis='sample', inplace=True)

    # DADA2 feature IDs are the sequences themselves.
    if not hashed_feature_ids:
        plain = (skbio.DNA(seq, metadata={'id': seq})
                 for seq in table.ids(axis='observation'))
        return table, DNAIterator(plain), metadata

    # Replace each sequence ID by the MD5 sum of the sequence.
    digests = {}
    for seq in table.ids(axis='observation'):
        digests[seq] = hashlib.md5(seq.encode('utf-8')).hexdigest()
    table.update_ids(digests, axis='observation', inplace=True)

    hashed = (skbio.DNA(seq, metadata={'id': digest})
              for seq, digest in digests.items())
    return table, DNAIterator(hashed), metadata
Beispiel #11
0
    def _prepare_sequence_data(self):
        """Open the unaligned FASTA fixture and build the expected alignment.

        Returns
        -------
        tuple
            ``(input_sequences, exp)``: the fixture opened as
            ``DNAFASTAFormat`` and the expected ``skbio.TabularMSA``.
        """
        fixture_fp = self.get_data_path('unaligned-dna-sequences-1.fasta')
        unaligned = DNAFASTAFormat(fixture_fp, mode='r')

        expected_rows = (('seq1', 'AGGGGGG'), ('seq2', '-GGGGGG'))
        expected_msa = skbio.TabularMSA([
            skbio.DNA(residues, metadata={'id': seq_id, 'description': ''})
            for seq_id, residues in expected_rows
        ])

        return unaligned, expected_msa
Beispiel #12
0
    def test_empty_return(self):
        # A minimum length far beyond either input sequence is expected to
        # leave nothing behind.
        inp = pd.Series({
            's1': skbio.DNA('ACGTTGACA', metadata={'id': 's1'}),
            's2': skbio.DNA('AAN', metadata={'id': 's2'})
        })
        # An explicit dtype avoids the pandas warning about the changing
        # default dtype of an empty Series; only the (empty) index and values
        # are compared below, so the dtype itself does not affect the result.
        exp = pd.Series(dtype=object)
        obs = filter_seqs(inp, min_length=29000)

        self.assertEqual(list(obs.index), list(exp.index))
        self.assertEqual(list(obs), list(exp))
Beispiel #13
0
    def test_too_short_and_too_ambiguous(self):
        # With both an ambiguity ceiling and a minimum length, only 's1' is
        # expected to survive.
        def dna(residues, seq_id):
            return skbio.DNA(residues, metadata={'id': seq_id})

        candidates = pd.Series({
            's1': dna('ACGTTGACA', 's1'),
            's2': dna('AAN', 's2'),
        })
        survivors = pd.Series({'s1': dna('ACGTTGACA', 's1')})

        filtered = filter_seqs(candidates, max_proportion_ambiguous=.3,
                               min_length=4)

        self.assertEqual(list(filtered.index), list(survivors.index))
        self.assertEqual(list(filtered), list(survivors))
Beispiel #14
0
    def test_too_long(self):
        # With max_length=3 only the two-base 's2' is expected to survive.
        def dna(residues, seq_id):
            return skbio.DNA(residues, metadata={'id': seq_id})

        candidates = pd.Series({
            's1': dna('ACGTTGACA', 's1'),
            's2': dna('AA', 's2'),
        })
        survivors = pd.Series({'s2': dna('AA', 's2')})

        filtered = filter_seqs(candidates, max_length=3)

        self.assertEqual(list(filtered.index), list(survivors.index))
        self.assertEqual(list(filtered), list(survivors))
Beispiel #15
0
    def test_no_filter(self):
        # Called without any thresholds, filter_seqs is expected to return
        # its input unchanged.
        records = (
            ('s1', 'ACGTTNGACA'),
            ('s2', 'A'),
            ('s3', 'NNNNNN'),
        )
        unfiltered = pd.Series({
            seq_id: skbio.DNA(residues, metadata={'id': seq_id})
            for seq_id, residues in records
        })

        result = filter_seqs(unfiltered)

        self.assertEqual(list(result.index), list(unfiltered.index))
        self.assertEqual(list(result), list(unfiltered))
Beispiel #16
0
    def test_error_on_empty_alignment_conservation_boundary(self):
        # A single column holding three different bases: masking with
        # min_conservation=0.5 is expected to fail with a message reporting
        # that 0.00% of positions were retained.
        rows = (('seq1', 'A'), ('seq2', 'C'), ('seq3', 'G'))
        alignment1 = skbio.TabularMSA([
            skbio.DNA(base, metadata={'id': seq_id, 'description': ''})
            for seq_id, base in rows
        ])

        with self.assertRaisesRegex(
                ValueError,
                " 0.00% of positions were retained by the con"):
            mask(alignment1, max_gap_frequency=1.0, min_conservation=0.5)
Beispiel #17
0
    def test_create_position_map_all_gaps(self):
        # 's3' consists solely of gaps, so its position map is expected to
        # be empty.
        rows = (('s1', 'ACGT'), ('s2', 'AG-T'), ('s3', '----'))
        alignment = skbio.TabularMSA(
            [skbio.DNA(residues, metadata=dict(id=seq_id))
             for seq_id, residues in rows],
            minter='id')

        position_map = _create_position_map(alignment, 's3')

        npt.assert_array_equal(position_map, np.array([]))
Beispiel #18
0
 def test_get_iterator(self):
     # Each observation ID of the table is expected to come back as a DNA
     # record whose 'id' metadata equals the sequence itself.
     table = biom.Table(np.ones((3, 2)), ['ATCC', 'ATGG', 'CACA'],
                        ['S1', 'S2'])
     expected = [skbio.DNA(feature, metadata={'id': feature})
                 for feature in ('ATCC', 'ATGG', 'CACA')]

     observed = list(_get_featuredata_from_table(table))

     self.assertEqual(observed, expected)
Beispiel #19
0
    def test_apply_mask_mask_some(self):
        # Each case pairs a mask with the residues expected to remain for
        # s1, s2 and s3 after applying it to self.msa1. Cases run in order
        # and the first failing one stops the test.
        seq_ids = ('s1', 's2', 's3')
        cases = (
            ([False, True, True, True], ('A', 'A', '-')),
            ([False, True, True, False], ('AT', 'AT', '-T')),
            ([False, True, False, False], ('AGT', 'A-T', '--T')),
        )

        for flags, remaining in cases:
            observed = _apply_mask(self.msa1, np.array(flags))
            expected = skbio.TabularMSA(
                [skbio.DNA(residues, metadata=dict(id=seq_id))
                 for seq_id, residues in zip(seq_ids, remaining)],
                minter='id')
            self.assertEqual(observed, expected)
Beispiel #20
0
    def test_create_terminal_gap_mask_one_chrome(self):
        # The same terminal-gap mask is expected for the shared fixture and
        # for a second, locally built alignment.
        expected_mask = [True, False, False, False]

        fixture_mask = _create_terminal_gap_mask(self.msa1, self.mask5)
        npt.assert_array_equal(fixture_mask, expected_mask)

        rows = (('s1', 'ACG-'), ('s2', 'AG-T'), ('s3', '-C-T'))
        alignment = skbio.TabularMSA(
            [skbio.DNA(residues, metadata=dict(id=seq_id))
             for seq_id, residues in rows],
            minter='id')
        local_mask = _create_terminal_gap_mask(alignment, self.mask5)
        npt.assert_array_equal(local_mask, expected_mask)
Beispiel #21
0
 def test_invalid_conservation_threshold(self):
     # min_conservation values one machine epsilon below 0.0 or above 1.0
     # must be rejected with a ValueError.
     gap_only = skbio.TabularMSA([
         skbio.DNA('-', metadata={'id': seq_id, 'description': ''})
         for seq_id in ('seq1', 'seq2', 'seq3')
     ])
     eps = np.finfo(float).eps

     for out_of_range in (0.0 - eps, 1.0 + eps):
         with self.assertRaises(ValueError):
             mask(gap_only, min_conservation=out_of_range)
Beispiel #22
0
    def test_empty_input(self):
        # Masking must raise ValueError both for an alignment of zero-length
        # sequences and for an alignment without any sequences.
        zero_length_records = [
            skbio.DNA('', metadata={'id': seq_id, 'description': ''})
            for seq_id in ('seq1', 'seq2', 'seq3')
        ]

        for records in (zero_length_records, []):
            alignment = skbio.TabularMSA(records)
            with self.assertRaises(ValueError):
                mask(alignment)
    def test_basic(self):
        # tabulate_seqs is expected to write an index.html that mentions
        # both input records.
        seqs = DNAIterator(
            (s for s in (skbio.DNA('ACGT', metadata={'id': 'seq1'}),
                         skbio.DNA('AAAA', metadata={'id': 'seq2'}))))

        with tempfile.TemporaryDirectory() as output_dir:
            tabulate_seqs(output_dir, seqs)

            expected_fp = os.path.join(output_dir, 'index.html')
            self.assertTrue(os.path.exists(expected_fp))
            # Read the page once through a context manager so the handle is
            # closed before the temporary directory is removed (the previous
            # code opened the file twice and never closed it).
            with open(expected_fp) as fh:
                file_text = fh.read()
            self.assertTrue('ACGT</a>' in file_text)
            self.assertTrue('<td>seq2</td>' in file_text)
Beispiel #24
0
    def test_join_contigs(self):
        # Joining three contigs is expected to give their concatenation plus
        # the boundary offsets 0, 6, 12 and 16.
        contigs = [skbio.DNA(residues)
                   for residues in ('AATTGG', 'CCTTAA', 'ATAT')]

        joined, breaks = join_contigs(contigs)

        #                                0123456789012345
        self.assertEqual(joined, skbio.DNA('AATTGGCCTTAAATAT'))
        npt.assert_equal(breaks, np.array([0, 6, 12, 16]))
Beispiel #25
0
 def setUp(self):
     """Create the shared fixtures: the same three sequences as a
     per-character DataFrame block, as a Series of ``skbio.DNA`` and as an
     imported ``FeatureData[Sequence]`` artifact."""
     records = (('0', 'CATS'), ('1', 'WANT'), ('2', "CANS"))

     characters = [list(residues) for _, residues in records]
     labels = [seq_id for seq_id, _ in records]
     self.seq_block = [pd.DataFrame(data=characters, index=labels)]

     self.skbio_series = pd.Series(data={
         seq_id: skbio.DNA(residues, metadata={'id': seq_id})
         for seq_id, residues in records
     })
     self.seq_artifact = Artifact.import_data(
         'FeatureData[Sequence]', self.skbio_series, pd.Series)
Beispiel #26
0
 def _validate_seq(self, seq):
     if seq:
         # Will raise a `ValueError` on invalid DNA characters.
         skbio.DNA(seq, validate=True)
     else:
         # Empty sequence.
         raise Exception()
Beispiel #27
0
    def test_descriptive_stats_integration(self):
        # Eight poly-A records of varying length are tabulated; the rendered
        # index.html must contain every expected table cell listed below.
        records = (
            ('A', 'seq01'),
            ('AA', 'seq02'),
            ('AAA', 'seq03'),
            ('AAAA', 'seq04'),
            ('AAAA', 'seq05'),
            ('AAA', 'seq06'),
            ('AA', 'seq07'),
            ('AAAAAAAAAA', 'seq08'),
        )
        seqs = DNAIterator(
            skbio.DNA(residues, metadata={'id': seq_id})
            for residues, seq_id in records)

        # If all of these render in index.html, the function likely worked
        # as expected. Some cells repeat; the list mirrors the original
        # sequence of checks one for one.
        expected_cells = (
            '<td>8</td>',
            '<td>1</td>',
            '<td>10</td>',
            '<td>3.62</td>',
            '<td>9</td>',
            '<td>1</td>',
            '<td>1</td>',
            '<td>2</td>',
            '<td>3</td>',
            '<td>4</td>',
            '<td>6</td>',
            '<td>9</td>',
        )

        with tempfile.TemporaryDirectory() as output_dir:
            tabulate_seqs(output_dir, seqs)

            expected_fp = os.path.join(output_dir, 'index.html')

            with open(expected_fp) as fh:
                file_text = fh.read()
                for cell in expected_cells:
                    self.assertTrue(cell in file_text)
Beispiel #28
0
def _16(data: pd.Series) -> DNAFASTAFormat:
    """Write a Series of sequences to a new ``DNAFASTAFormat`` file.

    Each (index label, value) pair of ``data`` becomes one FASTA record: the
    value is wrapped in ``skbio.DNA`` and the label is stored as its ``id``
    metadata. Returns the populated format object.
    """
    ff = DNAFASTAFormat()
    with ff.open() as f:
        # ``Series.items()`` is used because ``Series.iteritems()`` was
        # removed in pandas 2.0; both yield the same (label, value) pairs.
        for id_, seq in data.items():
            sequence = skbio.DNA(seq, metadata={'id': id_})
            skbio.io.write(sequence, format='fasta', into=f)
    return ff
Beispiel #29
0
def _get_featuredata_from_table(table):
    """Extract the observations and interpret as skbio.DNA"""
    if table.is_empty():
        raise ValueError("No features")

    it = (skbio.DNA(i, metadata={'id': i})
          for i in table.ids(axis='observation'))
    return DNAIterator(it)
Beispiel #30
0
    def test_sample_from_contig_set_one_short(self):
        # Integration test: the second contig (10 bases) is shorter than the
        # requested read length of 15, yet the full number of reads must
        # still be returned.
        np.random.seed(1234)
        #             0123456789012345678901234567890123456789
        contigs = [
            skbio.DNA('ATGCAATTGGCCAAATTTGGGCCCAAAATTTTGGGGCCCC'),
            skbio.DNA('CGTACCGGTT'),
        ]
        requested_reads = 100
        read_length = 15

        reads = sample_from_contig_set(contigs, requested_reads, read_length,
                                       np.random.randint)

        self.assertEqual(requested_reads, len(reads))