Esempio n. 1
0
def test_small_input():
    """Smoke-test read simulation on a very short reference.

    Loads the KDE error model from ``data/ecoli.npz``, wraps a 10-base
    sequence in a ``SeqRecord`` and calls ``generator.simulate_read`` on it.
    Nothing is asserted about the result: the test passes as long as the
    call does not raise.
    """
    error_model = kde.KDErrorModel('data/ecoli.npz')
    sequence = Seq('AAAAACCCCC', IUPAC.unambiguous_dna)
    record = SeqRecord(sequence, id='my_genome', description='test genome')
    # The trailing positional arguments (1, 0) are forwarded as-is.
    generator.simulate_read(record, error_model, 1, 0)
Esempio n. 2
0
def test_kde_short():
    """Check the KDE error model yields a fixed read pair under seed 42.

    Only runs on Python 3; on older interpreters the test body is skipped
    and the test passes trivially. Both ``random`` and ``np.random`` are
    seeded with 42 before the model is loaded, then the sequences of the two
    records returned by ``generator.simulate_read`` are concatenated and
    compared against a hard-coded expected string.
    """
    if sys.version_info < (3,):
        return

    random.seed(42)
    np.random.seed(42)
    error_model = kde.KDErrorModel('data/ecoli.npz')
    sequence = Seq('AAACC' * 100, IUPAC.unambiguous_dna)
    record = SeqRecord(sequence, id='my_genome', description='test genome')
    first, second = generator.simulate_read(record, error_model, 1)[:2]
    # Sequence of the first record followed by the sequence of the second.
    combined = str(first.seq) + str(second.seq)
    assert combined == 'ACCAAACCAAACCAAACCAAGGTTTGGTTTGGTTTGGTGT'
Esempio n. 3
0
def test_basic():
    """Check the basic error model yields a fixed read tail under seed 42.

    Only runs on Python 3; on older interpreters the test body is skipped
    and the test passes trivially. Both ``random`` and ``np.random`` are
    seeded with 42, a read pair is simulated from a 1000-base reference
    using ``basic.BasicErrorModel``, and the last 15 characters of the two
    concatenated read sequences are compared against a hard-coded string.
    """
    if sys.version_info < (3,):
        return

    random.seed(42)
    np.random.seed(42)
    error_model = basic.BasicErrorModel()
    sequence = Seq('AAAAACCCCC' * 100, IUPAC.unambiguous_dna)
    record = SeqRecord(sequence, id='my_genome', description='test genome')
    first, second = generator.simulate_read(record, error_model, 1)[:2]
    # Sequence of the first record followed by the sequence of the second.
    combined = str(first.seq) + str(second.seq)
    assert combined[-15:] == 'TTTTGGGGGTTTTTG'
Esempio n. 4
0
                                                 sigma=community['stdev_log_abund'])
    community['sim_abund'].to_csv(abund_path, sep='\t')
    replicon['sim_copies'] = (replicon['genome_id'].map(community['sim_abund']) *
                              replicon['replicon_type']
                                      .map({'chromosome': 1,
                                            'plasmid': PLASMID_MULTI})
                                      .fillna(1))
    replicon['sim_nucleotides'] = replicon['sim_copies'] * replicon['size']
    replicon['sim_pread'] = (replicon['sim_nucleotides'] /
                             replicon['sim_nucleotides'].sum())
    replicon['sim_nreads'] = np.random.multinomial(nreads, replicon['sim_pread'])
    print(replicon[['genbank_id', 'size', 'sim_nreads']], file=sys.stderr)

    print('Simulating metagenome reads.', file=sys.stderr)
    model_path = os.path.join(iss.__path__[0], 'profiles', ERR_MODEL_NAME)
    err_model = KDErrorModel(model_path)
    with open(r1_path, 'w') as handle1, open(r2_path, 'w') as handle2:
        reads_so_far = 0
        for genome_id, g in replicon.groupby('genome_id'):
            print(f'Simulating reads for genome {genome_id}', file=sys.stderr)
            genome_path = os.path.join(genome_dir, genome_id + '.fn')
            seqs = seq_file_index(genome_path, 'fasta')
            for replicon_id, r in g.iterrows():
                print(f'Simulating reads for replicon {replicon_id}', file=sys.stderr)
                record = seqs[r.genbank_id]
                for i in tqdm(range(r.sim_nreads), initial=reads_so_far, total=nreads):
                    paired_reads = simulate_read(record, err_model, i)
                    write_seq(paired_reads[0], handle1, 'fastq-sanger')
                    write_seq(paired_reads[1], handle2, 'fastq-sanger')
                reads_so_far += r.sim_nreads