def step1(hiclib_path,  # path of the hiclib folder on this machine
          dataset='Kalhor2012NB',
          sraid='SRR071231',
          readlen=40):  # each read is 40 bp long
    '''
    1. Map reads to the genome
    http://mirnylab.bitbucket.org/hiclib/tutorial/01_iterative_mapping.html
    '''
    # Adapted from the hiclib tutorial
    import os
    import logging

    from hiclib import mapping
    from mirnylib import h5dict, genome

    logging.basicConfig(level=logging.DEBUG)

    # A. Map the reads iteratively.
    mapping.iterative_mapping(
        bowtie_path=hiclib_path + '/bin/bowtie2/bowtie2',
        bowtie_index_path=hiclib_path + '/bin/bowtie2/index/hg19',
        fastq_path='../data/SRA/' + dataset + '/' + sraid + '/' + sraid + '.sra',
        out_sam_path='../data/SRA/' + sraid + '_1.bam',
        min_seq_len=25,
        len_step=5,
        seq_start=0,
        seq_end=readlen,
        nthreads=12,  # on Intel Core i7 CPUs 4 threads are as fast as 8,
                      # but leave some room for your other applications
        # max_reads_per_chunk=10000000,  # optional, for low-memory machines
        temp_dir='../data/SRA/',  # optional, keep temporary files here
        bowtie_flags='--very-sensitive',
        bash_reader=hiclib_path + '/bin/sra/bin/fastq-dump -Z')

    mapping.iterative_mapping(
        bowtie_path=hiclib_path + '/bin/bowtie2/bowtie2',
        bowtie_index_path=hiclib_path + '/bin/bowtie2/index/hg19',
        fastq_path='../data/SRA/' + dataset + '/' + sraid + '/' + sraid + '.sra',
        out_sam_path='../data/SRA/' + sraid + '_2.bam',
        min_seq_len=25,
        len_step=5,
        seq_start=readlen,
        seq_end=2 * readlen,
        nthreads=12,
        # max_reads_per_chunk=10000000,
        temp_dir='../data/SRA/',
        bowtie_flags='--very-sensitive',
        bash_reader=hiclib_path + '/bin/sra/bin/fastq-dump -Z')

    # B. Parse the mapped sequences into a Python data structure and
    #    assign the ultrasonic fragments to restriction fragments.
    mapped_reads = h5dict.h5dict(sraid + '_mapped_reads.hdf5')  # in the local folder
    genome_db = genome.Genome(hiclib_path + '/fasta/hg19', readChrms=['#', 'X'])

    mapping.parse_sam(
        sam_basename1='../data/SRA/' + sraid + '_1.bam',
        sam_basename2='../data/SRA/' + sraid + '_2.bam',
        out_dict=mapped_reads,
        genome_db=genome_db,
        enzyme_name='HindIII')
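# A minimal usage sketch (not from the original source): the hiclib path below
# is an illustrative placeholder, and the call assumes the ../data/SRA/ layout
# and bowtie2/sra-tools binaries expected by step1() are already in place.
if __name__ == '__main__':
    step1('/opt/hiclib',
          dataset='Kalhor2012NB',
          sraid='SRR071231',
          readlen=40)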
def collectMappedReads(bam_read1, bam_read2, mapped_reads, genome_db):
    global options
    global args

    mapping.parse_sam(
        sam_basename1=bam_read1,
        sam_basename2=bam_read2,
        out_dict=mapped_reads,
        genome_db=genome_db,
        enzyme_name=options.enzyme)
def doOne(inData, saveSams=True):
    file1, file2, outfile = inData
    print("Mapping {0} and {1} into {2}".format(*inData))

    for onefile in file1, file2:
        a = gzip.open(onefile, 'r')
        a.readline()
        length = len(a.readline()) - 1
        if length < 10:
            raise ValueError(
                "Length of your sequence is {0}. Something is wrong".format(length))
        minlen, step = calculateStep(length - seqSkipStart, minMapLen)

        mapping.iterative_mapping(
            bowtie_path=bowtiePath,
            bowtie_index_path=bowtieIndex,
            fastq_path=onefile,
            out_sam_path=os.path.join(samFolder, os.path.split(onefile)[1] + ".sam"),
            seq_start=seqSkipStart,
            min_seq_len=minlen,  # for bacteria the minimal mappable length is 15 bp,
                                 # so start with something slightly longer
            len_step=step,       # and go with the usual step
            nthreads=threads,    # on Intel Core i7 CPUs 4 threads are as fast as 8,
                                 # but leave some room for your other applications
            # max_reads_per_chunk=10000000,  # optional, for low-memory machines
            temp_dir=tmpDir,
            bowtie_flags=bowtieFlags,
        )

    os.remove(file1)
    os.remove(file2)

    # Second step. Parse the mapped sequences into a Python data structure and
    # assign the ultrasonic fragments to restriction fragments.
    mapped_reads = h5dict.h5dict(outfile)
    sf1, sf2 = [
        os.path.join(samFolder, os.path.split(onefile)[1] + ".sam")
        for onefile in [file1, file2]
    ]

    mapping.parse_sam(sam_basename1=sf1,
                      sam_basename2=sf2,
                      out_dict=mapped_reads,
                      genome_db=genome_db,
                      save_seqs=False,
                      maxReads=int(chunkSize * 1.6),
                      IDLen=50)

    for i in os.listdir(samFolder):
        if ((os.path.split(file1)[1] in i) or
                (os.path.split(file2)[1] in i)) and not saveSams:
            print("deleting", i)
            os.remove(os.path.join(samFolder, i))
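# A hypothetical call of doOne() with illustrative file names, assuming the
# module-level globals (bowtiePath, samFolder, genome_db, chunkSize, ...) used
# above are already set up. Note that doOne() deletes its input FASTQ files
# after mapping, so pass copies if you need to keep them.
doOne(('run_R1.fastq.gz', 'run_R2.fastq.gz', 'run_mapped_reads.hdf5'),
      saveSams=True)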
def func():
    mapping.parse_sam(
        sam_basename1='/exports/eddie/scratch/s1529682/bams/' + basename + '_fixed_1.fq.gz' + chunk,
        sam_basename2='/exports/eddie/scratch/s1529682/bams/' + basename + '_fixed_2.fq.gz' + chunk,
        out_dict=mapped_reads,
        genome_db=genome_db,
        enzyme_name='DpnII')

    fragments = fragmentHiC.HiCdataset(
        filename=fragments_file,
        genome=genome_db,
        maximumMoleculeLength=700,
        mode='w')

    # Load the parsed reads into the HiCdataset. The dangling-end filter is
    # applied at this stage, with maximumMoleculeLength specified at the
    # initiation of the object.
    fragments.parseInputData(dictLike=reads_file)
def map_reads(first_fq, second_fq, outfile, nice):
    # set the niceness of this sub-process:
    os.nice(nice)

    first_sam = first_fq.split(".fastq.gz")[0] + ".sam"
    second_sam = second_fq.split(".fastq.gz")[0] + ".sam"

    # map the first fastq file -> sam file
    length = check_len(first_fq)
    min_len, step_size = calculate_step(length - seq_skip_start, min_map_len)
    mapping.iterative_mapping(
        bowtie_path=bowtie_path,
        bowtie_index_path=bowtie_index,
        fastq_path=first_fq,
        out_sam_path=os.path.join(args.samdir, first_sam),
        min_seq_len=min_len,
        len_step=step_size,
        seq_start=seq_skip_start,
        nthreads=threads,
        bowtie_flags=bowtie_flags)

    # map the second fastq file -> sam file
    length = check_len(second_fq)
    min_len, step_size = calculate_step(length - seq_skip_start, min_map_len)
    mapping.iterative_mapping(
        bowtie_path=bowtie_path,
        bowtie_index_path=bowtie_index,
        fastq_path=second_fq,
        out_sam_path=os.path.join(args.samdir, second_sam),
        min_seq_len=min_len,
        len_step=step_size,
        seq_start=seq_skip_start,
        nthreads=threads,
        bowtie_flags=bowtie_flags)

    # parse the mapped sequences into the hdf5 dict structure and assign each
    # sonication (ultrasonic) fragment end to the restriction fragment it falls in
    out_dict = os.path.join(args.samdir, outfile)
    mapped_reads = h5dict.h5dict(out_dict)
    sf1, sf2 = [os.path.join(args.samdir, first_sam),
                os.path.join(args.samdir, second_sam)]
    mapping.parse_sam(sam_basename1=sf1,
                      sam_basename2=sf2,
                      out_dict=mapped_reads,
                      genome_db=genome_db,
                      save_seqs=False,
                      maxReads=10000000,
                      IDLen=50,
                      enzyme_name='HindIII')
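# Hypothetical invocation of map_reads() (file names are illustrative), assuming
# args, genome_db, bowtie paths and the other module-level settings used above
# were configured by the surrounding script. nice=0 keeps normal priority.
map_reads('sample_R1.fastq.gz', 'sample_R2.fastq.gz',
          outfile='sample_mapped_reads.hdf5', nice=0)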
def parse_bams(chromosome_names, cell_line, path, genome_version, enzyme):
    if not os.path.exists(path + 'maps/' + cell_line):
        os.mkdir(path + 'maps/' + cell_line)

    for chrm_list in chromosome_names:
        if len(chrm_list) > 1:
            mapped_reads = h5dict.h5dict(path + 'maps/' + cell_line +
                                         '/mapped_reads_full.hdf5')
        else:
            mapped_reads = h5dict.h5dict(path + 'maps/' + cell_line +
                                         '/mapped_reads_' + chrm_list[0] + '.hdf5')

        genome_db = genome.Genome('/home/magnitov/data/genomes/' + genome_version,
                                  gapFile='gap.txt',
                                  readChrms=chrm_list,
                                  forceOrder=True)

        mapping.parse_sam(
            sam_basename1=path + 'bam/' + cell_line + '/' + cell_line + '_R1.bam',
            sam_basename2=path + 'bam/' + cell_line + '/' + cell_line + '_R2.bam',
            out_dict=mapped_reads,
            genome_db=genome_db,
            enzyme_name=enzyme)
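# A hypothetical call of parse_bams() with illustrative values (cell line and
# path are placeholders): parse all numbered chromosomes plus X in one map, and
# chromosome 1 on its own, assuming <path>/bam/<cell_line>/<cell_line>_R1.bam
# and _R2.bam exist.
parse_bams(chromosome_names=[['#', 'X'], ['1']],
           cell_line='K562',
           path='/home/magnitov/data/hic/',
           genome_version='hg19',
           enzyme='HindIII')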
#    temp_dir=tmp_folder,  # optional, keep temporary files here
#    bowtie_flags='--very-sensitive',
#    bash_reader=None)  # '../../bin/sra/bin/fastq-dump -Z'

#mapping.iterative_mapping(
#    bowtie_path='../bin/bowtie2/bowtie2',
#    bowtie_index_path='../bin/bowtie2/index/' + genome_name,
#    fastq_path=FASTQ_fpath,
#    out_sam_path=out_sam_fpath + '_2.bam',
#    min_seq_len=25,
#    len_step=5,
#    seq_start=40,
#    seq_end=79,
#    nthreads=8,
##    max_reads_per_chunk=10000000,
#    temp_dir=tmp_folder,
#    bowtie_flags='--very-sensitive',
#    bash_reader=None)  # '../../bin/sra/bin/fastq-dump -Z'

# B. Parse the mapped sequences into a Python data structure and
#    assign the ultrasonic fragments to restriction fragments.
mapped_reads = h5dict.h5dict(maped_reads_filepath)
genome_db = genome.Genome('../fasta/' + genome_name, readChrms=['#', 'X', 'M'])

mapping.parse_sam(sam_basename1=out_sam_fpath + '_1.bam',
                  sam_basename2=out_sam_fpath + '_2.bam',
                  out_dict=mapped_reads,
                  genome_db=genome_db,
                  enzyme_name='MboI',
                  save_seqs=True)
fasta_dir, re_name, out_fname, in_dir = sys.argv[1:5]
in_prefices = sys.argv[5:]
basedir = os.path.split(os.path.abspath(out_fname))[0]

mapped_reads = []
for prefix in in_prefices:
    mapped_reads.append(h5dict.h5dict('%s/%s.hdf5' % (basedir, prefix)))

genome_db = genome.Genome(fasta_dir, readChrms=['#', 'X'], chrmFileTemplate="%s.fa")

for i, name in enumerate(mapped_reads):
    mapping.parse_sam(sam_basename1="%s/%s_1.bam" % (in_dir, in_prefices[i]),
                      sam_basename2="%s/%s_2.bam" % (in_dir, in_prefices[i]),
                      out_dict=name,
                      genome_db=genome_db,
                      enzyme_name=re_name)

for i, name in enumerate(mapped_reads):
    fragments = fragmentHiC.HiCdataset(filename='temp',
                                       genome=genome_db,
                                       maximumMoleculeLength=500,
                                       mode='w',
                                       enzymeName=re_name,
                                       inMemory=True)
    # use the prefix for this iteration, not the stale loop variable from above
    fragments.parseInputData(dictLike="%s/%s.hdf5" % (basedir, in_prefices[i]))
    if i != len(mapped_reads) - 1:
        fragments.save("%s/%s_data.hdf5" % (basedir, in_prefices[i]))
    else:
        frag_files = []
    # max_reads_per_chunk=10000000,  # optional, for low-memory machines
    temp_dir=tmp_folder,  # optional, keep temporary files here
    bowtie_flags='--very-sensitive',
    bash_reader='../../bin/sra/bin/fastq-dump -Z')

mapping.iterative_mapping(
    bowtie_path='../../bin/bowtie2/bowtie2',
    bowtie_index_path='../../bin/bowtie2/index/' + genome_name,
    fastq_path=FASTQ_fpath,
    out_sam_path=out_sam_fpath + '_2.bam',
    min_seq_len=25,
    len_step=5,
    seq_start=50,
    seq_end=99,
    nthreads=8,
    # max_reads_per_chunk=10000000,
    temp_dir=tmp_folder,
    bowtie_flags='--very-sensitive',
    bash_reader='../../bin/sra/bin/fastq-dump -Z')

# B. Parse the mapped sequences into a Python data structure and
#    assign the ultrasonic fragments to restriction fragments.
mapped_reads = h5dict.h5dict(maped_reads_filepath)
genome_db = genome.Genome('../../fasta/' + genome_name, readChrms=['#', 'X'])

mapping.parse_sam(sam_basename1=out_sam_fpath + '_1.bam',
                  sam_basename2=out_sam_fpath + '_2.bam',
                  out_dict=mapped_reads,
                  genome_db=genome_db,
                  enzyme_name='HindIII')
    temp_dir='tmp',  # optional, keep temporary files here
    bowtie_flags='--very-sensitive')

mapping.iterative_mapping(
    bowtie_path=bowtiePath,
    bowtie_index_path=bowtieIndex,
    fastq_path=file2,
    out_sam_path='sams/%s_2.bam' % expName,
    min_seq_len=10,
    len_step=3,
    seq_start=0,
    seq_end=40,
    nthreads=4,  # on Intel Core i7 CPUs 4 threads are as fast as 8,
                 # but leave some room for your other applications
    # max_reads_per_chunk=10000000,  # optional, for low-memory machines
    temp_dir='tmp',  # optional, keep temporary files here
    bowtie_flags='--very-sensitive')

# B. Parse the mapped sequences into a Python data structure and
#    assign the ultrasonic fragments to restriction fragments.
mapped_reads = h5dict.h5dict('caul/%s' % expName)
genome_db = genome.Genome('../data/caul', chrmFileTemplate="%s.fa", readChrms=[])

mapping.parse_sam(
    sam_basename1='sams/%s_1.bam' % expName,
    sam_basename2='sams/%s_2.bam' % expName,
    out_dict=mapped_reads,
    genome_db=genome_db,
    enzyme_name='BglII')
import os
import logging

from hiclib import mapping
from mirnylib import h5dict, genome

logging.basicConfig(level=logging.DEBUG)

# B. Parse the mapped sequences into a Python data structure and
#    assign the ultrasonic fragments to restriction fragments.
mapped_reads_Sp1 = h5dict.h5dict('../../data/serov/mapped_reads_Sp1.hdf5')
genome_db = genome.Genome('../../fasta/mm10', readChrms=['#', 'X'])

mapping.parse_sam(
    sam_basename1='../../data/serov/HiC_Sp1_1.bam',
    sam_basename2='../../data/serov/HiC_Sp1_2.bam',
    out_dict=mapped_reads_Sp1,
    genome_db=genome_db,
    enzyme_name='HindIII')
    # max_reads_per_chunk=10000000,  # optional, for low-memory machines
    temp_dir='../../data/sample/tmp',  # optional, keep temporary files here
    bowtie_flags='--very-sensitive',
    bash_reader='../../bin/sra/bin/fastq-dump -Z')

mapping.iterative_mapping(
    bowtie_path='../../bin/bowtie2/bowtie2',
    bowtie_index_path='../../bin/bowtie2/index/hg19',
    fastq_path='../../data/sample/SRR027956.sra',
    out_sam_path='../../data/sample/SRR027056_2.bam',
    min_seq_len=25,
    len_step=5,
    seq_start=76,
    seq_end=151,
    nthreads=4,
    # max_reads_per_chunk=10000000,
    temp_dir='../../data/sample/tmp',
    bowtie_flags='--very-sensitive',
    bash_reader='../../bin/sra/bin/fastq-dump -Z')

# B. Parse the mapped sequences into a Python data structure and
#    assign the ultrasonic fragments to restriction fragments.
mapped_reads = h5dict.h5dict('../../data/sample/mapped_reads.hdf5')
genome_db = genome.Genome('../../fasta/hg19', readChrms=['#', 'X'])

mapping.parse_sam(sam_basename1='../../data/sample/SRR027056_1.bam',
                  sam_basename2='../../data/sample/SRR027056_2.bam',
                  out_dict=mapped_reads,
                  genome_db=genome_db,
                  enzyme_name='HindIII')
import sys
import os

from hiclib import mapping, fragmentHiC
from mirnylib import h5dict, genome

fasta_dir, re_name, out_fname, in_dir = sys.argv[1:5]
in_prefices = sys.argv[5:]
basedir = os.path.split(os.path.abspath(out_fname))[0]

mapped_reads = []
for prefix in in_prefices:
    mapped_reads.append(h5dict.h5dict('%s/%s.hdf5' % (basedir, prefix)))

genome_db = genome.Genome(fasta_dir, readChrms=['#', 'X'], chrmFileTemplate="%s.fa")

for i, name in enumerate(mapped_reads):
    mapping.parse_sam(
        sam_basename1="%s/%s_1.bam" % (in_dir, in_prefices[i]),
        sam_basename2="%s/%s_2.bam" % (in_dir, in_prefices[i]),
        out_dict=name,
        genome_db=genome_db,
        enzyme_name=re_name)

for i, name in enumerate(mapped_reads):
    fragments = fragmentHiC.HiCdataset(
        filename='temp',
        genome=genome_db,
        maximumMoleculeLength=500,
        mode='w',
        enzymeName=re_name,
        inMemory=True)
    # use the prefix for this iteration, not the stale loop variable from above
    fragments.parseInputData(dictLike="%s/%s.hdf5" % (basedir, in_prefices[i]))
    if i != len(mapped_reads) - 1:
        fragments.save("%s/%s_data.hdf5" % (basedir, in_prefices[i]))
    else:
#!/usr/bin/env python
import logging

from hiclib import mapping
from mirnylib import h5dict, genome

logging.basicConfig(level=logging.DEBUG)

mapped_reads = h5dict.h5dict('./mapped_reads.hdf5')
genome_db = genome.Genome('../Ref/hg19', readChrms=['#', 'X'])

mapping.parse_sam(
    sam_basename1='../data/SRR1658595_10M_1.bam',
    sam_basename2='../data/SRR1658595_10M_2.bam',
    out_dict=mapped_reads,
    genome_db=genome_db,
    enzyme_name='MboI')
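# The parsed dictionary is typically loaded into a fragment-level dataset next,
# as in the HiCdataset snippets elsewhere in this collection. A minimal sketch
# under assumed values (output file name and maximumMoleculeLength=500 are
# illustrative), reusing genome_db from the script above:
from hiclib import fragmentHiC

fragments = fragmentHiC.HiCdataset(
    filename='./fragments.hdf5',   # hypothetical output path
    genome=genome_db,
    maximumMoleculeLength=500,
    mode='w')
# parseInputData applies the dangling-end filter using maximumMoleculeLength
fragments.parseInputData(dictLike='./mapped_reads.hdf5')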
import sys

from hiclib import mapping, fragmentHiC
from mirnylib import h5dict, genome

basedir = sys.argv[1]

mapped_reads1 = h5dict.h5dict('%s/Data/Timing/mapped_reads1.hdf5' % basedir)
mapped_reads2 = h5dict.h5dict('%s/Data/Timing/mapped_reads2.hdf5' % basedir)
mapped_reads3 = h5dict.h5dict('%s/Data/Timing/mapped_reads3.hdf5' % basedir)
genome_db = genome.Genome('%s/Data/Genome/mm9_fasta' % basedir,
                          readChrms=['1'],
                          chrmFileTemplate="%s.fa")

mapping.parse_sam(
    sam_basename1='%s/Data/Timing/SRR443886_sub_1.bam' % basedir,
    sam_basename2='%s/Data/Timing/SRR443886_sub_2.bam' % basedir,
    out_dict=mapped_reads1,
    genome_db=genome_db,
    enzyme_name='NcoI')
mapping.parse_sam(
    sam_basename1='%s/Data/Timing/SRR443887_sub_1.bam' % basedir,
    sam_basename2='%s/Data/Timing/SRR443887_sub_2.bam' % basedir,
    out_dict=mapped_reads2,
    genome_db=genome_db,
    enzyme_name='NcoI')
mapping.parse_sam(
    sam_basename1='%s/Data/Timing/SRR443888_sub_1.bam' % basedir,
    sam_basename2='%s/Data/Timing/SRR443888_sub_2.bam' % basedir,
    out_dict=mapped_reads3,
    genome_db=genome_db,
    enzyme_name='NcoI')
mapping.iterative_mapping(
    bowtie_path=bowtiePath,
    bowtie_index_path=bowtieIndex,
    fastq_path=file1,
    out_sam_path='{0}/{1}_2.bam'.format(samFolder, expName),
    min_seq_len=minlen,
    len_step=step,
    nthreads=threads,  # on Intel Core i7 CPUs 4 threads are as fast as 8,
                       # but leave some room for your other applications
    # max_reads_per_chunk=10000000,  # optional, for low-memory machines
    temp_dir=tmpDir,
    seq_start=length,
    seq_end=2 * length,
    bash_reader="fastq-dump -Z",
    bowtie_flags=" --very-sensitive ",
)

# Second step. Parse the mapped sequences into a Python data structure and
# assign the ultrasonic fragments to restriction fragments.
mapped_reads = h5dict.h5dict(finalName)

mapping.parse_sam(
    sam_basename1='{0}/{1}_1.bam'.format(samFolder, expName),
    sam_basename2='{0}/{1}_2.bam'.format(samFolder, expName),
    out_dict=mapped_reads,
    genome_db=genome_db,
    save_seqs=False)

os.remove(lockName)