Beispiel #1
0
def run(acc, splitNum, splitNo):
    """Print the reads and fragment bases of one slice of an accession.

    The read collection is divided into ``splitNum`` roughly equal slices
    and only slice ``splitNo`` is printed (``splitNo == 1`` selects the
    first slice).

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: Number of slices the read collection is divided into.
        splitNo: Number of the slice to print.
    """
    with NGS.openReadCollection(acc) as collection:
        name = collection.getName()

        # Window of rows that belongs to the requested slice; the end is
        # clamped so rounding can never push it past the last row.
        total = collection.getReadCount()
        per_split = total / splitNum
        start = int(round(per_split * (splitNo - 1)))
        stop = min(int(round(per_split * splitNo)), total)

        with collection.getReadRange(start + 1, stop - start, Read.all) as reads:
            spots = 0
            while reads.nextRead():
                spots += 1
                print(reads.getReadId())
                # Walk the fragments of the current read, skipping empty ones.
                while reads.nextFragment():
                    bases = reads.getFragmentBases()
                    if not bases:
                        continue
                    state = "aligned" if reads.isAligned() else "unaligned"
                    print("\t{} - {}".format(bases, state))
                print("\n")
            print("Read {} spots for {}".format(spots, name))
Beispiel #2
0
def run(acc, splitNum, splitNo):
    """Dump one slice of an accession's reads, fragment by fragment.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: How many slices the read rows are divided into.
        splitNo: Which slice to dump (``1`` is the first slice).
    """
    with NGS.openReadCollection(acc) as coll:
        label = coll.getName()

        # Derive the row window for the requested slice.
        row_count = coll.getReadCount()
        slice_len = row_count / splitNum
        lo = int(round(slice_len * (splitNo - 1)))
        hi = int(round(slice_len * splitNo))
        hi = row_count if hi > row_count else hi

        with coll.getReadRange(lo + 1, hi - lo, Read.all) as cursor:
            seen = 0
            while True:
                if not cursor.nextRead():
                    break
                seen += 1
                print(cursor.getReadId())
                # Print every non-empty fragment of this read.
                while cursor.nextFragment():
                    seq = cursor.getFragmentBases()
                    if seq:
                        print("\t{} - {}".format(
                            seq,
                            "aligned" if cursor.isAligned() else "unaligned"))
                print("\n")
            print("Read {} spots for {}".format(seen, label))
Beispiel #3
0
def run(acc, splitNum, splitNo):
    """Print one slice of an accession's primary alignments as tab-separated rows.

    Each row holds read id, reference spec, alignment position, short
    CIGAR, fragment bases and an aligned/unaligned marker.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: Number of slices the alignments are divided into.
        splitNo: Number of the slice to print (``1`` is the first slice).
    """
    with NGS.openReadCollection(acc) as collection:
        name = collection.getName()

        # Row window of the requested slice, clamped to the alignment count.
        total = collection.getAlignmentCount()
        per_split = total / splitNum
        begin = int(round(per_split * (splitNo - 1)))
        end = min(int(round(per_split * splitNo)), total)

        with collection.getAlignmentRange(
                begin + 1, end - begin, Alignment.primaryAlignment) as alignments:
            shown = 0
            while alignments.nextAlignment():
                columns = (
                    alignments.getReadId(),
                    alignments.getReferenceSpec(),
                    alignments.getAlignmentPosition(),
                    alignments.getShortCigar(False),
                    alignments.getFragmentBases(),
                    "aligned" if alignments.isAligned() else "unaligned",
                )
                print("{}\t{}\t{}\t{}\t{}\t{}".format(*columns))
                shown += 1
            print("Read {} alignments for {}".format(shown, name))
Beispiel #4
0
def sra_reader(accn, batch_size=1000, max_reads=None):
    """Iterates through a read collection for a given accession number using
    the ngs-lib python bindings.

    Reads are requested from SRA in consecutive batches of at most
    ``batch_size`` reads, starting at read 1.

    Args:
        accn: The accession number
        batch_size: The maximum number of reads to request in each call to SRA
        max_reads: The total number of reads to process, or all reads in the
            SRA run if None (a value of 0 is treated like None)

    Yields:
        Each pair of reads (see ``sra_read_pair``)
    """
    with NGS.openReadCollection(accn) as run:
        read_count = run.getReadCount()
        if max_reads:
            max_reads = min(read_count, max_reads)
        else:
            max_reads = read_count
        # Read rows are 1-based and ``max_reads`` itself must be included, so
        # the range has to stop at ``max_reads + 1``. Stopping at ``max_reads``
        # dropped the final read whenever ``max_reads - 1`` was a multiple of
        # ``batch_size`` (and yielded nothing at all for ``max_reads == 1``).
        for first_read in range(1, max_reads + 1, batch_size):
            cur_batch_size = min(batch_size, max_reads - first_read + 1)
            with run.getReadRange(first_read, cur_batch_size,
                                  Read.all) as read:
                for _ in range(cur_batch_size):
                    read.nextRead()
                    yield sra_read_pair(read)
Beispiel #5
0
 def test_ReadGroupIterator_ThrowBeforeNext(self):
     # Reading the name before the iterator has been advanced must raise
     # ErrorMsg; reaching the end of the method without it is a failure.
     groups = NGS.openReadCollection(PrimaryOnly).getReadGroups()
     try:
         groups.getName()
     except ErrorMsg:
         return
     self.fail()
Beispiel #6
0
def run(acc, splitNum, splitNo):  # this function doesn't release NGS objects however it might
    """Print the reads and fragment bases of one slice of an accession.

    Unlike a ``with``-based variant, the NGS objects opened here are not
    released explicitly.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: Number of slices the read rows are divided into.
        splitNo: Number of the slice to print (``1`` is the first slice).
    """
    collection = NGS.openReadCollection(acc)
    name = collection.getName()

    # Row window of the requested slice, clamped to the read count.
    total = collection.getReadCount()
    per_split = total / splitNum
    begin = int(round(per_split * (splitNo - 1)))
    end = min(int(round(per_split * splitNo)), total)

    reads = collection.getReadRange(begin + 1, end - begin, Read.all)
    spots = 0
    while reads.nextRead():
        spots += 1
        print(reads.getReadId())
        # Print each non-empty fragment together with its alignment state.
        while reads.nextFragment():
            bases = reads.getFragmentBases()
            if not bases:
                continue
            prefix = "\t" + bases + " - "
            print(prefix + ("aligned" if reads.isAligned() else "unaligned"))
        print("\n")
    print("Read {} spots for {}".format(spots, name))
Beispiel #7
0
def run(acc, splitNum, splitNo):  # this function doesn't release NGS objects however it might
    """Print one slice of an accession's primary alignments as tab-separated rows.

    Each row holds read id, reference spec, alignment position, short
    CIGAR, fragment bases and an aligned/unaligned marker. The NGS objects
    opened here are not released explicitly.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: Number of slices the alignments are divided into.
        splitNo: Number of the slice to print (``1`` is the first slice).
    """
    collection = NGS.openReadCollection(acc)
    name = collection.getName()

    # Row window of the requested slice, clamped to the alignment count.
    total = collection.getAlignmentCount()
    per_split = total / splitNum
    begin = int(round(per_split * (splitNo - 1)))
    end = min(int(round(per_split * splitNo)), total)

    alignments = collection.getAlignmentRange(
        begin + 1, end - begin, Alignment.primaryAlignment)
    shown = 0
    while alignments.nextAlignment():
        columns = (
            alignments.getReadId(),
            alignments.getReferenceSpec(),
            str(alignments.getAlignmentPosition()),
            alignments.getShortCigar(False),
            alignments.getFragmentBases(),
            "aligned" if alignments.isAligned() else "unaligned",
        )
        print("\t".join(columns))
        shown += 1
    print("Read {} alignments for {}".format(shown, name))
Beispiel #8
0
def run(acc, splitNum, splitNo):  # this function doesn't release NGS objects however it might
    """Dump one slice of an accession's reads, fragment by fragment.

    The NGS objects opened here are not released explicitly.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: How many slices the read rows are divided into.
        splitNo: Which slice to dump (``1`` is the first slice).
    """
    coll = NGS.openReadCollection(acc)
    label = coll.getName()

    # Derive the row window for the requested slice.
    row_count = coll.getReadCount()
    slice_len = row_count / splitNum
    lo = int(round(slice_len * (splitNo - 1)))
    hi = int(round(slice_len * splitNo))
    hi = row_count if hi > row_count else hi

    cursor = coll.getReadRange(lo + 1, hi - lo, Read.all)
    seen = 0
    while True:
        if not cursor.nextRead():
            break
        seen += 1
        print(cursor.getReadId())
        # Emit every non-empty fragment of the current read.
        while cursor.nextFragment():
            seq = cursor.getFragmentBases()
            if seq:
                lead = "\t" + seq + " - "
                print(lead + ("aligned" if cursor.isAligned() else "unaligned"))
        print("\n")
    print("Read {} spots for {}".format(seen, label))
Beispiel #9
0
def run(acc, splitNum, splitNo):  # this function doesn't release NGS objects however it might
    """Print one slice of an accession's primary alignments as tab-separated rows.

    Each row holds read id, reference spec, alignment position, short CIGAR
    and fragment bases. The NGS objects opened here are not released
    explicitly.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: Number of slices the alignments are divided into.
        splitNo: Number of the slice to print (``1`` is the first slice).
    """
    coll = NGS.openReadCollection(acc)
    label = coll.getName()

    # Derive the row window for the requested slice.
    row_count = coll.getAlignmentCount()
    slice_len = row_count / splitNum
    lo = int(round(slice_len * (splitNo - 1)))
    hi = min(int(round(slice_len * splitNo)), row_count)

    cursor = coll.getAlignmentRange(lo + 1, hi - lo, Alignment.primaryAlignment)
    printed = 0
    while cursor.nextAlignment():
        row = (
            cursor.getReadId(),
            cursor.getReferenceSpec(),
            str(cursor.getAlignmentPosition()),
            cursor.getShortCigar(False),
            cursor.getFragmentBases(),
        )
        print("\t".join(row))
        printed += 1
    print("Read {} alignments for {}".format(printed, label))
 def stream_reads(self, acc, event, splitNum=1, splitNo=1, max_reads=20000):
     '''
     Write the fragments of one slice of an accession to a pipe as
     four-line ``@id / bases / + / qualities`` records.

     This is a blocking task, it needs to be run in an executor

     Args:
         acc: Accession to open with ``NGS.openReadCollection``.
         event: Object whose ``clear()`` is called once the pipe is open.
         splitNum: Number of slices the read rows are divided into.
         splitNo: Number of the slice to stream (``1`` is the first slice).
         max_reads: Maximum number of reads to stream before stopping;
             ``None`` removes the limit. Defaults to the previously
             hard-coded value of 20000.

     Returns:
         None
     '''
     # open requested accession using SRA implementation of the API
     print(f'Streaming {acc}', file=sys.stderr)
     pipe_path = self.get_pipe(acc)
     # The context manager flushes and closes the pipe before it is
     # unlinked; previously the file object was never closed.
     with open(pipe_path, 'w') as pipe:
         event.clear()
         with NGS.openReadCollection(acc) as run:
             # compute window to iterate through
             MAX_ROW = run.getReadCount()
             chunk = MAX_ROW/splitNum
             first = int(round(chunk*(splitNo-1)))
             next_first = int(round(chunk*(splitNo)))
             if next_first > MAX_ROW:
                 next_first = MAX_ROW
             # start iterator on reads
             with run.getReadRange(first+1, next_first-first, Read.all) as it:
                 i = 0
                 while it.nextRead():
                     i += 1
                     if max_reads is not None and i > max_reads:
                         break
                     while it.nextFragment():
                         bases = it.getFragmentBases()
                         qualities = it.getFragmentQualities()
                         ids = it.getFragmentId()
                         if bases:
                             read = f'@{ids}\n{bases}\n+\n{qualities}'
                             print(read, file=pipe)
     os.unlink(pipe_path)
     print(f'Done streaming for {acc}')
     return None
Beispiel #11
0
def run(acc, splitNum, splitNo):
    """Print one slice of an accession's primary alignments as tab-separated rows.

    Each row holds read id, reference spec, alignment position, short CIGAR
    and fragment bases.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        splitNum: Number of slices the alignments are divided into.
        splitNo: Number of the slice to print (``1`` is the first slice).
    """
    with NGS.openReadCollection(acc) as coll:
        label = coll.getName()

        # Derive the row window for the requested slice.
        row_count = coll.getAlignmentCount()
        slice_len = row_count / splitNum
        lo = int(round(slice_len * (splitNo - 1)))
        hi = min(int(round(slice_len * splitNo)), row_count)

        with coll.getAlignmentRange(lo + 1, hi - lo,
                                    Alignment.primaryAlignment) as cursor:
            printed = 0
            while cursor.nextAlignment():
                row = (
                    cursor.getReadId(),
                    cursor.getReferenceSpec(),
                    cursor.getAlignmentPosition(),
                    cursor.getShortCigar(False),
                    cursor.getFragmentBases(),
                )
                print("{}\t{}\t{}\t{}\t{}".format(*row))
                printed += 1
            print("Read {} alignments for {}".format(printed, label))
Beispiel #12
0
def run(acc, refName, start, stop):
    """Print a pileup listing for a region of one reference.

    For every pileup position a line is printed with the reference spec,
    the position plus one, the reference base, the pileup depth, an
    encoded string describing the pileup events, and the concatenated
    alignment qualities.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: Name of the reference to pile up.
        start: First position of the region; ``start - 1`` is passed to
            ``getPileupSlice``.
        stop: Last position of the region (inclusive).
    """
    with NGS.openReadCollection(acc) as collection:
        name = collection.getName()

        with collection.getReference(refName) as reference:
            with reference.getPileupSlice(start - 1, stop - start + 1) as pileups:
                count = 0
                while pileups.nextPileup():
                    # Collect the pieces in lists and join once per position.
                    base_parts = []
                    qual_parts = []

                    header = "{}\t{}\t{}\t{}".format(
                        pileups.getReferenceSpec(),
                        pileups.getReferencePosition() + 1,
                        pileups.getReferenceBase(),
                        pileups.getPileupDepth(),
                    )
                    while pileups.nextPileupEvent():
                        flags = pileups.getEventType()
                        reverse = (flags & PileupEvent.alignment_minus_strand) != 0

                        if (flags & PileupEvent.alignment_start) != 0:
                            base_parts.append("^")
                            base_parts.append(chr(pileups.getMappingQuality() + 33))

                        if (flags & PileupEvent.insertion) != 0:
                            inserted = pileups.getInsertionBases()
                            base_parts.append("+")
                            base_parts.append(str(len(inserted)))
                            base_parts.append(inserted.lower() if reverse else inserted)

                        # The low three bits carry the basic event kind; the
                        # symbol differs by strand.
                        kind = flags & 7
                        if kind == PileupEvent.deletion:
                            base_parts.append("<" if reverse else ">")
                        elif kind == PileupEvent.match:
                            base_parts.append("," if reverse else ".")
                        elif kind == PileupEvent.mismatch:
                            called = str(pileups.getAlignmentBase())
                            base_parts.append(called.lower() if reverse else called.upper())

                        if (flags & PileupEvent.alignment_stop) != 0:
                            base_parts.append("$")

                        qual_parts.append(pileups.getAlignmentQuality())

                    count += 1
                    print("{}\t{}\t{}".format(
                        header, "".join(base_parts), "".join(qual_parts)))
                print("Read {} pileups for {}".format(count, name))
Beispiel #13
0
 def test_ReferenceWindow_Slice_Filtered_Category(self):
     # A slice filtered to primary alignments must yield exactly these
     # three alignment ids, in this order, and nothing else.
     alignments = (
         NGS.openReadCollection(WithSecondary)
         .getReference("gi|169794206|ref|NC_010410.1|")
         .getAlignmentSlice(516000, 100000, Alignment.primaryAlignment)
     )
     for suffix in (".PA.33", ".PA.34", ".PA.35"):  # no secondary
         self.assertTrue(alignments.nextAlignment())
         self.assertEqual(WithSecondary + suffix, alignments.getAlignmentId())
     self.assertFalse(alignments.nextAlignment())
Beispiel #14
0
def search_reads(request):
    """ Searches a genomic interval in the NCBI API and returns a list of converted GA4GH alignments

    Args:
        request: SearchReadsRequest. If `request.page_size` is set, up to this many records are returned.
                 If not set, `_DEFAULT_PAGE_SIZE` is used as the page size.
                 `request.start` can be overridden by providing a greater start position in `request.page_token`.
                 If provided, `request.page_token` is parsed to an integer and compared with `request.start`.
                 In that case, the greater of the two is used as the zero-based inclusive interval start.
                 A token that cannot be parsed (or is None) is ignored.

    Returns:
        Tuple:
            1) List of converted alignments in GA4GH schema
            2) Maximum zero-based exclusive alignment end position over all alignments returned.
               This value can be set as request.page_token (after parsing to a string) for a subsequent
               request; in that case, streaming will pick up where it left off after this request.

    """
    # We are assuming the read group IDs are singleton
    run_accession = request.read_group_ids[0]
    reference_name = request.reference_id
    # Choose the start position between request.start and request.page_token.
    # int() is used instead of the Python-2-only long(); on Python 2 int()
    # promotes to long automatically. Only the parse itself is guarded so
    # unrelated errors are not swallowed.
    try:
        token_start = int(request.page_token)
    except (TypeError, ValueError):
        start = request.start
    else:
        start = max(token_start, request.start)
    end = request.end
    # Number of alignments to get
    if request.page_size < 1:
        num_aligns = _DEFAULT_PAGE_SIZE
    else:
        num_aligns = request.page_size

    alignments = []
    max_aligned_pos = 0  # Keep track of max zero-based exclusive alignment end
    # open requested accession using SRA implementation of the API
    with NGS.openReadCollection(run_accession) as run:
        # get requested reference
        with run.getReference(reference_name) as reference:
            # start iterator on requested range
            # We need to find out if it returns overlapping reads, or just
            # those that fit within the slice.
            with reference.getAlignmentSlice(start, end - start + 1,
                                             Alignment.primaryAlignment) as it:
                # Check the page limit first so the iterator is not advanced
                # once the requested number of alignments has been collected.
                while len(alignments) < num_aligns and it.nextAlignment():
                    max_aligned_pos = max(
                        max_aligned_pos,
                        it.getAlignmentPosition() + it.getAlignmentLength())
                    alignments.append(_convert_alignment(it))
    return (alignments, max_aligned_pos)
Beispiel #15
0
    def test_ReadGroup_getStatistics(self):
        # The statistics of this read group must report the expected
        # unsigned 64-bit values, checked in the listed order.
        group = NGS.openReadCollection(WithGroups).getReadGroup("GS57510-FS3-L03")
        statistics = group.getStatistics()

        expected = (
            ("BASE_COUNT", 34164461870),
            ("BIO_BASE_COUNT", 34164461870),
            ("SPOT_COUNT", 488063741),
            ("SPOT_MAX", 5368875807),
            ("SPOT_MIN", 4880812067),
        )
        for path, value in expected:
            self.assertEqual(value, statistics.getAsU64(path))
Beispiel #16
0
 def test_ReferenceWindow_Slice_Filtered_Start_Within_Slice(self):
     # Alignments returned for the whole reference with the
     # startWithinSlice filter must come back in non-decreasing position
     # order.
     reference = NGS.openReadCollection(WithCircularRef).getReference("NC_012920.1")
     alignments = reference.getFilteredAlignmentSlice(
         0, reference.getLength(), Alignment.all, Alignment.startWithinSlice, 0)

     self.assertTrue(alignments.nextAlignment())
     previous = alignments.getAlignmentPosition()
     while alignments.nextAlignment():
         current = alignments.getAlignmentPosition()
         self.assertTrue(
             previous <= current,
             "Sorting violated. Last position (" + str(previous)
             + ") is higher than current one (" + str(current) + ")")
         previous = current
Beispiel #17
0
 def test_Alignment_isPaired_MultiFragmentsPerSpot(self):
     # Each of these alignments must report itself as paired; the last one
     # (.PA.6) has an unaligned mate.
     collection = NGS.openReadCollection(PrimaryOnly)
     for suffix in (".PA.1", ".PA.2", ".PA.6"):
         alignment = collection.getAlignment(PrimaryOnly + suffix)
         self.assertTrue(alignment.isPaired())
Beispiel #18
0
def run(acc, refName=None):
    """Run ``process`` on one named reference, or on every reference.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: Name of the reference to process. When falsy, all
            references of the accession are processed and an empty line is
            printed after each one.
    """
    with NGS.openReadCollection(acc) as collection:
        if not refName:
            with collection.getReferences() as references:
                while references.nextReference():
                    process(references)
                    print("")
            return

        with collection.getReference(refName) as reference:
            process(reference)
Beispiel #19
0
def run(acc, refName=None):
    """Feed references of an accession to ``process``.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: When truthy, only this reference is processed; otherwise
            every reference is processed, each followed by an empty
            printed line.
    """
    with NGS.openReadCollection(acc) as source:
        if refName:
            with source.getReference(refName) as single:
                process(single)
        else:
            with source.getReferences() as every:
                more = every.nextReference()
                while more:
                    process(every)
                    print("")
                    more = every.nextReference()
Beispiel #20
0
 def start(self):
     """Open the read collection and record its name, read count and the
     number of fragments in its first read.
     """
     collection = NGS.openReadCollection(self.accn)
     self.read_collection = collection
     self.run_name = collection.getName()
     self.read_count = collection.getReadCount()
     # The first read tells whether the dataset is single- or paired-end.
     with collection.getReadRange(1, 1, Read.all) as first_read:
         first_read.nextRead()
         fragments = sra_reads(first_read)
         self.frag_count = len(fragments)
Beispiel #21
0
def run(acc):  # this function doesn't release NGS objects however it might
    """Print one tab-separated line per reference of an accession.

    Each line holds the common name, canonical name, length and
    "circular"/"linear"; a summary line with the number of references
    follows.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
    """
    # open requested accession using SRA implementation of the API
    run = NGS.openReadCollection(acc)
    run_name = run.getName()

    # get requested reference
    it = run.getReferences()

    i = 0
    while it.nextReference():
        print(it.getCommonName() + "\t" + it.getCanonicalName() + "\t" +
              str(it.getLength()) + "\t" +
              ("circular" if it.getIsCircular() else "linear"))
        # Count each reference; without this the summary always said 0.
        i += 1
    print("Read {} references for {}".format(i, run_name))
Beispiel #22
0
def main():
    """Parse the command line and print FastQ records for an accession.

    Depending on ``--random`` and ``--split`` one of ``random_fastq_split``,
    ``random_fastq``, ``fastq_split`` or ``fastq`` supplies the records,
    each of which is passed to ``printfastq``. An ``ErrorMsg`` raised while
    reading is reported on stdout instead of propagating.
    """
    parser = argparse.ArgumentParser(description='produce FastQ using NGS')

    parser.add_argument('accession',
                        default=None,
                        type=str,
                        help='accession to process')
    parser.add_argument('-s',
                        '--start',
                        default=1,
                        type=int,
                        help='first row to use')
    parser.add_argument('-n',
                        '--count',
                        default=10,
                        type=int,
                        help='number of rows to use')
    parser.add_argument('-p',
                        '--split',
                        default=False,
                        action='store_true',
                        help='split the READS')
    parser.add_argument('-r',
                        '--random',
                        default=False,
                        action='store_true',
                        help='get n random rows')

    args = parser.parse_args()
    # NOTE(review): a positional argument without nargs is normally required
    # by argparse, so this guard is likely only defensive - confirm.
    if args.accession is None:
        print("accession missing!")
        return

    try:
        with NGS.openReadCollection(args.accession) as run:
            if args.random and args.split:
                src = random_fastq_split(run, args.count)
            elif args.random:
                src = random_fastq(run, args.count)
            elif args.split:
                src = fastq_split(run, args.start, args.count)
            else:
                src = fastq(run, args.start, args.count)
            for read in src:
                printfastq(*read)
    except ErrorMsg as e:
        print("error: {}".format(e))
Beispiel #23
0
def run(acc):
    """Print one tab-separated line per reference of an accession.

    Each line holds the common name, canonical name, length and
    "circular"/"linear"; a summary line with the number of references
    follows.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
    """
    # open requested accession using SRA implementation of the API
    with NGS.openReadCollection(acc) as run:
        run_name = run.getName()

        # get requested reference
        with run.getReferences() as it:
            i = 0
            while it.nextReference():
                print ("{}\t{}\t{}\t{}".format(it.getCommonName(),
                    it.getCanonicalName(),
                    it.getLength(),
                    "circular" if it.getIsCircular() else "linear",
                ))
                # Count each reference; without this the summary always
                # said 0.
                i += 1

            print ("Read {} references for {}".format(i, run_name))
Beispiel #24
0
def run(acc):
    """List the references of an accession, one tab-separated line each.

    Every line contains the common name, canonical name, length and
    "circular"/"linear". The final line reports how many references were
    listed.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
    """
    # open requested accession using SRA implementation of the API
    with NGS.openReadCollection(acc) as run:
        run_name = run.getName()

        # get requested reference
        with run.getReferences() as it:
            i = 0
            while it.nextReference():
                print("{}\t{}\t{}\t{}".format(
                    it.getCommonName(),
                    it.getCanonicalName(),
                    it.getLength(),
                    "circular" if it.getIsCircular() else "linear",
                ))
                # The counter was never advanced, so the summary below
                # always reported 0 references.
                i += 1

            print("Read {} references for {}".format(i, run_name))
Beispiel #25
0
def run(acc): # this function doesn't release NGS objects however it might
    """List the references of an accession, one tab-separated line each.

    Every line contains the common name, canonical name, length and
    "circular"/"linear". The final line reports how many references were
    listed.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
    """
    # open requested accession using SRA implementation of the API
    run = NGS.openReadCollection(acc)
    run_name = run.getName()

    # get requested reference
    it = run.getReferences()

    i = 0
    while it.nextReference():
        print (
            it.getCommonName() + "\t" +
            it.getCanonicalName() + "\t" +
            str(it.getLength()) + "\t" +
            ("circular" if it.getIsCircular() else "linear")
        )
        # The counter was never advanced, so the summary below always
        # reported 0 references.
        i += 1
    print ("Read {} references for {}".format(i, run_name))
Beispiel #26
0
    def test_ReferenceWindow(self):
        # Iterating all alignments of the reference, the running count must
        # be 34 when the first secondary alignment is met and 61 at the
        # second one.
        alignments = (
            NGS.openReadCollection(WithSecondary)
            .getReference("gi|169794206|ref|NC_010410.1|")
            .getAlignments(Alignment.all)
        )
        self.assertTrue(alignments.nextAlignment())

        # the first 2 secondary alignments' locations on the list: #34, #61
        position = 1
        for expected in (34, 61):
            while alignments.nextAlignment():
                if alignments.getAlignmentCategory() == Alignment.secondaryAlignment:
                    break
                position += 1
            self.assertEqual(expected, position)
Beispiel #27
0
def run(acc, refName, start, stop):
    """Print the primary alignments overlapping a region of a reference.

    Each alignment is printed as a tab-separated row with read id,
    reference spec, alignment position, long CIGAR and aligned fragment
    bases, followed by a summary line.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: Name of the reference to query.
        start: Start of the region, passed unchanged to
            ``getAlignmentSlice``.
        stop: End of the region; the slice length is ``stop - start + 1``.
    """
    with NGS.openReadCollection(acc) as collection:
        name = collection.getName()

        with collection.getReference(refName) as reference:
            length = stop - start + 1
            with reference.getAlignmentSlice(
                    start, length, Alignment.primaryAlignment) as alignments:
                shown = 0
                while alignments.nextAlignment():
                    columns = (
                        alignments.getReadId(),
                        alignments.getReferenceSpec(),
                        alignments.getAlignmentPosition(),
                        alignments.getLongCigar(False),
                        alignments.getAlignedFragmentBases(),
                    )
                    print("{}\t{}\t{}\t{}\t{}".format(*columns))
                    shown += 1
                print("Read {} alignments for {}".format(shown, name))
Beispiel #28
0
def run(acc, refName, start, stop):
    """Dump the primary alignments of a reference region as tab-separated rows.

    Columns are read id, reference spec, alignment position, long CIGAR and
    aligned fragment bases. A summary line with the row count is printed
    last.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: Name of the reference to query.
        start: Start of the region, passed unchanged to
            ``getAlignmentSlice``.
        stop: End of the region; the slice length is ``stop - start + 1``.
    """
    with NGS.openReadCollection(acc) as coll:
        label = coll.getName()

        # Open the reference and the slice iterator in one statement.
        with coll.getReference(refName) as ref, \
                ref.getAlignmentSlice(start, stop - start + 1,
                                      Alignment.primaryAlignment) as cursor:
            printed = 0
            while True:
                if not cursor.nextAlignment():
                    break
                print("{}\t{}\t{}\t{}\t{}".format(
                    cursor.getReadId(),
                    cursor.getReferenceSpec(),
                    cursor.getAlignmentPosition(),
                    cursor.getLongCigar(False),
                    cursor.getAlignedFragmentBases(),
                ))
                printed += 1
            print("Read {} alignments for {}".format(printed, label))
Beispiel #29
0
def run(acc, refName, start, stop):  # this function doesn't release NGS objects however it might
    """Print the primary alignments overlapping a region of a reference.

    Each alignment is printed as a tab-separated row with read id,
    reference spec, alignment position, long CIGAR and aligned fragment
    bases, followed by a summary line. The NGS objects opened here are not
    released explicitly.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: Name of the reference to query.
        start: Start of the region, passed unchanged to
            ``getAlignmentSlice``.
        stop: End of the region; the slice length is ``stop - start + 1``.
    """
    collection = NGS.openReadCollection(acc)
    name = collection.getName()

    reference = collection.getReference(refName)

    length = stop - start + 1
    alignments = reference.getAlignmentSlice(start, length, Alignment.primaryAlignment)
    shown = 0
    while alignments.nextAlignment():
        row = (
            alignments.getReadId(),
            alignments.getReferenceSpec(),
            alignments.getAlignmentPosition(),
            alignments.getLongCigar(False),
            alignments.getAlignedFragmentBases(),
        )
        print("%s\t%s\t%d\t%s\t%s" % row)
        shown += 1
    print("Read %d alignments for %s" % (shown, name))
Beispiel #30
0
def sra_reader(accn, batcher):
    """Iterates through a read collection for a given accession number using
    the ngs-lib python bindings.

    Args:
        accn: The accession number
        batcher: Callable that is given the total read count and returns an
            iterable of ``(batch, start, size)`` tuples. ``start + 1`` is
            used as the first read of each request and ``size`` as the
            number of reads to request (so ``start`` is presumably
            zero-based; confirm against the batcher implementation).

    Yields:
        The result of ``sra_read`` for each read of each batch
    """
    with NGS.openReadCollection(accn) as run:
        read_count = run.getReadCount()
        for _, start, size in batcher(read_count):
            with run.getReadRange(start + 1, size, Read.all) as read:
                for _ in range(size):
                    read.nextRead()
                    yield sra_read(read)
Beispiel #31
0
def run(acc, refName, start, stop):  # this function doesn't release NGS objects however it might
    """Dump the primary alignments of a reference region as tab-separated rows.

    Columns are read id, reference spec, alignment position, long CIGAR and
    aligned fragment bases. A summary line with the row count is printed
    last. The NGS objects opened here are not released explicitly.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: Name of the reference to query.
        start: Start of the region, passed unchanged to
            ``getAlignmentSlice``.
        stop: End of the region; the slice length is ``stop - start + 1``.
    """
    coll = NGS.openReadCollection(acc)
    label = coll.getName()

    target = coll.getReference(refName)

    cursor = target.getAlignmentSlice(
        start, stop - start + 1, Alignment.primaryAlignment)
    printed = 0
    while True:
        if not cursor.nextAlignment():
            break
        fields = (
            cursor.getReadId(),
            cursor.getReferenceSpec(),
            cursor.getAlignmentPosition(),
            cursor.getLongCigar(False),
            cursor.getAlignedFragmentBases(),
        )
        print("%s\t%s\t%d\t%s\t%s" % fields)
        printed += 1
    print("Read %d alignments for %s" % (printed, label))
Beispiel #32
0
 def test_ReadCollection_getName(self):
     # The collection's name must equal the accession it was opened with.
     actual = NGS.openReadCollection(PrimaryOnly).getName()
     self.assertEqual(PrimaryOnly, actual)
Beispiel #33
0
 def test_ReadGroupIterator_Next(self):
     # After a successful advance the read-group name must be readable
     # without raising.
     groups = NGS.openReadCollection(PrimaryOnly).getReadGroups()
     advanced = groups.nextReadGroup()
     self.assertTrue(advanced)
     groups.getName()
Beispiel #34
0
 def test_ReadCollection_getReadGroups(self):
     # Smoke test: obtaining the read-group iterator must not raise.
     read_groups = NGS.openReadCollection(PrimaryOnly).getReadGroups()
Beispiel #35
0
 def test_ReadCollection_getReadGroup(self):
     # Smoke test: looking up a read group by name must not raise.
     read_group = NGS.openReadCollection(PrimaryOnly).getReadGroup("C1ELY.6")
Beispiel #36
0
 def test_ReadCollection_getReferences(self):
     # Smoke test: obtaining the reference iterator must not raise.
     references = NGS.openReadCollection(PrimaryOnly).getReferences()
Beispiel #37
0
def run(acc, refName, start, stop):
    """Print a pileup listing for a region of one reference.

    One line is printed per pileup position: reference spec, position plus
    one, reference base, pileup depth, an encoded event string and the
    concatenated alignment qualities. A summary line follows.

    Args:
        acc: Accession to open with ``NGS.openReadCollection``.
        refName: Name of the reference to pile up.
        start: First position of the region; ``start - 1`` is passed to
            ``getPileupSlice``.
        stop: Last position of the region (inclusive).
    """
    with NGS.openReadCollection(acc) as coll:
        label = coll.getName()

        # Open the reference and the pileup iterator in one statement.
        with coll.getReference(refName) as ref, \
                ref.getPileupSlice(start - 1, stop - start + 1) as cursor:
            positions = 0
            while cursor.nextPileup():
                quals = ""
                events = ""

                prefix = "{}\t{}\t{}\t{}".format(
                    cursor.getReferenceSpec(),
                    cursor.getReferencePosition() + 1,
                    cursor.getReferenceBase(),
                    cursor.getPileupDepth(),
                )
                while cursor.nextPileupEvent():
                    bits = cursor.getEventType()
                    minus = (bits & PileupEvent.alignment_minus_strand) != 0

                    if (bits & PileupEvent.alignment_start) != 0:
                        events += '^'
                        events += chr(cursor.getMappingQuality() + 33)

                    if (bits & PileupEvent.insertion) != 0:
                        events += '+'
                        extra = cursor.getInsertionBases()
                        events += str(len(extra))
                        events += extra.lower() if minus else extra

                    # Low three bits select the basic event; the printed
                    # symbol depends on the strand.
                    basic = bits & 7
                    if basic == PileupEvent.deletion:
                        events += '<' if minus else '>'
                    elif basic == PileupEvent.match:
                        events += ',' if minus else '.'
                    elif basic == PileupEvent.mismatch:
                        symbol = str(cursor.getAlignmentBase())
                        events += symbol.lower() if minus else symbol.upper()

                    if (bits & PileupEvent.alignment_stop) != 0:
                        events += '$'

                    quals += cursor.getAlignmentQuality()

                positions += 1
                print("{}\t{}\t{}".format(prefix, events, quals))
            print("Read {} pileups for {}".format(positions, label))
Beispiel #38
0
 def test_ReadCollection_getAlignmentCount_WithSecondary_Secondary(self):
     # Expected number of secondary alignments in the WithSecondary run.
     actual = NGS.openReadCollection(WithSecondary).getAlignmentCount(Alignment.secondaryAlignment)
     self.assertEqual(10, actual)
Beispiel #39
0
 def test_ReadCollection_getAlignmentCount_WithSecondary_All(self):
     # Expected number of alignments of any category in the WithSecondary run.
     actual = NGS.openReadCollection(WithSecondary).getAlignmentCount(Alignment.all)
     self.assertEqual(178, actual)
Beispiel #40
0
 def test_ReadCollection_getAlignmentCount_PrimaryOnly_Secondary(self):
     # The PrimaryOnly run must report zero secondary alignments.
     actual = NGS.openReadCollection(PrimaryOnly).getAlignmentCount(Alignment.secondaryAlignment)
     self.assertEqual(0, actual)
Beispiel #41
0
 def test_ReadCollection_getAlignmentCount_PrimaryOnly_All(self):
     # Expected number of alignments of any category in the PrimaryOnly run.
     actual = NGS.openReadCollection(PrimaryOnly).getAlignmentCount(Alignment.all)
     self.assertEqual(3987701, actual)
Beispiel #42
0
 def test_ReadCollection_getAlignments_all(self):
     # Smoke test: requesting an iterator over all alignments must not raise.
     alignments = NGS.openReadCollection(PrimaryOnly).getAlignments(Alignment.all)
Beispiel #43
0
 def test_ReadCollection_getAlignments_Secondary(self):
     # Smoke test: requesting an iterator over secondary alignments must
     # not raise.
     alignments = NGS.openReadCollection(PrimaryOnly).getAlignments(Alignment.secondaryAlignment)
Beispiel #44
0
 def test_ReadCollection_getAlignment(self):
     # Smoke test: fetching a single alignment by id must not raise.
     alignment = NGS.openReadCollection(PrimaryOnly).getAlignment(PrimaryOnly + ".PA.1")
Beispiel #45
0
 def test_ReadCollection_hasReference(self):
     # hasReference must be true for a reference present in the run and
     # false for an unknown one. self.assert* is used instead of bare
     # assert so the checks still run under "python -O" and match the
     # sibling tests.
     self.assertTrue(NGS.openReadCollection(PrimaryOnly).hasReference("supercont2.1"))
     self.assertFalse(NGS.openReadCollection(PrimaryOnly).hasReference("non-existent acc"))
Beispiel #46
0
 def test_ReadCollection_getReference(self):
     # Smoke test: looking up a reference by name must not raise.
     reference = NGS.openReadCollection(PrimaryOnly).getReference("supercont2.1")