Beispiel #1
0
    def build_index(self, filename, indexfile):
        """
        Build an interval index for an alignment file and save it.

        Recipe from Brad Chapman's blog
        <http://bcbio.wordpress.com/2009/07/26/sorting-genomic-alignments-using-python/>

        Each component of each record read by ``maf.Reader`` is added to the
        index under its source name, with its forward-strand interval and the
        file offset at which the record starts.

        :param filename: path of the alignment file to read.
        :param indexfile: path of the index file to write (overwritten).
        """
        indexes = interval_index_file.Indexes()

        # The context manager closes the input even if parsing fails.
        with open(filename) as in_handle:
            reader = maf.Reader(in_handle)
            while True:
                # Take the offset before reading so that it points at the
                # start of the record that is about to be parsed.
                pos = reader.file.tell()
                rec = next(reader)
                if rec is None:
                    break
                for c in rec.components:
                    indexes.add(
                        c.src,
                        c.forward_strand_start,
                        c.forward_strand_end,
                        pos,
                        max=c.src_size,
                    )

        # The index is struct-packed binary data, so the handle must be opened
        # in binary mode (text mode fails on Python 3 and may translate
        # newlines on Windows).
        with open(indexfile, "wb") as index_handle:
            indexes.write(index_handle)
def main():
    """
    Command-line entry point: build an interval index for a MAF file.

    Positional arguments (as returned by ``doc_optparse.parse``):
      1. the MAF file; ``.bz2`` and ``.lzo`` files are opened through their
         seekable wrappers and require a companion table file named
         ``<maf_file>t``;
      2. optionally, the name of the index file. It defaults to the MAF file
         name (without a ``.bz2``/``.lzo`` suffix) plus ``.index``.

    If ``options.species`` is set it is treated as a comma-separated list;
    only components whose source name, up to the first ``.``, is in that
    list are indexed.
    """

    # Parse command line

    options, args = doc_optparse.parse( __doc__ )

    try:
        maf_file = args[0]
        # If it appears to be a bz2 file, attempt to open with table
        if maf_file.endswith( ".bz2" ):
            table_file = maf_file + "t"
            if not os.path.exists( table_file ):
                doc_optparse.exit( "To index bz2 compressed files first "
                                   "create a bz2t file with bzip-table." )
            # Open with SeekableBzip2File so we have tell support
            maf_in = SeekableBzip2File( maf_file, table_file )
            # Strip .bz2 from the filename before adding ".index"
            maf_file = maf_file[:-4]
        elif maf_file.endswith( ".lzo" ):
            from bx.misc.seeklzop import SeekableLzopFile
            table_file = maf_file + "t"
            if not os.path.exists( table_file ):
                doc_optparse.exit( "To index lzo compressed files first "
                                   "create a lzot file with lzop_build_offset_table." )
            # Open with SeekableLzopFile so we have tell support
            maf_in = SeekableLzopFile( maf_file, table_file )
            # Strip .lzo from the filename before adding ".index"
            maf_file = maf_file[:-4]
        else:
            maf_in = open( maf_file )
        # Determine the name of the index file
        if len( args ) > 1:
            index_file = args[1]
        else:
            index_file = maf_file + ".index"
        if options.species:
            species = options.species.split( "," )
        else:
            species = None
    except Exception:
        # Only ordinary errors are reported as command-line problems;
        # SystemExit / KeyboardInterrupt are allowed to propagate.
        doc_optparse.exception()

    indexes = interval_index_file.Indexes()

    try:
        maf_reader = bx.align.maf.Reader( maf_in )

        # Need to be a bit tricky in our iteration here to get the 'tells'
        # right: the offset is taken before each block is read.
        while True:
            pos = maf_reader.file.tell()
            # Builtin next() works with both the Python 2 and Python 3
            # iterator protocols, unlike calling .next() directly.
            block = next( maf_reader )
            if block is None:
                break
            for c in block.components:
                if species is not None and c.src.split( '.' )[0] not in species:
                    continue
                indexes.add( c.src, c.forward_strand_start, c.forward_strand_end, pos, max=c.src_size )
    finally:
        maf_in.close()

    # The index is struct-packed binary data: write it in binary mode and make
    # sure the handle is closed even if writing fails.
    with open( index_file, 'wb' ) as out:
        indexes.write( out )
Beispiel #3
0
 def __init__( self, axt_filename, index_filename=None, keep_open=False, species1 = None, species2=None, species_to_lengths=None, support_ids=False ):
     """
     Set up indexed access to an axt file.

     :param axt_filename: path of the axt file.
     :param index_filename: path of its index; defaults to
         ``axt_filename + ".index"``.
     :param keep_open: if true, open ``axt_filename`` now and keep the
         handle in ``self.f``; otherwise ``self.f`` is None.
     :param species1: name stored for the first species; None selects
         ``"species1"``.
     :param species2: name stored for the second species; None selects
         ``"species2"``.
     :param species_to_lengths: stored unchanged on the instance.
     :param support_ids: stored unchanged on the instance (for extra text
         at the end of axt header lines).
     """
     if index_filename is None:
         index_filename = axt_filename + ".index"
     self.indexes = interval_index_file.Indexes( filename=index_filename )
     self.axt_filename = axt_filename
     # Note: `species1 or "species1"` would be wrong here, because it would
     # also replace an explicitly passed empty string.
     self.species1 = "species1" if species1 is None else species1
     self.species2 = "species2" if species2 is None else species2
     self.species_to_lengths = species_to_lengths
     self.support_ids = support_ids  # for extra text at end of axt header lines
     if keep_open:
         self.f = open( axt_filename )
     else:
         self.f = None
Beispiel #4
0
def index(gff_file, index_file=None):
    """
    Build an interval index for a tab-separated GFF file.

    Every line that is not a ``#`` comment and not blank is indexed under
    its first column, using columns four and five as integer start and end
    and the file offset of the line as the value.

    :param gff_file: path of the GFF file to read.
    :param index_file: path of the index to write; defaults to
        ``gff_file + ".index"``.
    :return: the path of the index file that was written.
    """
    indexes = interval_index_file.Indexes()
    with open(gff_file) as in_handle:
        # readline() rather than iteration, so that tell() stays usable and
        # reports the offset of the line about to be read.
        while True:
            pos = in_handle.tell()
            line = in_handle.readline()
            if not line:
                break
            # Skip comments/directives and blank lines; a blank line has no
            # columns to unpack.
            if line.startswith("#") or not line.strip():
                continue
            seqid, _source, _gtype, start, end = line.split("\t")[:5]
            indexes.add(seqid, int(start), int(end), pos)
    if index_file is None:
        index_file = gff_file + ".index"
    # The index is struct-packed binary data, so write it in binary mode.
    with open(index_file, "wb") as index_handle:
        indexes.write(index_handle)
    return index_file
Beispiel #5
0
def build_index(in_file, index_file):
    """
    Build an interval index for an alignment file and write it to disk.

    Each component of each record read by ``maf.Reader`` is added to the
    index under its source name, with its forward-strand interval and the
    file offset at which the record starts.

    :param in_file: path of the alignment file to read.
    :param index_file: path of the index file to write (overwritten).
    """
    indexes = interval_index_file.Indexes()
    with open(in_file) as in_handle:
        reader = maf.Reader(in_handle)
        while True:
            # Take the offset before reading so that it points at the start
            # of the record that is about to be parsed.
            pos = reader.file.tell()
            # Builtin next() works with both the Python 2 and Python 3
            # iterator protocols, unlike calling .next() directly.
            rec = next(reader)
            if rec is None:
                break
            for c in rec.components:
                indexes.add(c.src,
                            c.forward_strand_start,
                            c.forward_strand_end,
                            pos,
                            max=c.src_size)

    # The index is struct-packed binary data, so write it in binary mode.
    with open(index_file, "wb") as index_handle:
        indexes.write(index_handle)
def main():
    """
    Command-line entry point: build an interval index for a wiggle file.

    Positional arguments (as returned by ``doc_optparse.parse``):
      1. the wiggle file; ``.bz2`` and ``.lzo`` files are opened through
         their seekable wrappers and require a companion table file named
         ``<wiggle_file>t``;
      2. optionally, the name of the index file. It defaults to the wiggle
         file name (without a ``.bz2``/``.lzo`` suffix) plus ``.index``.

    Lines seen before any ``variableStep``/``fixedStep`` header are treated
    as bed-style records and indexed one by one. Each step section is
    indexed as a single interval whose value is the file offset of its
    header line.

    Raises ValueError for a bed-style data line with fewer than three fields.
    """

    # Parse command line

    options, args = doc_optparse.parse(__doc__)
    if options.version:
        return

    try:
        wiggle_file = args[0]
        # If it appears to be a bz2 file, attempt to open with table
        if wiggle_file.endswith(".bz2"):
            table_file = wiggle_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index bz2 compressed files first "
                                  "create a bz2t file with bzip-table.")
            # Open with SeekableBzip2File so we have tell support
            wiggle_in = SeekableBzip2File(wiggle_file, table_file)
            # Strip .bz2 from the filename before adding ".index"
            wiggle_file = wiggle_file[:-4]
        elif wiggle_file.endswith(".lzo"):
            from bx.misc.seeklzop import SeekableLzopFile
            table_file = wiggle_file + "t"
            if not os.path.exists(table_file):
                doc_optparse.exit("To index lzo compressed files first "
                                  "create a lzot file with lzop_build_offset_table.")
            # Open with SeekableLzopFile so we have tell support
            wiggle_in = SeekableLzopFile(wiggle_file, table_file)
            # Strip .lzo from the filename before adding ".index"
            wiggle_file = wiggle_file[:-4]
        else:
            wiggle_in = open(wiggle_file)
        # Determine the name of the index file
        if len(args) > 1:
            index_file = args[1]
        else:
            index_file = wiggle_file + ".index"
    except Exception:
        # Only ordinary errors are reported as command-line problems;
        # SystemExit / KeyboardInterrupt are allowed to propagate.
        doc_optparse.exception()

    indexes = interval_index_file.Indexes()

    # Can't use the iterator, as there is no next() and thus
    # no way to access the positions. The following code is
    # modified from wiggle.py

    # State of the step section currently being read (None = no section yet).
    last_chrom = None
    start = None
    end = None
    first_pos = None
    current_span = 1
    current_step = None

    # Until a step header is seen, data lines are bed-style records.
    mode = "bed"

    try:
        while True:
            pos = wiggle_in.tell()
            line = wiggle_in.readline()
            if not line:
                break

            if line.isspace() or line.startswith(("track", "#", "browser")):
                continue
            elif line.startswith(("variableStep", "fixedStep")):
                # A new section begins: record the one that just ended.
                if first_pos is not None:
                    indexes.add(last_chrom, start, end, first_pos)
                first_pos = pos
                header = bx.wiggle.parse_header(line)
                last_chrom = header['chrom']
                start = int(header['start']) - 1
                end = start
                current_span = int(header['span']) if 'span' in header else 1
                current_step = int(header['step']) if 'step' in header else None
                if line.startswith("variableStep"):
                    mode = "variableStep"
                else:
                    mode = "fixedStep"
            elif mode == "bed":
                # Bed-style line: each record is indexed at its own offset.
                fields = line.split()
                if len(fields) < 3:
                    raise ValueError("Unexpected input line: %s" % line.strip())
                indexes.add(fields[0], int(fields[1]), int(fields[2]), pos)
            elif mode == "variableStep":
                fields = line.split()
                end = int(fields[0]) - 1 + current_span
            else:
                # mode == "fixedStep": every data line advances by one step.
                end += current_step
    finally:
        wiggle_in.close()

    # The last step section has no following header to trigger its
    # registration, so add it here.
    if first_pos is not None:
        indexes.add(last_chrom, start, end, first_pos)

    # The index is struct-packed binary data: write it in binary mode and make
    # sure the handle is closed even if writing fails.
    with open(index_file, 'wb') as out:
        indexes.write(out)