Example #1
    def get_data( self, chrom, start, end, **kwargs ):
        """Return payloads for features in the original dataset that overlap
        chrom:start-end, using the converted interval index for random access."""
        start, end = int(start), int(end)
        chrom = str(chrom)
        source = open( self.original_dataset.file_name )
        index = Indexes( self.converted_dataset.file_name )
        results = []
        
        for start, end, offset in index.find(chrom, start, end):
            source.seek(offset)
            feature = source.readline().split()
            payload = { 'uid': offset, 'start': start, 'end': end, 'name': feature[3] }
            try:
                payload['strand'] = feature[5]
            except IndexError:
                pass
            
            if 'include_blocks' in kwargs:
                try:
                    # BED12 blockSizes/blockStarts (columns 11/12) are comma-separated;
                    # block starts are relative to the feature's chromStart.
                    block_sizes = [ int(n) for n in feature[10].split(',') if n != '']
                    block_starts = [ int(n) for n in feature[11].split(',') if n != '' ]
                    blocks = zip(block_sizes, block_starts)
                    payload['blocks'] = [ (start + block[1], start + block[1] + block[0]) for block in blocks]
                except IndexError:
                    pass
    
                try:
                    payload['thick_start'] = int(feature[6])
                    payload['thick_end'] = int(feature[7])
                except IndexError:
                    pass

            results.append(payload)
        
        return results
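
For context: the Indexes class used in this provider is assumed to be bx-python's bx.interval_index_file.Indexes. The sketch below shows a minimal build-and-query round trip over a plain ASCII BED file under that assumption; the helper names are illustrative and not part of the example above.

# Minimal sketch, assuming Indexes comes from bx-python and the BED file is
# plain ASCII, so character offsets equal byte offsets.
from bx.interval_index_file import Indexes

def build_bed_index(bed_fname, index_fname):
    index = Indexes()
    offset = 0
    for line in open(bed_fname):
        fields = line.split()
        # Skip track lines, comments and blank lines, but still count their bytes.
        if fields and not fields[0].startswith(("track", "#")):
            index.add(fields[0], int(fields[1]), int(fields[2]), offset)
        offset += len(line)
    index.write(open(index_fname, "wb"))  # the index file is binary

def features_overlapping(bed_fname, index_fname, chrom, start, end):
    index = Indexes(index_fname)
    source = open(bed_fname)
    for _start, _end, offset in index.find(chrom, start, end):
        source.seek(offset)
        yield source.readline().split()
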
Example #2
def main():
    # Read options, args.
    parser = optparse.OptionParser()
    (options, args) = parser.parse_args()
    in_file, out_file = args

    # Do conversion.
    index = Indexes()
    reader = galaxy_utils.sequence.vcf.Reader( open( in_file ) )
    offset = reader.metadata_len
    for vcf_line in reader:
        # VCF format provides a chrom and 1-based position for each variant.
        # IntervalIndex expects 0-based coordinates.
        index.add( vcf_line.chrom, vcf_line.pos - 1, vcf_line.pos, offset )
        offset += len( vcf_line.raw_line )

    index.write( open( out_file, "w" ) )
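
Once the converter above has run, variants overlapping a region can be pulled back out of the original VCF by seeking to the recorded offsets. A minimal query sketch, again assuming bx-python's Indexes; variants_in_region is a hypothetical helper, not part of the converter.

from bx.interval_index_file import Indexes

def variants_in_region(vcf_fname, index_fname, chrom, start, end):
    # start/end are 0-based and half-open, matching what the converter stored.
    index = Indexes(index_fname)
    vcf = open(vcf_fname)
    for _start, _end, offset in index.find(chrom, start, end):
        vcf.seek(offset)
        fields = vcf.readline().rstrip("\n").split("\t")
        # CHROM, POS (1-based), REF, ALT
        yield fields[0], int(fields[1]), fields[3], fields[4]
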
Example #3
    def get_data(self, chrom, start, end, **kwargs):
        start, end = int(start), int(end)
        chrom = str(chrom)
        source = open(self.original_dataset.file_name)
        index = Indexes(self.converted_dataset.file_name)
        results = []

        for start, end, offset in index.find(chrom, start, end):
            source.seek(offset)
            feature = source.readline().split()
            payload = {
                'uid': offset,
                'start': start,
                'end': end,
                'name': feature[3]
            }
            try:
                payload['strand'] = feature[5]
            except IndexError:
                pass

            if 'include_blocks' in kwargs:
                try:
                    block_sizes = [
                        int(n) for n in feature[10].split(',') if n != ''
                    ]
                    block_starts = [
                        int(n) for n in feature[11].split(',') if n != ''
                    ]
                    blocks = zip(block_sizes, block_starts)
                    payload['blocks'] = [(start + block[1],
                                          start + block[1] + block[0])
                                         for block in blocks]
                except IndexError:
                    pass

                try:
                    payload['thick_start'] = int(feature[6])
                    payload['thick_end'] = int(feature[7])
                except IndexError:
                    pass

            results.append(payload)

        return results
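
For reference, when the include_blocks keyword is present the payload also carries the BED12 block and thick-interval fields. For a hypothetical two-block feature such as

chr1  1000  5000  featA  0  +  1200  4800  0  2  300,500  0,3500

the returned payload would look roughly like this (the uid depends on the line's byte offset):

{
    'uid': 0,                                 # byte offset of the line in the BED file
    'start': 1000,
    'end': 5000,
    'name': 'featA',
    'strand': '+',
    'blocks': [(1000, 1300), (4500, 5000)],   # chromStart + blockStart .. + blockSize
    'thick_start': 1200,
    'thick_end': 4800,
}
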
Example #4
def main():

    # Read options, args.
    parser = optparse.OptionParser()
    (options, args) = parser.parse_args()
    input_fname, output_fname = args

    # Do conversion.
    index = Indexes()
    offset = 0
    for line in open( input_fname, "r" ):
        chrom, start = line.split()[ 0:2 ]
        # Pileup format is 1-based.
        start = int( start ) - 1
        index.add( chrom, start, start + 1, offset )
        offset += len( line )

    index.write( open(output_fname, "w") )
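
The index produced here maps every covered reference base to the byte offset of its pileup line, so a single-base lookup is a find over a one-base interval. A minimal sketch under the same bx-python assumption; pileup_line_at is an illustrative name.

from bx.interval_index_file import Indexes

def pileup_line_at(pileup_fname, index_fname, chrom, pos):
    # pos is 1-based, as in the pileup file; the index stores [pos - 1, pos).
    index = Indexes(index_fname)
    source = open(pileup_fname)
    for _start, _end, offset in index.find(chrom, pos - 1, pos):
        source.seek(offset)
        return source.readline().rstrip("\n")
    return None
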
Example #5
def main():
    # Arguments
    input_fname, out_fname = sys.argv[1:]

    # Do conversion.
    index = Indexes()
    offset = 0
    reader_wrapper = GFFReaderWrapper( fileinput.FileInput( input_fname ), fix_strand=True )
    for feature in list( reader_wrapper ):
        # Add feature; index expects BED coordinates.
        if isinstance( feature, GenomicInterval ):
            convert_gff_coords_to_bed( feature )
            index.add( feature.chrom, feature.start, feature.end, offset )

        # Always increment offset, even if feature is not an interval and hence
        # not included in the index.
        offset += feature.raw_size

    index.write( open(out_fname, "w") )
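
convert_gff_coords_to_bed shifts a feature from GFF's 1-based, end-inclusive coordinates to BED's 0-based, half-open coordinates before the interval is added to the index. Conceptually the conversion is just the following (a stand-alone illustration, not Galaxy's helper):

def gff_to_bed_coords(start, end):
    # GFF: 1-based, end-inclusive  ->  BED: 0-based, end-exclusive.
    # A single-base feature at GFF position 7 becomes the interval (6, 7).
    return start - 1, end
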
Example #6
def main():
    
    input_fname = sys.argv[1]
    out_fname = sys.argv[2]
    index = Indexes()
    offset = 0
    
    for line in open(input_fname, "r"):
        feature = line.strip().split()
        if not feature or feature[0].startswith("track") or feature[0].startswith("#"):
            offset += len(line)
            continue
        chrom = feature[0]
        chrom_start = int(feature[1])
        chrom_end = int(feature[2])
        index.add( chrom, chrom_start, chrom_end, offset )
        offset += len(line)
    
    index.write( open(out_fname, "w") )
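
One caveat with this pattern: the recorded offsets are later used with seek(), and offset += len(line) only matches byte positions when every character is a single byte. A byte-exact variant (a sketch, not the original converter) reads the file in binary mode:

from bx.interval_index_file import Indexes

def build_index_bytewise(input_fname, out_fname):
    index = Indexes()
    offset = 0
    for line in open(input_fname, "rb"):  # binary mode: len(line) is a byte count
        fields = line.split()
        if fields and not (fields[0].startswith(b"track") or fields[0].startswith(b"#")):
            index.add(fields[0].decode(), int(fields[1]), int(fields[2]), offset)
        offset += len(line)
    index.write(open(out_fname, "wb"))
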
Example #7
def main():

    # Read options, args.
    parser = optparse.OptionParser()
    parser.add_option('-c', '--chr-col', type='int', dest='chrom_col', default=1)
    parser.add_option('-s', '--start-col', type='int', dest='start_col', default=2)
    parser.add_option('-e', '--end-col', type='int', dest='end_col', default=3)
    (options, args) = parser.parse_args()
    input_fname, output_fname = args

    # Make column indices 0-based.
    options.chrom_col -= 1
    options.start_col -= 1
    options.end_col -= 1

    # Do conversion.
    index = Indexes()
    offset = 0
    for line in open(input_fname, "r"):
        feature = line.strip().split()
        if not feature or feature[0].startswith("track") or feature[0].startswith("#"):
            offset += len(line)
            continue
        chrom = feature[options.chrom_col]
        chrom_start = int(feature[options.start_col])
        chrom_end = int(feature[options.end_col])
        index.add(chrom, chrom_start, chrom_end, offset)
        offset += len(line)

    index.write(open(output_fname, "w"))
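
The column options are 1-based on the command line and are shifted to 0-based list indices before use. A hypothetical invocation and the equivalent mapping (script and file names are illustrative):

#   python interval_to_interval_index_converter.py -c 1 -s 4 -e 5 genes.interval genes.iix
def to_zero_based(chrom_col, start_col, end_col):
    # Mirrors the option handling above: 1-based CLI columns -> 0-based list indices.
    # to_zero_based(1, 4, 5) == (0, 3, 4), i.e. feature[0], feature[3], feature[4].
    return chrom_col - 1, start_col - 1, end_col - 1
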
Example #8
def main():
    # Read options, args.
    parser = optparse.OptionParser()
    (options, args) = parser.parse_args()
    in_file, out_file = args

    # Do conversion.
    index = Indexes()
    reader = galaxy_utils.sequence.vcf.Reader(open(in_file))
    offset = reader.metadata_len
    for vcf_line in reader:
        # VCF format provides a chrom and 1-based position for each variant.
        # IntervalIndex expects 0-based coordinates.
        index.add(vcf_line.chrom, vcf_line.pos - 1, vcf_line.pos, offset)
        offset += len(vcf_line.raw_line)

    index.write(open(out_file, "w"))
Example #9
def main():

    # Read options, args.
    parser = optparse.OptionParser()
    (options, args) = parser.parse_args()
    input_fname, output_fname = args

    # Do conversion.
    index = Indexes()
    offset = 0
    for line in open(input_fname, "r"):
        chrom, start = line.split()[0:2]
        # Pileup format is 1-based.
        start = int(start) - 1
        index.add(chrom, start, start + 1, offset)
        offset += len(line)

    index.write(open(output_fname, "w"))
Example #10
def main():
    # Arguments
    input_fname, out_fname = sys.argv[1:]

    # Do conversion.
    index = Indexes()
    offset = 0
    reader_wrapper = GFFReaderWrapper(fileinput.FileInput(input_fname),
                                      fix_strand=True)
    for feature in list(reader_wrapper):
        # Add feature; index expects BED coordinates.
        if isinstance(feature, GenomicInterval):
            convert_gff_coords_to_bed(feature)
            index.add(feature.chrom, feature.start, feature.end, offset)

        # Always increment offset, even if feature is not an interval and hence
        # not included in the index.
        offset += feature.raw_size

    index.write(open(out_fname, "wb"))
Example #11
def main():

    input_fname = sys.argv[1]
    out_fname = sys.argv[2]
    index = Indexes()
    offset = 0

    for line in open(input_fname, "r"):
        feature = line.strip().split()
        if not feature or feature[0].startswith(
                "track") or feature[0].startswith("#"):
            offset += len(line)
            continue
        chrom = feature[0]
        chrom_start = int(feature[1])
        chrom_end = int(feature[2])
        index.add(chrom, chrom_start, chrom_end, offset)
        offset += len(line)

    index.write(open(out_fname, "w"))