def main():
    """Intersect two interval files and write the result to an output file.

    Options and positional arguments are parsed by ``cookbook.doc_optparse``
    from the module docstring (the global ``__doc__``; a function docstring
    does not shadow that name).  The options read here are ``cols1``,
    ``cols2``, ``mincols`` and ``pieces``; the three positional arguments
    are the first input file, the second input file and the output file.

    Every item yielded by ``intersect`` is written on its own line:
    ``GenomicInterval`` items as their tab-joined ``fields``, anything else
    in its string form.
    """
    mincols = 1

    options, args = cookbook.doc_optparse.parse( __doc__ )
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
        if options.mincols:
            mincols = int( options.mincols )
        pieces = bool( options.pieces )
        in_fname, in2_fname, out_fname = args
    except Exception:
        # Only real errors are handled here; SystemExit/KeyboardInterrupt
        # propagate untouched.
        # NOTE(review): exception() is assumed to report the problem and
        # terminate the program -- if it returned, the names bound in the
        # try block would be undefined below.  Confirm against the helper.
        cookbook.doc_optparse.exception()

    g1 = GenomicIntervalReader( fileinput.FileInput( in_fname ),
                                chrom_col=chr_col_1,
                                start_col=start_col_1,
                                end_col=end_col_1,
                                strand_col=strand_col_1 )
    g2 = GenomicIntervalReader( fileinput.FileInput( in2_fname ),
                                chrom_col=chr_col_2,
                                start_col=start_col_2,
                                end_col=end_col_2,
                                strand_col=strand_col_2 )

    # The context manager guarantees the output is flushed and closed on
    # both the normal path and when iteration raises.
    with open( out_fname, "w" ) as out_file:
        for line in intersect( [g1, g2], pieces=pieces, mincols=mincols ):
            if isinstance( line, GenomicInterval ):
                out_file.write( "%s\n" % "\t".join( line.fields ) )
            else:
                # 1-tuple so that a tuple-valued item is formatted as a
                # whole, exactly as the print statement used to do.
                out_file.write( "%s\n" % ( line, ) )
# Example no. 2
# 0
def main():
    """Intersect two interval files and write the result to an output file.

    Options and positional arguments are parsed by ``doc_optparse`` from the
    module docstring (the global ``__doc__``; a function docstring does not
    shadow that name).  The options read here are ``cols1``, ``cols2``,
    ``mincols`` and ``pieces``; the three positional arguments are the first
    input file, the second input file and the output file.

    Every item yielded by ``intersect`` is written on its own line:
    ``GenomicInterval`` items as their tab-joined ``fields``, anything else
    in its string form.  A ``ParseError`` raised while iterating is reported
    through ``fail`` after the output file has been closed.
    """
    mincols = 1

    options, args = doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(
            options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(
            options.cols2)
        if options.mincols:
            mincols = int(options.mincols)
        pieces = bool(options.pieces)
        in_fname, in2_fname, out_fname = args
    except Exception:
        # Only real errors are handled here; SystemExit/KeyboardInterrupt
        # propagate untouched.
        # NOTE(review): exception() is assumed to report the problem and
        # terminate the program -- if it returned, the names bound in the
        # try block would be undefined below.  Confirm against the helper.
        doc_optparse.exception()

    g1 = NiceReaderWrapper(fileinput.FileInput(in_fname),
                           chrom_col=chr_col_1,
                           start_col=start_col_1,
                           end_col=end_col_1,
                           strand_col=strand_col_1,
                           fix_strand=True)
    g2 = NiceReaderWrapper(fileinput.FileInput(in2_fname),
                           chrom_col=chr_col_2,
                           start_col=start_col_2,
                           end_col=end_col_2,
                           strand_col=strand_col_2,
                           fix_strand=True)

    try:
        # The with-block closes the output file on success as well as on
        # error; on ParseError it is already closed when fail() is called.
        with open(out_fname, "w") as out_file:
            for line in intersect([g1, g2], pieces=pieces, mincols=mincols):
                if isinstance(line, GenomicInterval):
                    out_file.write("%s\n" % "\t".join(line.fields))
                else:
                    out_file.write("%s\n" % line)
    except ParseError as e:
        fail("Invalid file format: %s" % str(e))
# Example no. 3
# 0
def main():
    """Intersect two interval files and write the result to an output file.

    Options and positional arguments are parsed by ``cookbook.doc_optparse``
    from the module docstring (the global ``__doc__``; a function docstring
    does not shadow that name).  The options read here are ``cols1``,
    ``cols2``, ``mincols`` and ``pieces``; the three positional arguments
    are the first input file, the second input file and the output file.

    Every item yielded by ``intersect`` is written on its own line:
    ``GenomicInterval`` items as their tab-joined ``fields``, anything else
    in its string form.
    """
    mincols = 1

    options, args = cookbook.doc_optparse.parse(__doc__)
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg(
            options.cols1)
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg(
            options.cols2)
        if options.mincols:
            mincols = int(options.mincols)
        pieces = bool(options.pieces)
        in_fname, in2_fname, out_fname = args
    except Exception:
        # Only real errors are handled here; SystemExit/KeyboardInterrupt
        # propagate untouched.
        # NOTE(review): exception() is assumed to report the problem and
        # terminate the program -- if it returned, the names bound in the
        # try block would be undefined below.  Confirm against the helper.
        cookbook.doc_optparse.exception()

    g1 = GenomicIntervalReader(fileinput.FileInput(in_fname),
                               chrom_col=chr_col_1,
                               start_col=start_col_1,
                               end_col=end_col_1,
                               strand_col=strand_col_1)
    g2 = GenomicIntervalReader(fileinput.FileInput(in2_fname),
                               chrom_col=chr_col_2,
                               start_col=start_col_2,
                               end_col=end_col_2,
                               strand_col=strand_col_2)

    # The context manager guarantees the output is flushed and closed on
    # both the normal path and when iteration raises.
    with open(out_fname, "w") as out_file:
        for line in intersect([g1, g2], pieces=pieces, mincols=mincols):
            if isinstance(line, GenomicInterval):
                out_file.write("%s\n" % "\t".join(line.fields))
            else:
                # 1-tuple so that a tuple-valued item is formatted as a
                # whole, exactly as the print statement used to do.
                out_file.write("%s\n" % (line,))
# Example no. 4
# 0
def main():
    """Intersect two interval files (default or GFF format) into an output file.

    Options and positional arguments are parsed by ``doc_optparse`` from the
    module docstring (the global ``__doc__``; a function docstring does not
    shadow that name).  The options read here are ``cols1``, ``cols2``,
    ``mincols``, ``pieces``, ``gff1`` and ``gff2``; the three positional
    arguments are the first input file, the second input file and the
    output file.

    Each input is read through ``GFFReaderWrapper`` when its ``gff`` option
    is set and through ``NiceReaderWrapper`` otherwise.  Every feature
    yielded by ``intersect`` is written one line per record: the intervals
    of a ``GFFFeature`` (after converting back to GFF coordinates), the
    tab-joined ``fields`` of a ``GenomicInterval``, or the string form of
    anything else.  A ``ParseError`` raised while iterating is reported
    through ``fail`` after the output file has been closed.
    """
    mincols = 1

    options, args = doc_optparse.parse( __doc__ )
    try:
        chr_col_1, start_col_1, end_col_1, strand_col_1 = parse_cols_arg( options.cols1 )
        chr_col_2, start_col_2, end_col_2, strand_col_2 = parse_cols_arg( options.cols2 )
        if options.mincols:
            mincols = int( options.mincols )
        pieces = bool( options.pieces )
        in1_gff_format = bool( options.gff1 )
        in2_gff_format = bool( options.gff2 )
        in_fname, in2_fname, out_fname = args
    except Exception:
        # Only real errors are handled here; SystemExit/KeyboardInterrupt
        # propagate untouched.
        # NOTE(review): exception() is assumed to report the problem and
        # terminate the program -- if it returned, the names bound in the
        # try block would be undefined below.  Confirm against the helper.
        doc_optparse.exception()

    # Set readers to handle either GFF or default format.
    if in1_gff_format:
        in1_reader_wrapper = GFFReaderWrapper
    else:
        in1_reader_wrapper = NiceReaderWrapper
    if in2_gff_format:
        in2_reader_wrapper = GFFReaderWrapper
    else:
        in2_reader_wrapper = NiceReaderWrapper

    g1 = in1_reader_wrapper( fileinput.FileInput( in_fname ),
                            chrom_col=chr_col_1,
                            start_col=start_col_1,
                            end_col=end_col_1,
                            strand_col=strand_col_1,
                            fix_strand=True )
    if in1_gff_format:
        # Intersect requires coordinates in BED format.
        g1.convert_to_bed_coord = True
    g2 = in2_reader_wrapper( fileinput.FileInput( in2_fname ),
                            chrom_col=chr_col_2,
                            start_col=start_col_2,
                            end_col=end_col_2,
                            strand_col=strand_col_2,
                            fix_strand=True )
    if in2_gff_format:
        # Intersect requires coordinates in BED format.
        g2.convert_to_bed_coord = True

    try:
        # The with-block closes the output file on success as well as on
        # error; on ParseError it is already closed when fail() is called.
        with open( out_fname, "w" ) as out_file:
            for feature in intersect( [g1, g2], pieces=pieces, mincols=mincols ):
                if isinstance( feature, GFFFeature ):
                    # Convert back to GFF coordinates since reader converted automatically.
                    convert_bed_coords_to_gff( feature )
                    for interval in feature.intervals:
                        out_file.write( "%s\n" % "\t".join( interval.fields ) )
                elif isinstance( feature, GenomicInterval ):
                    out_file.write( "%s\n" % "\t".join( feature.fields ) )
                else:
                    out_file.write( "%s\n" % feature )
    except ParseError as e:
        fail( "Invalid file format: %s" % str( e ) )