def main():
    """Compare observed interval overlap with overlap of randomly placed intervals.

    Command-line arguments (read from ``sys.argv``):

    * ``argv[1]`` -- region file; each non-blank line is split on whitespace,
      columns 1-3 are used as chromosome, start and stop, and column 4 is
      echoed to stderr as the region label.
    * ``argv[2]`` -- mask file.
    * ``argv[3]`` -- number of random samples drawn per region and feature.
    * ``argv[4]`` -- first interval file.
    * ``argv[5:]`` -- one or more second ("feature") interval files.

    For each region and each feature file the observed overlap with the first
    interval set is accumulated, then ``nsamples`` random placements of the
    feature's interval lengths are generated and their overlap with the first
    set is accumulated per sample.

    Written to stdout: a tab-separated header of feature file names, one row
    of observed overlap divided by total feature length, one such row per
    random sample, and finally the summary lines (observed / mean / stdev,
    z-score, percentile).  Progress and per-sample running totals go to
    stderr.
    """
    # NOTE(review): a second ``def main()`` appears later in this file and
    # rebinds the name, so this definition is shadowed -- confirm which one
    # is meant to be kept.
    region_fname = sys.argv[1]
    mask_fname = sys.argv[2]
    nsamples = int( sys.argv[3] )
    intervals1_fname = sys.argv[4]
    intervals2_fnames = sys.argv[5:]
    nfeatures = len( intervals2_fnames )
    # Accumulators summed over all regions: one slot per feature file, and
    # one row per random sample for total_samples.
    total_actual = zeros( nfeatures )
    total_lengths2 = zeros( nfeatures )
    total_samples = zeros( ( nsamples, nfeatures ) )
    # Use a context manager so the region file is closed even on error
    with open( region_fname ) as region_file:
        for line in region_file:
            fields = line.split()
            if not fields:
                # Blank line: nothing to process (would otherwise fail on fields[3])
                continue
            print("Processing region:", fields[3], file=sys.stderr)
            r_chr, r_start, r_stop = fields[0], int( fields[1] ), int( fields[2] )
            r_length = r_stop - r_start
            # Load the mask for this region and build its complement
            mask = overlapping_in_bed( mask_fname, r_chr, r_start, r_stop )
            bits_mask = as_bits( r_start, r_length, mask )
            bits_not_masked = bit_clone( bits_mask )
            bits_not_masked.invert()
            # Load the first set and drop every position covered by the mask
            intervals1 = overlapping_in_bed( intervals1_fname, r_chr, r_start, r_stop )
            bits1 = as_bits( r_start, r_length, intervals1 )
            bits1.iand( bits_not_masked )
            # Sanity check: nothing in the first set may remain under the mask
            assert count_overlap( bits1, bits_mask ) == 0
            # For each feature data set
            for featnum, intervals2_fname in enumerate( intervals2_fnames ):
                print(intervals2_fname, file=sys.stderr)
                intervals2 = overlapping_in_bed( intervals2_fname, r_chr, r_start, r_stop )
                bits2 = as_bits( r_start, r_length, intervals2 )
                bits2.iand( bits_not_masked )
                assert count_overlap( bits2, bits_mask ) == 0
                # Observed overlap between the first set and this feature
                total_actual[featnum] += count_overlap( bits1, bits2 )
                # Lengths of the (unmasked) feature intervals drive the sampling
                lengths2 = list( interval_lengths( bits2 ) )
                total_lengths2[ featnum ] += sum( lengths2 )
                for i in range( nsamples ):
                    # Build a randomly covered bitmask from the same lengths,
                    # passing the mask to the sampler
                    random2 = throw_random( lengths2, bits_mask )
                    # Keep only the part intersecting the first set
                    random2 &= bits1
                    total_samples[ i, featnum ] += random2.count_range( 0, random2.size )
                    # Running total for this sample/feature, for progress tracking
                    print(total_samples[ i, featnum ], file=sys.stderr)
    fraction_overlap = total_samples / total_lengths2
    print("\t".join( intervals2_fnames ))
    print("\t".join( map( str, total_actual/total_lengths2 ) ))
    for row in fraction_overlap:
        print("\t".join( map( str, row ) ))
    # NOTE(review): total_actual and total_samples hold one entry per feature
    # file, yet the summary below formats them with scalar %d; this looks like
    # it only works when exactly one feature file is given -- confirm.
    sample_mean = stats.amean( total_samples )
    sample_stdev = stats.asamplestdev( total_samples )
    print("observed overlap: %d, sample mean: %d, sample stdev: %d" % ( total_actual, sample_mean, sample_stdev ))
    print("z-score:", ( total_actual - sample_mean ) / sample_stdev)
    print("percentile:", sum( total_actual > total_samples ) / nsamples)
def main():
    """Compare observed interval overlap with overlap of randomly placed intervals.

    Command-line arguments (read from ``sys.argv``):

    * ``argv[1]`` -- region file; each non-blank line is split on whitespace,
      columns 1-3 are used as chromosome, start and stop, and column 4 is
      echoed to stderr as the region label.
    * ``argv[2]`` -- mask file.
    * ``argv[3]`` -- number of random samples drawn per region and feature.
    * ``argv[4]`` -- first interval file.
    * ``argv[5:]`` -- one or more second ("feature") interval files.

    For each region and each feature file the observed overlap with the first
    interval set is accumulated, then ``nsamples`` random placements of the
    feature's interval lengths are generated and their overlap with the first
    set is accumulated per sample.

    Written to stdout: a tab-separated header of feature file names, one row
    of observed overlap divided by total feature length, one such row per
    random sample, and finally the summary lines (observed / mean / stdev,
    z-score, percentile).  Progress and per-sample running totals go to
    stderr.
    """
    region_fname = sys.argv[1]
    mask_fname = sys.argv[2]
    nsamples = int( sys.argv[3] )
    intervals1_fname = sys.argv[4]
    intervals2_fnames = sys.argv[5:]
    nfeatures = len( intervals2_fnames )
    # Accumulators summed over all regions: one slot per feature file, and
    # one row per random sample for total_samples.
    total_actual = zeros( nfeatures )
    total_lengths2 = zeros( nfeatures )
    total_samples = zeros( ( nsamples, nfeatures ) )
    # Use a context manager so the region file is closed even on error
    with open( region_fname ) as region_file:
        for line in region_file:
            fields = line.split()
            if not fields:
                # Blank line: nothing to process (would otherwise fail on fields[3])
                continue
            print("Processing region:", fields[3], file=sys.stderr)
            r_chr, r_start, r_stop = fields[0], int( fields[1] ), int( fields[2] )
            r_length = r_stop - r_start
            # Load the mask for this region and build its complement
            mask = overlapping_in_bed( mask_fname, r_chr, r_start, r_stop )
            bits_mask = as_bits( r_start, r_length, mask )
            bits_not_masked = bit_clone( bits_mask )
            bits_not_masked.invert()
            # Load the first set and drop every position covered by the mask
            intervals1 = overlapping_in_bed( intervals1_fname, r_chr, r_start, r_stop )
            bits1 = as_bits( r_start, r_length, intervals1 )
            bits1.iand( bits_not_masked )
            # Sanity check: nothing in the first set may remain under the mask
            assert count_overlap( bits1, bits_mask ) == 0
            # For each feature data set
            for featnum, intervals2_fname in enumerate( intervals2_fnames ):
                print(intervals2_fname, file=sys.stderr)
                intervals2 = overlapping_in_bed( intervals2_fname, r_chr, r_start, r_stop )
                bits2 = as_bits( r_start, r_length, intervals2 )
                bits2.iand( bits_not_masked )
                assert count_overlap( bits2, bits_mask ) == 0
                # Observed overlap between the first set and this feature
                total_actual[featnum] += count_overlap( bits1, bits2 )
                # Lengths of the (unmasked) feature intervals drive the sampling
                lengths2 = list( interval_lengths( bits2 ) )
                total_lengths2[ featnum ] += sum( lengths2 )
                for i in range( nsamples ):
                    # Build a randomly covered bitmask from the same lengths,
                    # passing the mask to the sampler
                    random2 = throw_random( lengths2, bits_mask )
                    # Keep only the part intersecting the first set
                    random2 &= bits1
                    total_samples[ i, featnum ] += random2.count_range( 0, random2.size )
                    # Running total for this sample/feature, for progress tracking
                    print(total_samples[ i, featnum ], file=sys.stderr)
    fraction_overlap = total_samples / total_lengths2
    print("\t".join( intervals2_fnames ))
    print("\t".join( map( str, total_actual/total_lengths2 ) ))
    for row in fraction_overlap:
        print("\t".join( map( str, row ) ))
    # NOTE(review): total_actual and total_samples hold one entry per feature
    # file, yet the summary below formats them with scalar %d; this looks like
    # it only works when exactly one feature file is given -- confirm.
    sample_mean = stats.amean( total_samples )
    sample_stdev = stats.asamplestdev( total_samples )
    print("observed overlap: %d, sample mean: %d, sample stdev: %d" % ( total_actual, sample_mean, sample_stdev ))
    print("z-score:", ( total_actual - sample_mean ) / sample_stdev)
    print("percentile:", sum( total_actual > total_samples ) / nsamples)