Python interval_list 예제들, hg19util.interval_list Python 예제들

예제 #1

0

파일 보기

파일: breakpoint_graph.py 프로젝트: wjx888-star/AmpliconReconstructor

    def __init__(self, segment_list=None, cycle_list=None, ilist=None, file=None, file_content=None):
        """Initialise the decomposition from a cycles file, its text, or in-memory objects.

        The first applicable mode wins:

        * ``file`` or ``file_content`` is given: parse whitespace-separated
          records. A ``Segment`` or ``Interval`` record is read as
          ``<type> <id> <chrom> <start> <end>``. A record whose first field
          contains ``Cycle=`` is read as three ``;``-separated ``key=value``
          parts: cycle id, copy count, and a comma-separated list of segment
          ids each suffixed with an orientation character (``+`` maps to 1,
          anything else to -1). Non-empty ``file_content`` takes precedence
          over ``file``.
        * ``cycle_list`` is None: ``segment_list`` is treated as one path.
          Its distinct (chrom, start, end) triples are sorted and numbered
          from 1, and a single cycle ``'1'`` with copy count 1 is created
          using each input segment's ``strand`` as its direction.
        * otherwise: ``segment_list`` and ``cycle_list`` are adopted as given
          and ``ilist`` is derived from the segments when not supplied.

        Args:
            segment_list: segments (objects with ``chrom``, ``start``, ``end``;
                mode 2 also reads ``strand``, mode 3 reads ``info[0]`` and
                calls ``merge_clusters``).
            cycle_list: iterable of cycle tuples whose first item is the id.
            ilist: optional precomputed interval list (mode 3 only).
            file: path of a cycles file to read.
            file_content: text of a cycles file.
        """
        if file is not None or file_content is not None:
            self.segment_list = hg.interval_list([])
            self.segment_dict = {}
            self.cycle_dict = {}
            self.ilist = hg.interval_list([])

            if file_content:
                text = file_content
            elif file is not None:
                # The context manager closes the handle even if reading fails.
                with open(file) as handle:
                    text = handle.read()
            else:
                # Empty file_content and no file: there is nothing to parse.
                text = ''

            for line in text.split('\n'):
                fields = line.split()
                if not fields:
                    continue
                record_type = fields[0]
                if record_type == 'Segment':
                    seg = hg.interval(fields[2], int(fields[3]), int(fields[4]), info=[fields[1]])
                    self.segment_dict[fields[1]] = seg
                    self.segment_list.append(seg)
                elif 'Cycle=' in record_type:
                    parts = record_type.split(';')
                    cycle_id = parts[0].split('=')[1]
                    copy_count = float(parts[1].split('=')[1])
                    path = []
                    for token in parts[2].split('=')[1].split(','):
                        if not token:
                            # Tolerate an empty segment list or a stray comma.
                            continue
                        path.append((token[:-1], 1 if token[-1] == '+' else -1))
                    self.cycle_dict[cycle_id] = (cycle_id, copy_count, path)
                elif record_type == 'Interval':
                    self.ilist.append(hg.interval(fields[2], int(fields[3]), int(fields[4]), info=[fields[1]]))
        elif cycle_list is None:
            # De-duplicate by coordinates so repeated visits share one segment.
            unique_coords = {(s.chrom, s.start, s.end) for s in segment_list}
            segment_set = hg.interval_list([hg.interval(c[0], c[1], c[2]) for c in unique_coords])
            segment_set.sort()
            self.segment_list = segment_set
            self.segment_dict = {}
            seg_id = {}
            for index, seg in enumerate(segment_set):
                name = str(index + 1)
                self.segment_dict[name] = seg
                seg_id[(seg.chrom, seg.start, seg.end)] = name
                seg.info = [name]
            path = [(seg_id[(s.chrom, s.start, s.end)], s.strand) for s in segment_list]
            self.cycle_dict = {'1': ('1', 1, path)}
            self.ilist = hg.interval_list([c[0] for c in segment_set.merge_clusters(extend=1)])
            for index, interval in enumerate(self.ilist):
                interval.info = [str(index)]
        else:
            self.segment_list = segment_list
            self.segment_dict = {s.info[0]: s for s in segment_list}
            self.cycle_dict = {c[0]: c for c in cycle_list}
            if ilist is not None:
                self.ilist = ilist
            else:
                self.ilist = hg.interval_list([c[0] for c in segment_list.merge_clusters(extend=1)])
                for index, interval in enumerate(self.ilist):
                    interval.info = [str(index)]

예제 #2

0

파일 보기

파일: find_cycles.py 프로젝트: namphuon/pancancer

def load_aa_result(in_dir, prefix):
    """Load the summary and per-amplicon cycle files of one run.

    Looks for ``<in_dir>/<prefix>_summary.txt``. Returns None when that file
    does not exist or when ``read_summary_file`` returns None. Otherwise
    returns a dict with one entry per amplicon id (holding ``'segment_map'``,
    ``'interval_map'`` and ``'cycle_map'`` from ``read_cycle_file``) plus an
    ``'amplicons'`` entry holding the parsed summary.

    Side effect: every segment visited by a cycle has that cycle's
    ``'copy_count'`` added to its own ``info['copy_count']``.
    """
    summary_file = "%s/%s_summary.txt" % (in_dir, prefix)
    all_map = {}
    if not os.path.exists(summary_file):
        return None
    amps = read_summary_file(summary_file)
    if amps is None:
        return None

    for amplicon_id, summary in amps.items():
        # Evaluated as in the original: a summary entry without 'Intervals'
        # still raises KeyError here even though the value is not used.
        _interval_names = summary['Intervals'].split(',')
        cycle_file = "%s/%s_amplicon%s_cycles.txt" % (in_dir, prefix, amplicon_id)
        segment_map, interval_map, cycle_map = read_cycle_file(cycle_file)

        entry = all_map.setdefault(amplicon_id, {})
        entry['segment_map'] = segment_map
        entry['interval_map'] = interval_map
        entry['cycle_map'] = cycle_map

        for cycle_id in cycle_map.keys():
            # Steps whose first character is '0' are not looked up.
            visited = [
                segment_map[step[0:-1]]
                for step in cycle_map[cycle_id]['cycle']
                if step[0] != '0'
            ]
            for seg in hg19.interval_list(visited):
                running = seg.info.setdefault('copy_count', 0)
                seg.info['copy_count'] = running + cycle_map[cycle_id]['copy_count']

    all_map['amplicons'] = amps
    return all_map

예제 #3

0

파일 보기

파일: read_count.py 프로젝트: namphuon/pancancer

def parse_bed(segment_file):
    """Read a tab-separated BED-like file into an ``hg19.interval_list``.

    Each non-blank line must hold at least three tab-separated fields:
    chromosome, start and end. Only those three are used.

    Args:
        segment_file: path of the file to read.

    Returns:
        An ``hg19.interval_list`` with one ``hg19.interval`` per non-blank
        line, in file order.

    Raises:
        IndexError / ValueError: on a non-blank line with fewer than three
            fields or non-integer coordinates.
    """
    amplicons = hg19.interval_list()
    # The context manager guarantees the handle is closed, including on error.
    with open(segment_file, 'r') as bed_handle:
        for line in bed_handle:
            stripped = line.strip()
            if not stripped:
                # A trailing or separating blank line is not a record.
                continue
            res = stripped.split('\t')
            amplicons.append(hg19.interval(res[0], int(res[1]), int(res[2])))
    return amplicons

예제 #4

0

파일 보기

 def build_segments(self, bed_data = None):
   """Scatter-plot the values of ``bed_data`` along the circular layout.

   For every entry of the module-level ``start_points`` the matching cycle
   segment is looked up, the ``bed_data`` records intersecting it are
   sampled every ``self.point_spacing`` positions, and each sample is
   placed at a radius between ``self.track_rmin`` and ``self.track_rmax``
   according to its clamped ``info['value']``. Points are drawn on the
   module-level ``ax``.

   Relies on names not defined in this method: ``total_length_with_spacing``,
   ``global_rot``, ``start_points``, ``lens``, ``segSeqD``, ``cycle``,
   ``pol2cart``, ``np``, ``math``, ``hg19`` and ``ax``.

   Args:
     bed_data: interval list to plot; defaults to ``self.bed_data``.
   """
   if bed_data is None:
     bed_data = self.bed_data
   # Outline markers (records without 'fill' in their info).
   points_x = []
   points_y = []
   colors = []
   
   # Filled markers (records with 'fill' in their info).
   fpoints_x = []
   fpoints_y = []
   fcolors = []
   
   previous_end = total_length_with_spacing*(global_rot/360.0)    
   for ind,sp in enumerate(start_points):
     start_point = int(previous_end - sp)
     # NOTE(review): start_angle and end_angle are not used below.
     start_angle = start_point/total_length_with_spacing*360
     end_angle = (start_point - lens[ind])/total_length_with_spacing*360
     
     #segseqD referenced as global variable here because I'm lazy
     segment = segSeqD[cycle[ind][0]] 
     strand = cycle[ind][1]
     hits = [h[0] for h in bed_data.intersection([segment])]
     # color_subhits is only defined, and only read, when a color bed is set.
     if self.color_bed is not None:
       color_subhits = hg19.interval_list([h[0] for h in self.color_bed.intersection([segment])])      
     for h in hits:
       for pos in xrange(h.start, h.end, self.point_spacing):
         # Skip sample positions of the hit that fall outside the segment.
         if pos > segment.end or pos < segment.start:
           continue
         # Colour priority: overlapping color-bed record, then the hit's own
         # 'color', then the track default self.color.
         if self.color_bed is not None:
           temp = hg19.interval(h.chrom, pos, pos)
           color_hits = color_subhits.intersection([temp],self.point_spacing)
           if len(color_hits) != 0:
             color = color_hits[0][0].info['color']
           else:
             color = self.color if 'color' not in h.info else h.info['color']
         else:
           color = self.color if 'color' not in h.info else h.info['color']
         # Offset of the sample from start_point; measured from segment.start
         # on '+' and from segment.end otherwise.
         # NOTE(review): normEnd is computed but not used below.
         if strand == "+":
           normStart = start_point - max(0,pos-segment.start)
           normEnd = start_point - min(segment.end-segment.start,pos-segment.start)
         else:
           normEnd = start_point - min(segment.end-segment.start,segment.end-pos)
           normStart = start_point - max(0,segment.end - pos)
         # Clamp the value into [self.ymin, self.ymax].
         hvalue = h.info['value'] if h.info['value'] > self.ymin else self.ymin
         hvalue = hvalue if hvalue < self.ymax else self.ymax
         # Linear position within the y range; replaced by a log10 position
         # when self.is_log is set.
         y_scale_value = (1.*hvalue-self.ymin)/(self.ymax-self.ymin)          
         if self.is_log:
           y_scale_value = (math.log10(hvalue)-math.log10(self.ymin))/(math.log10(self.ymax)-math.log10(self.ymin))
         r_scale_value = y_scale_value*(self.track_rmax-self.track_rmin)+self.track_rmin                    
         x_s,y_s = pol2cart(r_scale_value,normStart/total_length_with_spacing*2*np.pi)
         # The 'foo =' assignments only capture return values; foo is unused.
         if 'fill' in h.info:
           foo = fpoints_x.append(x_s)
           foo = fpoints_y.append(y_s)
           fcolors.append(color)          
         else:
           foo = points_x.append(x_s)
           foo = points_y.append(y_s)
           colors.append(color)
   # Outline points use hollow 'o' markers, filled points use '*' markers.
   foo = ax.scatter(points_x,points_y,marker='o',s=1,linewidths=0.01,facecolors='none',color=colors)  
   foo = ax.scatter(fpoints_x,fpoints_y,marker='*',s=1,linewidths=0.01,color=fcolors)

예제 #5

0

파일 보기

def build_genebed_from_fpkm(fpkm):
  """Turn a gene-id -> FPKM mapping into a sorted ``hg19.interval_list``.

  Gene ids missing from the module-level ``ensembl_grc37_map`` are ignored.
  Every kept gene yields an interval spanning the gene's chrom/start/end
  whose info holds the FPKM under ``'value'`` and the gene's
  ``info['Name']`` under ``'name'``.
  """
  gene_track = hg19.interval_list()
  for gene_id, expression in fpkm.items():
    if gene_id in ensembl_grc37_map:
      gene = ensembl_grc37_map[gene_id]
      annotation = {'value':expression, 'name':gene.info['Name']}
      gene_track.append(hg19.interval(gene.chrom, gene.start, gene.end, info=annotation))
  gene_track.sort()
  return gene_track

예제 #6

0

파일 보기

파일: find_cycles.py 프로젝트: namphuon/pancancer

def classify_amplicon(amplicon, threshold_copy=4, min_length=50000):
    """Classify an amplicon as 'Cyclic', 'Complex' or 'Amplification'.

    Args:
        amplicon: dict with ``'interval_map'``, ``'segment_map'`` and
            ``'cycle_map'``. Each cycle_map value has a ``'copy_count'`` and
            a ``'cycle'`` list of segment ids suffixed with one orientation
            character; the id ``'0'`` is skipped when collecting segments.
        threshold_copy: minimum length-weighted mean ``copy_count`` of a
            cycle's segments for the cycle to count as cyclic.
        min_length: minimum summed segment length for a cyclic call.

    Returns:
        'Cyclic' if any cycle whose first step does not start with '0'
        passes both thresholds; otherwise 'Complex' if any cycle spans more
        than one chromosome or touches more than one interval; otherwise
        'Amplification'.
    """
    intervals = hg19.interval_list(list(amplicon['interval_map'].values()))
    intervals.sort()
    cycle_map = amplicon['cycle_map']
    segment_map = amplicon['segment_map']
    cycles = sorted(cycle_map.keys(),
                    key=lambda x: cycle_map[x]['copy_count'],
                    reverse=True)
    iscycle = False
    iscomplex = False
    for cycle in cycles:
        path = cycle_map[cycle]['cycle']
        segments = hg19.interval_list(
            [segment_map[s[0:-1]] for s in path if s[0:-1] != '0'])
        segments.sort()
        # Builtin set replaces sets.Set, which no longer exists in Python 3.
        hits = hg19.interval_list(
            set(h[0] for h in intervals.intersection(segments)))
        chrs = set(s.chrom for s in segments)
        if len(chrs) > 1 or len(hits) > 1:
            iscomplex = True
        # Sometimes the length is 0, not sure why
        length = sum(s.end - s.start for s in segments)
        if length == 0:
            continue
        # Only a path whose first step does not start with '0' is tested
        # against the thresholds.
        if path[0][0] != '0':
            # Length-weighted mean copy count across the cycle.
            copies = sum((s.end - s.start) * s.info['copy_count']
                         for s in segments) / length
            if copies < threshold_copy or length < min_length:
                continue
            iscycle = True
    if iscycle:
        return 'Cyclic'
    if iscomplex:
        return 'Complex'
    return 'Amplification'

예제 #7

0

파일 보기

 def load_bed(bed_file, value = None, log = False, sep='\t'):
   """Load a delimited BED-like file into a sorted ``hg19.interval_list``.

   Args:
     bed_file: path of the file to read.
     value: when not None, stored as ``info['value']`` of every interval and
       the fourth column is not read.
     log: when True (and ``value`` is None) the fourth column is stored as
       ``10**x`` instead of ``x``.
     sep: field separator.

   Returns:
     The sorted interval list; blank lines are skipped.
   """
   bed_data = hg19.interval_list()
   # The context manager closes the handle even when a line fails to parse.
   with open(bed_file) as bed_handle:
     for line in bed_handle:
       if not line.strip():
         # A blank line is not a record.
         continue
       res = line.split(sep)
       if value is None:
         score = float(res[3])
         if log:
           score = 10**score
       else:
         score = value
       bed_data.append(hg19.interval(res[0], int(res[1]), int(res[2]), info={'value':score}))
   bed_data.sort()
   return bed_data

예제 #8

0

파일 보기

def load_ensembl_grc37(gtf_file='/pedigree2/projects/namphuon/data/references/hg19/annotations/Homo_sapiens.GRCh37.64.gtf'):
  """Collapse the records of a GTF annotation into one interval per gene.

  Args:
    gtf_file: path of the tab-separated GTF file. Defaults to the location
      this function previously had hard-coded.

  Returns:
    ``(genes, gene_map)``. ``genes`` is a sorted ``hg19.interval_list`` with
    one interval per ``gene_id``, spanning the minimum start to the maximum
    end of that gene's records. Its info holds ``'intervals'`` (the gene's
    records), ``'GeneID'`` and ``'Name'`` (first ``gene_name`` found, else
    the gene id). ``gene_map`` maps each gene id to its interval.

  Raises:
    KeyError: when a record has no ``gene_id`` attribute.
  """
  records_by_gene = {}
  # The context manager closes the file even if a record fails to parse.
  with open(gtf_file, 'r') as gtf_handle:
    for line in gtf_handle:
      if not line.strip() or line.startswith('#'):
        # Blank lines and '#' header lines are not records.
        continue
      res = line.split('\t')
      # The last column holds '; '-separated 'key "value"' attribute pairs.
      info = dict([r.strip().replace('"','').split(' ') for r in res[-1].strip().split('; ') if len(r.split(' ')) == 2])
      record = hg19.interval("chr%s" % res[0],int(res[3]),int(res[4]),info={'data':info})
      records_by_gene.setdefault(info['gene_id'],[]).append(record)
  ensembl_grc37 = hg19.interval_list()
  for g, members in records_by_gene.items():
    start = min(e.start for e in members)
    end = max(e.end for e in members)
    names = [e.info['data']['gene_name'] for e in members if 'gene_name' in e.info['data']]
    name = names[0] if names else g
    # The chromosome is taken from the gene's last record. The earlier code
    # reached the same record through a leaked comprehension variable, which
    # works on Python 2 only.
    chrom = members[-1].chrom
    ensembl_grc37.append(hg19.interval("%s" % chrom, start, end, info={'intervals':members,'GeneID':g,'Name':name}))
  ensembl_grc37.sort()
  ensembl_grc37_map = {e.info['GeneID']: e for e in ensembl_grc37}
  return (ensembl_grc37, ensembl_grc37_map)

예제 #9

0

파일 보기

파일: find_cycles.py 프로젝트: namphuon/pancancer

def get_cyclic_path(amplicon, threshold=10000):
    """Collect the segments of every sufficiently long cyclic path.

    A cycle qualifies when its first step does not start with '0' and the
    summed length of its segments (steps starting with '0' excluded) is at
    least ``threshold``. The segments of qualifying cycles are appended, in
    cycle order, to the returned ``hg19.interval_list``.
    """
    paths = hg19.interval_list()
    for cycle_id in amplicon['cycle_map'].keys():
        steps = amplicon['cycle_map'][cycle_id]['cycle']

        total_length = 0
        for step in steps:
            if step[0] == '0':
                continue
            seg = amplicon['segment_map'][step[0:-1]]
            total_length += seg.end - seg.start

        is_cyclic = steps[0][0] != '0'
        if is_cyclic and total_length >= threshold:
            for step in steps:
                paths.extend([amplicon['segment_map'][step[0:-1]]])
    return paths

예제 #10

0

파일 보기

def find_peaks(bed_data, window = 1000):
  """Keep the highest-valued interval of each run of nearby intervals.

  ``bed_data`` is sorted in place and scanned once. A run starts at an
  interval and continues while later intervals are on the same chromosome
  and intersect the run's first interval. The interval with the largest
  ``info['value']`` seen in the run is kept.

  Args:
    bed_data: interval list whose items expose ``chrom``, ``info['value']``
      and ``intersects(other, extend)``.
    window: extension passed to ``intersects`` when deciding that a run has
      ended.

  Returns:
    An ``hg19.interval_list`` of the kept intervals; empty for empty input.
  """
  keeps = hg19.interval_list()
  bed_data.sort()
  if len(bed_data) == 0:
    # Nothing to scan; previously this raised IndexError on bed_data[0].
    return keeps
  current = bed_data[0]
  start = current
  for candidate in bed_data:
    if candidate.chrom != start.chrom:
      keeps.append(current)
      current = candidate
      start = current
    # NOTE(review): this test uses a fixed 10000 extension while the
    # run-ending test below uses `window`; confirm the literal is intended.
    elif candidate.intersects(start,10000) and candidate.info['value'] > current.info['value']:
      current = candidate
    elif not candidate.intersects(start,window):
      keeps.append(current)
      current = candidate
      start = current
  keeps.append(current)
  return keeps

예제 #11

0

파일 보기

        "unable to set AA_DATA_REPO variable. Setting to working directory")
    DATA_REPO = '.'
if DATA_REPO == '.' or DATA_REPO == '':
    # Warn that the data repository falls back to the working directory.
    logging.warning(
        "#TIME " + '%.3f\t' % (clock() - TSTART) +
        "AA_DATA_REPO not set or empty. Setting to working directory")
    DATA_REPO = '.'

logging.info("#TIME " + '%.3f\t' % (clock() - TSTART) +
             "Loading libraries and reference annotations for: " + args.ref)
import hg19util as hg
import bam_to_breakpoint as b2b

logging.info("#TIME " + '%.3f\t' % (clock() - TSTART) +
             "Initiating bam_to_breakpoint object for: " + args.bam)
rdList0 = hg.interval_list(rdAlts, 'bed', exclude_info_string=True)
rdList = hg.interval_list([r for r in rdList0])
coverage_stats_file = open(hg.DATA_REPO + "/coverage.stats")
cstats = None
cb = bamFile
if cbam is not None:
    cb = cbam
for l in coverage_stats_file:
    ll = l.strip().split()
    if ll[0] == os.path.abspath(cb.filename):
        cstats = tuple(map(float, ll[1:]))
coverage_stats_file.close()
coverage_windows = None
if cbed is not None:
    coverage_windows = hg.interval_list(cbed, 'bed')
    coverage_windows.sort()

예제 #12

0

파일 보기

파일: amplified_intervals.py 프로젝트: jluebeck/AmpliconArchitect

# Parse the command line; `parser` is configured before this point.
args = parser.parse_args()

# REF is set before hg19util is imported.
# NOTE(review): presumably hg19util reads global_names.REF at import time, so
# this ordering matters - confirm before moving the import.
global_names.REF = args.ref
import hg19util as hg

# An explicit --bed value overrides whatever rdAlts held before.
if args.bed != '':
    rdAlts = args.bed

# Output name: the explicit --out prefix, else derived from the input bed.
if args.out != '':
    outname = args.out + ".bed"
else:
    outname = os.path.splitext(rdAlts)[0] + "_amplified.bed"

GAIN, CNSIZE_MIN = args.gain, args.cnsize_min

rdList0 = hg.interval_list(rdAlts, 'bed')
# Sanity-check the first record only: it must carry extra columns and its
# last column must parse as a float.
if rdList0:
    try:
        if len(rdList0[0].info) == 0:
            sys.stderr.write(
                "ERROR: CNV estimate bed file had too few columns.\n"
                "Must contain: chr  pos1  pos2  cnv_estimate\n")
            sys.exit(1)
        _ = float(rdList0[0].info[-1])

    except ValueError:
        sys.stderr.write(
            "ERROR: CNV estimates must be in last column of bed file.\n")
        sys.exit(1)

# Keep only intervals whose last-column estimate exceeds GAIN.
rdList = hg.interval_list([r for r in rdList0 if float(r.info[-1]) > GAIN])

예제 #13

0

파일 보기

else:
  samp_name = args.sname.rsplit("/")[-1]

fname = samp_name

bed_feat_dict = {}
if args.bed_files:
  for i,j in zip(args.bed_files,args.feature_labels):
    print j,i
    #feature name -> chromosome -> ordered list of positions
    bed_list = parse_bed_file(i)
    bed_feat_dict[j] = feat_bed_to_lookup(bed_list)

outer_bar = max(bed_track_height*(len(bed_feat_dict)+2),10)

bed_data = hg19.interval_list([hg19.interval('chr8', 127638302, 127938302, info={'value':int(random.random()*100)}), hg19.interval('chr8', 128716346,128746346, info={'value':int(random.random()*100)})])
bed_data.sort()

args.prefix_name = '/pedigree2/projects/namphuon/programs/CycleViz/COLO320DM'
args.cycles_file = '/pedigree2/projects/namphuon/data/paul_gbm39/unsorted/COLO320_DM_S270/onco_amplicon1_cycles.txt'
args.fpkm_file = '/pedigree2/projects/namphuon/data/paul_gbm39/unsorted/COLO320_DM_S270/colo320dm.fpkm.csv'
args.wgs_file = '/pedigree2/projects/namphuon/data/paul_gbm39/unsorted/COLO320_DM_S270/colo320dm.wgs.1000.pileup.log.bed'
cycles_numbers = ['6', '9', '10', '12', '13', '14', '15', '16','19']
args.atac_peak_file = '/pedigree2/projects/namphuon/data/paul_gbm39/unsorted/ATAC-seq/SRC1655_summits_250ext_q1e6_nochrM_merged.bed'
args.atac_file = '/pedigree2/projects/namphuon/results/paul_gbm39/ATAC/COLO320DM.atac.1000.pileup.log.bed'


args.prefix_name = '/pedigree2/projects/namphuon/programs/CycleViz/PC3'
args.cycles_file = '/nucleus/pedigree/projects/extrachromosome/data/turner2017/reconstruction/run14/FF-77_amplicon4_cycles.txt'
args.fpkm_file = '/pedigree2/projects/namphuon/results/paul_gbm39/rnaseq/PC3.fpkm.csv'
args.wgs_file = '/pedigree2/projects/namphuon/results/paul_gbm39/PC3/PC3.wgs.1000.pileup.log.bed'

예제 #14

0

파일 보기

파일: amplified_intervals.py 프로젝트: ying7777/AmpliconArchitect

# REF is set before hg19util is imported.
# NOTE(review): presumably hg19util reads global_names.REF at import time, so
# this ordering matters - confirm before moving the import.
global_names.REF = args.ref
import hg19util as hg


# An explicit --bed value overrides whatever rdAlts held before.
if args.bed != '':
    rdAlts = args.bed

# Output name: the explicit --out prefix, else derived from the input bed.
if args.out != '':
    outname= args.out + ".bed"
else:
    outname = os.path.splitext(rdAlts)[0] + "_amplified.bed"

GAIN,CNSIZE_MIN = args.gain,args.cnsize_min

rdList0 = hg.interval_list(rdAlts, 'bed')
# Sanity-check the first record only: it must carry extra columns and its
# last column must parse as a float.
if rdList0:
    try:
        if len(rdList0[0].info) == 0:
            sys.stderr.write("ERROR: CNV estimate bed file had too few columns.\n"
                             "Must contain: chr  pos1  pos2  cnv_estimate\n")
            sys.exit(1)
        _ = float(rdList0[0].info[-1])

    except ValueError:
        sys.stderr.write("ERROR: CNV estimates must be in last column of bed file.\n")
        sys.exit(1)

# Keep only intervals whose last-column estimate exceeds GAIN.
rdList = hg.interval_list([r for r in rdList0 if float(r.info[-1]) > GAIN ])

if args.bam != "":

예제 #15

0

파일 보기

파일: pacbiobam_to_cycles.py 프로젝트: yuanjingnan/AmpliconArchitect

from collections import defaultdict
import pysam

import hg19util as hg

# Alignment file to scan; the path is hard-coded.
f = pysam.AlignmentFile("/pedigree2/projects/namphuon/data/SCC090/pacbio/merged.bam")

# Per-key lists with an empty-list default; filled outside this excerpt.
segs = defaultdict(lambda: [], {})
readlen = {}

# One interval per reference name in the alignment header.
refi = hg.interval_list([hg.interval(i) for i in f.references])

segi = 1

# qindex maps a query name to the order in which it was first seen;
# qlist holds the names in that same order.
qi = 0
qindex = {}
qlist = []

for l in f.fetch():
    # Reference names are split on ':' and '-', i.e. treated as
    # "<name>:<start>-..."; alignment coordinates are offset by <start>.
    ref = l.reference_name.split(':')[0]
    ref_start = int(l.reference_name.split(':')[1].split('-')[0]) + l.reference_start
    ref_end = int(l.reference_name.split(':')[1].split('-')[0]) + l.reference_end
    qstart = l.query_alignment_start
    qend = l.query_alignment_end
    if l.query_name not in qindex:
        qindex[l.query_name] = qi
        qlist.append(l.query_name)
        qi += 1
    # For reverse-strand alignments the query coordinates are mirrored
    # against the inferred query length.
    if l.is_reverse:
        qstart = l.infer_query_length() - l.query_alignment_end
        qend = l.infer_query_length() - l.query_alignment_start

예제 #16

0

파일 보기



# Look up cached coverage statistics for the BAM being analysed. Each line of
# coverage.stats is "<absolute bam path> <float> <float> ...". The control BAM
# (cbam) is used for the lookup when one is given.
cstats = None
cb = bamFile
if cbam is not None:
    cb = cbam
cb_path = os.path.abspath(cb.filename)
# The context manager closes the stats file even if a line fails to parse.
with open(hg.DATA_REPO + "/coverage.stats") as coverage_stats_file:
    for l in coverage_stats_file:
        ll = l.strip().split()
        if not ll:
            # A blank line has no path field to compare.
            continue
        if ll[0] == cb_path:
            # No break: when a path is listed more than once the last entry wins.
            cstats = tuple(map(float, ll[1:]))
coverage_windows=None
if cbed is not None:
    coverage_windows=hg.interval_list(cbed, 'bed')
    coverage_windows.sort()
# No cached statistics: build a bam_to_breakpoint object and read its
# basic_stats, preferring the control BAM.
if cstats is None and cbam is not None:
    cbam2b = b2b.bam_to_breakpoint(cbam, coverage_stats=cstats, coverage_windows=coverage_windows)
    cstats = cbam2b.basic_stats
elif cstats is None:
    bamFileb2b = b2b.bam_to_breakpoint(bamFile, coverage_stats=cstats, coverage_windows=coverage_windows)
    cstats = bamFileb2b.basic_stats


final = args.final

# Stop when the first statistic is already at or below the requested value;
# otherwise compute the requested-to-current ratio.
if cstats[0] <= final:
    exit()
ratio = float(final) / float(cstats[0])

예제 #17

0

파일 보기

파일: amplified_intervals.py 프로젝트: jihe-liu/AmpliconArchitect

    metavar='FILE',
    action='store',
    type=str,
    nargs=1,
    default=[])
args = parser.parse_args()
rdAltsl = []
if args.bed[0] != '':
    rdAltsl.append(args.bed[0])
elif len(args.bedlist) != 0 and args.bedlist[0] != '':
    for l in open(args.bedlist[0]):
        rdAltsl.append(l.strip())

for rdAlts in rdAltsl:

    rdList0 = hg.interval_list(rdAlts, 'bed')
    rdList = hg.interval_list([r for r in rdList0 if float(r.info[1]) > GAIN])

    if args.bam != "":
        import bam_to_breakpoint as b2b
        if os.path.splitext(args.bam[0])[-1] == '.cram':
            bamFile = pysam.Samfile(args.bam[0], 'rc')
        else:
            bamFile = pysam.Samfile(args.bam[0], 'rb')
        coverage_stats_file = open(hg.DATA_REPO + "/coverage.stats")
        cstats = None
        cb = bamFile
        for l in coverage_stats_file:
            ll = l.strip().split()
            if ll[0] == os.path.abspath(cb.filename):
                cstats = tuple(map(float, ll[1:]))

예제 #18

0

파일 보기

파일: AmpliconArchitect.py 프로젝트: wisekh6/AmpliconArchitect

                    metavar='FILE',
                    action='store',
                    type=str,
                    nargs=1)
args = parser.parse_args()
rdAlts = args.rdAlts[0]
bamFile = pysam.Samfile(args.bam[0], 'rb')
outName = args.outName[0]
logging.basicConfig(filename=outName + '.log', level=logging.DEBUG)
logging.info("#TIME " + str(clock()) + " import done")
summary_logger = logging.getLogger('summary')
summary_logger.addHandler(logging.FileHandler(outName + '_summary.txt', 'w'))
graph_logger = logging.getLogger('graph')
cycle_logger = logging.getLogger('cycle')

rdList0 = hg.interval_list(rdAlts, 'bed')
rdList = hg.interval_list([r for r in rdList0])
coverage_stats_file = open(hg.DATA_REPO + "/coverage.stats")
cstats = None
for l in coverage_stats_file:
    ll = l.strip().split()
    if ll[0] == os.path.abspath(bamFile.filename):
        cstats = tuple(map(float, ll[1:]))
coverage_stats_file.close()
coverage_windows = None
# coverage_windows=hg.interval_list('universal_coverage_estimation_rep1_seq_coords_hg19.tsv', 'bed')
# coverage_windows.sort()
bamFileb2b = b2b.bam_to_breakpoint(bamFile,
                                   coverage_stats=cstats,
                                   coverage_windows=coverage_windows)
# exit()

예제 #19

0

파일 보기

    def draw_episome(self, input_files, output_file=None, auto_scale=0):
        """Lay out and draw the cycle decompositions of one or more inputs.

        Works in two passes over ``input_files``. The first pass parses each
        input with ``self.readDataFile`` only to measure how much vertical
        space its cycles need. The second pass parses each input again and
        draws it in its own vertical band.

        Args:
            input_files: sequence of pairs; item ``[0]`` is used as the
                label and item ``[1]`` is passed to ``self.readDataFile``.
            output_file: not used in the visible part of this method.
            auto_scale: divides the spacing between decompositions by
                ``1 + auto_scale`` and is forwarded to ``self.drawSections``.
        """
        cycles_section_top = 30
        cycles_section_size = 0
        space_between_decompositions = 90 / (1 + auto_scale)
        bottoms = []
        # Pass 1: accumulate the height of every decomposition and remember
        # the bottom coordinate of each.
        for i in range(len(input_files)):
            if i != 0:
                cycles_section_size += space_between_decompositions
            input_content = input_files[i][1]
            intervals, segments, seg_name_to_index_map, cycles, directions, chr_offs, copy_counts, cycles_names = self.readDataFile(
                input_content)

            # 10 units per cycle element, plus 10 more for each cycle whose
            # last element is not 0.
            number_of_element = sum(len(x) for x in cycles)
            cycles_section_size += number_of_element * 10
            for cycle in cycles:
                if cycle[-1] != 0:
                    cycles_section_size += 10
            bottoms.append(cycles_section_top + cycles_section_size)
        # Each band after the first starts one spacing below the previous bottom.
        tops = [cycles_section_top] + [
            bottom + space_between_decompositions for bottom in bottoms[:-1]
        ]
        cycles_section_bottom = cycles_section_top + cycles_section_size

        # print ('cycle section_top:', cycles_section_top)
        # print ('cycle section_bottom:', cycles_section_bottom)
        # Pass 2: label, convert and draw each decomposition.
        for i in range(len(input_files)):
            input_content = input_files[i][1]
            self.file_names.append(
                Text('%s: %s' % (str(i + 1), input_files[i][0]), 0.5,
                     tops[i] - 27))
            # print ('bottom:', bottoms[i])
            # print ('top:', tops[i])
            intervals, segments, seg_name_to_index_map, cycles, directions, chr_offs, copy_counts, cycles_names = self.readDataFile(
                input_content)
            # Only the first input populates the reconstructed_* attributes.
            if i == 0:
                self.reconstructed_cycles = [cname for cname in cycles_names]
                self.reconstructed_segments = [
                    segment_count for segment_count in range(len(segments))
                ]

            self.compute_chr_offsets(chr_offs)
            # Fall back to intervals derived from the segments when the input
            # provides none.
            if len(intervals) == 0:
                intervals = self.compute_intervals(segments)
            ilist = hg.interval_list([
                hg.interval(chr_name, start_point, end_point)
                for chr_name, start_point, end_point in intervals
            ])
            maxIntvl = self.findMaxIntervals(segments)
            sortedL = {}
            compact = {}
            span = {}
            # Per chromosome: collect segment end points, merge them and
            # compact the merged intervals.
            for ch in maxIntvl.keys():
                sortedL[ch] = self.makeListOfSegmentEndPoints(segments, ch)
                sortedCopy = list(sortedL[ch])
                mergeL = self.mergeIntervals(sortedCopy)
                compact[ch], span[ch] = self.compactIntervals(
                    mergeL, maxIntvl[ch])

            newsegs = self.convertSegmentCoordinates(segments, span, compact,
                                                     maxIntvl, ilist)

            # Sections are drawn once, from the first input, across the whole
            # cycles area.
            if i == 0:
                self.drawSections(intervals, ilist, cycles_section_top,
                                  cycles_section_bottom, auto_scale)

            # self.drawAxesLabels(sortedL, compact, maxIntvl, span, chr_offs, bottoms[i])
            # self.drawAxes(span, compact, maxIntvl, chr_offs, bottoms[i])
            # self.drawAxesDottedLines(sortedL, compact, maxIntvl, span, chr_offs, tops[i], bottoms[i])
            self.drawCycles(newsegs, seg_name_to_index_map, cycles, directions,
                            tops[i], chr_offs, i, copy_counts, cycles_names)

예제 #20

0

파일 보기

파일: AmpliconArchitect.py 프로젝트: chaoszhang/AmpliconArchitect

    reffile.close()
except:
    logging.warning(
        "#TIME " + '%.3f\t' % clock() +
        "unable to set reference in $AA_DATA_REPO/reference.txt. Setting in working directory."
    )

logging.info("#TIME " + '%.3f\t' % clock() +
             " Loading libraries and reference annotations for: " + args.ref)
import hg19util as hg
import bam_to_breakpoint as b2b
from breakpoint_graph import *

logging.info("#TIME " + '%.3f\t' % clock() +
             " Initiating bam_to_breakpoint object for: " + args.bam[0])
rdList0 = hg.interval_list(rdAlts, 'bed')
rdList = hg.interval_list([r for r in rdList0])
coverage_stats_file = open(hg.DATA_REPO + "/coverage.stats")
cstats = None
cb = bamFile
if cbam is not None:
    cb = cbam
for l in coverage_stats_file:
    ll = l.strip().split()
    if ll[0] == os.path.abspath(cb.filename):
        cstats = tuple(map(float, ll[1:]))
coverage_stats_file.close()
coverage_windows = None
if cbed is not None:
    coverage_windows = hg.interval_list(cbed, 'bed')
    coverage_windows.sort()

예제 #21

0

파일 보기

파일: load_cycles.py 프로젝트: auberginekenobi/AmpliconArchitect

# Command line: a single --cycles FILE argument (nargs=1, so the parsed value
# is a one-element list).
parser = argparse.\
ArgumentParser(description="Cycles File")
parser.add_argument('--cycles',
                    dest='cycles_file',
                    help="File listing cycles in amplicon",
                    metavar='FILE',
                    action='store',
                    type=str,
                    nargs=1)
args = parser.parse_args()
cycles_file = args.cycles_file[0]
# Whitespace-split fields of every non-blank line of the cycles file.
ll = [l.strip().split() for l in open(cycles_file) if len(l.strip()) > 0]

# "Segment <id> <chrom> <start> <end>" lines become intervals whose info[0]
# is the integer segment id.
segments = hg.interval_list([
    hg.interval(l[2], int(l[3]), int(l[4]), info=[int(l[1])]) for l in ll
    if l[0] == 'Segment'
])
# Tag each segment by chromosome-name prefix: names starting with 'chr' are
# labelled 'Human', all others 'Viral'.
for s in segments:
    if s.chrom[:3] == 'chr':
        s.info.append('Human')
    else:
        s.info.append('Viral')
segments.sort()
segment_id_dict = {s.info[0]: s for s in segments}

# Lines whose first field contains 'Cycle=' hold ';'-separated key=value pairs.
cycles = []
for c in [l[0].split(';') for l in ll if 'Cycle=' in l[0]]:
    c_dict = {cc.split('=')[0]: cc.split('=')[1] for cc in c}
    new_dict = {}
    new_dict['Cycle'] = int(c_dict['Cycle'])
    new_dict['Copy_count'] = float(c_dict['Copy_count'])

예제 #22

0

파일 보기

파일: amplified_intervals.py 프로젝트: chaoszhang/AmpliconArchitect

sys.setrecursionlimit(10000)
import argparse

import hg19util as hg

GAIN = 5
CNSIZE_MIN = 100000

parser = argparse.\
ArgumentParser(description="Filter and merge amplified intervals")
parser.add_argument('--bed', dest='bed',
                    help="Bed file with list of amplified intervals", metavar='FILE',
                    action='store', type=str, nargs=1)
args = parser.parse_args()
rdAlts = args.bed[0]
rdList0 = hg.interval_list(rdAlts, 'bed')
rdList = hg.interval_list([r for r in rdList0 if float(r.info[1]) > GAIN ])

genome_features = hg.oncogene_list

amplicon_listl = rdList
amplicon_listl = hg.interval_list([a for a in amplicon_listl if a.size() > CNSIZE_MIN]) 
amplicon_listl.sort()


cr = hg.conserved_regions
uc_list = hg.interval_list([])
for a in amplicon_listl:
    if (len(hg.interval_list([a]).intersection(cr)) == 0 or
        a.size() > max(1000000, 10 * sum([a.intersection(ci[1]).size() for ci in hg.interval_list([a]).intersection(cr)])) or
       a.size() - sum([a.intersection(ci[1]).size() for ci in hg.interval_list([a]).intersection(cr)]) > 2000000):