Example #1
0
    def as_indel(self, ref_fasta):
        """Format this adjacency as one tab-delimited VCF indel record.

        For a deletion (``rearrangement == 'del'``) REF is the reference
        sequence fetched from ``breaks[0] - 1`` to ``breaks[1] - 1`` and ALT
        is the sequence fetched from ``breaks[0] - 1`` to ``breaks[0]``.
        For any other rearrangement REF is fetched from ``breaks[0] - 1`` to
        ``breaks[1]`` and ALT is REF followed by the upper-cased novel
        sequence.

        Args:
            ref_fasta: object exposing ``fetch(chrom, start, end)`` that
                returns reference sequence as a string (presumably a pysam
                FastaFile -- confirm against the caller).

        Returns:
            str: CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO joined by
            tabs.  QUAL and FILTER are always '.'.
        """
        chrom = self.chroms[0]
        # Drop only a literal 'chr' prefix.  str.lstrip('chr') removes any
        # leading run of the characters 'c', 'h' and 'r', which would turn a
        # name such as 'hs37d5' into 's37d5'.
        if chrom.startswith('chr'):
            chrom = chrom[3:]
        pos = self.breaks[0]

        # The reference is always queried with the original chromosome name.
        start = self.breaks[0] - 1
        if self.rearrangement == 'del':
            ref = ref_fasta.fetch(self.chroms[0], start,
                                  self.breaks[1] - 1).upper()
            alt = ref_fasta.fetch(self.chroms[0], start,
                                  self.breaks[0]).upper()
        else:
            ref = ref_fasta.fetch(self.chroms[0], start,
                                  self.breaks[1]).upper()
            alt = ref + self.novel_seq.upper()

        info = {
            'BKPTID': ','.join(self.contigs),
        }

        # read support
        if self.final_support is not None:
            info['SPANNING_READS'] = self.support['spanning']

        # somatic
        if self.somatic:
            info['SOMATIC'] = 'SOMATIC'

        # repeat contraction: only reported for deletions with a repeat unit
        if self.rearrangement == 'del' and self.repeat_seq is not None:
            info['REPEAT_SEQ'] = self.repeat_seq
            if self.repeat_num is not None:
                info['REPEAT_NUM'] = self.repeat_num
            if self.repeat_num_change is not None:
                info['REPEAT_NUM_CHANGE'] = self.repeat_num_change

        # ID, then QUAL and FILTER as '.' (missing value)
        fields = [
            chrom, pos, self.id, ref, alt, '.', '.',
            VCF.info_dict_to_str(info)
        ]
        return '\t'.join(map(str, fields))
Example #2
0
    def as_indel(self, ref_fasta):
        """Format this adjacency as one tab-delimited VCF indel record.

        For a deletion (``rearrangement == 'del'``) REF is the reference
        sequence fetched from ``breaks[0] - 1`` to ``breaks[1] - 1`` and ALT
        is the sequence fetched from ``breaks[0] - 1`` to ``breaks[0]``.
        For any other rearrangement REF is fetched from ``breaks[0] - 1`` to
        ``breaks[1]`` and ALT is REF followed by the upper-cased novel
        sequence.

        Args:
            ref_fasta: object exposing ``fetch(chrom, start, end)`` that
                returns reference sequence as a string (presumably a pysam
                FastaFile -- confirm against the caller).

        Returns:
            str: CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO joined by
            tabs.  QUAL and FILTER are always '.'.
        """
        chrom = self.chroms[0]
        # Drop only a literal 'chr' prefix.  str.lstrip('chr') removes any
        # leading run of the characters 'c', 'h' and 'r', which would turn a
        # name such as 'hs37d5' into 's37d5'.
        if chrom.startswith('chr'):
            chrom = chrom[3:]
        pos = self.breaks[0]

        # The reference is always queried with the original chromosome name.
        start = self.breaks[0] - 1
        if self.rearrangement == 'del':
            ref = ref_fasta.fetch(self.chroms[0], start,
                                  self.breaks[1] - 1).upper()
            alt = ref_fasta.fetch(self.chroms[0], start,
                                  self.breaks[0]).upper()
        else:
            ref = ref_fasta.fetch(self.chroms[0], start,
                                  self.breaks[1]).upper()
            alt = ref + self.novel_seq.upper()

        info = {
            'BKPTID': ','.join(self.contigs),
        }

        # read support
        if self.final_support is not None:
            info['SPANNING_READS'] = self.support['spanning']

        # somatic
        if self.somatic:
            info['SOMATIC'] = 'SOMATIC'

        # repeat contraction: only reported for deletions with a repeat unit
        if self.rearrangement == 'del' and self.repeat_seq is not None:
            info['REPEAT_SEQ'] = self.repeat_seq
            if self.repeat_num is not None:
                info['REPEAT_NUM'] = self.repeat_num
            if self.repeat_num_change is not None:
                info['REPEAT_NUM_CHANGE'] = self.repeat_num_change

        # ID, then QUAL and FILTER as '.' (missing value)
        fields = [
            chrom, pos, self.id, ref, alt, '.', '.',
            VCF.info_dict_to_str(info)
        ]
        return '\t'.join(map(str, fields))
Example #3
0
    def as_sv(self, ref_fasta, id_ext=None, info_ext=None, chrom_ext=None, pos_ext=None):
        """Format this adjacency as one VCF record with a symbolic ALT allele.

        Supported rearrangements are 'del' (<DEL>), 'dup' (<DUP:TANDEM>),
        'inv' (<INV>) and 'ins' (<INS>).

        Args:
            ref_fasta: object exposing ``fetch(chrom, start, end)`` that
                returns reference sequence as a string (presumably a pysam
                FastaFile -- confirm against the caller).
            id_ext: record ID to use instead of ``self.id``.
            info_ext: mapping of extra INFO entries; these override the
                computed ones, except that ``SVLEN == 'NA'`` is ignored.
            chrom_ext: chromosome to report instead of ``self.chroms[0]``.
            pos_ext: position to report instead of ``self.breaks[0]``.

        Returns:
            str or None: CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO
            joined by tabs, or None when the rearrangement type has no
            symbolic allele.
        """
        # symbolic ALT allele and SVTYPE for each supported rearrangement
        symbolic = {
            'del': ('<DEL>', 'DEL'),
            'dup': ('<DUP:TANDEM>', 'DUP'),
            'inv': ('<INV>', 'INV'),
            'ins': ('<INS>', 'INS'),
        }.get(self.rearrangement)
        if symbolic is None:
            # No ALT can be written; previously this path failed on an
            # unbound ``sv_type`` instead of yielding no record.
            return None
        alt, sv_type = symbolic

        chrom = self.chroms[0] if chrom_ext is None else chrom_ext
        pos = self.breaks[0] if pos_ext is None else pos_ext

        # Drop only a literal 'chr' prefix.  str.lstrip('chr') removes any
        # leading run of the characters 'c', 'h' and 'r', which would turn a
        # name such as 'hs37d5' into 's37d5'.
        if chrom.startswith('chr'):
            chrom = chrom[3:]

        # REF is always read at the adjacency's own first breakpoint, even
        # when chrom_ext/pos_ext override the reported location.
        ref = ref_fasta.fetch(self.chroms[0], self.breaks[0] - 1, self.breaks[0]).upper()

        sv_len = self.get_size()
        has_len = isinstance(sv_len, int)
        end = None
        if has_len:
            end = pos + sv_len
            if sv_type == 'DEL':
                # deletions report a negative SVLEN; END is unaffected
                sv_len = -sv_len
        if sv_type == 'INS':
            end = pos

        info = {'SVTYPE': sv_type}
        # END is only emitted when it could be computed
        if end is not None:
            info['END'] = end
        info['BKPTID'] = ','.join(self.contigs)
        if has_len:
            info['SVLEN'] = sv_len

        if sv_type == 'DUP':
            if self.repeat_seq is not None:
                info['REPEAT_SEQ'] = self.repeat_seq
            if self.repeat_num is not None:
                info['REPEAT_NUM'] = self.repeat_num
            if self.repeat_num_change is not None:
                info['REPEAT_NUM_CHANGE'] = self.repeat_num_change

        # read support
        if self.final_support is not None:
            info['SPANNING_READS'] = self.support['spanning']
            if self.support['flanking'] is not None:
                info['FLANKING_PAIRS'] = self.support['flanking']

        # somatic
        if self.somatic:
            info['SOMATIC'] = 'SOMATIC'

        # microhomology
        if self.homol_seq and self.homol_seq[0] != '-':
            homol_len = len(self.homol_seq[0])
            contig_breaks = self.contig_breaks[0]
            # Overlapping contig coordinates (e.g. BWA-mem) yield a
            # confidence interval; adjacent ones (e.g. GMAP) do not.
            if contig_breaks[0] >= contig_breaks[1]:
                info['CIPOS'] = '0,%d' % homol_len
            info['HOMLEN'] = homol_len
            info['HOMSEQ'] = self.homol_seq[0].upper()

        # external info - overrides given info
        if info_ext:
            for key, value in info_ext.items():
                if key == 'SVLEN' and value == 'NA':
                    continue
                info[key] = value

        record_id = self.id if id_ext is None else id_ext
        # QUAL and FILTER are always '.' (missing value)
        fields = [chrom, pos, record_id, ref, alt, '.', '.', VCF.info_dict_to_str(info)]
        return '\t'.join(map(str, fields))
Example #4
0
    def as_breakends(self, ref_fasta, genomic=True, max_novel_seq_len=50, info_ext=None, parids=None, event=None):
        """Format this adjacency as a pair of mated VCF breakend records.

        Args:
            ref_fasta: object exposing ``fetch(chrom, start, end)`` that
                returns reference sequence as a string (presumably a pysam
                FastaFile -- confirm against the caller).
            genomic: SVTYPE is 'BND' when true, 'FND' otherwise.
            max_novel_seq_len: novel sequences longer than this are not
                inlined in ALT; the mate is written as ``<contig>`` with
                contig coordinates instead.
            info_ext: mapping of INFO key to a 2-item sequence, one value per
                breakend; values of any other length are ignored.
            parids: unused here; kept for interface compatibility.
            event: unused here; kept for interface compatibility.

        Returns:
            str: two tab-delimited VCF lines (IDs ``<id>a`` and ``<id>b``)
            joined by a newline.
        """
        # Drop only a literal 'chr' prefix.  str.lstrip('chr') removes any
        # leading run of the characters 'c', 'h' and 'r', which would turn a
        # name such as 'hs37d5' into 's37d5'.
        chroms = [c[3:] if c.startswith('chr') else c for c in self.chroms]
        alt_chroms = chroms[:]
        pos = list(self.breaks)
        alt_pos = pos[:]

        # inserted novel sequences
        inserted_seqs = ['', '']
        novel_seq = self.novel_seq
        if novel_seq and novel_seq != 'NA' and novel_seq != '-':
            if len(novel_seq) > max_novel_seq_len:
                contig_mate = '<%s>' % self.contigs[0]
                alt_chroms[0] = contig_mate
                alt_chroms[1] = contig_mate
                alt_pos[1] = self.contig_breaks[0][0] + 1
                alt_pos[0] = self.contig_breaks[0][1] - 1
            else:
                aligns = self.aligns[0]
                # with a single alignment both breakends follow its strand
                second = aligns[0] if len(aligns) == 1 else aligns[1]
                for i, strand in enumerate((aligns[0].strand, second.strand)):
                    if strand == '+':
                        inserted_seqs[i] = novel_seq
                    else:
                        inserted_seqs[i] = reverse_complement(novel_seq)

        # microhomology, cipos
        cipos = None
        homol_len = None
        homol_seq = None
        if self.homol_seq and self.homol_seq[0] != '-':
            homol_seq = self.homol_seq[0].upper()
            homol_len = len(self.homol_seq[0])
            contig_breaks = self.contig_breaks[0]
            # Overlapping contig coordinates (e.g. BWA-mem) shift the
            # breakpoints and yield a confidence interval; adjacent ones
            # (e.g. GMAP) leave them untouched.
            if contig_breaks[0] >= contig_breaks[1]:
                pos[0] -= homol_len
                alt_pos[1] += homol_len
                cipos = '0,%d' % homol_len

        # NOTE(review): REF bases are read at the original breakpoints even
        # when pos[0] was shifted by the homology length above -- confirm.
        refs = (ref_fasta.fetch(self.chroms[0], self.breaks[0] - 1, self.breaks[0]).upper(),
                ref_fasta.fetch(self.chroms[1], self.breaks[1] - 1, self.breaks[1]).upper())

        ids = ('%s%s' % (self.id, 'a'),
               '%s%s' % (self.id, 'b'))

        svtype = 'BND' if genomic else 'FND'
        event_type = self.rearrangement.upper()
        infos = [{'SVTYPE': svtype, 'MATEID': ids[1], 'EVENTTYPE': event_type},
                 {'SVTYPE': svtype, 'MATEID': ids[0], 'EVENTTYPE': event_type}]

        def set_both(key, value):
            # the same annotation goes on both mates
            for info in infos:
                info[key] = value

        if cipos is not None:
            set_both('CIPOS', cipos)
        if homol_len is not None:
            set_both('HOMLEN', homol_len)
        if homol_seq is not None:
            set_both('HOMSEQ', homol_seq)

        # read support
        if self.final_support is not None:
            set_both('SPANNING_READS', self.support['spanning'])
            # was ``is not NONE``: an undefined name that raised NameError
            if self.support['flanking'] is not None:
                set_both('FLANKING_PAIRS', self.support['flanking'])

        adj_size = self.get_size()
        if isinstance(adj_size, int):
            set_both('SVLEN', adj_size)

        # somatic
        if self.somatic:
            set_both('SOMATIC', 'SOMATIC')

        # contig and contig breakpoints
        if self.contigs:
            set_both('BKPTID', ','.join(self.contigs))

        if self.contig_breaks and len(self.contig_breaks) == len(self.contigs):
            contig_breaks = []
            for bk in self.contig_breaks:
                if len(bk) == 2:
                    contig_breaks.append('%s-%s' % (bk[0], bk[1]))
                else:
                    print('error')

            # only report when every contig has a well-formed break pair
            if len(contig_breaks) == len(self.contigs):
                set_both('CTG_BKS', ','.join(contig_breaks))

        # external info - overrides given info
        if info_ext:
            for key, value in info_ext.items():
                if len(value) == 2:
                    infos[0][key] = value[0]
                    infos[1][key] = value[1]

        # NOTE(review): the RL and RR forms name the mate with ``chroms``
        # while LL and LR use ``alt_chroms``, so the ``<contig>`` placeholder
        # set for long novel sequences is not used there.  Kept as found --
        # confirm whether this is intended.
        if self.orients[0] == 'L':
            # LL
            if self.orients[1] == 'L':
                alts = ('%s%s]%s:%s]' % (refs[0], inserted_seqs[0], alt_chroms[1], alt_pos[1]),
                        '%s%s]%s:%s]' % (refs[1], inserted_seqs[1], alt_chroms[0], alt_pos[0]))
            # LR
            else:
                alts = ('%s%s[%s:%s[' % (refs[0], inserted_seqs[0], alt_chroms[1], alt_pos[1]),
                        ']%s:%s]%s%s' % (alt_chroms[0], alt_pos[0], inserted_seqs[1], refs[1]))
        else:
            # RL
            if self.orients[1] == 'L':
                alts = (']%s:%s]%s%s' % (chroms[1], alt_pos[1], inserted_seqs[0], refs[0]),
                        '%s%s[%s:%s[' % (refs[1], inserted_seqs[1], chroms[0], alt_pos[0]))
            # RR
            else:
                alts = ('[%s:%s[%s%s' % (chroms[1], alt_pos[1], inserted_seqs[0], refs[0]),
                        '[%s:%s[%s%s' % (chroms[0], alt_pos[0], inserted_seqs[1], refs[1]))

        # QUAL and FILTER are always '.' (missing value)
        breakends = [
            '\t'.join([chroms[i], str(pos[i]), ids[i], refs[i], alts[i], '.', '.',
                       VCF.info_dict_to_str(infos[i])])
            for i in range(2)
        ]

        return '\n'.join(breakends)
Example #5
0
    def as_sv(self,
              ref_fasta,
              id_ext=None,
              info_ext=None,
              chrom_ext=None,
              pos_ext=None):
        """Format this adjacency as one VCF record with a symbolic ALT allele.

        Supported rearrangements are 'del' (<DEL>), 'dup' (<DUP:TANDEM>),
        'inv' (<INV>) and 'ins' (<INS>).

        Args:
            ref_fasta: object exposing ``fetch(chrom, start, end)`` that
                returns reference sequence as a string (presumably a pysam
                FastaFile -- confirm against the caller).
            id_ext: record ID to use instead of ``self.id``.
            info_ext: mapping of extra INFO entries; these override the
                computed ones, except that ``SVLEN == 'NA'`` is ignored.
            chrom_ext: chromosome to report instead of ``self.chroms[0]``.
            pos_ext: position to report instead of ``self.breaks[0]``.

        Returns:
            str or None: CHROM, POS, ID, REF, ALT, QUAL, FILTER and INFO
            joined by tabs, or None when the rearrangement type has no
            symbolic allele.
        """
        # symbolic ALT allele and SVTYPE for each supported rearrangement
        symbolic = {
            'del': ('<DEL>', 'DEL'),
            'dup': ('<DUP:TANDEM>', 'DUP'),
            'inv': ('<INV>', 'INV'),
            'ins': ('<INS>', 'INS'),
        }.get(self.rearrangement)
        if symbolic is None:
            # No ALT can be written; previously this path failed on an
            # unbound ``sv_type`` instead of yielding no record.
            return None
        alt, sv_type = symbolic

        chrom = self.chroms[0] if chrom_ext is None else chrom_ext
        pos = self.breaks[0] if pos_ext is None else pos_ext

        # Drop only a literal 'chr' prefix.  str.lstrip('chr') removes any
        # leading run of the characters 'c', 'h' and 'r', which would turn a
        # name such as 'hs37d5' into 's37d5'.
        if chrom.startswith('chr'):
            chrom = chrom[3:]

        # REF is always read at the adjacency's own first breakpoint, even
        # when chrom_ext/pos_ext override the reported location.
        ref = ref_fasta.fetch(self.chroms[0], self.breaks[0] - 1,
                              self.breaks[0]).upper()

        sv_len = self.get_size()
        has_len = isinstance(sv_len, int)
        end = None
        if has_len:
            end = pos + sv_len
            if sv_type == 'DEL':
                # deletions report a negative SVLEN; END is unaffected
                sv_len = -sv_len
        if sv_type == 'INS':
            end = pos

        info = {'SVTYPE': sv_type}
        # END is only emitted when it could be computed
        if end is not None:
            info['END'] = end
        info['BKPTID'] = ','.join(self.contigs)
        if has_len:
            info['SVLEN'] = sv_len

        if sv_type == 'DUP':
            if self.repeat_seq is not None:
                info['REPEAT_SEQ'] = self.repeat_seq
            if self.repeat_num is not None:
                info['REPEAT_NUM'] = self.repeat_num
            if self.repeat_num_change is not None:
                info['REPEAT_NUM_CHANGE'] = self.repeat_num_change

        # read support
        if self.final_support is not None:
            info['SPANNING_READS'] = self.support['spanning']
            if self.support['flanking'] is not None:
                info['FLANKING_PAIRS'] = self.support['flanking']

        # somatic
        if self.somatic:
            info['SOMATIC'] = 'SOMATIC'

        # microhomology
        if self.homol_seq and self.homol_seq[0] != '-':
            homol_len = len(self.homol_seq[0])
            contig_breaks = self.contig_breaks[0]
            # Overlapping contig coordinates (e.g. BWA-mem) yield a
            # confidence interval; adjacent ones (e.g. GMAP) do not.
            if contig_breaks[0] >= contig_breaks[1]:
                info['CIPOS'] = '0,%d' % homol_len
            info['HOMLEN'] = homol_len
            info['HOMSEQ'] = self.homol_seq[0].upper()

        # external info - overrides given info
        if info_ext:
            for key, value in info_ext.items():
                if key == 'SVLEN' and value == 'NA':
                    continue
                info[key] = value

        record_id = self.id if id_ext is None else id_ext
        # QUAL and FILTER are always '.' (missing value)
        fields = [
            chrom, pos, record_id, ref, alt, '.', '.',
            VCF.info_dict_to_str(info)
        ]
        return '\t'.join(map(str, fields))
Example #6
0
    def as_breakends(self,
                     ref_fasta,
                     genomic=True,
                     max_novel_seq_len=50,
                     info_ext=None,
                     parids=None,
                     event=None):
        """Format this adjacency as a pair of mated VCF breakend records.

        Args:
            ref_fasta: object exposing ``fetch(chrom, start, end)`` that
                returns reference sequence as a string (presumably a pysam
                FastaFile -- confirm against the caller).
            genomic: SVTYPE is 'BND' when true, 'FND' otherwise.
            max_novel_seq_len: novel sequences longer than this are not
                inlined in ALT; the mate is written as ``<contig>`` with
                contig coordinates instead.
            info_ext: mapping of INFO key to a 2-item sequence, one value per
                breakend; values of any other length are ignored.
            parids: unused here; kept for interface compatibility.
            event: unused here; kept for interface compatibility.

        Returns:
            str: two tab-delimited VCF lines (IDs ``<id>a`` and ``<id>b``)
            joined by a newline.
        """
        # Drop only a literal 'chr' prefix.  str.lstrip('chr') removes any
        # leading run of the characters 'c', 'h' and 'r', which would turn a
        # name such as 'hs37d5' into 's37d5'.
        chroms = [c[3:] if c.startswith('chr') else c for c in self.chroms]
        alt_chroms = chroms[:]
        pos = list(self.breaks)
        alt_pos = pos[:]

        # inserted novel sequences
        inserted_seqs = ['', '']
        novel_seq = self.novel_seq
        if novel_seq and novel_seq != 'NA' and novel_seq != '-':
            if len(novel_seq) > max_novel_seq_len:
                contig_mate = '<%s>' % self.contigs[0]
                alt_chroms[0] = contig_mate
                alt_chroms[1] = contig_mate
                alt_pos[1] = self.contig_breaks[0][0] + 1
                alt_pos[0] = self.contig_breaks[0][1] - 1
            else:
                aligns = self.aligns[0]
                # with a single alignment both breakends follow its strand
                second = aligns[0] if len(aligns) == 1 else aligns[1]
                for i, strand in enumerate((aligns[0].strand, second.strand)):
                    if strand == '+':
                        inserted_seqs[i] = novel_seq
                    else:
                        inserted_seqs[i] = reverse_complement(novel_seq)

        # microhomology, cipos
        cipos = None
        homol_len = None
        homol_seq = None
        if self.homol_seq and self.homol_seq[0] != '-':
            homol_seq = self.homol_seq[0].upper()
            homol_len = len(self.homol_seq[0])
            contig_breaks = self.contig_breaks[0]
            # Overlapping contig coordinates (e.g. BWA-mem) shift the
            # breakpoints and yield a confidence interval; adjacent ones
            # (e.g. GMAP) leave them untouched.
            if contig_breaks[0] >= contig_breaks[1]:
                pos[0] -= homol_len
                alt_pos[1] += homol_len
                cipos = '0,%d' % homol_len

        # NOTE(review): REF bases are read at the original breakpoints even
        # when pos[0] was shifted by the homology length above -- confirm.
        refs = (ref_fasta.fetch(self.chroms[0], self.breaks[0] - 1,
                                self.breaks[0]).upper(),
                ref_fasta.fetch(self.chroms[1], self.breaks[1] - 1,
                                self.breaks[1]).upper())

        ids = ('%s%s' % (self.id, 'a'), '%s%s' % (self.id, 'b'))

        svtype = 'BND' if genomic else 'FND'
        event_type = self.rearrangement.upper()
        infos = [{
            'SVTYPE': svtype,
            'MATEID': ids[1],
            'EVENTTYPE': event_type
        }, {
            'SVTYPE': svtype,
            'MATEID': ids[0],
            'EVENTTYPE': event_type
        }]

        def set_both(key, value):
            # the same annotation goes on both mates
            for info in infos:
                info[key] = value

        if cipos is not None:
            set_both('CIPOS', cipos)
        if homol_len is not None:
            set_both('HOMLEN', homol_len)
        if homol_seq is not None:
            set_both('HOMSEQ', homol_seq)

        # read support
        if self.final_support is not None:
            set_both('SPANNING_READS', self.support['spanning'])
            # was ``is not NONE``: an undefined name that raised NameError
            if self.support['flanking'] is not None:
                set_both('FLANKING_PAIRS', self.support['flanking'])

        adj_size = self.get_size()
        if isinstance(adj_size, int):
            set_both('SVLEN', adj_size)

        # somatic
        if self.somatic:
            set_both('SOMATIC', 'SOMATIC')

        # contig and contig breakpoints
        if self.contigs:
            set_both('BKPTID', ','.join(self.contigs))

        if self.contig_breaks and len(self.contig_breaks) == len(self.contigs):
            contig_breaks = []
            for bk in self.contig_breaks:
                if len(bk) == 2:
                    contig_breaks.append('%s-%s' % (bk[0], bk[1]))
                else:
                    print('error')

            # only report when every contig has a well-formed break pair
            if len(contig_breaks) == len(self.contigs):
                set_both('CTG_BKS', ','.join(contig_breaks))

        # external info - overrides given info
        if info_ext:
            for key, value in info_ext.items():
                if len(value) == 2:
                    infos[0][key] = value[0]
                    infos[1][key] = value[1]

        # NOTE(review): the RL and RR forms name the mate with ``chroms``
        # while LL and LR use ``alt_chroms``, so the ``<contig>`` placeholder
        # set for long novel sequences is not used there.  Kept as found --
        # confirm whether this is intended.
        if self.orients[0] == 'L':
            # LL
            if self.orients[1] == 'L':
                alts = ('%s%s]%s:%s]' %
                        (refs[0], inserted_seqs[0], alt_chroms[1], alt_pos[1]),
                        '%s%s]%s:%s]' %
                        (refs[1], inserted_seqs[1], alt_chroms[0], alt_pos[0]))
            # LR
            else:
                alts = ('%s%s[%s:%s[' %
                        (refs[0], inserted_seqs[0], alt_chroms[1], alt_pos[1]),
                        ']%s:%s]%s%s' %
                        (alt_chroms[0], alt_pos[0], inserted_seqs[1], refs[1]))
        else:
            # RL
            if self.orients[1] == 'L':
                alts = (']%s:%s]%s%s' %
                        (chroms[1], alt_pos[1], inserted_seqs[0], refs[0]),
                        '%s%s[%s:%s[' %
                        (refs[1], inserted_seqs[1], chroms[0], alt_pos[0]))
            # RR
            else:
                alts = ('[%s:%s[%s%s' %
                        (chroms[1], alt_pos[1], inserted_seqs[0], refs[0]),
                        '[%s:%s[%s%s' %
                        (chroms[0], alt_pos[0], inserted_seqs[1], refs[1]))

        # QUAL and FILTER are always '.' (missing value)
        breakends = [
            '\t'.join([
                chroms[i],
                str(pos[i]), ids[i], refs[i], alts[i], '.', '.',
                VCF.info_dict_to_str(infos[i])
            ]) for i in range(2)
        ]

        return '\n'.join(breakends)