def add_chunk(self, sample, pred):
    """Turn the predicted labels for one sample into variants and write them.

    The entries of ``sample.positions['major']`` are grouped into runs of
    equal value; each run consumes the same number of labels from ``pred``.
    The decoded labels of a run, with gap symbols removed, form the
    alternative allele at that position, which is compared against the
    reference sequence fetched for ``sample.ref_name``.

    Args:
        sample: object providing ``ref_name`` and ``positions['major']``.
        pred: sliceable sequence of labels, each a key of
            ``self.label_decoding``.
    """
    reference = self.ref_fasta.fetch(sample.ref_name)
    pending = []
    start = 0
    for major, run in itertools.groupby(sample.positions['major']):
        stop = start + sum(1 for _ in run)
        called = ''.join(self.label_decoding[label] for label in pred[start:stop])
        alt = called.replace(_gap_, '')

        # VCF does not allow an empty REF or ALT.  For a simple insertion or
        # deletion the alleles must carry the base before the event (and POS
        # must point at that base), except at the first position of the
        # contig where the base after the event is used instead.
        if alt:
            ref = reference[major]
        elif major == 0:
            # Deletion of the first base of the contig.
            # NOTE(review): ref and alt end up identical here, so the
            # comparison below treats this as "no variant" and the deletion
            # is not recorded -- confirm that this is intended.
            ref = reference[1]
            alt = reference[1]
        else:
            # Ordinary deletion: anchor on the preceding base.
            major = major - 1
            ref = reference[major:major + 2]
            alt = reference[major]

        # Variants built from major.{minor} positions take these forms:
        #     X -> Y          substitution
        #     prev.X -> prev  deletion
        #     X -> Xyy..      insertion
        # Deletions at consecutive major positions may have to be merged,
        # so variants are queued while their positions stay adjacent and
        # the queue is flushed as soon as the run is broken.
        if alt == ref:
            self.write(pending)
            pending = []
        else:
            variant = vcf.Variant(sample.ref_name, major, ref, alt)
            if pending and major - pending[-1].pos != 1:
                self.write(pending)
                pending = [variant]
            else:
                pending.append(variant)
        start = stop
    self.write(pending)
def write(self, var_queue):
    """Write the queued variants through ``self.writer``.

    An empty queue writes nothing and a single variant is written as is.
    A longer queue is collapsed into one record, which is only possible
    when every entry has a two-character ``ref`` (the form used for
    single-base deletions) and all entries share the same ``chrom``.

    Args:
        var_queue: sequence of variants exposing ``chrom``, ``pos`` and
            ``ref``.

    Raises:
        ValueError: if a queue of two or more variants cannot be merged.
    """
    if len(var_queue) == 0:
        return
    if len(var_queue) == 1:
        self.writer.write_variant(var_queue[0])
        return

    only_deletions = all(len(item.ref) == 2 for item in var_queue)
    single_contig = len({item.chrom for item in var_queue}) == 1
    if not (only_deletions and single_contig):
        raise ValueError('Cannot merge variants: {}.'.format(var_queue))

    head = var_queue[0]
    contig = head.chrom
    position = head.pos
    # The merged REF is the first base of every queued deletion followed by
    # the final base of the last one; the ALT keeps only the anchor base.
    leading_bases = [item.ref[0] for item in var_queue]
    merged_ref = ''.join(leading_bases) + var_queue[-1].ref[-1]
    merged_alt = merged_ref[0]
    self.writer.write_variant(
        vcf.Variant(contig, position, merged_ref, merged_alt))
def write(self, vcf_fh):
    """Write the queued variants to ``vcf_fh`` and empty the queue.

    ``self`` is used as a list-like queue of variants.  An empty queue
    writes nothing and a single variant is written as is.  A longer queue
    is collapsed into one record, which is only possible when every entry
    has a two-character ``ref`` (the form used for single-base deletions)
    and all entries share the same ``chrom``.  After a successful write
    the queue is cleared in place.

    Args:
        vcf_fh: object providing ``write_variant(variant)``.

    Raises:
        ValueError: if a queue of two or more variants cannot be merged.
            The queue is left untouched in that case.
    """
    if len(self) == 1:
        vcf_fh.write_variant(self[0])
    elif len(self) > 1:
        are_dels = all(len(x.ref) == 2 for x in self)
        are_same_ref = len({x.chrom for x in self}) == 1
        if not (are_dels and are_same_ref):
            raise ValueError('Cannot merge variants: {}.'.format(self))
        name = self[0].chrom
        pos = self[0].pos
        # The merged REF is the first base of every queued deletion followed
        # by the final base of the last one; the ALT keeps only the anchor.
        ref = ''.join(x.ref[0] for x in self) + self[-1].ref[-1]
        alt = ref[0]
        # This call used to pass ``info=info`` although no ``info`` is bound
        # in this function, so every merge failed with NameError.  The merged
        # record now relies on the constructor's default.
        # NOTE(review): if the merged record should carry INFO derived from
        # the queued variants, build it here and pass it explicitly.
        merged_var = vcf.Variant(name, pos, ref, alt)
        vcf_fh.write_variant(merged_var)
    # Clear in place so that every holder of this queue sees it emptied.
    del self[:]