def main():
    """Print HGVS names for two hard-coded variants, then exit.

    Both variants are formatted against transcript 'ENST00000359218.5' using
    the module-level ``genome`` and ``get_transcript``.

    NOTE(review): the ``quit()`` call below ends the process, so the
    VCF-to-PSV conversion that follows it is currently unreachable. It is
    left in place to preserve what the script does today; remove it to
    re-enable the conversion.

    When reachable, the conversion reads ``options.input_vcf`` and writes one
    ``separator``-delimited record to ``options.output_vcf`` for every VCF
    record whose FILTER is empty.
    """
    global options, args
    separator = '|'

    # Parse the HGVS name into genomic coordinates and alleles.
    #chrom, offset, ref, alt = hgvs.parse_hgvs_name('ENST00000515609.1:c.30G>T', genome, get_transcript=get_transcript)
    #print chrom, offset, ref, alt

    # Format an HGVS name.
    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'G')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
    print(hgvs_name)

    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'GA')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
    # The parsed objects are not used afterwards; the constructors are still
    # called so that any error they raise surfaces exactly as before
    # (presumably a parse sanity check -- confirm with the author).
    hgvs.HGVSName(hgvs_name)
    hgvs_str = 'ENST00000359218.5:c.10597+1079_10597+1080delTTinsT'
    hgvs.HGVSName(hgvs_str)
    print(hgvs_name)
    quit()

    # Open and parse each line of the vcf file. The handle is managed by a
    # ``with`` block so it is closed even if parsing or writing fails.
    with open(options.input_vcf, 'r') as vcf_handle:
        input_vcf = vcf.Reader(vcf_handle)
        variant = Variant(samples=input_vcf.samples)

        # Open output file
        with open(options.output_vcf, 'w') as output_psv:
            # Generate output file header
            #variant = ConsequenceType(input_vcf.samples)
            output_psv.write(variant.create_psv_header(separator=separator))

            # Now parse lines in .vcf and output with new format:
            for record in input_vcf:
                # Only output sites that have not been filtered out. A
                # truthiness test accepts both an empty list and None, whereas
                # len() would raise TypeError on None.
                if not record.FILTER:
                    #for consequence in range(0, len(record.INFO['CSQ'])):
                    variant.get_from_record(record=record)
                    output_psv.write(variant.put_to_psv(separator=separator))
def main():
    """Print HGVS names for two hard-coded variants, then exit.

    Both variants are formatted against transcript 'ENST00000359218.5' using
    the module-level ``genome`` and ``get_transcript``.

    NOTE(review): the ``quit()`` call below ends the process, so the
    VCF-to-PSV conversion that follows it is currently unreachable. It is
    left in place to preserve what the script does today; remove it to
    re-enable the conversion.

    When reachable, the conversion reads ``options.input_vcf`` and writes one
    ``separator``-delimited record to ``options.output_vcf`` for every VCF
    record whose FILTER is empty.
    """
    global options, args
    separator = '|'

    # Parse the HGVS name into genomic coordinates and alleles.
    #chrom, offset, ref, alt = hgvs.parse_hgvs_name('ENST00000515609.1:c.30G>T', genome, get_transcript=get_transcript)
    #print chrom, offset, ref, alt

    # Format an HGVS name.
    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'G')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
    print(hgvs_name)

    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'GA')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
    # The parsed objects are not used afterwards; the constructors are still
    # called so that any error they raise surfaces exactly as before
    # (presumably a parse sanity check -- confirm with the author).
    hgvs.HGVSName(hgvs_name)
    hgvs_str = 'ENST00000359218.5:c.10597+1079_10597+1080delTTinsT'
    hgvs.HGVSName(hgvs_str)
    print(hgvs_name)
    quit()

    # Open and parse each line of the vcf file. The handle is managed by a
    # ``with`` block so it is closed even if parsing or writing fails.
    with open(options.input_vcf, 'r') as vcf_handle:
        input_vcf = vcf.Reader(vcf_handle)
        variant = Variant(samples=input_vcf.samples)

        # Open output file
        with open(options.output_vcf, 'w') as output_psv:
            # Generate output file header
            #variant = ConsequenceType(input_vcf.samples)
            output_psv.write(variant.create_psv_header(separator=separator))

            # Now parse lines in .vcf and output with new format:
            for record in input_vcf:
                # Only output sites that have not been filtered out. A
                # truthiness test accepts both an empty list and None, whereas
                # len() would raise TypeError on None.
                if not record.FILTER:
                    #for consequence in range(0, len(record.INFO['CSQ'])):
                    variant.get_from_record(record=record)
                    output_psv.write(variant.put_to_psv(separator=separator))
def vcf_to_hgvs(self, reference_transcript, vcf_notation):
    """
    Build the HGVS name of one VCF-style variant against a chosen transcript.

    The transcript is looked up through self._get_transcript and the name is
    produced by hgvs.format_hgvs_name using self.genome.

    @param reference_transcript: the refseq id of the transcript the HGVS
        name should be expressed against
    @type reference_transcript: string
    @param vcf_notation: exactly four elements, in order: chromosome_number,
        coordinate, ref, alt; the coordinate is converted with int()
    @type vcf_notation: tuple of strings
    @return: hgvs notation of the variant in format
        reference_transcript:hgvs_description
    @rtype: string
    """
    chrom, raw_position, ref_allele, alt_allele = vcf_notation
    # Convert before the transcript lookup so a malformed coordinate fails first.
    position = int(raw_position)
    transcript = self._get_transcript(reference_transcript)
    return hgvs.format_hgvs_name(
        chrom, position, ref_allele, alt_allele, self.genome, transcript)
def vcf_to_hgvs(self, reference_transcript, vcf_notation):
    """
    Build the HGVS name of one VCF-style variant against a chosen transcript.

    The transcript is looked up through self._get_transcript and the name is
    produced by hgvs.format_hgvs_name using self.genome. See Counsyl's HGVS
    library for more information on acceptable input formats:
    https://github.com/counsyl/hgvs.

    Args:
        reference_transcript (str): the refseq id of the transcript the HGVS
            name should be expressed against
        vcf_notation (tuple of str): exactly four elements, in order:
            chromosome_number, coordinate, ref, alt; the coordinate is
            converted with int()

    Returns:
        str: hgvs notation of the variant in format
            reference_transcript:hgvs_description
    """
    chrom, raw_position, ref_allele, alt_allele = vcf_notation
    # Convert before the transcript lookup so a malformed coordinate fails first.
    position = int(raw_position)
    transcript = self._get_transcript(reference_transcript)
    return hgvs.format_hgvs_name(
        chrom, position, ref_allele, alt_allele, self.genome, transcript)
return transcripts.get(name) # Parse the HGVS name into genomic coordinates and alleles. chrom, offset, ref, alt = hgvs.parse_hgvs_name( 'NM_000352.3:c.215A>G', genome, get_transcript=get_transcript) print chrom, offset, ref, alt # Returns variant in VCF style: ('chr11', 17496508, 'T', 'C') # Notice that since the transcript is on the negative strand, the alleles # are reverse complemented during conversion. # Format an HGVS name. chrom, offset, ref, alt = ('chr11', 17496508, 'T', 'C') transcript = get_transcript('NM_000352.3') hgvs_name = hgvs.format_hgvs_name( chrom, offset, ref, alt, genome, transcript) print hgvs_name # Returns 'NM_000352.3(ABCC8):c.215A>G' hgvs_name = hgvs.HGVSName('NM_000352.3:c.215-10A>G') # fields of the HGVS name are available as attributes: # # hgvs_name.transcript = 'NM_000352.3' # hgvs_name.kind = 'c' # hgvs_name.mutation_type = '>' # hgvs_name.cdna_start = hgvs.CDNACoord(215, -10) # hgvs_name.cdna_end = hgvs.CDNACoord(215, -10) # hgvs_name.ref_allele = 'A' # hgvs_name.alt_allele = 'G'
return transcripts.get(name) # Parse the HGVS name into genomic coordinates and alleles. chrom, offset, ref, alt = hgvs.parse_hgvs_name('NM_000352.3:c.215A>G', genome, get_transcript=get_transcript) print chrom, offset, ref, alt # Returns variant in VCF style: ('chr11', 17496508, 'T', 'C') # Notice that since the transcript is on the negative strand, the alleles # are reverse complemented during conversion. # Format an HGVS name. chrom, offset, ref, alt = ('chr11', 17496508, 'T', 'C') transcript = get_transcript('NM_000352.3') hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript) print hgvs_name # Returns 'NM_000352.3(ABCC8):c.215A>G' hgvs_name = hgvs.HGVSName('NM_000352.3:c.215-10A>G') # fields of the HGVS name are available as attributes: # # hgvs_name.transcript = 'NM_000352.3' # hgvs_name.kind = 'c' # hgvs_name.mutation_type = '>' # hgvs_name.cdna_start = hgvs.CDNACoord(215, -10) # hgvs_name.cdna_end = hgvs.CDNACoord(215, -10) # hgvs_name.ref_allele = 'A' # hgvs_name.alt_allele = 'G' print(hgvs_name.transcript, hgvs_name.kind, hgvs_name.mutation_type,