Example #1
0
def main():
    """Print two sample HGVS names and exit.

    Formats two hard-coded chr2 variants against transcript
    ``ENST00000359218.5`` with ``hgvs.format_hgvs_name``, prints each
    resulting name and then calls ``quit()``.

    NOTE(review): because of that ``quit()`` call the VCF-to-PSV conversion
    at the bottom of this function is never reached.  It looks like leftover
    debugging code -- confirm which behaviour is intended before removing
    either half.

    Relies on names defined outside this function: ``options``, ``genome``,
    ``get_transcript``, ``hgvs``, ``vcf`` and ``Variant``.
    """
    # No ``global`` statement is needed: the module-level names are only read.
    separator = '|'

    # Format an HGVS name.
    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'G')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome,
                                      transcript)
    # A single-argument print(...) behaves the same on Python 2 and 3.
    print(hgvs_name)

    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'GA')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome,
                                      transcript)
    # Both names are still run through HGVSName so that a parsing error
    # surfaces before the second print; the parsed objects are not used.
    hgvs.HGVSName(hgvs_name)
    hgvs.HGVSName('ENST00000359218.5:c.10597+1079_10597+1080delTTinsT')

    print(hgvs_name)
    quit()

    # ---- Unreachable while the quit() above is in place ----
    # Open and parse each line of the VCF file; the context manager closes
    # the input handle even if the conversion fails part-way.
    with open(options.input_vcf, 'r') as vcf_handle:
        input_vcf = vcf.Reader(vcf_handle)
        variant = Variant(samples=input_vcf.samples)

        # Open output file
        with open(options.output_vcf, 'w') as output_psv:
            # Generate output file header
            output_psv.write(variant.create_psv_header(separator=separator))

            # Now parse lines in .vcf and output with new format:
            for record in input_vcf:
                # Only output sites that have not been filtered out.  A
                # truthiness test also accepts FILTER being None (presumably
                # PyVCF's value for '.'), where len() would raise TypeError.
                if not record.FILTER:
                    variant.get_from_record(record=record)
                    output_psv.write(variant.put_to_psv(separator=separator))
Example #2
0
def main():
    """Run the HGVS formatting demo, then terminate the interpreter.

    Two hard-coded chr2 variants are formatted against transcript
    ``ENST00000359218.5`` via ``hgvs.format_hgvs_name`` and printed, after
    which ``quit()`` is called.

    NOTE(review): everything after ``quit()`` (the VCF-to-PSV export) is
    currently dead code.  It is preserved here in a cleaned-up form; decide
    whether the demo or the export is the intended behaviour.

    Uses the externally defined names ``options``, ``genome``,
    ``get_transcript``, ``hgvs``, ``vcf`` and ``Variant``.
    """
    # The original ``global options, args`` was dropped: nothing is assigned
    # to those names here, so plain reads resolve to the same globals.
    separator = '|'

    # Format an HGVS name.
    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'G')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
    # print(x) with one argument is valid and equivalent on Python 2 and 3.
    print(hgvs_name)

    chrom, offset, ref, alt = ('chr2', 179616770, 'GAA', 'GA')
    transcript = get_transcript('ENST00000359218.5')
    hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
    # Parse the generated name and a hand-written one; the results are
    # discarded, but a parse failure still aborts before the print below.
    hgvs_str = 'ENST00000359218.5:c.10597+1079_10597+1080delTTinsT'
    hgvs.HGVSName(hgvs_name)
    hgvs.HGVSName(hgvs_str)

    print(hgvs_name)
    quit()

    # ---- Not reached while quit() is called above ----
    # Read the VCF through a context manager so the input file is closed.
    with open(options.input_vcf, 'r') as vcf_file:
        input_vcf = vcf.Reader(vcf_file)
        variant = Variant(samples=input_vcf.samples)

        with open(options.output_vcf, 'w') as output_psv:
            # Header first, then one PSV line per unfiltered record.
            output_psv.write(variant.create_psv_header(separator=separator))

            for record in input_vcf:
                # Skip sites that carry at least one filter.  An empty list
                # or None both count as "not filtered" (assumes PyVCF, which
                # appears to report '.' as None -- len(None) would raise).
                if record.FILTER:
                    continue
                variant.get_from_record(record=record)
                output_psv.write(variant.put_to_psv(separator=separator))
Example #3
0
    def vcf_to_hgvs(self, reference_transcript, vcf_notation):
        """
        Express one VCF-style variant as an HGVS name on the given transcript.

        The coordinate is coerced with int() before the transcript is looked up
        through self._get_transcript, and the name is built by
        hgvs.format_hgvs_name using self.genome.

        @param reference_transcript: identifier of the transcript the HGVS name is written against
        @type reference_transcript: string
        @param vcf_notation: exactly four items, in order: chromosome, coordinate, ref, alt
        @type vcf_notation: tuple of strings
        @return: the HGVS name produced by hgvs.format_hgvs_name
        @rtype: string
        """

        chrom, raw_position, ref_allele, alt_allele = vcf_notation
        position = int(raw_position)

        resolved_transcript = self._get_transcript(reference_transcript)

        return hgvs.format_hgvs_name(
            chrom,
            position,
            ref_allele,
            alt_allele,
            self.genome,
            resolved_transcript,
        )
Example #4
0
    def vcf_to_hgvs(self, reference_transcript, vcf_notation):
        """
        Build the HGVS name of a single VCF-style variant for one transcript.

        The heavy lifting is delegated to Counsyl's HGVS library; see
        https://github.com/counsyl/hgvs for the accepted input formats.

        Args:
            reference_transcript (str): identifier of the transcript the HGVS name is written against;
                it is resolved through ``self._get_transcript``
            vcf_notation (tuple of str): exactly four items, in order: chromosome, coordinate, ref, alt;
                the coordinate is converted with ``int()``

        Returns:
            str: the HGVS name returned by ``hgvs.format_hgvs_name``

        """

        chrom, position_text, ref_allele, alt_allele = vcf_notation
        position = int(position_text)

        target_transcript = self._get_transcript(reference_transcript)

        hgvs_description = hgvs.format_hgvs_name(
            chrom, position, ref_allele, alt_allele, self.genome, target_transcript)
        return hgvs_description
Example #5
0
    return transcripts.get(name)


# Parse the HGVS name into genomic coordinates and alleles.
chrom, offset, ref, alt = hgvs.parse_hgvs_name(
    'NM_000352.3:c.215A>G', genome, get_transcript=get_transcript)
# Build the line explicitly: this prints the four values separated by single
# spaces under both the Python 2 print statement and the Python 3 function.
print('%s %s %s %s' % (chrom, offset, ref, alt))
# Returns variant in VCF style: ('chr11', 17496508, 'T', 'C')
# Notice that since the transcript is on the negative strand, the alleles
# are reverse complemented during conversion.


# Format an HGVS name.
chrom, offset, ref, alt = ('chr11', 17496508, 'T', 'C')
transcript = get_transcript('NM_000352.3')
hgvs_name = hgvs.format_hgvs_name(
    chrom, offset, ref, alt, genome, transcript)
# Single-argument print(...) is equivalent on Python 2 and Python 3.
print(hgvs_name)
# Returns 'NM_000352.3(ABCC8):c.215A>G'


# Note: hgvs_name is rebound here from the formatted string above to a parsed
# HGVSName object.
hgvs_name = hgvs.HGVSName('NM_000352.3:c.215-10A>G')
# fields of the HGVS name are available as attributes:
#
# hgvs_name.transcript = 'NM_000352.3'
# hgvs_name.kind = 'c'
# hgvs_name.mutation_type = '>'
# hgvs_name.cdna_start = hgvs.CDNACoord(215, -10)
# hgvs_name.cdna_end = hgvs.CDNACoord(215, -10)
# hgvs_name.ref_allele = 'A'
# hgvs_name.alt_allele = 'G'
Example #6
0
    return transcripts.get(name)


# Parse the HGVS name into genomic coordinates and alleles.
chrom, offset, ref, alt = hgvs.parse_hgvs_name('NM_000352.3:c.215A>G',
                                               genome,
                                               get_transcript=get_transcript)
# Join the values by hand so the output is the same space-separated line on
# Python 2 and Python 3 (a bare multi-argument print(...) would print a tuple
# on Python 2).
print(' '.join(str(value) for value in (chrom, offset, ref, alt)))
# Returns variant in VCF style: ('chr11', 17496508, 'T', 'C')
# Notice that since the transcript is on the negative strand, the alleles
# are reverse complemented during conversion.

# Format an HGVS name.
chrom, offset, ref, alt = ('chr11', 17496508, 'T', 'C')
transcript = get_transcript('NM_000352.3')
hgvs_name = hgvs.format_hgvs_name(chrom, offset, ref, alt, genome, transcript)
# One-argument print(...) gives identical output on Python 2 and Python 3.
print(hgvs_name)
# Returns 'NM_000352.3(ABCC8):c.215A>G'

# From here on hgvs_name refers to a parsed HGVSName object rather than the
# formatted string printed above.
hgvs_name = hgvs.HGVSName('NM_000352.3:c.215-10A>G')
# fields of the HGVS name are available as attributes:
#
# hgvs_name.transcript = 'NM_000352.3'
# hgvs_name.kind = 'c'
# hgvs_name.mutation_type = '>'
# hgvs_name.cdna_start = hgvs.CDNACoord(215, -10)
# hgvs_name.cdna_end = hgvs.CDNACoord(215, -10)
# hgvs_name.ref_allele = 'A'
# hgvs_name.alt_allele = 'G'

print(hgvs_name.transcript, hgvs_name.kind, hgvs_name.mutation_type,