Python describe_dna Examples, extractor.describe_dna Python Examples

Example #1

0

Show file

File: test_describe.py Project: mutalyzer/description-extractor

 def test1(self):
     """
     Check the description of an allele that combines an insertion, a
     deletion, a substitution and a duplication.
     """
     reference = 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA'
     sample = 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA'

     allele = describe_dna(reference, sample)

     assert str(allele) == '[5_6insTT;17del;26A>C;35dup]'

Example #2

0

Show file

File: mapper.py Project: mutalyzer/normalizer

def _filter(variants, ref_seq1, ref_seq2):
    """
    Drop the variants that are already present between two sequences.

    The differences between `ref_seq1` and `ref_seq2` are extracted with
    `extractor.describe_dna` and converted with `de_to_hgvs`; every entry of
    `variants` that equals one of those differences is left out.

    :arg variants: Iterable of variants to filter.
    :arg ref_seq1: Sequence used as reference for the extraction.
    :arg ref_seq2: Sequence used as observed for the extraction.

    :returns: List with the remaining variants, in their original order.
    """
    extracted = extractor.describe_dna(ref_seq1, ref_seq2)
    sequences = {"reference": ref_seq1, "observed": ref_seq2}
    sequence_variants = de_to_hgvs(extracted, sequences)

    return [
        variant for variant in variants if variant not in sequence_variants
    ]

Example #3

0

Show file

File: test_describe.py Project: BioinformaticsArchive/description-extractor

 def test2(self):
     """
     Check the description of an allele with three substitutions.
     """
     reference = (
         'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTACTGTG')
     sample = (
         'TAAGCACCAGGAGTCCATGAAGAAGCTGGATCCTCCCATGGAATCCCCTACTCTACTGTG')

     allele = describe_dna(reference, sample)

     assert str(allele) == '[26A>C;30C>A;35G>C]'

Example #4

0

Show file

File: test_describe.py Project: mutalyzer/description-extractor

 def test4(self):
     """
     Check the description of an allele with an inversion, a substitution
     and a deletion.
     """
     reference = 'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTA'
     sample = 'TAAGCACCAGGAGTCCATGAAGAAGCCATGTCCTGCCATGAATCCCCTACTCTA'

     allele = describe_dna(reference, sample)

     assert str(allele) == '[26_29inv;30C>G;41del]'

Example #5

0

Show file

File: test_describe.py Project: mutalyzer/description-extractor

 def test2(self):
     """
     Check the description of an allele with three substitutions.
     """
     reference = (
         'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTACTGTG')
     sample = (
         'TAAGCACCAGGAGTCCATGAAGAAGCTGGATCCTCCCATGGAATCCCCTACTCTACTGTG')

     allele = describe_dna(reference, sample)

     assert str(allele) == '[26A>C;30C>A;35G>C]'

Example #6

0

Show file

File: mapper.py Project: mutalyzer/normalizer

def _extract_hgvs_internal_model(obs_seq, r_model):
    """
    Describe `obs_seq` relative to the sequence stored in `r_model`.

    The reference sequence is read from ``r_model["sequence"]["seq"]``. The
    differences found by `extractor.describe_dna` are converted with
    `de_to_hgvs`, which also receives both sequences.

    :arg obs_seq: The observed sequence.
    :arg r_model: Reference model holding the reference sequence.

    :returns: Whatever `de_to_hgvs` returns for the extracted variants.
    """
    reference = r_model["sequence"]["seq"]
    extracted = extractor.describe_dna(reference, obs_seq)
    sequences = {"reference": reference, "observed": obs_seq}
    return de_to_hgvs(extracted, sequences)

Example #7

0

Show file

File: test_describe.py Project: BioinformaticsArchive/description-extractor

 def test1(self):
     """
     Check the description of an allele that combines an insertion, a
     deletion, a substitution and a duplication.
     """
     reference = 'ATGATGATCAGATACAGTGTGATACAGGTAGTTAGACAA'
     sample = 'ATGATTTGATCAGATACATGTGATACCGGTAGTTAGGACAA'

     allele = describe_dna(reference, sample)

     assert str(allele) == '[5_6insTT;17del;26A>C;35dup]'

Example #8

0

Show file

File: test_describe.py Project: BioinformaticsArchive/description-extractor

 def test4(self):
     """
     Check the description of an allele with an inversion, a substitution
     and a deletion.
     """
     reference = 'TAAGCACCAGGAGTCCATGAAGAAGATGGCTCCTGCCATGGAATCCCCTACTCTA'
     sample = 'TAAGCACCAGGAGTCCATGAAGAAGCCATGTCCTGCCATGAATCCCCTACTCTA'

     allele = describe_dna(reference, sample)

     assert str(allele) == '[26_29inv;30C>G;41del]'

Example #9

0

Show file

 def _extract(self):
     """
     Populate `self.de_model` with the extracted variants.

     The model holds a deep copy of the `reference` entry of
     `self.internal_indexing_model`, the coordinate system marker `"i"` and
     the variants that `describe_dna` finds between the `reference` and
     `observed` sequences in `self.references`.
     """
     reference_model = copy.deepcopy(
         self.internal_indexing_model["reference"])
     extracted = describe_dna(
         self.references["reference"]["sequence"]["seq"],
         self.references["observed"]["sequence"]["seq"],
     )
     self.de_model = {
         "reference": reference_model,
         "coordinate_system": "i",
         "variants": extracted,
     }

Example #10

0

Show file

def description_extractor(reference, observed):
    """
    Build a description of the differences between two sequences.

    The variants found by `extractor.describe_dna` are passed, in this order,
    through `de_to_hgvs`, `variants_to_internal_indexing`,
    `locations_to_hgvs_locations` (with a crossmap set up for `"g"`) and
    `variants_to_description`.

    :arg reference: The reference sequence.
    :arg observed: The observed sequence.

    :returns: The result of `variants_to_description`.
    """
    extracted = extractor.describe_dna(reference, observed)
    sequences = {"reference": reference, "observed": observed}
    internal_variants = variants_to_internal_indexing(
        de_to_hgvs(extracted, sequences))

    hgvs_model = locations_to_hgvs_locations(
        {"variants": internal_variants}, crossmap_to_hgvs_setup("g"))

    return variants_to_description(hgvs_model["variants"])

Example #11

0

Show file

File: mapper.py Project: mutalyzer/normalizer

def map_description(
    description,
    reference_id,
    selector_id=None,
    slice_to=None,
    clean=False,
):
    """
    Map a variant description onto the reference given by `reference_id`.

    The description is normalized first. Its observed sequence is then
    described relative to the sequence of the retrieved reference model.

    :arg description: Variant description, passed to `Description`.
    :arg reference_id: Identifier passed to `retrieve_reference`.
    :arg selector_id: Optional selector, looked up in the annotations of the
        retrieved reference. When the selector model is flagged `inverted`,
        both sequences taken from the description are reverse complemented.
    :arg slice_to: Optional; when set, the reference model is replaced by
        the result of `_get_reference_model(r_model, selector_id, slice_to)`.
    :arg clean: When true, variants that also occur between the two
        reference sequences are removed from the result.

    :returns: The result of `_get_description`, or a dictionary with an
        `errors` list when normalization fails, no observed sequence is
        available, the reference or selector cannot be found, or the
        filtering requested with `clean` is unsuccessful.
    """
    # Get the observed sequence
    d = Description(description)
    d.normalize()
    if d.errors:
        return {"errors": d.errors}
    # This has to be `or`: with `and` a missing "observed" entry in a
    # non-empty `references` slipped through and raised a KeyError below,
    # while a `None` value raised an AttributeError on `.get`.
    if not d.references or not d.references.get("observed"):
        return {"errors": [{"details": "No observed sequence or other error occured."}]}
    obs_seq = d.references["observed"]["sequence"]["seq"]

    r_model = retrieve_reference(reference_id)
    if r_model is None:
        return {"errors": [reference_not_retrieved(reference_id, [])]}

    ref_seq2 = d.references["reference"]["sequence"]["seq"]

    if selector_id:
        s_model = get_selector_model(r_model["annotations"], selector_id, True)
        if s_model is None:
            return {"errors": [no_selector_found(reference_id, selector_id, [])]}
        if s_model["inverted"]:
            obs_seq = reverse_complement(obs_seq)
            ref_seq2 = reverse_complement(ref_seq2)

    if slice_to:
        r_model = _get_reference_model(r_model, selector_id, slice_to)

    ref_seq1 = r_model["sequence"]["seq"]

    # Get the description extractor hgvs internal indexing variants
    variants = _extract_hgvs_internal_model(obs_seq, r_model)

    if clean:
        # Variants between the two reference sequences themselves.
        raw_de_variants = extractor.describe_dna(ref_seq1, ref_seq2)
        seq_variants = de_to_hgvs(
            raw_de_variants,
            {"reference": ref_seq1, "observed": ref_seq2},
        )
        # Filtering only works when every reference difference is also
        # present among the extracted variants.
        if any(v not in variants for v in seq_variants):
            return {
                "errors": [{"code": "EMAPFILTER", "details": "Unsuccessful filtering."}]
            }
        variants = [v for v in variants if v not in seq_variants]

    return _get_description(variants, r_model, selector_id)

Example #12

0

Show file

File: test_describe.py Project: mutalyzer/description-extractor

    def _single_variant(self, sample, expected):
        """
        Describe `sample` against a fixed reference and check the first
        variant of the result.

        `expected` is read by position: type, start, end, sample start,
        sample end, first deleted sequence, first inserted sequence and the
        string representation of the variant.
        """
        reference = 'ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT'

        variant = describe_dna(reference, sample)[0]

        # Positions on the reference and on the sample.
        assert variant.type == expected[0]
        assert variant.start == expected[1]
        assert variant.end == expected[2]
        assert variant.sample_start == expected[3]
        assert variant.sample_end == expected[4]

        # Affected sequences and the resulting description.
        assert variant.deleted[0].sequence == expected[5]
        assert variant.inserted[0].sequence == expected[6]
        assert str(variant) == expected[7]

Example #13

0

Show file

File: test_describe.py Project: BioinformaticsArchive/description-extractor

    def _single_variant(self, sample, expected):
        """
        Describe `sample` against a fixed reference and check the first
        variant of the result.

        `expected` is read by position: type, start, end, sample start,
        sample end, first deleted sequence, first inserted sequence and the
        string representation of the variant.
        """
        reference = 'ACGTCGATTCGCTAGCTTCGGGGGATAGATAGAGATATAGAGAT'

        variant = describe_dna(reference, sample)[0]

        # Positions on the reference and on the sample.
        assert variant.type == expected[0]
        assert variant.start == expected[1]
        assert variant.end == expected[2]
        assert variant.sample_start == expected[3]
        assert variant.sample_end == expected[4]

        # Affected sequences and the resulting description.
        assert variant.deleted[0].sequence == expected[5]
        assert variant.inserted[0].sequence == expected[6]
        assert str(variant) == expected[7]

Example #14

0

Show file

def description_extractor_submit():
    """
    The Variant Description Extractor (experimental service).

    There are multiple ways for the user to provide two sequences,
    corresponding to the values for the `reference_method` and
    `sample_method` fields, each requiring some additional fields to be
    defined:

    `raw_method`
      The reference and sample sequences are pasted into the form fields.

      - `reference_sequence`: The reference sequence.
      - `sample_sequence`: The sample sequence.

    `file_method`
      The reference and sample sequences are uploaded.

      - `reference_file`: The reference file.
      - `sample_file`: The sample file.

    `refseq_method`
      The reference and sample sequences are given by RefSeq accession numbers.

      - `reference_accession_number`: RefSeq accession number for the reference
        sequence.
      - `sample_accession_number`: RefSeq accession number for the sample
        sequence.

    Any other method value is handled like `raw_method`.
    """
    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received Description Extract request from %s'
                      % request.remote_addr)
    stats.increment_counter('description-extractor/website')

    form = request.form
    uploads = request.files

    reference_method = form.get('reference_method')
    sample_method = form.get('sample_method')
    reference_sequence = form.get('reference_sequence')
    sample_sequence = form.get('sample_sequence')
    reference_accession_number = form.get('reference_accession_number')
    sample_accession_number = form.get('sample_accession_number')

    # Reference first, then sample, so messages keep their order.
    r, reference_filename = _read_extractor_sequence(
        output, 'Reference', reference_method, reference_accession_number,
        uploads.get('reference_file'), reference_sequence)
    s, sample_filename = _read_extractor_sequence(
        output, 'Sample', sample_method, sample_accession_number,
        uploads.get('sample_file'), sample_sequence)

    # Todo: Move this to the describe module.
    checks = ((r, 'Reference sequence is not DNA.'),
              (s, 'Sample sequence is not DNA.'))
    for sequence, complaint in checks:
        if not (sequence and util.is_dna(sequence)):
            output.addMessage(__file__, 3, 'ENODNA', complaint)

    raw_vars = None
    if r and s:
        limit = settings.EXTRACTOR_MAX_INPUT_LENGTH
        if max(len(r), len(s)) > limit:
            output.addMessage(__file__, 3, 'EMAXSIZE',
                              'Input sequences are restricted to {:,} bp.'
                              .format(limit))
        else:
            raw_vars = extractor.describe_dna(r, s)

    # The messages are collected before the closing log entry is added.
    errors, _warnings, summary = output.Summary()
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO',
                      'Finished Description Extract request')

    return render_template(
        'description-extractor.html',
        extractor_max_input_length=settings.EXTRACTOR_MAX_INPUT_LENGTH,
        reference_sequence=reference_sequence or '',
        sample_sequence=sample_sequence or '',
        reference_accession_number=reference_accession_number or '',
        sample_accession_number=sample_accession_number or '',
        reference_filename=reference_filename or '',
        sample_filename=sample_filename or '',
        raw_vars=raw_vars,
        errors=errors,
        summary=summary,
        messages=messages,
        reference_method=reference_method,
        sample_method=sample_method)


def _read_extractor_sequence(output, label, method, accession_number, upload,
                             pasted):
    """
    Load one input sequence for the description extractor form.

    `label` is `'Reference'` or `'Sample'` and is only used in the messages
    added to `output` when the field required by `method` is empty.

    Returns a `(sequence, filename)` tuple; both are empty strings unless the
    chosen method provided them.
    """
    sequence = filename = ''

    if method == 'refseq_method':
        if not accession_number:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                '%s accession number input fields is empty.' % label)
        else:
            retriever = Retriever.GenBankRetriever(output)
            record = retriever.loadrecord(accession_number)
            if record:
                sequence = unicode(record.seq)
    elif method == 'file_method':
        if not upload:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'No %s file provided.' % label.lower())
        else:
            filename = upload.filename
            sequence = util.read_dna(upload)
    else: # raw_method
        if not pasted:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                '%s sequence number input fields is empty.' % label)
        else:
            sequence = util.read_dna(StringIO.StringIO(pasted))

    return sequence, filename

Example #15

0

Show file

def name_checker():
    """
    Name checker.

    Checks the variant description given in the `description` query argument
    and renders the `name-checker.html` template with the collected results.
    Without a description the bare template is rendered. Requests using the
    legacy `name` or `mutationName` arguments are answered with a permanent
    redirect to this view using `description` instead.
    """
    query = request.args

    # For backwards compatibility, `name` is an alias for `description`.
    if 'name' in query:
        target = url_for('.name_checker',
                         description=query['name'],
                         standalone=query.get('standalone'))
        return redirect(target, code=301)

    # For backwards compatibility with older LOVD versions, `mutationName`
    # is supported too; the redirect adds `standalone=1`.
    if 'mutationName' in query:
        target = url_for('.name_checker',
                         description=query['mutationName'],
                         standalone=1)
        return redirect(target, code=301)

    description = query.get('description')
    if not description:
        return render_template('name-checker.html')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO', 'Received variant %s from %s'
                      % (description, request.remote_addr))
    stats.increment_counter('name-checker/website')

    variantchecker.check_variant(description, output)

    # Short names for the two accessors used throughout the rest of the view.
    indexed = output.getIndexedOutput
    collected = output.getOutput

    errors, _warnings, summary = output.Summary()
    parse_error = collected('parseError')

    record_type = indexed('recordType', 0, '')
    reference = indexed('reference', 0, '')
    reference_filename = None
    if reference:
        extension = '.xml' if record_type == 'LRG' else '.gb'
        reference_filename = reference + extension

    genomic_dna = indexed('molType', 0) != 'n'
    genomic_description = indexed('genomicDescription', 0, '')

    # Create a link to the UCSC Genome Browser.
    browser_link = None
    raw_variants = indexed('rawVariantsChromosomal', 0)
    if raw_variants:
        positions = []
        for _descr, (first, last) in raw_variants[2]:
            positions.extend((first, last))
        bed_url = url_for('.bed', description=description, _external=True)
        link_template = ('http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&'
                         'position={chromosome}:{start}-{stop}&hgt.customText='
                         '{bed_file}')
        browser_link = link_template.format(
            chromosome=raw_variants[0],
            start=min(positions) - 10,
            stop=max(positions) + 10,
            bed_file=urllib.quote(bed_url))

    # Experimental description extractor.
    if indexed('original', 0) and indexed('mutated', 0):
        allele = extractor.describe_dna(indexed('original', 0),
                                        indexed('mutated', 0))
        extracted = unicode(allele) if allele else '(skipped)'
    else:
        extracted = ''

    # Todo: Generate the fancy HTML views for the proteins here instead of in
    #   `mutalyzer.variantchecker`.
    arguments = {
        'description': description,
        'messages': map(util.message_info, output.getMessages()),
        'summary': summary,
        'parse_error': parse_error,
        'errors': errors,
        'genomicDescription': genomic_description,
        'chromDescription': indexed('genomicChromDescription', 0),
        'genomicDNA': genomic_dna,
        'visualisation': collected('visualisation'),
        'descriptions': collected('descriptions'),
        'protDescriptions': collected('protDescriptions'),
        'oldProtein': collected('oldProteinFancy'),
        'altStart': indexed('altStart', 0),
        'altProtein': collected('altProteinFancy'),
        'newProtein': collected('newProteinFancy'),
        'transcriptInfo': indexed('hasTranscriptInfo', 0, False),
        'transcriptCoding': indexed('transcriptCoding', 0, False),
        'exonInfo': collected('exonInfo'),
        'cdsStart_g': indexed('cdsStart_g', 0),
        'cdsStart_c': indexed('cdsStart_c', 0),
        'cdsStop_g': indexed('cdsStop_g', 0),
        'cdsStop_c': indexed('cdsStop_c', 0),
        'restrictionSites': collected('restrictionSites'),
        'legends': collected('legends'),
        # Todo: Download link is not shown...
        'reference_filename': reference_filename,
        'browserLink': browser_link,
        'extractedDescription': extracted,
        'standalone': bool(query.get('standalone')),
    }

    # Logged after the template arguments (including the messages) are built.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished variant %s' % description)

    return render_template('name-checker.html', **arguments)

Example #16

0

Show file

File: mutalyzer.py Project: cchng/mutalyzer

def check_name(description):
    """
    Run the name checker.
    """
    O = output.Output(__file__)

    O.addMessage(__file__, -1, "INFO", "Received variant " + description)

    RD = variantchecker.check_variant(description, O)

    O.addMessage(__file__, -1, "INFO", "Finished processing variant " + description)

    ### OUTPUT BLOCK ###
    gn = O.getOutput("genename")
    if gn :
        print "Gene Name: " + gn[0]
    tv = O.getOutput("transcriptvariant")
    if tv :
        print "Transcript variant: " + tv[0]
        print
    #if

    for i in O.getMessages() :
        print i
    errors, warnings, summary = O.Summary()
    print summary
    print

    if not errors:
        print "Overview of the raw variants:"
        for i in O.getOutput("visualisation"):
            for j in range(len(i)):
                print i[j]
            print
        #for

        print "Genomic description:"
        print O.getIndexedOutput('genomicDescription', 0, '')

        print "\nChromosomal description:"
        print O.getOutput("genomicChromDescription")

        print "\nAffected transcripts:"
        for i in O.getOutput('descriptions'):
            print i
        print "\nAffected proteins:"
        for i in O.getOutput('protDescriptions'):
            print i

        print "\nOld protein:"
        for i in O.getOutput("oldProteinFancyText"):
          print i

        print "\nNew protein:"
        for i in O.getOutput("newProteinFancyText"):
          print i

        print "\nAlternative protein:"
        for i in O.getOutput("altProteinFancyText"):
          print i

        print "\nExon information:"
        for i in O.getOutput("exonInfo") :
            print i

        print "\nCDS  information:"
        print O.getOutput("cdsStart_c"), O.getOutput("cdsStop_c")
        print O.getOutput("cdsStart_g"), O.getOutput("cdsStop_g")

        print "\nEffect on Restriction sites:"
        for i in O.getOutput("restrictionSites") :
            print i

        print "\nLegend:"
        for i in O.getOutput("legends") :
            print i

        reference_sequence = O.getIndexedOutput("original", 0)
        sample_sequence = O.getIndexedOutput("mutated", 0)

        described_allele = extractor.describe_dna(reference_sequence,
                                                  sample_sequence)
        #described_protein_allele = describe.describe(
        #    O.getIndexedOutput("oldprotein", 0),
        #    O.getIndexedOutput("newprotein", 0, default=""),
        #    DNA=False)
        described_protein_allele = ""

        described = described_protein = '(skipped)'

        if described_allele:
            described = described_allele
        if described_protein_allele:
            described_protein = described_protein_allele

        print "\nExperimental services:"
        print described
        print described_protein
        #print "+++ %s" % O.getOutput("myTranscriptDescription")
        print json.dumps({
            #"reference_sequence": reference_sequence,
            #"sample_sequence": sample_sequence,
            "allele_description": described_allele}, cls=AlleleEncoder)

Example #17

0

Show file

File: views.py Project: cchng/mutalyzer

def description_extractor_submit():
    """
    The Variant Description Extractor (experimental service).

    There are multiple ways for the user to provide two sequences,
    corresponding to the values for the `reference_method` and
    `sample_method` fields, each requiring some additional fields to be
    defined:

    `raw_method`
      The reference and sample sequences are pasted into the form fields.

      - `reference_sequence`: The reference sequence.
      - `sample_sequence`: The sample sequence.

    `file_method`
      The reference and sample sequences are uploaded.

      - `reference_file`: The reference file.
      - `sample_file`: The sample file.

    `refseq_method`
      The reference and sample sequences are given by RefSeq accession numbers.

      - `reference_accession_number`: RefSeq accession number for the reference
        sequence.
      - `sample_accession_number`: RefSeq accession number for the sample
        sequence.

    Any other method value is handled like `raw_method`.
    """
    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received Description Extract request from %s'
                      % request.remote_addr)
    stats.increment_counter('description-extractor/website')

    form = request.form
    uploads = request.files

    reference_method = form.get('reference_method')
    sample_method = form.get('sample_method')
    reference_sequence = form.get('reference_sequence')
    sample_sequence = form.get('sample_sequence')
    reference_accession_number = form.get('reference_accession_number')
    sample_accession_number = form.get('sample_accession_number')

    # Reference first, then sample, so messages keep their order.
    r, reference_filename = _read_extractor_sequence(
        output, 'Reference', reference_method, reference_accession_number,
        uploads.get('reference_file'), reference_sequence)
    s, sample_filename = _read_extractor_sequence(
        output, 'Sample', sample_method, sample_accession_number,
        uploads.get('sample_file'), sample_sequence)

    # Todo: Move this to the describe module.
    checks = ((r, 'Reference sequence is not DNA.'),
              (s, 'Sample sequence is not DNA.'))
    for sequence, complaint in checks:
        if not (sequence and util.is_dna(sequence)):
            output.addMessage(__file__, 3, 'ENODNA', complaint)

    raw_vars = None
    if r and s:
        limit = settings.EXTRACTOR_MAX_INPUT_LENGTH
        if max(len(r), len(s)) > limit:
            output.addMessage(__file__, 3, 'EMAXSIZE',
                              'Input sequences are restricted to {:,} bp.'
                              .format(limit))
        else:
            raw_vars = extractor.describe_dna(r, s)

    # The messages are collected before the closing log entry is added.
    errors, _warnings, summary = output.Summary()
    messages = map(util.message_info, output.getMessages())

    output.addMessage(__file__, -1, 'INFO',
                      'Finished Description Extract request')

    return render_template(
        'description-extractor.html',
        extractor_max_input_length=settings.EXTRACTOR_MAX_INPUT_LENGTH,
        reference_sequence=reference_sequence or '',
        sample_sequence=sample_sequence or '',
        reference_accession_number=reference_accession_number or '',
        sample_accession_number=sample_accession_number or '',
        reference_filename=reference_filename or '',
        sample_filename=sample_filename or '',
        raw_vars=raw_vars,
        errors=errors,
        summary=summary,
        messages=messages,
        reference_method=reference_method,
        sample_method=sample_method)


def _read_extractor_sequence(output, label, method, accession_number, upload,
                             pasted):
    """
    Load one input sequence for the description extractor form.

    `label` is `'Reference'` or `'Sample'` and is only used in the messages
    added to `output` when the field required by `method` is empty.

    Returns a `(sequence, filename)` tuple; both are empty strings unless the
    chosen method provided them.
    """
    sequence = filename = ''

    if method == 'refseq_method':
        if not accession_number:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                '%s accession number input fields is empty.' % label)
        else:
            retriever = Retriever.GenBankRetriever(output)
            record = retriever.loadrecord(accession_number)
            if record:
                sequence = unicode(record.seq)
    elif method == 'file_method':
        if not upload:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                'No %s file provided.' % label.lower())
        else:
            filename = upload.filename
            sequence = util.read_dna(upload)
    else: # raw_method
        if not pasted:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                '%s sequence number input fields is empty.' % label)
        else:
            sequence = util.read_dna(StringIO.StringIO(pasted))

    return sequence, filename

Example #18

0

Show file

File: views.py Project: cchng/mutalyzer

def name_checker():
    """
    Name checker.

    Checks the variant description given in the `description` query argument
    and renders the `name-checker.html` template with the collected results.
    Without a description the bare template is rendered. Requests using the
    legacy `name` or `mutationName` arguments are answered with a permanent
    redirect to this view using `description` instead.
    """
    query = request.args

    # For backwards compatibility, `name` is an alias for `description`.
    if 'name' in query:
        target = url_for('.name_checker',
                         description=query['name'],
                         standalone=query.get('standalone'))
        return redirect(target, code=301)

    # For backwards compatibility with older LOVD versions, `mutationName`
    # is supported too; the redirect adds `standalone=1`.
    if 'mutationName' in query:
        target = url_for('.name_checker',
                         description=query['mutationName'],
                         standalone=1)
        return redirect(target, code=301)

    description = query.get('description')
    if not description:
        return render_template('name-checker.html')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO', 'Received variant %s from %s'
                      % (description, request.remote_addr))
    stats.increment_counter('name-checker/website')

    variantchecker.check_variant(description, output)

    # Short names for the two accessors used throughout the rest of the view.
    indexed = output.getIndexedOutput
    collected = output.getOutput

    errors, _warnings, summary = output.Summary()
    parse_error = collected('parseError')

    record_type = indexed('recordType', 0, '')
    reference = indexed('reference', 0, '')
    reference_filename = None
    if reference:
        extension = '.xml' if record_type == 'LRG' else '.gb'
        reference_filename = reference + extension

    genomic_dna = indexed('molType', 0) != 'n'
    genomic_description = indexed('genomicDescription', 0, '')

    # Create a link to the UCSC Genome Browser.
    browser_link = None
    raw_variants = indexed('rawVariantsChromosomal', 0)
    if raw_variants:
        positions = []
        for _descr, (first, last) in raw_variants[2]:
            positions.extend((first, last))
        bed_url = url_for('.bed', description=description, _external=True)
        link_template = ('http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&'
                         'position={chromosome}:{start}-{stop}&hgt.customText='
                         '{bed_file}')
        browser_link = link_template.format(
            chromosome=raw_variants[0],
            start=min(positions) - 10,
            stop=max(positions) + 10,
            bed_file=urllib.quote(bed_url))

    # Experimental description extractor.
    if indexed('original', 0) and indexed('mutated', 0):
        allele = extractor.describe_dna(indexed('original', 0),
                                        indexed('mutated', 0))
        extracted = unicode(allele) if allele else '(skipped)'
    else:
        extracted = ''

    # Todo: Generate the fancy HTML views for the proteins here instead of in
    #   `mutalyzer.variantchecker`.
    arguments = {
        'description': description,
        'messages': map(util.message_info, output.getMessages()),
        'summary': summary,
        'parse_error': parse_error,
        'errors': errors,
        'genomicDescription': genomic_description,
        'chromDescription': indexed('genomicChromDescription', 0),
        'genomicDNA': genomic_dna,
        'visualisation': collected('visualisation'),
        'descriptions': collected('descriptions'),
        'protDescriptions': collected('protDescriptions'),
        'oldProtein': collected('oldProteinFancy'),
        'altStart': indexed('altStart', 0),
        'altProtein': collected('altProteinFancy'),
        'newProtein': collected('newProteinFancy'),
        'transcriptInfo': indexed('hasTranscriptInfo', 0, False),
        'transcriptCoding': indexed('transcriptCoding', 0, False),
        'exonInfo': collected('exonInfo'),
        'cdsStart_g': indexed('cdsStart_g', 0),
        'cdsStart_c': indexed('cdsStart_c', 0),
        'cdsStop_g': indexed('cdsStop_g', 0),
        'cdsStop_c': indexed('cdsStop_c', 0),
        'restrictionSites': collected('restrictionSites'),
        'legends': collected('legends'),
        # Todo: Download link is not shown...
        'reference_filename': reference_filename,
        'browserLink': browser_link,
        'extractedDescription': extracted,
        'standalone': bool(query.get('standalone')),
    }

    # Logged after the template arguments (including the messages) are built.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished variant %s' % description)

    return render_template('name-checker.html', **arguments)

Example #19

0

Show file

def check_name(description):
    """
    Run the name checker.
    """
    O = output.Output(__file__)

    O.addMessage(__file__, -1, "INFO", "Received variant " + description)

    RD = variantchecker.check_variant(description, O)

    O.addMessage(__file__, -1, "INFO",
                 "Finished processing variant " + description)

    ### OUTPUT BLOCK ###
    gn = O.getOutput("genename")
    if gn:
        print "Gene Name: " + gn[0]
    tv = O.getOutput("transcriptvariant")
    if tv:
        print "Transcript variant: " + tv[0]
        print
    #if

    for i in O.getMessages():
        print i
    errors, warnings, summary = O.Summary()
    print summary
    print

    if not errors:
        print "Overview of the raw variants:"
        for i in O.getOutput("visualisation"):
            for j in range(len(i)):
                print i[j]
            print
        #for

        print "Genomic description:"
        print O.getIndexedOutput('genomicDescription', 0, '')

        print "\nChromosomal description:"
        print O.getOutput("genomicChromDescription")

        print "\nAffected transcripts:"
        for i in O.getOutput('descriptions'):
            print i
        print "\nAffected proteins:"
        for i in O.getOutput('protDescriptions'):
            print i

        print "\nOld protein:"
        for i in O.getOutput("oldProteinFancyText"):
            print i

        print "\nNew protein:"
        for i in O.getOutput("newProteinFancyText"):
            print i

        print "\nAlternative protein:"
        for i in O.getOutput("altProteinFancyText"):
            print i

        print "\nExon information:"
        for i in O.getOutput("exonInfo"):
            print i

        print "\nCDS  information:"
        print O.getOutput("cdsStart_c"), O.getOutput("cdsStop_c")
        print O.getOutput("cdsStart_g"), O.getOutput("cdsStop_g")

        print "\nEffect on Restriction sites:"
        for i in O.getOutput("restrictionSites"):
            print i

        print "\nLegend:"
        for i in O.getOutput("legends"):
            print i

        reference_sequence = O.getIndexedOutput("original", 0)
        sample_sequence = O.getIndexedOutput("mutated", 0)

        described_allele = extractor.describe_dna(reference_sequence,
                                                  sample_sequence)
        #described_protein_allele = describe.describe(
        #    O.getIndexedOutput("oldProtein", 0),
        #    O.getIndexedOutput("newProtein", 0, default=""),
        #    DNA=False)
        described_protein_allele = ""

        described = described_protein = '(skipped)'

        if described_allele:
            described = described_allele
        if described_protein_allele:
            described_protein = described_protein_allele

        print "\nExperimental services:"
        print described
        print described_protein
        #print "+++ %s" % O.getOutput("myTranscriptDescription")
        print json.dumps(
            {
                #"reference_sequence": reference_sequence,
                #"sample_sequence": sample_sequence,
                "allele_description": described_allele
            },
            cls=AlleleEncoder)

Example #20

0

Show file

File: getRsids.py Project: owebb1/l7g-ml

def rsid_search(tile_index, variant, suppress_output=True):
    """
    Compare one variant of a tile with the tile's first variant and collect
    the RSID query results for every difference between the two sequences.

    The variant lines are obtained from ``getTileVariants.tile_iteration``
    and the differences from ``describe_dna(common_seq, tile_seq)``.

    Parameters:
        tile_index: identifier handed to ``application.Tile``.
        variant: index of the variant line to inspect; converted with
            ``int``.
        suppress_output: when false, progress and per-mutation results are
            printed in addition to being returned.

    Returns:
        A dict with the keys ``'tile_sequence'`` and ``'common_sequence'``
        (the full variant lines, not only the sequence field),
        ``'variant'``, ``'index'`` and ``'mutations'``. The latter maps
        ``repr(mutation)`` to a list of ``(rsid, location, ref, alt)``
        tuples, which is empty when no RSID was found.

    Note:
        Calls ``sys.exit()`` when the chosen variant line is identical to
        the first variant line.
    """
    results_dict = {}

    tile = application.Tile(tile_index)
    varval = int(variant)
    # retrieve tile information using getTileVariants script
    info_tile = getTileVariants.tile_iteration(tile,
                                               suppress_output=True,
                                               all_functionality=True)

    # break up output into common variant (.000) and specifically chosen
    # variant
    variant_lines = info_tile.variants.split('\n')
    tile_variant = variant_lines[varval]
    common_variant = variant_lines[0]

    if tile_variant == common_variant:
        print("No difference between tiles. Exiting...")
        sys.exit()

    # retrieve sequence, ignoring hash + id
    tile_seq = tile_variant.split(',')[2]
    common_seq = common_variant.split(',')[2]

    results_dict['tile_sequence'] = tile_variant
    results_dict['common_sequence'] = common_variant
    results_dict['variant'] = variant
    results_dict['index'] = tile_index
    results_dict['mutations'] = {}

    if not suppress_output:
        print("Common sequence: {}".format(common_seq))
        print("Variant sequence: {}".format(tile_seq))

    # delete spanning tile parts if necessary: when one sequence is at least
    # this much longer than the other, the longer one is cut to the shorter
    # one's length.
    # NOTE(review): 24 presumably matches the tag length -- confirm.
    span_threshold = 24
    if len(tile_seq) - len(common_seq) >= span_threshold:
        tile_seq = tile_seq[:len(common_seq)]
        if not suppress_output:
            print("Detected spanning tile. Deleting extra part...")
    elif len(common_seq) - len(tile_seq) >= span_threshold:
        common_seq = common_seq[:len(tile_seq)]
        if not suppress_output:
            print("Detected spanning tile. Deleting extra part...")

    # run mutalyzer description extractor for alignment data
    allele = describe_dna(common_seq, tile_seq)

    # break up into list if needed
    changes = str(allele).replace('[', '').replace(']', '').split(';')

    # Real lists are built here on purpose: on Python 3 ``map`` is lazy, so
    # the offset step would never run and ``mutations`` would already be
    # exhausted by the time it is zipped with the queries.
    mutations = [get_mutation(change, info_tile, common_seq)
                 for change in changes]

    # offset all mutations
    position_start = info_tile.to_dict()['position_start']
    for mutation in mutations:
        mutation.offset(position_start)

    # store all the queries
    rsid_queries = [mutation.rsid_query() for mutation in mutations]
    # NOTE(review): dropping '' entries shifts the positional pairing with
    # ``mutations`` below whenever one is removed; kept as in the original
    # -- confirm this is intended.
    rsid_queries = [query for query in rsid_queries if query != '']

    # print results
    for mutation, rsid_lst in zip(mutations, rsid_queries):
        if not suppress_output:
            print('---')
            print("Mutation: {}".format(mutation))
            print("Representation: {}".format(repr(mutation)))

        mutation_info = []

        if len(rsid_lst) > 0 and rsid_lst[0] != '':
            if not suppress_output:
                print("Possible SNP RSIDS:")
            for rsid_query in rsid_lst:
                # Each query line holds five space-separated fields; the
                # first one is not reported.
                _chrom, rsid, location, ref, alt = rsid_query.split(" ")
                result_str = "RSID: {}; Location: {}, REF: {}, ALT: {}".format(
                    rsid, location, ref, alt)

                if not suppress_output:
                    print(result_str)
                mutation_info.append((rsid, location, ref, alt))
        else:
            if not suppress_output:
                print("No possible SNPs found")

        results_dict['mutations'][repr(mutation)] = mutation_info

    return results_dict