예제 #1
0
    def parse_text(self, text_query):
        """Parse a multi-line text query and register the mutations found.

        Each line is handled independently after an optional leading
        'Complement of ' marker is removed:

        * four whitespace-separated fields are read as
          ``chrom pos ref alt`` (a leading 'chr' on the chromosome is
          dropped) and looked up with `bdb.get_genomic_muts`; when that
          yields nothing, the lookup is repeated with `complement(ref)` and
          `complement(alt)` and the line is re-labelled with the
          'Complement of ' marker,
        * two fields are upper-cased and read as ``gene mutation`` and
          looked up with `get_protein_muts`,
        * anything else is appended, unstripped, to `self.badly_formatted`.

        Results of successful lookups are handed to
        `self.add_mutation_items` together with the (possibly re-labelled)
        line.
        """
        complement_prefix = 'Complement of '
        prefix_length = len(complement_prefix)

        for line in text_query.splitlines():
            if line.startswith(complement_prefix):
                line = line[prefix_length:]

            fields = line.strip().split()
            field_count = len(fields)

            # Guard clause: only 2-field and 4-field lines are understood.
            if field_count not in (2, 4):
                self.badly_formatted.append(line)
                continue

            if field_count == 2:
                gene, mut = (field.upper() for field in fields)
                items = get_protein_muts(gene, mut)
            else:
                chrom, pos, ref, alt = fields
                if chrom.startswith('chr'):
                    chrom = chrom[3:]

                items = bdb.get_genomic_muts(chrom, pos, ref, alt)

                if not items:
                    # Nothing for the given alleles: try their complements
                    # and mark the line accordingly.
                    line = complement_prefix + line
                    items = bdb.get_genomic_muts(
                        chrom, pos, complement(ref), complement(alt)
                    )

            self.add_mutation_items(items, line)
예제 #2
0
    def parse_vcf(self, vcf_file):
        """Parse an uploaded VCF file and register the mutations found.

        `vcf_file` is iterated line by line; every line is expected to be
        a bytes object and is decoded as latin1. Lines starting with '#'
        are skipped. A line with fewer than five whitespace-separated
        fields is appended to `self.badly_formatted`, except that an empty
        line stops parsing altogether.

        For each comma-separated alternative allele of a record the
        mutations are looked up with `bdb.get_genomic_muts` (using the
        chromosome name without the 'chr' prefix) and passed to
        `self.add_mutation_items` along with a ``chr<chrom> pos ref alt``
        line; the same line is appended to `self.query`.
        """
        for line in vcf_file:
            line = line.decode('latin1').strip()
            if line.startswith('#'):
                continue
            data = line.split()

            if len(data) < 5:
                if not line:  # if we reached end of the file
                    break
                self.badly_formatted.append(line)
                continue

            # The third column (variant id) is not used.
            chrom, pos, _, ref, alts = data[:5]

            if chrom.startswith('chr'):
                chrom = chrom[3:]

            # Keep the bare name for lookups and build the prefixed name
            # once: re-prefixing `chrom` inside the loop below would query
            # 'chr1' and print 'chrchr1' for the second and later alleles.
            query_chrom = 'chr' + chrom

            for alt in alts.split(','):
                items = bdb.get_genomic_muts(chrom, pos, ref, alt)

                parsed_line = ' '.join((query_chrom, pos, ref, alt)) + '\n'

                self.add_mutation_items(items, parsed_line)

                # we don't have queries in our format for vcf files:
                # those need to be built this way
                self.query += parsed_line
예제 #3
0
    def mutation(self, chrom, dna_pos, dna_ref, dna_alt):
        """REST API endpoint: JSON description of a genomic mutation.

        The chromosome may be given with or without a leading 'chr'.
        Matches returned by `bdb.get_genomic_muts` are reduced to their
        `mutation` attribute, run through the filter manager obtained from
        `self._make_filters()`, converted with `represent_mutations` and
        returned via `jsonify`.

        Stop codon mutations are not considered.
        """
        _, filter_manager = self._make_filters()

        bare_chrom = chrom[3:] if chrom.startswith('chr') else chrom
        genomic_hits = bdb.get_genomic_muts(bare_chrom, dna_pos, dna_ref, dna_alt)

        candidates = [hit.mutation for hit in genomic_hits]
        accepted = filter_manager.apply(candidates)

        return jsonify(represent_mutations(accepted, filter_manager))
예제 #4
0
def autocomplete_mutation(query, limit=None):
    """Suggest mutations and hint messages for a partially typed query.

    The query is upper-cased, stripped and split on whitespace; the number
    of tokens selects the interpretation:

    * 1 token: a hint about the genomic format is added when the query
      starts with 'CHR', and a hint about the protein format is added when
      `gene_exists` accepts the token,
    * 2 tokens: read as ``gene mutation`` and delegated to
      `match_aa_mutation`; entries of type 'message' are separated from
      the items,
    * 3 tokens: only a genomic-format hint (for 'CHR' queries),
    * 4 tokens: read as ``chrom pos ref alt`` (for 'CHR' queries); looked
      up with `bdb.get_genomic_muts`, falling back to the complemented
      alleles when nothing is found,
    * any other count: no suggestions.

    Args:
        query: Text typed by the user.
        limit: Maximal number of items to return; a falsy value means
            "no limit". Messages are not counted against the limit.

    Returns:
        A ``(results, are_there_more)`` tuple on every path: `results` is
        a list of items followed by messages, `are_there_more` tells
        whether items were cut off by `limit`.
    """
    # TODO: rewriting this into regexp-based set of function may increase readability
    # TODO: use limit to restrict queries

    genomic_format_hint = (
        'Awaiting for mutation in <code>{chrom} {pos} {ref} {alt}</code> format'
    )

    query = query.upper().strip()
    data = query.split()

    items = []
    messages = []

    if len(data) == 1:

        if query.startswith('CHR'):
            messages += json_message(genomic_format_hint)

        gene = data[0]

        if gene_exists(gene):
            messages += json_message(
                'Awaiting for <code>{ref}{pos}{alt}</code> - expecting mutation in <code>{gene} {ref}{pos}{alt}</code> format'
            )

    elif len(data) == 4:
        # Early exits return the documented 2-tuple (not a bare list) so
        # that callers can always unpack the result.
        if not query.startswith('CHR'):
            return [], False

        chrom, pos, ref, alt = data
        chrom = chrom[3:]

        try:
            items = bdb.get_genomic_muts(chrom, pos, ref, alt)

            # maybe an interesting mutation is located on the other strand
            if not items:
                query = 'complement of ' + query
                items = bdb.get_genomic_muts(chrom, pos, complement(ref),
                                             complement(alt))

            items = prepare_items(items, query, 'nucleotide mutation')
        except ValueError:
            return json_message(
                'Did you mean to search for mutation with <code>{chrom} {pos} {ref} {alt}</code> format?'
                ' The <code>{pos}</code> should be an integer.'), False

    elif len(data) == 3:
        if not query.startswith('CHR'):
            return [], False

        return json_message(genomic_format_hint), False

    elif len(data) == 2:
        gene, mut = data

        result = match_aa_mutation(gene, mut, query)
        items = [r for r in result if r['type'] != 'message']
        messages = [r for r in result if r['type'] == 'message']

        # 'CHR1 123' may just as well be the beginning of a genomic query
        if query.startswith('CHR') and mut.isnumeric():
            messages += json_message(genomic_format_hint)

    if not limit:
        limit = len(items)
    return items[:limit] + messages, len(items) > limit