def parse_text(self, text_query):
    """Parse a free-text query, one mutation per line, and register the hits.

    A leading ``'Complement of '`` is removed from a line first. The line is
    then split on whitespace and interpreted by its number of tokens:

    * four tokens, ``chrom pos ref alt``: a leading ``chr`` is dropped from
      the chromosome and ``bdb.get_genomic_muts`` is queried. When that
      returns nothing, the lookup is repeated with ``complement(ref)`` and
      ``complement(alt)`` and the line is reported with the
      ``'Complement of '`` prefix put back in front.
    * two tokens, ``gene mut``: both are upper-cased and passed to
      ``get_protein_muts``.
    * any other count: the line is appended to ``self.badly_formatted`` and
      skipped.

    Lookup results are passed to ``self.add_mutation_items(items, line)``.

    Args:
        text_query: the whole query text; it is split with ``splitlines()``.
    """
    prefix = 'Complement of '

    for raw_line in text_query.splitlines():
        if raw_line.startswith(prefix):
            entry = raw_line[len(prefix):]
        else:
            entry = raw_line

        tokens = entry.split()
        token_count = len(tokens)

        # Guard clause: only genomic (4 tokens) and protein (2 tokens)
        # notations are understood.
        if token_count not in (2, 4):
            self.badly_formatted.append(entry)
            continue

        if token_count == 2:
            gene, mut = (token.upper() for token in tokens)
            found = get_protein_muts(gene, mut)
        else:
            chrom, pos, ref, alt = tokens
            if chrom.startswith('chr'):
                chrom = chrom[3:]
            found = bdb.get_genomic_muts(chrom, pos, ref, alt)
            if not found:
                # Nothing for the alleles as given: retry with the
                # complemented alleles and label the line accordingly.
                entry = prefix + entry
                found = bdb.get_genomic_muts(
                    chrom, pos, complement(ref), complement(alt)
                )

        self.add_mutation_items(found, entry)
def parse_vcf(self, vcf_file):
    """Parse a VCF file and register the mutations it describes.

    Every line is decoded as latin1 and stripped. Lines starting with ``#``
    are skipped. A line with fewer than five whitespace-separated columns is
    appended to ``self.badly_formatted``, except an empty line, which stops
    parsing altogether.

    For every allele in the comma-separated ALT column the genomic database
    is queried, the result is passed to ``self.add_mutation_items`` and a
    ``chr<chrom> <pos> <ref> <alt>`` line is appended to ``self.query``.

    Args:
        vcf_file: iterable yielding the lines of the file as ``bytes``.
    """
    for raw_line in vcf_file:
        line = raw_line.decode('latin1').strip()

        if line.startswith('#'):
            continue

        data = line.split()

        if len(data) < 5:
            if not line:
                # an empty line is treated as the end of the file
                break
            self.badly_formatted.append(line)
            continue

        # the variant ID column is read but not used
        chrom, pos, _var_id, ref, alts = data[:5]

        if chrom.startswith('chr'):
            chrom = chrom[3:]

        # Build the prefixed form once, in its own variable. Re-assigning
        # `chrom` inside the loop below would make the second and later
        # alleles of a multi-allelic record query the database with
        # 'chr<chrom>' and report 'chrchr<chrom>'.
        prefixed_chrom = 'chr' + chrom

        for alt in alts.split(','):
            items = bdb.get_genomic_muts(chrom, pos, ref, alt)

            parsed_line = ' '.join((prefixed_chrom, pos, ref, alt)) + '\n'
            self.add_mutation_items(items, parsed_line)

            # we don't have queries in our format for vcf files:
            # those need to be built this way
            self.query += parsed_line
def mutation(self, chrom, dna_pos, dna_ref, dna_alt):
    """Rest API endpoint. Stop codon mutations are not considered.

    Looks up the genomic mutation in ``bdb`` (a leading ``chr`` on the
    chromosome is dropped first), runs the mutations of the returned items
    through the filter manager obtained from ``self._make_filters()`` and
    returns their representation wrapped in ``jsonify``.
    """
    _unused, filter_manager = self._make_filters()

    bare_chrom = chrom[3:] if chrom.startswith('chr') else chrom

    candidates = []
    for hit in bdb.get_genomic_muts(bare_chrom, dna_pos, dna_ref, dna_alt):
        candidates.append(hit.mutation)

    accepted = filter_manager.apply(candidates)
    return jsonify(represent_mutations(accepted, filter_manager))
def autocomplete_mutation(query, limit=None):
    """Suggest mutations (and hints) for a partially typed search query.

    The query is upper-cased, stripped and split on whitespace; the number
    of tokens decides how it is interpreted:

    * 1 token: hints only. A genomic-format hint is added when the query
      starts with ``CHR``, and a protein-format hint when ``gene_exists``
      accepts the token.
    * 2 tokens, ``gene mut``: resolved with ``match_aa_mutation``, whose
      results are split into items and messages. A genomic-format hint is
      added when the query starts with ``CHR`` and ``mut`` is numeric.
    * 3 tokens: an incomplete genomic query; only a hint is returned.
    * 4 tokens, ``chrom pos ref alt``: looked up with
      ``bdb.get_genomic_muts``; if nothing is found the complemented alleles
      are tried and the query is prefixed with ``'complement of '``.
    * any other count: no items and no messages.

    Args:
        query: text typed by the user.
        limit: maximal number of items to return; a falsy value disables
            truncation.

    Returns:
        (autocompletion_mutation_results, are_there_more)

    NOTE(review): the early exits (3- and 4-token queries not starting with
    ``CHR``, the 3-token hint and the ``ValueError`` handler) return a bare
    list rather than the tuple described above - confirm that callers cope
    with both shapes.
    """
    # TODO: rewriting this into regexp-based set of function may increase readability
    # TODO: use limit to restrict queries
    genomic_hint = 'Awaiting for mutation in <code>{chrom} {pos} {ref} {alt}</code> format'

    query = query.upper().strip()
    tokens = query.split()
    token_count = len(tokens)
    is_genomic = query.startswith('CHR')

    items = []
    messages = []

    if token_count == 1:
        if is_genomic:
            messages += json_message(genomic_hint)

        gene = tokens[0].strip()

        # an unknown gene deliberately produces no message
        if gene_exists(gene):
            messages += json_message(
                'Awaiting for <code>{ref}{pos}{alt}</code> - expecting mutation in <code>{gene} {ref}{pos}{alt}</code> format'
            )

    elif token_count == 2:
        gene, mut = tokens

        matched = match_aa_mutation(gene, mut, query)

        items = []
        messages = []
        for entry in matched:
            if entry['type'] == 'message':
                messages.append(entry)
            else:
                items.append(entry)

        if is_genomic and mut.isnumeric():
            messages += json_message(genomic_hint)

    elif token_count == 3:
        if not is_genomic:
            return []
        return json_message(genomic_hint)

    elif token_count == 4:
        if not is_genomic:
            return []

        chrom, pos, ref, alt = tokens
        chrom = chrom[3:]

        try:
            items = bdb.get_genomic_muts(chrom, pos, ref, alt)

            # maybe an interesting mutation is located on the other strand
            if not items:
                query = 'complement of ' + query
                items = bdb.get_genomic_muts(
                    chrom, pos, complement(ref), complement(alt)
                )

            items = prepare_items(items, query, 'nucleotide mutation')
        except ValueError:
            # presumably raised when `pos` is not an integer, as the
            # message below suggests - TODO confirm
            return json_message(
                'Did you mean to search for mutation with <code>{chrom} {pos} {ref} {alt}</code> format?'
                ' The <code>{pos}</code> should be an integer.')

    total = len(items)
    if not limit:
        limit = total

    return items[:limit] + messages, total > limit