def bed():
    """
    Serve a BED track for the raw variants of a variant description.

    The variant checker is run on the ``description`` query argument and the
    chromosomal raw variants it reports are written out as one BED line each
    (preceded by a ``track`` line), e.g., for use in the UCSC Genome Browser.

    Responds with 404 when no description is given or when the checker
    reports no chromosomal raw variants.
    """
    # Backwards compatibility.
    if 'name' in request.args:
        return redirect(url_for('.bed', description=request.args['name']),
                        code=301)

    description = request.args.get('description')
    if not description:
        abort(404)

    output = Output(__file__)
    variantchecker.check_variant(description, output)

    raw_variants = output.getIndexedOutput('rawVariantsChromosomal', 0)
    if not raw_variants:
        abort(404)

    # Todo: Hard-coded hg19.
    fields = {
        'name'       : 'Mutalyzer',
        'description': 'Mutalyzer track for ' + description,
        'visibility' : 'pack',
        'db'         : 'hg19',
        'url'        : url_for('.name_checker',
                               description=description,
                               _external=True),
        'color': '255,0,0'}

    # First line is the track definition, every following line is one raw
    # variant in BED column order.
    track_attributes = ['%s="%s"' % item for item in fields.items()]
    lines = [' '.join(['track'] + track_attributes)]

    for descr, positions in raw_variants[2]:
        columns = [raw_variants[0],
                   unicode(min(positions) - 1),
                   unicode(max(positions)),
                   descr,
                   '0',
                   raw_variants[1]]
        lines.append('\t'.join(columns))

    track = ''.join(line + '\n' for line in lines)

    response = make_response(track)
    response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    return response
def back_translator():
    """
    Render the back translator page.

    If a ``description`` query argument is present it is passed to the back
    translator; the resulting variants are rendered together with the
    messages and summary collected in the output object.
    """
    output = Output(__file__)
    received = 'Received Back Translate request from {}'.format(
        request.remote_addr)
    output.addMessage(__file__, -1, 'INFO', received)

    stats.increment_counter('back-translator/website')

    description = request.args.get('description')

    if description:
        variants = backtranslator.backtranslate(output, description)
    else:
        variants = []

    errors, warnings, summary = output.Summary()
    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO', 'Finished Back Translate request')

    context = dict(errors=errors,
                   summary=summary,
                   description=description or '',
                   messages=messages,
                   variants=variants)
    return render_template('back-translator.html', **context)
def snp_converter():
    """
    SNP converter.

    Look up the HGVS description(s) for the dbSNP rs number given in the
    ``rs_id`` query argument and render them. Without an ``rs_id`` the empty
    form is rendered.
    """
    # Backwards compatibility.
    if 'rsId' in request.args:
        target = url_for('.snp_converter', rs_id=request.args['rsId'])
        return redirect(target, code=301)

    rs_id = request.args.get('rs_id')
    if not rs_id:
        return render_template('snp-converter.html')

    output = Output(__file__)
    received = ('Received request snpConvert(%s) from %s'
                % (rs_id, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)

    stats.increment_counter('snp-converter/website')

    descriptions = ncbi.rsid_to_descriptions(rs_id, output)
    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished request snpConvert(%s)' % rs_id)

    summary = output.Summary()[2]
    return render_template('snp-converter.html',
                           rs_id=rs_id,
                           descriptions=descriptions,
                           messages=messages,
                           summary=summary)
def syntax_checker():
    """
    Render the syntax checker page.

    If a ``description`` query argument is given it is run through the
    grammar parser; the messages and any parse error recorded in the output
    object are passed to the template. Without a description the empty form
    is rendered.
    """
    # Backwards compatibility.
    if 'variant' in request.args:
        target = url_for('.syntax_checker',
                         description=request.args['variant'])
        return redirect(target, code=301)

    description = request.args.get('description')
    if not description:
        return render_template('syntax-checker.html')

    output = Output(__file__)
    received = ('Received request syntaxCheck(%s) from %s'
                % (description, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)

    stats.increment_counter('syntax-checker/website')

    # The parse result itself is not used here; errors end up in `output`.
    Grammar(output).parse(description)

    parse_error = output.getOutput('parseError')
    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished request syntaxCheck(%s)' % description)

    return render_template('syntax-checker.html',
                           description=description,
                           messages=messages,
                           parse_error=parse_error)
def _processSNP(self, batch_job, cmd, flags):
    """
    Process an entry from the SNP converter Batch, write the results
    to the job-file.

    Side-effect:
        - One tab-separated record is appended to the result file
          ``<CACHE_DIR>/batch-job-<result_id>.txt``. A header line is
          written first if the file does not exist yet.

    @arg batch_job: The batch job; its ``result_id`` names the result file
    @type batch_job:
    @arg cmd: The SNP converter input
    @type cmd:
    @arg flags: Flags of the current entry; if they contain 'C' the record
                is terminated with a tab instead of a newline
    @type flags:
    """
    O = Output(__file__)
    O.addMessage(__file__, -1, "INFO",
                 "Received SNP converter batch rs" + cmd)

    stats.increment_counter('snp-converter/batch')

    #Read out the flags
    # Todo: Do something with the flags?
    skip = self.__processFlags(O, flags)

    descriptions = []
    if not skip:
        R = Retriever.Retriever(O)
        descriptions = R.snpConvert(cmd)

    # Todo: Is output ok?
    outputline = "%s\t" % cmd
    outputline += "%s\t" % "|".join(descriptions)
    outputline += "%s\t" % "|".join(O.getBatchMessages(2))

    #Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # If the file does not yet exist, it is created with the header above
    # the first record. This must be decided before opening in append mode,
    # since opening creates the file.
    write_header = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # The context manager closes the handle even if a write fails.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if write_header:
            header = ['Input Variant',
                      'HGVS description(s)',
                      'Errors and warnings']
            handle.write("%s\n" % "\t".join(header))
        handle.write("%s%s" % (outputline, separator))

    O.addMessage(__file__, -1, "INFO",
                 "Finished SNP converter batch rs%s" % cmd)
def lovd_get_gs():
    """
    LOVD bypass to get the correct GeneSymbol incl Transcript variant.

    Used by LOVD to get the correct transcript variant out of a genomic
    record. LOVD uses a genomic reference (``NC_``?) in combination with a
    gene symbol to pass variant info to mutalyzer. Mutalyzer 1.0 was only
    using the first transcript. LOVD supplies the NM of the transcript needed
    but this was ignored. This helper allows LOVD to get the requested
    transcript variant from a genomic reference.

    Parameters:

    mutationName
      The mutationname without gene symbol.
    variantRecord
      The NM reference of the variant.
    forward
      If set this forwards the request to the name checker.

    Returns: Output of name checker if `forward` is set, otherwise the
             gene symbol with the variant notation as string. If no legend
             entry matches `variantRecord`, the plain text
             'Transcript not found'.
    """
    def plain_text(body):
        # All non-redirect answers of this view are plain text.
        reply = make_response(body)
        reply.headers['Content-Type'] = 'text/plain; charset=utf-8'
        return reply

    mutation_name = request.args['mutationName']
    variant_record = request.args['variantRecord']
    forward = request.args.get('forward')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request getGS(%s, %s, %s) from %s'
                      % (mutation_name, variant_record, forward,
                         request.remote_addr))

    variantchecker.check_variant(mutation_name, output)

    output.addMessage(__file__, -1, 'INFO',
                      'Finished request getGS(%s, %s, %s)'
                      % (mutation_name, variant_record, forward))

    # Filter the transcript from the legend.
    transcripts = [entry for entry in output.getOutput('legends')
                   if '_v' in entry[0]]

    for entry in transcripts:
        if entry[1] != variant_record:
            continue

        if not forward:
            return plain_text(entry[0])

        p, a = mutation_name.split(':')
        return redirect(url_for('.name_checker',
                                description='%s(%s):%s' % (p, entry[0], a),
                                standalone=1))

    return plain_text('Transcript not found')
def _processSyntaxCheck(self, batch_job, cmd, flags):
    """
    Process an entry from the Syntax Check, write the results
    to the job-file.

    Side-effect:
        - One tab-separated record is appended to the result file
          ``<CACHE_DIR>/batch-job-<result_id>.txt``. A header line is
          written first if the file does not exist yet.

    @arg batch_job: The batch job; its ``result_id`` names the result file
    @type batch_job:
    @arg cmd: The Syntax Checker input
    @type cmd:
    @arg flags: Flags of the current entry; if they contain 'C' the record
                is terminated with a tab instead of a newline
    @type flags:
    """
    output = Output(__file__)
    grammar = Grammar(output)

    output.addMessage(__file__, -1, "INFO",
                      "Received SyntaxChecker batchvariant " + cmd)

    stats.increment_counter('syntax-checker/batch')

    skip = self.__processFlags(output, flags)

    #Process
    if not skip:
        parsetree = grammar.parse(cmd)
    else:
        parsetree = None

    if parsetree:
        result = "OK"
    else:
        result = "|".join(output.getBatchMessages(2))

    #Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # If the file does not yet exist, it is created with the header above
    # the first record. This must be decided before opening in append mode,
    # since opening creates the file.
    write_header = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # The context manager closes the handle even if a write fails.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if write_header:
            header = ['Input', 'Status']
            handle.write("%s\n" % "\t".join(header))
        handle.write("%s\t%s%s" % (cmd, result, separator))

    output.addMessage(__file__, -1, "INFO",
                      "Finished SyntaxChecker batchvariant " + cmd)
def import_from_reference(assembly, reference):
    """
    Import transcript mappings from a genomic reference.

    The reference is loaded through the GenBank retriever and, for every
    gene that has at least one transcript, one mapping on the assembly's
    ``chrM`` chromosome is created or updated. Raises `ValueError` if the
    loaded record is not of molecule type ``'m'``.

    .. todo: Also report how much was added/updated.

    .. note: Currently no exon locations are supported, this has only been
       tested on mtDNA.
    """
    chromosome = assembly.chromosomes.filter_by(name='chrM').one()

    output = Output(__file__)
    record = Retriever.GenBankRetriever(output).loadrecord(reference)

    if record.molType != 'm':
        raise ValueError('Only mitochondial references are supported')

    select_transcript = len(record.geneList) > 1

    for gene in record.geneList:
        # We support exactly one transcript per gene: the first by name.
        by_name = sorted(gene.transcriptList, key=attrgetter('name'))
        if not by_name:
            continue
        transcript = by_name[0]

        # We use gene.location for now, it is always present and the same
        # for our purposes.
        #start, stop = transcript.mRNA.location[0], transcript.mRNA.location[1]
        start, stop = gene.location

        if gene.orientation == -1:
            orientation = 'reverse'
        else:
            orientation = 'forward'

        # Transcripts without a usable CDS get no CDS in the mapping.
        cds = getattr(getattr(transcript, 'CDS', None), 'location', None)

        mapping = TranscriptMapping.create_or_update(
            chromosome, 'refseq', record.source_accession, gene.name,
            orientation, start, stop, [start], [stop], 'reference', cds=cds,
            select_transcript=select_transcript,
            version=int(record.source_version))
        session.add(mapping)

    session.commit()
def test_getcache(self):
    """
    Calling getCache with a cut-off of two weeks ago should return
    exactly three cache entries.
    """
    two_weeks = datetime.timedelta(days=14)
    created_since = datetime.datetime.today() - two_weeks

    # The CacheSync instance is constructed but not used any further.
    output = Output(__file__)
    sync = CacheSync(output)

    result = self._call('getCache', created_since)
    assert len(result.CacheEntry) == 3
def _processSNP(self, batch_job, cmd, flags):
    """
    Process an entry from the SNP converter Batch, write the results
    to the job-file.

    Side-effect:
        - One tab-separated record is appended to the result file
          ``<CACHE_DIR>/batch-job-<result_id>.txt``. A header line is
          written first if the file does not exist yet.

    @arg batch_job: The batch job; its ``result_id`` names the result file
    @type batch_job:
    @arg cmd: The SNP converter input
    @type cmd:
    @arg flags: Flags of the current entry; if they contain 'C' the record
                is terminated with a tab instead of a newline
    @type flags:
    """
    O = Output(__file__)
    O.addMessage(__file__, -1, "INFO",
                 "Received SNP converter batch rs" + cmd)

    stats.increment_counter('snp-converter/batch')

    #Read out the flags
    # Todo: Do something with the flags?
    skip = self.__processFlags(O, flags)

    descriptions = []
    if not skip:
        R = Retriever.Retriever(O)
        descriptions = R.snpConvert(cmd)

    # Todo: Is output ok?
    outputline = "%s\t" % cmd
    outputline += "%s\t" % "|".join(descriptions)
    outputline += "%s\t" % "|".join(O.getBatchMessages(2))

    #Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # If the file does not yet exist, it is created with the header above
    # the first record. This must be decided before opening in append mode,
    # since opening creates the file.
    write_header = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # The context manager closes the handle even if a write fails.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if write_header:
            header = ['Input Variant',
                      'HGVS description(s)',
                      'Errors and warnings']
            handle.write("%s\n" % "\t".join(header))
        handle.write("%s%s" % (outputline, separator))

    O.addMessage(__file__, -1, "INFO",
                 "Finished SNP converter batch rs%s" % cmd)
def _processSyntaxCheck(self, batch_job, cmd, flags):
    """
    Process an entry from the Syntax Check, write the results
    to the job-file.

    Side-effect:
        - One tab-separated record is appended to the result file
          ``<CACHE_DIR>/batch-job-<result_id>.txt``. A header line is
          written first if the file does not exist yet.

    @arg batch_job: The batch job; its ``result_id`` names the result file
    @type batch_job:
    @arg cmd: The Syntax Checker input
    @type cmd:
    @arg flags: Flags of the current entry; if they contain 'C' the record
                is terminated with a tab instead of a newline
    @type flags:
    """
    output = Output(__file__)
    grammar = Grammar(output)

    output.addMessage(__file__, -1, "INFO",
                      "Received SyntaxChecker batchvariant " + cmd)

    stats.increment_counter('syntax-checker/batch')

    skip = self.__processFlags(output, flags)

    #Process
    if not skip:
        parsetree = grammar.parse(cmd)
    else:
        parsetree = None

    if parsetree:
        result = "OK"
    else:
        result = "|".join(output.getBatchMessages(2))

    #Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # If the file does not yet exist, it is created with the header above
    # the first record. This must be decided before opening in append mode,
    # since opening creates the file.
    write_header = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # The context manager closes the handle even if a write fails.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if write_header:
            header = ['Input', 'Status']
            handle.write("%s\n" % "\t".join(header))
        handle.write("%s\t%s%s" % (cmd, result, separator))

    output.addMessage(__file__, -1, "INFO",
                      "Finished SyntaxChecker batchvariant " + cmd)
def snp_converter():
    """
    SNP converter.

    Convert a dbSNP rs number to HGVS description(s) of the SNP specified on
    the reference sequence(s) used by dbSNP.

    Without an ``rs_id`` query argument the empty form is rendered. If the
    lookup raises `ncbi.ServiceError`, an ``EENTREZ`` message is added to the
    output and the page is rendered with an empty list of descriptions.
    """
    # Backwards compatibility.
    if 'rsId' in request.args:
        return redirect(url_for('.snp_converter',
                                rs_id=request.args['rsId']),
                        code=301)

    rs_id = request.args.get('rs_id')

    if not rs_id:
        return render_template('snp-converter.html')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request snpConvert(%s) from %s'
                      % (rs_id, request.remote_addr))
    stats.increment_counter('snp-converter/website')

    try:
        descriptions = ncbi.rsid_to_descriptions(rs_id)
    except ncbi.ServiceError:
        # Without this default the name would be unbound below and the
        # request would fail instead of showing the error message.
        descriptions = []
        output.addMessage(__file__, 4, 'EENTREZ',
                          'An error occured while communicating with dbSNP.')

    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished request snpConvert(%s)' % rs_id)

    return render_template('snp-converter.html',
                           rs_id=rs_id,
                           descriptions=descriptions,
                           messages=messages,
                           summary=output.Summary()[2])
def snp_converter():
    """
    SNP converter.

    Look up the HGVS description(s) for the dbSNP rs number given in the
    ``rs_id`` query argument, using the retriever, and render them. Without
    an ``rs_id`` the empty form is rendered.
    """
    # Backwards compatibility.
    if 'rsId' in request.args:
        target = url_for('.snp_converter', rs_id=request.args['rsId'])
        return redirect(target, code=301)

    rs_id = request.args.get('rs_id')
    if not rs_id:
        return render_template('snp-converter.html')

    output = Output(__file__)
    received = ('Received request snpConvert(%s) from %s'
                % (rs_id, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)

    stats.increment_counter('snp-converter/website')

    descriptions = Retriever.Retriever(output).snpConvert(rs_id)
    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished request snpConvert(%s)' % rs_id)

    summary = output.Summary()[2]
    return render_template('snp-converter.html',
                           rs_id=rs_id,
                           descriptions=descriptions,
                           messages=messages,
                           summary=summary)
def back_translator():
    """
    Render the back translator page.

    If a ``description`` query argument is present it is passed to the back
    translator; the resulting variants are rendered together with the
    messages and summary collected in the output object.
    """
    output = Output(__file__)
    received = 'Received Back Translate request from {}'.format(
        request.remote_addr)
    output.addMessage(__file__, -1, 'INFO', received)

    stats.increment_counter('back-translator/website')

    description = request.args.get('description')

    if description:
        variants = backtranslator.backtranslate(output, description)
    else:
        variants = []

    errors, warnings, summary = output.Summary()
    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO', 'Finished Back Translate request')

    context = dict(errors=errors,
                   summary=summary,
                   description=description or '',
                   messages=messages,
                   variants=variants)
    return render_template('back-translator.html', **context)
def description_extractor():
    """
    The Variant Description Extractor (experimental service).

    Takes ``reference_sequence`` and ``variant_sequence`` from the query
    arguments, describes the difference between them and renders the result.
    If either argument is missing the empty form is rendered.
    """
    reference_sequence = request.args.get('reference_sequence')
    variant_sequence = request.args.get('variant_sequence')

    if not reference_sequence or not variant_sequence:
        return render_template('description-extractor.html')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received Description Extract request from %s'
                      % request.remote_addr)

    # Todo: Move this to the describe module.
    # Note that a failed check only adds a message; the sequences are
    # described regardless.
    dna_checks = ((reference_sequence, 'Reference sequence is not DNA.'),
                  (variant_sequence, 'Variant sequence is not DNA.'))
    for sequence, complaint in dna_checks:
        if not util.is_dna(sequence):
            output.addMessage(__file__, 3, 'ENODNA', complaint)

    raw_vars = describe.describe(reference_sequence, variant_sequence)
    description = describe.alleleDescription(raw_vars)

    errors, warnings, summary = output.Summary()
    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished Description Extract request')

    context = dict(reference_sequence=reference_sequence,
                   variant_sequence=variant_sequence,
                   raw_vars=raw_vars,
                   description=description,
                   errors=errors,
                   summary=summary,
                   messages=messages)
    return render_template('description-extractor.html', **context)
def output(settings):
    """
    Provide a fresh `Output` instance named ``'test'``.

    The `settings` argument is accepted but not used in the body.
    """
    instance = Output('test')
    return instance
def description_extractor_submit():
    """
    The Variant Description Extractor (experimental service).

    There multiple ways for the user to provide two sequences, corresponding
    to the values for the `reference_method` and `sample_method` fields, each
    requiring some additional fields to be defined:

    `raw_method`
      The reference and sample sequences are pasted into the form fields.

      - `reference_sequence`: The reference sequence.
      - `sample_sequence`: The sample sequence.

    `file_method`
      The reference and sample sequences are uploaded.

      - `reference_file`: The reference file.
      - `sample_file`: The sample file.

    `refseq_method`
      The reference and sample sequences are given by RefSeq accession
      numbers.

      - `reference_accession_number`: RefSeq accession number for the
        reference sequence.
      - `sample_accession_number`: RefSeq accession number for the sample
        sequence.

    Any method value other than `refseq_method` and `file_method` is treated
    as `raw_method`. Problems with the input are reported as messages on the
    rendered page; the extractor only runs when both sequences are non-empty
    and within `settings.EXTRACTOR_MAX_INPUT_LENGTH`.
    """
    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received Description Extract request from %s'
                      % request.remote_addr)
    stats.increment_counter('description-extractor/website')

    reference_method = request.form.get('reference_method')
    sample_method = request.form.get('sample_method')
    reference_sequence = request.form.get('reference_sequence')
    sample_sequence = request.form.get('sample_sequence')
    reference_file = request.files.get('reference_file')
    sample_file = request.files.get('sample_file')
    reference_accession_number = request.form.get('reference_accession_number')
    sample_accession_number = request.form.get('sample_accession_number')

    # Reference first, then sample, so messages keep their original order.
    r, reference_filename = _extractor_input_sequence(
        output, 'Reference', reference_method, reference_sequence,
        reference_file, reference_accession_number)
    s, sample_filename = _extractor_input_sequence(
        output, 'Sample', sample_method, sample_sequence,
        sample_file, sample_accession_number)

    # Todo: Move this to the describe module.
    if not r or not util.is_dna(r):
        output.addMessage(__file__, 3, 'ENODNA',
                          'Reference sequence is not DNA.')
    if not s or not util.is_dna(s):
        output.addMessage(__file__, 3, 'ENODNA',
                          'Sample sequence is not DNA.')

    raw_vars = None
    if r and s:
        if (len(r) > settings.EXTRACTOR_MAX_INPUT_LENGTH or
            len(s) > settings.EXTRACTOR_MAX_INPUT_LENGTH):
            output.addMessage(__file__, 3, 'EMAXSIZE',
                              'Input sequences are restricted to {:,} bp.'
                              .format(settings.EXTRACTOR_MAX_INPUT_LENGTH))
        else:
            raw_vars = extractor.describe_dna(r, s)

    errors, warnings, summary = output.Summary()
    messages = map(util.message_info, output.getMessages())

    # Logged after collecting the messages, so it is not shown to the user.
    output.addMessage(__file__, -1, 'INFO',
                      'Finished Description Extract request')

    return render_template(
        'description-extractor.html',
        extractor_max_input_length=settings.EXTRACTOR_MAX_INPUT_LENGTH,
        reference_sequence=reference_sequence or '',
        sample_sequence=sample_sequence or '',
        reference_accession_number=reference_accession_number or '',
        sample_accession_number=sample_accession_number or '',
        reference_filename=reference_filename or '',
        sample_filename=sample_filename or '',
        raw_vars=raw_vars, errors=errors, summary=summary, messages=messages,
        reference_method=reference_method, sample_method=sample_method)


def _extractor_input_sequence(output, label, method, pasted_sequence,
                              uploaded_file, accession_number):
    """
    Read one of the two extractor input sequences from the submitted form.

    `label` is ``'Reference'`` or ``'Sample'`` and is only used in the
    messages. Returns a tuple ``(sequence, filename)``; the sequence is
    ``''`` when the selected input is missing (an ``EEMPTYFIELD`` message is
    added) or when the accession number yields no record. The filename is
    ``''`` unless a file was uploaded with `file_method`.
    """
    sequence = ''
    filename = ''

    if method == 'refseq_method':
        if accession_number:
            retriever = Retriever.GenBankRetriever(output)
            genbank_record = retriever.loadrecord(accession_number)
            if genbank_record:
                sequence = unicode(genbank_record.seq)
        else:
            output.addMessage(
                __file__, 3, 'EEMPTYFIELD',
                '%s accession number input fields is empty.' % label)
    elif method == 'file_method':
        if uploaded_file:
            filename = uploaded_file.filename
            sequence = util.read_dna(uploaded_file)
        else:
            output.addMessage(__file__, 3, 'EEMPTYFIELD',
                              'No %s file provided.' % label.lower())
    else:  # raw_method
        if pasted_sequence:
            sequence = util.read_dna(StringIO.StringIO(pasted_sequence))
        else:
            output.addMessage(
                __file__, 3, 'EEMPTYFIELD',
                '%s sequence number input fields is empty.' % label)

    return sequence, filename
def batch_jobs_submit():
    """
    Run batch jobs and render batch checker HTML form.

    The batch jobs are added to the database by the scheduler and ran by the
    BatchChecker daemon.

    The form fields ``job_type``, ``email`` and the uploaded ``file`` are
    validated first. If the job is scheduled and no messages were collected,
    the client is redirected to the job progress page. Otherwise the form is
    rendered again with the collected errors as ``EBATCHJOB`` messages.
    """
    job_type = request.form.get('job_type')
    email = request.form.get('email')

    # Note that, if present, this is always a seekable binary file object.
    batch_file = request.files.get('file')

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()
    assembly_name_or_alias = request.form.get('assembly_name_or_alias',
                                              settings.DEFAULT_ASSEMBLY)

    errors = []

    if not email:
        errors.append('Please provide an email address.')

    if job_type not in BATCH_JOB_TYPES:
        errors.append('Invalid batch job type.')

    # This must test the uploaded file, not the builtin `file` type (which
    # is always true and would let a missing upload through).
    if not batch_file:
        errors.append('Please select a local file for upload.')

    if job_type == 'position-converter':
        try:
            Assembly.by_name_or_alias(assembly_name_or_alias)
        except NoResultFound:
            errors.append('Not a valid assembly.')
        argument = assembly_name_or_alias
    else:
        argument = None

    output = Output(__file__)

    if not errors:
        stats.increment_counter('batch-job/website')

        scheduler = Scheduler.Scheduler()
        file_instance = File.File(output)
        job, columns = file_instance.parseBatchFile(batch_file)

        if job is None:
            errors.append('Could not parse input file, please check your '
                          'file format.')
        else:
            # Creates the result download URL from a job result_id.
            def create_download_url(result_id):
                return url_for('.batch_job_result',
                               result_id=result_id,
                               _external=True)

            result_id = scheduler.addJob(
                email, job, columns, job_type, argument=argument,
                create_download_url=create_download_url)

            # Todo: We now assume that the job was not scheduled if there are
            #   messages, which is probably not correct.
            if not output.getMessages():
                return redirect(url_for('.batch_job_progress',
                                        result_id=result_id))

    for error in errors:
        output.addMessage(__file__, 3, 'EBATCHJOB', error)

    messages = map(util.message_info, output.getMessages())

    return render_template('batch-jobs.html',
                           assemblies=assemblies,
                           assembly_name_or_alias=assembly_name_or_alias,
                           job_type=job_type,
                           max_file_size=settings.MAX_FILE_SIZE // 1048576,
                           messages=messages)
def setup(self):
    """
    Run the parent setup, then create a fresh output object and a grammar
    that reports to it.
    """
    super(TestGrammar, self).setup()
    messages = Output(__file__)
    self.output = messages
    self.grammar = Grammar(messages)
def setup(self):
    """
    Run the parent setup, then create a fresh output object for the test.
    """
    super(TestConverter, self).setup()
    messages = Output(__file__)
    self.output = messages
def reference_loader_submit():
    """
    Reference sequence loader.

    There are five ways for the user to load a reference sequence,
    corresponding to values for the `method` field, each requiring some
    additional fields to be defined.:

    `method=upload_method`
      The reference sequence file is uploaded from a local file.

      - `file`: Reference sequence file to upload.

    `method=url_method`
      The reference sequence file can be found at the specified URL.

      - `url`: URL of reference sequence file to load.

    `method=slice_gene_method`
      Retrieve part of the reference genome for an HGNC gene symbol.

      - `genesymbol`: Gene symbol.
      - `organism`: Organism.
      - `upstream`: Number of 5' flanking nucleotides.
      - `downstream`: Number of 3' flanking nucleotides.

    `method=slice_accession_method`
      Retrieve a range of a chromosome by accession number.

      - `accession`: Chromosome Accession Number.
      - `accession_start`: Start position (one-based, inclusive, in reference
          orientation).
      - `accession_stop`: Stop position (one-based, inclusive, in reference
          orientation).
      - `accession_orientation`: Orientation.

    `method=slice_chromosome_method`
      Retrieve a range of a chromosome by name.

      - `assembly_name_or_alias`: Genome assembly by name or by alias.
      - `chromosome`: Chromosome name.
      - `chromosome_start`: Start position (one-based, inclusive, in reference
          orientation).
      - `chromosome_stop`: Stop position (one-based, inclusive, in reference
          orientation).
      - `chromosome_orientation`: Orientation.
    """
    method = request.form.get('method')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request upload(%s) with arguments %s from %s'
                      % (method, unicode(request.form), request.remote_addr))

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()

    retriever = Retriever.GenBankRetriever(output)
    ud, errors = '', []

    class InputException(Exception):
        pass

    def check_position(position, field):
        """
        Convert a position form value to an integer.

        Commas, periods and dashes are stripped first. Raises
        `InputException` if the value has no `replace` method (e.g., it is
        `None`) or if the remainder is not an integer.
        """
        try:
            position = (position.replace(',', '')
                                .replace('.', '')
                                .replace('-', ''))
            return int(position)
        # A tuple is required to catch both types; `except A, B:` would
        # catch only A and bind the exception to the name B.
        except (AttributeError, ValueError):
            raise InputException('Expected an integer in field: %s' % field)
class TestConverter(MutalyzerTest):
    """
    Test the Converter class.

    All tests convert between transcript-oriented (c./n.) and chromosomal
    (g./m.) descriptions on the hg19 assembly, using the fixtures listed in
    `fixtures`.
    """
    fixtures = (database, hg19, hg19_transcript_mappings)

    def setup(self):
        """
        Run the parent setup and create a fresh output object.
        """
        super(TestConverter, self).setup()
        self.output = Output(__file__)

    def _converter(self, assembly_name_or_alias):
        """
        Create a Converter instance for a given genome assembly.

        The assembly is looked up by name or by alias; exactly one match is
        required.
        """
        assembly = Assembly.query \
            .filter(or_(Assembly.name == assembly_name_or_alias,
                        Assembly.alias == assembly_name_or_alias)) \
            .one()
        return Converter(assembly, self.output)

    def test_converter(self):
        """
        Simple test.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274G>T')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_non_coding(self):
        """
        Test with variant on non-coding transcript.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NR_028383.1:n.-2173C>A')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_compound(self):
        """
        Test with compound variant.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]')
        assert genomic == 'NC_000011.9:g.[111959695G>T;111959699A>G]'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.[274G>T;278A>G]' in coding
        assert 'NR_028383.1:n.[-2173C>A;-2177T>C]' in coding

    def test_hla_cluster(self):
        """
        Convert to primary assembly.

        Transcript NM_000500.5 is mapped to different chromosome locations,
        but we like to just see the primary assembly mapping to chromosome 6.

        See also bug #58.
        """
        # Todo: This test is bogus now that we use a fixture that has just the
        #   mapping to chromosome 6. However, I think we only get this mapping
        #   from our current source (NCBI seq_gene.md) anyway, so I'm not sure
        #   where we got the other mappings from in the past (but haven't
        #   investigated really).
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_000500.5:c.92C>T')
        assert genomic == 'NC_000006.11:g.32006291C>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_000500.5:c.92C>T' in coding

    def test_converter_del_length_reverse(self):
        """
        Position converter on deletion (denoted by length) on transcripts
        located on the reverse strand.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000022.10:g.51016285_51017117del123456789', 'list')
        # Fix for r536: disable the -u and +d convention.
        #assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding
        #assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding
        assert 'NM_001145134.1:c.-159_60del123456789' in coding
        assert 'NR_021492.1:n.-5170_-4338del123456789' in coding

    def test_S_Venkata_Suresh_Kumar(self):
        """
        Test for correct mapping information on genes where CDS start or stop
        is exactly on the border of an exon.

        Bug reported February 24, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000001.10:g.115259837_115259837delT', 'list')
        assert 'NM_001007553.1:c.3863delA' not in coding
        assert 'NM_001007553.1:c.*953delA' in coding
        assert 'NM_001130523.1:c.*953delA' in coding

    def test_S_Venkata_Suresh_Kumar_more(self):
        """
        Another test for correct mapping information on genes where CDS start
        or stop is exactly on the border of an exon.

        Bug reported March 21, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000001.10:g.160012314_160012329del16', 'list')
        assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding
        assert 'NM_002241.4:c.1-7_9del16' in coding

    def test_range_order_forward_correct(self):
        """
        Just a normal position converter call, both directions.  See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.-1_274del')
        assert genomic == 'NC_000011.9:g.111957631_111959695del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.-1_274del' in coding

    def test_range_order_forward_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the forward strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274_-1del')
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_reverse_correct(self):
        """
        Just a normal position converter call on the reverse strand, both
        directions. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.-1_40del')
        assert genomic == 'NC_000020.10:g.48770135_48770175del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_001162505.1:c.-1_40del' in coding

    def test_range_order_reverse_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the reverse strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.40_-1del')
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_incorrect_chrom2c(self):
        """
        Incorrect order of a chromosomal range. See Trac #95.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111959695_111957631del', 'list')
        assert coding is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_delins_large_ins_c2chrom(self):
        """
        Delins with multi-base insertion c. to chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_c2chrom(self):
        """
        Delins with multi-base insertion and explicit deleted sequence c. to
        chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delGinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_chrom2c(self):
        """
        Delins with multi-base insertion chrom to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111959695delinsTAAA', 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_chrom2c(self):
        """
        Delins with multi-base insertion and explicit deleted sequence chrom
        to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111959695delGinsTAAA', 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_chrm_chrom2c(self):
        """
        Mitochondrial m. to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_012920.1:m.12030del', 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_name_chrom2c(self):
        """
        Mitochondrial m. (by chromosome name) to c.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('chrM:m.12030del')
        coding = converter.chrom2c(variant, 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_c2chrom(self):
        """
        Mitochondrial c. to m.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NC_012920.1(ND4_v001):c.1271del')
        assert genomic == 'NC_012920.1:m.12030del'

    def test_nm_without_selector_chrom2c(self):
        """
        NM reference without transcript selection c. to g.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2:c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_with_selector_chrom2c(self):
        """
        NM reference with transcript selection c. to g.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7_v001):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_c2chrom_no_selector(self):
        """
        To NM reference should never result in transcript selection.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('NC_000008.10:g.61654100A>T')
        coding = converter.chrom2c(variant, 'list')
        assert 'NM_017780.2:c.109A>T' in coding

    def test_incorrect_selector_c2chrom(self):
        """
        Incorrect selector.
        """
        converter = self._converter('hg19')
        # Only the reported error is checked, not the conversion result.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_incorrect_selector_version_c2chrom(self):
        """
        Incorrect selector version.
        """
        converter = self._converter('hg19')
        # Only the reported error is checked, not the conversion result.
        converter.c2chrom('NM_017780.2(CHD7_v002):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_no_selector_version_c2chrom(self):
        """
        Selector but no selector version.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_incorrect_selector_no_selector_version_c2chrom(self):
        """
        Incorrect selector, no selector version.
        """
        converter = self._converter('hg19')
        # Only the reported error is checked, not the conversion result.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_ins_seq_chrom2c(self):
        """
        Insertion of a sequence (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111957482_111957483insGAT', 'list')
        assert 'NM_003002.2:c.-150_-149insGAT' in coding
        assert 'NM_012459.2:c.10_11insATC' in coding

    def test_ins_seq_seq(self):
        """
        Insertion of two sequences (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111957482_111957483ins[GAT;AAA]', 'list')
        assert 'NM_003002.2:c.-150_-149ins[GAT;AAA]' in coding
        assert 'NM_012459.2:c.10_11ins[TTT;ATC]' in coding

    def test_ins_seq_c2chrom_reverse(self):
        """
        Insertion of a sequence on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11insATC')
        assert genomic == 'NC_000011.9:g.111957482_111957483insGAT'

    def test_ins_seq_seq_c2chrom_reverse(self):
        """
        Insertion of two sequences on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11ins[TTT;ATC]')
        assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'
class TestGrammar(MutalyzerTest):
    """
    Test the mutalyzer.grammar module.

    Every test feeds a series of variant descriptions to the grammar and
    requires that none of them leaves a parse error on the output object.
    """
    def setup(self):
        super(TestGrammar, self).setup()
        self.output = Output(__file__)
        self.grammar = Grammar(self.output)

    def _parse(self, description):
        """
        Parse a variant description and assert that no parse error was
        recorded.
        """
        self.grammar.parse(description)
        parse_errors = self.output.getOutput('parseError')
        assert parse_errors == []

    def _parse_all(self, descriptions):
        """
        Parse each of the given variant descriptions, in order.
        """
        for description in descriptions:
            self._parse(description)

    def test_some_variants(self):
        """
        Some example variants.
        """
        self._parse_all((
            'NM_002001.2:c.[12del]',
            'NM_002001.2:c.[(12del)]',
            'NM_002001.2:c.[(12del)?]',
            'NM_002001.2:c.[(12del);(12del)]',
            'NM_002001.2:c.[(12del;12del)]',
            'NM_002001.2:c.[((12del)?;12del)?]',
        ))

    def test_compound_insertion(self):
        """
        Some compound insertions.
        """
        inserted_parts = (
            'A',
            'ATC',
            '[A]',
            '[ATC]',
            '28_39',
            '[28_39]',
            '[28_39;A]',
            '[28_39;ATC]',
            '[28_39;A;ATC]',
            '28_39inv',
            '[28_39inv]',
            '[28_39inv;A]',
            '[28_39inv;ATC]',
            '[28_39inv;A;ATC]',
        )
        for inserted in inserted_parts:
            self._parse('NM_002001.2:c.15_16ins' + inserted)

    def test_compound_delins(self):
        """
        Some compound deletion-insertions.
        """
        inserted_parts = (
            'A',
            'ATC',
            '[A]',
            '[ATC]',
            '28_39',
            '[28_39]',
            '[28_39;A]',
            '[28_39;ATC]',
            '[28_39;A;ATC]',
            '28_39inv',
            '[28_39inv]',
            '[28_39inv;A]',
            '[28_39inv;ATC]',
            '[28_39inv;A;ATC]',
        )
        for inserted in inserted_parts:
            self._parse('NM_002001.2:c.12_17delins' + inserted)

    def test_protein_variants(self):
        """
        Some protein variants.

        Entries that are commented out are disabled descriptions which are
        kept for reference.
        """
        self._parse_all((
            'NG_009105.1(OPN1LW):p.=',
            'NG_009105.1(OPN1LW):p.?',
            'NM_000076.2(CDKN1C):p.0',
            'NM_000076.2(CDKN1C):p.0?',
            'NG_009105.1(OPN1LW):p.(=)',
            'NM_000076.2(CDKN1C):p.(Ala123del)',
            'NM_000076.2(CDKN1C):p.(Ala123_Leu126del)',
            'NM_000076.2(CDKN1C):p.(Ala123_Leu126delinsVal)',
            'NM_000076.2(CDKN1C):p.Ala123del',
            'NM_000076.2(CDKN1C):p.Ala123_Leu126del',
            'NM_000076.2(CDKN1C):p.Ala123_Leu126delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123_*317delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123_X317delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123delinsVal',
            'NM_000076.2(CDKN1C):p.Ala123delinsValPro',
            'NM_000076.2(CDKN1C):p.Ala123delinsVP',
            'NM_000076.2(CDKN1C):p.Ala123fs',
            'NM_000076.2(CDKN1C_i001):p.(Glu124Serfs*148)',
            'NM_000076.2(CDKN1C_i001):p.(Glu124SerfsX148)',
            'NM_000076.2(CDKN1C_i001):p.(E124Sfs*148)',
            'NM_000076.2(CDKN1C_i001):p.(E124SfsX148)',
            'NG_009105.1(OPN1LW):p.Met1Leu',
            'NP_064445.1(OPN1LW):p.Met1?',
            'NP_064445.1(OPN1LW):p.M1?',
            'NP_064445.1:p.Gln16del',
            'NP_064445.1:p.Gln16dup',
            'NP_064445.1:p.Gln3del',
            'NP_064445.1:p.Q16del',
            'NP_064445.1:p.Q16dup',
            'NP_064445.1:p.Q16*',
            'NP_064445.1:p.Q16X',
            'NG_009105.1:p.Gln3Leu',
            'NG_009105.1(OPN1LW):p.Gln3Leu',
            'NG_009105.1(OPN1LW_i1):p.Gln3Leu',
            'NG_009105.1(OPN1LW_v1):p.Gln3Leu',
            'NG_009105.1(OPN1LW):p.Gln3_Gln4insLeu',
            'NG_009105.1(OPN1LW):p.Gln3_Gln4insGln',
            'NG_009105.1(OPN1LW):p.Gln3_Gln4dup',
            'NG_009105.1(OPN1LW):p.Q3_Q4insQ',
            'NG_009105.1(OPN1LW):p.Q3_Q4insQQ',
            'NG_009105.1(OPN1LW):p.Q3_Q4dup',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7del',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValLeu',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsValPro',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsGlnGlnTrpSerLeu',
            'NG_009105.1(OPN1LW):p.Q3_L7delinsGlnGlnTrpSerLeu',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7delinsQQWSL',
            #'NG_009105.1(OPN1LW):p.Met1AlaextMet-1',
            #'NG_009105.1(OPN1LW):p.M1AextM-1',
            #'NG_009105.1(OPN1LW):p.Gln3_Leu7[3]',
            'NG_009105.1(OPN1LW):p.Gln3_Leu7(1_6)',
            'NG_009105.1(OPN1LW):p.Gln3Leu',
            'NG_009105.1(OPN1LW):p.Gln3Leu',
            #'NM_000076.2(CDKN1C_i001):p.(*317Trpext*3)',
            'NM_000076.2(CDKN1C_i001):p.(*317TrpextX3)',
            #'NM_000076.2(CDKN1C_i001):p.(*317Cysext*1)',
            'NM_000076.2(CDKN1C_i001):p.(*317CysextX1)',
            #'NM_000076.2(CDKN1C_i001):p.(*317Cext*1)',
            'NM_000076.2(CDKN1C_i001):p.(*317CextX1)',
            #'t(X;17)(DMD:p.Met1_Val1506; SGCA:p.Val250_*387)',
        ))

    def test_minus_in_gene_symbol(self):
        """
        Gene symbol is allowed to contain a minus character.
        """
        self._parse('UD_132464528477(KRTAP2-4_v001):c.100del')
def name_checker():
    """
    Name checker.

    Runs the variant checker on the `description` request argument and
    renders the `name-checker.html` template with the results. Without a
    description, the empty form is rendered.

    Two legacy request arguments are answered with a permanent (301)
    redirect to this same view:

    - `name`: alias for `description` (a `standalone` argument is passed
      along).
    - `mutationName`: used by older LOVD versions; redirected with
      `standalone=1`.
    """
    args = request.args

    # Backwards compatibility: `name` is an alias for `description`.
    if 'name' in args:
        target = url_for('.name_checker',
                         description=args['name'],
                         standalone=args.get('standalone'))
        return redirect(target, code=301)

    # Backwards compatibility with older LOVD versions: `mutationName`
    # implies standalone mode.
    if 'mutationName' in args:
        target = url_for('.name_checker',
                         description=args['mutationName'],
                         standalone=1)
        return redirect(target, code=301)

    description = args.get('description')
    if not description:
        return render_template('name-checker.html')

    output = Output(__file__)
    output.addMessage(
        __file__, -1, 'INFO',
        'Received variant %s from %s' % (description, request.remote_addr))
    stats.increment_counter('name-checker/website')

    variantchecker.check_variant(description, output)

    indexed = output.getIndexedOutput
    plain = output.getOutput

    errors, warnings, summary = output.Summary()
    parse_error = plain('parseError')

    record_type = indexed('recordType', 0, '')
    reference = indexed('reference', 0, '')

    # LRG records are offered as XML, everything else as GenBank.
    if not reference:
        reference_filename = None
    elif record_type == 'LRG':
        reference_filename = reference + '.xml'
    else:
        reference_filename = reference + '.gb'

    genomic_dna = indexed('molType', 0) != 'n'
    genomic_description = indexed('genomicDescription', 0, '')

    # Create a link to the UCSC Genome Browser, showing the region of the
    # raw variants with a margin of 10 positions on both sides.
    browser_link = None
    raw_variants = indexed('rawVariantsChromosomal', 0)
    if raw_variants:
        positions = []
        for _, (first, last) in raw_variants[2]:
            positions.extend((first, last))
        bed_url = url_for('.bed', description=description, _external=True)
        link_template = ('http://genome.ucsc.edu/cgi-bin/hgTracks?db=hg19&'
                         'position={chromosome}:{start}-{stop}'
                         '&hgt.customText={bed_file}')
        browser_link = link_template.format(
            chromosome=raw_variants[0],
            start=min(positions) - 10,
            stop=max(positions) + 10,
            bed_file=urllib.quote(bed_url))

    # Experimental description extractor. Only attempted when both the
    # original and the mutated sequence are available.
    extracted = ''
    original = indexed('original', 0)
    mutated = indexed('mutated', 0) if original else None
    if original and mutated:
        allele = extractor.describe_dna(original, mutated)
        extracted = unicode(allele) if allele else '(skipped)'

    # Todo: Generate the fancy HTML views for the proteins here instead of in
    #   `mutalyzer.variantchecker`.
    # Note: the messages are collected here, before the final INFO message
    # below is added to the output.
    arguments = {
        'description': description,
        'messages': map(util.message_info, output.getMessages()),
        'summary': summary,
        'parse_error': parse_error,
        'errors': errors,
        'genomicDescription': genomic_description,
        'chromDescription': indexed('genomicChromDescription', 0),
        'genomicDNA': genomic_dna,
        'visualisation': plain('visualisation'),
        'descriptions': plain('descriptions'),
        'protDescriptions': plain('protDescriptions'),
        'oldProtein': plain('oldProteinFancy'),
        'altStart': indexed('altStart', 0),
        'altProtein': plain('altProteinFancy'),
        'newProtein': plain('newProteinFancy'),
        'transcriptInfo': indexed('hasTranscriptInfo', 0, False),
        'transcriptCoding': indexed('transcriptCoding', 0, False),
        'exonInfo': plain('exonInfo'),
        'cdsStart_g': indexed('cdsStart_g', 0),
        'cdsStart_c': indexed('cdsStart_c', 0),
        'cdsStop_g': indexed('cdsStop_g', 0),
        'cdsStop_c': indexed('cdsStop_c', 0),
        'restrictionSites': plain('restrictionSites'),
        'legends': plain('legends'),
        # Todo: Download link is not shown...
        'reference_filename': reference_filename,
        'browserLink': browser_link,
        'extractedDescription': extracted,
        'standalone': bool(args.get('standalone')),
    }

    output.addMessage(__file__, -1, 'INFO',
                      'Finished variant %s' % description)

    return render_template('name-checker.html', **arguments)
def setup(self):
    """
    Run the parent class setup, then store a new Output instance
    (constructed with this module's path) as `self.output`.
    """
    super(TestMutator, self).setup()
    self.output = Output(__file__)
class TestConverter(MutalyzerTest):
    """
    Test the Converter class.

    All tests run against the hg19 assembly, using the `database`, `hg19`
    and `hg19_transcript_mappings` fixtures. Messages emitted by the
    converter are collected on `self.output`.
    """
    fixtures = (database, hg19, hg19_transcript_mappings)

    def setup(self):
        super(TestConverter, self).setup()
        self.output = Output(__file__)

    def _converter(self, assembly_name_or_alias):
        """
        Create a Converter instance for a given genome assembly.

        The assembly is looked up by either its name or its alias; exactly
        one match is required (`.one()`).
        """
        assembly = Assembly.query \
            .filter(or_(Assembly.name == assembly_name_or_alias,
                        Assembly.alias == assembly_name_or_alias)) \
            .one()
        return Converter(assembly, self.output)

    def test_converter(self):
        """
        Simple test.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274G>T')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_non_coding(self):
        """
        Test with variant on non-coding transcript.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NR_028383.1:n.-2173C>A')
        assert genomic == 'NC_000011.9:g.111959695G>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274G>T' in coding
        # Fix for r536: disable the -u and +d convention.
        #assert 'NR_028383.1:c.1-u2173C>A' in coding
        assert 'NR_028383.1:n.-2173C>A' in coding

    def test_converter_compound(self):
        """
        Test with compound variant.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.[274G>T;278A>G]')
        assert genomic == 'NC_000011.9:g.[111959695G>T;111959699A>G]'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.[274G>T;278A>G]' in coding
        assert 'NR_028383.1:n.[-2173C>A;-2177T>C]' in coding

    def test_hla_cluster(self):
        """
        Convert to primary assembly.

        Transcript NM_000500.5 is mapped to different chromosome locations,
        but we like to just see the primary assembly mapping to chromosome 6.

        See also bug #58.
        """
        # Todo: This test is bogus now that we use a fixture that has just the
        #   mapping to chromosome 6. However, I think we only get this mapping
        #   from our current source (NCBI seq_gene.md) anyway, so I'm not sure
        #   where we got the other mappings from in the past (but haven't
        #   investigated really).
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_000500.5:c.92C>T')
        assert genomic == 'NC_000006.11:g.32006291C>T'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_000500.5:c.92C>T' in coding

    def test_converter_del_length_reverse(self):
        """
        Position converter on deletion (denoted by length) on transcripts
        located on the reverse strand.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000022.10:g.51016285_51017117del123456789', 'list')
        # Fix for r536: disable the -u and +d convention.
        #assert 'NM_001145134.1:c.-138-u21_60del123456789' in coding
        #assert 'NR_021492.1:c.1-u5170_1-u4338del123456789' in coding
        assert 'NM_001145134.1:c.-159_60del123456789' in coding
        assert 'NR_021492.1:n.-5170_-4338del123456789' in coding

    def test_S_Venkata_Suresh_Kumar(self):
        """
        Test for correct mapping information on genes where CDS start or stop
        is exactly on the border of an exon.

        Bug reported February 24, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000001.10:g.115259837_115259837delT', 'list')
        assert 'NM_001007553.1:c.3863delA' not in coding
        assert 'NM_001007553.1:c.*953delA' in coding
        assert 'NM_001130523.1:c.*953delA' in coding

    def test_S_Venkata_Suresh_Kumar_more(self):
        """
        Another test for correct mapping information on genes where CDS start
        or stop is exactly on the border of an exon.

        Bug reported March 21, 2012 by S Venkata Suresh Kumar.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000001.10:g.160012314_160012329del16', 'list')
        assert 'NM_002241.4:c.-27250-7_-27242del16' not in coding
        assert 'NM_002241.4:c.1-7_9del16' in coding

    def test_range_order_forward_correct(self):
        """
        Just a normal position converter call, both directions.  See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.-1_274del')
        assert genomic == 'NC_000011.9:g.111957631_111959695del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.-1_274del' in coding

    def test_range_order_forward_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the forward strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274_-1del')
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_reverse_correct(self):
        """
        Just a normal position converter call on the reverse strand, both
        directions. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.-1_40del')
        assert genomic == 'NC_000020.10:g.48770135_48770175del'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_001162505.1:c.-1_40del' in coding

    def test_range_order_reverse_incorrect_c2chrom(self):
        """
        Incorrect order of a range on the reverse strand. See Trac #95.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_001162505.1:c.40_-1del')
        assert genomic is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_range_order_incorrect_chrom2c(self):
        """
        Incorrect order of a chromosomal range. See Trac #95.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111959695_111957631del', 'list')
        assert coding is None
        erange = self.output.getMessagesWithErrorCode('ERANGE')
        assert len(erange) == 1

    def test_delins_large_ins_c2chrom(self):
        """
        Delins with multi-base insertion c. to chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_c2chrom(self):
        """
        Delins with multi-base insertion and explicit deleted sequence c. to
        chrom.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_003002.2:c.274delGinsTAAA')
        assert genomic == 'NC_000011.9:g.111959695delinsTAAA'
        coding = converter.chrom2c(genomic, 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_chrom2c(self):
        """
        Delins with multi-base insertion chrom to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111959695delinsTAAA', 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_delins_large_ins_explicit_chrom2c(self):
        """
        Delins with multi-base insertion and explicit deleted sequence chrom
        to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111959695delGinsTAAA', 'list')
        assert 'NM_003002.2:c.274delinsTAAA' in coding

    def test_chrm_chrom2c(self):
        """
        Mitochondrial m. to c.
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c('NC_012920.1:m.12030del', 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_name_chrom2c(self):
        """
        Mitochondrial m. (by chromosome name) to c.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('chrM:m.12030del')
        coding = converter.chrom2c(variant, 'list')
        assert 'NC_012920.1(ND4_v001):c.1271del' in coding

    def test_chrm_c2chrom(self):
        """
        Mitochondrial c. to m.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NC_012920.1(ND4_v001):c.1271del')
        assert genomic == 'NC_012920.1:m.12030del'

    def test_nm_without_selector_chrom2c(self):
        """
        NM reference without transcript selection c. to g.

        Despite the test name, this exercises `c2chrom`.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2:c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_with_selector_chrom2c(self):
        """
        NM reference with transcript selection c. to g.

        Despite the test name, this exercises `c2chrom`.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7_v001):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_nm_c2chrom_no_selector(self):
        """
        To NM reference should never result in transcript selection.

        Despite the test name, this exercises `chrom2c`.
        """
        converter = self._converter('hg19')
        variant = converter.correctChrVariant('NC_000008.10:g.61654100A>T')
        coding = converter.chrom2c(variant, 'list')
        assert 'NM_017780.2:c.109A>T' in coding

    def test_incorrect_selector_c2chrom(self):
        """
        Incorrect selector.
        """
        converter = self._converter('hg19')
        # Only the emitted error is checked, not the return value.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_incorrect_selector_version_c2chrom(self):
        """
        Incorrect selector version.
        """
        converter = self._converter('hg19')
        # Only the emitted error is checked, not the return value.
        converter.c2chrom('NM_017780.2(CHD7_v002):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_no_selector_version_c2chrom(self):
        """
        Selector but no selector version.
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_017780.2(CHD7):c.109A>T')
        assert genomic == 'NC_000008.10:g.61654100A>T'

    def test_incorrect_selector_no_selector_version_c2chrom(self):
        """
        Incorrect selector, no selector version.
        """
        converter = self._converter('hg19')
        # Only the emitted error is checked, not the return value.
        converter.c2chrom('NM_017780.2(CHD8):c.109A>T')
        not_in_db = self.output.getMessagesWithErrorCode('EACCNOTINDB')
        assert len(not_in_db) == 1

    def test_ins_seq_chrom2c(self):
        """
        Insertion of a sequence (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111957482_111957483insGAT', 'list')
        assert 'NM_003002.2:c.-150_-149insGAT' in coding
        assert 'NM_012459.2:c.10_11insATC' in coding

    def test_ins_seq_seq(self):
        """
        Insertion of two sequences (chrom2c).
        """
        converter = self._converter('hg19')
        coding = converter.chrom2c(
            'NC_000011.9:g.111957482_111957483ins[GAT;AAA]', 'list')
        assert 'NM_003002.2:c.-150_-149ins[GAT;AAA]' in coding
        assert 'NM_012459.2:c.10_11ins[TTT;ATC]' in coding

    def test_ins_seq_c2chrom_reverse(self):
        """
        Insertion of a sequence on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11insATC')
        assert genomic == 'NC_000011.9:g.111957482_111957483insGAT'

    def test_ins_seq_seq_c2chrom_reverse(self):
        """
        Insertion of two sequences on reverse strand (c2chrom).
        """
        converter = self._converter('hg19')
        genomic = converter.c2chrom('NM_012459.2:c.10_11ins[TTT;ATC]')
        assert genomic == 'NC_000011.9:g.111957482_111957483ins[GAT;AAA]'
def _processConversion(self, batch_job, cmd, flags):
    """
    Process an entry from the Position Converter, write the results
    to the job-file.

    The Position Converter is wrapped in a try except block which
    ensures that the Batch Process keeps running. Any exception raised
    during the conversion is recorded as an EBATCHU message and
    reported in the 'Errors' column of the result row.

    Side-effect:
        - One row is appended to the result file
          `<CACHE_DIR>/batch-job-<result_id>.txt`. If the file does not
          exist yet, it is created with a header row first.

    @arg batch_job: The batch job this entry belongs to; its `argument`
                    is used as assembly name or alias and its
                    `result_id` determines the result file name
    @arg cmd: The Position Converter input
    @type cmd: unicode
    @arg flags: Flags of the current entry; when they contain 'C', the
                row is terminated with a tab instead of a newline
    """
    O = Output(__file__)
    variant = cmd
    variants = None
    gName = ""
    cNames = [""]

    O.addMessage(__file__, -1, "INFO",
                 "Received PositionConverter batchvariant " + cmd)

    stats.increment_counter('position-converter/batch')

    skip = self.__processFlags(O, flags)
    if not skip:
        try:
            try:
                assembly = Assembly.by_name_or_alias(batch_job.argument)
            except NoResultFound:
                O.addMessage(__file__, 3, 'ENOASSEMBLY',
                             'Not a valid assembly: ' + batch_job.argument)
                # Re-raised on purpose; handled by the catch-all below.
                raise

            converter = Converter(assembly, O)

            # Also accept chr accNo
            variant = converter.correctChrVariant(variant)

            # TODO: Parse the variant and check for c or g. This is ugly
            if not (":c." in variant or ":n." in variant
                    or ":g." in variant):
                # Bad name: run the parser on it (presumably so that it
                # reports the problem on the output object).
                grammar = Grammar(O)
                grammar.parse(variant)

            if ":c." in variant or ":n." in variant:
                # Do the c2chrom dance
                variant = converter.c2chrom(variant)

            # NOTE:
            # If we received a coding reference convert that to the
            # genomic position variant. Use that variant as the input
            # of the chrom2c.

            # If the input is a genomic variant or if we converted a
            # coding variant to a genomic variant we try to find all
            # other affected coding variants.
            if variant and ":g." in variant:
                # Do the chrom2c dance
                variants = converter.chrom2c(variant, "dict")
                if variants:
                    gName = variant
                    # Due to the cyclic behavior of the Position Converter
                    # we know for a fact that if a correct chrom name is
                    # generated by the converter.c2chrom that we will at
                    # least find one variant with chrom2c. Collect the
                    # variants from a nested lists and store them.
                    cNames = [cName for cName2 in variants.values()
                              for cName in cName2]
        except Exception:
            # Catch all exceptions related to the processing of cmd
            O.addMessage(__file__, 4, "EBATCHU",
                         "Unexpected error occurred, dev-team notified")

    error = "%s" % "|".join(O.getBatchMessages(2))

    # Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # Decide before opening: append mode creates the file if missing.
    is_new_file = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # The context manager closes the handle even when a write fails.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if is_new_file:
            # The result file starts with a tab delimited header row.
            header = ['Input Variant',
                      'Errors',
                      'Chromosomal Variant',
                      'Coding Variant(s)']
            handle.write("%s\n" % "\t".join(header))
        handle.write("%s\t%s\t%s\t%s%s" % (cmd, error, gName,
                                           "\t".join(cNames), separator))

    O.addMessage(__file__, -1, "INFO",
                 "Finisehd PositionConverter batchvariant " + cmd)
def batch_jobs_submit():
    """
    Run batch jobs and render batch checker HTML form.

    The batch jobs are added to the database by the scheduler and ran by the
    BatchChecker daemon.

    Reads `job_type`, `email` and `assembly_name_or_alias` from the submitted
    form and the uploaded batch file from the `file` field. If the input is
    valid, the file is parsed and the job is scheduled; when that produced no
    messages, the response is a redirect to the job progress page. In all
    other cases the form is rendered again with the collected messages.
    """
    job_type = request.form.get('job_type')
    email = request.form.get('email')

    # Note that, when present, this is a seekable binary file object.
    batch_file = request.files.get('file')

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()
    assembly_name_or_alias = request.form.get('assembly_name_or_alias',
                                              settings.DEFAULT_ASSEMBLY)

    errors = []

    if not email:
        email = '{}@website.mutalyzer'.format(request.remote_addr)

    if job_type not in BATCH_JOB_TYPES:
        errors.append('Invalid batch job type.')

    # This must test the uploaded file. The previous check tested the
    # builtin `file` type, which is always true, so a missing upload went
    # undetected and the parser was called without a file.
    if not batch_file:
        errors.append('Please select a local file for upload.')

    if job_type == 'position-converter':
        try:
            Assembly.by_name_or_alias(assembly_name_or_alias)
        except NoResultFound:
            errors.append('Not a valid assembly.')
        argument = assembly_name_or_alias
    else:
        argument = None

    output = Output(__file__)

    if not errors:
        stats.increment_counter('batch-job/website')

        scheduler = Scheduler.Scheduler()
        file_instance = File.File(output)
        job, columns = file_instance.parseBatchFile(batch_file)

        if job is None:
            errors.append('Could not parse input file, please check your '
                          'file format.')
        else:
            result_id = scheduler.addJob(email, job, columns, job_type,
                                         argument=argument)

            # Todo: We now assume that the job was not scheduled if there are
            #   messages, which is probably not correct.
            if not output.getMessages():
                return redirect(url_for('.batch_job_progress',
                                        result_id=result_id))

    for error in errors:
        output.addMessage(__file__, 3, 'EBATCHJOB', error)

    messages = map(util.message_info, output.getMessages())

    return render_template('batch-jobs.html',
                           assemblies=assemblies,
                           assembly_name_or_alias=assembly_name_or_alias,
                           job_type=job_type,
                           max_file_size=settings.MAX_FILE_SIZE // 1048576,
                           messages=messages)
def lovd_variant_info():
    """
    The chromosomal to coding and vice versa conversion interface for LOVD.

    Search for an NM number in the database, if the version number matches,
    get the start and end positions in a variant and translate these positions
    to chromosomal notation if the variant is in coding notation and vice
    versa.

    - If no end position is present, the start position is assumed to be the
      end position.
    - If the version number is not found in the database, an error message is
      generated and a suggestion for an other version is given.
    - If the reference sequence is not found at all, an error is returned.
    - If no variant is present, the transcription start and end and CDS end in
      coding notation is returned.
    - If the variant is not accepted by the nomenclature parser, a parse error
      will be printed.

    Get variant info and return the result as plain text. For an unknown
    build, the plain text response is 'invalid build'.

    Parameters:

    LOVD_ver
      The version of the calling LOVD.
    build
      The human genome build (hg19 assumed).
    acc
      The accession number (NM number).
    var
      A description of the variant.

    Returns:

    start_main
      The main coordinate of the start position in I{c.} (non-star) notation.
    start_offset
      The offset coordinate of the start position in I{c.} notation (intronic
      position).
    end_main
      The main coordinate of the end position in I{c.} (non-star) notation.
    end_offset
      The offset coordinate of the end position in I{c.} notation (intronic
      position).
    start_g
      The I{g.} notation of the start position.
    end_g
      The I{g.} notation of the end position.
    type
      The mutation type.

    Returns (alternative):

    trans_start
      Transcription start in I{c.} notation.
    trans_stop
      Transcription stop in I{c.} notation.
    CDS_stop
      CDS stop in I{c.} notation.
    """
    lovd_version = request.args['LOVD_ver']
    build = request.args['build']
    accession = request.args['acc']
    description = request.args.get('var')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request variantInfo(%s:%s (LOVD_ver %s, '
                      'build %s)) from %s'
                      % (accession, description, lovd_version, build,
                         request.remote_addr))

    try:
        assembly = Assembly.by_name_or_alias(build)
    except NoResultFound:
        response = make_response('invalid build')
        response.headers['Content-Type'] = 'text/plain; charset=utf-8'
        return response

    converter = Converter(assembly, output)

    result = ''

    # If no variant is given, return transcription start, transcription
    # end and CDS stop in c. notation.
    if description:
        ret = converter.mainMapping(accession, description)
    else:
        ret = converter.giveInfo(accession)
        if ret:
            result = '%i\n%i\n%i' % ret

    # No result yet and no mapping to report: fall back to the first
    # LOVDERR entry on the output object, or a generic error.
    if not result and not getattr(ret, 'startmain', None):
        out = output.getOutput('LOVDERR')
        if out:
            result = out[0]
        else:
            result = 'Unknown error occured'

    output.addMessage(__file__, -1, 'INFO',
                      'Finished request variantInfo(%s:%s (LOVD_ver %s, '
                      'build %s))'
                      % (accession, description, lovd_version, build))

    if not result and getattr(ret, 'startmain', None):
        result = '%i\n%i\n%i\n%i\n%i\n%i\n%s' % (
            ret.startmain, ret.startoffset, ret.endmain, ret.endoffset,
            ret.start_g, ret.end_g, ret.mutationType)

    # Todo: Obsoleted error messages, remove soon.
    # The rewritten text must be stored in `result`. It used to be assigned
    # to `response`, which was overwritten right below, so the substitution
    # never reached the client.
    if lovd_version == '2.0-23':
        result = re.sub(r'^Error \(.*\):', 'Error:', result)

    response = make_response(result)
    response.headers['Content-Type'] = 'text/plain; charset=utf-8'
    return response
def _processConversion(self, batch_job, cmd, flags):
    """
    Process an entry from the Position Converter, write the results
    to the job-file.

    The Position Converter is wrapped in a try except block which ensures
    that the Batch Process keeps running. Errors are caught and the user
    will be notified.

    Side-effect:
        - Output written to outputfile.

    @arg batch_job: The batch job this entry belongs to; its `argument`
        is used as assembly name or alias and its `result_id` determines
        the name of the results file
    @arg cmd: The Position Converter input
    @type cmd: unicode
    @arg flags: Flags of the current entry; when they contain 'C' the
        written row is terminated with a tab instead of a newline
    """
    O = Output(__file__)
    variant = cmd
    variants = None
    gName = ""
    cNames = [""]

    O.addMessage(__file__, -1, "INFO",
                 "Received PositionConverter batchvariant " + cmd)

    stats.increment_counter('position-converter/batch')

    skip = self.__processFlags(O, flags)
    if not skip:
        try:
            try:
                assembly = Assembly.by_name_or_alias(batch_job.argument)
            except NoResultFound:
                O.addMessage(__file__, 3, 'ENOASSEMBLY',
                             'Not a valid assembly: ' + batch_job.argument)
                # Re-raised on purpose: the outer handler records the
                # generic batch error and stops processing this entry.
                raise

            converter = Converter(assembly, O)

            # Also accept chr accNo
            # NOTE(review): position_converter checks the result of this
            # call for falsiness before using it; here a falsy result
            # makes the membership tests below raise and the entry is
            # reported as EBATCHU -- confirm that this is intended.
            variant = converter.correctChrVariant(variant)

            # TODO: Parse the variant and check for c or g. This is ugly
            if not (":c." in variant or ":n." in variant
                    or ":g." in variant):
                # Bad name
                grammar = Grammar(O)
                grammar.parse(variant)

            if ":c." in variant or ":n." in variant:
                # Do the c2chrom dance
                variant = converter.c2chrom(variant)

            # NOTE:
            # If we received a coding reference convert that to the
            # genomic position variant. Use that variant as the input
            # of the chrom2c.

            # If the input is a genomic variant or if we converted a
            # coding variant to a genomic variant we try to find all
            # other affected coding variants.
            if variant and ":g." in variant:
                # Do the chrom2c dance
                variants = converter.chrom2c(variant, "dict")
                if variants:
                    gName = variant
                    # Due to the cyclic behavior of the Position Converter
                    # we know for a fact that if a correct chrom name is
                    # generated by the converter.c2chrom that we will at
                    # least find one variant with chrom2c. Collect the
                    # variants from the nested lists and store them.
                    cNames = [cName for cName2 in variants.values()
                              for cName in cName2]
        except Exception:
            # Catch all exceptions related to the processing of cmd
            O.addMessage(__file__, 4, "EBATCHU",
                         "Unexpected error occurred, dev-team notified")

    error = "%s" % "|".join(O.getBatchMessages(2))

    # Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # Must be decided before opening: append mode creates the file.
    write_header = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # The context manager closes the handle even when a write fails.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if write_header:
            # The file did not exist yet, so start it with the tab
            # delimited header.
            header = ['Input Variant',
                      'Errors',
                      'Chromosomal Variant',
                      'Coding Variant(s)']
            handle.write("%s\n" % "\t".join(header))

        handle.write("%s\t%s\t%s\t%s%s" % (cmd, error, gName,
                                           "\t".join(cNames), separator))

    O.addMessage(__file__, -1, "INFO",
                 "Finished PositionConverter batchvariant " + cmd)
def reference_loader_submit():
    """
    Reference sequence loader.

    There are five ways for the user to load a reference sequence,
    corresponding to values for the `method` field, each requiring some
    additional fields to be defined:

    `method=upload_method`
      The reference sequence file is uploaded from a local file.

      - `file`: Reference sequence file to upload.

    `method=url_method`
      The reference sequence file can be found at the specified URL.

      - `url`: URL of reference sequence file to load.

    `method=slice_gene_method`
      Retrieve part of the reference genome for an HGNC gene symbol.

      - `genesymbol`: Gene symbol.
      - `organism`: Organism.
      - `upstream`: Number of 5' flanking nucleotides.
      - `downstream`: Number of 3' flanking nucleotides.

    `method=slice_accession_method`
      Retrieve a range of a chromosome by accession number.

      - `accession`: Chromosome Accession Number.
      - `accession_start`: Start position.
      - `accession_stop`: Stop position.
      - `accession_orientation`: Orientation.

    `method=slice_chromosome_method`
      Retrieve a range of a chromosome by name.

      - `assembly_name_or_alias`: Genome assembly by name or by alias.
      - `chromosome`: Chromosome name.
      - `chromosome_start`: Start position.
      - `chromosome_stop`: Stop position.
      - `chromosome_orientation`: Orientation.
    """
    method = request.form.get('method')

    output = Output(__file__)
    output.addMessage(__file__, -1, 'INFO',
                      'Received request upload(%s) with arguments %s from %s'
                      % (method, unicode(request.form),
                         request.remote_addr))

    assemblies = Assembly.query \
        .order_by(*Assembly.order_by_criteria) \
        .all()

    retriever = Retriever.GenBankRetriever(output)
    ud, errors = '', []

    class InputException(Exception):
        """Raised when a submitted form field has an unusable value."""
        pass

    def check_position(position, field):
        """
        Return `position` as an integer after dropping ',', '.' and '-'
        characters; raise InputException naming `field` otherwise.
        """
        try:
            # Normalise inside the try block so that a value without a
            # `replace` method is reported as bad input as well.
            position = position.replace(',', '') \
                               .replace('.', '') \
                               .replace('-', '')
            return int(position)
        # A tuple is required here: `except AttributeError, ValueError`
        # would catch only AttributeError and bind it to the name
        # ValueError, letting the ValueError from int() escape.
        except (AttributeError, ValueError):
            raise InputException('Expected an integer in field: %s' % field)
def position_converter():
    """
    Position converter.

    Query parameters:

    `variant`
      Legacy name for `description`; answered with a permanent redirect.
    `assembly_name_or_alias`
      Assembly to convert against, defaults to `settings.DEFAULT_ASSEMBLY`.
    `description`
      The variant description to convert. Without it only the empty form
      is rendered.

    The page is rendered with the chromosomal description, the transcript
    descriptions (one formatted entry per key of the `chrom2c` result) and
    the messages collected in the output object.
    """
    # Backwards compatibility.
    if 'variant' in request.args:
        target = url_for('.position_converter',
                         description=request.args['variant'])
        return redirect(target, code=301)

    assemblies = Assembly.query.order_by(*Assembly.order_by_criteria).all()

    assembly_name_or_alias = request.args.get('assembly_name_or_alias',
                                              settings.DEFAULT_ASSEMBLY)
    description = request.args.get('description')

    # Nothing to convert, just show the form.
    if not description:
        return render_template(
            'position-converter.html',
            assemblies=assemblies,
            assembly_name_or_alias=assembly_name_or_alias)

    output = Output(__file__)
    received = ('Received request positionConverter(%s, %s) from %s'
                % (assembly_name_or_alias, description, request.remote_addr))
    output.addMessage(__file__, -1, 'INFO', received)

    stats.increment_counter('position-converter/website')

    def mentions(text, markers):
        # True if at least one of the notation markers occurs in the text.
        return any(marker in text for marker in markers)

    chromosomal_description = None
    transcript_descriptions = None

    try:
        assembly = Assembly.by_name_or_alias(assembly_name_or_alias)
    except NoResultFound:
        output.addMessage(__file__, 3, 'ENOASSEMBLY',
                          'Not a valid assembly.')
    else:
        converter = Converter(assembly, output)

        # Convert chromosome name to accession number.
        corrected = converter.correctChrVariant(description)

        if corrected:
            # Now we're ready to actually do position conversion.
            if not mentions(corrected, (':c.', ':n.', ':g.', ':m.')):
                # No known coordinate system in the description; run the
                # parser over it.
                Grammar(output).parse(corrected)

            if mentions(corrected, (':c.', ':n.')):
                corrected = converter.c2chrom(corrected)

            chromosomal_description = corrected

            if corrected and mentions(corrected, (':g.', ':m.')):
                mapping = converter.chrom2c(corrected, 'dict')
                if mapping is None:
                    chromosomal_description = None
                elif mapping:
                    transcript_descriptions = []
                    for key, value in mapping.items():
                        entry = '%-10s:\t%s' % (key[:10],
                                                '\n\t\t'.join(value))
                        transcript_descriptions.append(entry)

    messages = map(util.message_info, output.getMessages())

    finished = ('Finished request positionConverter(%s, %s)'
                % (assembly_name_or_alias, description))
    output.addMessage(__file__, -1, 'INFO', finished)

    return render_template(
        'position-converter.html',
        assemblies=assemblies,
        assembly_name_or_alias=assembly_name_or_alias,
        description=description,
        chromosomal_description=chromosomal_description,
        transcript_descriptions=transcript_descriptions,
        messages=messages)
def _processNameBatch(self, batch_job, cmd, flags):
    """
    Process an entry from the Name Batch, write the results to the
    job-file. If an Exception is raised, catch and continue.

    Side-effect:
        - Output written to outputfile.

    @arg batch_job: The batch job this entry belongs to; its `id` is
        passed on to the database flag update and its `result_id`
        determines the name of the results file
    @arg cmd: The NameChecker input
    @arg flags: Flags of the current entry; when they contain 'C' the
        written row is terminated with a tab instead of a newline
    """
    O = Output(__file__)
    O.addMessage(__file__, -1, "INFO",
                 "Received NameChecker batchvariant " + cmd)

    stats.increment_counter('name-checker/batch')

    # Read out the flags
    skip = self.__processFlags(O, flags)

    if not skip:
        # Run mutalyzer and get values from Output Object 'O'
        try:
            variantchecker.check_variant(cmd, O)
        except Exception:
            # Catch all exceptions related to the processing of cmd
            O.addMessage(__file__, 4, "EBATCHU",
                         "Unexpected error occurred, dev-team notified")
            import traceback
            O.addMessage(__file__, 4, "DEBUG",
                         unicode(repr(traceback.format_exc())))
        finally:
            # Check if we need to update the database
            self._updateDbFlags(O, batch_job.id)

    batchOutput = O.getOutput("batchDone")

    outputline = "%s\t" % cmd
    outputline += "%s\t" % "|".join(O.getBatchMessages(2))

    if batchOutput:
        outputline += batchOutput[0]

    # Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # Must be decided before opening: append mode creates the file.
    write_header = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # The context manager closes the handle even when a write fails.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if write_header:
            # The file did not exist yet, so start it with the tab
            # delimited header.
            header = ['Input',
                      'Errors and warnings',
                      'AccNo',
                      'Genesymbol',
                      'Variant',
                      'Reference Sequence Start Descr.',
                      'Coding DNA Descr.',
                      'Protein Descr.',
                      'GeneSymbol Coding DNA Descr.',
                      'GeneSymbol Protein Descr.',
                      'Genomic Reference',
                      'Coding Reference',
                      'Protein Reference',
                      'Affected Transcripts',
                      'Affected Proteins',
                      'Restriction Sites Created',
                      'Restriction Sites Deleted']
            handle.write("%s\n" % "\t".join(header))

        handle.write("%s%s" % (outputline, separator))

    O.addMessage(__file__, -1, "INFO",
                 "Finished NameChecker batchvariant " + cmd)
def _processNameBatch(self, batch_job, cmd, flags):
    """
    Process an entry from the Name Batch, write the results to the
    job-file. If an Exception is raised, catch and continue.

    Side-effect:
        - Output written to outputfile.

    @arg batch_job: The batch job this entry belongs to; its `id` is
        passed on to the database flag update and its `result_id`
        determines the name of the results file
    @arg cmd: The NameChecker input
    @arg flags: Flags of the current entry; when they contain 'C' the
        written row is terminated with a tab instead of a newline
    """
    O = Output(__file__)
    O.addMessage(__file__, -1, "INFO",
                 "Received NameChecker batchvariant " + cmd)

    stats.increment_counter('name-checker/batch')

    # Read out the flags
    skip = self.__processFlags(O, flags)

    if not skip:
        # Run mutalyzer and get values from Output Object 'O'
        try:
            variantchecker.check_variant(cmd, O)
        except Exception:
            # Catch all exceptions related to the processing of cmd
            O.addMessage(__file__, 4, "EBATCHU",
                         "Unexpected error occurred, dev-team notified")
            import traceback
            O.addMessage(__file__, 4, "DEBUG",
                         unicode(repr(traceback.format_exc())))
        finally:
            # Check if we need to update the database
            self._updateDbFlags(O, batch_job.id)

    batchOutput = O.getOutput("batchDone")

    outputline = "%s\t" % cmd
    outputline += "%s\t" % "|".join(O.getBatchMessages(2))

    if batchOutput:
        outputline += batchOutput[0]

    # Output
    filename = "%s/batch-job-%s.txt" % (settings.CACHE_DIR,
                                        batch_job.result_id)

    # Append mode creates a missing file, so remember beforehand whether
    # the header row still has to be written.
    needs_header = not os.path.exists(filename)

    if flags and 'C' in flags:
        separator = '\t'
    else:
        separator = '\n'

    # Using the handle as a context manager guarantees that it is closed,
    # also when one of the writes raises.
    with io.open(filename, mode='a', encoding='utf-8') as handle:
        if needs_header:
            # New file: put the tab delimited header above the first row.
            header = [
                'Input',
                'Errors and warnings',
                'AccNo',
                'Genesymbol',
                'Variant',
                'Reference Sequence Start Descr.',
                'Coding DNA Descr.',
                'Protein Descr.',
                'GeneSymbol Coding DNA Descr.',
                'GeneSymbol Protein Descr.',
                'Genomic Reference',
                'Coding Reference',
                'Protein Reference',
                'Affected Transcripts',
                'Affected Proteins',
                'Restriction Sites Created',
                'Restriction Sites Deleted'
            ]
            handle.write("%s\n" % "\t".join(header))

        handle.write("%s%s" % (outputline, separator))

    O.addMessage(__file__, -1, "INFO",
                 "Finished NameChecker batchvariant " + cmd)