def setUpClass(cls):
    """Build the two shared ``Annotator`` fixtures used by the test class.

    Resolves the packaged FASTA files ``data/test.fa`` and
    ``data/test_without_prefix.fa`` relative to this module, then stores one
    ``Annotator`` per file (both with the ``'grch37'`` annotation) on the
    class as ``ann`` and ``ann_without_prefix``.
    """
    # Resolve both resource paths first, then construct the annotators.
    prefixed_fasta = resource_filename(__name__, 'data/test.fa')
    unprefixed_fasta = resource_filename(
        __name__, 'data/test_without_prefix.fa')

    cls.ann = Annotator(prefixed_fasta, 'grch37')
    cls.ann_without_prefix = Annotator(unprefixed_fasta, 'grch37')
def run_serial(args):
    """Annotate the input VCF sequentially, one record at a time.

    Opens ``args.I`` with ``pysam.VariantFile``, adds the SpliceAI INFO header
    line, and writes every record to ``args.O``. Records for which
    ``get_delta_scores`` returns a non-empty result get a ``SpliceAI`` INFO
    field before being written.

    Args:
        args: Parsed options. This function reads ``I`` and ``O`` (input and
            output passed to ``pysam.VariantFile``), ``R`` and ``A`` (passed
            to ``Annotator``), and ``D`` and ``M`` (passed to
            ``get_delta_scores``).

    Raises:
        SystemExit: With status 1 when the input or output cannot be opened.
    """
    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as e:
        logging.error('%s', e)
        # Exit with a failure status; the ``exit()`` site helper reports 0.
        raise SystemExit(1)

    try:
        header = vcf.header
        header.add_line(
            '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3 variant '
            'annotation. These include delta scores (DS) and delta positions (DP) for '
            'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
            'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
        )

        try:
            output = pysam.VariantFile(args.O, mode='w', header=header)
        except (IOError, ValueError) as e:
            logging.error('%s', e)
            raise SystemExit(1)

        try:
            ann = Annotator(args.R, args.A)

            for record in vcf:
                scores = get_delta_scores(record, ann, args.D, args.M)
                if len(scores) > 0:
                    record.info['SpliceAI'] = scores
                output.write(record)
        finally:
            # Always close the output, even if scoring fails part-way through.
            output.close()
    finally:
        vcf.close()
def main():
    """Command-line entry point: annotate a VCF, consulting prescored files.

    Reads options via ``get_options()``, opens the input (``args.I``) and
    output (``args.O``) with ``pysam.VariantFile``, adds the SpliceAI INFO
    header line, opens every file listed in ``args.P`` as a prescored VCF, and
    writes each input record to the output. Records for which
    ``get_delta_scores`` returns a non-empty result get a ``SpliceAI`` INFO
    field.

    Raises:
        SystemExit: With status 1 when a required option is ``None`` or the
            input or output cannot be opened.
    """
    args = get_options()

    if None in [args.I, args.O, args.D, args.M]:
        logging.error(
            'Usage: spliceai [-h] [-I [input]] [-O [output]] -R reference -A annotation '
            '[-D [distance]] [-M [mask]]')
        # Exit with a failure status; the ``exit()`` site helper reports 0.
        raise SystemExit(1)

    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as e:
        logging.error('%s', e)
        raise SystemExit(1)

    prescored_files = []
    try:
        header = vcf.header
        # The trailing space after "splice site." keeps the sentence from
        # running straight into "Format:" in the emitted description.
        header.add_line(
            '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3 variant '
            'annotation. These include delta scores (DS) and delta positions (DP) for '
            'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
            'This version also includes the distance (DIST) to the nearest splice site. '
            'Format: ALLELE|SYMBOL|DIST|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
        )

        try:
            output = pysam.VariantFile(args.O, mode='w', header=header)
        except (IOError, ValueError) as e:
            logging.error('%s', e)
            raise SystemExit(1)

        try:
            # Load prescored files. This is best-effort: a file that fails to
            # open is logged and the run continues with those opened so far.
            try:
                for filename in args.P or []:
                    prescored_files.append(pysam.VariantFile(filename))
            except (IOError, ValueError) as e:
                logging.error('%s', e)

            ann = Annotator(args.R, args.A)

            for record in vcf:
                scores = get_delta_scores(
                    record, ann, args.D, args.M, prescored_files)
                if len(scores) > 0:
                    record.info['SpliceAI'] = scores
                output.write(record)
        finally:
            output.close()
    finally:
        # Release every handle that was opened, including prescored VCFs.
        for prescored in prescored_files:
            prescored.close()
        vcf.close()
def main():
    """Command-line entry point: annotate a VCF with SpliceAI scores.

    Reads options via ``get_options()``, opens the input (``args.I``) and
    output (``args.O``) with ``pysam.VariantFile``, adds ``fileDate``,
    ``reference``, SpliceAI and dummy ``NS`` header lines, and writes each
    input record to the output. Records for which ``get_delta_scores``
    returns a non-empty result get a ``SpliceAI`` INFO field.

    Raises:
        SystemExit: With status 1 when a required option is ``None`` or the
            input or output cannot be opened.
    """
    args = get_options()

    if None in [args.I, args.O, args.D, args.M]:
        logging.error(
            'Usage: spliceai [-h] [-I [input]] [-O [output]] -R reference -A annotation '
            '[-D [distance]] [-M [mask]]')
        # Exit with a failure status; the ``exit()`` site helper reports 0.
        raise SystemExit(1)

    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as e:
        logging.error('%s', e)
        raise SystemExit(1)

    try:
        header = vcf.header
        # Extra header lines added so that output.write() accepts the records.
        # Meta-information lines start with exactly two '#' characters.
        header.add_line('##fileDate=20191004')
        # NOTE(review): the reference line is fixed to GRCh37/hg19 regardless
        # of args.A -- confirm this is intended.
        header.add_line('##reference=GRCh37/hg19')
        header.add_line(
            '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3.1 variant '
            'annotation. These include delta scores (DS) and delta positions (DP) for '
            'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
            'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
        )
        header.add_line(
            '##INFO=<ID=NS,Number=20000,Type=Integer,Description="Dummy NS">')

        try:
            output = pysam.VariantFile(args.O, mode='w', header=header)
        except (IOError, ValueError) as e:
            logging.error('%s', e)
            raise SystemExit(1)

        try:
            ann = Annotator(args.R, args.A)

            for record in vcf:
                scores = get_delta_scores(record, ann, args.D, args.M)
                if len(scores) > 0:
                    record.info['SpliceAI'] = scores
                output.write(record)
        finally:
            # Always close the output, even if scoring fails part-way through.
            output.close()
    finally:
        vcf.close()
def process_record(records, results, ref_fasta, annotations, dist_var, mask):
    """Worker loop: score variants from ``records`` until ``'END'`` is seen.

    Builds one ``Annotator`` from ``ref_fasta`` and ``annotations``, then
    repeatedly takes an item from ``records``. Each item is scored with
    ``get_delta_scores`` and the pair ``(record.id, scores)`` is put on
    ``results``. On the ``'END'`` sentinel the marker is put back on
    ``records`` so other consumers also see it, and the loop ends.

    Args:
        records: Queue of variants to score, terminated by ``'END'``.
        results: Queue that receives ``(record.id, scores)`` tuples.
        ref_fasta: First argument passed to ``Annotator``.
        annotations: Second argument passed to ``Annotator``.
        dist_var: Third argument passed to ``get_delta_scores``.
        mask: Fourth argument passed to ``get_delta_scores``.
    """
    # Create the annotation helper once per worker.
    ann = Annotator(ref_fasta, annotations)

    while True:
        # Block until an item is available. Polling with get_nowait() and
        # retrying on queue.Empty would spin a CPU core while the queue is idle.
        record = records.get()

        if record != 'END':
            # Score the variant and hand the result back through the queue.
            scores = get_delta_scores(record, ann, dist_var, mask)
            results.put((record.id, scores))
            continue

        # End of input: re-queue the marker so the other workers stop too.
        records.put('END')
        break
def main():
    """Command-line entry point: annotate a VCF with SpliceAI scores.

    Reads options via ``get_options()``, opens the input (``args.I``) and
    output (``args.O``) with ``pysam.VariantFile``, adds the SpliceAI INFO
    header line, and writes each input record to the output. Records for
    which ``get_delta_scores`` returns a non-empty result get a ``SpliceAI``
    INFO field. Errors from opening either file propagate to the caller.
    """
    args = get_options()

    vcf = pysam.VariantFile(args.I)
    try:
        header = vcf.header
        header.add_line('##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.2 variant '
                        'annotation. These include delta scores (DS) and delta positions (DP) for '
                        'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
                        'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">')

        output = pysam.VariantFile(args.O, mode='w', header=header)
        try:
            ann = Annotator(args.R, args.A)

            for record in vcf:
                scores = get_delta_scores(record, ann)
                if len(scores) > 0:
                    record.info['SpliceAI'] = scores
                output.write(record)
        finally:
            # Close explicitly so the written records are flushed and the
            # handle is released even if scoring raises.
            output.close()
    finally:
        vcf.close()
def main():
    """Command-line entry point: annotate a VCF using a process pool.

    Reads options via ``get_options()``, opens the input (``args.I``) and
    output (``args.O``) with ``pysam.VariantFile``, adds the SpliceAI INFO
    header line, maps ``process_record`` over the input records with an
    ``mp.Pool`` of ``args.t`` workers, and writes every returned record to
    the output.

    Raises:
        SystemExit: With status 1 when a required option is ``None`` or the
            input or output cannot be opened.
    """
    args = get_options()

    if None in [args.I, args.O, args.D, args.M]:
        logging.error(
            'Usage: spliceai [-h] [-I [input]] [-O [output]] -R reference -A annotation '
            '[-D [distance]] [-M [mask]]')
        # Exit with a failure status; the ``exit()`` site helper reports 0.
        raise SystemExit(1)

    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as e:
        logging.error('%s', e)
        raise SystemExit(1)

    try:
        header = vcf.header
        header.add_line(
            '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3 variant '
            'annotation. These include delta scores (DS) and delta positions (DP) for '
            'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
            'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
        )

        try:
            output = pysam.VariantFile(args.O, mode='w', header=header)
        except (IOError, ValueError) as e:
            logging.error('%s', e)
            raise SystemExit(1)

        try:
            ann = Annotator(args.R, args.A)
            # Bind the shared arguments; each mapped call supplies the record.
            run = partial(process_record, ann, args.D, args.M)

            with mp.Pool(args.t) as p:
                for record in p.map(run, vcf):
                    output.write(record)
        finally:
            # Always close the output, even if a worker raises.
            output.close()
    finally:
        vcf.close()
def get_variant_assessment():
    """Score the variant described by the submitted form and return JSON.

    Reads ``chrom``, ``pos``, ``ref``, ``alt``, ``assembly``, ``distance`` and
    ``mask`` from ``request.form``, logs each at debug level, stores the
    variant fields on ``Record``, and returns ``json.dumps`` of the
    ``get_delta_scores`` result.
    """
    logger.debug("Invoking get_variant_assessment")

    form = request.form
    # Log every submitted field in the order it is later consumed.
    logged_fields = (
        ("Chromosome: ", 'chrom'),
        ("Position: ", 'pos'),
        ("Ref: ", 'ref'),
        ("Alt: ", 'alt'),
        ("Assembly: ", 'assembly'),
        ("Distance: ", 'distance'),
        ("Mask: ", 'mask'),
    )
    for prefix, field in logged_fields:
        logger.debug(prefix + form[field])

    # NOTE(review): these attributes are set on the name ``Record`` itself,
    # not on a fresh instance -- confirm this is safe when requests overlap.
    Record.chrom = form['chrom']
    Record.pos = int(form['pos'])
    Record.ref = form['ref']
    Record.alts = [form['alt']]

    genome_annotation = form['assembly']
    max_distance = int(form['distance'])
    mask_flag = int(form['mask'])

    # NOTE(review): the reference is always app.config['HG19'], whatever
    # assembly was submitted -- confirm this is intended.
    annotator = Annotator(app.config['HG19'], genome_annotation)
    return json.dumps(
        get_delta_scores(Record, annotator, max_distance, mask_flag))
GRCH38_ANNOTATIONS = "./annotations/gencode.v38.annotation.txt.gz" ANNOTATION_INTERVAL_TREES = { "37": defaultdict(IntervalTree), "38": defaultdict(IntervalTree), } for genome_version, annotation_path in ("37", GRCH37_ANNOTATIONS), ("38", GRCH38_ANNOTATIONS): print(f"Loading {annotation_path}", flush=True) df = pd.read_table(annotation_path, dtype={"TX_START": int, "TX_END": int}) for _, row in df.iterrows(): chrom = row["CHROM"].replace("chr", "") ANNOTATION_INTERVAL_TREES[genome_version][chrom].add(Interval(row["TX_START"], row["TX_END"] + 0.1, row["#NAME"])) SPLICEAI_ANNOTATOR = { "37": Annotator(HG19_FASTA_PATH, GRCH37_ANNOTATIONS), "38": Annotator(HG38_FASTA_PATH, GRCH38_ANNOTATIONS), } SPLICEAI_MAX_DISTANCE_LIMIT = 10000 SPLICEAI_DEFAULT_DISTANCE = 50 # maximum distance between the variant and gained/lost splice site, defaults to 50 SPLICEAI_DEFAULT_MASK = 0 # mask scores representing annotated acceptor/donor gain and unannotated acceptor/donor loss, defaults to 0 USE_PRECOMPUTED_SCORES = 1 # whether to use precomputed scores by default SPLICEAI_SCORE_FIELDS = "ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL".split("|") SPLICEAI_EXAMPLE = f"/spliceai/?hg=38&distance=50&mask=0&precomputed=1&variant=chr8-140300615-C-G" VARIANT_RE = re.compile( "(chr)?(?P<chrom>[0-9XYMTt]{1,2})" "[-\s:]+"