Esempio n. 1
0
    def setUpClass(cls):
        """Create the Annotator fixtures shared by the tests of this class.

        Two annotators are stored on the class: ``cls.ann`` is built from
        ``data/test.fa`` and ``cls.ann_without_prefix`` from
        ``data/test_without_prefix.fa``. Both are created with the
        ``'grch37'`` annotation argument.
        """
        # Resolve both packaged FASTA paths first, then build the annotators.
        prefixed_fasta, unprefixed_fasta = (
            resource_filename(__name__, relative_path)
            for relative_path in ('data/test.fa',
                                  'data/test_without_prefix.fa')
        )
        cls.ann = Annotator(prefixed_fasta, 'grch37')
        cls.ann_without_prefix = Annotator(unprefixed_fasta, 'grch37')
Esempio n. 2
0
def run_serial(args):
    """
    Annotate the input VCF record by record in a single process.

    Opens ``args.I`` for reading, appends the SpliceAI INFO definition to its
    header, opens ``args.O`` for writing with that header, then scores each
    record with ``get_delta_scores`` and writes every record (annotated or
    not) to the output.

    Args:
        args: Parsed command-line options. The attributes read here are
            ``I`` (input VCF), ``O`` (output VCF), ``R`` and ``A`` (passed to
            ``Annotator``), ``D`` and ``M`` (passed to ``get_delta_scores``).

    Raises:
        SystemExit: With status 1 if the input or output file cannot be
            opened (the error is logged first).
    """
    import sys  # local import: only needed for the error exits below

    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as e:
        logging.error('{}'.format(e))
        sys.exit(1)  # non-zero status so callers can detect the failure

    output = None
    try:
        header = vcf.header
        header.add_line(
            '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3 variant '
            'annotation. These include delta scores (DS) and delta positions (DP) for '
            'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
            'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
        )
        try:
            output = pysam.VariantFile(args.O, mode='w', header=header)
        except (IOError, ValueError) as e:
            logging.error('{}'.format(e))
            sys.exit(1)

        ann = Annotator(args.R, args.A)
        for record in vcf:
            scores = get_delta_scores(record, ann, args.D, args.M)
            # Only set the INFO field when there is something to report.
            if len(scores) > 0:
                record.info['SpliceAI'] = scores
            output.write(record)
    finally:
        # Release both handles on every exit path, including failures.
        if output is not None:
            output.close()
        vcf.close()
Esempio n. 3
0
def main():
    """
    Command-line entry point: annotate a VCF, optionally using prescored files.

    Reads the options from ``get_options()``, opens the input VCF ``args.I``,
    adds the SpliceAI INFO definition to its header, opens the output VCF
    ``args.O`` with that header, opens every file listed in ``args.P`` as a
    prescored VCF, and then writes each input record to the output after
    attaching the result of ``get_delta_scores`` when it is non-empty.

    Raises:
        SystemExit: With status 1 if a required option is missing or the
            input/output file cannot be opened (the error is logged first).
    """
    import sys  # local import: only needed for the error exits below

    args = get_options()

    if None in [args.I, args.O, args.D, args.M]:
        logging.error(
            'Usage: spliceai [-h] [-I [input]] [-O [output]] -R reference -A annotation '
            '[-D [distance]] [-M [mask]]')
        sys.exit(1)

    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as e:
        logging.error('{}'.format(e))
        sys.exit(1)

    output = None
    prescored_files = []
    try:
        header = vcf.header
        # Note the trailing space after "splice site." so that the sentence
        # does not run into "Format:" in the emitted header line.
        header.add_line(
            '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3 variant '
            'annotation. These include delta scores (DS) and delta positions (DP) for '
            'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
            'This version also includes the distance (DIST) to the nearest splice site. '
            'Format: ALLELE|SYMBOL|DIST|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
        )

        try:
            output = pysam.VariantFile(args.O, mode='w', header=header)
        except (IOError, ValueError) as e:
            logging.error('{}'.format(e))
            sys.exit(1)

        # Loading prescored files is best-effort: an unreadable file is
        # logged and skipped, the remaining files are still loaded.
        # ``args.P`` may be None when no prescored files were given.
        for filename in args.P or []:
            try:
                prescored_files.append(pysam.VariantFile(filename))
            except (IOError, ValueError) as e:
                logging.error('{}'.format(e))

        ann = Annotator(args.R, args.A)

        for record in vcf:
            scores = get_delta_scores(record, ann, args.D, args.M, prescored_files)
            if len(scores) > 0:
                record.info['SpliceAI'] = scores
            output.write(record)
    finally:
        # Release every handle on every exit path, including failures.
        for prescored in prescored_files:
            prescored.close()
        if output is not None:
            output.close()
        vcf.close()
Esempio n. 4
0
def main():
    """
    Command-line entry point: annotate a VCF and add extra header lines.

    Reads the options from ``get_options()``, opens the input VCF ``args.I``,
    appends ``fileDate``, ``reference``, SpliceAI and dummy ``NS`` lines to
    its header, opens the output VCF ``args.O`` with that header, and writes
    each input record to the output after attaching the result of
    ``get_delta_scores`` when it is non-empty.

    Raises:
        SystemExit: With status 1 if a required option is missing or the
            input/output file cannot be opened (the error is logged first).
    """
    import sys  # local import: only needed for the error exits below

    args = get_options()

    if None in [args.I, args.O, args.D, args.M]:
        logging.error(
            'Usage: spliceai [-h] [-I [input]] [-O [output]] -R reference -A annotation '
            '[-D [distance]] [-M [mask]]')
        sys.exit(1)

    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as e:
        logging.error('{}'.format(e))
        sys.exit(1)

    output = None
    try:
        header = vcf.header
        # Extra header lines added so that output.write() emits a
        # well-formed VCF. Meta-information lines start with exactly two
        # hashes, so fileDate uses '##' (the previous '###' was a typo).
        # NOTE(review): the date and reference are hard-coded and do not
        # depend on the options - confirm this is intended.
        header.add_line('##fileDate=20191004')
        header.add_line('##reference=GRCh37/hg19')
        header.add_line(
            '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3.1 variant '
            'annotation. These include delta scores (DS) and delta positions (DP) for '
            'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
            'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
        )
        # Placeholder INFO definition (its own description calls it a dummy).
        header.add_line(
            '##INFO=<ID=NS,Number=20000,Type=Integer,Description="Dummy NS">')

        try:
            output = pysam.VariantFile(args.O, mode='w', header=header)
        except (IOError, ValueError) as e:
            logging.error('{}'.format(e))
            sys.exit(1)

        ann = Annotator(args.R, args.A)

        for record in vcf:
            scores = get_delta_scores(record, ann, args.D, args.M)
            if len(scores) > 0:
                record.info['SpliceAI'] = scores
            output.write(record)
    finally:
        # Release both handles on every exit path, including failures.
        if output is not None:
            output.close()
        vcf.close()
Esempio n. 5
0
def process_record(records, results, ref_fasta, annotations, dist_var, mask):
    """
    Worker loop: score variants taken from ``records`` until ``'END'`` is seen.

    An ``Annotator`` is created once from ``ref_fasta`` and ``annotations``.
    Each item taken from ``records`` is scored with ``get_delta_scores`` and
    the pair ``(record.id, scores)`` is put on ``results``. When the item
    ``'END'`` is received it is put back on ``records`` so that other
    workers reading the same queue also terminate, and the loop ends.

    Args:
        records: Queue of variants to score, terminated by the string
            ``'END'``.
        results: Queue receiving ``(record.id, scores)`` tuples.
        ref_fasta: First argument passed to ``Annotator``.
        annotations: Second argument passed to ``Annotator``.
        dist_var: Passed to ``get_delta_scores``.
        mask: Passed to ``get_delta_scores``.
    """
    # Create the annotation helper once per worker.
    ann = Annotator(ref_fasta, annotations)
    # Listen on the queue.
    while True:
        # Block until a variant (or the end marker) is available. The
        # previous get_nowait()/continue loop spun at full CPU while the
        # queue was empty; a blocking get() waits without burning cycles.
        record = records.get()
        # Check whether the end of the queue has been reached.
        if record != 'END':
            # Score the variant and publish the result.
            scores = get_delta_scores(record, ann, dist_var, mask)
            results.put((record.id, scores))
            continue
        # End of queue: put the end marker back so the other processes
        # terminate as well.
        records.put('END')
        break
Esempio n. 6
0
def main():
    """
    Command-line entry point: annotate every record of a VCF with SpliceAI.

    Reads the options from ``get_options()``, opens the input VCF ``args.I``,
    adds the SpliceAI INFO definition to its header, opens the output VCF
    ``args.O`` with that header, and writes each input record to the output
    after attaching the result of ``get_delta_scores`` when it is non-empty.
    Both files are closed before returning, also when an error occurs.
    """
    args = get_options()

    vcf = pysam.VariantFile(args.I)
    output = None
    try:
        header = vcf.header
        header.add_line('##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.2 variant '
                        'annotation. These include delta scores (DS) and delta positions (DP) for '
                        'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
                        'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">')
        output = pysam.VariantFile(args.O, mode='w', header=header)
        ann = Annotator(args.R, args.A)

        for record in vcf:
            scores = get_delta_scores(record, ann)
            if len(scores) > 0:
                record.info['SpliceAI'] = scores
            output.write(record)
    finally:
        # Close explicitly so the output is flushed deterministically
        # instead of relying on garbage collection at interpreter exit.
        if output is not None:
            output.close()
        vcf.close()
Esempio n. 7
0
def main():
    """
    Command-line entry point: annotate a VCF using a pool of processes.

    Reads the options from ``get_options()``, opens the input VCF ``args.I``,
    adds the SpliceAI INFO definition to its header and opens the output VCF
    ``args.O`` with that header. Every input record is then mapped through
    ``process_record(ann, args.D, args.M, record)`` on an ``mp.Pool`` of
    ``args.t`` processes and each returned value is written to the output.

    Logs an error and calls ``exit()`` when a required option is ``None`` or
    when the input/output file cannot be opened.
    """
    args = get_options()

    required_options = (args.I, args.O, args.D, args.M)
    if None in required_options:
        usage = (
            'Usage: spliceai [-h] [-I [input]] [-O [output]] -R reference -A annotation '
            '[-D [distance]] [-M [mask]]'
        )
        logging.error(usage)
        exit()

    try:
        vcf = pysam.VariantFile(args.I)
    except (IOError, ValueError) as err:
        logging.error('{}'.format(err))
        exit()

    # INFO definition describing the SpliceAI field added to the records.
    spliceai_info = (
        '##INFO=<ID=SpliceAI,Number=.,Type=String,Description="SpliceAIv1.3 variant '
        'annotation. These include delta scores (DS) and delta positions (DP) for '
        'acceptor gain (AG), acceptor loss (AL), donor gain (DG), and donor loss (DL). '
        'Format: ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL">'
    )
    header = vcf.header
    header.add_line(spliceai_info)

    try:
        output = pysam.VariantFile(args.O, mode='w', header=header)
    except (IOError, ValueError) as err:
        logging.error('{}'.format(err))
        exit()

    ann = Annotator(args.R, args.A)
    # Bind the fixed arguments; the pool supplies the record as the last one.
    score_record = partial(process_record, ann, args.D, args.M)
    with mp.Pool(args.t) as pool:
        annotated_records = pool.map(score_record, vcf)
        for annotated in annotated_records:
            output.write(annotated)

    vcf.close()
    output.close()
Esempio n. 8
0
def get_variant_assessment():
    """
    Score the variant described by the submitted form and return JSON.

    Reads ``chrom``, ``pos``, ``ref``, ``alt``, ``assembly``, ``distance``
    and ``mask`` from ``request.form`` (each value is logged at debug
    level), stores the variant on ``Record``, builds an ``Annotator`` from
    ``app.config['HG19']`` and the submitted assembly, and returns the
    result of ``get_delta_scores`` serialised with ``json.dumps``.

    NOTE(review): the attributes are assigned on ``Record`` itself rather
    than on a fresh object, so it looks like state shared between requests -
    confirm this is safe under concurrent requests.
    """
    logger.debug("Invoking get_variant_assessment")

    form = request.form
    # Log every submitted field, in a fixed order, before using any of them.
    logged_fields = (
        ("Chromosome: ", 'chrom'),
        ("Position: ", 'pos'),
        ("Ref: ", 'ref'),
        ("Alt: ", 'alt'),
        ("Assembly: ", 'assembly'),
        ("Distance: ", 'distance'),
        ("Mask: ", 'mask'),
    )
    for label, field in logged_fields:
        logger.debug(label + form[field])

    # Describe the variant in the attributes passed on to get_delta_scores.
    Record.chrom = form['chrom']
    Record.pos = int(form['pos'])
    Record.ref = form['ref']
    Record.alts = [form['alt']]

    assembly = form['assembly']
    distance = int(form['distance'])
    mask = int(form['mask'])

    ann = Annotator(app.config['HG19'], assembly)

    return json.dumps(get_delta_scores(Record, ann, distance, mask))
0
# Path of the GRCh38 annotation table; it is read with pd.read_table below
# and also passed to the GRCh38 Annotator.
GRCH38_ANNOTATIONS = "./annotations/gencode.v38.annotation.txt.gz"

# Per genome version ("37" / "38"): chromosome name -> IntervalTree of
# transcript intervals. Filled by the loading loop below.
ANNOTATION_INTERVAL_TREES = {
    "37": defaultdict(IntervalTree),
    "38": defaultdict(IntervalTree),
}

# Load both annotation tables at import time. GRCH37_ANNOTATIONS is not
# defined in this part of the file and is expected to exist before this point.
for genome_version, annotation_path in ("37", GRCH37_ANNOTATIONS), ("38", GRCH38_ANNOTATIONS):
    print(f"Loading {annotation_path}", flush=True)
    df = pd.read_table(annotation_path, dtype={"TX_START": int, "TX_END": int})
    for _, row in df.iterrows():
        # Trees are keyed by chromosome name without the "chr" substring.
        chrom = row["CHROM"].replace("chr", "")
        # NOTE(review): the + 0.1 presumably makes the interval include
        # TX_END itself (interval ends being exclusive) - TODO confirm.
        ANNOTATION_INTERVAL_TREES[genome_version][chrom].add(Interval(row["TX_START"], row["TX_END"] + 0.1, row["#NAME"]))

# One Annotator per genome version, built at import time. HG19_FASTA_PATH and
# HG38_FASTA_PATH are not defined in this part of the file.
SPLICEAI_ANNOTATOR = {
    "37": Annotator(HG19_FASTA_PATH, GRCH37_ANNOTATIONS),
    "38": Annotator(HG38_FASTA_PATH, GRCH38_ANNOTATIONS),
}

SPLICEAI_MAX_DISTANCE_LIMIT = 10000
SPLICEAI_DEFAULT_DISTANCE = 50  # maximum distance between the variant and gained/lost splice site, defaults to 50
SPLICEAI_DEFAULT_MASK = 0  # mask scores representing annotated acceptor/donor gain and unannotated acceptor/donor loss, defaults to 0
USE_PRECOMPUTED_SCORES = 1  # whether to use precomputed scores by default

# Names of the "|"-separated fields, in order.
SPLICEAI_SCORE_FIELDS = "ALLELE|SYMBOL|DS_AG|DS_AL|DS_DG|DS_DL|DP_AG|DP_AL|DP_DG|DP_DL".split("|")

# Example request path with query parameters.
SPLICEAI_EXAMPLE = f"/spliceai/?hg=38&distance=50&mask=0&precomputed=1&variant=chr8-140300615-C-G"

VARIANT_RE = re.compile(
    "(chr)?(?P<chrom>[0-9XYMTt]{1,2})"
    "[-\s:]+"