예제 #1
0
 def _convert_to_py(self, annotation):
     if annotation:
         return Interval._from_java(annotation)
     else:
         return annotation
예제 #2
0
#   * In tier 1 regions.
#
# Definition of probably not somatic:
#   DP > 10 AND
#   (
#     GT != het OR
#     (
#       binomTest(ad, dp, 0.5, "two.sided") >= alpha
#     )
#   )
#
# With alpha = 0.1 we expect a little over 10% of het sites will be falsely lost.
# For alpha = 0.1, the DP > 10 restriction corresponds to effective bounds on VAF
# of [0.2, 0.8].

# Export one PLINK fileset per chromosome, 1 through 22.
for chrom in range(1, 23):
    # Parenthesised single-argument print: same output under Python 2, and
    # valid syntax under Python 3 (the bare print statement is not).
    print('Chromosome %d...' % chrom)
    (
        vds
        # Restrict to the current chromosome.
        .filter_intervals(Interval.parse('%d' % chrom))
        # Annotate variants from the tier 1 BED table and keep only those
        # flagged by it that are also autosomal.
        .annotate_variants_table(tier1_bed, root='va.tier1bed')
        .filter_variants_expr(
            'va.tier1bed == true && v.isAutosomal()', keep=True)
        # Remove samples matching the "^Z" pattern
        # (presumably sample IDs starting with Z -- confirm against cohort).
        .filter_samples_expr('"^Z" ~ s', keep=False)
        .split_multi()
        # Keep genotypes with DP > 10 that are either non-het or pass the
        # two-sided binomial test at 0.1, as described in the comment above.
        .filter_genotypes(
            '''
            g.dp > 10 && 
            (
              (!g.isHet()) || binomTest(g.ad[1], g.dp, 0.5, "two.sided") >= 0.1
            )''',
            keep=True)
        .min_rep()
        .export_plink('tmp/08b_genotypes_%d' % chrom)
    )
예제 #3
0
파일: expr.py 프로젝트: Fedja/hail
 def _convert_to_py(self, annotation):
     if annotation:
         return Interval._from_java(annotation)
     else:
         return annotation
예제 #4
0
#!../../software/pyhail.sh
import hail
from hail.representation import Interval
from hail.expr import TString, TBoolean, TFloat, TInt


# Hail context; log file and scratch directory are relative paths.
hc = hail.HailContext(log='log/99_dreamlab2.log', tmp_dir='tmp/hail')

vds = hc.read('../MGRB.phase2.tier12.match.vqsr.minrep.vds')

# Chr22 only, rough variant quality filters.
# Each step rebinds ``vds`` to the result of the previous one.
vds = vds.filter_intervals(Interval.parse('22'), keep=True)
vds = vds.filter_variants_expr('va.filters.isEmpty()', keep=True)
vds = vds.split_multi()
vds = vds.variant_qc()
# NOTE: va.filters.isEmpty() is tested again inside this expression even
# though it was already applied two steps earlier.
vds = vds.filter_variants_expr('''
        v.altAllele.isSNP &&
        va.qc.callRate >= 0.99 && 
        va.qc.dpMean >= 20 && va.qc.dpMean <= 60 && 
        va.qc.dpStDev < 8 && 
        va.filters.isEmpty() && 
        va.qc.AF >= 0.05 && va.qc.AF <= 0.95''')

# Drop samples with poor metrics on these filtered variants.
vds = vds.sample_qc()
vds = vds.filter_samples_expr('sa.qc.callRate >= 0.985')
#!/usr/bin/env python

import argparse as ap
import hail
from hail.representation import Interval

# Command-line interface: two input VDS paths, two output paths and the
# interval to subset. All five options are required.
p = ap.ArgumentParser()
for option, description in (
        ("--exomes-vds", "Exomes dataset to be loaded, already split"),
        ("--genomes-vds", "Genomes dataset to be loaded, already split"),
        ("--exomes-vds-out", "Exomes file to be written"),
        ("--genomes-vds-out", "Genomes file to be written"),
        ("--interval", "Interval to subset"),
):
    p.add_argument(option, help=description, required=True)
args = p.parse_args()

hc = hail.HailContext(log="/hail.log")

# Exomes first, then genomes: read, restrict to the requested interval,
# and write to the matching output path (overwriting any existing output).
for source_path, target_path in (
        (args.exomes_vds, args.exomes_vds_out),
        (args.genomes_vds, args.genomes_vds_out),
):
    subset = hc.read(source_path).filter_intervals(
        Interval.parse(args.interval))
    subset.write(target_path, overwrite=True)
예제 #6
0
#!./bin/pyhail.sh
import pyspark
import hail
from hail.representation import Interval
from hail import KeyTable
import os.path

hc = hail.HailContext(log='log/07_plink_export_45andup_subpops.log',
                      tmp_dir='tmp/hail')

# One interval per chromosome 1..22. range() excludes its stop value, so the
# stop must be 23; the previous stop of 22 silently dropped chromosome 22.
interval_list = [Interval.parse('%d' % chrom) for chrom in range(1, 23)]

# BED file imported as a key table and used below to annotate variants.
tier1_bed = KeyTable.import_bed(
    '../../locus-annotations/source_data/HG001_GRCh37_GIAB_highconf_CG-IllFB-IllGATKHC-Ion-10X-SOLID_CHROM1-X_v.3.3.2_highconf_nosomaticdel.bed'
)

(
    hc.read(
        '../MGRB.phase2.SNPtier12.match.vqsr.minrep.locusannot.WGStier12.unrelated.vds'
    )
    # Keep only variants inside the chromosome intervals built above.
    .filter_intervals(interval_list, keep=True)
    # Annotate from the BED table, then keep the variants it flags.
    .annotate_variants_table(tier1_bed, root='va.tier1bed')
    .filter_variants_expr('va.tier1bed == true', keep=True)
    # Replace all variant annotations with an empty struct.
    .annotate_variants_expr('va = {}')
    # Sample filter on the "^B" pattern
    # (presumably sample IDs starting with B -- confirm against cohort).
    .filter_samples_expr('"^B" ~ s')
    .split_multi()
    .min_rep()
    .write(
        'tmp/MGRB.phase2.SNPtier12.match.vqsr.minrep.locusannot.WGStier12.unrelated.45andUp.GiaB_HCR.noannot.split.minrep.vds'
    )
)

sample_lists = [
    '../45andup_followup_qcpass_anycancer_mf.sample_list',
    '../45andup_followup_qcpass_breastcancer_f.sample_list',
    '../45andup_followup_qcpass_breastcancer_mf.sample_list',
    '../45andup_followup_qcpass_colorectalcancer_mf.sample_list',
    '../45andup_followup_qcpass_melanomacancer_mf.sample_list',
    '../45andup_followup_qcpass_nocancer_f.sample_list',