def __init__(self, appName="PyHail", master=None, local='local[*]',
             log='hail.log', quiet=False, append=False,
             parquet_compression='uncompressed', block_size=1,
             branching_factor=50, tmp_dir='/tmp'):
    """Create the Spark and SQL contexts used by this object.

    Every argument is forwarded, in order, to the Hail driver package
    object's ``configureAndCreateSparkContext``; ``master`` is first
    wrapped with ``joption``. The meaning of each setting is defined by
    that JVM-side function and is not visible from this method.

    After construction the instance exposes ``gateway``, ``jvm``,
    ``jsc``, ``sc``, ``jsql_context`` and ``sql_context``.

    :param appName: forwarded as the first driver argument.
    :param master: forwarded after wrapping with ``joption``.
    :param local: forwarded unchanged.
    :param log: forwarded unchanged.
    :param quiet: forwarded unchanged.
    :param append: forwarded unchanged.
    :param parquet_compression: forwarded unchanged.
    :param block_size: forwarded unchanged.
    :param branching_factor: forwarded unchanged.
    :param tmp_dir: forwarded unchanged.
    """
    from pyspark import SparkContext

    # Must run before the class-level handles below are read
    # (presumably this is what populates them -- confirm against pyspark).
    SparkContext._ensure_initialized()
    self.gateway = SparkContext._gateway
    self.jvm = SparkContext._jvm

    # JVM-side Spark context, configured by the Hail driver.
    spark_driver = scala_package_object(self.jvm.org.broadinstitute.hail.driver)
    self.jsc = spark_driver.configureAndCreateSparkContext(
        appName,
        joption(self.jvm, master),
        local,
        log,
        quiet,
        append,
        parquet_compression,
        block_size,
        branching_factor,
        tmp_dir)

    # Python-side SparkContext wrapping the JVM context created above.
    java_spark_context = self.jvm.JavaSparkContext(self.jsc)
    self.sc = SparkContext(gateway=self.gateway, jsc=java_spark_context)

    # SQL context pair: JVM object first, then its Python wrapper.
    sql_driver = scala_package_object(self.jvm.org.broadinstitute.hail.driver)
    self.jsql_context = sql_driver.createSQLContext(self.jsc)
    self.sql_context = SQLContext(self.sc, self.jsql_context)
def count(self, genotypes=False):
    """Return number of samples, variants and genotypes.

    The work is delegated to the Hail driver package object's ``count``
    and the result is converted with ``toJavaMap()``.

    :param bool genotypes: If True, return number of called genotypes
        and genotype call rate.
    """
    driver = scala_package_object(self.hc.jvm.org.broadinstitute.hail.driver)
    jcounts = driver.count(self.jvds, genotypes)
    return jcounts.toJavaMap()
def count(self, genotypes=False):
    """Return number of samples, variants and genotypes.

    The work is delegated to the Hail driver package object's ``count``
    and the result is converted with ``toJavaMap()``. A
    ``Py4JJavaError`` raised anywhere in that chain is handed to
    ``self._raise_py4j_exception``.

    :param bool genotypes: If True, return number of called genotypes
        and genotype call rate.
    """
    try:
        # Every JVM interaction stays inside the try so that any
        # Py4JJavaError from the lookup, the count or the conversion
        # is routed through the same handler.
        driver = scala_package_object(self.hc.jvm.org.broadinstitute.hail.driver)
        jcounts = driver.count(self.jvds, genotypes)
        return jcounts.toJavaMap()
    except Py4JJavaError as e:
        self._raise_py4j_exception(e)
def make_keytable(self, variant_condition, genotype_condition, key_names):
    """Make a KeyTable with one row per variant.

    Per sample field names in the result are formed by concatenating
    the sample ID with the genotype_condition left hand side with
    dot (.). If the left hand side is empty::

      `` = expr

    then the dot (.) is omitted.

    **Example**

    Consider a ``VariantDataset`` ``vds`` with 2 variants and 3 samples::

      Variant   FORMAT  A       B       C
      1:1:A:T   GT:GQ   0/1:99  ./.     0/0:99
      1:2:G:C   GT:GQ   0/1:89  0/1:99  1/1:93

    Then::

      >>> vds = hc.import_vcf('data/sample.vcf')
      >>> vds.make_keytable('v = v', ['gt = g.gt', 'gq = g.gq'], [])

    returns a ``KeyTable`` with schema::

      v: Variant
      A.gt: Int
      B.gt: Int
      C.gt: Int
      A.gq: Int
      B.gq: Int
      C.gq: Int

    in particular, the values would be::

      v         A.gt  B.gt  C.gt  A.gq  B.gq  C.gq
      1:1:A:T   1     NA    0     99    NA    99
      1:2:G:C   1     1     2     89    99    93

    :param variant_condition: Variant annotation expressions.
    :type variant_condition: str or list of str

    :param genotype_condition: Genotype annotation expressions.
    :type genotype_condition: str or list of str

    :param key_names: list of key columns
    :type key_names: list of str

    :rtype: KeyTable
    """
    # A list of expressions is collapsed into one comma-separated string;
    # anything that is not a list is passed through untouched.
    variant_expr = (','.join(variant_condition)
                    if isinstance(variant_condition, list)
                    else variant_condition)
    genotype_expr = (','.join(genotype_condition)
                     if isinstance(genotype_condition, list)
                     else genotype_condition)

    driver = scala_package_object(self.hc.jvm.org.broadinstitute.hail.driver)
    # Key names cross the gateway as a Java String array.
    jkeys = jarray(self.hc.gateway, self.hc.jvm.java.lang.String, key_names)
    jkt = driver.makeKT(self.jvds, variant_expr, genotype_expr, jkeys)
    return KeyTable(self.hc, jkt)
def _raise_py4j_exception(self, e):
    """Raise a ``FatalError`` built from a py4j-wrapped Java exception.

    The message comes from ``getMinimalMessage`` on the Hail ``utils``
    package object, applied to ``e.java_exception``.

    :param e: exception object exposing a ``java_exception`` attribute.
    :raises FatalError: always; carries the minimal message and the
        original Java exception.
    """
    hail_utils = scala_package_object(self.jvm.org.broadinstitute.hail.utils)
    java_exc = e.java_exception
    minimal_message = hail_utils.getMinimalMessage(java_exc)
    raise FatalError(minimal_message, java_exc)