Example #1
0
    def __init__(self, appName="PyHail", master=None, local='local[*]',
                 log='hail.log', quiet=False, append=False, parquet_compression='uncompressed',
                 block_size=1, branching_factor=50, tmp_dir='/tmp'):
        """Create the JVM-side Spark context through the Hail driver and wrap it.

        Every argument is forwarded, in order, to the Scala
        ``configureAndCreateSparkContext`` function of the Hail driver
        package object; ``master`` is first wrapped with ``joption``.

        After construction the instance exposes:

        * ``gateway`` / ``jvm`` -- the Py4J handles taken from ``SparkContext``
        * ``jsc`` / ``sc`` -- the context returned by the driver and the
          Python ``SparkContext`` built around it
        * ``jsql_context`` / ``sql_context`` -- the SQL context returned by
          the driver and its ``SQLContext`` wrapper
        """
        from pyspark import SparkContext

        # _ensure_initialized() runs before the class-level handles are read.
        SparkContext._ensure_initialized()
        gateway = SparkContext._gateway
        jvm = SparkContext._jvm
        self.gateway = gateway
        self.jvm = jvm

        spark_driver = scala_package_object(jvm.org.broadinstitute.hail.driver)
        jsc = spark_driver.configureAndCreateSparkContext(
            appName,
            joption(jvm, master),
            local,
            log,
            quiet,
            append,
            parquet_compression,
            block_size,
            branching_factor,
            tmp_dir)
        self.jsc = jsc
        self.sc = SparkContext(gateway=gateway, jsc=jvm.JavaSparkContext(jsc))

        sql_driver = scala_package_object(jvm.org.broadinstitute.hail.driver)
        jsql_context = sql_driver.createSQLContext(jsc)
        self.jsql_context = jsql_context
        self.sql_context = SQLContext(self.sc, jsql_context)
Example #2
0
    def count(self, genotypes=False):
        """Return number of samples, variants and genotypes.

        The counting is delegated to the ``count`` function of the Hail
        driver package object; its result is converted with ``toJavaMap()``
        before being returned.

        :param bool genotypes: If True, return number of called
            genotypes and genotype call rate.

        """

        driver = scala_package_object(self.hc.jvm.org.broadinstitute.hail.driver)
        counts = driver.count(self.jvds, genotypes)
        return counts.toJavaMap()
Example #3
0
    def count(self, genotypes=False):
        """Return number of samples, variants and genotypes.

        The counting is delegated to the ``count`` function of the Hail
        driver package object; its result is converted with ``toJavaMap()``
        before being returned.  A ``Py4JJavaError`` raised along the way is
        handed to ``self._raise_py4j_exception``.

        :param bool genotypes: If True, return number of called
            genotypes and genotype call rate.

        """

        try:
            driver = scala_package_object(self.hc.jvm.org.broadinstitute.hail.driver)
            java_counts = driver.count(self.jvds, genotypes).toJavaMap()
        except Py4JJavaError as e:
            self._raise_py4j_exception(e)
        else:
            return java_counts
Example #4
0
    def make_keytable(self, variant_condition, genotype_condition, key_names):
        """Make a KeyTable with one row per variant.

        Per sample field names in the result are formed by concatenating
        the sample ID with the genotype_condition left hand side with
        dot (.).  If the left hand side is empty::

          `` = expr

        then the dot (.) is omitted.

        Conditions given as a list are joined with commas into a single
        string before being passed to the driver.

        **Example**

        Consider a ``VariantDataset`` ``vds`` with 2 variants and 3 samples::

          Variant   FORMAT  A       B       C
          1:1:A:T   GT:GQ   0/1:99  ./.     0/0:99
          1:2:G:C   GT:GQ   0/1:89  0/1:99  1/1:93

        Then::

          >>> vds = hc.import_vcf('data/sample.vcf')
          >>> vds.make_keytable('v = v', ['gt = g.gt', 'gq = g.gq'], [])

        returns a ``KeyTable`` with schema::

          v: Variant
          A.gt: Int
          B.gt: Int
          C.gt: Int
          A.gq: Int
          B.gq: Int
          C.gq: Int

        in particular, the values would be::

          v         A.gt  B.gt  C.gt  A.gq  B.gq  C.gq
          1:1:A:T   1     NA    0     99    NA    99
          1:2:G:C   1     1     2     89    99    93

        :param variant_condition: Variant annotation expressions.
        :type variant_condition: str or list of str

        :param genotype_condition: Genotype annotation expressions.
        :type genotype_condition: str or list of str

        :param key_names: list of key columns
        :type key_names: list of str

        :rtype: KeyTable

        """

        # Lists of expressions become one comma-separated expression string.
        variant_expr = (','.join(variant_condition)
                        if isinstance(variant_condition, list)
                        else variant_condition)
        genotype_expr = (','.join(genotype_condition)
                         if isinstance(genotype_condition, list)
                         else genotype_condition)

        hc = self.hc
        driver = scala_package_object(hc.jvm.org.broadinstitute.hail.driver)
        java_keys = jarray(hc.gateway, hc.jvm.java.lang.String, key_names)
        jkt = driver.makeKT(self.jvds, variant_expr, genotype_expr, java_keys)
        return KeyTable(hc, jkt)
Example #5
0
 def _raise_py4j_exception(self, e):
     """Re-raise a Py4J error as a ``FatalError``.

     The message comes from the ``getMinimalMessage`` function of the Hail
     ``utils`` package object, applied to ``e.java_exception``; the Java
     exception itself is passed along as the second ``FatalError`` argument.

     :param e: the caught error; must expose ``java_exception``.
     """
     hail_utils = scala_package_object(self.jvm.org.broadinstitute.hail.utils)
     java_exc = e.java_exception
     short_message = hail_utils.getMinimalMessage(java_exc)
     raise FatalError(short_message, java_exc)