def import_keytable(self, path, key_names, npartitions=None, config=None):
    """Read one or more delimited text files (text tables) into a KeyTable.

    :param path: File(s) to import.
    :type path: str or list of str

    :param key_names: Name(s) of the fields to be treated as keys.
    :type key_names: str or list of str

    :param npartitions: Number of partitions. When falsy, the Spark
        context's ``defaultMinPartitions`` is used.
    :type npartitions: int or None

    :param config: Configuration options for importing text files. When
        falsy, a default :class:`.TextTableConfig` is used.
    :type config: :class:`.TextTableConfig` or None

    :rtype: :class:`.KeyTable`
    """
    # A single path becomes a one-element list; any other iterable is copied.
    files = [path] if isinstance(path, str) else list(path)

    # The Java side receives the key names as one comma-separated string.
    keys = key_names if isinstance(key_names, str) else ','.join(key_names)

    partitions = npartitions or self.sc.defaultMinPartitions
    table_config = config or TextTableConfig()

    importer = self.jvm.org.broadinstitute.hail.keytable.KeyTable.importTextTable
    return KeyTable(
        self,
        importer(
            self.jsc,
            jarray(self.gateway, self.jvm.java.lang.String, files),
            keys,
            partitions,
            table_config.to_java(self),
        ),
    )
def run_command(self, vds, pargs):
    """Look up a top-level command from its arguments and run it.

    :param vds: Dataset whose ``jvds`` is used to build the input state,
        or None to build the state without a dataset.
    :type vds: VariantDataset or None

    :param pargs: Arguments handed to ``ToplevelCommands.lookup`` as a Java
        ``String`` array.
    :type pargs: list of str

    :return: Dataset wrapping the ``vds()`` of the command's result.
    :rtype: VariantDataset
    """
    jargs = jarray(self.gateway, self.jvm.java.lang.String, pargs)

    # lookup() returns a pair: the command object and its remaining arguments.
    t = self.jvm.org.broadinstitute.hail.driver.ToplevelCommands.lookup(jargs)
    cmd = t._1()
    cmd_args = t._2()

    # Identity test against None: `!= None` would dispatch through any
    # __ne__ defined on the dataset class.
    jvds = vds.jvds if vds is not None else None
    result = cmd.run(self._jstate(jvds), cmd_args)
    return VariantDataset(self, result.vds())
def run_command(self, vds, pargs):
    """Look up a top-level command from its arguments and run it.

    A ``Py4JJavaError`` raised while the command runs is handed to
    ``self._raise_py4j_exception``.

    :param vds: Dataset whose ``jvds`` is used to build the input state,
        or None to build the state without a dataset.
    :type vds: VariantDataset or None

    :param pargs: Arguments handed to ``ToplevelCommands.lookup`` as a Java
        ``String`` array.
    :type pargs: list of str

    :return: Dataset wrapping the ``vds()`` of the command's result.
    :rtype: VariantDataset
    """
    jargs = jarray(self.gateway, self.jvm.java.lang.String, pargs)

    # lookup() returns a pair: the command object and its remaining arguments.
    t = self.jvm.org.broadinstitute.hail.driver.ToplevelCommands.lookup(jargs)
    cmd = t._1()
    cmd_args = t._2()

    # Identity test against None: `!= None` would dispatch through any
    # __ne__ defined on the dataset class.
    jstate = self._jstate(vds.jvds if vds is not None else None)

    # Only the Java call is guarded, so errors from building the state or
    # wrapping the result are not translated.
    try:
        result = cmd.run(jstate, cmd_args)
    except Py4JJavaError as e:
        # NOTE(review): presumably always raises; if it ever returned,
        # `result` would be unbound below -- confirm.
        self._raise_py4j_exception(e)

    return VariantDataset(self, result.vds())
def make_keytable(self, variant_condition, genotype_condition, key_names):
    """Make a KeyTable with one row per variant.

    Per sample field names in the result are formed by concatenating the
    sample ID with the genotype_condition left hand side with dot (.).
    If the left hand side is empty::

      `` = expr

    then the dot (.) is omitted.

    **Example**

    Consider a ``VariantDataset`` ``vds`` with 2 variants and 3 samples::

      Variant	FORMAT	A	B	C
      1:1:A:T	GT:GQ	0/1:99	./.	0/0:99
      1:2:G:C	GT:GQ	0/1:89	0/1:99	1/1:93

    Then::

      >>> vds = hc.import_vcf('data/sample.vcf')
      >>> vds.make_keytable('v = v', ['gt = g.gt', 'gq = g.gq'], [])

    returns a ``KeyTable`` with schema::

      v: Variant
      A.gt: Int
      B.gt: Int
      C.gt: Int
      A.gq: Int
      B.gq: Int
      C.gq: Int

    in particular, the values would be::

      v	A.gt	B.gt	C.gt	A.gq	B.gq	C.gq
      1:1:A:T	1	NA	0	99	NA	99
      1:2:G:C	1	1	2	89	99	93

    :param variant_condition: Variant annotation expressions.
    :type variant_condition: str or list of str

    :param genotype_condition: Genotype annotation expressions.
    :type genotype_condition: str or list of str

    :param key_names: Key column name(s); a single name may be given as a
        bare string.
    :type key_names: str or list of str

    :rtype: KeyTable
    """
    # Several expressions are sent to the Java side as one comma-separated
    # string; tuples are accepted alongside lists.
    if isinstance(variant_condition, (list, tuple)):
        variant_condition = ','.join(variant_condition)
    if isinstance(genotype_condition, (list, tuple)):
        genotype_condition = ','.join(genotype_condition)

    # Wrap a lone key name so it is passed as one key rather than as a
    # sequence of characters (assumes jarray iterates its argument --
    # TODO confirm).
    if isinstance(key_names, str):
        key_names = [key_names]

    jkt = (scala_package_object(self.hc.jvm.org.broadinstitute.hail.driver)
           .makeKT(self.jvds, variant_condition, genotype_condition,
                   jarray(self.hc.gateway, self.hc.jvm.java.lang.String, key_names)))
    return KeyTable(self.hc, jkt)