Esempio n. 1
    def aggregate_by_key(self, key_condition, agg_condition):
        """Group by key condition and aggregate results.


        Compute mean height by sex:

        >>> kt = hc.import_keytable("data/example.tsv")
        >>> kt_ht_by_sex = kt.aggregate_by_key("SEX = SEX", "MEAN_HT = HT.stats().mean")

        The key table ``kt`` has the following data:

        |   ID   |    HT    |    SEX   |
        |   1    |    65    |     M    |
        |   2    |    72    |     M    |
        |   3    |    70    |     F    |
        |   4    |    60    |     F    |

        The result of :py:meth:`.aggregate_by_key` is a KeyTable ``kt_ht_by_sex`` with the following data:

        |   SEX  |MEAN_HT   |
        |   M    |  68.5    |
        |   F    |   65     |


        The scope for both ``key_condition`` and ``agg_condition`` is all column names in the input :class:`KeyTable`.

        For more information, see the documentation on writing `expressions <../overview.html#expressions>`_
        and using the `Hail Expression Language <../reference.html#HailExpressionLanguage>`_.

        :param key_condition: Named expression(s) for how to compute the keys of the new key table.
        :type key_condition: str or list of str
        :param agg_condition: Named aggregation expression(s).
        :type agg_condition: str or list of str

        :return: A new key table with the keys computed from the ``key_condition`` and the remaining columns computed from the ``agg_condition``.
        :rtype: :class:`.KeyTable`
        if isinstance(key_condition, list):
            key_condition = ",".join(key_condition)

        if isinstance(agg_condition, list):
            agg_condition = ", ".join(agg_condition)

            return KeyTable(self.hc,
                            self._jkt.aggregate(key_condition, agg_condition))
        except Py4JJavaError as e:
Esempio n. 2
    def join(self, right, how='inner'):
        """Join two KeyTables together.


        Join ``kt1`` to ``kt2`` to produce ``kt3``:

        >>> kt1 = hc.import_keytable("data/example1.tsv")
        >>> kt2 = hc.import_keytable("data/example2.tsv")
        >>> kt3 = kt1.join(kt2)


        Hail supports four types of joins specified by ``how``:

         - **inner** -- Key must be present in both ``kt1`` and ``kt2``.
         - **outer** -- Key present in ``kt1`` or ``kt2``. For keys only in ``kt1``, the value of non-key columns from ``kt2`` is set to missing.
           Likewise, for keys only in ``kt2``, the value of non-key columns from ``kt1`` is set to missing.
         - **left** -- Key present in ``kt1``. For keys only in ``kt1``, the value of non-key columns from ``kt2`` is set to missing.
         - **right** -- Key present in ``kt2``. For keys only in ``kt2``, the value of non-key columns from ``kt1`` is set to missing.

        .. note::
            Both KeyTables must have identical key schemas and non-overlapping column names.

        :param right: KeyTable to join
        :type right: :class:`.KeyTable`
        :param str how: Method for joining two tables together. One of "inner", "outer", "left", "right".

        :return: A key table that is the result of joining this key table with another.
        :rtype: :class:`.KeyTable`
            return KeyTable(self.hc, self._jkt.join(right._jkt, how))
        except Py4JJavaError as e:
Esempio n. 3
    def annotate(self, condition):
        """Add new columns computed from existing columns.


        Add new column ``Y`` which is equal to 5 times ``X``:

        >>> kt = (hc.import_keytable("data/example.tsv")
        >>>         .annotate("Y = 5 * X"))


        The scope for ``condition`` is all column names in the input :class:`KeyTable`.

        For more information, see the documentation on writing `expressions <../overview.html#expressions>`_
        and using the `Hail Expression Language <../reference.html#HailExpressionLanguage>`_.

        :param condition: Annotation expression or multiple annotation expressions.
        :type condition: str or list of str

        :return: A key table with new columns specified by ``condition``.
        :rtype: :class:`.KeyTable`
        if isinstance(condition, list):
            condition = ','.join(condition)

            return KeyTable(self.hc, self._jkt.annotate(condition))
        except Py4JJavaError as e:
Esempio n. 4
    def filter(self, condition, keep=True):
        """Filter rows.


        Keep rows where ``C1`` equals 5:

        >>> kt = (hc.import_keytable("data/example.tsv")
        >>>         .filter("C1 == 5"))

        Remove rows where ``C1`` equals 10:

        >>> kt = (hc.import_keytable("data/example.tsv")
        >>>         .filter("C1 == 10", keep=False))


        The scope for ``condition`` is all column names in the input :class:`KeyTable`.

        For more information, see the documentation on writing `expressions <../overview.html#expressions>`_
        and using the `Hail Expression Language <../reference.html#HailExpressionLanguage>`_.

        .. caution::
           When ``condition`` evaluates to missing, the row will be removed regardless of whether ``keep=True`` or ``keep=False``.

        :param str condition: Annotation expression.
        :param bool keep: Keep rows where ``condition`` evaluates to True.

        :return: A key table whose rows have been filtered by evaluating ``condition``.
        :rtype: :class:`.KeyTable`
            return KeyTable(self.hc, self._jkt.filter(condition, keep))
        except Py4JJavaError as e:
Esempio n. 5
    def key_by(self, key_names):
        """Change which columns are keys.


        Assume ``kt`` is a ``KeyTable`` with three columns: c1, c2 and
        c3 and key c1.

        Change key columns:

        >>> kt.key_by(['c2', 'c3'])

        Set to no keys:

        >>> kt.key_by([])


        The order of the columns will be the original order with the key
        columns moved to the beginning in the order given by ``key_names``.

        :param key_names: List of columns to be used as keys.
        :type key_names: list of str

        :return: A key table whose key columns are given by ``key_names``.
        :rtype: :class:`.KeyTable`

            return KeyTable(self.hc,
                  , key_names))
        except Py4JJavaError as e:
Esempio n. 6
    def rename(self, column_names):
        """Rename columns of KeyTable.

        ``column_names`` can be either a list of new names or a dict
        mapping old names to new names.  If ``column_names`` is a list,
        its length must be the number of columns in this ``KeyTable``.


        Rename using a list:

        >>> kt = hc.import_keytable('data/example.tsv')
        >>> kt_renamed = kt.rename(['newColumn1', 'newColumn2', 'newColumn3'])

        Rename using a dict:

        >>> kt = hc.import_keytable('data/example.tsv')
        >>> kt_renamed = kt.rename({'column1' : 'newColumn1'})

        :param column_names: list of new column names or a dict mapping old names to new names.
        :type column_names: list of str or dict mapping str to str

        :return: A key table with renamed columns.
        :rtype: :class:`.KeyTable`
            return KeyTable(self.hc, self._jkt.rename(column_names))
        except Py4JJavaError as e:
Esempio n. 7
    def count_rows(self):
        """Number of rows.

        :rtype: long
            return self._jkt.nRows()
        except Py4JJavaError as e:
Esempio n. 8
    def flatten(self):
        """Flatten nested Structs.  Column names will be concatenated with dot


        Flatten Structs in KeyTable:

        >>> (hc.import_keytable("data/example.tsv")
        >>>    .flatten())

        Consider a KeyTable ``kt`` with signature

        .. code-block:: text

            a: Struct {
                p: Int
                q: Double
            }
            b: Int
            c: Struct {
                x: String
                y: Array[Struct {
                    z: Map[Int]
                }]
            }

        and a single key column ``a``.  The result of flatten is

        .. code-block:: text

            a.p: Int
            a.q: Double
            b: Int
            c.x: String
            c.y: Array[Struct {
                z: Map[Int]
            }]

        with key columns ``a.p, a.q``.

        Note, structures inside non-struct types will not be flattened.

        :return: A key table with no columns of type Struct.
        :rtype: :class:`.KeyTable`

            return KeyTable(self.hc, self._jkt.flatten())
        except Py4JJavaError as e:
Esempio n. 9
    def _run_command(self, vds, pargs):
        jargs = jarray(, pargs)
        t = self._hail.driver.ToplevelCommands.lookup(jargs)
        cmd = t._1()
        cmd_args = t._2()
        jstate = self._jstate(vds._jvds if vds != None else None)

            result =, cmd_args)
        except Py4JJavaError as e:

        return VariantDataset(self, result.vds())
Esempio n. 10
    def export(self, output, types_file=None):
        """Export to a TSV file.


        Rename column names of KeyTable and export to file:

        >>> (hc.import_keytable("data/example.tsv")
        >>>    .rename({'column1' : 'newColumn1'})
        >>>    .export("data/kt1_renamed.tsv"))

        :param str output: Output file path.
        :param str types_file: Output path of types file.
            self._jkt.export(self.hc._jsc, output, types_file)
        except Py4JJavaError as e:
Esempio n. 11
    def same(self, other):
        """Test whether two key tables are identical.


        >>> kt1 = hc.import_keytable("data/example1.tsv")
        >>> kt2 = hc.import_keytable("data/example2.tsv")
        >>> if kt1.same(kt2):
        >>>     print("KeyTables are the same!")

        :param other: key table to compare against
        :type other: :class:`.KeyTable` 

        :rtype: bool
            return self._jkt.same(other._jkt)
        except Py4JJavaError as e:
Esempio n. 12
    def exists(self, code):
        """Test whether a condition is true for any row.


        Test whether any row in the KeyTable has the value of ``C1`` equal to 5:

        >>> kt = hc.import_keytable('data/example.tsv')
        >>> if kt.exists("C1 == 5"):
        >>>     print("At least one row has C1 equal 5.")

        :param str code: Boolean expression.

        :rtype: bool
            return self._jkt.exists(code)
        except Py4JJavaError as e:
Esempio n. 13
    def expand_types(self):
        """Expand types Locus, Interval, AltAllele, Variant, Genotype, Char,
        Set and Dict.  Char is converted to String.  Set is converted
        to Array.  Dict[T] is converted to

        .. code-block:: text

            Array[Struct {
                key: String
                value: T
            }]

        :return: key table with signature containing only types:
          Boolean, Int, Long, Float, Double, Array and Struct
        :rtype: :class:`.KeyTable`
            return KeyTable(self.hc, self._jkt.expandTypes())
        except Py4JJavaError as e:
Esempio n. 14
    def to_dataframe(self, expand=True, flatten=True):
        """Converts this KeyTable to a Spark DataFrame.

        :param bool expand: If true, expand_types before converting to
          DataFrame.
        :param bool flatten: If true, flatten before converting to
          DataFrame.  If both are true, flatten is run after expand so
          that expanded types are flattened.

        :rtype: :class:`pyspark.sql.DataFrame`

            jkt = self._jkt
            if expand:
                jkt = jkt.expandTypes()
            if flatten:
                jkt = jkt.flatten()
            return DataFrame(jkt.toDF(self.hc._jsql_context),
        except Py4JJavaError as e:
Esempio n. 15
    def select(self, column_names):
        """Select a subset of columns.


        Assume ``kt`` is a ``KeyTable`` with three columns: C1, C2 and C3.

        Select/drop columns:

        >>> new_kt = kt.select(['C1'])

        Reorder the columns:

        >>> new_kt = kt.select(['C3', 'C1', 'C2'])

        Drop all columns:

        >>> new_kt = kt.select([])


        The order of the columns will be the order given
        by ``column_names`` with the key columns moved to the beginning
        in the order of the key columns in this ``KeyTable``.

        :param column_names: List of columns to be selected.
        :type column_names: list of str

        :return: A key table with selected columns in the order given by ``column_names``.
        :rtype: :class:`.KeyTable`

            new_key_names = [k for k in self.key_names if k in column_names]
            return KeyTable(self.hc,
                  , new_key_names))
        except Py4JJavaError as e:
Esempio n. 16
 def __repr__(self):
         return self._jkt.toString()
     except Py4JJavaError as e:
Esempio n. 17
    def explode(self, column_names):
        """Explode columns of this KeyTable.

        The explode operation unpacks the elements in a column of type ``Array`` or ``Set`` into its own row.
        If an empty ``Array`` or ``Set`` is exploded, the entire row is removed from the :py:class:`.KeyTable`.


        Assume ``kt`` is a :py:class:`.KeyTable` with three columns: c1, c2 and
        c3. The types of each column are ``String``, ``Array[Int]``, and ``Array[Array[Int]]`` respectively.
        c1 cannot be exploded because its type is not an ``Array`` or ``Set``.
        c2 can only be exploded once because the type of c2 after the first explode operation is ``Int``.

        | c1 |   c2     |   c3           |
        |  a | [1,2,NA] |[[3,4], []]     |

        Explode c2:

        >>> exploded_kt = (hc.import_keytable("data/example.tsv")
        >>>                  .explode('c2'))

        | c1 |   c2  |    c3           |
        |  a | 1     | [[3,4], []]     |
        |  a | 2     | [[3,4], []]     |

        Explode c2 once and c3 twice:

        >>> exploded_kt = (hc.import_keytable("data/example.tsv")
        >>>                  .explode(['c2', 'c3', 'c3']))

        | c1 |   c2  |   c3        |
        |  a | 1     |3            |
        |  a | 2     |3            |
        |  a | 1     |4            |
        |  a | 2     |4            |

        :param column_names: Column name(s) to be exploded.
        :type column_names: str or list of str
        :return: A key table with columns exploded.
        :rtype: :class:`.KeyTable`

            if isinstance(column_names, str):
                column_names = [column_names]
            return KeyTable(self.hc, self._jkt.explode(column_names))
        except Py4JJavaError as e: