def apply_and_cast(x, fn, dtype, skip_undefined):
     """
     Apply `fn` to `x` and cast every item it yields with `dtype`.

     Parameters
     ----------
     x : value handed to `fn`.
     fn : callable whose result is iterated item by item.
     dtype : callable applied to each item that is kept.
     skip_undefined : when truthy, a missing `x` short-circuits to an empty
         list and missing items produced by `fn` are left out of the result.

     Returns
     -------
     A list of cast items, or the `TypeError` class itself (returned as a
     value, not raised) if a TypeError occurs while calling `fn`, iterating
     its result, or casting an item.
     NOTE(review): presumably callers test for this sentinel -- confirm.
     """
     input_missing = is_missing(x)
     if input_missing and skip_undefined:
         return []
     try:
         converted = []
         for item in fn(x):
             # Missing items are only filtered out when skip_undefined is set.
             if skip_undefined and is_missing(item):
                 continue
             converted.append(dtype(item))
     except TypeError:
         # Hand the exception class back as a value instead of raising it.
         return TypeError
     return converted
 def extend(row, n_cols, na_value):
     """
     Replace missing entries in `row` and pad it up to `n_cols` entries.

     Parameters
     ----------
     row : a list, or otherwise an object treated like a dict (iterated for
         keys and indexed by key).
     n_cols : padding is applied only when `len(row) < n_cols`.
     na_value : replacement for missing entries and filler for padding.
         When it is None no replacement pass is made.

     Returns
     -------
     The resulting row.  When `na_value` is None the input object itself is
     padded in place and returned; otherwise a new list/dict is built first.

     NOTE(review): the non-list padding branch iterates `limit`, which is not
     defined in this function -- presumably it comes from an enclosing scope;
     confirm.
     """
     row_is_list = isinstance(row, list)

     if na_value is not None:
         if row_is_list:
             row = [na_value if is_missing(entry) else entry for entry in row]
         else:
             replaced = {}
             for key in row:
                 entry = row[key]
                 replaced[key] = na_value if is_missing(entry) else entry
             row = replaced

     if len(row) < n_cols:
         if row_is_list:
             # Pad on the right until the row holds n_cols entries.
             row.extend([na_value] * (n_cols - len(row)))
         else:
             # Give every key named in `limit` an entry if it has none yet.
             for key in limit:
                 if key not in row:
                     row[key] = na_value
     return row
 def do_cast(x, dtype, ignore_cast_failure):
     """
     Cast `x` with `dtype`, passing missing or already-typed values through.

     Parameters
     ----------
     x : value to cast.
     dtype : callable used for the conversion; also compared against the
         exact type of `x` (subclasses do not count as a match).
     ignore_cast_failure : selects what is returned when the cast fails.

     Returns
     -------
     `x` unchanged when it is missing or its type is exactly `dtype`;
     otherwise `dtype(x)`.  If the cast raises ValueError or TypeError, the
     result is None when `ignore_cast_failure` is truthy, else the
     `ValueError` class itself (returned as a value, not raised).
     """
     if is_missing(x) or type(x) == dtype:
         return x
     try:
         result = dtype(x)
     except (ValueError, TypeError):
         # TODO: this does not seem to catch as it should
         if ignore_cast_failure:
             return None
         return ValueError
     return result
 def num_missing(self):
     """
     Number of missing elements in the RDD.

     Aggregates over `self._rdd`, adding one for every element that
     `is_missing` reports as missing, and sets `self.materialized` to True
     before running the aggregation.
     """
     def count_if_missing(running_total, elem):
         # Per-element step: bump the count only for missing elements.
         return running_total + 1 if is_missing(elem) else running_total

     def merge_counts(left, right):
         # Combine two partial counts.
         return left + right

     self._entry()
     self.materialized = True
     missing_count = self._rdd.aggregate(0, count_if_missing, merge_counts)  # action
     self._exit()
     return missing_count
    def drop_missing_values(self):
        """
        Create a new RDD that keeps only the non-missing values of this RDD.

        An element is dropped when `is_missing` reports it as missing.
        According to the original documentation this covers 'None' as well
        as float('nan').

        The filtered RDD is wrapped with `self._rv` before being returned.
        """
        self._entry()
        present_only = self._rdd.filter(lambda elem: not is_missing(elem))
        self._exit()
        wrapped = self._rv(present_only)
        return wrapped
    def fill_missing_values(self, value):
        """
        Create a new rdd with every missing element replaced by `value`.

        Elements that `is_missing` does not flag are passed through
        unchanged, so the new rdd has one element per element of the
        original.  The mapped rdd is wrapped with `self._rv` before being
        returned.

        NOTE(review): earlier documentation said `value` is converted to the
        rdd's element type and that an error is raised if that fails.  No
        such conversion is visible in this method -- `value` is substituted
        as given -- so presumably the caller handles it; confirm.
        """
        self._entry(value)

        def fill(elem):
            # Substitute the fill value only for missing elements.
            return value if is_missing(elem) else elem

        filled = self._rdd.map(fill)
        self._exit()
        return self._rv(filled)
 def apply_and_cast(x, fn, dtype, skip_undefined):
     """
     Apply `fn` to `x` and cast the single result with `dtype`.

     Parameters
     ----------
     x : value handed to `fn`.
     fn : callable producing the value to cast.
     dtype : callable applied to the result of `fn`.
     skip_undefined : when truthy, a missing `x` yields None without
         calling `fn`.

     Returns
     -------
     `dtype(fn(x))`, or None for a skipped missing input, or the `TypeError`
     class itself (returned as a value, not raised) if calling `fn` or
     `dtype` raises TypeError.
     NOTE(review): presumably callers test for this sentinel -- confirm.
     """
     input_missing = is_missing(x)
     if input_missing and skip_undefined:
         return None
     try:
         produced = fn(x)
         cast_value = dtype(produced)
     except TypeError:
         # Hand the exception class back as a value instead of raising it.
         return TypeError
     return cast_value
 def ne_zero(x):
     """
     Report whether `x` differs from zero, treating missing values as False.

     Returns False when `is_missing(x)` is true; otherwise returns the
     result of `x != 0`.
     """
     return False if is_missing(x) else x != 0