Example #1
 def _process_excel_file(self, filename):
     try:
         import xlrd
     except ImportError:
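         # Note: TValueError appears to be datatable's internal ValueError
         # subclass, assumed to be imported at module level (not a typo).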
         raise TValueError("Module `xlrd` is required in order to read "
                           "Excel file '%s'. You can install this module "
                           "by running `pip install xlrd` in the command "
                           "line." % filename)
     if self._result is None:
         self._result = {}
     wb = xlrd.open_workbook(filename)
     for ws in wb.sheets():
         # If the worksheet is empty, skip it
         if ws.ncols == 0:
             continue
         # Assume first row contains headers
         colnames = ws.row_values(0)
         cols0 = [core.column_from_list(ws.col_values(i, start_rowx=1),
                                        -stype.str32.value)
                  for i in range(ws.ncols)]
         colset = core.columns_from_columns(cols0)
         res = Frame(colset.to_datatable(), names=colnames)
         self._result[ws.name] = res
     if len(self._result) == 0:
         self._result = None
     elif len(self._result) == 1:
         self._result = [*self._result.values()][0]
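
Note: the example above builds a Frame from per-column lists using datatable's private C-extension module. The minimal sketch below reuses only the calls that appear in the example (core.column_from_list, core.columns_from_columns, colset.to_datatable); the import path for `core` is an assumption and may differ between datatable versions.

# Sketch only: relies on the private datatable internals used in Example #1.
from datatable import Frame, stype
from datatable.lib import core            # assumption: location of the internal module

values = [["a", "b", "c"], ["1", "2", "3"]]   # one list of cell values per column
cols = [core.column_from_list(v, -stype.str32.value) for v in values]
colset = core.columns_from_columns(cols)
frame = Frame(colset.to_datatable(), names=["x", "y"])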
Example #2
 def _compute_columns(self):
     ee = self._engine
     _dt = self.dt.internal
     cols = [_dt.column(i) for i in self._elems]
     if ee.get_source_rowindex():
         for _col in cols:
             ri = ee.get_final_rowindex(_col.rowindex)
             _col.replace_rowindex(ri)
     return core.columns_from_columns(cols)
Example #3
 def _compute_columns(self):
     ee = self._engine
     _dt = self.dt.internal
     cols = [_dt.column(self._start + i * self._step)
             for i in range(self._count)]
     if ee.get_source_rowindex():
         for _col in cols:
             ri = ee.get_final_rowindex(_col.rowindex)
             _col.replace_rowindex(ri)
     return core.columns_from_columns(cols)
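
Examples #2 and #3 differ only in how the column indices are produced: #2 reads explicit indices from self._elems, while #3 expands a start/step/count slice. The expansion amounts to the following (toy values, not taken from the source):

start, step, count = 2, 3, 4                        # hypothetical slice parameters
indices = [start + i * step for i in range(count)]  # [2, 5, 8, 11]
# each index is then passed to _dt.column(...) exactly as in Example #2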
Example #4
 def _compute_columns(self):
     if isinstance(self._engine, LlvmEvaluationEngine):
         fnptr = self._mapnode.get_result()
         rowindex = self._engine.rowindex
         if rowindex:
             nrows = rowindex.nrows
         else:
             nrows = self.dt.nrows
         return core.columns_from_mixed(self._elems, self.dt.internal,
                                        nrows, fnptr)
     else:
         ee = self._engine
         _dt = ee.dt.internal
         _ri = ee.rowindex
         ncols = len(self._elems)
         if ee.groupby:
             opfirst = reduce_opcodes["first"]
             n_reduce_cols = 0
             for elem in self._elems:
                 if isinstance(elem, int):
                     is_groupby_col = elem in ee.groupby_cols
                     n_reduce_cols += is_groupby_col
                 else:
                     n_reduce_cols += elem.is_reduce_expr(ee)
             expand_dataset = (n_reduce_cols < ncols)
             columns = ee.groupby_cols + self._elems
             self._names = ([ee.dt.names[i]
                             for i in ee.groupby_cols] + self._names)
             for i, elem in enumerate(columns):
                 if isinstance(elem, int):
                     col = core.expr_column(_dt, elem, _ri)
                     if not expand_dataset:
                         col = core.expr_reduceop(opfirst, col, ee.groupby)
                 else:
                     col = elem.evaluate_eager(ee)
                     if expand_dataset and elem.is_reduce_expr(ee):
                         col = col.ungroup(ee.groupby)
                 columns[i] = col
         else:
             columns = [
                 core.expr_column(_dt, e, _ri)
                 if isinstance(e, int) else e.evaluate_eager(ee)
                 for e in self._elems
             ]
         return core.columns_from_columns(columns)
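
The groupby branch above chooses between two layouts: when every selected column either is a grouping key or reduces to one value per group, the result collapses to one row per group and plain key columns are wrapped in a "first" reduction; otherwise the frame keeps its full length and reduced columns are broadcast back with ungroup. A toy illustration of the expand_dataset decision, with stand-ins for the datatable objects (hypothetical, not the library's API):

groupby_cols = [0]                # column 0 is the grouping key (made up)
elems = [0, 1, "sum(B)"]          # ints = plain columns; the string stands in for a reduce expression

def is_reduce_expr(elem):
    return isinstance(elem, str)  # stand-in for elem.is_reduce_expr(ee)

n_reduce_cols = sum(
    (elem in groupby_cols) if isinstance(elem, int) else is_reduce_expr(elem)
    for elem in elems
)
expand_dataset = n_reduce_cols < len(elems)
# n_reduce_cols == 2 and len(elems) == 3, so expand_dataset is True:
# the reduced column would be ungrouped (repeated) to match the full frame length.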
Example #5
 def _compute_columns(self):
     if isinstance(self._engine, LlvmEvaluationEngine):
         fnptr = self._mapnode.get_result()
         rowindex = self._engine.rowindex
         if rowindex:
             nrows = rowindex.nrows
         else:
             nrows = self.dt.nrows
         return core.columns_from_mixed(self._elems, self.dt.internal,
                                        nrows, fnptr)
     else:
         ee = self._engine
         _dt = ee.dt.internal
         _ri = ee.rowindex
         columns = [
             core.expr_column(_dt, e, _ri)
             if isinstance(e, int) else e.evaluate_eager(ee)
             for e in self._elems
         ]
         return core.columns_from_columns(columns)
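
Both this function and the else-branch of Example #4 share the same dispatch rule: an integer element selects an existing column of the source frame, anything else is treated as an expression and evaluated eagerly. A generic sketch of that dispatch with stand-in callables (not the datatable internals):

def build_columns(elems, get_column, evaluate):
    # get_column(i): fetch column i from the source frame (stand-in)
    # evaluate(expr): eagerly evaluate an expression object (stand-in)
    return [get_column(e) if isinstance(e, int) else evaluate(e)
            for e in elems]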
Example #6
def make_datatable(dt,
                   rows,
                   select,
                   groupby=None,
                   join=None,
                   sort=None,
                   engine=None,
                   mode=None,
                   replacement=None):
    """
    Implementation of the `Frame.__call__()` method.

    This is the "main" function in the module; it is responsible for
    evaluating various transformations when they are applied to a target
    Frame.
    """
    if isinstance(groupby, datatable.join):
        join = groupby
        groupby = None
    update_mode = mode == "update"
    delete_mode = mode == "delete"
    jframe = join.joinframe if join else None
    with f.bind_datatable(dt), g.bind_datatable(jframe):
        ee = make_engine(engine, dt, jframe)
        ee.rowindex = dt.internal.rowindex
        rowsnode = make_rowfilter(rows, ee)
        grbynode = make_groupby(groupby, ee)
        colsnode = make_columnset(select, ee, update_mode)
        sortnode = make_sort(sort, ee)

        if join:
            join.execute(ee)

        if sortnode:
            if isinstance(rowsnode, AllRFNode) and not grbynode:
                rowsnode = SortedRFNode(sortnode)
            else:  # pragma: no cover
                raise NotImplementedError(
                    "Cannot yet apply sort argument to a view datatable or "
                    "combine with rows / groupby argument.")

        assert not delete_mode
        if update_mode:
            assert grbynode is None
            allrows = isinstance(rowsnode, AllRFNode)
            # Without `materialize`, when an update is applied to a view,
            # `rowsnode.execute()` will merge the rowindex implied by
            # `rowsnode` with its parent's rowindex. This will cause the
            # parent's data to be updated, which is wrong.
            dt.materialize()
            if isinstance(replacement, (int, float, str, type(None))):
                replacement = datatable.Frame([replacement])
                if allrows:
                    replacement = datatable.repeat(replacement, dt.nrows)
            elif isinstance(replacement, datatable.Frame):
                pass
            elif isinstance(replacement, BaseExpr):
                _col = replacement.evaluate_eager(ee)
                _colset = core.columns_from_columns([_col])
                replacement = _colset.to_frame(None)
            else:
                replacement = datatable.Frame(replacement)
            rowsnode.execute()
            colsnode.execute_update(dt, replacement)
            return

        rowsnode.execute()
        if grbynode:
            grbynode.execute(ee)

        colsnode.execute()
        res_dt = ee.columns.to_frame(colsnode.column_names)
        if grbynode and res_dt.nrows == dt.nrows:
            res_dt.internal.groupby = ee.groupby
        return res_dt

    raise RuntimeError("Unable to calculate the result")  # pragma: no cover
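
For context, make_datatable is what a call on a Frame ultimately dispatches to (per the docstring above). A hypothetical direct invocation is sketched below; the row and column selector formats are assumptions, since make_rowfilter and make_columnset are not part of this listing.

import datatable
from datatable import f

DT = datatable.Frame(A=[1, 2, 3], B=[1.5, 2.5, 3.5])
# Select every row of column A; assumes slice(None) and f.A are accepted by
# the make_rowfilter / make_columnset helpers referenced above.
res = make_datatable(DT, slice(None), f.A)
print(res.to_list())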