Example #1
 def setEpilogue(self, epilogue_formula):
   """
   Replaces the Epilogue with a FormulaStatement built from the new formula.

   :param str epilogue_formula: New value for the Epilogue formula
   :return str: Error or None (the result of FormulaStatement.do())
   """
   self._epilogue = FormulaStatement(epilogue_formula, EPILOGUE_NAME)
   return self._epilogue.do()
Example #2
 def setFormula(self, formula):
     """
     A formula is a valid python expression for the execution context.

     The stored FormulaStatement is replaced by one built from ``formula``
     and this object's local (non-global) name.

     :param formula: valid python expression
     :return: error - string giving error encountered
         (the result of FormulaStatement.do())
     """
     self._formula_statement = FormulaStatement(
         formula, self.getName(is_global_name=False))
     return self._formula_statement.do()
Example #3
 # NOTE(review): this listing looks truncated -- the parameter list opened
 # on the `def` line is never closed, the parameter descriptions are not
 # wrapped in docstring quotes, and `import pdb` is not followed by any use
 # of pdb. Confirm against the upstream source before relying on it.
 def __init__(self,
 :param str name: Name of column
 :param DataClass data_class: Class for data
 :param bool asis: opaque data if True
     super(Column, self).__init__(name)
     if name.find(".") > 0:
         import pdb
     self._cells = []
     self._formula_statement = FormulaStatement(
         None, self.getName(is_global_name=False))
     self._data_class = data_class
Example #4
 def _formulaStatementFromFile(self, filepath, name):
   """
   Reads the file contents and creates the FormulaStatement object.

   :param str filepath: path to file to read
   :param str name: name of the formula
   :returns FormulaStatement: statement built from the file's full text
   """
   with open(filepath, 'r') as f:
     statements = f.read()
   return FormulaStatement(statements, name)
Example #5
class Column(NamedTree):
  Representation of a column in a table. A column is a container
  of cells.

    is_always_leaf = True  # Cannot add/modify children

    # NOTE(review): this listing looks truncated -- the parameter list opened
    # on the `def` line is never closed, the parameter descriptions are not
    # wrapped in docstring quotes, and `import pdb` is not followed by any use
    # of pdb. `self._asis` (read by getAsis) is also not assigned in the
    # visible lines. Confirm against the upstream source.
    def __init__(self,
    :param str name: Name of column
    :param DataClass data_class: Class for data
    :param bool asis: opaque data if True
        super(Column, self).__init__(name)
        if name.find(".") > 0:
            import pdb
        self._cells = []
        self._formula_statement = FormulaStatement(
            None, self.getName(is_global_name=False))
        self._data_class = data_class

    def getSerializationDict(self, class_variable):
        """
        Method required to serialize this class.

        Notes:
        1. Does not save self._parent (in Tree). This is set by fixups
           done in deserialize().

        :param str class_variable: key to use for class name
        :return dict: class name, local name, asis flag, cells and formula
        :raises ValueError: if the data class is not ExtendedArray
        """
        if self.getDataClass().cls != ExtendedArray:
            raise ValueError("Only serialize ExtendedArray")
        serialization_dict = {
            class_variable: str(self.__class__),
            "_name": self.getName(is_global_name=False),
            "_asis": self.getAsis(),
            "_cells": self.getCells(),
            "_formula": self.getFormula(),
        }
        return serialization_dict

    @classmethod
    def deserialize(cls, serialization_dict):
        """
        Creates a column object and does fixups.

        :param dict serialization_dict: created by getSerializationDict;
            the "_name" and "_cells" entries are read
        :return Column:
        """
        column = Column(serialization_dict["_name"])
        # NOTE(review): "_asis" and "_formula" are written by
        # getSerializationDict but are not restored here; confirm whether
        # that is intentional.
        column.addCells(serialization_dict["_cells"], replace=True)
        return column

    def _adjustValue(value):
    Handles the case of iterables vs. single values.
    :param value: list or iterable
    :return list: values as a list
        if isinstance(value, list):
            new_data_list = value
        elif 'tolist' in dir(value):
            new_data_list = value.tolist()
            new_data_list = [value]
        return new_data_list

    # NOTE(review): this listing looks truncated -- the second assignment to
    # full_data_list sits inside the `if replace:` branch with no visible
    # `else:`, and full_data_list is never stored or returned in the visible
    # lines. Confirm against the upstream source.
    def addCells(self, value, replace=False):
    :param value: value(s) to add
    :param bool replace: if True, then replace existing cells
        new_data_list = Column._adjustValue(value)
        # Construct the full list
        if replace:
            full_data_list = new_data_list
            full_data_list = self._cells

    def copy(self, instance=None):
        """
        Copies this column into ``instance``.

        :param Column instance: object to copy into; a new Column with this
            column's local name is created if None
        :returns Column: copy of this object
        """
        # Create an object if one is not provided
        if instance is None:
            instance = Column(self.getName(is_global_name=False))
        # Copy properties from inherited classes
        instance = super(Column, self).copy(instance=instance)
        # NOTE(review): no Column-specific state (cells, formula, asis, data
        # class) is copied in the visible code; confirm whether the parent
        # copy covers it.
        return instance

    def deleteCells(self, indicies):
        """
        Deletes the cells at the given positions, in place.

        Positions refer to the cell list as it is before any deletion, so
        they are de-duplicated and removed from the highest down; deleting
        in ascending order would shift the later positions.

        :param indicies: list of indicies to delete (negative values count
            from the end)
        :raises IndexError: if a position is out of range
        """
        cells = self._cells
        size = len(cells)
        positions = set(i if i >= 0 else i + size for i in indicies)
        for position in sorted(positions, reverse=True):
            del cells[position]

    def getAsis(self):
        """
        :return bool asis: the stored asis flag
        """
        return self._asis

    def getCell(self, index):
        """
        Returns the value of a single cell

        :param index: index of the cell to select
        :return: the cell stored at that index
        """
        return self._cells[index]

    def getCells(self):
        """
        Returns the cells of the column.

        The stored container is returned as-is (no copy, no conversion),
        so the caller shares it with the column.
        """
        return self._cells

    def getTypeForCells(self):
        """
        :return cell_type.XType: type assigned to the column of cells,
            as computed by cell_types.getIterableType
        """
        return cell_types.getIterableType(self._cells)

    def getDataClass(self):
        """
        Returns the class (e.g., np.array, Trinary)
        """
        return self._data_class

    def getArrayType(self):
        """
        :return: dtype of the cells viewed as an np.array when the data
            class is np.ndarray or ExtendedArray; else, None
        """
        data_cls = self._data_class.cls
        if (data_cls == np.ndarray) or (data_cls == ExtendedArray):
            return np.array(self._cells).dtype
        return None

    def getFormula(self):
        """
        Returns formula for the column, as reported by its FormulaStatement
        """
        return self._formula_statement.getFormula()

    def getFormulaStatement(self):
        """
        Returns the formula as a python statement
        (the result of FormulaStatement.getStatement())
        """
        return self._formula_statement.getStatement()

    def getFormulaStatementObject(self):
        """
        Returns the FormulaStatement object
        """
        return self._formula_statement

    def insertCell(self, val, index=None):
        """
        Inserts a value into the cell list, in place.

        :param val: value to insert
        :param index: where it is inserted, appended to end if None
        """
        cells = self._cells
        position = len(cells) if index is None else index
        cells.insert(position, val)

    # NOTE(review): this listing looks truncated -- the first two conditions
    # open calls that are never closed, the description lines are not wrapped
    # in docstring quotes, and the final `return False` sits under the `raise`
    # with no visible `else:`. Confirm against the upstream source.
    def isEquivalent(self, other, is_exception=False):
    Compares the internal state of this and the input other,
    except the owning table.
    :param Column other:
    :param bool is_exception:
    :return bool:
        msg = None
        if not super(Column, self).isEquivalent(other,
            msg = "Columns %s do not agree because of ancestor." % self.getName(
        elif not self.getFormulaStatementObject().isEquivalent(
            msg = "Columns %s do not agree on formulas." % self.getName()
        elif not self.getAsis() == other.getAsis():
            msg = "Columns %s do not agree on asis property." % self.getName()
        elif not self.getDataClass() == other.getDataClass():
            # Differing data classes are tolerated when both are array types
            type_list = [np.ndarray, ExtendedArray]
            is_ok = (self.getDataClass().cls in type_list)  \
               and (other.getDataClass().cls in type_list)
            if not is_ok:
                msg = "Columns %s do not agree on data class." % self.getName()
        elif not cell_types.isEquivalentData(self._cells, other.getCells()):
            msg = "Columns %s do not agree on data." % self.getName()
        if msg is None:
            return True
        if is_exception:
            raise AssertionError(msg)
            return False

    def isExpression(self):
        """
        :return: whatever the column's FormulaStatement reports from its
            own isExpression()
        """
        statement = self._formula_statement
        return statement.isExpression()

    def isFloats(self):
        """
        :return: True if a column of numbers, as judged by
            cell_types.isFloats on the column's cells
        """
        return cell_types.isFloats(self.getCells())

    def numCells(self):
        """
        Returns the number of cells in the column
        """
        return len(self._cells)

    def prunedCells(self):
        """
        Returns cells in the column, excluding ending Nulls
        (the result of pruneNulls on the column's cells)
        """
        return pruneNulls(self._cells)

    # ToDo: Test
    # NOTE(review): no executable statement of this method is visible in this
    # listing -- only the parameter description and a commented-out length
    # check. Confirm the body against the upstream source.
    def replaceCells(self, new_data):
    :param new_data: array to replace existing data
        #if len(new_data) != len(self._cells):
        #  raise RuntimeError("Inconsistent lengths")

    def _setDatavalues(self, values):
        """
        Sets the values for the cell

        When the asis flag is set the values are stored untouched;
        otherwise they are passed through cell_types.coerceData first.

        :param values: singleton or iterable
        """
        if self._asis:
            self._cells = values
        else:
            self._cells = cell_types.coerceData(values)

    def setAsis(self, asis):
        """
        Stores the asis flag.

        :param bool asis:
        """
        self._asis = asis

    def setDataClass(self, data_class):
        """
        Sets the class (e.g., np.array, Trinary)
        """
        self._data_class = data_class

    def setFormula(self, formula):
        """
        A formula is a valid python expression for the execution context.

        The stored FormulaStatement is replaced by one built from
        ``formula`` and this column's local (non-global) name.

        :param formula: valid python expression
        :return: error - string giving error encountered
            (the result of FormulaStatement.do())
        """
        self._formula_statement = FormulaStatement(
            formula, self.getName(is_global_name=False))
        return self._formula_statement.do()

    @staticmethod
    def cleanName(name):
        """
        Removes blanks from the name (only space characters are removed).

        :param str name:
        :return str: the name with every " " deleted
        """
        return name.replace(" ", "")

    def setName(self, name):
        """
        Sets the column name after removing blanks from it.

        :param str name: proposed name
        :raises RuntimeError: if the cleaned name is not a permitted name
        """
        stripped_name = Column.cleanName(name)
        if Column.isPermittedName(stripped_name) is not None:
            raise RuntimeError("%s is an invalid name" % name)
        super(Column, self).setName(stripped_name)

    # NOTE(review): no executable statement of this method is visible in this
    # listing; only its description line is present. Confirm the body against
    # the upstream source.
    def setTable(self, table):
    Sets the table being used for this column

    @staticmethod
    def isPermittedName(name):
        """
        Verifies that this is a valid name for a column

        The name is accepted when the statement "<name> = 3" compiles.

        :param str name: proposed column name
        :return: error - string if the statement raises SyntaxError
                         None if not an error
        """
        try:
            compile("%s = 3" % name, "string", "exec")
        except SyntaxError as err:
            return "%s produced the error: %s" % (name, str(err))
        return None

    def updateCell(self, val, index):
        """
        Overwrites one existing cell, in place.

        :param val: value to store
        :param index: index of cell being updated; it must already exist
            (the value is assigned by index, never appended)
        """
        self._cells[index] = val
Example #6
 def setPrologue(self, prologue_formula):
   """
   Replaces the Prologue with a FormulaStatement built from the new formula.

   :param str prologue_formula: New value for the Prologue formula
   :return: the result of FormulaStatement.do()
   """
   self._prologue = FormulaStatement(prologue_formula, PROLOGUE_NAME)
   return self._prologue.do()
Example #7
 def setEpilogue(self, epilogue_formula):
   """
   Replaces the Epilogue with a FormulaStatement built from the new formula.

   :param str epilogue_formula: New value for the Epilogue formula
   :return: the result of FormulaStatement.do()
   """
   self._epilogue = FormulaStatement(epilogue_formula, EPILOGUE_NAME)
   return self._epilogue.do()
Example #8
class Table(ColumnContainer):
  Implements full table functionality.
  Feature 1: Maintains consistency
    between columns as to column lengths
    column names are unique
  Feature 2: Knows about rows
    add rows
    delete rows
    rows have a name as specified in the row column
  The primary object for referencing a column is the column object.
  The primary object for referencing a row is the row index

  # NOTE(review): this listing looks truncated -- both
  # _formulaStatementFromFile calls open an argument list that is never
  # closed (the name argument is not visible). Confirm against the upstream
  # source.
  def __init__(self, name):
    super(Table, self).__init__(name)
    self._namespace = {}  # Namespace for formula evaluation
    self._prologue = self._formulaStatementFromFile(PROLOGUE_FILEPATH,
    self._epilogue = self._formulaStatementFromFile(EPILOGUE_FILEPATH,
    self._is_evaluate_formulas = True

  # NOTE(review): this listing looks truncated -- the changeFileExtension
  # call and the more_dict literal are never closed, more_dict is not merged
  # into serialization_dict in the visible lines, and the loop over children
  # has no visible statement that fills _children. Confirm against the
  # upstream source.
  def getSerializationDict(self, class_variable):
    :param str class_variable: key to use for the class name
    :return dict: dictionary encoding the Table object and its columns
    serialization_dict = {}
    serialization_dict[class_variable] = str(self.__class__)
    filepath = self.getFilepath()
    if self.getFilepath() is not None:
      if ut.getFileExtension(self.getFilepath()) != settings.SCISHEETS_EXT:
        filepath = ut.changeFileExtension(self.getFilepath(), 
    more_dict = {
        "_name": self.getName(is_global_name=False),
        "_prologue_formula": self.getPrologue().getFormula(),
        "_epilogue_formula": self.getEpilogue().getFormula(),
        "_is_evaluate_formulas": self.getIsEvaluateFormulas(),
        "_filepath": filepath,
    _children = []
    for child in self.getChildren():
      if not Table.isNameColumn(child):
    serialization_dict["_children"] = _children
    return serialization_dict

  # NOTE(review): this listing looks truncated -- `table = instance` and the
  # `raise ValueError` appear without a visible `else:`, the `_filepath`
  # check has no visible body, and new_child is never attached to the table
  # in the visible lines. The `cls` first parameter suggests a classmethod,
  # but no decorator is visible. Confirm against the upstream source.
  def deserialize(cls, serialization_dict, instance=None):
    Deserializes a table object and does fix ups.
    :param dict serialization_dict: container of parameters for deserialization
    :return Table:
    if instance is None:
      table = Table(serialization_dict["_name"])
      table = instance
    if serialization_dict["_filepath"] is not None:
    if "_children" in serialization_dict.keys():
      child_dicts = serialization_dict["_children"]
    elif "_columns" in serialization_dict.keys():
      child_dicts = serialization_dict["_columns"]
      raise ValueError("Cannot find children for %s" % table.getName())
    for child_dict in child_dicts:
      # Handle older serializations
      if not child_dict['_name'] == NAME_COLUMN_STR:
        new_child = deserialize(json.dumps(child_dict))
    return table

  # The following methods are used in debugging

  def d(self):
    """Debugging aid: list of (column name, column cells) pairs."""
    pairs = []
    for col in self.getColumns():
      pairs.append((col.getName(), col.getCells()))
    return pairs

  def f(self):
    """Debugging aid: list of (column name, column formula) pairs."""
    pairs = []
    for col in self.getColumns():
      pairs.append((col.getName(), col.getFormula()))
    return pairs

  # NOTE(review): `data` is not used and `dc` is not used after construction
  # in the visible lines; the listing may be missing the statement that
  # hands `data` to the DataCapture object. Confirm against the upstream
  # source.
  def setCapture(self, filename, data):
    dc = DataCapture(filename)
  def getIsEvaluateFormulas(self):
    """Returns the stored is-evaluate-formulas setting."""
    setting = self._is_evaluate_formulas
    return setting
  # Internal and other methods

  # TODO: Tests with multiple levels of subtable
  # NOTE(review): this listing looks truncated -- the loop over
  # range(nrows_table) has no visible body, so `names` is never filled in
  # the visible lines, and the description lines are not wrapped in
  # docstring quotes. Confirm against the upstream source.
  def _updateNameColumn(self, nrows_table=None):
    Changes the cells in the name column of the table
    to be consecutive ints.
    :paam int nrows_table: Number of rows in the table
    if nrows_table is None:
      nrows_table = self.numRows()
    names = []
    for row_num in range(nrows_table):
    for column in self.getLeaves():
      if Table.isNameColumn(column):
        # Each name column receives its own copy of the list
        column.addCells(list(names), replace=True)

  def _formulaStatementFromFile(self, filepath, name):
    """
    Reads the file contents and creates the FormulaStatement object.

    :param str filepath: path to file to read
    :param str name: name of the formula
    :returns FormulaStatement: statement built from the file's full text
    """
    with open(filepath, 'r') as f:
      statements = f.read()
    return FormulaStatement(statements, name)

  # Data columns are those that have user data. The "row" column is excluded.
  def getDataColumns(self):
    """
    Returns the columns other than the name column

    :return list: columns from getColumns() for which
        Table.isNameColumn is False
    """
    return [c for c in self.getColumns() if not Table.isNameColumn(c)]

  def getNameColumn(self):
    """
    Gets the name column for this table.

    Only name columns whose parent is this table are considered.

    :return Column:
    :raises RuntimeError: if there is not exactly one such column
    """
    columns = [c for c in self.getColumns()
               if Table.isNameColumn(c) and c.getParent() == self]
    if len(columns) != 1:
      raise RuntimeError("Should have exactly one name column")
    return columns[0]

  def getData(self):
    """
    Collects the cells of every non-name column, recursively.

    :return dict: keys are global column names; values are lists that
        copy the column's cells
    """
    return {c.getName(): list(c.getCells())
            for c in self.getColumns(is_recursive=True)
            if not Table.isNameColumn(c)}

  def getEpilogue(self):
    """
    :return FormulaStatement: the stored epilogue
    """
    return self._epilogue

  def getFormulaColumns(self):
    """
    :return list-of-Column: columns whose getFormula() is not None
    """
    return [c for c in self.getColumns() if c.getFormula() is not None]

  def getRow(self, row_index=None):
    """
    Builds a Row keyed by column name, over all columns recursively.

    :param row_index: row desired
           if None, then an empty row is returned: np.nan for columns
           of floats and None for all other columns
    :return: Row object
    """
    row = Row()
    for column in self.getColumns(is_recursive=True):
      name = column.getName()
      if row_index is not None:
        row[name] = column.getCells()[row_index]
      elif column.isFloats():
        row[name] = np.nan  # pylint: disable=E1101
      else:
        row[name] = None
    return row

  def getNamespace(self):
    """Returns the stored namespace object (not a copy)."""
    namespace = self._namespace
    return namespace

  def getPrologue(self):
    """
    :return FormulaStatement: the stored prologue
    """
    return self._prologue

  # TODO: Verify the index
  def _rowNameFromIndex(index):
    Create the row name from its index
    return str(index + 1)

  # NOTE(review): `values` is computed but never stored in the visible
  # lines, so the listing may be missing the statement that writes it back
  # to the column. The pdb.set_trace() call halts execution when the name
  # column is missing; confirm whether that debugging hook should remain.
  def _coerceNameColumnToStr(self):
    Makes sure that row names are strings
    column = self.columnFromName(NAME_COLUMN_STR)
    if column is None:
      import pdb; pdb.set_trace()
    values = [str(v) for v in column.getCells()]

  # TODO: Verify the index
  def _rowNamesFromSize(size):
    :param size: number of rows
    :return: array of names
    return [str(n) for n in range(1, size+1)]

  # NOTE(review): the new Column is neither filled nor attached to the table
  # in the visible lines; the listing may be missing the statements that do
  # so. Confirm against the upstream source.
  def _createNameColumn(self):
    Creates the name column for the table
    column = Column(NAME_COLUMN_STR, asis=True)

  def adjustColumnLength(self):
    """
    Inserts values of None or np.nan so that column
    has the same length as the table

    Columns shorter than numRows() are extended: np.nan is used for
    columns of floats, None for all others. Other columns are untouched.
    """
    none_array = np.array([None])
    num_rows = self.numRows()
    for column in self.getColumns():
      adj_rows = num_rows - column.numCells()
      if adj_rows <= 0:
        continue
      if column.isFloats():
        filler = np.repeat(np.nan, adj_rows)  # pylint:disable=E1101
      else:
        filler = np.repeat(none_array, adj_rows)
      column.addCells(filler)

  # NOTE(review): this listing looks truncated -- the assignment to
  # actual_row_name ends in a line continuation with no visible right-hand
  # side, and the description lines are not wrapped in docstring quotes.
  # Several pdb.set_trace() calls halt execution before the corresponding
  # error is raised; confirm whether those debugging hooks should remain.
  def _validateTable(self):
    Checks that the table is internally consistent
    Verify that there is at least one column
    if len(self.getColumns()) < 1:
      raise er.InternalError("Table %s has no columns." % self._name)
    # Verify that all columns have the same number of cells
    name_column = self.columnFromName(NAME_COLUMN_STR)
    if name_column is None:
      import pdb; pdb.set_trace()
    num_rows = self.numRows()
    for column in self.getColumns():
      if  column.numCells() != num_rows:
        import pdb; pdb.set_trace()
        msg = "In Table %s, Column %s differs in its number of rows." \
            % (self.getName(), column.getName())
        raise er.InternalError(msg)
    # Verify that the first Column is the Name Column
    if self.getChildAtPosition(0).getName(is_global_name=False) != NAME_COLUMN_STR:
      msg = "In Table %s, first column is not 'row' column" % self.getName()
      raise er.InternalError(msg)
    # Verify that names are unique
    if self.validateTree() is not None:
      raise RuntimeError(self.validateTree())
    # Verify the sequence of row names
    for nrow in range(self.numRows()):
      expected_row_name = Table._rowNameFromIndex(nrow)
      actual_row_name =  \
      if actual_row_name != expected_row_name:
        import pdb; pdb.set_trace()
        msg = "In Table %s, invalid row name at index %d: %s" % \
                (self.getName(), nrow, actual_row_name)
        raise er.InternalError(msg)
    # Verify that the name columns are identical
    for column in self.getColumns():
      if Table.isNameColumn(column):
        if not column.getCells() == name_column.getCells():
          raise RuntimeError("%s is not a consistent name column" % column.getName())

  def addCells(self, column, cells, replace=False):
    """
    Adds to the column by delegating to the column's own addCells.

    :param Column column:
    :param list cells:
    :param bool replace: passed through to Column.addCells
    """
    column.addCells(cells, replace=replace)

  # NOTE(review): this listing looks truncated -- the isPermittedName call
  # is never closed and sits after `return error` with no visible `else:`,
  # and neither "Case 1" nor "Case 2" has a visible body. No final return is
  # visible either. Confirm against the upstream source.
  def addColumn(self, column, index=None):
    Adds a column to the table.
    Adjusts the Column length to that of the table
    :param column: column object
    :param int index: position for the new column
    :return: error text if there is a problem with the column
                    None if no problem
    Notes: (1) A new column may have either no cells
               or the same number as the existing table
    error = None
    # Check for problems with this column
    is_ok = all([c.getName(is_global_name=False) 
        != column.getName(is_global_name=False) 
        for c in self.getChildren()])
    if not is_ok:
      error = "**%s is a duplicate name" % column.getName()
      return error
      error = cl.Column.isPermittedName(  \
      if error is not None:
        return error
    if index is None:
      index = len(self.getColumns())
    # Handle the different cases of adding a column
    self.addChild(column, position=index)
    # Case 1: First column after name column
    if self.numColumns() == 1:
    # Case 2: Subsequent columns

  # NOTE(review): this listing looks truncated -- the second assignment to
  # proposed_name sits under `if row_index is None:` with no visible
  # `else:`, and `if cur_name in row:` has no visible body. Confirm against
  # the upstream source.
  def addRow(self, row, row_index=None):
    :param Row row: Row to add
    :param int row_index: index where Row is added, may be a float
                       if None, then appended
    # Determine the actual desired name
    if row_index is None:
      proposed_name = Table._rowNameFromIndex(self.numRows())
      proposed_name = Table._rowNameFromIndex(row_index)
    # Assign values to the last row of each column cells
    for column in self.getColumns(is_recursive=True):
      if column.getName(is_global_name=False) != NAME_COLUMN_STR:
        cur_name = column.getName()
        if cur_name in row:
    last_index = self.numRows() - 1
    self.renameRow(last_index, proposed_name)  # put the row in the right place

  def copy(self, instance=None):
    """
    Returns a copy of this object

    :param Table instance: object to copy into; a new Table with this
        table's local name is created if None
    :return Table: the instance that was copied into
    """
    # Create an object if none provided
    if instance is None:
      instance = Table(self.getName(is_global_name=False))
    name_column = instance.columnFromName(NAME_COLUMN_STR)
    instance.deleteColumn(name_column)  # Avoid duplicate
    # Copy everything required from inherited classes
    super(Table, self).copy(instance=instance)
    # NOTE(review): no Table-specific state (namespace, prologue, epilogue,
    # evaluate flag) is copied in the visible code; confirm whether the
    # parent copy covers it.
    return instance

  # NOTE(review): no executable statement of this method is visible in this
  # listing; only its description lines are present. Confirm the body
  # against the upstream source.
  def deleteColumn(self, column):
    Deletes a column from the table.
    :param column: column obj to delete

  # NOTE(review): the loop over the columns has no visible body in this
  # listing, so what is done per column cannot be read from here. Confirm
  # against the upstream source.
  def deleteRows(self, indicies):
    Deletes rows
    :param indicies: index of rows to delete
    for column in self.getColumns():

  def export(self, **kwargs):
    """
    Exports the table to a python program

    :param kwargs: passed unchanged to TableEvaluator.export
    :return: error - string from the file export
    """
    table_evaluator = TableEvaluator(self)
    return table_evaluator.export(**kwargs)

  def evaluate(self, user_directory=None):
    """
    Evaluates formulas in the table

    :param user_directory: full directory path where user modules
                            are placed
    :return: error from table evaluation or None
    """
    evaluator = TableEvaluator(self)
    return evaluator.evaluate(user_directory=user_directory)

  def isColumnPresent(self, column_name):
    """
    :param str column_name: compared against each column's getName()
    :return bool: True if column is present
    """
    return any(c.getName() == column_name for c in self.getColumns())

  # NOTE(review): this listing looks truncated -- the is_same_properties
  # expression ends in `and  \` with no visible final operand. local_debug
  # is hard-coded to True, so every non-equivalent result drops into
  # pdb.set_trace() before returning; confirm whether that should remain.
  def isEquivalent(self, other_table):
    Checks that the tables have the same values of their properties,
    excluding the VersionedFile.
    :param Table other_table:
    :returns bool:
    local_debug = True # Breaks on specific reasons for non-equiv
    if not isinstance(other_table, self.__class__):
      if local_debug:
        import pdb; pdb.set_trace()
      return False
    is_same_properties = (self.getName(is_global_name=False) == other_table.getName(is_global_name=False)) and  \
        (self.numColumns() == other_table.numColumns()) and  \
        (self.getPrologue().isEquivalent(other_table.getPrologue())) and  \
    if not is_same_properties:
      if local_debug:
        import pdb; pdb.set_trace()
      return False
    if not super(Table, self).isEquivalent(other_table):
      if local_debug:
        import pdb; pdb.set_trace()
      return False
    return True

  @staticmethod
  def isNameColumn(column):
    """
    Determines if this is a name column

    The last element of the path derived from the column's name is
    compared with NAME_COLUMN_STR.

    :param Column column:
    :return bool: True if name column
    """
    path = column.pathFromGlobalName(column.getName())
    return path[-1] == NAME_COLUMN_STR

  def insertRow(self, row, index=None):
    """
    Inserts the row in the desired index in the table.

    Every leaf that is a column receives one new cell: the row's value
    for the column's local name, or None when the row has no such key.

    :param row: a Row
    :param index: index in the table where the row is inserted;
        numRows() (i.e. the end) if None
    """
    idx = self.numRows() if index is None else index
    for child in self.getLeaves():
      if not ColumnContainer.isColumn(child):
        continue
      name = child.getName(is_global_name=False)
      if name in row.keys():
        child.insertCell(row[name], idx)
      else:
        child.insertCell(None, idx)

  def moveRow(self, index1, index2):
    """
    Moves the row at index1 to index2

    :param index1: index of the row to read
    :param index2: index at which the row is inserted
    """
    # NOTE(review): the visible code only re-inserts the row; nothing here
    # removes the row at index1. Confirm whether a delete step is missing.
    row = self.getRow(row_index=index1)
    self.insertRow(row, index2)

  def numRows(self):
    """
    Returns the number of rows in the table

    This is the largest numCells() over the table's columns.

    :raises ValueError: if the table has no columns
    """
    return max([c.numCells() for c in self.getColumns()])

  # TODO: This won't work with nested columns
  # NOTE(review): this listing looks truncated -- the `except` has no
  # visible `try:`, the `if new_formula is not None:` checks have no visible
  # bodies, the `return None` in changeFormula has no visible `else:`, and
  # `msg` is built but not used in the visible lines. `columns` is assigned
  # but unused. Confirm against the upstream source.
  def refactorColumn(self, cur_colnm, new_colnm):
    Changes the column name and its occurrences in formulas in the table.
    :param str cur_colnm: Current name of the column
    :param str new_colnm: New name of the column
    :returns list-of-str changed_columns:
    :raises ValueError: column name is unknown
    def changeFormula(formula_statement):
      Changes the formula by replacing occurrences of
      cur_colnm with new_colnm
      :param FormulaStatement formula_satement:
      :returns str/None: new formula or None
      formula = formula_statement.getFormula()
      # Plain substring replacement: any text containing cur_colnm matches
      if cur_colnm in formula:
        return formula.replace(cur_colnm, new_colnm)
        return None

    column = self.columnFromName(cur_colnm)
    if column is None:
      raise ValueError("Column %s does not exist." % cur_colnm)
    columns = self.getColumns()
    changed_columns = []
      # Do the Columns
      for col in self.getFormulaColumns():
        new_formula = changeFormula(col.getFormulaStatementObject())
        if new_formula is not None:
      # Handle Prologue
      new_formula = changeFormula(self.getPrologue())
      if new_formula is not None:
      # Handle Epilogue
      new_formula = changeFormula(self.getEpilogue())
      if new_formula is not None:
    except Exception as err:
      msg = '''Changing column name from %s to %s.
Encountered error %s.
Changed formulas in columns %s.''' % (cur_colnm, new_colnm,
    str(err), ' '.join(changed_columns))
    return changed_columns

  @staticmethod
  def rowIndexFromName(name):
    """
    Returns the row index for the row name

    :param name: row name convertible by int()
    :return int: the name's integer value minus one
    :raises ValueError: if the name cannot be converted by int()
    """
    return int(name) - 1

  # NOTE(review): `if bool_test:` has no visible body in this listing, so
  # the statement that applies the new name cannot be read from here.
  # Confirm against the upstream source.
  def renameColumn(self, column, proposed_name):
    Renames the column, checking for a duplicate
    :param column: column object
    :param proposed_name: str, proposed name
    :return: Boolean indicating success or failure
    names = [c.getName(is_global_name=False) for c in self.getChildren()]
    # True only when no child already has the proposed local name
    bool_test = all([name != proposed_name for name in names])
    if bool_test:
    return bool_test

  # NOTE(review): this listing looks truncated -- several statements are
  # indented as if inside `try:`/`except` blocks that are not visible, the
  # name-column loop has no visible body, and new_names and new_data are
  # computed but never stored in the visible lines. The pdb.set_trace()
  # calls halt execution; confirm whether they should remain.
  def renameRow(self, row_index, proposed_name):
    Renames the row so that it is an integer value
    that creates the row ordering desired.
    Handles subtrees by making their name columns
    the same length as the root.
    :param row_index: index of the row to change
    :param proposed_name: string of a number
    root = self.getRoot()
    name_column = root.columnFromName(NAME_COLUMN_STR)
    names = name_column.getCells()
      names[row_index] = str(proposed_name)
      import pdb; pdb.set_trace()
      float_names = [float(x) for x in names]
      import pdb; pdb.set_trace()
    sel_index = np.argsort(float_names)
    new_names = Table._rowNamesFromSize(len(names))
    for column in self.getChildren(is_recursive=True):
      if Table.isNameColumn(column):
    # Update the order of values in each column
    for column in self.getLeaves():
      if not Table.isNameColumn(column):
        data = column.getCells()
        new_data = [data[n] for n in sel_index]

  def setNamespace(self, namespace):
    """Stores the given namespace object on the table (no copy is made)."""
    setattr(self, "_namespace", namespace)
  def setIsEvaluateFormulas(self, setting):
    """Stores the is-evaluate-formulas setting on the table."""
    setattr(self, "_is_evaluate_formulas", setting)

  def setEpilogue(self, epilogue_formula):
    """
    Replaces the Epilogue with a FormulaStatement built from the new formula.

    :param str epilogue_formula: New value for the Epilogue formula
    :return: the result of FormulaStatement.do()
    """
    self._epilogue = FormulaStatement(epilogue_formula, EPILOGUE_NAME)
    return self._epilogue.do()

  def setPrologue(self, prologue_formula):
    """
    Replaces the Prologue with a FormulaStatement built from the new formula.

    :param str prologue_formula: New value for the Prologue formula
    :return: the result of FormulaStatement.do()
    """
    self._prologue = FormulaStatement(prologue_formula, PROLOGUE_NAME)
    return self._prologue.do()

  # NOTE(review): `if delete_row:` has no visible body in this listing, so
  # the deletion step cannot be read from here. The inner `column` lookup in
  # the second loop is unused in the visible lines, and pdb.set_trace()
  # halts execution when a column is not found. Confirm against the upstream
  # source.
  def trimRows(self):
    Removes all consequative rows at the end of the table
    that have None values in the data columns
    num_rows = self.numRows()
    row_indexes = range(num_rows)
    for index in row_indexes:
      row = self.getRow(row_index=index)
      # Delete all of the name columns
      for colnm in row.keys():
        column = self.columnFromName(colnm, is_relative=False)
        if column is None:
          import pdb; pdb.set_trace()
        if Table.isNameColumn(column):
          del row[column.getName()]
      # The row is a deletion candidate only if every remaining value is null
      delete_row = True
      for name in row.keys():
        column = self.columnFromName(name)
        if not isNull(row[name]):
          delete_row = False
      if delete_row:

  def updateCell(self, value, row_index, column_index):
    """
    Changes the value of the identified cell

    :param value: new value for the cell
    :param row_index: 0-based index of the row
    :param column_index: 0-based index of the column
    """
    self.columnFromIndex(column_index).updateCell(value, row_index)

  def updateColumn(self, column, cells):
    """
    Replaces the cells in the column with those provided

    :param column: column to update
    :param cells: new cells; passed to Column.addCells with replace=True
    """
    column.addCells(cells, replace=True)

  def updateRow(self, row, index):
    """
    Updates the row in place. Only changes values
    that are specified in row.

    The row's NAME_COLUMN_STR entry is set from the index (this modifies
    the caller's row object); name columns themselves are not updated.

    :param row: Row
    :param index: index of row to change
    """
    row[NAME_COLUMN_STR] = Table._rowNameFromIndex(index)
    for name in row:
      column = self.columnFromName(name)
      if not Table.isNameColumn(column):
        column.updateCell(row[name], index)
Example #9
 def setPrologue(self, prologue_formula):
   """
   Replaces the Prologue with a FormulaStatement built from the new formula.

   :param str prologue_formula: New value for the Prologue formula
   :return: the result of FormulaStatement.do()
   """
   self._prologue = FormulaStatement(prologue_formula, PROLOGUE_NAME)
   return self._prologue.do()
Example #10
class Table(ColumnContainer):
  """
  Implements full table functionality.
  Feature 1: Maintains consistency
    between columns as to column lengths
    column names are unique
  Feature 2: Knows about rows
    add rows
    delete rows
    rows have a name as specified in the row column
  The primary object for referencing a column is the column object.
  The primary object for referencing a row is the row index.
  """

  def __init__(self, name):
    """
    :param str name: name of the table
    """
    super(Table, self).__init__(name)
    self._namespace = {}  # Namespace for formula evaluation
    if self.getParent() is None:
      # A table without a parent reads its Prologue and Epilogue from file
      self._prologue = self._formulaStatementFromFile(PROLOGUE_FILEPATH,
          PROLOGUE_NAME)
      self._epilogue = self._formulaStatementFromFile(EPILOGUE_FILEPATH,
          EPILOGUE_NAME)
    else:
      self._prologue = None
      self._epilogue = None
    self._is_evaluate_formulas = True

  @classmethod
  def createRandomTable(cls, name, nrow, ncol, ncolstr=0,
        low_int=0, hi_int=100, table_cls=None):
    """
    Creates a table with random integers as values
    Input: name - name of the table
           nrow - number of rows
           ncol - number of columns
           ncolstr - number of columns with strings
           low_int - smallest integer
           hi_int - largest integer
           table_cls - Table class to use; default is the class
                       on which this method is called
    Output: the populated table
    """
    if table_cls is None:
      table_cls = cls
    ncol = int(ncol)
    nrow = int(nrow)
    table = table_cls(name)
    ncolstr = min(ncol, ncolstr)
    ncolint = ncol - ncolstr
    for n in range(ncol):
      column = Column("Col_" + str(n))
      # The first ncolint columns hold integers, the rest hold strings
      if n < ncolint:
        values_ext = np.random.randint(low_int, hi_int, nrow).tolist()
      else:
        values_ext = ut.randomWords(nrow)
      table.addColumn(column)
      table.addCells(column, values_ext, replace=True)
    return table

  def createRandomHierarchicalTable(cls, name, nrow, num_nodes, 
      prob_child, ncolstr=0, low_int=0, hi_int=100, prob_detach=0,
    """
    Creates a hierarchical table with random integers as values
    :param str name: name of the table
    :param int nrow: number of rows
    :param int num_nodes: passed to createRandomNamedTree
    :param float prob_child: probability that next node is a child
    :param int ncolstr: number of columns with strings
    :param int low_int: smallest integer
    :param int hi_int: largest integer
    :param float prob_detach: probability that a subtree is detached
    :param Type table_cls: Table class to use; default is the calling class
    :return table_cls:
    NOTE(review): the end of the signature (presumably the table_cls
    parameter) and the end of the createRandomNamedTree call are missing
    from this copy.
    """
    if table_cls is None:
      table_cls = cls
    # Create the schema for the Hierarchical Table
    htable = super(Table, cls).createRandomNamedTree(num_nodes, 
        prob_child, leaf_cls=Column, prob_detach=prob_detach, 
    leaves = [c for c in htable.getLeaves() 
              if c.getName(is_global_name=False) != NAME_COLUMN_STR]
    num_leaves = len(htable.getLeaves()) -1  # Don't include the name column
    # Create the values for the leaves of the Hierarchical Table
    flat_table = Table.createRandomTable(name, nrow, num_leaves, ncolstr=ncolstr,
        low_int=low_int, hi_int=hi_int, table_cls=table_cls)
    data_columns = flat_table.getDataColumns()
    # Pair each leaf with a data column of the flat table
    pairs = zip(leaves, data_columns)
    # Populate the leaves of the Hierarchical Table
    [l.getParent().addCells(l, d.getCells(), replace=True) for l, d in pairs]
    # Validate the table
    if NAME_COLUMN_STR in  \
        [n.getName(is_global_name=False) for n in htable.getNonLeaves()]:
      import pdb; pdb.set_trace()
    return htable

  def getSerializationDict(self, class_variable):
    """
    :param str class_variable: key to use for the class name
    :return dict: dictionary encoding the Table object and its columns
    NOTE(review): this copy is missing the end of the changeFileExtension
    call, the closing brace of more_dict, whatever merges more_dict into
    serialization_dict, and the body of the loop over children.
    """
    serialization_dict = {}
    # Record the class of this object under the caller's key
    serialization_dict[class_variable] = str(self.__class__)
    filepath = self.getFilepath()
    if self.getFilepath() is not None:
      # A filepath with a different extension is rewritten
      if ut.getFileExtension(self.getFilepath()) != settings.SCISHEETS_EXT:
        filepath = ut.changeFileExtension(self.getFilepath(), 
    more_dict = {
        "_name": self.getName(is_global_name=False),
        "_prologue_formula": self.getPrologue().getFormula(),
        "_epilogue_formula": self.getEpilogue().getFormula(),
        "_is_evaluate_formulas": self.getIsEvaluateFormulas(),
        "_filepath": filepath,
        "_attached": self.isAttached(),
    _children = []
    # Name columns are excluded from the serialized children
    for child in self.getChildren():
      if not Table.isNameColumn(child):
    serialization_dict["_children"] = _children
    return serialization_dict

  def deserialize(cls, serialization_dict, instance=None):
    """
    Deserializes a table object and does fix ups.
    :param dict serialization_dict: container of parameters for deserialization
    :param Table instance: object to populate; presumably a new Table is
        created only when this is None -- the else line is missing here
    :return Table:
    :raises ValueError: presumably when neither "_children" nor "_columns"
        is present -- the else line is missing here
    NOTE(review): the bodies of the "_filepath" and "_attached" checks and
    the statement that attaches new_child to the table are missing from
    this copy.
    """
    if instance is None:
      table = Table(serialization_dict["_name"])
      table = instance
    if serialization_dict["_filepath"] is not None:
    if "_attached" in serialization_dict.keys():
    # Children are stored under "_children"; "_columns" is also accepted
    if "_children" in serialization_dict.keys():
      child_dicts = serialization_dict["_children"]
    elif "_columns" in serialization_dict.keys():
      child_dicts = serialization_dict["_columns"]
      raise ValueError("Cannot find children for %s" % table.getName())
    for child_dict in child_dicts:
      # Handle older serializations
      if not child_dict['_name'] == NAME_COLUMN_STR:
        new_child = deserialize(json.dumps(child_dict))
    return table

  # The following methods are used in debugging

  def d(self):
    """
    Debugging aid.
    :return list-of-tuple: (name, cells) for each leaf
    """
    pairs = []
    for leaf in self.getLeaves():
      pairs.append((leaf.getName(), leaf.getCells()))
    return pairs

  def f(self):
    """
    Debugging aid.
    :return list-of-tuple: (name, formula) for every column,
        requested with is_attached=False
    """
    pairs = []
    for col in self.getColumns(is_attached=False):
      pairs.append((col.getName(), col.getFormula()))
    return pairs

  def setCapture(self, filename, data):
    """
    Debugging aid that creates a DataCapture for filename.
    NOTE(review): `data` is not used and the DataCapture object is
    discarded in the visible body; the remaining statements appear to be
    missing from this copy.
    """
    dc = DataCapture(filename)
  def getIsEvaluateFormulas(self):
    """
    :return: current value of the formula evaluation setting
    """
    setting = self._is_evaluate_formulas
    return setting
  # Internal and other methods

  # TODO: Tests with multiple levels of subtable
  def _updateNameColumn(self, nrows_table=None):
    """
    Changes the cells in the name column of the table
    to be consecutive ints.
    :param int nrows_table: Number of rows in the table;
        defaults to numRows()
    """
    if nrows_table is None:
      nrows_table = self.numRows()
    # Row names are the 1-based row positions, as strings
    names = Table._rowNamesFromSize(nrows_table)
    for column in self.getLeaves(is_attached=True):
      if Table.isNameColumn(column):
        # Each name column gets its own copy of the list
        column.addCells(list(names), replace=True)

  def _formulaStatementFromFile(self, filepath, name):
    """
    Reads the file contents and creates the FormulaStatement object.
    :param str filepath: path to file to read
    :param str name: name of the formula
    :returns FormulaStatement: statement built from the file contents
    """
    with open(filepath, 'r') as f:
      statements = f.read()
    return FormulaStatement(statements, name)

  # Data columns are those that have user data. The "row" column is excluded.
  def getDataColumns(self, is_recursive=True, is_attached=True):
    """
    Returns the columns other than the name column
    :param bool is_recursive: passed through to getColumns
    :param bool is_attached: passed through to getColumns
    :return list-of-Column:
    """
    columns = self.getColumns(is_recursive=is_recursive,
        is_attached=is_attached)
    return [c for c in columns if not Table.isNameColumn(c)]

  def getNameColumn(self):
    """
    Gets the name column for this table.
    :return Column:
    :raises RuntimeError: this table does not have exactly one name column
    """
    # Only name columns whose parent is this table are considered
    candidates = [c for c in self.getColumns()
                  if Table.isNameColumn(c) and c.getParent() == self]
    if len(candidates) != 1:
      raise RuntimeError("Should have exactly one name column")
    return candidates[0]

  def getData(self):
    """
    :return dict: keys are global column names, values are lists
        of the cells; name columns are excluded
    """
    data = {}
    for column in self.getColumns():
      if not Table.isNameColumn(column):
        data[column.getName()] = list(column.getCells())
    return data

  def getEpilogue(self):
    """
    :return FormulaStatement:
    """
    return self._epilogue

  def getFormulaColumns(self):
    """
    :return list-of-Column: columns whose formula is not None,
        drawn from getColumns(is_attached=False)
    """
    return [c for c in self.getColumns(is_attached=False)
            if c.getFormula() is not None]

  def getRow(self, row_index=None):
    """
    :param row_index: row desired
           if None, then a row of null values is returned
    :return: Row object keyed by column name
    """
    row = Row()
    for column in self.getColumns():
      name = column.getName()
      if row_index is None:
        # Float columns use nan as their null value
        if column.isFloats():
          row[name] = np.nan  # pylint: disable=E1101
        else:
          row[name] = None
      else:
        row[name] = column.getCells()[row_index]
    return row

  def getNamespace(self):
    """
    :return: the namespace object held by this table
    """
    namespace = self._namespace
    return namespace

  def getPrologue(self):
    """
    :return FormulaStatement:
    """
    return self._prologue

  # TODO: Verify the index
  def _rowNameFromIndex(index):
    Create the row name from its index
    return str(index + 1)

  def _coerceNameColumnToStr(self):
    """
    Makes sure that row names are strings
    :raises RuntimeError: the name column cannot be found
    """
    column = self.columnFromName(NAME_COLUMN_STR, is_relative=False)
    if column is None:
      raise RuntimeError("Table %s has no name column." % self.getName())
    values = [str(v) for v in column.getCells()]
    # Write the converted names back into the column
    column.addCells(values, replace=True)

  # TODO: Verify the index
  def _rowNamesFromSize(size):
    :param size: number of rows
    :return: array of names
    return [str(n) for n in range(1, size+1)]

  def _createNameColumn(self):
    """
    Creates the name column for the table
    NOTE(review): the visible body only constructs the Column; the
    statement that adds it to the table appears to be missing from
    this copy.
    """
    column = Column(NAME_COLUMN_STR, asis=True)

  def adjustColumnLength(self):
    """
    Inserts values of None or np.nan so that each column
        has the same length as the table
    """
    none_array = np.array([None])
    num_rows = self.numRows()
    for column in self.getColumns():
      adj_rows = num_rows - column.numCells()
      if adj_rows > 0:
        # Float columns are padded with nan, all others with None
        if column.isFloats():
          column.addCells(np.repeat(np.nan, adj_rows))  # pylint:disable=E1101
        else:
          column.addCells(np.repeat(none_array, adj_rows))

  def _validateTable(self):
    """
    Checks that the table is internally consistent
    :raises er.InternalError: no columns, no name column among the
        children, unequal column lengths, first child is not the name
        column, or a row name is out of sequence
    :raises RuntimeError: validateTree reports an error or a name column
        differs from this table's name column
    """
    # Verify that there is at least one column
    if len(self.getColumns()) < 1:
      raise er.InternalError("Table %s has no columns." % self._name)
    # Locate the name column among the direct children
    name_columns = [c for c in self.getChildren()
                    if c.getName(is_global_name=False) == NAME_COLUMN_STR]
    if len(name_columns) == 0:
      msg = "In Table %s, there is no 'row' column" % self.getName()
      raise er.InternalError(msg)
    name_column = name_columns[0]
    # Verify that all columns have the same number of cells
    num_rows = self.numRows()
    for column in self.getColumns():
      if column.numCells() != num_rows:
        msg = "In Table %s, Column %s differs in its number of rows." \
            % (self.getName(), column.getName())
        raise er.InternalError(msg)
    # Verify that the first Column is the Name Column
    if self.getChildAtPosition(0).getName(is_global_name=False) != NAME_COLUMN_STR:
      msg = "In Table %s, first column is not 'row' column" % self.getName()
      raise er.InternalError(msg)
    # Verify that names are unique
    tree_error = self.validateTree()
    if tree_error is not None:
      raise RuntimeError(tree_error)
    # Verify the sequence of row names
    name_cells = name_column.getCells()
    for nrow in range(num_rows):
      expected_row_name = Table._rowNameFromIndex(nrow)
      actual_row_name = name_cells[nrow]
      if actual_row_name != expected_row_name:
        msg = "In Table %s, invalid row name at index %d: %s" % \
                (self.getName(), nrow, actual_row_name)
        raise er.InternalError(msg)
    # Verify that the name columns are identical
    for column in self.getColumns():
      if Table.isNameColumn(column):
        if not column.getCells() == name_cells:
          raise RuntimeError("%s is not a consistent name column" % column.getName())

  def addCells(self, column, cells, replace=False):
    """
    Adds to the column
    :param Column column:
    :param list cells:
    :param bool replace: passed through to Column.addCells
    """
    column.addCells(cells, replace=replace)

  def addColumn(self, column, index=None):
    """
    Adds a column to the table.
    Adjusts the Column length to that of the table
    :param column: column object
    :param int index: position for the new column
    :return: error text if there is a problem with the column
                    None if no problem
    Notes: (1) A new column may have either no cells
               or the same number as the existing table
    NOTE(review): several statements are missing from this copy -- the
    line introducing the Column.isPermittedName check, the arguments of
    that call, and the bodies of Case 1 and Case 2.
    """
    error = None
    # Check for problems with this column
    is_ok = all([c.getName(is_global_name=False) 
        != column.getName(is_global_name=False) 
        for c in self.getChildren()])
    if not is_ok:
      error = "**%s is a duplicate name" % column.getName()
      return error
      error = Column.isPermittedName(  \
      if error is not None:
        return error
    # With no index given, the count of all columns is used as the position
    if index is None:
      index = len(self.getColumns(is_attached=False))
    # Handle the different cases of adding a column
    self.addChild(column, position=index)
    # Case 1: First column after name column
    if self.numColumns() == 1:
    # Case 2: Subsequent columns

  def addRow(self, row, row_index=None):
    """
    :param Row row: Row to add
    :param int row_index: index where Row is added, may be a float
                       if None, then appended
    NOTE(review): the else line between the two proposed_name
    assignments and the statements that place the row's values in the
    columns are missing from this copy.
    """
    # Determine the actual desired name
    if row_index is None:
      proposed_name = Table._rowNameFromIndex(self.numRows())
      proposed_name = Table._rowNameFromIndex(row_index)
    # Assign values to the last row of each column cells
    for column in self.getColumns():
      if column.getName(is_global_name=False) != NAME_COLUMN_STR:
        cur_name = column.getName()
        if cur_name in row:
    last_index = self.numRows() - 1
    self.renameRow(last_index, proposed_name)  # put the row in the right place

  def copy(self, instance=None):
    """
    Returns a copy of this object
    :param Table instance: object to copy into; a new Table with this
        table's local name is created when None
    :return Table: the instance
    NOTE(review): the end of the columnFromName call and the statements
    under "Set properties specific to this class" are missing from this
    copy.
    """
    # Create an object if none provided
    if instance is None:
      instance = Table(self.getName(is_global_name=False))
    name_column = instance.columnFromName(NAME_COLUMN_STR,
    name_column.removeTree()  # Avoid duplicate
    # Copy everything required from inherited classes
    super(Table, self).copy(instance=instance)
    # Set properties specific to this class
    return instance

  def deleteRows(self, indicies):
    """
    Deletes rows
    :param indicies: index of rows to delete
    NOTE(review): the body of the loop over columns is missing from
    this copy.
    """
    for column in self.getColumns():

  def export(self, **kwargs):
    """
    Exports the table to a python program
    :param kwargs: passed through to TableEvaluator.export
    :return: error - string from the file export
    """
    return TableEvaluator(self).export(**kwargs)

  def evaluate(self, user_directory=None):
    """
    Evaluates formulas in the table
    :param user_directory: full directory path where user modules
                            are placed
    :return: error from table evaluation or None
    """
    return TableEvaluator(self).evaluate(user_directory=user_directory)

  def isColumnPresent(self, column_name):
    """
    :param str column_name: local column name
    :return bool: True if column is present
    """
    # The generator stops at the first matching column
    return any(c.getName(is_global_name=False) == column_name
               for c in self.getColumns(is_attached=False))

  def isEquivalent(self, other_table, is_exception=False):
    """
    Checks that the tables have the same values of their properties,
    excluding the VersionedFile.
    :param Table other_table:
    :param bool is_exception: generate an AssertionError if false
    :returns bool:
    :raises AssertionError: tables differ and is_exception is True
    NOTE(review): the end of the super().isEquivalent call and the else
    line before the final `return False` are missing from this copy.
    """
    # msg stays None only if every comparison succeeds
    msg = None
    if not isinstance(other_table, self.__class__):
      msg = "Table is not equivalent to a non-table."
    elif not (self.getName(is_global_name=False) == other_table.getName(is_global_name=False)):
      msg = "Table has a different name."
    elif not (self.numColumns() == other_table.numColumns()):
      msg = "Table has a different number of columns."
    elif not (self.getPrologue().isEquivalent(other_table.getPrologue())):
      msg = "Table has a different Prologue."
    elif not  (self.getEpilogue().isEquivalent(other_table.getEpilogue())):
      msg = "Table has a different Epilogue."
    elif not super(Table, self).isEquivalent(other_table,
      msg = "Differs because of ancestor of Table."
    if msg is None:
      return True
    elif is_exception:
      raise AssertionError(msg)
      return False

  @staticmethod
  def isNameColumn(column):
    """
    Determines if this is a name column
    :param Column column:
    :return bool: True if the last element of the column's path
        is NAME_COLUMN_STR
    """
    path = column.pathFromGlobalName(column.getName())
    return path[-1] == NAME_COLUMN_STR

  def isTable(cls, child):
    :param NamedTree child:
    :return bool: True if is a Column
    return isinstance(child, Table)
  def insertRow(self, row, index=None):
    """
    Inserts the row in the desired index in the table and
    assigns the value of the NAME_COLUMN
    :param row: a Row
    :param index: index in the table where the row is inserted;
        the row is appended when None
    NOTE(review): no statement here renumbers the name column after the
    insert -- confirm whether the caller is expected to do so.
    """
    idx = index
    if idx is None:
      idx = self.numRows()
    for child in self.getLeaves(is_attached=True):
      if ColumnContainer.isColumn(child):
        name = child.getName(is_global_name=False)
        # Columns absent from the row receive None
        if name in row.keys():
          child.insertCell(row[name], idx)
        else:
          child.insertCell(None, idx)

  def moveRow(self, index1, index2):
    """
    Moves the row at index1 to index2
    :param int index1: index of the row to move
    :param int index2: index at which the row is inserted
    NOTE(review): only a read of the row and an insert are visible;
    nothing removes the row at index1 -- confirm whether a statement is
    missing from this copy.
    """
    row = self.getRow(row_index=index1)
    self.insertRow(row, index2)

  def numRows(self):
    """
    Returns the number of rows in the table
    :return int: largest cell count among the attached columns
    """
    attached_columns = self.getAttachedNodes(self.getColumns())
    return max(c.numCells() for c in attached_columns)

  # TODO: This won't work with nested columns
  def refactorColumn(self, cur_colnm, new_colnm):
    """
    Changes the column name and its occurrences in formulas in the table.
    :param str cur_colnm: Current name of the column
    :param str new_colnm: New name of the column
    :returns list-of-str changed_columns:
    :raises ValueError: column name is unknown
    NOTE(review): the try line, the statements that apply each new
    formula, and the statement that follows construction of the error
    message are missing from this copy.
    """
    def changeFormula(formula_statement):
      """
      Changes the formula by replacing occurrences of
      cur_colnm with new_colnm
      :param FormulaStatement formula_statement:
      :returns str/None: new formula or None
      NOTE(review): the else line before `return None` is missing here.
      """
      formula = formula_statement.getFormula()
      if cur_colnm in formula:
        return formula.replace(cur_colnm, new_colnm)
        return None

    column = self.childFromName(cur_colnm, is_relative=True)
    if column is None:
      raise ValueError("Column %s does not exist." % cur_colnm)
    columns = self.getColumns(is_attached=False)
    changed_columns = []
      # Do the Columns
      for col in self.getFormulaColumns():
        new_formula = changeFormula(col.getFormulaStatementObject())
        if new_formula is not None:
      # Handle Prologue
      new_formula = changeFormula(self.getPrologue())
      if new_formula is not None:
      # Handle Epilogue
      new_formula = changeFormula(self.getEpilogue())
      if new_formula is not None:
    except Exception as err:
      msg = '''Changing column name from %s to %s.
Encountered error %s.
Changed formulas in columns %s.''' % (cur_colnm, new_colnm,
    str(err), ' '.join(changed_columns))
    return changed_columns

  def rowIndexFromName(name):
    Returns the row index for the row name
    return int(name) - 1

  def renameColumn(self, column, proposed_name):
    """
    Renames the column, checking for a duplicate
    :param column: column object
    :param proposed_name: str, proposed name
    :return: Boolean indicating success or failure
    NOTE(review): the body of `if bool_test:` (presumably the rename
    itself) is missing from this copy.
    """
    names = [c.getName(is_global_name=False) for c in self.getChildren()]
    # True only if no child already has the proposed name
    bool_test = all([name != proposed_name for name in names])
    if bool_test:
    return bool_test

  def renameRow(self, row_index, proposed_name):
    """
    Renames the row so that it is an integer value
    that creates the row ordering desired.
    :param row_index: index of the row to change
    :param proposed_name: string of a number
    NOTE(review): the end of the childFromName call, the try/except
    lines around the pdb traps, and the statements that store new_names
    and new_data back into the columns are missing from this copy.
    """
    name_column = self.childFromName(NAME_COLUMN_STR,
    names = name_column.getCells()
      names[row_index] = str(proposed_name)
      import pdb; pdb.set_trace()
      float_names = [float(x) for x in names]
      import pdb; pdb.set_trace()
    # Ordering of the rows by the numeric value of their names
    sel_index = np.argsort(float_names)
    new_names = Table._rowNamesFromSize(len(names))
    for column in self.getChildren(is_recursive=True):
      if Table.isNameColumn(column):
    # Update the order of values in each column
    for column in self.getLeaves(is_attached=True):
      if not Table.isNameColumn(column):
        data = column.getCells()
        new_data = [data[n] for n in sel_index]

  def setNamespace(self, namespace):
    """
    :param namespace: object stored as this table's namespace
    """
    setattr(self, "_namespace", namespace)
  def setIsEvaluateFormulas(self, setting):
    """
    :param setting: new value of the formula evaluation setting
    """
    setattr(self, "_is_evaluate_formulas", setting)

  def setEpilogue(self, epilogue_formula):
    """
    :param str epilogue_formula: New value for the Epilogue formula
    :return str: Error or None
    """
    self._epilogue = FormulaStatement(epilogue_formula, EPILOGUE_NAME)
    return self._epilogue.do()

  def setPrologue(self, prologue_formula):
    """
    :param str prologue_formula: New value for the Prologue formula
    :return: result of FormulaStatement.do() on the new Prologue
    """
    self._prologue = FormulaStatement(prologue_formula, PROLOGUE_NAME)
    return self._prologue.do()
  def tableFromName(self, name, is_relative=True):
    """
    Finds the table with the specified name or None.
    :param str name: name of the table
    :param bool is_relative: passed through to childFromName
    :return Table/None: None if the name does not resolve to a Table
    """
    node = self.childFromName(name, is_relative=is_relative)
    if Table.isTable(node):
      return node
    return None

  def trimRows(self):
    """
    Removes all consecutive rows at the end of the table
    that have None values in the data columns
    NOTE(review): the body of the final `if delete_row:` is missing from
    this copy, so the action taken for an all-null row cannot be seen.
    NOTE(review): the loop visits rows from index 0 upward although the
    summary speaks of trailing rows -- confirm the intended order.
    """
    num_rows = self.numRows()
    row_indexes = range(num_rows)
    for index in row_indexes:
      row = self.getRow(row_index=index)
      # Delete all of the name columns
      for colnm in row.keys():
        column = self.childFromName(colnm, is_relative=False)
        if column is None:
          import pdb; pdb.set_trace()
        if Table.isNameColumn(column):
          del row[column.getName()]
      # The row is a deletion candidate unless some remaining value is non-null
      delete_row = True
      for name in row.keys():
        column = self.childFromName(name, is_relative=False)
        if not isNull(row[name]):
          delete_row = False
      if delete_row:

  def updateCell(self, value, row_index, column_id):
    """
    Changes the value of the identified cell
    :param obj value: new value for the cell
    :param int row_index: 0-based index of the row
    :param int/str column_id: 0-based index of the column or its name
    """
    # An int selects by position; anything else is treated as a name
    if isinstance(column_id, int):
      column = self.columnFromIndex(column_id)
    else:
      column = self.childFromName(column_id, is_relative=False)
    column.updateCell(value, row_index)

  def updateColumn(self, column, cells):
    """
    Replaces the cells in the column with those provided
    :param column: column to update
    :param cells: cells to change
    """
    column.addCells(cells, replace=True)

  def updateRow(self, row, index):
    """
    Updates the row in place. Only changes values
    that are specified in row.
    Assigns the value of the NAME_COLUMN; note that this entry
    is written into the caller's row.
    :param row: Row
    :param index: index of row to change
    """
    row[NAME_COLUMN_STR] = Table._rowNameFromIndex(index)
    for name in row:
      column = self.childFromName(name, is_relative=False)
      # Name columns are left untouched by this update
      if Table.isNameColumn(column):
        continue
      column.updateCell(row[name], index)