def apply(self, transformation):
    """
    Apply a transformation to this space and return the result as a new space.

    The data matrix is always transformed. Row indexing structures are
    copied over. Column indexing structures are emptied after a
    dimensionality reduction, restricted to the selected columns after a
    feature selection, and copied unchanged otherwise. The applied
    operation is appended to a copy of this space's list of operations.

    Args:
        transformation: of type Scaling, DimensionalityReduction or
            FeatureSelection

    Returns:
        A new space on which the transformation has been applied.
    """
    started_at = time.time()

    # TODO: FeatureSelection, DimReduction ..
    accepted_types = (Scaling, DimensionalityReduction, FeatureSelection)
    assert_is_instance(transformation, accepted_types)

    operation = transformation.create_operation()
    transformed = operation.apply(self.cooccurrence_matrix)

    # The new space gets its own operation history: ours plus this one.
    history = list(self.operations) + [operation]

    rows = list(self.id2row)
    row_index = self.row2id.copy()

    # Start from "no column labels"; only the branches that keep labels
    # overwrite these two.
    columns, column_index = [], {}
    if isinstance(operation, DimensionalityReductionOperation):
        self.assert_1dim_element()
    elif isinstance(operation, FeatureSelectionOperation):
        self.assert_1dim_element()
        # The operation records the column labels it selected from.
        operation.original_columns = self.id2column
        if operation.original_columns:
            selected = array(operation.original_columns)[
                operation.selected_columns]
            columns = list(selected)
            column_index = list2dict(columns)
    else:
        columns = list(self.id2column)
        column_index = self.column2id.copy()

    log.print_transformation_info(logger, transformation, 1,
                                  "\nApplied transformation:")
    log.print_matrix_info(logger, self.cooccurrence_matrix, 2,
                          "Original semantic space:")
    log.print_matrix_info(logger, transformed, 2,
                          "Resulted semantic space:")
    log.print_time_info(logger, time.time(), started_at, 2)

    return Space(transformed, rows, columns, row_index, column_index,
                 operations=history)
def test_list2dict(self):
    """list2dict maps each element to its position and rejects duplicates."""
    expectations = (
        (["a", "v", "d"], {"a": 0, "v": 1, "d": 2}),
        ([], {}),
    )
    for elements, mapping in expectations:
        self.assertDictEqual(list2dict(elements), mapping)

    # A repeated element must raise ValueError.
    duplicated = ["a", "v", "a"]
    self.assertRaises(ValueError, list2dict, duplicated)
def test_list2dict(self):
    """Check list2dict on a regular list, an empty list and a duplicate."""
    regular_input, regular_output = ["a", "v", "d"], {"a": 0, "v": 1, "d": 2}
    empty_input, empty_output = [], {}

    cases = [(regular_input, regular_output), (empty_input, empty_output)]
    for given, wanted in cases:
        produced = list2dict(given)
        self.assertDictEqual(produced, wanted)

    # Lists with a repeated element are expected to raise ValueError.
    self.assertRaises(ValueError, list2dict, ["a", "v", "a"])
def apply(self, transformation):
    """
    Apply a transformation to this space and return the result as a new space.

    The data matrix is always transformed. Row indexing structures are
    copied over. Column indexing structures are emptied after a
    dimensionality reduction, restricted to the selected columns after a
    feature selection, and copied unchanged otherwise. The applied
    operation is appended to a copy of this space's list of operations.

    Args:
        transformation: of type Scaling, DimensionalityReduction or
            FeatureSelection

    Returns:
        A new space on which the transformation has been applied.
    """
    started_at = time.time()

    # TODO: FeatureSelection, DimReduction ..
    accepted_types = (Scaling, DimensionalityReduction, FeatureSelection)
    assert_is_instance(transformation, accepted_types)

    operation = transformation.create_operation()
    transformed = operation.apply(self.cooccurrence_matrix)

    # The new space gets its own operation history: ours plus this one.
    history = list(self.operations) + [operation]

    rows = list(self.id2row)
    row_index = self.row2id.copy()

    # Start from "no column labels"; only the branches that keep labels
    # overwrite these two.
    columns, column_index = [], {}
    if isinstance(operation, DimensionalityReductionOperation):
        self.assert_1dim_element()
    elif isinstance(operation, FeatureSelectionOperation):
        self.assert_1dim_element()
        # The operation records the column labels it selected from.
        operation.original_columns = self.id2column
        if operation.original_columns:
            selected = array(operation.original_columns)[
                operation.selected_columns]
            columns = list(selected)
            column_index = list2dict(columns)
    else:
        columns = list(self.id2column)
        column_index = self.column2id.copy()

    log.print_transformation_info(logger, transformation, 1,
                                  "\nApplied transformation:")
    log.print_matrix_info(logger, self.cooccurrence_matrix, 2,
                          "Original semantic space:")
    log.print_matrix_info(logger, transformed, 2,
                          "Resulted semantic space:")
    log.print_time_info(logger, time.time(), started_at, 2)

    return Space(transformed, rows, columns, row_index, column_index,
                 operations=history)
def train_all_spaces(core_space, an_dn_space, pn_space, sv_space, vo_space):
    """
    Train one space per input space and stack the results into one space.

    The core space is row-normalized first. train_one_space is then called
    once for each input space, printing a progress label before each call.
    Rows of the verb-object result get the suffix ".objmat" and rows of
    the subject-verb result get ".subjmat" before the four results are
    stacked.

    Args:
        core_space: the core space; RowNormalization is applied to it.
        an_dn_space: input for the "adj, det" training step.
        pn_space: input for the "prep" training step.
        sv_space: input for the "sv" training step.
        vo_space: input for the "vo" training step.

    Returns:
        The space obtained by stacking, in Space.vstack argument order,
        the sv result, the vo result, the adj/det result and the prep
        result.
    """
    core_space = core_space.apply(RowNormalization())

    # print(...) with one string argument behaves the same on Python 2
    # and Python 3; the bare print statement is a syntax error on 3.
    # NOTE(review): the meaning of the two trailing integers passed to
    # train_one_space is not visible here; confirm against its definition.
    print("train adj, det")
    a_d_space = train_one_space(core_space, an_dn_space, 0, 3)
    print("train prep")
    prep_space = train_one_space(core_space, pn_space, 1, 3)
    print("train vo")
    v_obj_space = train_one_space(core_space, vo_space, 0, 4)
    print("train sv")
    v_subj_space = train_one_space(core_space, sv_space, 1, 4)

    # Tag the verb matrices' rows with a role suffix. The row structures
    # are replaced directly on the trained spaces.
    for trained, suffix in ((v_obj_space, ".objmat"),
                            (v_subj_space, ".subjmat")):
        renamed_rows = [row + suffix for row in trained.id2row]
        trained._id2row = renamed_rows
        trained._row2id = list2dict(renamed_rows)

    all_mat_space = Space.vstack(a_d_space, prep_space)
    all_mat_space = Space.vstack(v_obj_space, all_mat_space)
    all_mat_space = Space.vstack(v_subj_space, all_mat_space)
    return all_mat_space
def _project_core_operations(self, matrix_):
    """
    Project every stored operation, in order, onto the given matrix.

    Column indexing structures of this space are updated along the way:
    a dimensionality reduction empties them, and a feature selection
    restricts them to the selected columns (or empties them when the
    operation has no original columns).

    Args:
        matrix_: the matrix to project the operations on.

    Returns:
        The matrix obtained after projecting all operations.
    """
    projected = matrix_
    for op in self._operations:
        if isinstance(op, DimensionalityReductionOperation):
            self._id2column = []
            self._column2id = {}

        # Checked independently of the branch above, not as an elif.
        if isinstance(op, FeatureSelectionOperation):
            if not op.original_columns:
                self._id2column = []
                self._column2id = {}
            else:
                kept = array(op.original_columns)[op.selected_columns]
                self._id2column = list(kept)
                self._column2id = list2dict(self._id2column)

        projected = op.project(projected)

    return projected
def __init__(self, core_space, matrix_, id2row, row2id=None):
    """
    Constructor.

    Builds a peripheral space from a data matrix by projecting the
    operations of the core space onto it. Column indexing structures and
    operations are taken over from the core space.

    Args:
        core_space: Space type, the core space that this is peripheral to.
        matrix_: Matrix type, the data matrix of the space
        id2row: list, the row elements
        row2id: dictionary, maps row strings to ids. Optional, built from
            id2row by default.

    Returns:
        A peripheral semantic space (type PeripheralSpace) on which the
        core space operations have been projected.

    Raises:
        TypeError: if matrix_ or core_space are not of the correct type
        ValueError: if element shape is not consistent with
            the size of matrix rows
            if the matrix and the provided row and column
            indexing structures are not of consistent shapes.
    """
    assert_is_instance(matrix_, Matrix)
    assert_is_instance(core_space, Space)
    assert_is_instance(id2row, list)
    # TODO: assert it is not a peripheral space here!

    if row2id is not None:
        assert_dict_match_list(row2id, id2row)
    else:
        row2id = list2dict(id2row)

    # Everything below must be in place before projecting, because the
    # projection reads the operations and may replace the column
    # structures.
    self._column2id = core_space.column2id
    self._id2column = core_space.id2column
    self._operations = list(core_space.operations)
    self._id2row = id2row
    self._row2id = row2id

    self._cooccurrence_matrix = self._project_core_operations(matrix_)

    assert_shape_consistent(self.cooccurrence_matrix,
                            self._id2row, self._id2column,
                            self._row2id, self._column2id)

    column_count = self._cooccurrence_matrix.shape[1]
    self._element_shape = (column_count,)
def __init__(self, matrix_, id2row, id2column, row2id=None,
             column2id=None, **kwargs):
    """
    Constructor.

    Args:
        matrix_: Matrix type, the data matrix of the space
        id2row: list, the row elements
        id2column: list, the column elements
        row2id: dictionary, maps row strings to ids. Optional, built from
            id2row by default.
        column2id: dictionary, maps col strings to ids. Optional, built
            from id2column by default
        operations: list of operations already performed on the input
            matrix, Optional, by default set to empty.
        element_shape: tuple of int, the shape on row elements. Optional,
            by default row elements are one-dimensional and element_shape
            is (no_cols, ). Used in 3D composition.

    Returns:
        A semantic space (type Space)

    Raises:
        TypeError: if matrix_ is not of the correct type
        ValueError: if element shape is not consistent with
            the size of matrix rows
            if the matrix and the provided row and column
            indexing structures are not of consistent shapes.
    """
    assert_is_instance(matrix_, Matrix)
    assert_valid_kwargs(kwargs, ["operations", "element_shape"])
    assert_is_instance(id2row, list)
    assert_is_instance(id2column, list)

    if row2id is None:
        row2id = list2dict(id2row)
    else:
        assert_dict_match_list(row2id, id2row)

    if column2id is None:
        column2id = list2dict(id2column)
    else:
        assert_dict_match_list(column2id, id2column)

    assert_shape_consistent(matrix_, id2row, id2column, row2id, column2id)

    self._cooccurrence_matrix = matrix_
    self._row2id = row2id
    self._id2row = id2row
    self._column2id = column2id
    self._id2column = id2column

    # A fresh empty list is created when no operations are supplied.
    self._operations = kwargs.get("operations", [])

    n_columns = self._cooccurrence_matrix.shape[1]
    if "element_shape" in kwargs:
        elem_shape = kwargs["element_shape"]
        if prod(elem_shape) != n_columns:
            # Adjacent literals instead of a backslash continuation
            # inside the string, which put stray characters into the
            # message.
            raise ValueError(
                "Trying to assign invalid element shape: "
                "element_shape: %s, matrix columns: %s"
                % (str(elem_shape), str(n_columns)))
        # NOTE: watch out here, can cause bugs, if we change the dimension
        # of a regular space and we do not create a new space
        self._element_shape = elem_shape
    else:
        self._element_shape = (n_columns,)
def __init__(self, matrix_, id2row, id2column, row2id=None,
             column2id=None, operations=None, element_shape=None):
    """
    Constructor.

    Args:
        matrix_: Matrix type, the data matrix of the space
        id2row: list, the row elements
        id2column: list, the column elements
        row2id: dictionary, maps row strings to ids. Optional, built from
            id2row by default.
        column2id: dictionary, maps col strings to ids. Optional, built
            from id2column by default
        operations: list of operations already performed on the input
            matrix, Optional, by default set to a new empty list.
        element_shape: tuple of int, the shape on row elements. Optional,
            by default row elements are one-dimensional and element_shape
            is (no_cols, ). Used in 3D composition.

    Returns:
        A semantic space (type Space)

    Raises:
        TypeError: if matrix_ is not of the correct type
        ValueError: if element shape is not consistent with
            the size of matrix rows
            if the matrix and the provided row and column
            indexing structures are not of consistent shapes.
    """
    assert_is_instance(matrix_, Matrix)
    assert_is_instance(id2row, list)
    assert_is_instance(id2column, list)

    if row2id is None:
        row2id = list2dict(id2row)
    else:
        assert_dict_match_list(row2id, id2row)

    if column2id is None:
        column2id = list2dict(id2column)
    else:
        assert_dict_match_list(column2id, id2column)

    assert_shape_consistent(matrix_, id2row, id2column, row2id, column2id)

    self._cooccurrence_matrix = matrix_
    self._row2id = row2id
    self._id2row = id2row
    self._column2id = column2id
    self._id2column = id2column

    # A list literal as the default argument would be created once and
    # shared by every space constructed without explicit operations;
    # build a new list per instance instead.
    self._operations = [] if operations is None else operations

    n_columns = self._cooccurrence_matrix.shape[1]
    # Truthiness test kept on purpose: None and an empty shape both fall
    # back to the one-dimensional default.
    if element_shape:
        if prod(element_shape) != n_columns:
            # Adjacent literals instead of a backslash continuation
            # inside the string, which put stray characters into the
            # message.
            raise ValueError(
                "Trying to assign invalid element shape: "
                "element_shape: %s, matrix columns: %s"
                % (str(element_shape), str(n_columns)))
        # NOTE: watch out here, can cause bugs, if we change the dimension
        # of a regular space and we do not create a new space
        self._element_shape = element_shape
    else:
        self._element_shape = (n_columns,)