Beispiel #1
1
    def __init__(self, X=None, Y=None, metas=None, domain=None,
                 text_features=None):
        """
        Set up the corpus arrays and choose the text features.

        Each of ``X``, ``Y`` and ``metas`` that is None is replaced by an
        array with zero columns whose row count is the value returned by
        ``_check_arrays``. The weights always start as such an array.

        Args:
            X (numpy.ndarray): attributes
            Y (numpy.ndarray): class variables
            metas (numpy.ndarray): meta attributes; e.g. text
            domain (Orange.data.Domain): the domain for this Corpus
            text_features (list): meta attributes that are used for
                text mining. With a domain present they are inferred
                when this is None; without a domain nothing is set.
        """
        n_doc = _check_arrays(X, Y, metas)

        # Substitute zero-column placeholders for the arrays not supplied.
        if X is None:
            X = np.zeros((n_doc, 0))
        if Y is None:
            Y = np.zeros((n_doc, 0))
        if metas is None:
            metas = np.zeros((n_doc, 0))

        self.X = X
        self.Y = Y
        self.metas = metas
        self.W = np.zeros((n_doc, 0))
        self.domain = domain
        self.text_features = None  # list of text features for mining
        self._tokens = None
        self.attributes = {}

        # Text features only make sense once a domain is known.
        if domain is not None:
            if text_features is None:
                self._infer_text_features()
            else:
                self.set_text_features(text_features)

        Table._init_ids(self)
Beispiel #2
1
    def extend_corpus(self, metadata, Y):
        """
        Append documents to corpus.

        Args:
            metadata (numpy.ndarray): Meta data
            Y (numpy.ndarray): Class variables

        Raises:
            ValueError: if X is not empty, i.e. the product of its shape
                is non-zero.
        """
        if np.prod(self.X.shape) != 0:
            # The trailing space keeps the two literals from fusing into
            # "emptywhile" when Python concatenates them.
            raise ValueError("Extending corpus only works when X is empty "
                             "while the shape of X is {}".format(self.X.shape))

        self.metas = np.vstack((self.metas, metadata))

        cv = self.domain.class_var
        # Falsy labels (e.g. None) are skipped so they are never registered
        # as class values.
        for val in set(filter(None, Y)):
            if val not in cv.values:
                cv.add_value(val)
        new_Y = np.array([cv.to_val(i) for i in Y])[:, None]
        self._Y = np.vstack((self._Y, new_Y))

        # X and W are rebuilt with zero columns and one row per meta row.
        self.X = self.W = np.zeros((self.metas.shape[0], 0))
        Table._init_ids(self)

        self._tokens = None     # invalidate tokens
Beispiel #3
0
    def __init__(self,
                 X=None,
                 Y=None,
                 metas=None,
                 domain=None,
                 text_features=None):
        """
        Initialise the data arrays, the domain and the text features.

        Args:
            X (numpy.ndarray): attributes
            Y (numpy.ndarray): class variables
            metas (numpy.ndarray): meta attributes; e.g. text
            domain (Orange.data.Domain): the domain for this Corpus
            text_features (list): meta attributes that are used for
                text mining. Infer them if None (only done when a
                domain is given).
        """
        n_doc = _check_arrays(X, Y, metas)

        def _blank():
            # A fresh zero-column array per call, so nothing is shared.
            return np.zeros((n_doc, 0))

        self.X = _blank() if X is None else X
        self.Y = _blank() if Y is None else Y
        self.metas = _blank() if metas is None else metas
        self.W = _blank()
        self.domain = domain
        self.text_features = None    # list of text features for mining

        has_domain = domain is not None
        if has_domain and text_features is None:
            self._infer_text_features()
        elif has_domain:
            self.set_text_features(text_features)

        Table._init_ids(self)
Beispiel #4
0
    def extend_corpus(self, metadata, Y):
        """
        Append documents to corpus.

        Args:
            metadata (numpy.ndarray): Meta data
            Y (numpy.ndarray): Class variables

        Raises:
            ValueError: if X is not empty, i.e. the product of its shape
                is non-zero.
        """
        if np.prod(self.X.shape) != 0:
            # Without the trailing space the adjacent literals would be
            # joined as "emptywhile".
            raise ValueError("Extending corpus only works when X is empty "
                             "while the shape of X is {}".format(self.X.shape))

        self.metas = np.vstack((self.metas, metadata))

        cv = self.domain.class_var
        # Falsy labels (e.g. None) are not added to the class values.
        for val in set(filter(None, Y)):
            if val not in cv.values:
                cv.add_value(val)
        new_Y = np.array([cv.to_val(i) for i in Y])[:, None]
        self._Y = np.vstack((self._Y, new_Y))

        # Rebuild X and W with zero columns, one row per meta row.
        self.X = self.W = np.zeros((self.metas.shape[0], 0))
        Table._init_ids(self)

        self._tokens = None     # invalidate tokens
        self._set_unique_titles()
Beispiel #5
0
    def __init__(self, domain=None, X=None, Y=None, metas=None, W=None,
                 text_features=None):
        """
        Initialise the corpus arrays, bookkeeping fields and text features.

        Args:
            domain (Orange.data.Domain): the domain for this Corpus
            X (numpy.ndarray): attributes; a zero-column sparse matrix is
                used when this is None or has no elements
            Y (numpy.ndarray): class variables
            metas (numpy.ndarray): meta attributes; e.g. text
            W (numpy.ndarray): instance weights
            text_features (list): meta attributes that are used for
                text mining. Infer them if None (only done when a
                domain is given).
        """
        n_doc = _check_arrays(X, Y, metas)

        # prefer sparse (BoW compute values)
        if X is None or not X.size:
            self.X = sp.csr_matrix((n_doc, 0))
        else:
            self.X = X

        if Y is None:
            self.Y = np.zeros((n_doc, 0))
        else:
            self.Y = Y

        if metas is None:
            self.metas = np.zeros((n_doc, 0))
        else:
            self.metas = metas

        if W is None:
            self.W = np.zeros((n_doc, 0))
        else:
            self.W = W

        self.domain = domain
        self.text_features = None    # list of text features for mining
        self._tokens = None
        self._dictionary = None
        self._ngrams_corpus = None
        self.ngram_range = (1, 1)
        self.attributes = {}
        self.pos_tags = None
        self.used_preprocessor = None   # required for compute values

        if domain is not None:
            if text_features is None:
                self._infer_text_features()
            else:
                self.set_text_features(text_features)

        Table._init_ids(self)
Beispiel #6
0
    def extend_corpus(self, metadata, Y):
        """
        Append documents to corpus.

        Args:
            metadata (numpy.ndarray): Meta data
            Y (numpy.ndarray): Class variables
        """
        self.metas = np.vstack((self.metas, metadata))

        cv = self.domain.class_var
        # Falsy labels (e.g. None) are skipped so a missing class is never
        # registered as a class value.
        for val in set(filter(None, Y)):
            if val not in cv.values:
                cv.add_value(val)
        new_Y = np.array([cv.to_val(i) for i in Y])[:, None]
        self._Y = np.vstack((self._Y, new_Y))

        # Take the row count from the already extended metas.
        # NOTE(review): len(self) presumably follows X in Orange's Table,
        # which still has the old row count at this point - confirm.
        self.X = self.W = np.zeros((self.metas.shape[0], 0))
        Table._init_ids(self)
Beispiel #7
0
    def extend_corpus(self, documents, metadata, class_values):
        """
        Append documents, their meta rows and class labels to the corpus.

        Args:
            documents: documents added in place to ``self.documents``
            metadata: rows stacked under the existing ``self.metas``
            class_values: one label per new document; labels not yet
                among the class variable's values are added to it
        """
        # TODO check if Domains match!
        self.metas = np.vstack((self.metas, metadata))
        self.documents += documents

        class_var = self.domain.class_var
        for label in set(class_values):
            if label in class_var.values:
                continue
            class_var.add_value(label)

        # Map every label to its value and shape the result as a column.
        encoded = [class_var.to_val(label) for label in class_values]
        new_Y = np.array(encoded)[:, None]
        self._Y = np.vstack((self._Y, new_Y))

        # X and W share one zero-column array with a row per document.
        self.X = self.W = np.zeros((len(self.documents), 0))
        Table._init_ids(self)
Beispiel #8
0
    def __init__(self, domain=None, X=None, Y=None, metas=None, W=None,
                 text_features=None, ids=None):
        """
        Initialise the corpus arrays, bookkeeping fields, text features
        and row ids.

        Args:
            domain (Orange.data.Domain): the domain for this Corpus
            X (numpy.ndarray): attributes
            Y (numpy.ndarray): class variables
            metas (numpy.ndarray): meta attributes; e.g. text
            W (numpy.ndarray): instance weights
            text_features (list): meta attributes that are used for
                text mining. Infer them if None (only done when a
                domain is given).
            ids (numpy.ndarray): Indices; when None they are created by
                ``Table._init_ids``.
        """
        super().__init__()
        n_doc = _check_arrays(X, Y, metas)

        # The arrays are assigned inside the unlocked_reference context.
        with self.unlocked_reference():
            self.X = np.zeros((n_doc, 0)) if X is None else X
            self.Y = np.zeros((n_doc, 0)) if Y is None else Y
            self.metas = np.zeros((n_doc, 0)) if metas is None else metas
            self.W = np.zeros((n_doc, 0)) if W is None else W
        self.domain = domain
        self.text_features = []  # list of text features for mining
        self._tokens = None
        self._dictionary = None
        self._ngrams_corpus = None
        self.ngram_range = (1, 1)
        self.attributes = {}
        self._pos_tags = None
        from orangecontrib.text.preprocess import PreprocessorList
        # required for compute values
        self.__used_preprocessor = PreprocessorList([])
        self._titles: Optional[np.ndarray] = None
        self._pp_documents = None  # preprocessed documents

        if domain is not None:
            if text_features is None:
                self._infer_text_features()
            else:
                self.set_text_features(text_features)

        if ids is None:
            Table._init_ids(self)
        else:
            self.ids = ids
        self._set_unique_titles()
Beispiel #9
0
 def __init__(self, documents, X, Y, metas, domain):
     """
     Store the documents with their data arrays and domain.

     ``X`` and ``Y`` fall back to arrays with one row per document and
     zero columns when None is passed; ``W`` is always such an array.
     """
     self.documents = documents
     self.X = X if X is not None else np.zeros((len(documents), 0))
     self.Y = Y if Y is not None else np.zeros((len(documents), 0))
     self.metas = metas
     self.W = np.zeros((len(documents), 0))
     self.domain = domain
     Table._init_ids(self)
Beispiel #10
0
    def extend_corpus(self, documents, metadata, class_values, meta_vars):
        """
        Append documents, their meta rows and class labels to the corpus.

        Args:
            documents: documents added in place to ``self.documents``
            metadata: rows stacked under the existing ``self.metas``
            class_values: one label per new document; labels not yet
                among the class variable's values are added to it
            meta_vars: not used by this method
        """
        # TODO check if Domains match!
        self.metas = np.vstack((self.metas, metadata))
        self.documents += documents

        class_var = self.domain.class_var
        for label in set(class_values):
            if label in class_var.values:
                continue
            class_var.add_value(label)

        encoded = [class_var.to_val(label) for label in class_values]
        new_Y = np.array(encoded)[:, None]
        # NaN entries in the new class column are replaced by 0.
        nan_mask = np.isnan(new_Y)
        new_Y[nan_mask] = 0
        self._Y = np.vstack((self._Y, new_Y))

        # X and W share one zero-column array with a row per document.
        self.X = self.W = np.zeros((len(self.documents), 0))
        Table._init_ids(self)
Beispiel #11
0
 def __init__(self, documents, X, Y, metas, domain):
     """
     Keep the documents, data arrays and domain on the instance.

     When ``X`` or ``Y`` is None it is replaced by a zero-column array
     with ``len(documents)`` rows; the weights ``W`` always get one.
     """
     self.documents = documents
     self.X = np.zeros((len(documents), 0)) if X is None else X
     self.Y = np.zeros((len(documents), 0)) if Y is None else Y
     self.metas = metas
     self.W = np.zeros((len(documents), 0))
     self.domain = domain
     Table._init_ids(self)
Beispiel #12
0
    def __init__(self, domain=None, X=None, Y=None, metas=None, W=None,
                 text_features=None, ids=None):
        """
        Initialise the corpus arrays, bookkeeping fields, text features
        and row ids.

        Args:
            domain (Orange.data.Domain): the domain for this Corpus
            X (numpy.ndarray): attributes; a zero-column sparse matrix is
                used when None
            Y (numpy.ndarray): class variables
            metas (numpy.ndarray): meta attributes; e.g. text
            W (numpy.ndarray): instance weights
            text_features (list): meta attributes that are used for
                text mining. Infer them if None (only done when a
                domain is given).
            ids (numpy.ndarray): Indices; when None they are created by
                ``Table._init_ids``.
        """
        n_doc = _check_arrays(X, Y, metas)

        if X is None:
            # prefer sparse (BoW compute values)
            X = sp.csr_matrix((n_doc, 0))
        if Y is None:
            Y = np.zeros((n_doc, 0))
        if metas is None:
            metas = np.zeros((n_doc, 0))
        if W is None:
            W = np.zeros((n_doc, 0))

        self.X = X
        self.Y = Y
        self.metas = metas
        self.W = W
        self.domain = domain
        self.text_features = None  # list of text features for mining
        self._tokens = None
        self._dictionary = None
        self._ngrams_corpus = None
        self.ngram_range = (1, 1)
        self.attributes = {}
        self.pos_tags = None
        self.used_preprocessor = None  # required for compute values

        if domain is not None:
            if text_features is None:
                self._infer_text_features()
            else:
                self.set_text_features(text_features)

        if ids is None:
            Table._init_ids(self)
        else:
            self.ids = ids
    def __init__(self, relation):
        """Create a wrapper for fusion.Relation.

        The metas come from ``_create_metas``; the domain gets one
        continuous variable per column name, or per column index when the
        relation has no column names.

        Parameters:
        -----------
        relation: An instance of `skfusion.fusion.Relation`
        """
        self.relation = relation
        meta_vars, metas = self._create_metas(relation)
        self.metas = metas

        # _Y and W share one zero-column array with len(relation.data) rows.
        no_columns = np.zeros((len(relation.data), 0))
        self._Y = no_columns
        self.W = no_columns

        # Use column indices as names when the relation has none.
        if relation.col_names is None:
            attr_names = range(relation.data.shape[1])
        else:
            attr_names = relation.col_names

        attributes = [ContinuousVariable(str(name)) for name in attr_names]
        self.domain = Domain(attributes, metas=meta_vars)
        Table._init_ids(self)
Beispiel #14
0
    def extend_corpus(self, metadata, Y):
        """
        Append documents to corpus.

        Args:
            metadata (numpy.ndarray): Meta data
            Y (numpy.ndarray): Class variables
        """

        self.metas = np.vstack((self.metas, metadata))

        cv = self.domain.class_var
        # Falsy labels (e.g. None) are skipped so a missing class is never
        # registered as a class value.
        for val in set(filter(None, Y)):
            if val not in cv.values:
                cv.add_value(val)
        new_Y = np.array([cv.to_val(i) for i in Y])[:, None]
        self._Y = np.vstack((self._Y, new_Y))

        # Take the row count from the already extended metas.
        # NOTE(review): len(self) presumably follows X in Orange's Table,
        # which still has the old row count at this point - confirm.
        self.X = self.W = np.zeros((self.metas.shape[0], 0))
        Table._init_ids(self)

        self._tokens = None     # invalidate tokens
Beispiel #15
0
    def extend_corpus(self, metadata, Y):
        """
        Append documents to corpus.

        Args:
            metadata (numpy.ndarray): Meta data
            Y (numpy.ndarray): Class variables
        """

        self.metas = np.vstack((self.metas, metadata))

        cv = self.domain.class_var
        # Only non-falsy labels are registered; entries such as None stand
        # for a missing class and must not become class values.
        for val in set(filter(None, Y)):
            if val not in cv.values:
                cv.add_value(val)
        new_Y = np.array([cv.to_val(i) for i in Y])[:, None]
        self._Y = np.vstack((self._Y, new_Y))

        # Size X and W from the extended metas rather than len(self).
        # NOTE(review): Orange's Table length is presumably derived from X,
        # which has not been resized yet here - confirm.
        self.X = self.W = np.zeros((self.metas.shape[0], 0))
        Table._init_ids(self)

        self._tokens = None     # invalidate tokens