Beispiel #1
0
    def fit(self, corpus, window=10, max_map_size=1000, ignore_missing=False):
        """
        Build the cooccurrence matrix from a single pass over the
        corpus and store the result in self.matrix.

        Parameters:
        - iterable of lists of strings corpus: the documents to scan.
        - int window: length of the (symmetric) context window
          within which cooccurrences are counted.
        - int max_map_size: upper bound on the size of map-based row
                            storage. A row that grows beyond it is
                            converted to more efficient array storage.
                            Higher values trade additional memory
                            for speed.
        - bool ignore_missing: whether words absent from the dictionary
                               (if one was supplied) are skipped.
                               Context window distances are preserved
                               even when out-of-vocabulary words are
                               skipped.
                               If False, a KeyError is raised.
        """

        # The window and both boolean flags are coerced to plain ints
        # before being handed over; max_map_size is passed as given.
        dictionary = self.dictionary
        supplied_flag = int(self.dictionary_supplied)
        window_size = int(window)
        ignore_flag = int(ignore_missing)

        self.matrix = construct_cooccurrence_matrix(
            corpus, dictionary, supplied_flag,
            window_size, ignore_flag, max_map_size)
Beispiel #2
0
    def fit(self, corpus, window=10, max_map_size=1000, ignore_missing=False):
        """
        Run one pass over the corpus, build its cooccurrence matrix
        and keep it on the instance as self.matrix.

        Parameters:
        - iterable of lists of strings corpus
        - int window: the length of the (symmetric) context window
          used for counting cooccurrences.
        - int max_map_size: the largest size map-based row storage may
                            reach. Once a row exceeds it, the row is
                            converted to more efficient array storage.
                            Raising this value increases speed at the
                            cost of higher memory usage.
        - bool ignore_missing: if True, words missing from the
                               dictionary (if it was supplied) are
                               ignored, while context window distances
                               are still preserved.
                               If False, a KeyError is raised.
        """

        # Positional arguments, collected in the order the construction
        # routine receives them. Flags and the window go in as ints.
        call_args = (
            corpus,
            self.dictionary,
            int(self.dictionary_supplied),
            int(window),
            int(ignore_missing),
            max_map_size,
        )
        self.matrix = construct_cooccurrence_matrix(*call_args)
Beispiel #3
0
    def fit(self, corpus, window=10):
        """
        Build both the dictionary and the cooccurrence matrix from a
        single pass over the corpus.

        No existing dictionary is read here: the dictionary and the
        matrix both come from the return value of
        construct_cooccurrence_matrix and are stored as self.dictionary
        and self.matrix, replacing any values set earlier.

        Parameters:
        - iterable of lists of strings corpus
        - int window: the length of the (symmetric)
          context window used for cooccurrence.
        """

        window_size = int(window)
        dictionary, matrix = construct_cooccurrence_matrix(corpus, window_size)

        self.dictionary = dictionary
        self.matrix = matrix
Beispiel #4
0
    def fit(self, corpus, window=10):
        """
        Make one pass through the corpus and derive the dictionary
        and the cooccurrence matrix from it.

        The method does not consult a previously fitted dictionary.
        construct_cooccurrence_matrix returns a (dictionary, matrix)
        pair, which overwrites self.dictionary and self.matrix.

        Parameters:
        - iterable of lists of strings corpus
        - int window: the length of the (symmetric)
          context window used for cooccurrence.
        """

        # The window is coerced to int before the call; the returned
        # pair is unpacked straight onto the instance.
        built = construct_cooccurrence_matrix(corpus, int(window))
        self.dictionary, self.matrix = built
Beispiel #5
0
    def fit_matrix(self, corpus, window=10):
        """
        Build the cooccurrence matrix for the corpus using the
        already-fitted dictionary, and store it in self.matrix.

        You must call fit_dictionary first.

        Parameters:
        - iterable of lists of strings corpus
        - int window: the length of the (symmetric)
          context window used for cooccurrence.

        Raises:
        - Exception: if self.dictionary is None.
        """
        dictionary = self.dictionary

        # Guard clause: nothing is constructed without a dictionary.
        if dictionary is None:
            raise Exception('You must fit the dictionary before transforming the corpus')

        window_size = int(window)
        self.matrix = construct_cooccurrence_matrix(corpus, dictionary, window_size)
Beispiel #6
0
    def fit(self, corpus, window=10, ignore_missing=False):
        """
        Scan the corpus once to build the cooccurrence matrix,
        which is then stored in self.matrix.

        Parameters:
        - iterable of lists of strings corpus
        - int window: the length of the (symmetric)
          context window used for cooccurrence.
        - bool ignore_missing: whether words that are missing from the
                               dictionary (if it was supplied) should
                               be ignored. Context window distances
                               are preserved even if out-of-vocabulary
                               words are ignored.
                               If False, a KeyError is raised.
        """

        # Each boolean-like input and the window size is passed on
        # as a plain int.
        vocabulary = self.dictionary
        has_dictionary = int(self.dictionary_supplied)
        span = int(window)
        skip_unknown = int(ignore_missing)

        self.matrix = construct_cooccurrence_matrix(
            corpus, vocabulary, has_dictionary, span, skip_unknown)