Beispiel #1
0
    def __init__(self, embedding, analyzer, name="WCD", n_jobs=1,
                 normalize=True, verbose=0, oov=None, matching=True,
                 **kwargs):
        """Store the configuration and build the helper objects.

        Parameters
        ----------
        embedding :
            Stored unchanged as ``self._embedding``.
        analyzer :
            Stored unchanged as ``self._analyzer``.
        name :
            Label stored as ``self.name``.
        n_jobs :
            Stored as ``self.n_jobs``; it is not forwarded to
            ``NearestNeighbors`` by this constructor.
        normalize :
            Stored as ``self._normalize``.
        verbose :
            Stored as ``self.verbose``.
        oov :
            Stored as ``self._oov``.
        matching :
            ``True`` builds a ``Matching()`` without arguments, ``False`` or
            ``None`` disables it (``self._matching`` becomes ``None``), and
            any other value is converted with ``dict(...)`` and expanded as
            keyword arguments for ``Matching``.
        **kwargs :
            Forwarded verbatim to ``NearestNeighbors``.
        """
        # Plain configuration values.
        self.name = name
        self.n_jobs = n_jobs
        self.verbose = verbose
        self._embedding = embedding
        self._analyzer = analyzer
        self._normalize = normalize
        self._oov = oov

        # Every extra keyword argument configures the neighbor search.
        self._neighbors = NearestNeighbors(**kwargs)

        # Identity checks on purpose: only the real singletons True, False
        # and None select the shortcuts; everything else must be dict-like.
        if matching is True:
            matcher = Matching()
        elif matching is None or matching is False:
            matcher = None
        else:
            matcher = Matching(**dict(matching))
        self._matching = matcher
Beispiel #2
0
 def __init__(self, embedding, analyzer='word', matching=None, name="FWCD",
              n_jobs=1, use_idf=True):
     """Build the vectorizer, the neighbor search and the optional matching.

     Parameters
     ----------
     embedding :
         Passed as the first argument to ``EmbeddedVectorizer``.
     analyzer :
         Forwarded to ``EmbeddedVectorizer`` as ``analyzer``.
     matching :
         ``True`` builds a ``Matching()`` without arguments. Any other
         truthy value is converted with ``dict(...)`` and expanded as
         keyword arguments for ``Matching``. Falsy values (``None``,
         ``False``, an empty mapping) leave ``self.matching`` as ``None``.
     name :
         Label stored as ``self.name``.
     n_jobs :
         Forwarded to ``NearestNeighbors`` as ``n_jobs``.
     use_idf :
         Forwarded to ``EmbeddedVectorizer`` as ``use_idf``.
     """
     self.name = name

     if matching is True:
         # A bare True selects the default Matching() instead of failing
         # with a TypeError inside dict(True).
         self.matching = Matching()
     elif matching:
         self.matching = Matching(**dict(matching))
     else:
         self.matching = None

     self.vect = EmbeddedVectorizer(embedding, analyzer=analyzer, norm='l2',
                                    use_idf=use_idf)
     self.nn = NearestNeighbors(n_jobs=n_jobs, metric='cosine',
                                algorithm='brute')
Beispiel #3
0
 def __init__(self, embedding, analyzer=None, oov=None,
              matching_params=None, name="ppwmd", verbose=0, n_jobs=1):
     """Initialize the instance attributes from the given parameters.

     Parameters
     ----------
     embedding :
         Stored unchanged as ``self.embedding``.
     analyzer :
         Stored unchanged as ``self.analyzer``.
     oov :
         Stored unchanged as ``self.oov``.
     matching_params :
         When truthy, converted with ``dict(...)`` and expanded as keyword
         arguments for ``Matching``; otherwise ``self.matching`` is ``None``.
     name :
         Label stored as ``self.name``.
     verbose :
         Stored as ``self.verbose``.
     n_jobs :
         Accepted but not used by this constructor.
     """
     self.embedding = embedding
     self.analyzer = analyzer
     self.oov = oov

     # A falsy value (None, empty mapping, ...) disables the matching step.
     if matching_params:
         self.matching = Matching(**dict(matching_params))
     else:
         self.matching = None

     self.verbose = verbose
     self.name = name
Beispiel #4
0
    def __init__(self, analyzer=None, matching=None, name=None, verbose=0,
                 n_epochs=10, alpha=0.25, min_alpha=0.05, n_jobs=4,
                 **kwargs):
        """Store the configuration and build the Doc2Vec and neighbor models.

        Parameters
        ----------
        analyzer :
            Stored unchanged as ``self.analyzer``.
        matching :
            ``True`` builds a ``Matching()`` without arguments, ``False`` or
            ``None`` disables it (``self._matching`` becomes ``None``), and
            any other value is converted with ``dict(...)`` and expanded as
            keyword arguments for ``Matching``.
        name :
            Label stored as ``self.name``; ``None`` selects
            ``"paragraph-vectors"``.
        verbose :
            Stored as ``self.verbose``.
        n_epochs :
            Stored as ``self.n_epochs``.
        alpha :
            Stored as ``self.alpha`` and passed to ``Doc2Vec`` as both
            ``alpha`` and ``min_alpha``.
        min_alpha :
            Stored as ``self.min_alpha`` only; it is not passed to
            ``Doc2Vec`` here.
        n_jobs :
            Passed to ``Doc2Vec`` as ``workers``.
        **kwargs :
            Forwarded verbatim to ``NearestNeighbors``.
        """
        self.alpha = alpha
        self.min_alpha = min_alpha
        self.verbose = verbose

        if name is None:
            name = "paragraph-vectors"
        self.name = name

        # Identity checks on purpose: only the real singletons True, False
        # and None select the shortcuts; everything else must be dict-like.
        if matching is True:
            matcher = Matching()
        elif matching is None or matching is False:
            matcher = None
        else:
            matcher = Matching(**dict(matching))
        self._matching = matcher

        self.analyzer = analyzer

        # NOTE(review): the model starts with min_alpha == alpha, so the
        # ``min_alpha`` argument does not reach Doc2Vec. Presumably the
        # decay towards ``self.min_alpha`` is applied by the training code
        # elsewhere -- confirm before changing.
        doc2vec_options = {
            "alpha": alpha,
            "min_alpha": alpha,
            "size": 500,
            "window": 8,
            "min_count": 1,
            "sample": 1e-5,
            "workers": n_jobs,
            "negative": 20,
            "dm": 0,
            "dbow_words": 1,  # original author's open question: only with dm!=0?
            "dm_mean": 0,  # original author's note: unused in concat mode
            "dm_concat": 1,
            "dm_tag_count": 1,
        }
        self.model = Doc2Vec(**doc2vec_options)

        self.n_epochs = n_epochs
        # Every extra keyword argument configures the neighbor search.
        self._neighbors = NearestNeighbors(**kwargs)