Python get_brown_cluster_data Examples

Programming Language: Python

Namespace/Package Name: model.utils

Method/Function: get_brown_cluster_data

Examples at hotexamples.com: 4

Python get_brown_cluster_data - 4 examples found. These are the top-rated real-world Python examples of model.utils.get_brown_cluster_data, extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: transforms.py Project: anubrata/fc-aaai18

    def transform(self, X):
        """Build a sparse indicator matrix of the cluster keys found per headline.

        The column space is split into two equally sized halves: the first
        half flags cluster keys obtained from ``claimHeadline``, the second
        half flags those obtained from ``articleHeadline``. The width of
        each half is the number of entries in the index returned by
        ``get_brown_cluster_data``.

        Args:
            X: Object supporting ``len()`` and ``iterrows()`` whose rows
                expose ``claimHeadline`` and ``articleHeadline``
                (presumably a pandas DataFrame -- confirm against callers).

        Returns:
            A float32 ``dok_matrix`` with ``len(X)`` rows and twice as many
            columns as there are index entries, holding 1 in every flagged
            cell.
        """
        clusters, column_of = get_brown_cluster_data(self.cluster_size)

        half_width = len(column_of)
        features = dok_matrix((len(X), 2 * half_width), dtype=np.float32)

        # Position in this tuple selects the half of the column space.
        headline_fields = ("claimHeadline", "articleHeadline")

        for row, (_, sample) in enumerate(X.iterrows()):
            for block, field in enumerate(headline_fields):
                shift = half_width * block
                headline = getattr(sample, field)
                for key in _get_bigram_clusters(headline, clusters):
                    features[row, column_of[key] + shift] = 1

        return features

Example #2

Show file

File: transforms.py Project: paris5020/athene_system

    def transform(self, X):
        """Flag, per input row, the cluster keys found in each headline.

        The result has two column ranges of equal width ``n``, where ``n``
        is the number of entries in the index returned by
        ``get_brown_cluster_data``: columns ``[0, n)`` are set from
        ``claimHeadline`` and columns ``[n, 2 * n)`` from
        ``articleHeadline``.

        Args:
            X: Object supporting ``len()`` and ``iterrows()`` whose rows
                expose ``claimHeadline`` and ``articleHeadline``
                (presumably a pandas DataFrame -- confirm against callers).

        Returns:
            A float32 ``dok_matrix`` of shape ``(len(X), 2 * n)`` with 1 in
            every flagged cell.
        """
        cluster_lookup, index_of = get_brown_cluster_data(self.cluster_size)

        n = len(index_of)
        result = dok_matrix((len(X), n * 2), dtype=np.float32)

        for row_no, (_, record) in enumerate(X.iterrows()):
            # Claim headline fills the first column range unshifted.
            claim_keys = _get_bigram_clusters(record.claimHeadline, cluster_lookup)
            for key in claim_keys:
                result[row_no, index_of[key]] = 1

            # Article headline fills the second range, shifted by n columns.
            article_keys = _get_bigram_clusters(
                record.articleHeadline, cluster_lookup
            )
            for key in article_keys:
                result[row_no, index_of[key] + n] = 1

        return result

Example #3

Show file

File: transforms.py Project: anubrata/fc-aaai18

    def transform(self, X):
        """Build a sparse indicator matrix of cross-headline cluster pairs.

        For every input row, each (article token, claim token) combination
        is looked up in the word-to-cluster mapping returned by
        ``get_brown_cluster_data``. When both tokens have a cluster, the
        column registered for that ``(article cluster, claim cluster)``
        pair is set to 1.

        Args:
            X: Object supporting ``len()`` and ``iterrows()`` whose rows
                expose ``claimHeadline`` and ``articleHeadline``
                (presumably a pandas DataFrame -- confirm against callers).

        Returns:
            A float32 ``dok_matrix`` with ``len(X)`` rows and one column
            per entry of the cluster-pair index.
        """
        word_to_cluster, pair_to_column = get_brown_cluster_data(self.cluster_size)
        n_columns = len(pair_to_column)
        features = dok_matrix((len(X), n_columns), dtype=np.float32)

        for row, (_, sample) in enumerate(X.iterrows()):
            claim_tokens = get_tokenized_lemmas(sample.claimHeadline)
            article_tokens = get_tokenized_lemmas(sample.articleHeadline)

            for article_word, claim_word in it.product(article_tokens, claim_tokens):
                article_cluster = word_to_cluster.get(article_word)
                claim_cluster = word_to_cluster.get(claim_word)

                # Only pairs where both words belong to a cluster are encoded.
                if article_cluster is not None and claim_cluster is not None:
                    column = pair_to_column[(article_cluster, claim_cluster)]
                    features[row, column] = 1

        return features

Example #4

Show file

File: transforms.py Project: paris5020/athene_system

    def transform(self, X):
        """Mark which cluster pairs occur between article and claim headlines.

        Each input row produces one matrix row. Every combination of an
        article-headline token with a claim-headline token is mapped to a
        pair of clusters through the mapping returned by
        ``get_brown_cluster_data``; combinations where either token has no
        cluster are ignored, and the column indexed by the remaining
        cluster pairs is set to 1.

        Args:
            X: Object supporting ``len()`` and ``iterrows()`` whose rows
                expose ``claimHeadline`` and ``articleHeadline``
                (presumably a pandas DataFrame -- confirm against callers).

        Returns:
            A float32 ``dok_matrix`` of shape
            ``(len(X), number of cluster-pair index entries)``.
        """
        cluster_of, column_of = get_brown_cluster_data(self.cluster_size)
        shape = (len(X), len(column_of))
        out = dok_matrix(shape, dtype=np.float32)

        for row_no, (_, record) in enumerate(X.iterrows()):
            claim_lemmas = get_tokenized_lemmas(record.claimHeadline)
            article_lemmas = get_tokenized_lemmas(record.articleHeadline)

            # Article token comes first in each combination, claim token second.
            for token_pair in it.product(article_lemmas, claim_lemmas):
                first, second = [cluster_of.get(token) for token in token_pair]
                if first is None or second is None:
                    # At least one token is not covered by the cluster data.
                    continue
                out[row_no, column_of[(first, second)]] = 1

        return out