예제 #1
0
    def transform(self, X):
        """Build an (n_samples, 2) feature matrix of minimum dependency-tree
        depths per article: column 0 is the shallowest hedging word, column 1
        the shallowest refuting word. 100 is the sentinel for "no match or
        no parse available"."""
        stanparse_depths = get_stanparse_depths()
        stanparse_data = get_stanparse_data()

        mat = np.zeros((len(X), 2))
        for i, (_, s) in enumerate(X.iterrows()):
            # Reset per row. Previously these were assigned only inside the
            # try block, so a missing article raised NameError on the first
            # row and leaked the prior row's values on later rows.
            min_hedge_depth = min_refute_depth = 100
            try:
                sp_data = stanparse_data[s.articleId]
                sp_depths = stanparse_depths[s.articleId]

                for j, sentence in enumerate(sp_data['sentences']):
                    grph, grph_labels, grph_depths = sp_depths[j]
                    # Token positions in the Stanford parse are 1-based.
                    lemmas = list(enumerate([d[1]['Lemma'].lower()
                                             for d in sentence['words']],
                                            start=1))
                    hedge_match = self._find_matching(lemmas, _hedging_words)
                    refute_match = self._find_matching(lemmas, _refuting_words)

                    hedge_depths = [grph_depths[d] for d in hedge_match if d > 0]
                    refute_depths = [grph_depths[d] for d in refute_match if d > 0]

                    # Carry the running minimum across sentences.
                    hedge_depths.append(min_hedge_depth)
                    refute_depths.append(min_refute_depth)

                    min_hedge_depth = min(hedge_depths)
                    min_refute_depth = min(refute_depths)
            except (KeyError, IndexError):
                # No parse data for this article: keep the sentinel depths.
                pass
            mat[i, 0] = min_hedge_depth
            mat[i, 1] = min_refute_depth
        return mat
예제 #2
0
    def transform(self, X):
        """Produce an (n_samples, 1) indicator matrix: 1 when any sentence of
        the article contains a negated refuting word (a ``neg`` dependency, or
        an ``nn`` dependency whose dependent token is "not"), else 0."""
        stanparse_data = get_stanparse_data()
        mat = np.zeros((len(X), 1))

        def strip_idx(token):
            # Drop the trailing "-<position>" suffix Stanford appends.
            return token[:token.rfind('-')]

        for row, (_, sample) in enumerate(X.iterrows()):
            try:
                for sentence in stanparse_data[sample.articleId]['sentences']:
                    for rel, head, dependent in sentence['dependencies']:
                        is_negation = (
                            rel == 'neg'
                            or (rel == 'nn'
                                and strip_idx(dependent).lower() == 'not')
                        )
                        if is_negation and strip_idx(head).lower() in _refuting_words:
                            mat[row, 0] = 1
            except KeyError:
                # Article has no parse data: leave the indicator at 0.
                pass
        return mat
예제 #3
0
    def transform(self, X):
        """Return an (n_samples, 1) matrix flagging articles that negate a
        refuting word: an entry is 1 if some dependency is ``neg`` (or ``nn``
        with dependent "not") whose head lemma is in ``_refuting_words``."""
        stanparse_data = get_stanparse_data()
        mat = np.zeros((len(X), 1))

        def strip_idx(token):
            # Remove the "-<index>" suffix from a Stanford token label.
            return token[:token.rfind('-')]

        for idx, (_, record) in enumerate(X.iterrows()):
            try:
                sentences = stanparse_data[record.articleId]['sentences']
                for sentence in sentences:
                    for rel, head, dependent in sentence['dependencies']:
                        negated = rel == 'neg' or (
                            rel == 'nn' and strip_idx(dependent).lower() == 'not')
                        if negated and strip_idx(head).lower() in _refuting_words:
                            mat[idx, 0] = 1
            except KeyError:
                # No parse for this article; the flag stays 0.
                pass
        return mat
예제 #4
0
    def transform(self, X):
        """Build an (n_samples, 2) matrix of per-article minimum parse-tree
        depths: column 0 for hedging words, column 1 for refuting words.
        Articles with no parse data (or no matching word) get the sentinel
        depth 100."""
        stanparse_depths = get_stanparse_depths()
        stanparse_data = get_stanparse_data()

        mat = np.zeros((len(X), 2))
        for i, (_, s) in enumerate(X.iterrows()):
            # Reset the sentinels before the try: if the article lookup below
            # raised, the old code wrote the PREVIOUS row's depths into this
            # row because the reset lived inside the try block.
            min_hedge_depth = min_refute_depth = 100
            try:
                sp_data = stanparse_data[s.articleId]
                sp_depths = stanparse_depths[s.articleId]

                for j, sentence in enumerate(sp_data['sentences']):
                    grph, grph_labels, grph_depths = sp_depths[j]
                    # Stanford token positions are 1-based.
                    lemmas = list(
                        enumerate(
                            [d[1]['Lemma'].lower() for d in sentence['words']],
                            start=1))
                    hedge_match = self._find_matching(lemmas, _hedging_words)
                    refute_match = self._find_matching(lemmas, _refuting_words)

                    hedge_depths = [
                        grph_depths[d] for d in hedge_match if d > 0
                    ]
                    refute_depths = [
                        grph_depths[d] for d in refute_match if d > 0
                    ]

                    # Seed with the running minimum so it carries across
                    # sentences of the same article.
                    hedge_depths.append(min_hedge_depth)
                    refute_depths.append(min_refute_depth)

                    min_hedge_depth = min(hedge_depths)
                    min_refute_depth = min(refute_depths)
            except (KeyError, IndexError):
                # Missing parse data: keep the sentinel depths for this row.
                pass
            mat[i, 0] = min_hedge_depth
            mat[i, 1] = min_refute_depth
        return mat
        _, head_idx = get_stanford_idx(head)
        _, dep_idx = get_stanford_idx(dep)
        dep_graph.setdefault(head_idx, set()).add(dep_idx)
        dep_graph_labels[(head_idx, dep_idx)] = rel
    return dep_graph, dep_graph_labels


def _calc_depths(grph, n=0, d=0, depths=None):
    if depths is None:
        depths = {n: d}
    sx = grph.get(n)
    if sx:
        for s in sx:
            depths[s] = d+1
            _calc_depths(grph, s, d+1, depths)
    return depths


if __name__ == "__main__":
    # Precompute dependency-graph depth data for every parsed article and
    # cache it as a pickle for later feature extraction.
    dep_parse_data = get_stanparse_data()
    data = {}
    for id, dep_parse in dep_parse_data.items():
        for i, s in enumerate(dep_parse['sentences']):
            # Graph (node -> children) and edge labels for this sentence.
            grph, grph_labels = _build_dep_graph(s['dependencies'])
            # Depth of each token from the root node.
            grph_depths = _calc_depths(grph)
            # data: article id -> {sentence index: (graph, labels, depths)}
            d = data.setdefault(id, {})
            d[i] = grph, grph_labels, grph_depths

    with open(os.path.join('..', 'data', 'pickled', 'stanparse-depths.pickle'), 'wb') as f:
        pickle.dump(data, f, pickle.HIGHEST_PROTOCOL)
예제 #6
0
 def _get_word_in_sentence_at_pos(id, s_num, pos):
     """Return the lowercased lemma of the token at 1-based position *pos*
     in sentence *s_num* of the article identified by *id*."""
     parses = get_stanparse_data()
     words = parses[id]['sentences'][s_num]['words']
     # Parse positions are 1-based; the list is 0-based.
     return words[pos-1][1]['Lemma'].lower()
예제 #7
0
 def _get_word_in_sentence_at_pos(id, s_num, pos):
     """Look up the lemma (lowercased) for the word at 1-based index *pos*
     within sentence *s_num* of article *id*."""
     sentence = get_stanparse_data()[id]['sentences'][s_num]
     word_entry = sentence['words'][pos - 1]
     return word_entry[1]['Lemma'].lower()