Exemple #1
0
from conceptnet5.edges import make_edge
from conceptnet5.whereami import get_project_filename

# Frame definitions loaded once at import time, keyed by frame id. Each entry
# is read below for its 'text' template and its 'relation'.
# The file is opened in a `with` block so the handle is closed as soon as the
# JSON has been parsed, and it is decoded explicitly as UTF-8.
with codecs.open(get_project_filename('data/info/zh_frames.json'),
                 encoding='utf-8') as _frames_file:
    FRAME_DATA = json.load(_frames_file)


def handle_raw_assertion(line):
    """
    Convert one raw assertion line into a JSON-serialized edge.

    `line` must contain exactly four fields separated by ', ': the
    contributing user, the frame id, and the two concepts. The frame id is
    looked up in FRAME_DATA to obtain the surface-text template and the
    relation.

    Yields a single JSON string for the edge; yields nothing when `line`
    is empty. Raises ValueError when the line does not split into exactly
    four fields, and KeyError when the frame id is not in FRAME_DATA.
    """
    if line:
        user, frame_id, concept1, concept2 = line.split(', ')
        frame = FRAME_DATA[frame_id]
        template = frame['text']
        relation = frame['relation']

        # Fill slot {1} first and slot {2} second, wrapping each concept in
        # double square brackets.
        filled = template.replace(u'{1}', u'[[' + concept1 + u']]')
        filled = filled.replace(u'{2}', u'[[' + concept2 + u']]')

        assertion = make_edge(
            relation,
            make_concept_uri(concept1, 'zh_TW'),
            make_concept_uri(concept2, 'zh_TW'),
            dataset='/d/conceptnet/4/zh',
            license='/l/CC/By',
            sources=['/s/activity/ptt/petgame',
                     '/s/contributor/petgame/' + user],
            surfaceText=filled,
            weight=1,
        )
        yield json.dumps(assertion, ensure_ascii=False)

if __name__ == '__main__':
    # Script entry point. The import sits inside the guard, so it only runs
    # when this file is executed directly rather than imported.
    from conceptnet5.readers import transform_stream
    # Hand the per-line handler to transform_stream. Presumably it feeds each
    # input line to the handler and writes out what it yields -- confirm in
    # conceptnet5.readers.
    transform_stream(handle_raw_assertion)

                    # NOTE(review): this is the interior of a generator whose
                    # header and enclosing loops are not visible here.
                    # Pick out the sources that name a contributor.
                    contributors = [
                        s for s in source_list
                        if s.startswith('/s/contributor')
                    ]
                    # A source list is expected to name at most one contributor.
                    assert len(contributors) <= 1, contributors
                    edge = make_edge(relation,
                                     start,
                                     end,
                                     dataset,
                                     LICENSE,
                                     source_list,
                                     '/ctx/all',
                                     frame_text,
                                     weight=weight)
                    # Emit each (edge URI, contributor) pair only once: a pair
                    # already recorded in self.seen_sources is suppressed, a
                    # new pair is recorded. Edges without a contributor are
                    # always emitted.
                    okay = True
                    if contributors:
                        uri = edge['uri']
                        contributor = contributors[0]
                        if (uri, contributor) in self.seen_sources:
                            okay = False
                        else:
                            self.seen_sources.add((uri, contributor))
                    if okay:
                        yield json.dumps(edge, ensure_ascii=False)


if __name__ == '__main__':
    # Script entry point. The import sits inside the guard, so it only runs
    # when this file is executed directly rather than imported.
    from conceptnet5.readers import transform_stream
    # The bound method is passed as the handler, so all lines share the same
    # CN4Builder instance.
    builder = CN4Builder()
    transform_stream(builder.handle_raw_assertion)

def handle_raw_assertion(line):
    """
    Build the JSON form of the edge described by one raw assertion line.

    The line is split on ', ' into exactly four fields: user, frame id,
    first concept and second concept. FRAME_DATA supplies the frame's text
    template and relation for the given frame id.

    Yields one JSON string, or nothing for an empty line. A line with the
    wrong number of fields raises ValueError; an unknown frame id raises
    KeyError.
    """
    if not line:
        return

    user, frame_id, concept1, concept2 = line.split(', ')
    frame_info = FRAME_DATA[frame_id]
    surface = frame_info['text']
    relation = frame_info['relation']

    # Substitute the slots in order ({1}, then {2}); each concept is wrapped
    # in double square brackets.
    for slot, concept in ((u'{1}', concept1), (u'{2}', concept2)):
        surface = surface.replace(slot, u'[[' + concept + u']]')

    start_uri = make_concept_uri(concept1, 'zh_TW')
    end_uri = make_concept_uri(concept2, 'zh_TW')
    edge_fields = {
        'dataset': '/d/conceptnet/4/zh',
        'license': '/l/CC/By',
        'sources': ['/s/activity/ptt/petgame',
                    '/s/contributor/petgame/' + user],
        'surfaceText': surface,
        'weight': 1,
    }
    yield json.dumps(make_edge(relation, start_uri, end_uri, **edge_fields),
                     ensure_ascii=False)


if __name__ == '__main__':
    # Script entry point. The import sits inside the guard, so it only runs
    # when this file is executed directly rather than imported.
    from conceptnet5.readers import transform_stream
    # Hand the per-line handler to transform_stream. Presumably it feeds each
    # input line to the handler and writes out what it yields -- confirm in
    # conceptnet5.readers.
    transform_stream(handle_raw_assertion)
Exemple #4
0
        # NOTE(review): this is the tail of a generator method whose header is
        # not visible here; relation, start, end, frame_text, parts_dict and
        # preposition_fix are set up above this excerpt.
        dataset = build_data_set(parts_dict)
        # `sources` is iterated below as (source_list, weight) pairs.
        sources = build_sources(parts_dict, preposition_fix)

        # The whole assertion is dropped if any of its source lists mentions
        # 'commons2_reject'.
        reject = False
        for source_list, weight in sources:
            if 'commons2_reject' in ' '.join(source_list):
                reject = True

        if not reject:
            for source_list, weight in sources:
                # by_bedume_and_bad is defined elsewhere; source lists it
                # flags are skipped -- see that helper for the criteria.
                if not by_bedume_and_bad(source_list,start,end):
                    contributors = [s for s in source_list if s.startswith('/s/contributor')]
                    # A source list is expected to name at most one contributor.
                    assert len(contributors) <= 1, contributors
                    edge = make_edge(relation, start, end, dataset, LICENSE, source_list, '/ctx/all', frame_text, weight=weight)
                    # Emit each (edge URI, contributor) pair only once: a pair
                    # already recorded in self.seen_sources is suppressed, a
                    # new pair is recorded. Edges without a contributor are
                    # always emitted.
                    okay = True
                    if contributors:
                        uri = edge['uri']
                        contributor = contributors[0]
                        if (uri, contributor) in self.seen_sources:
                            okay = False
                        else:
                            self.seen_sources.add((uri, contributor))
                    if okay:
                        yield json.dumps(edge, ensure_ascii=False)


if __name__ == '__main__':
    # Script entry point. The import sits inside the guard, so it only runs
    # when this file is executed directly rather than imported.
    from conceptnet5.readers import transform_stream
    # The bound method is passed as the handler, so all lines share the same
    # CN4Builder instance.
    builder = CN4Builder()
    transform_stream(builder.handle_raw_assertion)