from conceptnet5.edges import make_edge
from conceptnet5.whereami import get_project_filename


def _load_frame_data():
    """
    Read the frame table from 'data/info/zh_frames.json'.

    The file is opened inside a `with` block so the handle is closed as soon
    as parsing finishes, and it is decoded explicitly as UTF-8 instead of
    relying on the platform's default encoding.
    """
    path = get_project_filename('data/info/zh_frames.json')
    with codecs.open(path, encoding='utf-8') as frame_file:
        return json.load(frame_file)


# Frame table indexed by frame id. Each entry is read below for its 'text'
# template (containing the {1} and {2} placeholders) and its 'relation'.
FRAME_DATA = _load_frame_data()


def handle_raw_assertion(line):
    """
    Turn one raw input line into a JSON-encoded edge.

    `line` is expected to hold exactly four fields separated by ', ':
    user, frame id, first concept, second concept. A falsy line produces
    nothing.

    Yields a single string: the edge built by `make_edge`, serialized with
    `json.dumps(..., ensure_ascii=False)`.

    Raises ValueError if the line does not split into exactly four fields.
    A frame id that is missing from FRAME_DATA propagates the lookup error.
    """
    if not line:
        return

    user, frame_id, concept1, concept2 = line.split(', ')

    fdata = FRAME_DATA[frame_id]
    ftext = fdata['text']
    rel = fdata['relation']

    # Fill the frame's placeholders with the concepts, wrapped in [[...]].
    surfaceText = ftext.replace(u'{1}', u'[[' + concept1 + u']]').replace(
        u'{2}', u'[[' + concept2 + u']]')

    start = make_concept_uri(concept1, 'zh_TW')
    end = make_concept_uri(concept2, 'zh_TW')
    sources = ['/s/activity/ptt/petgame', '/s/contributor/petgame/' + user]

    edge = make_edge(rel, start, end, dataset='/d/conceptnet/4/zh',
                     license='/l/CC/By', sources=sources,
                     surfaceText=surfaceText, weight=1)
    yield json.dumps(edge, ensure_ascii=False)


if __name__ == '__main__':
    from conceptnet5.readers import transform_stream
    transform_stream(handle_raw_assertion)
contributors = [ s for s in source_list if s.startswith('/s/contributor') ] assert len(contributors) <= 1, contributors edge = make_edge(relation, start, end, dataset, LICENSE, source_list, '/ctx/all', frame_text, weight=weight) okay = True if contributors: uri = edge['uri'] contributor = contributors[0] if (uri, contributor) in self.seen_sources: okay = False else: self.seen_sources.add((uri, contributor)) if okay: yield json.dumps(edge, ensure_ascii=False) if __name__ == '__main__': from conceptnet5.readers import transform_stream builder = CN4Builder() transform_stream(builder.handle_raw_assertion)
def handle_raw_assertion(line):
    """
    Convert a single raw assertion line into a JSON-serialized edge.

    The line is split on ', ' into four fields (user, frame id, and two
    concepts). The frame id selects an entry of FRAME_DATA, whose 'text'
    template and 'relation' are combined with the concepts to build the edge.
    A falsy `line` yields nothing; otherwise exactly one JSON string is
    yielded.
    """
    if not line:
        return

    user, frame_id, concept1, concept2 = line.split(', ')

    frame = FRAME_DATA[frame_id]
    template = frame['text']
    relation = frame['relation']

    # Substitute {1} first, then {2}, wrapping each concept in [[...]].
    surface = template.replace(u'{1}', u'[[' + concept1 + u']]')
    surface = surface.replace(u'{2}', u'[[' + concept2 + u']]')

    start_uri = make_concept_uri(concept1, 'zh_TW')
    end_uri = make_concept_uri(concept2, 'zh_TW')

    # One source for the activity and one for the contributing user.
    source_uris = [
        '/s/activity/ptt/petgame',
        '/s/contributor/petgame/' + user,
    ]

    edge = make_edge(
        relation,
        start_uri,
        end_uri,
        dataset='/d/conceptnet/4/zh',
        license='/l/CC/By',
        sources=source_uris,
        surfaceText=surface,
        weight=1,
    )
    yield json.dumps(edge, ensure_ascii=False)


if __name__ == '__main__':
    # Run as a script: stream lines through the handler above.
    from conceptnet5.readers import transform_stream
    transform_stream(handle_raw_assertion)
dataset = build_data_set(parts_dict) sources = build_sources(parts_dict, preposition_fix) reject = False for source_list, weight in sources: if 'commons2_reject' in ' '.join(source_list): reject = True if not reject: for source_list, weight in sources: if not by_bedume_and_bad(source_list,start,end): contributors = [s for s in source_list if s.startswith('/s/contributor')] assert len(contributors) <= 1, contributors edge = make_edge(relation, start, end, dataset, LICENSE, source_list, '/ctx/all', frame_text, weight=weight) okay = True if contributors: uri = edge['uri'] contributor = contributors[0] if (uri, contributor) in self.seen_sources: okay = False else: self.seen_sources.add((uri, contributor)) if okay: yield json.dumps(edge, ensure_ascii=False) if __name__ == '__main__': from conceptnet5.readers import transform_stream builder = CN4Builder() transform_stream(builder.handle_raw_assertion)