def batch_import(filename, batch_size=50):
    """Feed the lines of `filename` to the graph database in batches.

    The setup script 'gremlin/setup.gremlin' is read and passed to
    `graph.gremlin_query` first. The data file is then read line by line;
    each line is decoded as UTF-8, and the accumulated lines are
    concatenated and handed to `run_in_db` whenever `batch_size` lines
    have been collected, and once more for any remainder.

    Parameters:
        filename: path of the data file to import.
        batch_size: number of lines sent per `run_in_db` call (default 50,
            the previously hard-coded value). Values below 1 behave like 1,
            because the size check runs after each line is appended.
    """
    graph = get_graph()

    # Make sure the DB has the appropriate global functions loaded.
    with open(get_project_filename('gremlin/setup.gremlin')) as setup_file:
        graph.gremlin_query(setup_file.read())

    with open(filename) as data_file:
        queue = []
        for line in data_file:
            # NOTE(review): `.decode` assumes lines are byte strings, as they
            # are when iterating a file under Python 2.
            queue.append(line.decode('utf-8'))
            if len(queue) >= batch_size:
                run_in_db(graph, u''.join(queue))
                queue = []
        # Flush the remainder, but do not send an empty script when the file
        # was empty or its length was an exact multiple of batch_size.
        if queue:
            run_in_db(graph, u''.join(queue))
from __future__ import with_statement
from conceptnet5.whereami import get_project_filename

# Maps each dictionary headword to its list of phonetic symbols.
phoneticDict = {}
with open(get_project_filename('data/info/cmudict.0.7a')) as rhymelist:
    for line in rhymelist:
        # Lines starting with ';;;' are skipped (treated as comments).
        if line.startswith(';;;'):
            continue
        # Split on any run of whitespace: the first field is the word and
        # the remaining fields are its phonetic symbols. Splitting on a
        # single space and unpacking into two names raised ValueError for
        # every entry with more than one symbol.
        fields = line.split()
        if not fields:
            # Blank line: nothing to record.
            continue
        word, phon = fields[0], fields[1:]
        phoneticDict[word] = phon


def get_phonetic(text):
    """Return a flat list of phonetic symbols for the words in `text`.

    `text` is split on whitespace. Each word is uppercased and looked up in
    `phoneticDict`; a word that is not present contributes its uppercase
    characters, one list element per character.
    """
    parts = []
    for word in text.split():
        key = word.upper()
        parts.extend(phoneticDict.get(key, list(key)))
    return parts


def edit_distance(list1, list2):
    # NOTE(review): this definition is cut off in the reviewed source; only
    # the statements below are visible, and they are reproduced unchanged.
    m = len(list1)
    n = len(list2)
    # (m + 1) x (n + 1) table, initialised to zero.
    data = [[0 for col in range(n + 1)] for row in range(m + 1)]
    # First row holds 0..n and first column holds 0..m.
    for col in range(n + 1):
        data[0][col] = col
    for row in range(m + 1):
        data[row][0] = row
    for a in range(1, m + 1):
        for b in range(1, n + 1):
            # Equal elements copy the diagonal neighbour's value.
            if list1[a - 1] == list2[b - 1]:
                data[a][b] = data[a - 1][b - 1]
import os
import codecs
import sys
import json
from collections import defaultdict
from conceptnet5.nodes import make_concept_uri
from conceptnet5.edges import make_edge
from conceptnet5.whereami import get_project_filename

# Frame table loaded from 'data/info/zh_frames.json'. It is indexed by frame
# id below, and each entry is read for its 'text' and 'relation' keys.
# The file is opened in a `with` block so the handle is closed once the JSON
# has been parsed instead of being left open for the life of the process.
with codecs.open(get_project_filename('data/info/zh_frames.json')) as _frames_file:
    FRAME_DATA = json.load(_frames_file)


def handle_raw_assertion(line):
    """Yield the JSON-encoded edge described by one raw assertion line.

    This is a generator. An empty `line` yields nothing. Otherwise `line`
    must split on ', ' into exactly four fields: user, frame id, concept 1
    and concept 2. One JSON string is yielded for the resulting edge.

    Raises (on iteration):
        ValueError: if `line` does not split into exactly four fields.
        KeyError: if the frame id is not present in FRAME_DATA.
    """
    if not line:
        return

    parts = line.split(', ')
    user, frame_id, concept1, concept2 = parts

    fdata = FRAME_DATA[frame_id]
    ftext = fdata['text']
    rel = fdata['relation']

    # Fill the frame's {1} and {2} slots with the concepts, each wrapped in
    # double square brackets.
    surfaceText = ftext.replace(u'{1}', u'[['+concept1+u']]').replace(u'{2}', u'[['+concept2+u']]')

    start = make_concept_uri(concept1, 'zh_TW')
    end = make_concept_uri(concept2, 'zh_TW')
    sources = ['/s/activity/ptt/petgame', '/s/contributor/petgame/' + user]
    edge = make_edge(rel, start, end, dataset='/d/conceptnet/4/zh',
                     license='/l/CC/By', sources=sources,
                     surfaceText=surfaceText, weight=1)
    # ensure_ascii=False keeps non-ASCII characters unescaped in the output.
    yield json.dumps(edge, ensure_ascii=False)
import os import codecs import sys import json from collections import defaultdict from conceptnet5.nodes import make_concept_uri from conceptnet5.edges import make_edge from conceptnet5.whereami import get_project_filename FRAME_DATA = json.load( codecs.open(get_project_filename('data/info/zh_frames.json'))) def handle_raw_assertion(line): if not line: return parts = line.split(', ') user, frame_id, concept1, concept2 = parts fdata = FRAME_DATA[frame_id] ftext = fdata['text'] rel = fdata['relation'] surfaceText = ftext.replace(u'{1}', u'[[' + concept1 + u']]').replace( u'{2}', u'[[' + concept2 + u']]') start = make_concept_uri(concept1, 'zh_TW') end = make_concept_uri(concept2, 'zh_TW') sources = ['/s/activity/ptt/petgame', '/s/contributor/petgame/' + user] edge = make_edge(rel, start, end, dataset='/d/conceptnet/4/zh',
from __future__ import with_statement
from conceptnet5.whereami import get_project_filename

# Maps each dictionary headword to its list of phonetic symbols.
phoneticDict = {}
with open(get_project_filename('data/info/cmudict.0.7a')) as rhymelist:
    for line in rhymelist:
        # Lines starting with ';;;' are skipped (treated as comments).
        if line.startswith(';;;'):
            continue
        # Split on any run of whitespace: the first field is the word and
        # the remaining fields are its phonetic symbols. Splitting on a
        # single space and unpacking into two names raised ValueError for
        # every entry with more than one symbol.
        fields = line.split()
        if not fields:
            # Blank line: nothing to record.
            continue
        word, phon = fields[0], fields[1:]
        phoneticDict[word] = phon


def get_phonetic(text):
    """Return a flat list of phonetic symbols for the words in `text`.

    `text` is split on whitespace. Each word is uppercased and looked up in
    `phoneticDict`; a word that is not present contributes its uppercase
    characters, one list element per character.
    """
    parts = []
    for word in text.split():
        key = word.upper()
        parts.extend(phoneticDict.get(key, list(key)))
    return parts


def edit_distance(list1, list2):
    # NOTE(review): no return statement is visible for this definition in the
    # reviewed source; it appears to be cut off. The visible statements are
    # reproduced unchanged.
    m = len(list1)
    n = len(list2)
    # (m + 1) x (n + 1) table, initialised to zero.
    data = [[0 for col in range(n+1)] for row in range(m+1)]
    # First row holds 0..n and first column holds 0..m.
    for col in range(n+1):
        data[0][col] = col
    for row in range(m+1):
        data[row][0] = row
    for a in range(1, m+1):
        for b in range(1, n+1):
            if list1[a-1] == list2[b-1]:
                # Equal elements copy the diagonal neighbour's value.
                data[a][b] = data[a-1][b-1]
            else:
                # Otherwise: one more than the smallest of the upper, left
                # and diagonal neighbours.
                data[a][b] = 1 + min(data[a-1][b], data[a][b-1], data[a-1][b-1])