コード例 #1
0
ファイル: mass_import.py プロジェクト: zitoliv/conceptnet5
def batch_import(filename, batch_size=50):
    """
    Feed the contents of `filename` to the graph database in batches.

    The setup script 'gremlin/setup.gremlin' is run first.  The lines of
    `filename` are then decoded as UTF-8, grouped into batches of
    `batch_size` lines, and each batch is joined into a single string that
    is handed to `run_in_db`.

    :param filename: path of the data file to import.
    :param batch_size: number of lines sent per `run_in_db` call
        (defaults to 50, the previously hard-coded value).
    """
    graph = get_graph()

    # Make sure the DB has the appropriate global functions loaded.
    with open(get_project_filename('gremlin/setup.gremlin')) as setup_file:
        graph.gremlin_query(setup_file.read())

    # Read bytes explicitly so that .decode('utf-8') is always applied to a
    # byte string, regardless of the interpreter's default text handling.
    with open(filename, 'rb') as data_file:
        queue = []
        for line in data_file:
            queue.append(line.decode('utf-8'))
            if len(queue) >= batch_size:
                run_in_db(graph, u''.join(queue))
                queue = []
        # Flush whatever is left over, but never submit an empty script:
        # the queue is empty when the file is empty or its line count is an
        # exact multiple of the batch size.
        if queue:
            run_in_db(graph, u''.join(queue))
コード例 #2
0
def batch_import(filename, batch_size=50):
    """
    Feed the contents of `filename` to the graph database in batches.

    The setup script 'gremlin/setup.gremlin' is run first.  The lines of
    `filename` are then decoded as UTF-8, grouped into batches of
    `batch_size` lines, and each batch is joined into a single string that
    is handed to `run_in_db`.

    :param filename: path of the data file to import.
    :param batch_size: number of lines sent per `run_in_db` call
        (defaults to 50, the previously hard-coded value).
    """
    graph = get_graph()

    # Make sure the DB has the appropriate global functions loaded.
    with open(get_project_filename('gremlin/setup.gremlin')) as setup_file:
        graph.gremlin_query(setup_file.read())

    # Read bytes explicitly so that .decode('utf-8') is always applied to a
    # byte string, regardless of the interpreter's default text handling.
    with open(filename, 'rb') as data_file:
        queue = []
        for line in data_file:
            queue.append(line.decode('utf-8'))
            if len(queue) >= batch_size:
                run_in_db(graph, u''.join(queue))
                queue = []
        # Flush whatever is left over, but never submit an empty script:
        # the queue is empty when the file is empty or its line count is an
        # exact multiple of the batch size.
        if queue:
            run_in_db(graph, u''.join(queue))
コード例 #3
0
from __future__ import with_statement
from conceptnet5.whereami import get_project_filename

# Pronunciation table built at import time from 'data/info/cmudict.0.7a':
# each key is the text before the double space on a line, each value is the
# list of space-separated symbols after it.
phoneticDict = {}
with open(get_project_filename('data/info/cmudict.0.7a')) as rhymelist:
    for line in rhymelist:
        # Lines starting with ';;;' carry no entry and are passed over.
        if not line.startswith(';;;'):
            word, phon = line.strip().split('  ')
            phon = phon.split(' ')
            phoneticDict[word] = phon


def get_phonetic(text):
    """
    Return the concatenated phonetic symbols for every word in `text`.

    `text` is split on whitespace.  Each word is upper-cased and looked up
    in `phoneticDict`; a word with no entry contributes its own upper-cased
    characters instead.
    """
    symbols = []
    for token in text.split():
        key = token.upper()
        # Fall back to spelling the word out letter by letter.
        symbols += phoneticDict.get(key, list(key))
    return symbols


def edit_distance(list1, list2):
    """
    Fill in a cost table comparing prefixes of `list1` and `list2`.

    NOTE(review): only part of this function is visible here -- the branch
    for non-matching elements and the return statement are not shown, so
    the final result cannot be confirmed from this fragment.
    """
    m = len(list1)
    n = len(list2)
    # The table has (m + 1) rows and (n + 1) columns; cell [a][b] relates the
    # first `a` items of list1 to the first `b` items of list2.
    data = [[0 for col in range(n + 1)] for row in range(m + 1)]
    # The first row and first column are seeded with their own index.
    for col in range(n + 1):
        data[0][col] = col
    for row in range(m + 1):
        data[row][0] = row
    for a in range(1, m + 1):
        for b in range(1, n + 1):
            if list1[a - 1] == list2[b - 1]:
                # Equal elements: carry the diagonal neighbour over unchanged.
                data[a][b] = data[a - 1][b - 1]
コード例 #4
0
ファイル: ptt_petgame.py プロジェクト: D-Griffin/conceptnet5
import os
import codecs
import sys
import json
from collections import defaultdict
from conceptnet5.nodes import make_concept_uri
from conceptnet5.edges import make_edge
from conceptnet5.whereami import get_project_filename

# Frame table loaded once at import time from 'data/info/zh_frames.json'.
# handle_raw_assertion looks entries up by frame id and reads their 'text'
# and 'relation' fields.
#
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes, and it is decoded explicitly as UTF-8 rather than relying
# on the platform's default encoding.
with codecs.open(get_project_filename('data/info/zh_frames.json'),
                 encoding='utf-8') as frame_file:
    FRAME_DATA = json.load(frame_file)


def handle_raw_assertion(line):
    """
    Generate the JSON-encoded edge described by one raw assertion line.

    `line` is expected to hold four fields separated by ', ': the user, a
    frame id, and two concepts.  The frame id selects an entry of
    FRAME_DATA, whose 'text' template and 'relation' are used to build the
    edge.  A falsy `line` produces no output at all.
    """
    if line:
        user, frame_id, concept1, concept2 = line.split(', ')
        frame = FRAME_DATA[frame_id]
        template = frame['text']
        relation = frame['relation']

        # Substitute the two concepts into the frame text, wrapped in [[ ]].
        with_first = template.replace(u'{1}', u'[[' + concept1 + u']]')
        surface = with_first.replace(u'{2}', u'[[' + concept2 + u']]')

        start_uri = make_concept_uri(concept1, 'zh_TW')
        end_uri = make_concept_uri(concept2, 'zh_TW')
        provenance = [
            '/s/activity/ptt/petgame',
            '/s/contributor/petgame/' + user,
        ]
        new_edge = make_edge(
            relation, start_uri, end_uri,
            dataset='/d/conceptnet/4/zh',
            license='/l/CC/By',
            sources=provenance,
            surfaceText=surface,
            weight=1,
        )
        yield json.dumps(new_edge, ensure_ascii=False)
コード例 #5
0
import os
import codecs
import sys
import json
from collections import defaultdict
from conceptnet5.nodes import make_concept_uri
from conceptnet5.edges import make_edge
from conceptnet5.whereami import get_project_filename

# Frame table loaded once at import time from 'data/info/zh_frames.json'.
# handle_raw_assertion looks entries up by frame id and reads their 'text'
# and 'relation' fields.
#
# The file is opened in a `with` block so the handle is closed as soon as
# parsing finishes, and it is decoded explicitly as UTF-8 rather than relying
# on the platform's default encoding.
with codecs.open(get_project_filename('data/info/zh_frames.json'),
                 encoding='utf-8') as frame_file:
    FRAME_DATA = json.load(frame_file)


def handle_raw_assertion(line):
    if not line:
        return
    parts = line.split(', ')
    user, frame_id, concept1, concept2 = parts
    fdata = FRAME_DATA[frame_id]
    ftext = fdata['text']
    rel = fdata['relation']

    surfaceText = ftext.replace(u'{1}', u'[[' + concept1 + u']]').replace(
        u'{2}', u'[[' + concept2 + u']]')
    start = make_concept_uri(concept1, 'zh_TW')
    end = make_concept_uri(concept2, 'zh_TW')
    sources = ['/s/activity/ptt/petgame', '/s/contributor/petgame/' + user]
    edge = make_edge(rel,
                     start,
                     end,
                     dataset='/d/conceptnet/4/zh',
コード例 #6
0
ファイル: rhyme.py プロジェクト: D-Griffin/conceptnet5
from __future__ import with_statement
from conceptnet5.whereami import get_project_filename

# Pronunciation table built at import time from 'data/info/cmudict.0.7a':
# each key is the text before the double space on a line, each value is the
# list of space-separated symbols after it.
phoneticDict = {}
with open(get_project_filename('data/info/cmudict.0.7a')) as rhymelist:
    for line in rhymelist:
        # Lines starting with ';;;' carry no entry and are passed over.
        if not line.startswith(';;;'):
            word, phon = line.strip().split('  ')
            phon = phon.split(' ')
            phoneticDict[word] = phon

def get_phonetic(text):
    """
    Return the concatenated phonetic symbols for every word in `text`.

    `text` is split on whitespace.  Each word is upper-cased and looked up
    in `phoneticDict`; a word with no entry contributes its own upper-cased
    characters instead.
    """
    symbols = []
    for token in text.split():
        key = token.upper()
        # Fall back to spelling the word out letter by letter.
        symbols += phoneticDict.get(key, list(key))
    return symbols

def edit_distance(list1, list2):
    m = len(list1)
    n = len(list2)
    data = [[0 for col in range(n+1)] for row in range(m+1)]
    for col in range(n+1):
        data[0][col] = col
    for row in range(m+1):
        data[row][0] = row
    for a in range(1, m+1):
        for b in range(1, n+1):
            if list1[a-1] == list2[b-1]:
                data[a][b] = data[a-1][b-1]
            else:
                data[a][b] = 1 + min(data[a-1][b], data[a][b-1], data[a-1][b-1])