def testPreproc(self):
    """Parse a two-sentence text and check the preprocessed views are non-empty.

    The parse result is wrapped in ``OceanusDataPreproc`` and its
    ``tokens()``, ``trees()`` and ``deps()`` accessors must each return a
    container with at least one element.
    """
    client = pyOceanus.Oceanus()
    parsed = client.parse("這是一個測試的句子,有的地方沒有句號。句號後有一個新句子,這就是全部的材料。")
    preproc = OceanusDataPreproc(parsed)

    # Checked in the same order as before: tokens, then trees, then deps.
    token_count = len(preproc.tokens())
    self.assertGreater(token_count, 0)

    tree_count = len(preproc.trees())
    self.assertGreater(tree_count, 0)

    dep_count = len(preproc.deps())
    self.assertGreater(dep_count, 0)
Example #2
0
    def test_features(self):
        """Check that ``KorFeatures`` yields strictly positive core features.

        A two-sentence text is parsed, preprocessed with
        ``OceanusDataPreproc`` and fed to ``KorFeatures`` (constructed with
        ``skipTopic=True``).  The resulting ``feats`` mapping must be
        non-empty and every feature listed below must be greater than zero.
        """
        oc = pyOceanus.Oceanus()
        ocdata = oc.parse("這個項目測試一個簡單的句子,雖然還有一個沒有句號的情況。句號後有一個新句子,這就是全部的材料。")
        oc_preproc = OceanusDataPreproc(ocdata)

        korFeats = KorFeatures("test_sentence",
                               oc_preproc.tokens(),
                               oc_preproc.trees(),
                               oc_preproc.deps(),
                               skipTopic=True)
        feats = korFeats.feats
        print("KorFeatures: ")
        print(feats)
        self.assertGreater(len(feats), 0)

        # Table-driven so a failure names the offending feature and shows its
        # actual value, instead of the opaque "False is not true".
        positive_features = (
            'CharFreq_Q25',
            'WordFreq_Q25',
            'CharRank_800',
            'WordRank_1000',
            'nChar',
            'CharStrokes_Q50',
            'nWord',
            'WordLen_Q25',
            'ClsLen_Q25',
            'SenLen_Q25',
            'PropDepth',
            'SynSim',
            'nWordBeforeMV',
            'nConn',
            'rPronounNoun',
            'NounOverlap_Local',
            'SemanticOverlap_Local',
        )
        for name in positive_features:
            self.assertGreater(feats[name], 0, msg=name)
Example #3
0
def set_Oceanus_Endpoint(url):
    """Rebind the module-level ``oc`` to a new ``pyOceanus.Oceanus`` built from ``url``."""
    global oc
    client = pyOceanus.Oceanus(url)
    oc = client
Example #4
0
import pandas as pd
import re
import pyOceanus
import pdb
from itertools import chain

# Shared Oceanus client.  The name is bound before the attempt so that it
# always exists at module level: if construction fails, ``oc`` stays ``None``
# rather than being left undefined (which surfaced later as a NameError far
# from the real cause).
oc = None
try:
    oc = pyOceanus.Oceanus()
except Exception as ex:
    # Best-effort: report the failure and keep importing; a client can still
    # be installed afterwards through set_Oceanus_Endpoint().
    print(ex)

# Module-level dict, initially empty.
cache_dict = {}


def set_Oceanus_Endpoint(url):
    """Replace the module-level ``oc`` client with one constructed from ``url``."""
    global oc
    new_client = pyOceanus.Oceanus(url)
    oc = new_client


def make_example_data(cwn_data):
    """Flatten every example sentence in ``cwn_data`` into a pandas table.

    ``cwn_data`` is iterated as ``{lemma: {senseid: senseObj}}`` where
    ``senseObj["example_cont"]`` is a sequence of example strings.  One
    record is produced per example:

    * ``lemma`` / ``senseid`` -- the keys the example was found under
    * ``widx`` -- index of the first ``<`` in the *raw* example (``-1`` if
      there is none), i.e. measured before any characters are removed
    * ``exid`` -- position of the example within its sense
    * ``example`` -- the example with ``<``, ``>``, ``'`` and ``"`` removed
      and surrounding whitespace stripped

    NOTE(review): the visible body builds ``sense_data`` but does not
    return it -- confirm whether the remainder of the function is missing.
    """
    markup = re.compile("[<>'\"]")
    records = [
        (lemma, sense_id, raw.find("<"), ex_pos, markup.sub("", raw).strip())
        for lemma, sense_map in cwn_data.items()
        for sense_id, sense in sense_map.items()
        for ex_pos, raw in enumerate(sense["example_cont"])
    ]

    column_names = ["lemma", "senseid", "widx", "exid", "example"]
    sense_data = pd.DataFrame.from_records(records, columns=column_names)
Example #5
0
from os.path import abspath, dirname
from itertools import chain
import pyOceanus
from FluidSeg import FluidSeg
from FluidSeg import TokenData
import config

# Module-level Oceanus client, constructed at import time from
# config.OCEANUS_ENDPOINT; fluid_seg() below parses text through it.
oc = pyOceanus.Oceanus(config.OCEANUS_ENDPOINT)


def fluid_seg(text, lexicon):

    fseg = FluidSeg(lexicon)
    segData = fseg.segment(text)
    try:
        od = oc.parse(text)
        preseg = list(chain.from_iterable(od.tokens))
        preseg = [TokenData(x[0], x[3], x[4]) for x in preseg]
    except Exception as ex:
        print("cannot process text content")
        return flask.make_response("cannot process text content", 400)

    segData.setPresegment(preseg)
    gran_label = ["0.00", "0.33", "0.66", "1.00", "preseg", "token"]
    seg_list = [
        segData.toSegmentedToken(segData.preseg, granularity=0.00),
        segData.toSegmentedToken(segData.preseg, granularity=0.33),
        segData.toSegmentedToken(segData.preseg, granularity=0.66),
        segData.toSegmentedToken(segData.preseg, granularity=1.00),
        segData.toSegmentedToken(segData.preseg),
        segData.toSegmentedToken(segData.tokens),
Example #6
0
 def test_getNNCompounds(self):
     """Check that ``get_NN_compounds`` finds exactly two items in the sample sentence."""
     oc = pyOceanus.Oceanus()
     pp = oc.parse("這是一位食物銀行的金融經理。")
     nns = pyOceanus.get_NN_compounds(pp)
     # assertEqual reports the actual count on failure, unlike
     # assertTrue(len(nns) == 2) which only says "False is not true".
     self.assertEqual(len(nns), 2)
Example #7
0
 def test_parse(self):
     """Smoke test: parsing a short sentence must complete without raising."""
     # The parse result itself is not inspected; reaching the assertion
     # below is the whole check.
     pyOceanus.Oceanus().parse("這是一個測試的句子。")
     self.assertTrue(True)
Example #8
0
 def test_init(self):
     """Smoke test: constructing ``pyOceanus.Oceanus`` with no arguments must not raise."""
     client = pyOceanus.Oceanus()
     # Nothing is checked on the instance; getting here is the pass condition.
     self.assertTrue(True)