def testPreproc(self):
    """Smoke-test OceanusDataPreproc: every derived view must be non-empty.

    Parses a two-sentence Chinese sample and checks that tokens, parse
    trees, and dependencies were all extracted from the Oceanus result.
    """
    parser = pyOceanus.Oceanus()
    parsed = parser.parse("這是一個測試的句子,有的地方沒有句號。句號後有一個新句子,這就是全部的材料。")
    preproc = OceanusDataPreproc(parsed)
    # Each accessor should yield at least one element for this input.
    for view in (preproc.tokens(), preproc.trees(), preproc.deps()):
        self.assertGreater(len(view), 0)
def test_features(self):
    """KorFeatures over a two-sentence sample yields positive core features.

    Runs the full pipeline (parse -> preprocess -> KorFeatures with the
    topic model skipped) and checks every expected feature is > 0.
    """
    parser = pyOceanus.Oceanus()
    parsed = parser.parse("這個項目測試一個簡單的句子,雖然還有一個沒有句號的情況。句號後有一個新句子,這就是全部的材料。")
    preproc = OceanusDataPreproc(parsed)
    korFeats = KorFeatures(
        "test_sentence",
        preproc.tokens(), preproc.trees(), preproc.deps(),
        skipTopic=True)
    feats = korFeats.feats
    print("KorFeatures: ")
    print(korFeats.feats)
    self.assertTrue(len(korFeats.feats) > 0)
    # Every one of these features should be strictly positive for the
    # sample text above.
    positive_keys = [
        'CharFreq_Q25', 'WordFreq_Q25', 'CharRank_800', 'WordRank_1000',
        'nChar', 'CharStrokes_Q50', 'nWord', 'WordLen_Q25', 'ClsLen_Q25',
        'SenLen_Q25', 'PropDepth', 'SynSim', 'nWordBeforeMV', 'nConn',
        'rPronounNoun', 'NounOverlap_Local', 'SemanticOverlap_Local',
    ]
    for key in positive_keys:
        self.assertTrue(feats[key] > 0)
def set_Oceanus_Endpoint(url):
    """Rebind the module-level Oceanus client `oc` to the given endpoint URL."""
    global oc
    oc = pyOceanus.Oceanus(url)
import pandas as pd
import re

import pyOceanus
import pdb
from itertools import chain

# Module-level Oceanus client, constructed eagerly so importers can use it
# right away. Construction failure (e.g. service unreachable) is reported
# but non-fatal; call set_Oceanus_Endpoint() to rebind later.
try:
    oc = pyOceanus.Oceanus()
except Exception as ex:
    print(ex)

cache_dict = {}


def set_Oceanus_Endpoint(url):
    """Rebind the module-level Oceanus client `oc` to the given endpoint URL."""
    global oc
    oc = pyOceanus.Oceanus(url)


def make_example_data(cwn_data):
    """Flatten CWN sense data into a DataFrame of example sentences.

    Args:
        cwn_data: mapping of lemma -> {senseid -> sense object}; each sense
            object carries its example sentences under "example_cont".

    Returns:
        pandas.DataFrame with columns ["lemma", "senseid", "widx", "exid",
        "example"], one row per example sentence.
    """
    rows = []
    for lemma, senses in cwn_data.items():
        for senseid, senseObj in senses.items():
            for ex_i, ex in enumerate(senseObj["example_cont"]):
                # Position of the "<" target-word marker, recorded before
                # the markup characters are stripped from the sentence.
                widx = ex.find("<")
                sent = re.sub("[<>'\"]", "", ex)
                sent = sent.strip()
                rows.append((lemma, senseid, widx, ex_i, sent))
    sense_data = pd.DataFrame.from_records(
        rows, columns=["lemma", "senseid", "widx", "exid", "example"])
    # BUG FIX: the DataFrame was built but never returned, so callers
    # always received None.
    return sense_data
from os.path import abspath, dirname from itertools import chain import pyOceanus from FluidSeg import FluidSeg from FluidSeg import TokenData import config oc = pyOceanus.Oceanus(config.OCEANUS_ENDPOINT) def fluid_seg(text, lexicon): fseg = FluidSeg(lexicon) segData = fseg.segment(text) try: od = oc.parse(text) preseg = list(chain.from_iterable(od.tokens)) preseg = [TokenData(x[0], x[3], x[4]) for x in preseg] except Exception as ex: print("cannot process text content") return flask.make_response("cannot process text content", 400) segData.setPresegment(preseg) gran_label = ["0.00", "0.33", "0.66", "1.00", "preseg", "token"] seg_list = [ segData.toSegmentedToken(segData.preseg, granularity=0.00), segData.toSegmentedToken(segData.preseg, granularity=0.33), segData.toSegmentedToken(segData.preseg, granularity=0.66), segData.toSegmentedToken(segData.preseg, granularity=1.00), segData.toSegmentedToken(segData.preseg), segData.toSegmentedToken(segData.tokens),
def test_getNNCompounds(self):
    """A sentence containing two NN compounds yields exactly two extractions.

    Uses assertEqual instead of assertTrue(len(...) == 2) so a failure
    reports the actual count instead of just "False is not true".
    """
    parser = pyOceanus.Oceanus()
    parsed = parser.parse("這是一位食物銀行的金融經理。")
    compounds = pyOceanus.get_NN_compounds(parsed)
    self.assertEqual(len(compounds), 2)
def test_parse(self):
    """Smoke test: parsing a simple sentence must complete without raising."""
    client = pyOceanus.Oceanus()
    result = client.parse("這是一個測試的句子。")
    # Reaching this point means parse() did not raise; the result itself
    # is not inspected here.
    self.assertTrue(True)
def test_init(self):
    """Smoke test: constructing an Oceanus client must not raise."""
    client = pyOceanus.Oceanus()
    # Construction succeeding is the whole assertion.
    self.assertTrue(True)