def main():
    """Command-line entry point for pyhanlp.

    Builds an argparse CLI with four sub-commands (segment, parse, serve,
    update), handles the ``-v/--version`` flag, then dispatches on the chosen
    task.  Reads lines from stdin for the segment/parse tasks.
    """
    # With no arguments at all, behave as if --help was requested.
    if len(sys.argv) == 1:
        sys.argv.append('--help')
    arg_parser = argparse.ArgumentParser(
        description='HanLP: Han Language Processing v{}'.format(
            HANLP_JAR_VERSION))
    arg_parser.add_argument('-v', '--version', required=False,
                            action='store_true',
                            help='show installed versions of HanLP')
    task_parser = arg_parser.add_subparsers(dest="task",
                                            help='which task to perform?')
    segment_parser = task_parser.add_parser(name='segment',
                                            help='word segmentation')
    # --tag / --no-tag are mutually exclusive; default (below) is tag=True.
    tag_parser = segment_parser.add_mutually_exclusive_group(required=False)
    tag_parser.add_argument('--tag', dest='tag', action='store_true',
                            help='show part-of-speech tags')
    tag_parser.add_argument('--no-tag', dest='tag', action='store_false',
                            help='don\'t show part-of-speech tags')
    segment_parser.set_defaults(tag=True)
    segment_parser.add_argument(
        '-a', '--algorithm', type=str, default='viterbi',
        help='algorithm of segmentation e.g. perceptron')
    parse_parser = task_parser.add_parser(name='parse',
                                          help='dependency parsing')
    server_parser = task_parser.add_parser(
        name='serve',
        help='start http server',
        description='A http server for HanLP')
    server_parser.add_argument('--port', type=int, default=8765)
    update_parser = task_parser.add_parser(name='update',
                                           help='update jar and data of HanLP')

    def add_args(p):
        # Shared option: both segment and parse accept a config path.
        p.add_argument("--config", default=PATH_CONFIG,
                       help='path to hanlp.properties')
        # p.add_argument("--action", dest="action", default='predict',
        #                help='Which action (train, test, predict)?')

    add_args(segment_parser)
    add_args(parse_parser)

    # Version flag is checked before parse_args so it works with any task.
    if '-v' in sys.argv or '--version' in sys.argv:
        print('jar {}: {}'.format(HANLP_JAR_VERSION, HANLP_JAR_PATH))
        data_version = hanlp_installed_data_version()
        print('data {}: {}'.format(
            data_version if data_version else '自定义', HANLP_DATA_PATH))
        print('config : {}'.format(
            os.path.join(STATIC_ROOT, 'hanlp.properties')))
        exit(0)

    args = arg_parser.parse_args()

    def eprint(*args, **kwargs):
        # print() redirected to stderr.
        print(*args, file=sys.stderr, **kwargs)

    def die(msg):
        # Print an error to stderr and exit with failure status.
        eprint(msg)
        exit(1)

    # Point HanLP at a user-supplied properties file, if one was given.
    if hasattr(args, 'config') and args.config:
        if os.path.isfile(args.config):
            JClass('com.hankcs.hanlp.utility.Predefine'
                   ).HANLP_PROPERTIES_PATH = args.config
        else:
            die('Can\'t find config file {}'.format(args.config))

    if args.task == 'segment':
        segmenter = None
        try:
            segmenter = HanLP.newSegment(args.algorithm)
        except JavaException as e:
            # Map the two known Java failure modes to readable CLI errors.
            if e.javaClass() == JClass('java.lang.IllegalArgumentException'):
                die('invalid algorithm {}'.format(args.algorithm))
            elif e.javaClass() == JClass('java.lang.RuntimeException'):
                die('failed to load required model')
        is_lexical_analyzer = hasattr(segmenter, 'analyze')
        if not args.tag:
            # NOTE(review): both branches clear ShowTermNature; the lexical
            # analyzer additionally disables POS tagging on the segmenter.
            if is_lexical_analyzer:
                segmenter.enablePartOfSpeechTagging(False)
                JClass('com.hankcs.hanlp.HanLP$Config').ShowTermNature = False
            else:
                JClass('com.hankcs.hanlp.HanLP$Config').ShowTermNature = False
        # Segment stdin line by line, emitting space-joined terms.
        for line in sys.stdin:
            line = line.strip()
            print(' '.join(term.toString() for term in
                           segmenter.seg(any2utf8(line))))
    elif args.task == 'parse':
        # Dependency-parse stdin line by line.
        for line in sys.stdin:
            line = line.strip()
            print(HanLP.parseDependency(any2utf8(line)))
    elif args.task == 'serve':
        # The bundled HTTP server only supports Python 3.
        if PY == 3:
            from pyhanlp import server
            server.run(port=args.port)
        else:
            die('现在server.py暂时不支持Python2,欢迎参与移植')
    elif args.task == 'update':
        # Manual installs are not auto-upgradable.
        if hanlp_installed_data_version() == '手动安装':
            die('手动配置不支持自动升级,若要恢复自动安装,请清除HANLP相关环境变量')
        else:
            from pyhanlp.static import update_hanlp
            update_hanlp()
def testSyntheticMethod(self):
    """A method declared on a generic interface is callable through its
    synthetic bridge on the implementing class."""
    impl = jpype.JClass('jpype.attr.SyntheticMethods$GenericImpl')()
    argument = JClass('java.util.ArrayList')()
    impl.foo(argument)
def testCallStaticUnicodeString(self):
    """An instance method taking two JStrings echoes both values back."""
    instance = JClass('jpype.attr.Test1')()
    result = instance.testString(JString(u"abcd"), JString(u"efghi"))
    self.assertEqual(result[0], 'abcd')
    self.assertEqual(result[1], 'efghi')
def testCallSuperclassMethod(self):
    """A Test2 instance exposes both its own method and the one inherited
    from Test1."""
    instance = JClass('jpype.attr.Test2')()
    instance.test2Method()
    instance.test1Method()
def testSuperToString(self):
    """str() on the proxy delegates to the Java toString() implementation."""
    instance = JClass('jpype.attr.Test2')()
    self.assertEqual(str(instance), 'aaa')
def testSetStaticValue(self):
    """Assigning to a static field through the class proxy updates the
    Java-side value; reset() restores the fixture afterwards."""
    test_class = JClass('jpype.attr.Test1')
    test_class.objectValue = JClass('java.lang.Integer')(43)
    self.assertEqual(str(test_class.objectValue), "43")
    test_class.reset()
def testReturnSubClass(self):
    """getSubClass() wraps its return value as the SubHolder subclass."""
    instance = JClass('jpype.attr.Test1')()
    returned = instance.getSubClass()
    self.assertIsInstance(returned, JClass('jpype.attr.SubHolder'))
def throwByJavaException():
    """Raise an IOException from Java code, for exception-mapping tests."""
    exception_test = JClass('jpype.exc.ExceptionTest')
    exception_test.throwIOException()
# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # # ***************************************************************************** from jpype import startJVM, getDefaultJVMPath, JClass startJVM(getDefaultJVMPath(), '-Djava.class.path=c:/tools/lucene-1.4.3/lucene-1.4.3.jar') QueryParser = JClass("org.apache.lucene.queryParser.QueryParser") IndexSearcher = JClass("org.apache.lucene.search.IndexSearcher") IndexReader = JClass("org.apache.lucene.index.IndexReader") StandardAnalyzer = JClass( "org.apache.lucene.analysis.standard.StandardAnalyzer") FSDirectory = JClass("org.apache.lucene.store.FSDirectory") IndexWriter = JClass("org.apache.lucene.index.IndexWriter") SimpleAnalyzer = JClass("org.apache.lucene.analysis.SimpleAnalyzer") IndexWriter('c:/temp/lucene', SimpleAnalyzer(), True).close() directory = FSDirectory.getDirectory("c:/temp/lucene", False) reader = IndexReader.open(directory) searcher = IndexSearcher(reader) queryparser = QueryParser.parse("wenger", "contents", StandardAnalyzer()) print(queryparser.rewrite)
def setUp(self):
    """Bind the overload-resolution fixtures: the A/B/C test classes, one
    instance of each, and one instance of every I1Impl..I8Impl interface
    implementation."""
    common.JPypeTestCase.setUp(self)
    self.__jp = self.jpype.overloads
    self._aclass = JClass('jpype.overloads.Test1$A')
    self._bclass = JClass('jpype.overloads.Test1$B')
    self._cclass = JClass('jpype.overloads.Test1$C')
    self._a = self._aclass()
    self._b = self._bclass()
    self._c = self._cclass()
    # One instance per interface implementation, bound as _i1impl.._i8impl.
    for index in range(1, 9):
        impl = JClass('jpype.overloads.Test1$I{}Impl'.format(index))()
        setattr(self, '_i{}impl'.format(index), impl)
# Demo: add a new DictionaryItem to Zemberek's Turkish morphology at runtime.
from os.path import join
from jpype import JClass, JString, getDefaultJVMPath, shutdownJVM, startJVM

if __name__ == '__main__':
    # Path to the Zemberek fat jar relative to this script.
    ZEMBEREK_PATH: str = join('..', '..', 'bin', 'zemberek-full.jar')

    startJVM(
        getDefaultJVMPath(),
        '-ea',
        f'-Djava.class.path={ZEMBEREK_PATH}',
        convertStrings=False
    )

    # Resolve the Zemberek classes used by this demo.
    TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')
    DictionaryItem: JClass = JClass(
        'zemberek.morphology.lexicon.DictionaryItem'
    )
    RootAttribute: JClass = JClass('zemberek.core.turkish.RootAttribute')
    PrimaryPos: JClass = JClass('zemberek.core.turkish.PrimaryPos')
    SecondaryPos: JClass = JClass('zemberek.core.turkish.SecondaryPos')
    WordAnalysis: JClass = JClass('zemberek.morphology.analysis.WordAnalysis')

    morphology: TurkishMorphology = TurkishMorphology.createWithDefaults()

    def test(inp: str, new_item: DictionaryItem):
        """Print the analyses of *inp* before *new_item* is added, then drop
        the morphology cache so later analyses see the new item."""
        print(f'Parses for {inp} before adding {new_item}')
        before: WordAnalysis = morphology.analyze(JString(inp))
        # NOTE(review): print_results is not defined in this chunk —
        # presumably a helper elsewhere in the file; verify.
        print_results(before)
        morphology.invalidateCache()
def __init__(self, parent):
    """Build the GUI widget: load the HID network, set up text formats and
    timers, and start a JVM with the SMILE Bayesian-network library.

    :param parent: parent QWidget passed through to QWidget.__init__.
    """
    QWidget.__init__(self, parent)
    self.buttonThread = QThread()
    self.node = 'gui'
    self.cost = 0
    self.hsm_node = None
    # Package directory of aui.mi, used to locate bundled network files.
    self.d = os.path.dirname(sys.modules['aui.mi'].__file__)
    # Hierarchical interaction network and its per-node 'HSM' attributes.
    self.hid = nx.read_gpickle(os.path.join(self.d, 'networks/hid.gpickle'))
    self.hsm = nx.get_node_attributes(self.hid, 'HSM')
    self.hsm_evidence = {}
    self.question = None
    self.answerTimer = QTimer()
    self.answerTimer.setSingleShot(True)
    self.small_pause = QTimer()
    # Text formats: green/normal for decisions, blue/bold for questions,
    # black/normal for informational text.
    self.decisionFormat = QTextCharFormat()
    self.decisionFormat.setForeground(QtGui.QColor(76, 175, 80))
    self.decisionFormat.setFontWeight(QtGui.QFont.Normal)
    self.questionFormat = QTextCharFormat()
    self.questionFormat.setForeground(QtGui.QColor(48, 131, 251))
    self.questionFormat.setFontWeight(QtGui.QFont.Bold)
    self.infoFormat = QTextCharFormat()
    self.infoFormat.setFontWeight(QtGui.QFont.Normal)
    self.infoFormat.setForeground(Qt.black)
    # NOTE(review): this dict is immediately overwritten by {} below —
    # it looks like leftover debug defaults; confirm before removing.
    self.evidence = {
        'battery_level': 'Ok', 'wifi_level': 'Ok', 'LM': 'MV',
        'focus': 'S', 'PC': 'AV', 'AS_visible': 'True',
        'wifi_visible': 'True', 'battery_visible': 'True', 'C2': 'MV',
        'C1': 'MV', 'AV_visible': 'True', 'GM': 'AV',
        'joystick_direction': 'Backwards', 'SA': 'L2', 'SL': 'medium',
        'CL': 'medium', 'Context': 'Exploration'
    }
    self.evidence = {}
    self.decision_path = []
    jvmPath = jpype.getDefaultJVMPath()
    # NOTE(review): unused; os.path.join discards the first component when
    # the second is absolute, so this is just '/Library/Java/Extensions/'.
    jarpath = os.path.join(os.path.abspath('.'), '/Library/Java/Extensions/')
    jpype.startJVM(jvmPath,
                   "-Djava.class.path=/Library/Java/Extensions/smile.jar")
    # SMILE classes for Bayesian-network inference and value-of-information.
    self.net = JClass("smile.Network")
    self.voi = JClass("smile.ValueOfInfo")
    self.setupUi(self)
    self.initUI()
@author: Asile """ from os.path import join from typing import List from jpype import JClass, getDefaultJVMPath, java, shutdownJVM, startJVM ZEMBEREK_PATH: str = join('..', '..', 'bin', 'zemberek-full.jar') startJVM(getDefaultJVMPath(), '-ea', f'-Djava.class.path={ZEMBEREK_PATH}', convertStrings=False) TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology') morphology: TurkishMorphology = TurkishMorphology.createWithDefaults() dictionary = { "aç-Verb": "Açmak fiilinin emri hali.", "aç-Noun": "Yemek yememiş kimse." } def POS(pos, analysis): for i, analysis in enumerate(analysis, start=1): pos.append(f'{str(analysis.getLemmas()[0])}' f'-{analysis.getPos().shortForm}') return pos
# Demo: text classification with Zemberek's FastTextClassifier; trains a
# model on the news-title-category-set if none is present.
from os.path import isfile, join
from subprocess import call

from jpype import JClass, getDefaultJVMPath, java, shutdownJVM, startJVM

if __name__ == '__main__':
    # Path to the Zemberek fat jar relative to this script.
    ZEMBEREK_PATH: str = join('..', '..', 'bin', 'zemberek-full.jar')

    startJVM(getDefaultJVMPath(), '-ea',
             f'-Djava.class.path={ZEMBEREK_PATH}',
             convertStrings=False)

    # Resolve the Zemberek classes used by this demo.
    FastTextClassifier: JClass = JClass(
        'zemberek.classification.FastTextClassifier')
    TurkishTokenizer: JClass = JClass('zemberek.tokenization.TurkishTokenizer')
    ScoredItem: JClass = JClass('zemberek.core.ScoredItem')
    Paths: JClass = JClass('java.nio.file.Paths')

    path: str = join('..', '..', 'data', 'classification')

    # Train a model if the pre-built one is missing; training requires the
    # raw news-title-category-set file to exist.
    if not isfile(join(path, 'news-title-category-set.model')):
        print('Could not find a model. Training a new one...')

        if not isfile(join(path, 'news-title-category-set')):
            raise FileNotFoundError('Could not train a model!'
                                    ' Please include news-title-category-set!')

        # NOTE(review): this chunk is truncated here — the subprocess command
        # list continues outside the visible source.
        call([
def testGetStaticByInstance(self):
    """A static field is readable through an instance reference."""
    instance = JClass('jpype.attr.Test1')()
    self.assertEqual(str(instance.objectValue), "234")
# coding: utf-8 # In[1]: from jpype import JClass, JString, getDefaultJVMPath, shutdownJVM, startJVM # In[2]: startJVM(getDefaultJVMPath(), '-ea', '-Djava.class.path=zemberek-full.jar', convertStrings=False) # In[3]: Paths: JClass = JClass('java.nio.file.Paths') # In[33]: modelRoot = Paths.get("./enamex_model") # In[34]: TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology') PerceptronNer: JClass = JClass('zemberek.ner.PerceptronNer') # In[35]: morphology = TurkishMorphology.createWithDefaults() # In[36]:
def testGetNonStatic(self):
    """An instance field is readable through the proxy."""
    instance = JClass('jpype.attr.Test1')()
    self.assertEqual(instance.stringValue, "Foo")
# Demo: split a Turkish paragraph into sentences with Zemberek.
#
# Fixes: (1) ``join`` was used without being imported; (2) two bare
# ``except:`` clauses swallowed SystemExit/KeyboardInterrupt — narrowed to
# ``except Exception``; (3) ``exit(False)`` exits with status 0, signalling
# success on failure — replaced with ``exit(1)``.
from os.path import join

from jpype import JClass, getDefaultJVMPath, shutdownJVM, startJVM

if __name__ == '__main__':
    # Path to the Zemberek fat jar relative to this script.
    zemberek_path: str = join('..', '..', 'Dependencies', 'Zemberek-Python',
                              'bin', 'zemberek-full.jar')
    try:
        startJVM(getDefaultJVMPath(), '-ea',
                 f'-Djava.class.path={zemberek_path}',
                 convertStrings=False)
    except Exception:
        exit(1)

    TurkishSentenceExtractor: JClass = JClass(
        'zemberek.tokenization.TurkishSentenceExtractor')
    # DEFAULT is the extractor trained on Zemberek's bundled model.
    extractor: TurkishSentenceExtractor = TurkishSentenceExtractor.DEFAULT

    sentences = extractor.fromParagraph((
        'Prof. Dr. Veli Davul açıklama yaptı. Kimse %6.5 lik enflasyon oranını beğenmemiş!'
        'Kimse %6.5 lik enflasyon oranını beğenmemiş!'
        'Oysa maçta ikinci olmuştuk... Değil mi?'))

    for i, word in enumerate(sentences):
        print(f'Sentence {i+1}: {word}')

    try:
        shutdownJVM()
    except Exception:
        exit(1)
def testSetNonStaticValue(self):
    """Writing an instance field round-trips through the proxy."""
    instance = JClass('jpype.attr.Test1')()
    instance.stringValue = "bar"
    self.assertEqual(instance.stringValue, "bar")
def _system():
    """Return the java.lang.System class proxy."""
    system_class = JClass('java.lang.System')
    return system_class
def testCallWithClass(self):
    """A java.lang.Class argument can be passed as a JClass proxy."""
    instance = JClass('jpype.attr.Test1')()
    comparable_class = JClass('java.lang.Comparable')
    instance.callWithClass(comparable_class)
def __getattr__(self, attr):
    """Attribute access: attach the JVM to this thread, resolve the class
    if it is still a name, then delegate to the underlying proxy."""
    _attach_jvm_to_thread()
    self._lazy_load_jclass()
    return getattr(self._proxy, attr)

def _lazy_load_jclass(self):
    """Replace a stored class-name string with the real JClass on first use."""
    if type(self._proxy) is str:
        self._proxy = JClass(self._proxy)

def __call__(self, *args):
    """Instantiate the wrapped Java class and wrap the result in SafeJClass."""
    self._lazy_load_jclass()
    if args:
        proxy = self._proxy(*args)
    else:
        proxy = self._proxy()
    return SafeJClass(proxy)

# API list
CustomDictionary = LazyLoadingJClass(
    'com.hankcs.hanlp.dictionary.CustomDictionary')
HanLP = SafeJClass('com.hankcs.hanlp.HanLP')
HanLP.Config = JClass('com.hankcs.hanlp.HanLP$Config')
PerceptronLexicalAnalyzer = SafeJClass(
    'com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
DoubleArrayTrieSegment = SafeJClass(
    'com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment')
AhoCorasickDoubleArrayTrie = SafeJClass(
    'com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie')
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')
def testCreateDate(self):
    """java.util.Date(long) stores the epoch milliseconds it was built with."""
    epoch_millis = 1448799485000
    date = JClass('java.util.Date')(epoch_millis)
    self.assertEqual(epoch_millis, date.getTime())
def __init__(self, proxy):
    """Thread-safe variant of JClass.

    :param proxy: either the fully-qualified name of a Java class (a str,
        resolved eagerly via JClass) or an already-constructed Java object
        to wrap directly.
    """
    self._proxy = JClass(proxy) if type(proxy) is str else proxy
def testComplexMethodOvlerloading(self):
    """Overload resolution picks C's foo(int) for an int argument but falls
    back to the inherited foo() for no arguments."""
    instance = JClass('jpype.attr.TestOverloadC')()
    self.assertEqual(instance.foo(1), "foo(int) in C: 1")
    self.assertEqual(instance.foo(), "foo() in A")
def _lazy_load_jclass(self):
    """Resolve a stored class-name string into a real JClass on first use."""
    proxy = self._proxy
    if type(proxy) is str:
        self._proxy = JClass(proxy)
def testCallOverloadedMethodWithCovariance(self):
    """StringBuffer.delete resolves despite the covariant return type
    introduced in JDK 5 (a JDK5-specific problem)."""
    string_buffer = JClass('java.lang.StringBuffer')()
    string_buffer.delete(0, 0)
# Script: stemming and sentence normalization with Zemberek.
import re
from trstop import trstop
import string
from typing import List
from jpype import JClass, JString, getDefaultJVMPath, shutdownJVM, startJVM, java
from examples import DATA_PATH, ZEMBEREK_PATH
from pathlib import Path

startJVM(getDefaultJVMPath(), '-ea',
         '-Djava.class.path=%s' % (ZEMBEREK_PATH))

TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')
TurkishSentenceNormalizer: JClass = JClass(
    'zemberek.normalization.TurkishSentenceNormalizer'
)
Paths: JClass = JClass('java.nio.file.Paths')

morphology = TurkishMorphology.createWithDefaults()


def stem(text: str) -> str:
    """Return the first lemma of the first analysis of *text*, or None when
    the morphology produces no analyses (the loop body never runs)."""
    # NOTE(review): ``WordAnalysis`` is not defined in this chunk; local
    # annotations are not evaluated at runtime, so this does not fail —
    # but the name should be imported or the annotation dropped.
    results: WordAnalysis = morphology.analyze(JString(text))
    for result in results:
        return str(result.getLemmas()[0])


# NOTE(review): this chunk is truncated here — the constructor call
# continues outside the visible source.
normalizer = TurkishSentenceNormalizer(
    TurkishMorphology.createWithDefaults(),
    Paths.get(str(DATA_PATH.joinpath('normalization'))),
def testCallUnicodeString(self):
    """A static method taking unicode strings echoes both values back."""
    returned = JClass('jpype.attr.Test1').testStaticString(u"a", u"b")
    self.assertEqual(returned[0], 'a')
    self.assertEqual(returned[1], 'b')
# NOTE(review): the next two statements appear to be the tail of a loop or
# function whose start is outside the visible source.
AD_CV_SCIKIT.append(makaleFull)
Vocab += makaleN

if __name__ == '__main__':
    np.set_printoptions(threshold=np.inf)
    # Path to the Zemberek fat jar relative to this script.
    ZEMBEREK_PATH: str = join('bin', 'zemberek-full.jar')
    ### ZEMBEREK INIT
    startJVM(getDefaultJVMPath(), '-ea',
             f'-Djava.class.path={ZEMBEREK_PATH}',
             convertStrings=False)
    # Resolve the Zemberek classes used for tokenization, morphology and
    # spell checking.
    TurkishSpellChecker: JClass = JClass(
        'zemberek.normalization.TurkishSpellChecker')
    TurkishTokenizer: JClass = JClass('zemberek.tokenization.TurkishTokenizer')
    TurkishLexer: JClass = JClass('zemberek.tokenization.antlr.TurkishLexer')
    TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')
    Token: JClass = JClass('zemberek.tokenization.Token')
    WordAnalysis: JClass = JClass('zemberek.morphology.analysis.WordAnalysis')
    tokenizer: TurkishTokenizer = TurkishTokenizer.ALL
    morphology: TurkishMorphology = TurkishMorphology.createWithDefaults()
    spell_checker: TurkishSpellChecker = TurkishSpellChecker(morphology)
    Paths: JClass = JClass('java.nio.file.Paths')
    TurkishSentenceNormalizer: JClass = JClass(
        'zemberek.normalization.TurkishSentenceNormalizer')
    # NOTE(review): duplicate assignment — Paths was already bound above.
    Paths: JClass = JClass('java.nio.file.Paths')