Example #1
File: main.py Project: panxiangwei/pyhanlp
def main():
    if len(sys.argv) == 1:
        sys.argv.append('--help')

    arg_parser = argparse.ArgumentParser(
        description='HanLP: Han Language Processing v{}'.format(
            HANLP_JAR_VERSION))
    arg_parser.add_argument('-v',
                            '--version',
                            required=False,
                            action='store_true',
                            help='show installed versions of HanLP')
    task_parser = arg_parser.add_subparsers(dest="task",
                                            help='which task to perform?')
    segment_parser = task_parser.add_parser(name='segment',
                                            help='word segmentation')
    tag_parser = segment_parser.add_mutually_exclusive_group(required=False)
    tag_parser.add_argument('--tag',
                            dest='tag',
                            action='store_true',
                            help='show part-of-speech tags')
    tag_parser.add_argument('--no-tag',
                            dest='tag',
                            action='store_false',
                            help='don\'t show part-of-speech tags')
    segment_parser.set_defaults(tag=True)
    segment_parser.add_argument(
        '-a',
        '--algorithm',
        type=str,
        default='viterbi',
        help='algorithm of segmentation e.g. perceptron')
    parse_parser = task_parser.add_parser(name='parse',
                                          help='dependency parsing')
    server_parser = task_parser.add_parser(
        name='serve',
        help='start http server',
        description='A http server for HanLP')
    server_parser.add_argument('--port', type=int, default=8765)
    update_parser = task_parser.add_parser(name='update',
                                           help='update jar and data of HanLP')

    def add_args(p):
        p.add_argument("--config",
                       default=PATH_CONFIG,
                       help='path to hanlp.properties')
        # p.add_argument("--action", dest="action", default='predict',
        #                help='Which action (train, test, predict)?')

    add_args(segment_parser)
    add_args(parse_parser)

    if '-v' in sys.argv or '--version' in sys.argv:
        print('jar  {}: {}'.format(HANLP_JAR_VERSION, HANLP_JAR_PATH))
        data_version = hanlp_installed_data_version()
        print('data {}: {}'.format(data_version if data_version else '自定义',
                                   HANLP_DATA_PATH))
        print('config    : {}'.format(
            os.path.join(STATIC_ROOT, 'hanlp.properties')))
        exit(0)

    args = arg_parser.parse_args()

    def eprint(*args, **kwargs):
        print(*args, file=sys.stderr, **kwargs)

    def die(msg):
        eprint(msg)
        exit(1)

    if hasattr(args, 'config') and args.config:
        if os.path.isfile(args.config):
            JClass('com.hankcs.hanlp.utility.Predefine'
                   ).HANLP_PROPERTIES_PATH = args.config
        else:
            die('Can\'t find config file {}'.format(args.config))

    if args.task == 'segment':
        segmenter = None
        try:
            segmenter = HanLP.newSegment(args.algorithm)
        except JavaException as e:
            if e.javaClass() == JClass('java.lang.IllegalArgumentException'):
                die('invalid algorithm {}'.format(args.algorithm))
            elif e.javaClass() == JClass('java.lang.RuntimeException'):
                die('failed to load required model')

        is_lexical_analyzer = hasattr(segmenter, 'analyze')
        if not args.tag:
            if is_lexical_analyzer:
                segmenter.enablePartOfSpeechTagging(False)
                JClass('com.hankcs.hanlp.HanLP$Config').ShowTermNature = False
            else:
                JClass('com.hankcs.hanlp.HanLP$Config').ShowTermNature = False
        for line in sys.stdin:
            line = line.strip()
            print(' '.join(term.toString()
                           for term in segmenter.seg(any2utf8(line))))
    elif args.task == 'parse':
        for line in sys.stdin:
            line = line.strip()
            print(HanLP.parseDependency(any2utf8(line)))
    elif args.task == 'serve':
        if PY == 3:
            from pyhanlp import server
            server.run(port=args.port)
        else:
            die('现在server.py暂时不支持Python2,欢迎参与移植')  # "server.py does not yet support Python 2; contributions to port it are welcome"
    elif args.task == 'update':
        if hanlp_installed_data_version() == '手动安装':  # i.e. data was installed manually
            # "A manual setup cannot be auto-upgraded; to restore automatic
            # installation, clear the HanLP-related environment variables."
            die('手动配置不支持自动升级,若要恢复自动安装,请清除HANLP相关环境变量')
        else:
            from pyhanlp.static import update_hanlp
            update_hanlp()
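
A minimal, hedged sketch of the same segment/parse path used programmatically, without the CLI plumbing above. It assumes only that pyhanlp exposes HanLP at import time, as this project does; the input sentences are made-up examples.

# Hedged sketch: the same HanLP calls the CLI above dispatches to.
from pyhanlp import HanLP

segmenter = HanLP.newSegment('viterbi')      # same default as --algorithm
for term in segmenter.seg('商品和服务'):       # hypothetical input text
    print(term.toString())

print(HanLP.parseDependency('他完成了任务'))    # hypothetical input text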
Example #2
 def testSyntheticMethod(self):
     h = jpype.JClass('jpype.attr.SyntheticMethods$GenericImpl')()
     h.foo(JClass('java.util.ArrayList')())
Example #3
    def testCallStaticUnicodeString(self):
        h = JClass('jpype.attr.Test1')()
        v = h.testString(JString(u"abcd"), JString(u"efghi"))

        self.assertEqual(v[0], 'abcd')
        self.assertEqual(v[1], 'efghi')
Example #4
 def testCallSuperclassMethod(self):
     h = JClass('jpype.attr.Test2')()
     h.test2Method()
     h.test1Method()
Example #5
 def testSuperToString(self):
     h = JClass('jpype.attr.Test2')()
     self.assertEqual(str(h), 'aaa')
Example #6
 def testSetStaticValue(self):
     JClass('jpype.attr.Test1').objectValue = JClass('java.lang.Integer')(
         43)
     self.assertEqual(str(JClass('jpype.attr.Test1').objectValue), "43")
     JClass('jpype.attr.Test1').reset()
Example #7
 def testReturnSubClass(self):
     h = JClass('jpype.attr.Test1')()
     v = h.getSubClass()
     self.assertIsInstance(v, JClass('jpype.attr.SubHolder'))
Example #8
def throwByJavaException():
    JClass('jpype.exc.ExceptionTest').throwIOException()
Example #9
# 	   http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
#
# *****************************************************************************

from jpype import startJVM, getDefaultJVMPath, JClass

startJVM(getDefaultJVMPath(),
         '-Djava.class.path=c:/tools/lucene-1.4.3/lucene-1.4.3.jar')

QueryParser = JClass("org.apache.lucene.queryParser.QueryParser")
IndexSearcher = JClass("org.apache.lucene.search.IndexSearcher")
IndexReader = JClass("org.apache.lucene.index.IndexReader")
StandardAnalyzer = JClass(
    "org.apache.lucene.analysis.standard.StandardAnalyzer")
FSDirectory = JClass("org.apache.lucene.store.FSDirectory")
IndexWriter = JClass("org.apache.lucene.index.IndexWriter")
SimpleAnalyzer = JClass("org.apache.lucene.analysis.SimpleAnalyzer")

IndexWriter('c:/temp/lucene', SimpleAnalyzer(), True).close()

directory = FSDirectory.getDirectory("c:/temp/lucene", False)
reader = IndexReader.open(directory)
searcher = IndexSearcher(reader)
query = QueryParser.parse("wenger", "contents", StandardAnalyzer())
print(query.rewrite)  # prints the bound Java method object, not a rewritten query
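
A hedged follow-up to the snippet above: running the parsed query against the index that was just opened. This assumes the Lucene 1.4-era API, where IndexSearcher.search returns a Hits object (the index created above is empty, so the loop simply reports zero hits).

# Hedged sketch: search the index opened above with the parsed query.
hits = searcher.search(query)
print('%d hit(s)' % hits.length())
for i in range(hits.length()):
    print(hits.doc(i).get('contents'))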
Example #10
 def setUp(self):
     common.JPypeTestCase.setUp(self)
     self.__jp = self.jpype.overloads
     self._aclass = JClass('jpype.overloads.Test1$A')
     self._bclass = JClass('jpype.overloads.Test1$B')
     self._cclass = JClass('jpype.overloads.Test1$C')
     self._a = self._aclass()
     self._b = self._bclass()
     self._c = self._cclass()
     self._i1impl = JClass('jpype.overloads.Test1$I1Impl')()
     self._i2impl = JClass('jpype.overloads.Test1$I2Impl')()
     self._i3impl = JClass('jpype.overloads.Test1$I3Impl')()
     self._i4impl = JClass('jpype.overloads.Test1$I4Impl')()
     self._i5impl = JClass('jpype.overloads.Test1$I5Impl')()
     self._i6impl = JClass('jpype.overloads.Test1$I6Impl')()
     self._i7impl = JClass('jpype.overloads.Test1$I7Impl')()
     self._i8impl = JClass('jpype.overloads.Test1$I8Impl')()
Example #11
from os.path import join

from jpype import JClass, JString, getDefaultJVMPath, shutdownJVM, startJVM

if __name__ == '__main__':

    ZEMBEREK_PATH: str = join('..', '..', 'bin', 'zemberek-full.jar')

    startJVM(
        getDefaultJVMPath(),
        '-ea',
        f'-Djava.class.path={ZEMBEREK_PATH}',
        convertStrings=False
    )

    TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')
    DictionaryItem: JClass = JClass(
        'zemberek.morphology.lexicon.DictionaryItem'
    )
    RootAttribute: JClass = JClass('zemberek.core.turkish.RootAttribute')
    PrimaryPos: JClass = JClass('zemberek.core.turkish.PrimaryPos')
    SecondaryPos: JClass = JClass('zemberek.core.turkish.SecondaryPos')
    WordAnalysis: JClass = JClass('zemberek.morphology.analysis.WordAnalysis')

    morphology: TurkishMorphology = TurkishMorphology.createWithDefaults()

    def test(inp: str, new_item: DictionaryItem):
        print(f'Parses for {inp} before adding {new_item}')
        before: WordAnalysis = morphology.analyze(JString(inp))
        print_results(before)
        morphology.invalidateCache()
Example #12
    def __init__(self, parent):
        QWidget.__init__(self, parent)
        self.buttonThread = QThread()
        self.node = 'gui'
        self.cost = 0
        self.hsm_node = None
        self.d = os.path.dirname(sys.modules['aui.mi'].__file__)
        self.hid = nx.read_gpickle(os.path.join(self.d,
                                                'networks/hid.gpickle'))

        self.hsm = nx.get_node_attributes(self.hid, 'HSM')
        self.hsm_evidence = {}
        self.question = None

        self.answerTimer = QTimer()
        self.answerTimer.setSingleShot(True)

        self.small_pause = QTimer()

        self.decisionFormat = QTextCharFormat()
        self.decisionFormat.setForeground(QtGui.QColor(76, 175, 80))
        self.decisionFormat.setFontWeight(QtGui.QFont.Normal)

        self.questionFormat = QTextCharFormat()
        self.questionFormat.setForeground(QtGui.QColor(48, 131, 251))
        self.questionFormat.setFontWeight(QtGui.QFont.Bold)

        self.infoFormat = QTextCharFormat()
        self.infoFormat.setFontWeight(QtGui.QFont.Normal)
        self.infoFormat.setForeground(Qt.black)

        self.evidence = {
            'battery_level': 'Ok',
            'wifi_level': 'Ok',
            'LM': 'MV',
            'focus': 'S',
            'PC': 'AV',
            'AS_visible': 'True',
            'wifi_visible': 'True',
            'battery_visible': 'True',
            'C2': 'MV',
            'C1': 'MV',
            'AV_visible': 'True',
            'GM': 'AV',
            'joystick_direction': 'Backwards',
            'SA': 'L2',
            'SL': 'medium',
            'CL': 'medium',
            'Context': 'Exploration'
        }
        self.evidence = {}

        self.decision_path = []
        jvmPath = jpype.getDefaultJVMPath()
        jarpath = os.path.join(os.path.abspath('.'),
                               '/Library/Java/Extensions/')
        jpype.startJVM(jvmPath,
                       "-Djava.class.path=/Library/Java/Extensions/smile.jar")
        self.net = JClass("smile.Network")
        self.voi = JClass("smile.ValueOfInfo")

        self.setupUi(self)
        self.initUI()
Example #13
@author: Asile
"""

from os.path import join
from typing import List

from jpype import JClass, getDefaultJVMPath, java, shutdownJVM, startJVM

ZEMBEREK_PATH: str = join('..', '..', 'bin', 'zemberek-full.jar')
startJVM(getDefaultJVMPath(),
         '-ea',
         f'-Djava.class.path={ZEMBEREK_PATH}',
         convertStrings=False)

TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')

morphology: TurkishMorphology = TurkishMorphology.createWithDefaults()

dictionary = {
    "aç-Verb": "Açmak fiilinin emri hali.",
    "aç-Noun": "Yemek yememiş kimse."
}


def POS(pos, analysis):
    for item in analysis:
        pos.append(f'{str(item.getLemmas()[0])}'
                   f'-{item.getPos().shortForm}')
    return pos
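
A quick, hedged usage sketch for the POS helper above. getLemmas and getPos are the same Zemberek accessors the function already uses; the word 'aç' simply matches the toy dictionary defined earlier.

# Hedged sketch: collect 'lemma-POS' tags for a single word.
word_analysis = morphology.analyze('aç')   # all morphological parses of 'aç'
print(POS([], word_analysis))              # e.g. entries like 'aç-Verb', 'aç-Noun'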
Example #14
from os.path import isfile, join
from subprocess import call

from jpype import JClass, getDefaultJVMPath, java, shutdownJVM, startJVM

if __name__ == '__main__':

    ZEMBEREK_PATH: str = join('..', '..', 'bin', 'zemberek-full.jar')

    startJVM(getDefaultJVMPath(),
             '-ea',
             f'-Djava.class.path={ZEMBEREK_PATH}',
             convertStrings=False)

    FastTextClassifier: JClass = JClass(
        'zemberek.classification.FastTextClassifier')
    TurkishTokenizer: JClass = JClass('zemberek.tokenization.TurkishTokenizer')
    ScoredItem: JClass = JClass('zemberek.core.ScoredItem')
    Paths: JClass = JClass('java.nio.file.Paths')

    path: str = join('..', '..', 'data', 'classification')

    if not isfile(join(path, 'news-title-category-set.model')):

        print('Could not find a model. Training a new one...')

        if not isfile(join(path, 'news-title-category-set')):
            raise FileNotFoundError('Could not train a model!'
                                    ' Please include news-title-category-set!')

        call([
Example #15
 def testGetStaticByInstance(self):
     h = JClass('jpype.attr.Test1')()
     self.assertEqual(str(h.objectValue), "234")
Example #16
# coding: utf-8

# In[1]:

from jpype import JClass, JString, getDefaultJVMPath, shutdownJVM, startJVM

# In[2]:

startJVM(getDefaultJVMPath(),
         '-ea',
         '-Djava.class.path=zemberek-full.jar',
         convertStrings=False)

# In[3]:

Paths: JClass = JClass('java.nio.file.Paths')

# In[33]:

modelRoot = Paths.get("./enamex_model")

# In[34]:

TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')
PerceptronNer: JClass = JClass('zemberek.ner.PerceptronNer')

# In[35]:

morphology = TurkishMorphology.createWithDefaults()

# In[36]:
Example #17
 def testGetNonStatic(self):
     h = JClass('jpype.attr.Test1')()
     self.assertEqual(h.stringValue, "Foo")
Example #18
from os.path import join

from jpype import JClass, getDefaultJVMPath, shutdownJVM, startJVM

if __name__ == '__main__':

    zemberek_path: str = join('..', '..', 'Dependencies', 'Zemberek-Python',
                              'bin', 'zemberek-full.jar')

    try:
        startJVM(getDefaultJVMPath(),
                 '-ea',
                 f'-Djava.class.path={zemberek_path}',
                 convertStrings=False)
    except:
        exit(False)

    TurkishSentenceExtractor: JClass = JClass(
        'zemberek.tokenization.TurkishSentenceExtractor')

    extractor: TurkishSentenceExtractor = TurkishSentenceExtractor.DEFAULT

    sentences = extractor.fromParagraph((
        'Prof. Dr. Veli Davul açıklama yaptı. Kimse %6.5 lik enflasyon oranını beğenmemiş!'
        'Kimse %6.5 lik enflasyon oranını beğenmemiş!'
        'Oysa maçta ikinci olmuştuk... Değil mi?'))

    for i, word in enumerate(sentences):
        print(f'Sentence {i+1}: {word}')

    try:
        shutdownJVM()
    except:
        exit(False)
Example #19
 def testSetNonStaticValue(self):
     h = JClass('jpype.attr.Test1')()
     h.stringValue = "bar"
     self.assertEqual(h.stringValue, "bar")
Example #20
 def _system():
     return JClass('java.lang.System')
Example #21
 def testCallWithClass(self):
     h = JClass('jpype.attr.Test1')()
     h.callWithClass(JClass('java.lang.Comparable'))
Example #22
    def __getattr__(self, attr):
        _attach_jvm_to_thread()
        self._lazy_load_jclass()
        return getattr(self._proxy, attr)

    def _lazy_load_jclass(self):
        if type(self._proxy) is str:
            self._proxy = JClass(self._proxy)

    def __call__(self, *args):
        self._lazy_load_jclass()
        if args:
            proxy = self._proxy(*args)
        else:
            proxy = self._proxy()
        return SafeJClass(proxy)


# API list
CustomDictionary = LazyLoadingJClass(
    'com.hankcs.hanlp.dictionary.CustomDictionary')
HanLP = SafeJClass('com.hankcs.hanlp.HanLP')
HanLP.Config = JClass('com.hankcs.hanlp.HanLP$Config')
PerceptronLexicalAnalyzer = SafeJClass(
    'com.hankcs.hanlp.model.perceptron.PerceptronLexicalAnalyzer')
DoubleArrayTrieSegment = SafeJClass(
    'com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment')
AhoCorasickDoubleArrayTrie = SafeJClass(
    'com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie')
IOUtil = SafeJClass('com.hankcs.hanlp.corpus.io.IOUtil')
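
A short, hedged usage sketch for the wrappers listed above. SafeJClass resolves its Java class immediately, while LazyLoadingJClass defers resolution until the first attribute access or call; HanLP.segment and CustomDictionary.add are standard HanLP entry points, and the text is made up.

# Hedged sketch: exercising the eagerly and lazily resolved wrappers.
CustomDictionary.add('攻城狮')            # first access triggers the lazy JClass lookup
print(HanLP.segment('攻城狮逆袭单身狗'))    # hypothetical input text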
Example #23
 def testCreateDate(self):
     d = JClass('java.util.Date')(1448799485000)
     self.assertEqual(1448799485000, d.getTime())
Example #24
 def __init__(self, proxy):
     """
     Thread-safe wrapper around JClass.
     :param proxy: the fully qualified name of a Java class, or a Java object
     """
     self._proxy = JClass(proxy) if type(proxy) is str else proxy
Example #25
 def testComplexMethodOvlerloading(self):
     c = JClass('jpype.attr.TestOverloadC')()
     self.assertEqual(c.foo(1), "foo(int) in C: 1")
     self.assertEqual(c.foo(), "foo() in A")
Example #26
 def _lazy_load_jclass(self):
     if type(self._proxy) is str:
         self._proxy = JClass(self._proxy)
Example #27
 def testCallOverloadedMethodWithCovariance(self):
     # This is a JDK 5-specific problem.
     h = JClass('java.lang.StringBuffer')()
     h.delete(0, 0)
Example #28
import re
from trstop import trstop
import string
from typing import List
from jpype import JClass, JString, getDefaultJVMPath, shutdownJVM, startJVM, java
from examples import DATA_PATH, ZEMBEREK_PATH
from pathlib import Path

startJVM(getDefaultJVMPath(), '-ea',
         '-Djava.class.path=%s' % (ZEMBEREK_PATH))

TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')
TurkishSentenceNormalizer: JClass = JClass(
    'zemberek.normalization.TurkishSentenceNormalizer'
)


Paths: JClass = JClass('java.nio.file.Paths')
WordAnalysis: JClass = JClass('zemberek.morphology.analysis.WordAnalysis')

morphology = TurkishMorphology.createWithDefaults()


def stem(text: str) -> str:
    results: WordAnalysis = morphology.analyze(JString(text))
    for result in results:
        return str(result.getLemmas()[0])


normalizer = TurkishSentenceNormalizer(
    TurkishMorphology.createWithDefaults(),
    Paths.get(str(DATA_PATH.joinpath('normalization'))),
Example #29
 def testCallUnicodeString(self):
     v = JClass('jpype.attr.Test1').testStaticString(u"a", u"b")
     self.assertEqual(v[0], 'a')
     self.assertEqual(v[1], 'b')
Example #30
    AD_CV_SCIKIT.append(makaleFull)
    Vocab += makaleN


if __name__ == '__main__':
    np.set_printoptions(threshold=np.inf)

    ZEMBEREK_PATH: str = join('bin', 'zemberek-full.jar')

    ### ZEMBEREK INIT
    startJVM(getDefaultJVMPath(),
             '-ea',
             f'-Djava.class.path={ZEMBEREK_PATH}',
             convertStrings=False)

    TurkishSpellChecker: JClass = JClass(
        'zemberek.normalization.TurkishSpellChecker')
    TurkishTokenizer: JClass = JClass('zemberek.tokenization.TurkishTokenizer')
    TurkishLexer: JClass = JClass('zemberek.tokenization.antlr.TurkishLexer')
    TurkishMorphology: JClass = JClass('zemberek.morphology.TurkishMorphology')
    Token: JClass = JClass('zemberek.tokenization.Token')
    WordAnalysis: JClass = JClass('zemberek.morphology.analysis.WordAnalysis')

    tokenizer: TurkishTokenizer = TurkishTokenizer.ALL
    morphology: TurkishMorphology = TurkishMorphology.createWithDefaults()
    spell_checker: TurkishSpellChecker = TurkishSpellChecker(morphology)

    Paths: JClass = JClass('java.nio.file.Paths')

    TurkishSentenceNormalizer: JClass = JClass(
        'zemberek.normalization.TurkishSentenceNormalizer')
    Paths: JClass = JClass('java.nio.file.Paths')
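
The final snippet stops right after constructing the tokenizer, morphology and spell checker (and just as a normalizer setup begins). A minimal, hedged usage sketch for the spell checker, assuming Zemberek's check and suggestForWord methods:

# Hedged sketch using the spell_checker built above.
word = 'okuyablirim'   # hypothetical misspelling of 'okuyabilirim'
if not spell_checker.check(word):
    for suggestion in spell_checker.suggestForWord(word):
        print(str(suggestion))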