Esempio n. 1
0
 def setUp(self):
     """Prepare the fixture: mocked source, mocked state and parser service.

     A ``NullHandler`` is attached to the 'marbles' logger, and the mocked
     state receives this module's logger. The CCG parser service is created
     with ``daemon='easysrl'``.
     """
     self.mocked_src = MockedNewsSource()

     # Attach a no-op handler to the package logger.
     logging.getLogger('marbles').addHandler(logging.NullHandler())

     module_logger = logging.getLogger(__name__)
     self.state = MockedState(module_logger)
     self.svc = grpc.CcgParserService(daemon='easysrl')
Esempio n. 2
0
def make_derivations(daemon):
    """Parse every CCGbank RAW file through a gRPC CCG parser service.

    For each entry of ``<projdir>/data/ldc/ccgbank_1_1/data/RAW`` whose name
    matches the module-level ``idsrch`` pattern, every line is sent to the
    parser. Results are written under ``<projdir>/data/ldc/<daemon>/ccgbank``:

    * ``ccg_derivation<id>.txt`` - one derivation per input line; a line that
      failed to parse is replaced by a ``# FAILED: ...`` comment so that line
      numbers keep matching the input.
    * ``ccg_failed<id>.txt`` - the input lines that failed, written only when
      there is at least one.

    Args:
        daemon: Name handed to ``grpc.CcgParserService``; also used as the
            output sub-directory name.

    Reads the module globals ``projdir`` and ``idsrch``. The parser service
    is always shut down, including when client creation, the directory
    listing or file I/O raises.
    """
    esrlpath = os.path.join(projdir, 'data', 'ldc', daemon, 'ccgbank')
    if not os.path.exists(esrlpath):
        os.makedirs(esrlpath)
    ldcpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data',
                           'RAW')

    progress = 0
    failed_total = 0
    svc = grpc.CcgParserService(daemon)
    try:
        # Everything after service start-up lives inside the try block so a
        # failure here cannot leave the daemon running.
        stub = svc.open_client()

        for fname in os.listdir(ldcpath):
            # Check the name first: entries that do not match are skipped
            # without being opened.
            m = idsrch.match(fname)
            if m is None:
                continue
            file_id = m.group('id')

            with open(os.path.join(ldcpath, fname), 'r') as fd:
                lines = fd.readlines()

            derivations = []
            failed_parse = []
            for ln in lines:
                # Parse via gRPC; any failure is recorded rather than raised
                # so one bad sentence does not abort the whole run.
                try:
                    ccg = grpc.ccg_parse(stub, ln)
                    derivations.append(safe_utf8_encode(ccg.replace('\n', '')))
                except Exception:
                    sentence = ln.strip()
                    failed_parse.append(safe_utf8_encode(sentence))
                    # Add comment so line numbers match id's
                    derivations.append(
                        safe_utf8_encode('# FAILED: ' + sentence))
                progress = print_progress(progress, 10)

            # The payloads are joined as byte strings, so the output files
            # are opened in binary mode.
            if derivations:
                out_name = os.path.join(esrlpath,
                                        'ccg_derivation%s.txt' % file_id)
                with open(out_name, 'wb') as fd:
                    fd.write(b'\n'.join(derivations))

            if failed_parse:
                failed_total += len(failed_parse)
                fail_name = os.path.join(esrlpath,
                                         'ccg_failed%s.txt' % file_id)
                with open(fail_name, 'wb') as fd:
                    fd.write(b'\n'.join(failed_parse))
    finally:
        print_progress(progress, 10, done=True)
        svc.shutdown()

    if failed_total != 0:
        print('THERE WERE %d PARSE FAILURES' % failed_total)
Esempio n. 3
0
 def on_start(self, workdir):
     """Start the CCG parser daemon and build the news queue reader.

     Args:
         workdir: Working directory forwarded to the parser service.

     Sets ``self.grpc_daemon`` and ``self.parsers`` (a one-element list).
     """
     # Start dependent gRPC CCG parser service
     daemon = grpc.CcgParserService(
         self.grpc_daemon_name,
         workdir=workdir,
         extra_args=self.extra_args,
         jarfile=self.jar_file)
     self.grpc_daemon = daemon

     # Each reader thread would need its own resources (S3, SQS etc.); only
     # a single reader is created here.
     stub = daemon.open_client()
     resources = AwsNewsQueueReaderResources(stub, news_queue_name,
                                             ccg_queue_name)
     reader = AwsNewsQueueReader(resources, state, CO_NO_WIKI_SEARCH)
     self.parsers = [reader]
 def setUp(self):
     """Configure 'marbles' logging and connect to the neuralccg service.

     The 'marbles' logger is set to DEBUG. When ``DPRINT_ON`` is truthy a
     DEBUG-level console handler is attached as well.
     """
     # NOTE(review): a new StreamHandler is attached on every call while
     # DPRINT_ON is set - confirm the matching tearDown removes it.
     marbles_log = logging.getLogger('marbles')
     self.logger = marbles_log
     marbles_log.setLevel(logging.DEBUG)
     if DPRINT_ON:
         # Print log messages to console
         to_console = logging.StreamHandler()
         to_console.setLevel(logging.DEBUG)
         marbles_log.addHandler(to_console)

     # Load gRPC service and open a client stub on it.
     service = grpc.CcgParserService('neuralccg')
     self.svc = service
     self.stub = service.open_client()
Esempio n. 5
0
 def on_start(self, workdir):
     """Start the CCG parser daemon, then serve InfoX over gRPC.

     Args:
         workdir: Working directory forwarded to the parser service.

     Sets ``self.grpc_daemon`` and ``self.server``. The server listens on
     ``[::]:<self.port>`` without transport security and uses a thread pool
     of 10 workers.
     """
     # Start dependent gRPC CCG parser service; debug is enabled whenever
     # the process is not daemonized.
     parser_daemon = gsvc.CcgParserService(
         self.grpc_daemon_name,
         workdir=workdir,
         extra_args=self.extra_args,
         jarfile=self.jar_file,
         debug=not self.state.daemonize)
     self.grpc_daemon = parser_daemon

     # Start InfoX gRPC service backed by a client of the parser daemon.
     handler = InfoxService(parser_daemon.open_client(), self.state)
     pool = futures.ThreadPoolExecutor(max_workers=10)
     rpc_server = grpc.server(pool)
     self.server = rpc_server
     infox_service_pb2.add_InfoxServiceServicer_to_server(handler, rpc_server)
     rpc_server.add_insecure_port('[::]:%d' % self.port)
     rpc_server.start()
Esempio n. 6
0
 def setUp(self):
     """Create the 'easysrl' CCG parser service and open a client stub."""
     service = grpc.CcgParserService('easysrl')
     self.svc = service
     self.stub = service.open_client()
Esempio n. 7
0
    with open(out_file, 'w') as fd:
        for txt in lines:
            print(txt)
            fd.write(txt)
            fd.write('\n')


# Parser services to start when this file runs as a script: the __main__
# block opens a service and client stub only for names listed here
# ('easysrl' and 'neuralccg' are the names it checks for).
SVCLIST = ['neuralccg']

if __name__ == '__main__':
    idsrch = re.compile(r'[^.]+\.(?P<id>\d+)\.raw')
    estub = None
    nstub = None

    if 'easysrl' in SVCLIST:
        esvc = grpc.CcgParserService('easysrl')
        estub = esvc.open_client()
    if 'neuralccg' in SVCLIST:
        nsvc = grpc.CcgParserService('neuralccg')
        nstub = nsvc.open_client()

    try:
        allfiles = []
        autopath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data',
                                'AUTO')
        rawpath = os.path.join(projdir, 'data', 'ldc', 'ccgbank_1_1', 'data',
                               'RAW')
        mappath = os.path.join(projdir, 'data', 'ldc', 'mapping')
        outpath = os.path.join(projdir, 'data', 'ldc', 'compare')
        if not os.path.exists(outpath):
            os.makedirs(outpath)
Esempio n. 8
0
    #from marbles.ie.parse import parse_ccg_derivation
    from marbles.ie.drt.common import SHOW_LINEAR
    from marbles.ie.utils.text import preprocess_sentence

    titleRe = options.title or r'^\s*[A-Z][-A-Z\s\.]*$'
    wordsep = options.wordsep or '-'
    outfile = options.outfile or None
    daemon = options.daemon or 'easysrl'

    if len(args) == 0:
        die('missing filename')

    if daemon not in ['easysrl', 'neuralccg']:
        die('daemon must be easysrl or neuralccg')

    svc = grpc.CcgParserService(daemon)
    stub = svc.open_client()

    try:
        sessionId = grpc.DEFAULT_SESSION
        if options.ofmt is not None:
            if options.ofmt not in [
                    'ccgbank', 'html', 'logic', 'extended', 'drs'
            ]:
                die('bad output format %s, must be ccgbank|html|logic|extended'
                    % options.ofmt)
            # Create a session to match output format, default is CCGBANK
            if options.ofmt != 'ccgbank' and options.ofmt != 'drs':
                sessionId = grpc.create_session(stub, options.ofmt)

        titleSrch = re.compile(titleRe)
Esempio n. 9
0
 def setUp(self):
     """Create the 'neuralccg' CCG parser service and open a client stub."""
     service = grpc.CcgParserService('neuralccg')
     self.svc = service
     self.stub = service.open_client()