Beispiel #1
0
    def _read(self, content_iter, verbose=False, log=False, n_per_proc=None):
        """Read every item of `content_iter` with a locally started TRIPS.

        TRIPS is started lazily when the first item is seen, together with a
        thread running `self._monitor_trips_service` on the started process.
        For each item the text is normalized, sent to the service, and the
        extracted XML (or None when the service returned nothing) is stored
        with `self.add_result` under the item's id.

        TRIPS is killed and the monitor thread is asked to stop even when
        processing an item raises, so a failure does not leave the service
        running; the exception itself still propagates to the caller.

        Parameters
        ----------
        content_iter : iterable
            Items providing `get_text()` and `get_id()`.
        verbose : Optional[bool]
            Accepted for interface compatibility; not used by this method.
        log : Optional[bool]
            Accepted for interface compatibility; not used by this method.
        n_per_proc : Optional[int]
            Accepted for interface compatibility; not used by this method.

        Returns
        -------
        The `self.results` attribute.
        """
        # Compiled once instead of on every iteration; the raw string avoids
        # the invalid "\s" escape sequence of a plain string literal.
        whitespace = re.compile(r'\s+')

        # `th` stays None until the monitor thread has actually been started,
        # so the cleanup below never joins a thread that was never started.
        th = None
        try:
            for content in content_iter:
                if th is None:
                    p, service_host = _start_trips()

                    # Set up the trips monitor
                    monitor = threading.Thread(
                        target=self._monitor_trips_service, args=[p])
                    monitor.start()
                    th = monitor

                if not self.running:
                    logger.error("Breaking loop: trips is down.")
                    break

                # Clean up the text string a bit.
                # - remove all excess white space.
                # - remove special greek letters
                # - remove all special unicode, replace with ascii
                raw_text = whitespace.sub(' ', content.get_text())
                for greek_letter, spelled_letter in greek_alphabet.items():
                    raw_text = raw_text.replace(greek_letter, spelled_letter)
                text = unidecode(raw_text)

                # Process the text.
                # NOTE(review): `service_endpoint` is not assigned anywhere in
                # this method; it is expected to come from an enclosing scope
                # -- confirm that it is defined at module level.
                html = client.send_query(text,
                                         service_host=service_host,
                                         service_endpoint=service_endpoint)
                xml = client.get_xml(html) if html else None
                self.add_result(content.get_id(), xml)
        finally:
            # Stop TRIPS if it hasn't stopped already.
            logger.info("Killing TRIPS")
            _kill_trips()  # Kill all instances of trips-drum.

            logger.info("Signalling observation loop to stop.")
            self.stopping = True  # Sends signal to the loop to stop

            if th is not None:
                logger.info("Waiting for observation loop thread to join.")
                th.join(timeout=5)
                if th.is_alive():
                    logger.warning("Thread did not end, TRIPS is still "
                                   "running.")

        return self.results
Beispiel #2
0
def get_example_extractions(fname):
    """Get extractions from one of the examples in `cag_examples`.

    Every line of the file `fname` is treated as one sentence. Each sentence
    is sent with `tc.send_query` to 'cwms', and `tc.get_xml` is called on the
    reply with 'rdf:RDF' and `fail_if_empty=True`.

    Parameters
    ----------
    fname : str
        Path of the text file holding one sentence per line.

    Returns
    -------
    dict
        Maps each sentence to the value returned by `tc.get_xml`. Sentences
        for which `tc.get_xml` raised an AssertionError are logged and
        left out.
    """
    with open(fname, 'r') as example_file:
        lines = example_file.read().splitlines()

    extractions = {}
    for line in lines:
        logger.info("Reading \"%s\"..." % line)
        reply = tc.send_query(line, 'cwms')
        try:
            rdf_xml = tc.get_xml(reply, 'rdf:RDF', fail_if_empty=True)
        except AssertionError as err:
            # Log the failure and move on to the next sentence.
            logger.error("Got error for %s." % line)
            logger.exception(err)
            continue
        extractions[line] = rdf_xml
    return extractions
Beispiel #3
0
def get_example_extractions(fname):
    """Get extractions from one of the examples in `cag_examples`.

    The file `fname` is read in full and split into lines; each line is one
    sentence. A sentence is submitted through `tc.send_query` with 'cwms',
    and the reply is handed to `tc.get_xml` together with 'rdf:RDF' and
    `fail_if_empty=True`.

    Parameters
    ----------
    fname : str
        Path of the example file to read.

    Returns
    -------
    dict
        The result of `tc.get_xml` keyed by sentence. A sentence whose
        `tc.get_xml` call raised an AssertionError is logged and gets no
        entry.
    """
    results = {}
    with open(fname, 'r') as handle:
        text_lines = handle.read().splitlines()

    for text in text_lines:
        logger.info("Reading \"%s\"..." % text)
        response = tc.send_query(text, 'cwms')
        try:
            xml = tc.get_xml(response,
                             'rdf:RDF',
                             fail_if_empty=True)
        except AssertionError as exc:
            logger.error("Got error for %s." % text)
            logger.exception(exc)
        else:
            # Only successfully extracted sentences are recorded.
            results[text] = xml
    return results
Beispiel #4
0
def process_text(text,
                 save_xml_name='trips_output.xml',
                 save_xml_pretty=True,
                 offline=False,
                 service_endpoint='drum',
                 service_host=None):
    """Return a TripsProcessor by processing text.

    Parameters
    ----------
    text : str
        The text to be processed.
    save_xml_name : Optional[str]
        The name of the file to save the returned TRIPS extraction knowledge
        base XML. If empty or None, the XML is not saved.
        Default: trips_output.xml
    save_xml_pretty : Optional[bool]
        If True, the saved XML is pretty-printed. Some third-party tools
        require non-pretty-printed XMLs which can be obtained by setting this
        to False. Default: True
    offline : Optional[bool]
        If True, offline reading is used with a local instance of DRUM, if
        available. Default: False
    service_endpoint : Optional[str]
        Selects the TRIPS/DRUM web service endpoint to use. Is a choice between
        "drum" (default) and "drum-dev", a nightly build.
    service_host : Optional[str]
        Address of a service host different from the public IHMC server (e.g., a
        locally running service).

    Returns
    -------
    tp : TripsProcessor
        A TripsProcessor containing the extracted INDRA Statements
        in tp.statements. None is returned instead when offline reading was
        requested but is not available, when the DrumReader could not be
        instantiated, or when the DrumReader produced no extractions.
    """
    if not offline:
        html = client.send_query(text,
                                 service_endpoint=service_endpoint,
                                 service_host=service_host)
        xml = client.get_xml(html)
    elif not offline_reading:
        logger.error('Offline reading with TRIPS/DRUM not available.')
        logger.error('Error message was: %s', offline_err)
        msg = """
                To install DRUM locally, follow instructions at
                https://github.com/wdebeaum/drum.
                Next, install the pykqml package either from pip or from
                https://github.com/bgyori/pykqml.
                Once installed, run drum/bin/trips-drum in a separate process.
                """
        logger.error(msg)
        return None
    else:
        # BaseException (not just Exception) is caught on purpose here.
        # NOTE(review): presumably DrumReader can exit the interpreter when
        # it cannot connect to a running DRUM -- confirm.
        try:
            dr = DrumReader()
            if dr is None:
                raise Exception('DrumReader could not be instantiated.')
        except BaseException as e:
            logger.error(e)
            logger.error('Make sure drum/bin/trips-drum is running in'
                         ' a separate process')
            return None
        try:
            dr.read_text(text)
            dr.start()
        except SystemExit:
            pass
        # SystemExit is swallowed above, so the reader may have stopped
        # before producing anything; avoid an IndexError in that case and
        # fail the same way as the other offline error paths.
        if not dr.extractions:
            logger.error('DRUM reading did not produce any extractions.')
            return None
        xml = dr.extractions[0]
    if save_xml_name:
        client.save_xml(xml, save_xml_name, save_xml_pretty)
    return process_xml(xml)
Beispiel #5
0
def process_text(text, save_xml_name='trips_output.xml', save_xml_pretty=True,
                 offline=False, service_endpoint='drum'):
    """Return a TripsProcessor by processing text.

    Parameters
    ----------
    text : str
        The text to be processed.
    save_xml_name : Optional[str]
        The name of the file to save the returned TRIPS extraction knowledge
        base XML. If empty or None, the XML is not saved.
        Default: trips_output.xml
    save_xml_pretty : Optional[bool]
        If True, the saved XML is pretty-printed. Some third-party tools
        require non-pretty-printed XMLs which can be obtained by setting this
        to False. Default: True
    offline : Optional[bool]
        If True, offline reading is used with a local instance of DRUM, if
        available. Default: False
    service_endpoint : Optional[str]
        Selects the TRIPS/DRUM web service endpoint to use. Is a choice between
        "drum" (default) and "drum-dev", a nightly build.

    Returns
    -------
    tp : TripsProcessor
        A TripsProcessor containing the extracted INDRA Statements
        in tp.statements. None is returned instead when offline reading was
        requested but is not available, when the DrumReader could not be
        instantiated, or when the DrumReader produced no extractions.
    """
    if not offline:
        html = client.send_query(text, service_endpoint)
        xml = client.get_xml(html)
    elif not offline_reading:
        logger.error('Offline reading with TRIPS/DRUM not available.')
        logger.error('Error message was: %s', offline_err)
        msg = """
                To install DRUM locally, follow instructions at
                https://github.com/wdebeaum/drum.
                Next, install the pykqml package either from pip or from
                https://github.com/bgyori/pykqml.
                Once installed, run drum/bin/trips-drum in a separate process.
                """
        logger.error(msg)
        return None
    else:
        # BaseException (not just Exception) is caught on purpose here.
        # NOTE(review): presumably DrumReader can exit the interpreter when
        # it cannot connect to a running DRUM -- confirm.
        try:
            dr = DrumReader()
            if dr is None:
                raise Exception('DrumReader could not be instantiated.')
        except BaseException as e:
            logger.error(e)
            logger.error('Make sure drum/bin/trips-drum is running in'
                         ' a separate process')
            return None
        try:
            dr.read_text(text)
            dr.start()
        except SystemExit:
            pass
        # SystemExit is swallowed above, so the reader may have stopped
        # before producing anything; avoid an IndexError in that case and
        # fail the same way as the other offline error paths.
        if not dr.extractions:
            logger.error('DRUM reading did not produce any extractions.')
            return None
        xml = dr.extractions[0]
    if save_xml_name:
        client.save_xml(xml, save_xml_name, save_xml_pretty)
    return process_xml(xml)