Beispiel #1
0
def run(filepaths, out_file, s_parsed):
    """Read XML input, code it, and write the coded events.

    This is the routine called from main().  The Stanford-parse step is
    switched off in this variant, so whatever ``read_xml_input`` returns is
    handed to ``do_coding`` unchanged.

    filepaths -- forwarded to ``PETRreader.read_xml_input``
    out_file  -- forwarded to ``do_coding``; the events are written under
                 this name with an ``'evts.'`` prefix
    s_parsed  -- forwarded to ``PETRreader.read_xml_input``
    """
    xml_events = PETRreader.read_xml_input(filepaths, s_parsed)
    coded = do_coding(xml_events, out_file)
    PETRwriter.write_events(coded, 'evts.' + out_file)
Beispiel #2
0
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Configure the coder, code ``data`` and write or return the result.

    data         -- forwarded to ``PETRreader.read_pipeline_input``
    out_file     -- forwarded to ``PETRwriter.write_events``; must be truthy
                    when ``write_output`` is true
    config       -- config file handed to ``PETRreader.parse_Config``; when
                    falsy the packaged ``PETR_config.ini`` is used instead
    write_output -- when false the coded events are returned through
                    ``PETRwriter.pipe_output`` instead of being written
    parsed       -- when false the events go through
                    ``utilities.stanford_parse`` before coding

    Returns the value of ``PETRwriter.pipe_output`` when ``write_output`` is
    false, otherwise ``None``.

    Raises ``SystemExit`` (via ``sys.exit()``) when ``write_output`` is true
    but no ``out_file`` was supplied.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: %s', config)
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        # Resolve the packaged default once and reuse it for log and parse.
        default_config = utilities._get_data('data/config/',
                                             'PETR_config.ini')
        logger.info('Config path: %s', default_config)
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events, None)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)
    if not out_file:
        print('Please specify an output file...')
        # The backslash is escaped so the literal is valid; the logged text
        # is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        sys.exit()
    PETRwriter.write_events(updated_events, out_file)
Beispiel #3
0
def run(filepaths, out_file, s_parsed):
    """Read XML input and either collect null actors or code and write events.

    This is the routine called from main().

    filepaths -- printed, then forwarded to ``PETRreader.read_xml_input``
    out_file  -- output name used by the null-verb and regular event writers
    s_parsed  -- forwarded to ``PETRreader.read_xml_input``; when false the
                 events are run through ``utilities.stanford_parse`` first

    Returns whatever the selected ``PETRwriter`` function returns, or
    ``None`` when the ``gna`` switch is set.
    """
    print(filepaths)
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    if not s_parsed:
        events = utilities.stanford_parse(events)

    # `gna` is not defined in this function; it is looked up from the
    # enclosing scope.  When truthy, only get_nullactor() runs.
    if gna:
        get_nullactor(events)
        return None

    coded = do_coding(events)
    print("update_event:")
    if PETRglobals.NullVerbs:
        return PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
    if PETRglobals.NullActors:
        # This branch writes to a fixed file name and does not use out_file.
        return PETRwriter.write_nullactors(coded, 'nullactors.txt')
    return PETRwriter.write_events(coded, out_file)
Beispiel #4
0
def run(filepaths, out_file, s_parsed, sub_command_args):
    """Code events from a single story or from XML files and store them.

    This is the routine called from main().

    filepaths        -- the literal ``'javainfo'`` selects the single-story
                        path; any other value is forwarded to
                        ``PETRreader.read_xml_input``
    out_file         -- suffix of the null-verb / null-actor output names
    s_parsed         -- when false the events are run through
                        ``utilities.stanford_parse`` before coding
    sub_command_args -- object whose ``story_*`` attributes describe the
                        story; only read on the ``'javainfo'`` path
    """
    if filepaths == 'javainfo':
        story = sub_command_args
        events = PETRreader.read_story_input(story.story_content,
                                             story.story_title,
                                             story.story_date,
                                             story.story_src,
                                             story.story_url,
                                             story.story_id)
        # Per the original author's note: the StanfordCoreNLP call inside
        # read_story_input leaves a StreamHandler on the root logger, so
        # every root handler is stripped again, last one first.
        while logging.root.handlers:
            logging.root.removeHandler(logging.root.handlers[-1])
    else:
        events = PETRreader.read_xml_input(filepaths, s_parsed)

    print("events before coding:", events)
    if not s_parsed:
        events = utilities.stanford_parse(events)
    coded = do_coding(events)
    print("updated_events after coding:", coded)

    if PETRglobals.NullVerbs:
        PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
        return
    if PETRglobals.NullActors:
        PETRwriter.write_nullactors(coded, 'nullactors.' + out_file)
        return

    # Regular events go to the database writer rather than to an 'evts.' file.
    print("updated_events:")
    print(coded)
    databasewriter.write_events(coded, None, False)
Beispiel #5
0
def process_task(one_task, out_file, multi_log_lock, session):
    """Code the story described by ``one_task`` and store the result.

    one_task       -- mapping read with the keys 'publishDate', 'content',
                      'title', 'siteName', 'pageUrl' and 'id'
    out_file       -- suffix of the null-verb / null-actor output names
    multi_log_lock -- forwarded to ``databasewriter.write_events``
    session        -- forwarded to ``databasewriter.write_events``
    """
    published = one_task['publishDate']
    try:
        story_date = time.strptime(published, "%Y%m%d%H%M%S")
    except Exception:
        # Fall back to the current local time (marked "todo" by the
        # original author).
        # NOTE(review): this fallback is a formatted string, while the
        # success path above yields a time.struct_time -- confirm which
        # type read_story_input expects.
        story_date = time.strftime('%Y%m%d%H%M%S', time.localtime())

    # Argument order: story_content, story_title, story_date, story_src,
    # story_url, then the task id.
    events = PETRreader.read_story_input(one_task['content'],
                                         one_task['title'], story_date,
                                         one_task['siteName'],
                                         one_task['pageUrl'], one_task['id'])

    # Per the original author's note: the StanfordCoreNLP call inside
    # read_story_input leaves a StreamHandler on the root logger, so every
    # root handler is stripped again, last one first.
    while logging.root.handlers:
        logging.root.removeHandler(logging.root.handlers[-1])

    coded = do_coding(events)
    if PETRglobals.NullVerbs:
        PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
        return
    if PETRglobals.NullActors:
        PETRwriter.write_nullactors(coded, 'nullactors.' + out_file)
        return
    databasewriter.write_events(coded, multi_log_lock, session, True)
Beispiel #6
0
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Configure the coder, code ``data`` and write or return the result.

    This is called externally.

    data         -- forwarded to ``PETRreader.read_pipeline_input``
    out_file     -- forwarded to ``PETRwriter.write_events``; must be truthy
                    when ``write_output`` is true
    config       -- config file handed to ``PETRreader.parse_Config``; when
                    falsy the packaged ``PETR_config.ini`` is used instead
    write_output -- when false the coded events are returned through
                    ``PETRwriter.pipe_output`` instead of being written
    parsed       -- Stanford parsing is disabled in this variant, so the
                    events are coded as read either way; a false value only
                    triggers a warning in the log

    Returns the value of ``PETRwriter.pipe_output`` when ``write_output`` is
    false, otherwise ``None``.

    Raises ``SystemExit`` (via ``sys.exit()``) when ``write_output`` is true
    but no ``out_file`` was supplied.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')
    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: %s', config)
        PETRreader.parse_Config(config)
    else:
        logger.info('Using default config file.')
        # Resolve the packaged default once and reuse it for log and parse.
        default_config = utilities._get_data('data/config/',
                                             'PETR_config.ini')
        logger.info('Config path: %s', default_config)
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        # Previously nothing was coded on this path, so updated_events was
        # unbound below and the call died with UnboundLocalError.  With the
        # parser switched off, the events are coded exactly as read.
        logger.warning('Input not flagged as parsed and Stanford parsing is '
                       'disabled; coding events as read.')
    updated_events = do_coding(events, None)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)
    if not out_file:
        print('Please specify an output file...')
        # The backslash is escaped so the literal is valid; the logged text
        # is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        sys.exit()
    PETRwriter.write_events(updated_events, out_file)
def run(filepaths, out_file, s_parsed):
    """Code the events found in the XML input and write them out.

    This is the routine called from main().  No Stanford parsing happens
    here: the events are coded in the form ``read_xml_input`` returns them.

    filepaths -- forwarded to ``PETRreader.read_xml_input``
    out_file  -- forwarded to ``do_coding``; the output is written to this
                 name prefixed with ``'evts.'``
    s_parsed  -- forwarded to ``PETRreader.read_xml_input``
    """
    incoming = PETRreader.read_xml_input(filepaths, s_parsed)
    coded_events = do_coding(incoming, out_file)
    PETRwriter.write_events(coded_events, 'evts.' + out_file)
Beispiel #8
0
def run(filepaths, out_file, s_parsed):
    """Read XML input, log it at debug level, code it and write the events.

    This is the routine called from main().  The Stanford-parse step is
    switched off in this variant.

    filepaths -- forwarded to ``PETRreader.read_xml_input``
    out_file  -- the events are written to this name prefixed with
                 ``'evts.'``
    s_parsed  -- forwarded to ``PETRreader.read_xml_input``
    """
    logger = logging.getLogger('petr_log')

    events = PETRreader.read_xml_input(filepaths, s_parsed)
    # The message needs a %s placeholder: logging formats with `msg % args`,
    # and without one the events never appeared in the log.
    logger.debug("Incoming data from XML: %s", events)
    updated_events = do_coding(events)

    PETRwriter.write_events(updated_events, 'evts.' + out_file)
Beispiel #9
0
def run(filepaths, out_file, s_parsed):
    """Read XML input, parse it if needed, code it and write the result.

    This is the routine called from main().

    filepaths -- forwarded to ``PETRreader.read_xml_input``
    out_file  -- output name; a prefix is added according to which writer
                 is selected
    s_parsed  -- forwarded to ``PETRreader.read_xml_input``; when false the
                 events are run through ``utilities.stanford_parse`` first
    """
    events = PETRreader.read_xml_input(filepaths, s_parsed)
    if not s_parsed:
        events = utilities.stanford_parse(events)
    coded = do_coding(events)

    # Choose writer and file-name prefix from the global null-verb /
    # null-actor switches; regular events are the fallback.
    if PETRglobals.NullVerbs:
        emit, prefix = PETRwriter.write_nullverbs, 'nullverbs.'
    elif PETRglobals.NullActors:
        emit, prefix = PETRwriter.write_nullactors, 'nullactors.'
    else:
        emit, prefix = PETRwriter.write_events, 'evts.'
    emit(coded, prefix + out_file)
Beispiel #10
0
def run(filepaths, out_file, s_parsed):
    """Read XML input, parse it if needed, code it and write the result.

    This is the routine called from main().

    filepaths -- forwarded to ``PETRreader.read_xml_input``
    out_file  -- forwarded to ``do_coding`` and used, with a prefix, as the
                 output name
    s_parsed  -- forwarded to ``PETRreader.read_xml_input``; when false the
                 events are run through ``utilities.stanford_parse`` first
    """
    raw_events = PETRreader.read_xml_input(filepaths, s_parsed)
    events = raw_events if s_parsed else utilities.stanford_parse(raw_events)
    coded = do_coding(events, out_file)

    # Exactly one writer runs: null verbs first, then null actors, and
    # regular events otherwise.
    if PETRglobals.NullVerbs:
        PETRwriter.write_nullverbs(coded, 'nullverbs.' + out_file)
        return
    if PETRglobals.NullActors:
        PETRwriter.write_nullactors(coded, 'nullactors.' + out_file)
        return
    PETRwriter.write_events(coded, 'evts.' + out_file)
Beispiel #11
0
def run(filepaths, out_file, s_parsed):
    """Read XML input, parse it if needed, code it and write the events.

    filepaths -- forwarded to ``PETRreader.read_xml_input``
    out_file  -- forwarded to ``do_coding`` and used unchanged as the
                 output name
    s_parsed  -- forwarded to ``PETRreader.read_xml_input``; when false the
                 events are run through ``utilities.stanford_parse`` first
    """
    raw_events = PETRreader.read_xml_input(filepaths, s_parsed)
    ready = raw_events if s_parsed else utilities.stanford_parse(raw_events)
    coded = do_coding(ready, out_file)
    PETRwriter.write_events(coded, out_file)