def read_dictionaries(validation=False):
    """Load every dictionary named in ``PETRglobals`` through ``PETRreader``.

    The verb, actor, agent and discard dictionaries are always read, in that
    order.  The issues list is read only when ``PETRglobals.IssueFileName``
    is not the empty string.  Every file name is echoed to stdout and then
    resolved with ``utilities._get_data('data/dictionaries', name)`` before
    being handed to the matching ``PETRreader`` loader.

    Args:
        validation: Not used by this function.
    """
    dict_dir = 'data/dictionaries'

    def locate(file_name):
        # Resolve a dictionary file name to the path the readers expect.
        return utilities._get_data(dict_dir, file_name)

    print('Verb dictionary:', PETRglobals.VerbFileName)
    PETRreader.read_verb_dictionary(locate(PETRglobals.VerbFileName))

    print('Actor dictionaries:', PETRglobals.ActorFileList)
    for actor_file in PETRglobals.ActorFileList:
        PETRreader.read_actor_dictionary(locate(actor_file))

    print('Agent dictionary:', PETRglobals.AgentFileName)
    PETRreader.read_agent_dictionary(locate(PETRglobals.AgentFileName))

    print('Discard dictionary:', PETRglobals.DiscardFileName)
    PETRreader.read_discard_list(locate(PETRglobals.DiscardFileName))

    # The issues list is optional: an empty file name means "not configured".
    if PETRglobals.IssueFileName == "":
        return
    print('Issues dictionary:', PETRglobals.IssueFileName)
    PETRreader.read_issue_list(locate(PETRglobals.IssueFileName))
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Code pipeline input and either return the events or write them out.

    Steps, in order: initialise the ``PETRARCH.log`` logger, parse the
    configuration, load the dictionaries, read ``data`` with
    ``PETRreader.read_pipeline_input``, optionally run
    ``utilities.stanford_parse`` on it, and code it with ``do_coding``.

    Args:
        data: Input handed unchanged to ``PETRreader.read_pipeline_input``.
        out_file: Destination passed to ``PETRwriter.write_events``; required
            when ``write_output`` is true.
        config: Path of a user configuration file.  When falsy, the bundled
            ``data/config/PETR_config.ini`` is used.
        write_output: When false, the result of ``PETRwriter.pipe_output`` is
            returned instead of writing anything.
        parsed: When false, the events are passed through
            ``utilities.stanford_parse`` before coding.

    Returns:
        The value of ``PETRwriter.pipe_output(...)`` when ``write_output`` is
        false, otherwise ``None``.

    Raises:
        SystemExit: With status 1 when output was requested but ``out_file``
            is missing.
    """
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        # Resolve the default path once and reuse it for logging and parsing.
        default_config = utilities._get_data('data/config/', 'PETR_config.ini')
        logger.info('Using default config file.')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)
    if parsed:
        logger.info('Hitting do_coding')
    else:
        events = utilities.stanford_parse(events)
    updated_events = do_coding(events, None)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)

    if not out_file:
        print('Please specify an output file...')
        # The backslash is escaped so the literal stays valid on interpreters
        # that reject unknown escape sequences; the logged text is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    PETRwriter.write_events(updated_events, out_file)
def read_dictionaries(validation=False):
    """Read the verb, actor, agent, discard and (optional) issue dictionaries.

    Each file name comes from ``PETRglobals`` and is printed before it is
    loaded.  The verb dictionary path is resolved with
    ``utilities._get_dict_data('dictionary', ...)``; every other path is
    resolved with ``utilities._get_data('data/dictionaries', ...)``.  The
    issues list is skipped when ``PETRglobals.IssueFileName`` is ``""``.

    Args:
        validation: Not used by this function.
    """
    data_dir = 'data/dictionaries'

    # Verbs: the only dictionary located through _get_dict_data.
    print('Verb dictionary:', PETRglobals.VerbFileName)
    PETRreader.read_verb_dictionary(
        utilities._get_dict_data('dictionary', PETRglobals.VerbFileName))

    # Actors: there may be several files, loaded in list order.
    print('Actor dictionaries:', PETRglobals.ActorFileList)
    for actor_name in PETRglobals.ActorFileList:
        PETRreader.read_actor_dictionary(
            utilities._get_data(data_dir, actor_name))

    # Agents.
    print('Agent dictionary:', PETRglobals.AgentFileName)
    PETRreader.read_agent_dictionary(
        utilities._get_data(data_dir, PETRglobals.AgentFileName))

    # Discard list.
    print('Discard dictionary:', PETRglobals.DiscardFileName)
    PETRreader.read_discard_list(
        utilities._get_data(data_dir, PETRglobals.DiscardFileName))

    # Issues: optional.
    if not PETRglobals.IssueFileName == "":
        print('Issues dictionary:', PETRglobals.IssueFileName)
        PETRreader.read_issue_list(
            utilities._get_data(data_dir, PETRglobals.IssueFileName))
def run_pipeline(data, out_file=None, config=None, write_output=True,
                 parsed=False):
    """Code pre-parsed pipeline input and return or write the events.

    This function is called externally.  It initialises the
    ``PETRARCH.log`` logger, parses the configuration, loads the
    dictionaries, reads ``data`` with ``PETRreader.read_pipeline_input`` and
    codes it with ``do_coding``.

    The branch that parsed raw input is disabled in this module, so only
    ``parsed=True`` can produce events.  Previously ``parsed=False`` left the
    coded events unassigned and failed later with ``UnboundLocalError``; it
    is now rejected immediately, before any configuration is loaded.

    Args:
        data: Input handed unchanged to ``PETRreader.read_pipeline_input``.
        out_file: Destination passed to ``PETRwriter.write_events``; required
            when ``write_output`` is true.
        config: Path of a user configuration file.  When falsy, the bundled
            ``data/config/PETR_config.ini`` is used.
        write_output: When false, the result of ``PETRwriter.pipe_output`` is
            returned instead of writing anything.
        parsed: Must be true; see above.

    Returns:
        The value of ``PETRwriter.pipe_output(...)`` when ``write_output`` is
        false, otherwise ``None``.

    Raises:
        NotImplementedError: If ``parsed`` is false.
        SystemExit: With status 1 when output was requested but ``out_file``
            is missing.
    """
    if not parsed:
        # Fail fast with an explicit message instead of an UnboundLocalError
        # after all the dictionaries have been loaded.
        raise NotImplementedError(
            'run_pipeline() only supports pre-parsed input; '
            'call it with parsed=True.')

    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    if config:
        print('Using user-specified config: {}'.format(config))
        logger.info('Using user-specified config: {}'.format(config))
        PETRreader.parse_Config(config)
    else:
        # Resolve the default path once and reuse it for logging and parsing.
        default_config = utilities._get_data('data/config/', 'PETR_config.ini')
        logger.info('Using default config file.')
        logger.info('Config path: {}'.format(default_config))
        PETRreader.parse_Config(default_config)

    read_dictionaries()

    logger.info('Hitting read events...')
    events = PETRreader.read_pipeline_input(data)

    logger.info('Hitting do_coding')
    updated_events = do_coding(events, None)

    if not write_output:
        return PETRwriter.pipe_output(updated_events)

    if not out_file:
        print('Please specify an output file...')
        # The backslash is escaped so the literal stays valid on interpreters
        # that reject unknown escape sequences; the logged text is unchanged.
        logger.warning('Need an output file. ¯\\_(ツ)_/¯')
        # Non-zero status so calling scripts can detect the failure.
        sys.exit(1)

    PETRwriter.write_events(updated_events, out_file)
def process_target(queue, cli_args, multi_log_lock):
    """Worker-process body: load configuration, then serve queued tasks.

    The worker logs that it has started, initialises its own logger,
    configuration and dictionaries, opens a ``Session`` and then loops
    forever: whenever the queue reports pending items it takes one task and
    passes it to ``process_task``; otherwise it sleeps for a random interval
    of up to half a second.  The function never returns on its own.

    Args:
        queue: Task queue; must provide ``qsize()`` and ``get()``.
        cli_args: Parsed command-line arguments.  Reads ``config``,
            ``nullverbs``, ``nullactors`` and ``outputs``.
        multi_log_lock: Lock handed to ``write_multiprocess_log`` and
            ``process_task``.
    """
    # Announce that this child process has started.
    write_multiprocess_log(
        multi_log_lock,
        '{}Process {}: {}'.format(u'', os.getpid(), u'started.'))

    # The child process first loads everything it needs in order to run.
    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        # Fixed: this log line used to say "null verbs mode".
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    print('\n\n')

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    # Create a session for talking to the database.
    session = Session()

    while True:
        # Fixed: qsize must be *called*.  Comparing the bound method itself
        # with 0 raises TypeError on Python 3 and is always true on Python 2.
        # NOTE(review): multiprocessing.Queue.qsize() can raise
        # NotImplementedError on some platforms (e.g. macOS) -- confirm the
        # queue type used by the caller.
        if queue.qsize() > 0:
            # Take one task from the queue.
            task = queue.get()
            # Log that a task has been obtained.
            write_multiprocess_log(
                multi_log_lock,
                '{}Process {} get one task: {}'.format(
                    u'', os.getpid(), task))
            # Execute the task.
            process_task(task, out, multi_log_lock, session)
        else:
            # Nothing queued: back off for a short random interval.
            time.sleep(0.5 * random.random())
def main():
    """Command-line entry point for the ``parse`` and ``batch`` commands.

    Parses the command line, initialises the ``PETRARCH.log`` logger (passing
    ``cli_args.debug``) and records the start time in
    ``PETRglobals.RunTimeString``.  For ``parse``/``batch`` it then loads the
    configuration (remembering its name in ``PETRglobals.ConfigFileName``),
    loads the dictionaries, works out the input files and calls ``run``.

    Input selection: ``PETRglobals.TextFileList`` by default; when
    ``cli_args.inputs`` is a directory, every ``*.xml`` file directly inside
    it; when it is a file, just that file.

    Raises:
        SystemExit: With status 1 when ``cli_args.inputs`` is neither an
            existing directory nor an existing file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log', cli_args.debug)
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.command_name in ('parse', 'batch'):
        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info(
                'Using user-specified config: {}'.format(cli_args.config))
            PETRglobals.ConfigFileName = cli_args.config
            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRglobals.ConfigFileName = 'PETR_config.ini'
            PETRreader.parse_Config(
                utilities._get_data('data/config/', 'PETR_config.ini'))

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                # Avoid doubling the separator when the directory already
                # ends with one.
                pattern = ('*.xml' if cli_args.inputs.endswith('/')
                           else '/*.xml')
                paths = glob.glob(cli_args.inputs + pattern)
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' +
                    cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.')
                # Non-zero status so calling scripts can detect the failure
                # (a bare sys.exit() reports success).
                sys.exit(1)

        out = cli_args.outputs if cli_args.outputs else ""

        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)
        else:
            # 'batch' always passes True as the third argument of run().
            run(paths, out, True)

        print("Coding time:", time.time() - start_time)

    print("Finished")
def main():
    """Command-line entry point for the ``parse`` and ``batch`` commands.

    Parses the command line, initialises the ``PETRARCH.log`` logger and
    records the start time in ``PETRglobals.RunTimeString``.  For
    ``parse``/``batch`` it then loads the configuration and dictionaries,
    works out the input files and calls ``run``.

    Input selection: ``PETRglobals.TextFileList`` by default; when
    ``cli_args.inputs`` is a directory, every ``*.xml`` file directly inside
    it; when it is a file, just that file.

    Raises:
        SystemExit: With status 1 when ``cli_args.inputs`` is neither an
            existing directory nor an existing file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.command_name in ('parse', 'batch'):
        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info(
                'Using user-specified config: {}'.format(cli_args.config))
            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRreader.parse_Config(
                utilities._get_data('data/config/', 'PETR_config.ini'))

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                # Avoid doubling the separator when the directory already
                # ends with one.
                pattern = ('*.xml' if cli_args.inputs.endswith('/')
                           else '/*.xml')
                paths = glob.glob(cli_args.inputs + pattern)
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' +
                    cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.')
                # Non-zero status so calling scripts can detect the failure
                # (a bare sys.exit() reports success).
                sys.exit(1)

        out = cli_args.outputs if cli_args.outputs else ""

        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)
        else:
            # 'batch' always passes True as the third argument of run().
            run(paths, out, True)

        print("Coding time:", time.time() - start_time)

    print("Finished")
def __init__(self, config_folder='data/config/',
             config_file='PETR_config.ini'):
    """Initialise logging, parse the configuration and load dictionaries.

    Args:
        config_folder: Folder handed to ``utilities._get_data`` to locate
            the configuration file.
        config_file: Name of the configuration file inside that folder.
    """
    utilities.init_logger('PETRARCH.log')
    PETRglobals.RunTimeString = time.asctime()

    logging.getLogger('petr_log').info('Using Config file: ' + config_file)

    config_path = utilities._get_data(config_folder, config_file)
    PETRreader.parse_Config(config_path)
    petrarch2.read_dictionaries()
def __init__(self, petrGlobal=None, config_folder='data/config/',
             config_file='PETR_config.ini'):
    """Set up the coder from a saved map or from the files on disk.

    When ``petrGlobal`` is truthy it is handed to ``self.load`` and nothing
    is read from disk.  Otherwise logging is initialised, the configuration
    file is parsed and the dictionaries are loaded with
    ``petrarch_ud.read_dictionaries``.

    Args:
        petrGlobal: Previously saved state passed to ``self.load``.  ``None``
            or any empty value selects loading from disk.  The default is
            ``None`` rather than a shared mutable ``{}``.
        config_folder: Folder handed to ``utilities._get_data`` to locate
            the configuration file.
        config_file: Name of the configuration file inside that folder.
    """
    if petrGlobal:
        print ("LOADING FROM MAP")
        self.load(petrGlobal)
        return

    utilities.init_logger('PETRARCH.log', debug=False)
    logger = logging.getLogger('petr_log')
    PETRglobals.RunTimeString = time.asctime()
    logger.info('Using Config file: '+config_file)
    PETRreader.parse_Config(utilities._get_data(config_folder, config_file))
    petrarch_ud.read_dictionaries()
    print("SUCCESSFULL ON LOADING DICTIONARIES")
def read_dictionaries(validation=False):
    """Load the coding ontology and every dictionary named in ``PETRglobals``.

    Read order: internal coding ontology, verb dictionary, the Petrarch 1
    verb dictionary (only when ``PETRglobals.CodeWithPetrarch1`` is truthy),
    each actor dictionary, each agent dictionary, the discard list and,
    when ``PETRglobals.IssueFileName`` is not ``""``, the issues list.
    Every name is printed first and resolved with
    ``utilities._get_data('data/dictionaries', name)``.

    Args:
        validation: Not used by this function.
    """
    dict_dir = 'data/dictionaries'

    def locate(file_name):
        # Resolve a dictionary file name to the path the readers expect.
        return utilities._get_data(dict_dir, file_name)

    print('Internal Coding Ontology:',
          PETRglobals.InternalCodingOntologyFileName)
    PETRreader.read_internal_coding_ontology(
        locate(PETRglobals.InternalCodingOntologyFileName))

    print('Verb dictionary:', PETRglobals.VerbFileName)
    PETRreader.read_verb_dictionary(locate(PETRglobals.VerbFileName))

    if PETRglobals.CodeWithPetrarch1:
        print('Petrarch 1 Verb dictionary:', PETRglobals.P1VerbFileName)
        PETRreader.read_petrarch1_verb_dictionary(
            locate(PETRglobals.P1VerbFileName))

    print('Actor dictionaries:', PETRglobals.ActorFileList)
    for actor_file in PETRglobals.ActorFileList:
        PETRreader.read_actor_dictionary(locate(actor_file))

    print('Agent dictionary:', PETRglobals.AgentFileList)
    for agent_file in PETRglobals.AgentFileList:
        PETRreader.read_agent_dictionary(locate(agent_file))

    print('Discard dictionary:', PETRglobals.DiscardFileName)
    PETRreader.read_discard_list(locate(PETRglobals.DiscardFileName))

    # The issues list is optional: an empty file name means "not configured".
    if PETRglobals.IssueFileName == "":
        return
    print('Issues dictionary:', PETRglobals.IssueFileName)
    PETRreader.read_issue_list(locate(PETRglobals.IssueFileName))
def main(cli_args=None):
    """Configure PETRARCH from ``cli_args``, code the inputs, return events.

    Args:
        cli_args: Parsed command-line arguments.  When falsy they are
            obtained from ``parse_cli_args()``.  Reads ``config``,
            ``nullverbs``, ``nullactors``, ``inputs``, ``outputs``,
            ``command_name`` and, for the ``parse`` command, ``parsed``.

    Returns:
        Whatever ``run`` returns for the selected input files.

    Raises:
        SystemExit: With status 1 when ``cli_args.inputs`` is neither an
            existing directory nor an existing file.
    """
    if not cli_args:
        cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    print(cli_args)
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        # Fixed: this log line used to say "null verbs mode".
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    start_time = time.time()
    print('\n\n')

    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            # Avoid doubling the separator when the directory already ends
            # with one.
            pattern = '*.xml' if cli_args.inputs.endswith('/') else '/*.xml'
            paths = glob.glob(cli_args.inputs + pattern)
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' + cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            # Non-zero status so calling scripts can detect the failure
            # (a bare sys.exit() reports success).
            sys.exit(1)

    out = cli_args.outputs if cli_args.outputs else ""

    if cli_args.command_name == 'parse':
        events = run(paths, out, cli_args.parsed)
    else:
        # Every other command passes True as the third argument of run().
        events = run(paths, out, True)

    print("Coding time:", time.time() - start_time)
    print("Finished")
    return events
def main():
    """Command-line entry point for the ``parse`` and ``batch`` commands.

    Parses the command line, initialises the ``PETRARCH.log`` logger and
    records the start time in ``PETRglobals.RunTimeString``.  For
    ``parse``/``batch`` it loads the configuration, applies the null-verbs or
    null-actors mode if requested, loads the dictionaries, works out the
    input files and calls ``run``.

    Raises:
        SystemExit: With status 1 when ``cli_args.inputs`` is neither an
            existing directory nor an existing file.
    """
    cli_args = parse_cli_args()
    utilities.init_logger('PETRARCH.log')
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    # NOTE: the original author questioned (16.06.27) whether this command
    # guard is still needed.
    if cli_args.command_name in ('parse', 'batch'):
        print(cli_args)
        if cli_args.config:
            print('Using user-specified config: {}'.format(cli_args.config))
            logger.info(
                'Using user-specified config: {}'.format(cli_args.config))
            PETRreader.parse_Config(cli_args.config)
        else:
            logger.info('Using default config file.')
            PETRreader.parse_Config(
                utilities._get_data('data/config/', 'PETR_config.ini'))

        if cli_args.nullverbs:
            print('Coding in null verbs mode; no events will be generated')
            logger.info(
                'Coding in null verbs mode; no events will be generated')
            # Only get verb phrases that are not in the dictionary but are
            # associated with coded noun phrases
            PETRglobals.NullVerbs = True
        elif cli_args.nullactors:
            print('Coding in null actors mode; no events will be generated')
            # Fixed: this log line used to say "null verbs mode".
            logger.info(
                'Coding in null actors mode; no events will be generated')
            # Only get actor phrases that are not in the dictionary but
            # associated with coded verb phrases
            PETRglobals.NullActors = True
            PETRglobals.NewActorLength = int(cli_args.nullactors)

        read_dictionaries()
        start_time = time.time()
        print('\n\n')

        paths = PETRglobals.TextFileList
        if cli_args.inputs:
            if os.path.isdir(cli_args.inputs):
                # Avoid doubling the separator when the directory already
                # ends with one.
                pattern = ('*.xml' if cli_args.inputs.endswith('/')
                           else '/*.xml')
                paths = glob.glob(cli_args.inputs + pattern)
            elif os.path.isfile(cli_args.inputs):
                paths = [cli_args.inputs]
            else:
                print(
                    '\nFatal runtime error:\n"' +
                    cli_args.inputs +
                    '" could not be located\nPlease enter a valid directory or file of source texts.')
                # Non-zero status so calling scripts can detect the failure
                # (a bare sys.exit() reports success).
                sys.exit(1)

        out = cli_args.outputs if cli_args.outputs else ""

        if cli_args.command_name == 'parse':
            run(paths, out, cli_args.parsed)
        else:
            # 'batch' always passes True as the third argument of run().
            run(paths, out, True)

        print("Coding time:", time.time() - start_time)

    print("Finished")
triple[0], basestring) else triple[0].text target = triple[1] if isinstance( triple[1], basestring) else triple[1].text others = "" for other in triple[3]: others = others + other.text + "," tuples = tuples + "source: " + source + "\ttarget: " + target + "\tverb: " + triple[ 2].text + "\tother_noun: " + others + "\n" ET.SubElement(sentence, "Triplets").text = tuples tree = ET.ElementTree(root) tree.write(outputfile, 'UTF-8') utilities.init_logger('PETRARCH.log', True) config = utilities._get_data('data/config/', 'PETR_config.ini') print("reading config") sys.stdout.write('Mk1\n') PETRreader.parse_Config(config) print("reading dicts") petrarch_ud.read_dictionaries() inputFile = sys.argv[1] #inputFile=sys.argv[1].replace(".xml","")+"_parsed.xml" outputFile = inputFile.replace("_parsed.xml", "") + "_phrase.xml" events = read_xml_input([inputFile], True) ''' print(len(events)) for key in events.keys(): print(len(events[key]['sents'])) for subkey,v in events[key]['sents'].items(): print(subkey)
def process_target_bak(q, l, first_task, cli_args, multi_log_lock):
    """Worker-process body: run ``first_task``, then drain the shared queue.

    The worker initialises its own logger, configuration and dictionaries,
    opens a ``Session``, processes ``first_task`` and then repeatedly takes
    tasks from ``q`` while holding ``l``.  It logs an exit message and
    returns as soon as it finds the queue empty.

    Args:
        q: Task queue; must provide ``qsize()`` and ``get()``.
        l: Lock serialising the "check size, then get" step between workers.
        first_task: Task processed before the queue is consulted.
        cli_args: Parsed command-line arguments.  Reads ``config``,
            ``nullverbs``, ``nullactors`` and ``outputs``.
        multi_log_lock: Lock handed to ``write_multiprocess_log`` and
            ``process_task``.
    """
    # The child process first loads everything it needs in order to run.
    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        # Fixed: this log line used to say "null verbs mode".
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    print('\n\n')

    out = ""  # PETRglobals.EventFileName
    if cli_args.outputs:
        out = cli_args.outputs

    # Create a session for talking to the database.
    session = Session()

    # The child process completes its first task before using the queue.
    write_multiprocess_log(
        multi_log_lock,
        '{}Process {}: {}'.format(u'', os.getpid(), first_task))
    process_task(first_task, out, multi_log_lock, session)

    while l.acquire():
        # try/finally guarantees the lock is released even if qsize() or
        # get() raises, so a failing worker cannot block the others.
        try:
            # empty() is unreliable, so the size is checked with qsize().
            has_task = q.qsize() != 0
            if has_task:
                # Fetch the next task while still holding the lock.
                task = q.get()
        finally:
            l.release()

        if not has_task:
            # Queue is empty: leave the loop.
            break

        # Process the task that was fetched.
        write_multiprocess_log(
            multi_log_lock,
            '{}Process {}: {}'.format(u'', os.getpid(), task))
        process_task(task, out, multi_log_lock, session)

    write_multiprocess_log(
        multi_log_lock,
        '{}Process {}: {}'.format(u'', os.getpid(), u'exited...'))
def main():
    """Command-line entry point, including the ``background`` mode.

    For the ``background`` command, ``run_in_background`` is started and
    ``main`` returns ``None`` once it stops, whether it returned or was
    interrupted from the keyboard; it never falls through into the one-shot
    coding path.

    For every other command the logger, configuration and dictionaries are
    loaded, the input is selected and ``run`` is called.  Input selection:
    ``PETRglobals.TextFileList`` by default; the ``*.xml`` files of a
    directory or a single file when ``cli_args.inputs`` is given; the literal
    string ``'javainfo'`` when no inputs are given and the command is
    ``javainfo``.

    Raises:
        SystemExit: With status 1 when ``cli_args.inputs`` is neither an
            existing directory nor an existing file.
    """
    cli_args = parse_cli_args()

    # miaoweixin added begin
    # Run as a background program in an infinite loop.
    if cli_args.command_name == 'background':
        try:
            # infinite loop
            run_in_background(cli_args)
        except KeyboardInterrupt:
            print("Program exited due to keyboard interrupt.\n")
        return None
    # miaoweixin added end

    utilities.init_logger()
    logger = logging.getLogger('petr_log')

    PETRglobals.RunTimeString = time.asctime()

    print(cli_args)
    if cli_args.config:
        print('Using user-specified config: {}'.format(cli_args.config))
        logger.info('Using user-specified config: {}'.format(cli_args.config))
        PETRreader.parse_Config(cli_args.config)
    else:
        logger.info('Using default config file.')
        PETRreader.parse_Config(
            utilities._get_data('data/config/', 'PETR_config.ini'))

    if cli_args.nullverbs:
        print('Coding in null verbs mode; no events will be generated')
        logger.info('Coding in null verbs mode; no events will be generated')
        # Only get verb phrases that are not in the dictionary but are
        # associated with coded noun phrases
        PETRglobals.NullVerbs = True
    elif cli_args.nullactors:
        print('Coding in null actors mode; no events will be generated')
        # Fixed: this log line used to say "null verbs mode".
        logger.info('Coding in null actors mode; no events will be generated')
        # Only get actor phrases that are not in the dictionary but
        # associated with coded verb phrases
        PETRglobals.NullActors = True
        PETRglobals.NewActorLength = int(cli_args.nullactors)

    read_dictionaries()
    start_time = time.time()
    print('\n\n')

    paths = PETRglobals.TextFileList
    if cli_args.inputs:
        if os.path.isdir(cli_args.inputs):
            # Avoid doubling the separator when the directory already ends
            # with one.
            pattern = '*.xml' if cli_args.inputs.endswith('/') else '/*.xml'
            paths = glob.glob(cli_args.inputs + pattern)
        elif os.path.isfile(cli_args.inputs):
            paths = [cli_args.inputs]
        else:
            print(
                '\nFatal runtime error:\n"' + cli_args.inputs +
                '" could not be located\nPlease enter a valid directory or file of source texts.'
            )
            # Non-zero status so calling scripts can detect the failure
            # (a bare sys.exit() reports success).
            sys.exit(1)
    elif cli_args.command_name == 'javainfo':
        # add else to java info 0904
        paths = 'javainfo'

    out = cli_args.outputs if cli_args.outputs else ""

    if cli_args.command_name == 'parse':
        run(paths, out, cli_args.parsed, cli_args)
    else:
        # Every other command passes True as the third argument of run().
        run(paths, out, True, cli_args)

    print("Coding time:", time.time() - start_time)
    print("Finished")