def main():
    """Run the ``parse`` command end to end and write a text report.

    Reads the configuration and the command-line arguments. For the
    ``parse`` command it loads the pickled chunker and POS tagger, reads the
    input sentences, parses them, optionally runs geolocation / feature
    extraction, and writes one report section per event to the output path.
    Any other command prints an error message and returns.
    """
    def _stamp(message):
        # Read the clock once so hour/minute/second describe one instant.
        now = datetime.now()
        print('{}...{}:{}.{}'.format(message, now.hour, now.minute,
                                     now.second))

    def _load_pickle(name):
        # Pickle data is read in binary mode, and the context manager closes
        # the handle that a bare open() call would leave dangling.
        # NOTE(review): pickle.load can execute code embedded in the file;
        # presumably these are trusted bundled data files -- confirm.
        with open(_get_data(name), 'rb') as handle:
            return pickle.load(handle)

    actors, verbs = parse_config()  # values are not used below

    cli_args = parse_cli_args()
    cli_command = cli_args.command_name
    inputs = cli_args.inputs
    out_path = cli_args.output
    username = cli_args.username
    geo_boolean = cli_args.geolocate
    feature_boolean = cli_args.features

    if cli_command != 'parse':
        print('Please enter a valid command!')
        return

    _stamp('Reading in data')
    ubt_chunker = _load_pickle('ubt_chunker_trained.pickle')
    pos_tagger = _load_pickle('maxent_treebank_pos_tagger.pickle')

    _stamp('Reading in sentences')
    events = read_data(inputs)

    _stamp('Parsing sentences')
    # The parsed fields are read back from ``events`` below; the return
    # value of parse.parse() is not used.
    parse.parse(events, ubt_chunker, pos_tagger, cli_args.n_cores)
    _stamp('Done processing')

    if geo_boolean or feature_boolean:
        _stamp('Feature extraction')
        postprocess.process(events, username, geo_boolean, feature_boolean)
        _stamp('Done feature extraction')

    print('Writing the events to file...')
    # Collect the pieces and join once instead of growing a string with +=.
    parts = []
    for event_id in events:
        record = events[event_id]
        parts.append('\n=======================\n\n')
        parts.append('event id: {}\n\n'.format(event_id))
        parts.append('POS tagged sent:\n {}\n\n'.format(record['tagged']))
        parts.append('NP tagged sent:\n {}\n\n'.format(record['sent_tree']))
        parts.append('Noun phrases: \n {}\n'.format(record['noun_phrases']))
        parts.append('Verb phrases: \n {}\n\n'.format(record['verb_phrases']))
        parts.append('Parse time: \n {}\n'.format(record['parse_chunk_time']))
        if geo_boolean:
            parts.append('\nGeolocate: \n {}, {}\n'.format(record['lat'],
                                                           record['lon']))
        if feature_boolean:
            parts.append('Feature extract: \n {}\n'.format(
                record['num_involved']))

    with open(out_path, 'w') as f:
        f.write(''.join(parts))
def process_plot():
    """Post-process the CSV data and produce the per-file and comparison plots.

    Calls ``postprocess.process`` on ``csvdirname``, plots every file listed
    in ``config.map_pid_filename`` with ``intervalplot.plot``, then passes
    the full list of CSV paths to ``compareintervalplot.plot``.
    """
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("Post processing")
    postprocess.process(csvdirname)

    # Build each CSV path once and reuse the list for the comparison plot.
    csv_files = []
    for entry in config.map_pid_filename.values():
        name = entry["filename"]
        csv_file = csvdirname + name
        print("Plot and Store:  " + name)
        print(csv_file)
        intervalplot.plot(csv_file, seq=resultdirname + name)
        csv_files.append(csv_file)

    compareintervalplot.plot(csv_files, seq=resultdirname + "compare")
    # NOTE(review): plots are written under resultdirname, yet this message
    # names csvdirname -- confirm which directory is intended.
    print("Results can be found in: " + csvdirname)
def execute(benchmark):
    """Run ``benchmark`` while a logging thread records to a fresh CSV file.

    A new numbered folder is created under the ``data`` directory next to
    this module (one above the highest existing number, or ``0`` when there
    is none), and ``out.csv`` inside it is handed to the logging thread.

    Args:
        benchmark: object providing ``logging_delay`` and ``run()``.

    Returns:
        Whatever ``postprocess.process(output_name, score)`` returns.

    Raises:
        Any exception raised by ``benchmark.run()``; the logging thread is
        stopped and joined before it propagates.
    """
    # Resolve the module directory explicitly instead of slicing a fixed
    # number of characters off __file__, which only works for a 10-character
    # file name and yields '' (i.e. the filesystem root) for a relative path.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(base_dir, 'data')

    # Only purely numeric entries count as earlier runs; stray files in the
    # data directory no longer break the int() conversion.
    previous_runs = []
    if os.path.isdir(data_dir):
        previous_runs = sorted(
            int(name) for name in os.listdir(data_dir) if name.isdecimal()
        )

    if previous_runs:
        print(previous_runs)
        benchmark_num = previous_runs[-1] + 1
    else:
        benchmark_num = 0
    print('using folder ' + str(benchmark_num))

    output_name = os.path.join(data_dir, str(benchmark_num), 'out.csv')
    os.makedirs(os.path.dirname(output_name), exist_ok=True)
    logging_delay = benchmark.logging_delay

    # Event that tells the logging thread the benchmark has finished.
    finished = threading.Event()

    log = logThread(finished, output_name, logging_delay)
    log.start()
    try:
        score = benchmark.run()
        print('score', score)
    finally:
        # Always stop and join the logger, even when the benchmark raises.
        finished.set()
        log.join()
    return postprocess.process(output_name, score)
def signal_handler(signal, frame):
    """Post-process and plot the collected data after Ctrl+C, then exit.

    Calls ``postprocess.process`` on ``csvdirname``, plots every file listed
    in ``config.map_pid_filename``, draws the comparison plot and terminates
    the process with exit status 0.

    Args:
        signal: first handler argument (unused here); presumably the signal
            number -- the name shadows the ``signal`` module inside this
            function.
        frame: second handler argument (unused here).
    """
    # Single-argument print() calls behave the same on Python 2 and 3.
    print("You pressed Ctrl+C")
    print("Post processing")
    postprocess.process(csvdirname)

    # Build each CSV path once and reuse the list for the comparison plot.
    csv_files = []
    for entry in config.map_pid_filename.values():
        name = entry['filename']
        csv_file = csvdirname + name
        print("Plot and Store:  " + name)
        intervalplot.plot(csv_file, seq=resultdirname + name)
        csv_files.append(csv_file)

    compareintervalplot.plot(csv_files, seq=resultdirname + 'compare')
    # NOTE(review): plots are written under resultdirname, yet this message
    # names csvdirname -- confirm which directory is intended.
    print("Results can be found in: " + csvdirname)
    sys.exit(0)
def main():
    """Dispatch the CLI command and write the parse results to a text file.

    Supported commands:

    * ``parse`` -- read the input sentences and parse them in-process.
    * ``batch_parse`` -- hand the inputs straight to ``parse.batch_parse``.
    * ``parallel_parse`` -- split the events into chunks and parse them
      through a ``pp`` job server.

    When geolocation or feature extraction is requested, the events are
    passed to ``postprocess.process`` after parsing. Any other command
    prints an error message and returns without writing anything.
    """
    actors, verbs, stanford_dir = parse_config()  # actors/verbs unused below

    cli_args = parse_cli_args()
    cli_command = cli_args.command_name
    inputs = cli_args.inputs
    out_path = cli_args.output
    username = cli_args.username
    geo_boolean = cli_args.geolocate
    feature_boolean = cli_args.features

    def _stamp(message):
        # Read the clock once so hour/minute/second describe one instant.
        now = datetime.now()
        print('{}...{}:{}.{}'.format(message, now.hour, now.minute,
                                     now.second))

    def _postprocess(events):
        # Optional geolocation / feature extraction step.
        if geo_boolean or feature_boolean:
            _stamp('Feature extraction')
            postprocess.process(events, username, geo_boolean,
                                feature_boolean)
            _stamp('Done')

    def _render(results):
        # Build the report from pieces and join once instead of using +=.
        parts = []
        for event in results:
            parts.append('\n=======================\n\n')
            parts.append('event id: {}\n\n'.format(event))
            sent_inf = results[event]['sent_info']
            sents = sent_inf['sents']
            for sent in sents:
                try:
                    parts.append('Sentence {}:\n'.format(sent))
                    info = sents[sent]
                    parts.append('Word info:\n {}\n\n'.format(
                        info['word_info']))
                    parts.append('Parse tree:\n {}\n\n'.format(
                        info['parse_tree']))
                    parts.append('Word dependencies:\n {}\n\n'.format(
                        info['dependencies']))
                    parts.append('Coref info:\n\n')
                    try:
                        parts.append('Corefs:\n {}\n\n'.format(
                            sent_inf['coref_info'][sent]['corefs']))
                        parts.append('Coref tree:\n {}\n\n'.format(
                            info['coref_tree']))
                    except KeyError:
                        # Coreference data is treated as optional.
                        pass
                    parts.append('----------------------\n\n')
                except KeyError:
                    print('There was a key error')
                    print(results[event].keys())
                    print(sent_inf['sents'][sent].keys())
        return ''.join(parts)

    if cli_command == 'parse':
        _stamp('Reading in sentences')
        events = read_data(inputs)
        _stamp('Parsing sentences')
        results = parse.parse(events, stanford_dir)
        _stamp('Done processing')
        _postprocess(events)
    elif cli_command == 'batch_parse':
        _stamp('Running')
        _stamp('Parsing sentences')
        results = parse.batch_parse(inputs, stanford_dir)
        _stamp('Done processing')
        if geo_boolean or feature_boolean:
            # This branch never reads an ``events`` mapping, so passing one
            # to postprocess.process() raised NameError and discarded the
            # parsed results. Skip the step and still write the report.
            # NOTE(review): confirm what batch mode should post-process.
            print('Skipping feature extraction: '
                  'not available for batch_parse')
    elif cli_command == 'parallel_parse':
        # Imported lazily so the other commands do not need ``pp``.
        import pp

        cpus = cli_args.n_cores
        _stamp('Reading in sentences')
        events = read_data(inputs)

        ppservers = ()
        if cpus == -1:
            job_server = pp.Server(ppservers=ppservers)
        else:
            job_server = pp.Server(cpus, ppservers=ppservers)

        n_events = len(events)
        n_cpus = job_server.get_ncpus()
        per_cpu = n_events // n_cpus
        if per_cpu == 0:
            # Fewer events than CPUs (or none at all): one event per chunk
            # avoids the modulo/division by zero and a zero range step.
            chunk_size = 1
        elif n_events % per_cpu == 1:
            # Spread over one CPU fewer rather than leave a remainder of one.
            chunk_size = n_events // (n_cpus - 1)
        else:
            chunk_size = per_cpu

        # Materialise the items once instead of once per chunk.
        items = list(events.items())
        chunks = [dict(items[start:start + chunk_size])
                  for start in range(0, n_events, chunk_size)]

        _stamp('Parsing sentences')
        jobs = [job_server.submit(parse.parse, (chunk, stanford_dir,),
                                  (parse.parse_sents,),
                                  ("corenlp", "nltk.tree", "utilities",))
                for chunk in chunks]

        results = dict()
        for job in jobs:
            # Calling the job object waits for and returns its result.
            results.update(job())
        _stamp('Done processing')
        _postprocess(events)
    else:
        print('Please enter a valid command!')
        # Nothing was parsed, so there is nothing to write.
        return

    print('Writing the events to file...')
    with open(out_path, 'w') as f:
        f.write(_render(results))
def main():
    """Run the ``parse`` command end to end and write a text report.

    Reads the configuration and the command-line arguments. For the
    ``parse`` command it loads the pickled chunker and POS tagger, reads the
    input sentences, parses them, optionally runs geolocation / feature
    extraction, and writes one report section per event to the output path.
    Any other command prints an error message and returns.
    """
    def _stamp(message):
        # Read the clock once so hour/minute/second describe one instant.
        now = datetime.now()
        print('{}...{}:{}.{}'.format(message, now.hour, now.minute,
                                     now.second))

    def _load_pickle(name):
        # Pickle data is read in binary mode, and the context manager closes
        # the handle that a bare open() call would leave dangling.
        # NOTE(review): pickle.load can execute code embedded in the file;
        # presumably these are trusted bundled data files -- confirm.
        with open(_get_data(name), 'rb') as handle:
            return pickle.load(handle)

    actors, verbs = parse_config()  # values are not used below

    cli_args = parse_cli_args()
    cli_command = cli_args.command_name
    inputs = cli_args.inputs
    out_path = cli_args.output
    username = cli_args.username
    geo_boolean = cli_args.geolocate
    feature_boolean = cli_args.features

    if cli_command != 'parse':
        print('Please enter a valid command!')
        return

    _stamp('Reading in data')
    ubt_chunker = _load_pickle('ubt_chunker_trained.pickle')
    pos_tagger = _load_pickle('maxent_treebank_pos_tagger.pickle')

    _stamp('Reading in sentences')
    events = read_data(inputs)

    _stamp('Parsing sentences')
    # The parsed fields are read back from ``events`` below; the return
    # value of parse.parse() is not used.
    parse.parse(events, ubt_chunker, pos_tagger, cli_args.n_cores)
    _stamp('Done processing')

    if geo_boolean or feature_boolean:
        _stamp('Feature extraction')
        postprocess.process(events, username, geo_boolean, feature_boolean)
        _stamp('Done feature extraction')

    print('Writing the events to file...')
    # Collect the pieces and join once instead of growing a string with +=.
    parts = []
    for event_id in events:
        record = events[event_id]
        parts.append('\n=======================\n\n')
        parts.append('event id: {}\n\n'.format(event_id))
        parts.append('POS tagged sent:\n {}\n\n'.format(record['tagged']))
        parts.append('NP tagged sent:\n {}\n\n'.format(record['sent_tree']))
        parts.append('Noun phrases: \n {}\n'.format(record['noun_phrases']))
        parts.append('Verb phrases: \n {}\n\n'.format(record['verb_phrases']))
        parts.append('Parse time: \n {}\n'.format(record['parse_chunk_time']))
        if geo_boolean:
            parts.append('\nGeolocate: \n {}, {}\n'.format(record['lat'],
                                                           record['lon']))
        if feature_boolean:
            parts.append('Feature extract: \n {}\n'.format(
                record['num_involved']))

    with open(out_path, 'w') as f:
        f.write(''.join(parts))