def main(file_details, geo_details, server_details, petrarch_version,
         logger_file=None, run_filter=None, run_date='', version=''):
    """
    Main function to run all the things: scrape, format, code events with
    PETRARCH, post-process, and upload the results.

    Parameters
    ----------
    file_details: Named tuple.
        All the other config information not in ``server_details``.
    geo_details: Named tuple.
        Settings for geocoding.
    server_details: Named tuple.
        Config information specifically related to the remote server for
        FTP uploading.
    petrarch_version: String.
        Which version of PETRARCH to use. Must be '1' or '2'.
    logger_file: String.
        Path to a log file. Defaults to ``None`` and opens a
        ``PHOX_pipeline.log`` file in the current working directory.
    run_filter: String.
        Whether to run the ``oneaday_formatter``. Takes 'True' or 'False'
        (strings) as values.
    run_date: String.
        Date of the format YYYYMMDD. The pipeline will run using this date.
        If not specified the pipeline will run with ``current_date`` minus
        one day.
    version: String.
        Data version number forwarded to postprocessing. Required; the
        pipeline stops before postprocessing when it is empty.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    if petrarch_version == '1':
        from petrarch import petrarch
        logger.info("Using original Petrarch version")
    elif petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        logger.info("Using Petrarch2")
    else:
        logger.error("Invalid Petrarch version. Argument must be '1' or '2'")
        # BUG FIX: bail out here -- falling through left ``petrarch`` unbound
        # and crashed later with a NameError.
        sys.exit()

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
    else:
        # Default: process yesterday's stories (UTC).
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
    # Format the date string once instead of duplicating it in both branches.
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if geo_details.geo_service == "Mordecai":
        # Ping the geolocation service; it is best-effort, so a dead service
        # only warns rather than aborting the pipeline.
        dest = "{0}:{1}/places".format(geo_details.mordecai_host,
                                       geo_details.mordecai_port)
        try:
            out = requests.get(dest)
            assert out.status_code == 200
        except (AssertionError, requests.exceptions.ConnectionError):
            print("Mordecai geolocation service not responding. Continuing anyway...")
    elif geo_details.geo_service == "CLIFF":
        print("CLIFF")
    else:
        print("Invalid geo service name. Must be 'CLIFF' or 'Mordecai'. Continuing...")

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version,
                         file_details, server_details, geo_details)
    else:
        print("Please specify a data version number. Program ending.")
        # BUG FIX: the message promises the program ends, so actually end it
        # instead of falling through to the uploader.
        sys.exit()

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    try:
        uploader.main(date_string, server_details, file_details)
    except Exception as e:
        # Uploading is best-effort; keep the generated events on failure.
        logger.warning("Error on the upload portion. {}".format(e))
        print("""Error on the uploader. This step isn't absolutely necessary. Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
def main(file_details, server_details, logger_file=None, run_filter=None,
         run_date='', version=''):
    """
    Main function to run all the things: scrape, format, code events with
    PETRARCH, post-process, and upload the results.

    Parameters
    ----------
    file_details: Named tuple.
        All the other config information not in ``server_details``.
    server_details: Named tuple.
        Config information specifically related to the remote server for
        FTP uploading.
    logger_file: String.
        Path to a log file. Defaults to ``None`` and opens a
        ``PHOX_pipeline.log`` file in the current working directory.
    run_filter: String.
        Whether to run the ``oneaday_formatter``. Takes 'True' or 'False'
        (strings) as values.
    run_date: String.
        Date of the format YYYYMMDD. The pipeline will run using this date.
        If not specified the pipeline will run with ``current_date`` minus
        one day.
    version: String.
        Data version number forwarded to postprocessing. Required; the
        pipeline stops before postprocessing when it is empty.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
    else:
        # Default: process yesterday's stories (UTC).
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
    # Format the date string once instead of duplicating it in both branches.
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)
    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version,
                         file_details, server_details)
    else:
        print("Please specify a data version number. Program ending.")
        # BUG FIX: the message promises the program ends, so actually end it
        # instead of falling through to the uploader.
        sys.exit()

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    try:
        uploader.main(date_string, server_details, file_details)
    except Exception as e:
        # Uploading is best-effort; keep the generated events on failure.
        logger.warning("Error on the upload portion. {}".format(e))
        print("""Error on the uploader. This step isn't absolutely necessary. Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
def postprocessing(main_config_fpath):
    """Run postprocessing on the results of the forward pass.

    Parameters
    ----------
    main_config_fpath: String.
        Path to the main configuration file, passed straight through to
        ``postprocess.main``.
    """
    # BUG FIX: the Python 2 `print` statement is a SyntaxError on Python 3;
    # the rest of this file uses the print() function.
    print('Postprocessing results of forward pass...')
    postprocess.main(main_config_fpath)
import postprocess as PP
import GetResults as GR
import pandas as pd
import numpy as np
import time
import os

# Root directory containing one sub-folder per simulation run.
Directory = '/gpfs/homefs/artorg/ms20s284/SIMULATIONS/material-model-optimization/neo-hookean_elements-10/simulations/'
# Parameter ranges explored by the cost-function sweep
# (expects columns Nu, Mu, Alpha).
Values = pd.read_csv('costfunction_exploration/Ranges.txt', sep=',', decimal='.')
# NOTE(review): this is an sftp:// URL, but os.path.exists() below only
# checks local filesystem paths -- presumably the local /gpfs/... path was
# intended; confirm before relying on the wait loop.
LastFile = 'sftp://[email protected]/gpfs/homefs/artorg/ms20s284/SIMULATIONS/material-model-optimization/neo-hookean_elements-10/simulations/sim_1330/results.pkl'

# Consume the parameter table two rows at a time: rows i and i+1 give the
# lower/upper bounds for Nu, Mu and Alpha of one batch of simulations.
for Iteration in range(0, len(Values), 2):
    NuVal = [Values.loc[Iteration].Nu, Values.loc[Iteration + 1].Nu]
    MuVal = [Values.loc[Iteration].Mu, Values.loc[Iteration + 1].Mu]
    AlVal = [Values.loc[Iteration].Alpha, Values.loc[Iteration + 1].Alpha]
    # NOTE(review): `In` is never imported in this script -- as written this
    # line raises NameError. An initialization/input module import appears to
    # be missing; confirm against the original project.
    In.Initialization(NuVal, MuVal, AlVal)
    PP.main('analyze')
    # Poll once a minute until the final simulation's results file appears
    # (i.e. the whole batch has finished) before collecting results.
    while not os.path.exists(LastFile):
        time.sleep(60)
    DataFrame = GR.GetRes(Directory)

    # Save Table
    DataFrame.to_csv('Results' + str(Iteration) + '.csv', index=False)
def main(file_details, server_details, logger_file=None, run_filter=None,
         run_date=None):
    """
    Main function to run all the things: scrape, format, code events with
    PETRARCH, post-process, and upload the results.

    Parameters
    ----------
    file_details: Named tuple.
        All the other config information not in ``server_details``.
    server_details: Named tuple.
        Config information specifically related to the remote server for
        FTP uploading.
    logger_file: String.
        Path to a log file. Defaults to ``None`` and opens a
        ``PHOX_pipeline.log`` file in the current working directory.
    run_filter: String.
        Whether to run the ``oneaday_formatter``. Takes 'True' or 'False'
        (strings) as values.
    run_date: String.
        Date of the format YYYYMMDD. The pipeline will run using this date.
        If not specified the pipeline will run with ``current_date`` minus
        one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
    else:
        # Default: process yesterday's stories (UTC).
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
    # Format the date string once instead of duplicating it in both branches.
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)
    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    postprocess.main(formatted_results, date_string, file_details)

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    uploader.main(date_string, server_details, file_details)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
def main(message, logger_file=None, run_date='', version=''):
    """
    Main function to run all the things: send one story to the Hypnos
    extraction service and post-process any events it returns.

    Parameters
    ----------
    message: Dict.
        A single story record; the ``entry_id`` and ``cleaned_text`` keys
        are read here.
    logger_file: String.
        Path to a log file. Defaults to ``None`` and opens a
        ``PHOX_pipeline.log`` file in the current working directory.
    run_date: String.
        Date of the format YYYYMMDD. Unused in this variant; processing
        always uses the current UTC date.
    version: String.
        Data version number forwarded to postprocessing.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    process_date = datetime.datetime.utcnow()
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    # No remote upload in this variant; postprocess still expects the arg.
    server_details = ''

    logger.info("Extracting date.")
    print("Extracting date.")
    date = formatter.get_date(message, process_date)

    logger.info("Sending to Hypnos.")
    story_id = message['entry_id']
    print(story_id)
    text = message['cleaned_text']
    headers = {'Content-Type': 'application/json'}
    payload = {'text': text, 'id': story_id, 'date': date}
    data = json.dumps(payload)
    hypnos_ip = os.environ['HYPNOS_PORT_5002_TCP_ADDR']
    hypnos_url = 'http://{}:5002/hypnos/extract'.format(hypnos_ip)
    r = requests.get(hypnos_url, data=data, headers=headers)
    print(r.status_code)

    if r.status_code == 200:
        logger.info("Running postprocess.py")
        print("Running postprocess.py")
        hypnos_res = r.json()
        print(hypnos_res)
        # BUG FIX: dict.iteritems() does not exist on Python 3; .items()
        # behaves identically here on both 2 and 3. Also dropped the unused
        # ``events = []`` accumulator.
        for k, v in hypnos_res[story_id]['sents'].items():
            if 'events' in v:
                sent = hypnos_res[story_id]['sents'][k]
                for event in v['events']:
                    event_tup = (date, event[0], event[1], event[2])
                    formatted, actors = postprocess.main(event_tup, sent,
                                                         version,
                                                         server_details)
                    logger.info(formatted)
                    logger.info(actors)
                    print(formatted, actors)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
def main(message, logger_file=None, run_date='', version=''):
    """
    Main function to run all the things: send one story to the Hypnos
    extraction service and post-process any events it returns.

    Parameters
    ----------
    message: Dict.
        A single story record; the ``entry_id`` and ``cleaned_text`` keys
        are read here.
    logger_file: String.
        Path to a log file. Defaults to ``None`` and opens a
        ``PHOX_pipeline.log`` file in the current working directory.
    run_date: String.
        Date of the format YYYYMMDD. Unused in this variant; processing
        always uses the current UTC date.
    version: String.
        Data version number forwarded to postprocessing.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    process_date = datetime.datetime.utcnow()
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    # No remote upload in this variant; postprocess still expects the arg.
    server_details = ''

    logger.info("Extracting date.")
    print("Extracting date.")
    date = formatter.get_date(message, process_date)

    logger.info("Sending to Hypnos.")
    story_id = message['entry_id']
    print(story_id)
    text = message['cleaned_text']
    headers = {'Content-Type': 'application/json'}
    payload = {'text': text, 'id': story_id, 'date': date}
    data = json.dumps(payload)
    hypnos_ip = os.environ['HYPNOS_PORT_5002_TCP_ADDR']
    hypnos_url = 'http://{}:5002/hypnos/extract'.format(hypnos_ip)
    r = requests.get(hypnos_url, data=data, headers=headers)
    print(r.status_code)

    if r.status_code == 200:
        logger.info("Running postprocess.py")
        print("Running postprocess.py")
        hypnos_res = r.json()
        print(hypnos_res)
        # BUG FIX: dict.iteritems() does not exist on Python 3; .items()
        # behaves identically here on both 2 and 3. Also dropped the unused
        # ``events = []`` accumulator.
        for k, v in hypnos_res[story_id]['sents'].items():
            if 'events' in v:
                sent = hypnos_res[story_id]['sents'][k]
                for event in v['events']:
                    event_tup = (date, event[0], event[1], event[2])
                    formatted, actors = postprocess.main(event_tup, sent,
                                                         version,
                                                         server_details)
                    logger.info(formatted)
                    logger.info(actors)
                    print(formatted, actors)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
def main(file_details, geo_details, server_details, petrarch_version,
         run_date, mongo_details, logger_file=None, run_filter=None,
         version=''):
    """
    Main function to run all the things: scrape, format, code events with
    PETRARCH, and post-process the results. Uploading is intentionally
    disabled in this variant.

    Parameters
    ----------
    file_details: Named tuple.
        All the other config information not in ``server_details``.
    geo_details: Named tuple.
        Settings for geocoding.
    server_details: Named tuple.
        Config information specifically related to the remote server for
        FTP uploading.
    petrarch_version: String.
        Which version of Petrarch to use. Must be '1' or '2'.
    run_date: String.
        Date of the format YYYYMMDD. The pipeline will run using this date.
        If not specified the pipeline will run with ``current_date`` minus
        one day.
    mongo_details: Named tuple.
        Mongo connection settings. Accepted for interface compatibility;
        not referenced in this function body.
    logger_file: String.
        Path to a log file. Defaults to ``None`` and opens a
        ``PHOX_pipeline.log`` file in the current working directory.
    run_filter: String.
        Whether to run the ``oneaday_formatter``. Takes 'True' or 'False'
        (strings) as values.
    version: String.
        Data version number forwarded to postprocessing. Required; the
        pipeline stops before postprocessing when it is empty.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    if petrarch_version == '1':
        from petrarch import petrarch
        logger.info("Using original Petrarch version")
    elif petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        logger.info("Using Petrarch2")
    else:
        logger.error("Invalid Petrarch version. Argument must be '1' or '2'")
        # BUG FIX: bail out here -- falling through left ``petrarch`` unbound
        # and crashed later with a NameError.
        sys.exit()

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
    else:
        # Default: process yesterday's stories (UTC).
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
    # Format the date string once instead of duplicating it in both branches.
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    # Get the stories for the desired date from the DB
    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if geo_details.geo_service == "Mordecai":
        # Ping the geolocation service; it is best-effort, so a dead service
        # only warns rather than aborting the pipeline.
        dest = "{0}:{1}/places".format(geo_details.mordecai_host,
                                       geo_details.mordecai_port)
        try:
            out = requests.get(dest)
            assert out.status_code == 200
        except (AssertionError, requests.exceptions.ConnectionError):
            print(
                "Mordecai geolocation service not responding. Continuing anyway..."
            )
    elif geo_details.geo_service == "CLIFF":
        print("CLIFF")
    else:
        print(
            "Invalid geo service name. Must be 'CLIFF' or 'Mordecai'. Continuing..."
        )

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # BUG FIX: this call was truncated in the original (never closed and
        # missing ``parsed=True``), which is a syntax error.
        petr_results = petrarch.run_pipeline(formatted,
                                             config="petr_config.ini",
                                             write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted,
                                             config="petr_config.ini",
                                             write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version,
                         file_details, server_details, geo_details)
    else:
        print("Please specify a data version number. Program ending.")
        # BUG FIX: the message promises the program ends, so actually end it.
        sys.exit()

    # NOTE: the phox_uploader step is intentionally disabled in this variant
    # of the pipeline.

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())