Example 1
def main(file_details, geo_details, server_details, petrarch_version, logger_file=None, run_filter=None,
         run_date='', version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    geo_details: Named tuple.
                  Settings for geocoding.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    petrarch_version: String.
                       Which version of Petrarch to use. Must be '1' or '2'.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.

    version: String.
                Data version number passed to ``postprocess``. Required for
                the postprocessing step.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    if petrarch_version == '1':
        from petrarch import petrarch
        logger.info("Using original Petrarch version")
    elif petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        logger.info("Using Petrarch2")
    else:
        logger.error("Invalid Petrarch version. Argument must be '1' or '2'")
        sys.exit()

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)
    if geo_details.geo_service == "Mordecai":
        dest = "{0}:{1}/places".format(geo_details.mordecai_host, geo_details.mordecai_port)
        try:
            out = requests.get(dest)
            assert out.status_code == 200
        except (AssertionError, requests.exceptions.ConnectionError):
            print("Mordecai geolocation service not responding. Continuing anyway...")
    elif geo_details.geo_service == "CLIFF":
        print("CLIFF")
    else:
        print("Invalid geo service name. Must be 'CLIFF' or 'Mordecai'. Continuing...")

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)
    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        #        petrarch.run_pipeline(formatted,
        #                              '{}{}.txt'.format(file_details.fullfile_stem,
        #                                                date_string), parsed=True)
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version, file_details,
                         server_details, geo_details)
    else:
        print("Please specify a data version number. Program ending.")

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    try:
        uploader.main(date_string, server_details, file_details)
    except Exception as e:
        logger.warning("Error on the upload portion. {}".format(e))
        print("""Error on the uploader. This step isn't absolutely necessary.
              Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
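
For orientation, here is a minimal sketch of how this entry point might be invoked. The named-tuple field names and config values below are illustrative assumptions, not the pipeline's actual configuration schema.

from collections import namedtuple

# Hypothetical config shapes; the real pipeline builds these from a config file.
FileDetails = namedtuple('FileDetails', ['fullfile_stem'])
GeoDetails = namedtuple('GeoDetails', ['geo_service', 'mordecai_host', 'mordecai_port'])
ServerDetails = namedtuple('ServerDetails', ['host', 'username', 'password'])

main(FileDetails(fullfile_stem='phox_'),
     GeoDetails(geo_service='Mordecai', mordecai_host='http://localhost', mordecai_port=5000),
     ServerDetails(host='ftp.example.com', username='user', password='secret'),
     petrarch_version='2',
     run_filter='True',    # string flag, per the docstring
     run_date='20240115',  # YYYYMMDD
     version='v1')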
Example 2
def main(file_details,
         server_details,
         logger_file=None,
         run_filter=None,
         run_date='',
         version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.

    version: String.
                Data version number passed to ``postprocess``. Required for
                the postprocessing step.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details, process_date,
                               date_string)

    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        #        petrarch.run_pipeline(formatted,
        #                              '{}{}.txt'.format(file_details.fullfile_stem,
        #                                                date_string), parsed=True)
        petr_results = petrarch.run_pipeline(formatted,
                                             write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted,
                                             write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version, file_details,
                         server_details)
    else:
        print("Please specify a data version number. Program ending.")

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    try:
        uploader.main(date_string, server_details, file_details)
    except Exception as e:
        logger.warning("Error on the upload portion. {}".format(e))
        print("""Error on the uploader. This step isn't absolutely necessary.
              Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
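
As an aside, the three-field format used to build date_string is equivalent to strftime('%Y%m%d'), since {:02d} leaves the four-digit year intact:

import datetime

d = datetime.datetime(2024, 1, 15)
assert '{:02d}{:02d}{:02d}'.format(d.year, d.month, d.day) == d.strftime('%Y%m%d')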
Example 3
def postprocessing(main_config_fpath):
    '''Run postprocessing'''
    print('Postprocessing results of forward pass...')
    postprocess.main(main_config_fpath)
Example 4
import postprocess as PP
import GetResults as GR
import pandas as pd
import numpy as np
import time
import os
import In  # simulation-setup module assumed to provide Initialization()

Directory = '/gpfs/homefs/artorg/ms20s284/SIMULATIONS/material-model-optimization/neo-hookean_elements-10/simulations/'
Values = pd.read_csv('costfunction_exploration/Ranges.txt',
                     sep=',',
                     decimal='.')
# Local path of the final simulation's results file; polled below with
# os.path.exists, which requires a filesystem path rather than an sftp:// URL.
LastFile = os.path.join(Directory, 'sim_1330', 'results.pkl')

for Iteration in range(0, len(Values), 2):

    NuVal = [Values.loc[Iteration].Nu, Values.loc[Iteration + 1].Nu]
    MuVal = [Values.loc[Iteration].Mu, Values.loc[Iteration + 1].Mu]
    AlVal = [Values.loc[Iteration].Alpha, Values.loc[Iteration + 1].Alpha]

    In.Initialization(NuVal, MuVal, AlVal)

    PP.main('analyze')

    while not os.path.exists(LastFile):
        time.sleep(60)

    DataFrame = GR.GetRes(Directory)

    # Save Table
    DataFrame.to_csv('Results' + str(Iteration) + '.csv', index=False)
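
The while loop above blocks forever if results.pkl never appears. A minimal variant with an upper bound on the wait (the 6-hour timeout is an arbitrary assumption) could look like:

import os
import time

def wait_for_file(path, poll_seconds=60, timeout_seconds=6 * 3600):
    """Poll until path exists; return False if the timeout elapses first."""
    waited = 0
    while not os.path.exists(path):
        if waited >= timeout_seconds:
            return False
        time.sleep(poll_seconds)
        waited += poll_seconds
    return True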
Example 5
def main(file_details, server_details, logger_file=None, run_filter=None,
         run_date=None):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)

    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details,
                               process_date, date_string)

    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        #        petrarch.run_pipeline(formatted,
        #                              '{}{}.txt'.format(file_details.fullfile_stem,
        #                                                date_string), parsed=True)
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted, write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    postprocess.main(formatted_results, date_string, file_details)

    logger.info("Running phox_uploader.py")
    print("Running phox_uploader.py")
    uploader.main(date_string, server_details, file_details)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
Example 6
def main(message, logger_file=None, run_date='', version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    message: Dict.
                Story to process. Must contain ``entry_id`` and
                ``cleaned_text`` keys.

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_date: String.
                Accepted for interface compatibility; this variant always
                runs against the current UTC date.

    version: String.
                Data version number passed to ``postprocess``.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    process_date = datetime.datetime.utcnow()
    date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                              process_date.month,
                                              process_date.day)
    logger.info('Date string: {}'.format(date_string))
    print('Date string:', date_string)

    server_details = ''

    logger.info("Extracting date.")
    print("Extracting date.")
    date = formatter.get_date(message, process_date)

    logger.info("Sending to Hypnos.")
    story_id = message['entry_id']
    print(story_id)
    text = message['cleaned_text']
    headers = {'Content-Type': 'application/json'}
    payload = {'text': text, 'id': story_id, 'date': date}
    data = json.dumps(payload)
    hypnos_ip = os.environ['HYPNOS_PORT_5002_TCP_ADDR']
    hypnos_url = 'http://{}:5002/hypnos/extract'.format(hypnos_ip)
    r = requests.get(hypnos_url, data=data, headers=headers)

    print(r.status_code)

    if r.status_code == 200:
        logger.info("Running postprocess.py")
        print("Running postprocess.py")

        hypnos_res = r.json()
        print(hypnos_res)
        for k, v in hypnos_res[story_id]['sents'].items():
            if 'events' in v:
                sent = hypnos_res[story_id]['sents'][k]
                for event in v['events']:
                    event_tup = (date, event[0], event[1], event[2])
                    formatted, actors = postprocess.main(
                        event_tup, sent, version, server_details)
                    logger.info(formatted)
                    logger.info(actors)
                    print(formatted, actors)

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
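
A minimal sketch of the message this variant expects, based on the keys the function reads (entry_id, cleaned_text). The address and values are made-up assumptions; HYPNOS_PORT_5002_TCP_ADDR must point at a running Hypnos instance.

import os

os.environ.setdefault('HYPNOS_PORT_5002_TCP_ADDR', '127.0.0.1')  # assumed address

message = {
    'entry_id': 'story_001',  # becomes the Hypnos story id
    'cleaned_text': 'Protesters marched in the capital on Tuesday.',
}
main(message, logger_file='PHOX_pipeline.log', version='v1')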
Example 7
def main(file_details,
         geo_details,
         server_details,
         petrarch_version,
         run_date,
         mongo_details,
         logger_file=None,
         run_filter=None,
         version=''):
    """
    Main function to run all the things.

    Parameters
    ----------

    file_details: Named tuple.
                    All the other config information not in ``server_details``.

    geo_details: Named tuple.
                  Settings for geocoding.

    server_details: Named tuple.
                    Config information specifically related to the remote
                    server for FTP uploading.

    petrarch_version: String.
                       Which version of Petrarch to use. Must be '1' or '2'

    logger_file: String.
                    Path to a log file. Defaults to ``None`` and opens a
                    ``PHOX_pipeline.log`` file in the current working
                    directory.

    run_filter: String.
                Whether to run the ``oneaday_formatter``. Takes True or False
                (strings) as values.

    run_date: String.
                Date of the format YYYYMMDD. The pipeline will run using this
                date. If not specified the pipeline will run with
                ``current_date`` minus one day.

    mongo_details: Named tuple.
                    Configuration for the source MongoDB instance (not used
                    within this function).

    version: String.
                Data version number passed to ``postprocess``. Required for
                the postprocessing step.
    """
    if logger_file:
        utilities.init_logger(logger_file)
    else:
        utilities.init_logger('PHOX_pipeline.log')
    # get a local copy for the pipeline
    logger = logging.getLogger('pipeline_log')

    if petrarch_version == '1':
        from petrarch import petrarch
        logger.info("Using original Petrarch version")
    elif petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        logger.info("Using Petrarch2")
    else:
        logger.error("Invalid Petrarch version. Argument must be '1' or '2'")
        sys.exit()

    print('\nPHOX.pipeline run:', datetime.datetime.utcnow())

    if run_date:
        process_date = dateutil.parser.parse(run_date)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)
    else:
        process_date = datetime.datetime.utcnow() - datetime.timedelta(days=1)
        date_string = '{:02d}{:02d}{:02d}'.format(process_date.year,
                                                  process_date.month,
                                                  process_date.day)
        logger.info('Date string: {}'.format(date_string))
        print('Date string:', date_string)

    # Get the stories for the desired date from the DB
    results, scraperfilename = scraper_connection.main(process_date,
                                                       file_details)
    if geo_details.geo_service == "Mordecai":
        dest = "{0}:{1}/places".format(geo_details.mordecai_host,
                                       geo_details.mordecai_port)
        try:
            out = requests.get(dest)
            assert out.status_code == 200
        except (AssertionError, requests.exceptions.ConnectionError):
            print(
                "Mordecai geolocation service not responding. Continuing anyway..."
            )
    elif geo_details.geo_service == "CLIFF":
        print("CLIFF")
    else:
        print(
            "Invalid geo service name. Must be 'CLIFF' or 'Mordecai'. Continuing..."
        )

    if scraperfilename:
        logger.info("Scraper file name: " + scraperfilename)
        print("Scraper file name:", scraperfilename)

    logger.info("Running Mongo.formatter.py")
    print("Running Mongo.formatter.py")
    formatted = formatter.main(results, file_details, process_date,
                               date_string)
    logger.info("Running PETRARCH")
    if run_filter == 'False':
        print('Running PETRARCH and writing to a file. No one-a-day.')
        logger.info('Running PETRARCH and writing to a file. No one-a-day.')
        # Command to write output to a file directly from PETR
        #        petrarch.run_pipeline(formatted,
        #                              '{}{}.txt'.format(file_details.fullfile_stem,
        #                                                date_string), parsed=True)

        petr_results = petrarch.run_pipeline(formatted,
                                             config="petr_config.ini",
                                             write_output=False,
                                             parsed=True)
        # DGM test: write output directly to a file instead
        # petrarch.run_pipeline(formatted, out_file="TESTOUT.txt",
        #                       config="petr_config.ini", write_output=True,
        #                       parsed=True)

    elif run_filter == 'True':
        print('Running PETRARCH and returning output.')
        logger.info('Running PETRARCH and returning output.')
        petr_results = petrarch.run_pipeline(formatted,
                                             config="petr_config.ini",
                                             write_output=False,
                                             parsed=True)
    else:
        print("""Can't run with the options you've specified. You need to fix
              something.""")
        logger.warning("Can't run with the options you've specified. Exiting.")
        sys.exit()

    if run_filter == 'True':
        logger.info("Running oneaday_formatter.py")
        print("Running oneaday_formatter.py")
        formatted_results = oneaday_filter.main(petr_results)
    else:
        logger.info("Running result_formatter.py")
        print("Running result_formatter.py")
        formatted_results = result_formatter.main(petr_results)

    logger.info("Running postprocess.py")
    print("Running postprocess.py")
    if version:
        postprocess.main(formatted_results, date_string, version, file_details,
                         server_details, geo_details)
    else:
        print("Please specify a data version number. Program ending.")

    # logger.info("Running phox_uploader.py")
    # print("Running phox_uploader.py")
    # try:
    #     uploader.main(date_string, server_details, file_details)
    # except Exception as e:
    #     logger.warning("Error on the upload portion. {}".format(e))
    #     print("""Error on the uploader. This step isn't absolutely necessary.
    #           Valid events should still be generated.""")

    logger.info('PHOX.pipeline end')
    print('PHOX.pipeline end:', datetime.datetime.utcnow())
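
The version dispatch at the top of this example can also be factored into a helper that fails fast on bad input. load_petrarch is a name introduced here for illustration, not part of the pipeline:

def load_petrarch(petrarch_version):
    """Return the requested PETRARCH module, raising on an invalid version."""
    if petrarch_version == '1':
        from petrarch import petrarch
        return petrarch
    if petrarch_version == '2':
        from petrarch2 import petrarch2 as petrarch
        return petrarch
    raise ValueError("petrarch_version must be '1' or '2', got {!r}".format(petrarch_version))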