Example #1
    def test_formatters(self):
        logging.config.dictConfig(self._make_dict_config(use_queues=False))
        logger = logging.getLogger("root")
        date = datetime.now()
        with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
            logger.critical({"date": date})
        submit_batch.assert_called_once()
        self.assertEqual(submit_batch.call_args_list[-1].args[0][0]["message"], json.dumps({"date": date.isoformat()}))

        del logger.handlers[:]
        handler = CloudWatchLogHandler(json_serialize_default=str, use_queues=False)
        logger.addHandler(handler)
        with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
            logger.critical({"date": date})
        submit_batch.assert_called_once()
        self.assertEqual(submit_batch.call_args_list[-1].args[0][0]["message"], json.dumps({"date": str(date)}))

        handler.formatter.json_serialize_default = None
        with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
            logger.critical({"date": date})
        submit_batch.assert_not_called()  # Error serializing message, caught and printed by logging

        del logger.handlers[:]
        handler = CloudWatchLogHandler(json_serialize_default=str, use_queues=False)
        logger.addHandler(handler)
        handler.formatter.add_log_record_attrs = ["levelname"]
        with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
            logger.critical("hello")
            logger.critical({"msg": "hello", "metadata": {}})
        self.assertEqual(submit_batch.call_args_list[-2].args[0][0]["message"],
                         json.dumps({"msg": "hello", "levelname": "CRITICAL"}))
        self.assertEqual(submit_batch.call_args_list[-1].args[0][0]["message"],
                         json.dumps({"msg": "hello", "metadata": {}, "levelname": "CRITICAL"}))
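A note on what the test above exercises: when a record's message is a dict, watchtower serializes it to JSON, and json_serialize_default supplies the fallback callable (here str) for values such as datetime that json.dumps cannot handle on its own. A minimal sketch mirroring the constructor arguments used above (the logger name is a placeholder):

import logging
from datetime import datetime

import watchtower

# use_queues=False and json_serialize_default=str mirror the handler built in the test.
handler = watchtower.CloudWatchLogHandler(use_queues=False, json_serialize_default=str)
logger = logging.getLogger("example")  # placeholder logger name
logger.addHandler(handler)

# The dict message is emitted as JSON; the datetime value falls back to str().
logger.critical({"event": "started", "at": datetime.now()})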
Example #2
    def get_handler(cls, settings, testing=False):
        """Turn ExternalIntegration into a log handler."""
        group = settings.setting(cls.GROUP).value or cls.DEFAULT_APP_NAME
        stream = settings.setting(cls.STREAM).value or cls.DEFAULT_APP_NAME
        interval = settings.setting(cls.INTERVAL).value or cls.DEFAULT_INTERVAL
        region = settings.setting(cls.REGION).value or cls.DEFAULT_REGION
        create_group = (settings.setting(cls.CREATE_GROUP).value
                        or cls.DEFAULT_CREATE_GROUP)

        try:
            interval = int(interval)
            if interval <= 0:
                raise CannotLoadConfiguration(
                    "AWS Cloudwatch Logs interval must be a positive integer.")
        except ValueError:
            raise CannotLoadConfiguration(
                "AWS Cloudwatch Logs interval configuration must be an integer."
            )
        client = AwsClient("logs", region_name=region)
        handler = CloudWatchLogHandler(
            log_group_name=group,
            log_stream_name=stream,
            send_interval=interval,
            boto3_client=client,
            create_log_group=create_group == "TRUE",
        )

        # Add a filter that makes sure no messages from botocore are processed by
        # the cloudwatch logs integration, as these messages can lead to an infinite loop.
        class BotoFilter(logging.Filter):
            def filter(self, record):
                return not record.name.startswith("botocore")

        handler.addFilter(BotoFilter())
        return handler
Example #3
def create_cw_logger(
        name,
        trace_id,
        current_frame=None,
        log_group_name=CommonConst.LOG_GROUP_NAME_STEP_FUNCTIONS,
        format_stream_name=CommonConst.FORMAT_STREAM_NAME_STEP_FUNCTIONS):
    log_stream_name = format_stream_name.format(request_id=trace_id)
    cw_logger = logging.getLogger(log_stream_name)

    env_is_running_step_functions = os.getenv('RUNNING_STEP_FUNCTION', 'False')
    is_running_step_functions = bool(strtobool(env_is_running_step_functions))
    if is_running_step_functions:
        # format message cw logger
        format = "[%(levelname)s] %(asctime)s %(message)s"
        date_format = '%Y-%m-%dT%H:%M:%S'
        msec_format = '%s.%03dZ'
        formatter = logging.Formatter(format)
        formatter.default_time_format = date_format
        formatter.default_msec_format = msec_format

        # create handler cloud watch log
        handler = CloudWatchLogHandler(stream_name=log_stream_name,
                                       log_group=log_group_name,
                                       create_log_group=False,
                                       use_queues=False)
        handler.setFormatter(formatter)
        if len(cw_logger.handlers) == 0:
            cw_logger.addHandler(handler)
    # setting cw logger
    log_level = os.getenv('LOG_LEVEL', default='INFO')
    cw_logger.setLevel(log_level)
    function_name = inspect.getframeinfo(current_frame)[2]
    extra = {'request_id': trace_id, 'function_name': function_name}
    cw_logger_obj = CwLogAdapter(cw_logger, extra)
    return cw_logger_obj
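A hypothetical call site for create_cw_logger; the name and trace id are placeholders, and inspect.currentframe() supplies the frame the helper inspects to record the calling function's name:

import inspect

# Placeholder values; trace_id would normally come from the invocation context.
cw_logger = create_cw_logger("my-task", trace_id="abc-123",
                             current_frame=inspect.currentframe())
cw_logger.info("step function task started")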
Example #4
    def test_existing_log_stream_does_not_create_log_stream(self):
        log_group = "py_watchtower_test"
        log_stream = "existing_" + str(uuid.uuid4())
        logs = boto3.client("logs")
        config_dict = self._make_dict_config(
            log_group=log_group,
            stream_name=log_stream,
            use_queues=False,
        )
        logging.config.dictConfig(config_dict)
        logger = logging.getLogger("root")

        # Minimal stand-in with the cwl_client attribute _idempotent_call expects,
        # used here to pre-create the log stream outside of a handler instance.
        class h:
            cwl_client = logs
        CloudWatchLogHandler._idempotent_call(h, "create_log_stream", logGroupName=log_group, logStreamName=log_stream)
        self.addCleanup(
            logs.delete_log_stream,
            logGroupName=log_group,
            logStreamName=log_stream,
        )

        with mock.patch("watchtower.CloudWatchLogHandler._idempotent_call") as create_log_stream_mock:
            logger.error("message")

        create_log_stream_mock.assert_not_called()
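The test pre-creates the stream and then asserts that the handler never tries to create it again. Outside of a test, the equivalent is to tell the handler not to create groups or streams at all (as Example #11 further down does); a sketch with placeholder names, assuming a recent watchtower release that uses the *_name keyword arguments:

import watchtower

# Both the group and the stream are assumed to exist already.
handler = watchtower.CloudWatchLogHandler(
    log_group_name="py_watchtower_test",
    log_stream_name="existing-stream",   # placeholder stream name
    create_log_group=False,
    create_log_stream=False,
    use_queues=False,
)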
Example #5
 def test_flush_safing(self):
     handler = CloudWatchLogHandler()
     logger = logging.getLogger("l")
     logger.addHandler(handler)
     handler.flush()
     logger.critical("msg")
     handler.close()
     logger.critical("msg")
 def init(cls, reset=True, **kwargs):
     """
     Class init method to set all vars
     :param bool reset:
     :param kwargs:
     """
     #print("AppSettings.init(reset={}, {})".format(reset,kwargs))
     if cls.dirty and reset:
         AppSettings.db_close()
         reset_class(AppSettings)
     if 'prefix' in kwargs and kwargs['prefix'] != cls.prefix:
         cls.__prefix_vars(kwargs['prefix'])
     cls.set_vars(**kwargs)
     test_mode_flag = os.getenv('TEST_MODE', '')
     travis_flag = os.getenv('TRAVIS_BRANCH', '')
     log_group_name = f"{'' if test_mode_flag or travis_flag else cls.prefix}tX" \
                      f"{'_DEBUG' if debug_mode_flag else ''}" \
                      f"{'_TEST' if test_mode_flag else ''}" \
                      f"{'_TravisCI' if travis_flag else ''}"
     boto3_session = Session(
         aws_access_key_id=cls.aws_access_key_id,
         aws_secret_access_key=cls.aws_secret_access_key,
         region_name=cls.aws_region_name)
     cls.watchtower_log_handler = CloudWatchLogHandler(
         boto3_session=boto3_session,
         # use_queues=False, # Because this forked process is quite transient
         log_group=log_group_name,
         stream_name=cls.name)
     setup_logger(cls.logger, cls.watchtower_log_handler,
                  logging.DEBUG if debug_mode_flag else logging.INFO)
     cls.logger.debug(
         f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{cls.aws_access_key_id[-2:]}'."
     )
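Example #17 below pairs this init with an AppSettings.close_logger() call that flushes queued records before the process exits. That method is not shown in these examples; a plausible sketch, assuming it only needs to close the watchtower handler:

 @classmethod
 def close_logger(cls):
     """Flush and close the CloudWatch handler so queued records reach AWS."""
     if getattr(cls, 'watchtower_log_handler', None) is not None:
         cls.watchtower_log_handler.close()
         cls.watchtower_log_handler = None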
Example #7
 def test_json_logging(self):
     handler = CloudWatchLogHandler()
     logger = logging.getLogger("json")
     logger.addHandler(handler)
     for i in range(10):
         logger.critical(
             dict(src="foo", event=str(i), stack=[1, 2, 3, i], details={}))
Example #8
    def get_handler(cls, settings, testing=False):
        """Turn ExternalIntegration into a log handler.
        """
        group = settings.setting(cls.GROUP).value or cls.DEFAULT_APP_NAME
        stream = settings.setting(cls.STREAM).value or cls.DEFAULT_APP_NAME
        interval = settings.setting(cls.INTERVAL).value or cls.DEFAULT_INTERVAL
        region = settings.setting(cls.REGION).value or cls.DEFAULT_REGION
        create_group = settings.setting(
            cls.CREATE_GROUP).value or cls.DEFAULT_CREATE_GROUP

        try:
            interval = int(interval)
            if interval <= 0:
                raise CannotLoadConfiguration(
                    "AWS Cloudwatch Logs interval must be a positive integer.")
        except ValueError:
            raise CannotLoadConfiguration(
                "AWS Cloudwatch Logs interval configuration must be an integer."
            )
        session = AwsSession(region_name=region)
        return CloudWatchLogHandler(log_group=group,
                                    stream_name=stream,
                                    send_interval=interval,
                                    boto3_session=session,
                                    create_log_group=create_group == "TRUE")
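Examples #2 and #8 are two revisions of the same method and illustrate a watchtower API change: older releases take log_group, stream_name, and boto3_session, while newer releases use log_group_name, log_stream_name, and boto3_client. A best-effort compatibility sketch (not part of either project):

import boto3
import watchtower

def make_cloudwatch_handler(group, stream, region):
    """Build a CloudWatchLogHandler across watchtower versions (best-effort sketch)."""
    try:
        # Newer watchtower: explicit logs client plus *_name keyword arguments.
        client = boto3.client("logs", region_name=region)
        return watchtower.CloudWatchLogHandler(
            log_group_name=group, log_stream_name=stream, boto3_client=client)
    except TypeError:
        # Older watchtower: boto3 session plus log_group / stream_name keywords.
        session = boto3.session.Session(region_name=region)
        return watchtower.CloudWatchLogHandler(
            log_group=group, stream_name=stream, boto3_session=session)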
Example #9
 def test_empty_message(self):
     handler = CloudWatchLogHandler(use_queues=False)
     logger = logging.getLogger("empty")
     logger.addHandler(handler)
     with self.assertWarns(WatchtowerWarning) as cm:
         logger.critical("")
     self.assertEqual(
         str(cm.warning),
         "Received empty message. Empty messages cannot be sent to CloudWatch Logs"
     )
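If empty messages are expected in a code path, they can be filtered out before they reach the handler rather than letting each one raise this warning; a small sketch using a standard logging.Filter attached to a handler like the one above:

import logging

class NonEmptyFilter(logging.Filter):
    """Drop records whose formatted message is empty or whitespace."""
    def filter(self, record):
        return bool(record.getMessage().strip())

handler.addFilter(NonEmptyFilter())  # 'handler' as created in the test above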
Example #10
 def test_json_logging_object_with_datetime(self):
     handler = CloudWatchLogHandler()
     logger = logging.getLogger("json")
     logger.addHandler(handler)
     for i in range(10):
         logger.critical(
             dict(src="foo",
                  event=str(i),
                  stack=[1, 2, 3, i],
                  details=dict(time=datetime(2019, 1, 1))))
Example #11
    def __init__(
        self,
        log_group: str,
        log_stream: str,
        aws_access_key: str,
        aws_secret_key: str,
        aws_region: str = "us-east-1",
        send_interval: int = 15,
        formatter="%(asctime)s | %(filename)s (%(lineno)d) | %(levelname)s: %(message)s",
    ):
        """Initialize CloudLogger.

        Parameters
        ----------
        log_group : str
            Name of the CloudWatch log group to write logs to.
        log_stream : str
            Name of the CloudWatch log stream to write logs to.
        aws_access_key : str
            AWS Access Key ID from AWS management console.
        aws_secret_key : str
            AWS Secret Access Key from AWS management console.
        aws_region : str
            AWS Region used for Amazon CloudWatch.
        send_interval : int
            Maximum seconds to hold messages in queue before sending a batch.
        formatter : str
            Logging log message format.
        """

        # Setup basic logging configuration
        logging.basicConfig(level=logging.INFO, format=formatter)

        # Connect to AWS Session through boto3
        boto3_session = Session(
            aws_access_key_id=aws_access_key,
            aws_secret_access_key=aws_secret_key,
            region_name=aws_region,
        )

        # Setup handler and logger objects
        self.handler = CloudWatchLogHandler(
            log_group=log_group,
            stream_name=log_stream,
            boto3_session=boto3_session,
            send_interval=send_interval,
            create_log_group=False,
            create_log_stream=False,
        )
        self.logger = logging.getLogger(log_stream)
        self.logger.addHandler(self.handler)
        connection_log = "Connected to AWS CloudWatch (%s): %s (%s)"
        self.logger.info(connection_log, aws_region, log_group, log_stream)
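A hypothetical instantiation of the wrapper above (the surrounding class appears to be called CloudLogger, per its docstring); the group, stream, and credentials are placeholders that would normally come from configuration:

import os

cloud_logger = CloudLogger(
    log_group="my-app-logs",   # placeholder group name
    log_stream="worker-1",     # placeholder stream name
    aws_access_key=os.environ["AWS_ACCESS_KEY_ID"],
    aws_secret_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    aws_region="us-east-1",
)
cloud_logger.logger.info("processing started")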
Example #12
 def test_flush_safing(self):
     handler = CloudWatchLogHandler()
     logger = logging.getLogger("l")
     logger.addHandler(handler)
     handler.flush()
     logger.critical("msg")
     handler.close()
     logger.critical("msg")
Example #13
 def test_flush_safing(self):
     handler = CloudWatchLogHandler()
     logger = logging.getLogger("l")
     logger.addHandler(handler)
     handler.flush()
     logger.critical("msg")
     handler.close()
     with self.assertWarns(WatchtowerWarning) as cm:
         logger.critical("msg")
     self.assertEqual(
         str(cm.warning),
         "Received message after logging system shutdown",
     )
Example #14
 def test_basic_pycwl_statements(self):
     h = CloudWatchLogHandler()
     loggers = []
     for i in range(5):
         logger = logging.getLogger("logger{}".format(i))
         logger.addHandler(h)
         #logger.addHandler(CloudWatchLogHandler(use_queues=False))
         loggers.append(logger)
     for i in range(10001):
         for logger in loggers:
             logger.error("test")
     import time
     time.sleep(1)
     for i in range(9000):
         for logger in loggers:
             logger.error("test")
     for i in range(1001):
         for logger in loggers:
             logger.error("test")
Example #15
import logging
import os
import sys

sys.path.insert(0,
                os.path.abspath(os.path.join(os.path.dirname(__file__),
                                             '..')))  # noqa
from watchtower import CloudWatchLogHandler

handler = CloudWatchLogHandler(stream_name='run_logging')
logger = logging.getLogger('run_logging')
logger.setLevel(logging.INFO)
logger.addHandler(handler)
logger.info('msg')
handler.close()
Example #16
 def test_empty_message(self):
     handler = CloudWatchLogHandler(use_queues=False)
     logger = logging.getLogger("empty")
     logger.addHandler(handler)
     logger.critical("")
def job(queued_json_payload: Dict[str, Any]) -> None:
    """
    This function is called by the rq package to process a job in the queue(s).
        (Don't rename this function.)

    The job is removed from the queue before the job is started,
        but if the job throws an exception or times out (timeout specified in enqueue process)
            then the job gets added to the 'failed' queue.
    """
    AppSettings.logger.info("Door43-Job-Handler received a callback" + (" (in debug mode)" if debug_mode_flag else ""))
    start_time = time.time()
    stats_client.incr(f'{callback_stats_prefix}.jobs.attempted')

    current_job = get_current_job()
    #print(f"Current job: {current_job}") # Mostly just displays the job number and payload
    #print("id",current_job.id) # Displays job number
    #print("origin",current_job.origin) # Displays queue name
    #print("meta",current_job.meta) # Empty dict

    # AppSettings.logger.info(f"Updating queue statistics…")
    our_queue = Queue(callback_queue_name, connection=current_job.connection)
    len_our_queue = len(our_queue) # Should normally sit at zero here
    # AppSettings.logger.debug(f"Queue '{callback_queue_name}' length={len_our_queue}")
    stats_client.gauge(f'{door43_stats_prefix}.enqueue-job.callback.queue.length.current', len_our_queue)
    AppSettings.logger.info(f"Updated stats for '{door43_stats_prefix}.enqueue-job.callback.queue.length.current' to {len_our_queue}")

    #print(f"Got a job from {current_job.origin} queue: {queued_json_payload}")
    #print(f"\nGot job {current_job.id} from {current_job.origin} queue")
    #queue_prefix = 'dev-' if current_job.origin.startswith('dev-') else ''
    #assert queue_prefix == prefix
    try:
        job_descriptive_name, door43_webhook_received_at = \
                process_callback_job(prefix, queued_json_payload, current_job.connection)
    except Exception as e:
        # Catch most exceptions here so we can log them to CloudWatch
        prefixed_name = f"{prefix}Door43_Callback"
        AppSettings.logger.critical(f"{prefixed_name} threw an exception while processing: {queued_json_payload}")
        AppSettings.logger.critical(f"{e}: {traceback.format_exc()}")
        AppSettings.close_logger() # Ensure queued logs are uploaded to AWS CloudWatch
        # Now attempt to log it to an additional, separate FAILED log
        import logging
        from boto3 import Session
        from watchtower import CloudWatchLogHandler
        logger2 = logging.getLogger(prefixed_name)
        test_mode_flag = os.getenv('TEST_MODE', '')
        travis_flag = os.getenv('TRAVIS_BRANCH', '')
        log_group_name = f"FAILED_{'' if test_mode_flag or travis_flag else prefix}tX" \
                         f"{'_DEBUG' if debug_mode_flag else ''}" \
                         f"{'_TEST' if test_mode_flag else ''}" \
                         f"{'_TravisCI' if travis_flag else ''}"
        aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        boto3_session = Session(aws_access_key_id=aws_access_key_id,
                            aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
                            region_name='us-west-2')
        failure_watchtower_log_handler = CloudWatchLogHandler(boto3_session=boto3_session,
                                                    use_queues=False,
                                                    log_group=log_group_name,
                                                    stream_name=prefixed_name)
        logger2.addHandler(failure_watchtower_log_handler)
        logger2.setLevel(logging.DEBUG)
        logger2.info(f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'.")
        logger2.critical(f"{prefixed_name} threw an exception while processing: {queued_json_payload}")
        logger2.critical(f"{e}: {traceback.format_exc()}")
        failure_watchtower_log_handler.close()
        # NOTE: following line removed as stats recording used too much disk space
        # stats_client.gauge(user_projects_invoked_string, 1) # Mark as 'failed'
        stats_client.gauge(project_types_invoked_string, 1) # Mark as 'failed'
        raise e # We raise the exception again so it goes into the failed queue

    elapsed_milliseconds = round((time.time() - start_time) * 1000)
    stats_client.timing(f'{callback_stats_prefix}.job.duration', elapsed_milliseconds)
    if elapsed_milliseconds < 2000:
        AppSettings.logger.info(f"{prefix}Door43 callback handling for {job_descriptive_name} completed in {elapsed_milliseconds:,} milliseconds.")
    else:
        AppSettings.logger.info(f"{prefix}Door43 callback handling for {job_descriptive_name} completed in {round(time.time() - start_time)} seconds.")

    # Calculate total elapsed time for the job
    total_elapsed_time = datetime.utcnow() - \
                         datetime.strptime(door43_webhook_received_at,
                                           '%Y-%m-%dT%H:%M:%SZ')
    AppSettings.logger.info(f"{prefix}Door43 total job for {job_descriptive_name} completed in {round(total_elapsed_time.total_seconds())} seconds.")
    stats_client.timing(f'{job_handler_stats_prefix}.total.job.duration', round(total_elapsed_time.total_seconds() * 1000))

    # NOTE: following line removed as stats recording used too much disk space
    # stats_client.gauge(user_projects_invoked_string, 0) # Mark as 'succeeded'
    stats_client.gauge(project_types_invoked_string, 0) # Mark as 'succeeded'
    stats_client.incr(f'{callback_stats_prefix}.jobs.succeeded')
    AppSettings.close_logger() # Ensure queued logs are uploaded to AWS CloudWatch
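The failure path above builds a second, unqueued handler so the FAILED log is written even while the process is unwinding. The same steps, distilled into a reusable helper (a sketch using the example's own parameter names and region, not an API this project actually exposes):

import logging
import os

from boto3 import Session
from watchtower import CloudWatchLogHandler

def log_failure(log_group_name, stream_name, *messages):
    """Write failure messages to a dedicated CloudWatch group, bypassing queues."""
    boto3_session = Session(aws_access_key_id=os.environ['AWS_ACCESS_KEY_ID'],
                            aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
                            region_name='us-west-2')
    handler = CloudWatchLogHandler(boto3_session=boto3_session,
                                   use_queues=False,  # send synchronously
                                   log_group=log_group_name,
                                   stream_name=stream_name)
    failure_logger = logging.getLogger(stream_name)
    failure_logger.addHandler(handler)
    failure_logger.setLevel(logging.DEBUG)
    for message in messages:
        failure_logger.critical(message)
    handler.close()  # flush before the caller re-raises its exception
    failure_logger.removeHandler(handler)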
Example #18
def job(queued_json_payload: Dict[str, Any]) -> None:
    """
    This function is called by the rq package to process a job in the queue(s).
        (Don't rename this function.)

    The job is removed from the queue before the job is started,
        but if the job throws an exception or times out (timeout specified in enqueue process)
            then the job gets added to the 'failed' queue.
    """
    logger.info(MY_NAME_VERSION_STRING)
    logger.debug("tX PDF JobHandler received a job" +
                 (" (in debug mode)" if debug_mode_flag else ""))
    start_time = time()
    stats_client.incr(f'{job_handler_stats_prefix}.jobs.OBSPDF.attempted')

    logger.info(f"Clearing /tmp folder…")
    empty_folder(
        '/tmp/',
        only_prefix='tX_')  # Stops failed jobs from accumulating in /tmp

    # logger.info(f"Updating queue statistics…")
    our_queue = Queue(webhook_queue_name,
                      connection=get_current_job().connection)
    len_our_queue = len(our_queue)  # Should normally sit at zero here
    # logger.debug(f"Queue '{webhook_queue_name}' length={len_our_queue}")
    stats_client.gauge(
        f'{tx_stats_prefix}.enqueue-job.queue.OBSPDF.length.current',
        len_our_queue)
    logger.info(
        f"Updated stats for '{tx_stats_prefix}.enqueue-job.queue.OBSPDF.length.current' to {len_our_queue}"
    )

    # Save some stats
    stats_client.incr(
        f"{job_handler_stats_prefix}.jobs.OBSPDF.input.{queued_json_payload['input_format']}"
    )
    stats_client.incr(
        f"{job_handler_stats_prefix}.jobs.OBSPDF.subject.{queued_json_payload['resource_type']}"
    )

    try:
        job_descriptive_name = process_PDF_job(prefix, queued_json_payload)
    except Exception as e:
        # Catch most exceptions here so we can log them to CloudWatch
        prefixed_name = f"{prefix}tX_PDF_Job_Handler"
        logger.critical(
            f"{prefixed_name} threw an exception while processing: {queued_json_payload}"
        )
        logger.critical(f"{e}: {traceback.format_exc()}")
        main_watchtower_log_handler.close()  # Ensure queued logs are uploaded to AWS CloudWatch
        # Now attempt to log it to an additional, separate FAILED log
        logger2 = logging.getLogger(prefixed_name)
        log_group_name = f"FAILED_{'' if test_mode_flag or travis_flag else prefix}tX" \
                         f"{'_DEBUG' if debug_mode_flag else ''}" \
                         f"{'_TEST' if test_mode_flag else ''}" \
                         f"{'_TravisCI' if travis_flag else ''}"
        boto3_session = Session(aws_access_key_id=aws_access_key_id,
                                aws_secret_access_key=aws_secret_access_key,
                                region_name='us-west-2')
        failure_watchtower_log_handler = CloudWatchLogHandler(
            boto3_session=boto3_session,
            use_queues=False,
            log_group=log_group_name,
            stream_name=prefixed_name)
        logger2.addHandler(failure_watchtower_log_handler)
        logger2.setLevel(logging.DEBUG)
        logger2.info(
            f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'."
        )
        logger2.critical(
            f"{prefixed_name} threw an exception while processing: {queued_json_payload}"
        )
        logger2.critical(f"{e}: {traceback.format_exc()}")
        failure_watchtower_log_handler.close()
        raise e  # We raise the exception again so it goes into the failed queue

    elapsed_milliseconds = round((time() - start_time) * 1000)
    stats_client.timing(f'{job_handler_stats_prefix}.job.OBSPDF.duration',
                        elapsed_milliseconds)
    if elapsed_milliseconds < 2000:
        logger.info(
            f"{prefix}tX job handling for {job_descriptive_name} PDF completed in {elapsed_milliseconds:,} milliseconds."
        )
    else:
        logger.info(
            f"{prefix}tX job handling for {job_descriptive_name} PDF completed in {round(time() - start_time)} seconds."
        )

    stats_client.incr(f'{job_handler_stats_prefix}.jobs.OBSPDF.completed')
    main_watchtower_log_handler.close()  # Ensure queued logs are uploaded to AWS CloudWatch
Example #19
test_mode_flag = os.getenv('TEST_MODE', '')
travis_flag = os.getenv('TRAVIS_BRANCH', '')
log_group_name = f"{'' if test_mode_flag or travis_flag else prefix}tX" \
                    f"{'_DEBUG' if debug_mode_flag else ''}" \
                    f"{'_TEST' if test_mode_flag else ''}" \
                    f"{'_TravisCI' if travis_flag else ''}"

# Setup logging
logger = logging.getLogger(job_handler_stats_prefix)
boto3_session = Session(aws_access_key_id=aws_access_key_id,
                        aws_secret_access_key=aws_secret_access_key,
                        region_name=AWS_REGION_NAME)
main_watchtower_log_handler = CloudWatchLogHandler(
    boto3_session=boto3_session,
    # use_queues=False, # Because this forked process is quite transient
    log_group=log_group_name,
    stream_name='tX-PDF-Job-Handler')
logger.addHandler(main_watchtower_log_handler)
logger.setLevel(logging.DEBUG if debug_mode_flag else logging.INFO)
# Change these loggers to only report errors:
logging.getLogger('boto3').setLevel(logging.ERROR)
logging.getLogger('botocore').setLevel(logging.ERROR)
logger.debug(
    f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'."
)

# Get the Graphite URL from the environment, otherwise use a local test instance
graphite_url = os.getenv('GRAPHITE_HOSTNAME', 'localhost')
stats_client = StatsClient(host=graphite_url, port=8125)
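Raising the boto3/botocore loggers to ERROR serves the same purpose as the BotoFilter in Example #2: it keeps the AWS SDK's own records from being routed back through the CloudWatch handler, which could otherwise feed itself. A sketch of the filter-based alternative applied to this module-level handler:

import logging

class BotoFilter(logging.Filter):
    """Drop botocore/boto3 records so the CloudWatch handler cannot recurse."""
    def filter(self, record):
        return not record.name.startswith(("botocore", "boto3"))

main_watchtower_log_handler.addFilter(BotoFilter())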
Example #20
    def from_chemsys(cls,
                     chemsys,
                     prefix="proto-dft-2/runs",
                     n_max_atoms=20,
                     agent=None,
                     analyzer=None,
                     experiment=None,
                     log_file="campaign.log",
                     cloudwatch_group="/camd/worker/dev/"):
        """
        Class factory method for constructing campaign from
        chemsys.

        Args:
            chemsys (str): chemical system for the campaign
            prefix (str): prefix for s3
            n_max_atoms (int): number of maximum atoms
            agent (Agent): agent for stability campaign
            analyzer (Analyzer): analyzer for stability campaign
            experiment (Agent): experiment for stability campaign
            log_file (str): log filename
            cloudwatch_group (str): cloudwatch group to log to

        Returns:
            (ProtoDFTCampaign): Standard proto-dft campaign from
                the chemical system

        """
        logger = logging.Logger("camd")
        logger.setLevel("INFO")
        file_handler = logging.FileHandler(log_file)
        cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                          stream_name=chemsys)
        logger.addHandler(file_handler)
        logger.addHandler(cw_handler)
        logger.addHandler(logging.StreamHandler())

        logger.info(
            "Starting campaign factory from_chemsys {}".format(chemsys))
        s3_prefix = "{}/{}".format(prefix, chemsys)

        # Initialize s3
        dumpfn({
            "started": datetime.now().isoformat(),
            "version": __version__
        }, "start.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Get structure domain
        element_list = chemsys.split('-')
        max_coeff, charge_balanced = heuristic_setup(element_list)
        domain = StructureDomain.from_bounds(element_list,
                                             charge_balanced=charge_balanced,
                                             n_max_atoms=n_max_atoms,
                                             **{'grid': range(1, max_coeff)})
        candidate_data = domain.candidates()

        # Dump structure/candidate data
        with open('candidate_data.pickle', 'wb') as f:
            pickle.dump(candidate_data, f)
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')
        logger.info("Candidates generated")

        # Set up agents and loop parameters
        agent = agent or AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
        analyzer = analyzer or StabilityAnalyzer(hull_distance=0.2)
        experiment = experiment or OqmdDFTonMC1(timeout=30000)
        seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

        # Construct and start loop
        return cls(candidate_data=candidate_data,
                   agent=agent,
                   experiment=experiment,
                   analyzer=analyzer,
                   seed_data=seed_data,
                   heuristic_stopper=5,
                   s3_prefix=s3_prefix,
                   logger=logger)
Example #21
    def from_chemsys(cls,
                     chemsys,
                     prefix="proto-dft-2/runs",
                     n_max_atoms=20,
                     agent=None,
                     analyzer=None,
                     experiment=None,
                     log_file="campaign.log",
                     cloudwatch_group="/camd/worker/dev/"):
        """
        Class factory method for constructing campaign from
        chemsys.

        Args:
            chemsys (str): chemical system for the campaign
            prefix (str): prefix for s3
            n_max_atoms (int): number of maximum atoms
            agent (Agent): agent for stability campaign
            analyzer (Analyzer): analyzer for stability campaign
            experiment (Agent): experiment for stability campaign
            log_file (str): log filename
            cloudwatch_group (str): cloudwatch group to log to

        Returns:
            (ProtoDFTCampaign): Standard proto-dft campaign from
                the chemical system

        """
        logger = logging.Logger("camd")
        logger.setLevel("INFO")
        file_handler = logging.FileHandler(log_file)
        cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                          stream_name=chemsys)
        logger.addHandler(file_handler)
        logger.addHandler(cw_handler)
        logger.addHandler(logging.StreamHandler())

        logger.info(
            "Starting campaign factory from_chemsys {}".format(chemsys))
        s3_prefix = "{}/{}".format(prefix, chemsys)

        # Initialize s3
        dumpfn({
            "started": datetime.now().isoformat(),
            "version": __version__
        }, "start.json")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Get structure domain
        # Check cache
        cache_key = "protosearch_cache/v1/{}/{}/candidates.pickle".format(
            chemsys, n_max_atoms)
        # TODO: create test of isfile
        if s3_key_exists(bucket=CAMD_S3_BUCKET, key=cache_key):
            logger.info("Found cached protosearch domain.")
            candidate_data = pd.read_pickle("s3://{}/{}".format(
                CAMD_S3_BUCKET, cache_key))
            logger.info("Loaded cached {}.".format(cache_key))
        else:
            logger.info(
                "Generating domain with max {} atoms.".format(n_max_atoms))
            element_list = chemsys.split('-')
            max_coeff, charge_balanced = heuristic_setup(element_list)
            domain = StructureDomain.from_bounds(
                element_list,
                charge_balanced=charge_balanced,
                n_max_atoms=n_max_atoms,
                **{'grid': range(1, max_coeff)})
            candidate_data = domain.candidates()
            logger.info("Candidates generated")
            candidate_data.to_pickle("s3://{}/{}".format(
                CAMD_S3_BUCKET, cache_key))
            logger.info("Cached protosearch domain at {}.".format(cache_key))

        # Dump structure/candidate data
        candidate_data.to_pickle("candidate_data.pickle")
        s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

        # Set up agents and loop parameters
        agent = agent or AgentStabilityAdaBoost(
            model=MLPRegressor(hidden_layer_sizes=(84, 50)),
            n_query=10,
            hull_distance=0.2,
            exploit_fraction=1.0,
            uncertainty=True,
            alpha=0.5,
            diversify=True,
            n_estimators=20)
        analyzer = analyzer or StabilityAnalyzer(hull_distance=0.2)
        experiment = experiment or OqmdDFTonMC1(timeout=30000,
                                                prefix_append="proto-dft")
        seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

        # Load cached experiments
        logger.info("Loading cached experiments")
        cached_experiments = experiment.fetch_cached(candidate_data)
        logger.info("Found {} experiments.".format(len(cached_experiments)))
        if len(cached_experiments) > 0:
            summary, seed_data = analyzer.analyze(cached_experiments,
                                                  seed_data)
            # Remove cached experiments from candidate_data
            candidate_space = candidate_data.index.difference(
                cached_experiments.index, sort=False).tolist()
            candidate_data = candidate_data.loc[candidate_space]
            logger.info("Cached experiments added to seed.")

        # Construct and start loop
        return cls(candidate_data=candidate_data,
                   agent=agent,
                   experiment=experiment,
                   analyzer=analyzer,
                   seed_data=seed_data,
                   heuristic_stopper=5,
                   s3_prefix=s3_prefix,
                   logger=logger)
Example #22
import logging
import os
import sys


sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))  # noqa
from watchtower import CloudWatchLogHandler

handler = CloudWatchLogHandler(stream_name='run_logging')
logger = logging.getLogger('run_logging')
logger.setLevel(logging.INFO)
logger.addHandler(handler)
logger.info('msg')
handler.close()
Example #23
logger = logging.getLogger(prefixed_our_name)
sh = logging.StreamHandler(sys.stdout)
sh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s: %(message)s'))
logger.addHandler(sh)
aws_access_key_id = environ['AWS_ACCESS_KEY_ID']
boto3_session = Session(aws_access_key_id=aws_access_key_id,
                        aws_secret_access_key=environ['AWS_SECRET_ACCESS_KEY'],
                        region_name='us-west-2')
test_mode_flag = getenv('TEST_MODE', '')
travis_flag = getenv('TRAVIS_BRANCH', '')
log_group_name = f"{'' if test_mode_flag or travis_flag else prefix}tX" \
                 f"{'_DEBUG' if debug_mode_flag else ''}" \
                 f"{'_TEST' if test_mode_flag else ''}" \
                 f"{'_TravisCI' if travis_flag else ''}"
watchtower_log_handler = CloudWatchLogHandler(boto3_session=boto3_session,
                                              log_group=log_group_name,
                                              stream_name=prefixed_our_name)
logger.addHandler(watchtower_log_handler)
# Enable DEBUG logging for dev- instances (but less logging for production)
logger.setLevel(logging.DEBUG if prefix else logging.INFO)
logger.debug(
    f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'."
)

# Setup queue variables
QUEUE_NAME_SUFFIX = ''  # Used to switch to a different queue, e.g., '_1'
if prefix not in ('', DEV_PREFIX):
    logger.critical(
        f"Unexpected prefix: '{prefix}' — expected '' or '{DEV_PREFIX}'")
if prefix:  # don't use production queue
    our_adjusted_webhook_queue_name = prefixed_our_name + QUEUE_NAME_SUFFIX  # Will become our main queue name