def test_formatters(self):
    """Exercise the handler's JSON formatter across its configuration knobs."""
    # Default config: dict messages are JSON-serialized; datetimes via isoformat().
    logging.config.dictConfig(self._make_dict_config(use_queues=False))
    logger = logging.getLogger("root")
    date = datetime.now()
    with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
        logger.critical({"date": date})
        submit_batch.assert_called_once()
        self.assertEqual(submit_batch.call_args_list[-1].args[0][0]["message"],
                         json.dumps({"date": date.isoformat()}))
    del logger.handlers[:]
    # json_serialize_default=str: non-JSON-serializable values fall back to str().
    handler = CloudWatchLogHandler(json_serialize_default=str, use_queues=False)
    logger.addHandler(handler)
    with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
        logger.critical({"date": date})
        submit_batch.assert_called_once()
        self.assertEqual(submit_batch.call_args_list[-1].args[0][0]["message"],
                         json.dumps({"date": str(date)}))
    # With no default serializer, serialization fails and nothing is submitted.
    handler.formatter.json_serialize_default = None
    with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
        logger.critical({"date": date})
        submit_batch.assert_not_called()  # Error serializing message, caught and printed by logging
    del logger.handlers[:]
    handler = CloudWatchLogHandler(json_serialize_default=str, use_queues=False)
    logger.addHandler(handler)
    # add_log_record_attrs folds the listed LogRecord attributes into the payload;
    # a plain-string message is promoted to {"msg": ...} first.
    handler.formatter.add_log_record_attrs = ["levelname"]
    with mock.patch("watchtower.CloudWatchLogHandler._submit_batch") as submit_batch:
        logger.critical("hello")
        logger.critical({"msg": "hello", "metadata": {}})
        self.assertEqual(submit_batch.call_args_list[-2].args[0][0]["message"],
                         json.dumps({"msg": "hello", "levelname": "CRITICAL"}))
        self.assertEqual(submit_batch.call_args_list[-1].args[0][0]["message"],
                         json.dumps({"msg": "hello", "metadata": {}, "levelname": "CRITICAL"}))
def get_handler(cls, settings, testing=False):
    """Turn ExternalIntegration into a log handler."""

    def _value(key, default):
        # Fall back to the class-level default when the setting is unset/empty.
        return settings.setting(key).value or default

    group = _value(cls.GROUP, cls.DEFAULT_APP_NAME)
    stream = _value(cls.STREAM, cls.DEFAULT_APP_NAME)
    interval = _value(cls.INTERVAL, cls.DEFAULT_INTERVAL)
    region = _value(cls.REGION, cls.DEFAULT_REGION)
    create_group = _value(cls.CREATE_GROUP, cls.DEFAULT_CREATE_GROUP)

    try:
        interval = int(interval)
    except ValueError:
        raise CannotLoadConfiguration(
            "AWS Cloudwatch Logs interval configuration must be an integer."
        )
    if interval <= 0:
        raise CannotLoadConfiguration(
            "AWS Cloudwatch Logs interval must be a positive integer.")

    client = AwsClient("logs", region_name=region)
    handler = CloudWatchLogHandler(
        log_group_name=group,
        log_stream_name=stream,
        send_interval=interval,
        boto3_client=client,
        create_log_group=create_group == "TRUE",
    )

    # Filter out botocore's own records: the handler calls AWS via botocore,
    # so processing them here could recurse into an infinite loop.
    class BotoFilter(logging.Filter):
        def filter(self, record):
            return not record.name.startswith("botocore")

    handler.addFilter(BotoFilter())
    return handler
def create_cw_logger(
        name,
        trace_id,
        current_frame=None,
        log_group_name=CommonConst.LOG_GROUP_NAME_STEP_FUNCTIONS,
        format_stream_name=CommonConst.FORMAT_STREAM_NAME_STEP_FUNCTIONS):
    """Build a logger adapter, optionally shipping records to CloudWatch Logs.

    The CloudWatch handler is attached only when the RUNNING_STEP_FUNCTION
    environment variable is truthy, and only if the logger does not already
    carry a handler (avoids duplicate handlers on repeated calls).

    :param name: unused (kept for backward compatibility with callers)
    :param trace_id: request id interpolated into the log stream name
    :param current_frame: caller frame, used to record the calling function name
    :param log_group_name: CloudWatch log group to write to
    :param format_stream_name: template with a ``{request_id}`` placeholder
    :return: a CwLogAdapter wrapping the configured logger
    """
    log_stream_name = format_stream_name.format(request_id=trace_id)
    cw_logger = logging.getLogger(log_stream_name)

    env_is_running_step_functions = os.getenv('RUNNING_STEP_FUNCTION', 'False')
    is_running_step_functions = bool(strtobool(env_is_running_step_functions))
    if is_running_step_functions:
        # ISO-8601 timestamps with millisecond precision and a 'Z' suffix.
        # (Renamed from `format` so we don't shadow the builtin.)
        log_format = "[%(levelname)s] %(asctime)s %(message)s"
        date_format = '%Y-%m-%dT%H:%M:%S'
        msec_format = '%s.%03dZ'
        formatter = logging.Formatter(log_format)
        formatter.default_time_format = date_format
        formatter.default_msec_format = msec_format
        # Only construct the CloudWatch handler when it will actually be
        # attached — creating it has side effects (AWS client setup).
        if not cw_logger.handlers:
            handler = CloudWatchLogHandler(stream_name=log_stream_name,
                                           log_group=log_group_name,
                                           create_log_group=False,
                                           use_queues=False)
            handler.setFormatter(formatter)
            cw_logger.addHandler(handler)

    log_level = os.getenv('LOG_LEVEL', default='INFO')
    cw_logger.setLevel(log_level)

    function_name = inspect.getframeinfo(current_frame)[2]
    extra = {'request_id': trace_id, 'function_name': function_name}
    return CwLogAdapter(cw_logger, extra)
def test_existing_log_stream_does_not_create_log_stream(self):
    """When the log stream already exists, the handler must not try to create it."""
    log_group = "py_watchtower_test"
    log_stream = "existing_" + str(uuid.uuid4())
    logs = boto3.client("logs")

    logging.config.dictConfig(self._make_dict_config(
        log_group=log_group,
        stream_name=log_stream,
        use_queues=False,
    ))
    logger = logging.getLogger("root")

    # Pre-create the stream through the handler's idempotent helper, using a
    # minimal stand-in object that only supplies the client attribute.
    class _Stub:
        cwl_client = logs

    CloudWatchLogHandler._idempotent_call(_Stub, "create_log_stream",
                                          logGroupName=log_group,
                                          logStreamName=log_stream)
    self.addCleanup(
        logs.delete_log_stream,
        logGroupName=log_group,
        logStreamName=log_stream,
    )

    with mock.patch("watchtower.CloudWatchLogHandler._idempotent_call") as create_log_stream_mock:
        logger.error("message")
        create_log_stream_mock.assert_not_called()
def test_flush_safing(self):
    """flush()/close() must be safe even when logging continues afterwards."""
    cw_handler = CloudWatchLogHandler()
    log = logging.getLogger("l")
    log.addHandler(cw_handler)
    cw_handler.flush()
    log.critical("msg")
    cw_handler.close()
    # Emitting after close must not raise.
    log.critical("msg")
def init(cls, reset=True, **kwargs): """ Class init method to set all vars :param bool reset: :param kwargs: """ #print("AppSettings.init(reset={}, {})".format(reset,kwargs)) if cls.dirty and reset: AppSettings.db_close() reset_class(AppSettings) if 'prefix' in kwargs and kwargs['prefix'] != cls.prefix: cls.__prefix_vars(kwargs['prefix']) cls.set_vars(**kwargs) test_mode_flag = os.getenv('TEST_MODE', '') travis_flag = os.getenv('TRAVIS_BRANCH', '') log_group_name = f"{'' if test_mode_flag or travis_flag else cls.prefix}tX" \ f"{'_DEBUG' if debug_mode_flag else ''}" \ f"{'_TEST' if test_mode_flag else ''}" \ f"{'_TravisCI' if travis_flag else ''}" boto3_session = Session( aws_access_key_id=cls.aws_access_key_id, aws_secret_access_key=cls.aws_secret_access_key, region_name=cls.aws_region_name) cls.watchtower_log_handler = CloudWatchLogHandler( boto3_session=boto3_session, # use_queues=False, # Because this forked process is quite transient log_group=log_group_name, stream_name=cls.name) setup_logger(cls.logger, cls.watchtower_log_handler, logging.DEBUG if debug_mode_flag else logging.INFO) cls.logger.debug( f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{cls.aws_access_key_id[-2:]}'." )
def test_json_logging(self):
    """Dict log messages should be accepted and shipped without error."""
    json_logger = logging.getLogger("json")
    json_logger.addHandler(CloudWatchLogHandler())
    for n in range(10):
        payload = dict(src="foo", event=str(n), stack=[1, 2, 3, n], details={})
        json_logger.critical(payload)
def get_handler(cls, settings, testing=False):
    """Build a CloudWatch log handler from an ExternalIntegration's settings."""
    setting_of = settings.setting
    group = setting_of(cls.GROUP).value or cls.DEFAULT_APP_NAME
    stream = setting_of(cls.STREAM).value or cls.DEFAULT_APP_NAME
    interval = setting_of(cls.INTERVAL).value or cls.DEFAULT_INTERVAL
    region = setting_of(cls.REGION).value or cls.DEFAULT_REGION
    create_group = setting_of(cls.CREATE_GROUP).value or cls.DEFAULT_CREATE_GROUP

    try:
        interval = int(interval)
    except ValueError:
        raise CannotLoadConfiguration(
            "AWS Cloudwatch Logs interval configuration must be an integer."
        )
    if interval <= 0:
        raise CannotLoadConfiguration(
            "AWS Cloudwatch Logs interval must be a positive integer.")

    return CloudWatchLogHandler(
        log_group=group,
        stream_name=stream,
        send_interval=interval,
        boto3_session=AwsSession(region_name=region),
        create_log_group=create_group == "TRUE",
    )
def test_empty_message(self):
    """An empty message must raise a WatchtowerWarning, not an API call."""
    empty_logger = logging.getLogger("empty")
    empty_logger.addHandler(CloudWatchLogHandler(use_queues=False))
    with self.assertWarns(WatchtowerWarning) as cm:
        empty_logger.critical("")
    expected = "Received empty message. Empty messages cannot be sent to CloudWatch Logs"
    self.assertEqual(str(cm.warning), expected)
def test_json_logging_object_with_datetime(self):
    """Dict messages containing datetime values must be handled without error."""
    dt_logger = logging.getLogger("json")
    dt_logger.addHandler(CloudWatchLogHandler())
    timestamp = datetime(2019, 1, 1)
    for n in range(10):
        dt_logger.critical(dict(src="foo", event=str(n), stack=[1, 2, 3, n],
                                details=dict(time=timestamp)))
def __init__(
    self,
    log_group: str,
    log_stream: str,
    aws_access_key: str,
    aws_secret_key: str,
    aws_region: str = "us-east-1",
    send_interval: int = 15,
    formatter="%(asctime)s | %(filename)s (%(lineno)d) | %(levelname)s: %(message)s",
):
    """Initialize CloudLogger.

    Parameters
    ----------
    log_group : str
        CloudWatch log group that records are written to.
    log_stream : str
        CloudWatch log stream that records are written to.
    aws_access_key : str
        AWS Access Key ID (from the AWS management console).
    aws_secret_key : str
        AWS Secret Access Key (from the AWS management console).
    aws_region : str
        AWS region hosting CloudWatch.
    send_interval : int
        Maximum seconds messages sit in the queue before a batch is sent.
    formatter : str
        Log message format string.
    """
    # Root logging configuration shared by all loggers in the process.
    logging.basicConfig(level=logging.INFO, format=formatter)

    # Authenticate against AWS with an explicit boto3 session.
    session = Session(
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key,
        region_name=aws_region,
    )

    # The group and stream are expected to already exist — never create them.
    self.handler = CloudWatchLogHandler(
        log_group=log_group,
        stream_name=log_stream,
        boto3_session=session,
        send_interval=send_interval,
        create_log_group=False,
        create_log_stream=False,
    )
    self.logger = logging.getLogger(log_stream)
    self.logger.addHandler(self.handler)

    connection_log = "Connected to AWS CloudWatch (%s): %s (%s)"
    self.logger.info(connection_log, aws_region, log_group, log_stream)
def test_flush_safing(self):
    """After close(), further logging warns about shutdown instead of failing."""
    cw_handler = CloudWatchLogHandler()
    log = logging.getLogger("l")
    log.addHandler(cw_handler)
    cw_handler.flush()
    log.critical("msg")
    cw_handler.close()
    with self.assertWarns(WatchtowerWarning) as cm:
        log.critical("msg")
    self.assertEqual(str(cm.warning),
                     "Received message after logging system shutdown")
def test_basic_pycwl_statements(self):
    """Hammer one shared handler from several loggers across batch boundaries."""
    shared_handler = CloudWatchLogHandler()
    loggers = []
    for idx in range(5):
        lgr = logging.getLogger("logger{}".format(idx))
        lgr.addHandler(shared_handler)
        #lgr.addHandler(CloudWatchLogHandler(use_queues=False))
        loggers.append(lgr)
    # Just past one full batch per logger...
    for _ in range(10001):
        for lgr in loggers:
            lgr.error("test")
    import time
    time.sleep(1)
    # ...then under a batch...
    for _ in range(9000):
        for lgr in loggers:
            lgr.error("test")
    # ...then just past the boundary again.
    for _ in range(1001):
        for lgr in loggers:
            lgr.error("test")
import logging
import os
import sys

# Make the repository root importable so the local watchtower checkout is used
# instead of any installed copy.
sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))  # noqa
from watchtower import CloudWatchLogHandler

# Smoke test: emit a single record through the handler, then close() to flush
# any queued messages before the process exits.
handler = CloudWatchLogHandler(stream_name='run_logging')
logger = logging.getLogger('run_logging')
logger.setLevel(logging.INFO)
logger.addHandler(handler)
logger.info('msg')
handler.close()
def test_empty_message(self):
    """An empty log message should warn and never reach CloudWatch.

    Previously this test emitted the empty message without asserting
    anything; it now pins the expected WatchtowerWarning (matching the
    sibling empty-message test that does assert it).
    """
    handler = CloudWatchLogHandler(use_queues=False)
    logger = logging.getLogger("empty")
    logger.addHandler(handler)
    with self.assertWarns(WatchtowerWarning) as cm:
        logger.critical("")
    self.assertEqual(
        str(cm.warning),
        "Received empty message. Empty messages cannot be sent to CloudWatch Logs"
    )
def job(queued_json_payload:Dict[str,Any]) -> None:
    """
    This function is called by the rq package to process a job in the queue(s).
        (Don't rename this function.)

    The job is removed from the queue before the job is started,
        but if the job throws an exception or times out (timeout specified in enqueue process)
            then the job gets added to the 'failed' queue.
    """
    AppSettings.logger.info("Door43-Job-Handler received a callback" + (" (in debug mode)" if debug_mode_flag else ""))
    start_time = time.time()
    stats_client.incr(f'{callback_stats_prefix}.jobs.attempted')

    current_job = get_current_job()
    #print(f"Current job: {current_job}") # Mostly just displays the job number and payload
    #print("id",current_job.id) # Displays job number
    #print("origin",current_job.origin) # Displays queue name
    #print("meta",current_job.meta) # Empty dict

    # AppSettings.logger.info(f"Updating queue statistics…")
    our_queue = Queue(callback_queue_name, connection=current_job.connection)
    len_our_queue = len(our_queue) # Should normally sit at zero here
    # AppSettings.logger.debug(f"Queue '{callback_queue_name}' length={len_our_queue}")
    # FIX: removed a stray '"' that was accidentally embedded at the start of
    # the metric name (it disagreed with the name logged on the next line).
    stats_client.gauge(f'{door43_stats_prefix}.enqueue-job.callback.queue.length.current', len_our_queue)
    AppSettings.logger.info(f"Updated stats for '{door43_stats_prefix}.enqueue-job.callback.queue.length.current' to {len_our_queue}")

    #print(f"Got a job from {current_job.origin} queue: {queued_json_payload}")
    #print(f"\nGot job {current_job.id} from {current_job.origin} queue")
    #queue_prefix = 'dev-' if current_job.origin.startswith('dev-') else ''
    #assert queue_prefix == prefix
    try:
        job_descriptive_name, door43_webhook_received_at = \
            process_callback_job(prefix, queued_json_payload, current_job.connection)
    except Exception as e:
        # Catch most exceptions here so we can log them to CloudWatch
        prefixed_name = f"{prefix}Door43_Callback"
        AppSettings.logger.critical(f"{prefixed_name} threw an exception while processing: {queued_json_payload}")
        AppSettings.logger.critical(f"{e}: {traceback.format_exc()}")
        AppSettings.close_logger() # Ensure queued logs are uploaded to AWS CloudWatch

        # Now attempt to log it to an additional, separate FAILED log
        import logging
        from boto3 import Session
        from watchtower import CloudWatchLogHandler
        logger2 = logging.getLogger(prefixed_name)
        test_mode_flag = os.getenv('TEST_MODE', '')
        travis_flag = os.getenv('TRAVIS_BRANCH', '')
        log_group_name = f"FAILED_{'' if test_mode_flag or travis_flag else prefix}tX" \
                         f"{'_DEBUG' if debug_mode_flag else ''}" \
                         f"{'_TEST' if test_mode_flag else ''}" \
                         f"{'_TravisCI' if travis_flag else ''}"
        aws_access_key_id = os.environ['AWS_ACCESS_KEY_ID']
        boto3_session = Session(aws_access_key_id=aws_access_key_id,
                                aws_secret_access_key=os.environ['AWS_SECRET_ACCESS_KEY'],
                                region_name='us-west-2')
        # use_queues=False so the failure record is sent synchronously before raise.
        failure_watchtower_log_handler = CloudWatchLogHandler(boto3_session=boto3_session,
                                                              use_queues=False,
                                                              log_group=log_group_name,
                                                              stream_name=prefixed_name)
        logger2.addHandler(failure_watchtower_log_handler)
        logger2.setLevel(logging.DEBUG)
        logger2.info(f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'.")
        logger2.critical(f"{prefixed_name} threw an exception while processing: {queued_json_payload}")
        logger2.critical(f"{e}: {traceback.format_exc()}")
        failure_watchtower_log_handler.close()
        # NOTE: following line removed as stats recording used too much disk space
        # stats_client.gauge(user_projects_invoked_string, 1) # Mark as 'failed'
        # NOTE(review): `project_types_invoked_string` is not defined in this
        # function — presumably set at module level; confirm.
        stats_client.gauge(project_types_invoked_string, 1) # Mark as 'failed'
        raise e # We raise the exception again so it goes into the failed queue

    elapsed_milliseconds = round((time.time() - start_time) * 1000)
    stats_client.timing(f'{callback_stats_prefix}.job.duration', elapsed_milliseconds)
    if elapsed_milliseconds < 2000:
        AppSettings.logger.info(f"{prefix}Door43 callback handling for {job_descriptive_name} completed in {elapsed_milliseconds:,} milliseconds.")
    else:
        AppSettings.logger.info(f"{prefix}Door43 callback handling for {job_descriptive_name} completed in {round(time.time() - start_time)} seconds.")

    # Calculate total elapsed time for the job
    total_elapsed_time = datetime.utcnow() - \
        datetime.strptime(door43_webhook_received_at, '%Y-%m-%dT%H:%M:%SZ')
    AppSettings.logger.info(f"{prefix}Door43 total job for {job_descriptive_name} completed in {round(total_elapsed_time.total_seconds())} seconds.")
    stats_client.timing(f'{job_handler_stats_prefix}.total.job.duration', round(total_elapsed_time.total_seconds() * 1000))

    # NOTE: following line removed as stats recording used too much disk space
    # stats_client.gauge(user_projects_invoked_string, 0) # Mark as 'succeeded'
    stats_client.gauge(project_types_invoked_string, 0) # Mark as 'succeeded'
    stats_client.incr(f'{callback_stats_prefix}.jobs.succeeded')
    AppSettings.close_logger() # Ensure queued logs are uploaded to AWS CloudWatch
def job(queued_json_payload: Dict[str, Any]) -> None:
    """
    This function is called by the rq package to process a job in the queue(s).
        (Don't rename this function.)

    The job is removed from the queue before the job is started,
        but if the job throws an exception or times out (timeout specified in enqueue process)
            then the job gets added to the 'failed' queue.
    """
    logger.info(MY_NAME_VERSION_STRING)
    logger.debug("tX PDF JobHandler received a job" + (" (in debug mode)" if debug_mode_flag else ""))
    start_time = time()
    stats_client.incr(f'{job_handler_stats_prefix}.jobs.OBSPDF.attempted')

    logger.info(f"Clearing /tmp folder…")
    empty_folder('/tmp/', only_prefix='tX_') # Stops failed jobs from accumulating in /tmp

    # logger.info(f"Updating queue statistics…")
    our_queue = Queue(webhook_queue_name, connection=get_current_job().connection)
    len_our_queue = len(our_queue) # Should normally sit at zero here
    # logger.debug(f"Queue '{webhook_queue_name}' length={len_our_queue}")
    stats_client.gauge(
        f'{tx_stats_prefix}.enqueue-job.queue.OBSPDF.length.current',
        len_our_queue)
    logger.info(
        f"Updated stats for '{tx_stats_prefix}.enqueue-job.queue.OBSPDF.length.current' to {len_our_queue}"
    )

    # Save some stats
    stats_client.incr(
        f"{job_handler_stats_prefix}.jobs.OBSPDF.input.{queued_json_payload['input_format']}"
    )
    stats_client.incr(
        f"{job_handler_stats_prefix}.jobs.OBSPDF.subject.{queued_json_payload['resource_type']}"
    )

    try:
        job_descriptive_name = process_PDF_job(prefix, queued_json_payload)
    except Exception as e:
        # Catch most exceptions here so we can log them to CloudWatch
        prefixed_name = f"{prefix}tX_PDF_Job_Handler"
        logger.critical(
            f"{prefixed_name} threw an exception while processing: {queued_json_payload}"
        )
        logger.critical(f"{e}: {traceback.format_exc()}")
        main_watchtower_log_handler.close() # Ensure queued logs are uploaded to AWS CloudWatch

        # Now attempt to log it to an additional, separate FAILED log
        logger2 = logging.getLogger(prefixed_name)
        # NOTE(review): `test_mode_flag`, `travis_flag`, `aws_access_key_id` and
        # `aws_secret_access_key` are module-level values — confirm they exist
        # at import time.
        log_group_name = f"FAILED_{'' if test_mode_flag or travis_flag else prefix}tX" \
                         f"{'_DEBUG' if debug_mode_flag else ''}" \
                         f"{'_TEST' if test_mode_flag else ''}" \
                         f"{'_TravisCI' if travis_flag else ''}"
        boto3_session = Session(aws_access_key_id=aws_access_key_id,
                                aws_secret_access_key=aws_secret_access_key,
                                region_name='us-west-2')
        # use_queues=False: send the failure records synchronously before re-raising.
        failure_watchtower_log_handler = CloudWatchLogHandler(
            boto3_session=boto3_session,
            use_queues=False,
            log_group=log_group_name,
            stream_name=prefixed_name)
        logger2.addHandler(failure_watchtower_log_handler)
        logger2.setLevel(logging.DEBUG)
        logger2.info(
            f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'."
        )
        logger2.critical(
            f"{prefixed_name} threw an exception while processing: {queued_json_payload}"
        )
        logger2.critical(f"{e}: {traceback.format_exc()}")
        failure_watchtower_log_handler.close()
        raise e # We raise the exception again so it goes into the failed queue

    elapsed_milliseconds = round((time() - start_time) * 1000)
    stats_client.timing(f'{job_handler_stats_prefix}.job.OBSPDF.duration',
                        elapsed_milliseconds)
    if elapsed_milliseconds < 2000:
        logger.info(
            f"{prefix}tX job handling for {job_descriptive_name} PDF completed in {elapsed_milliseconds:,} milliseconds."
        )
    else:
        logger.info(
            f"{prefix}tX job handling for {job_descriptive_name} PDF completed in {round(time() - start_time)} seconds."
        )
    stats_client.incr(f'{job_handler_stats_prefix}.jobs.OBSPDF.completed')
    main_watchtower_log_handler.close() # Ensure queued logs are uploaded to AWS CloudWatch
test_mode_flag = os.getenv('TEST_MODE', '')
travis_flag = os.getenv('TRAVIS_BRANCH', '')
# Log group is "tX" plus suffixes; the deployment prefix is dropped when
# running under test or Travis CI.
log_group_name = f"{'' if test_mode_flag or travis_flag else prefix}tX" \
                 f"{'_DEBUG' if debug_mode_flag else ''}" \
                 f"{'_TEST' if test_mode_flag else ''}" \
                 f"{'_TravisCI' if travis_flag else ''}"

# Setup logging
logger = logging.getLogger(job_handler_stats_prefix)
boto3_session = Session(aws_access_key_id=aws_access_key_id,
                        aws_secret_access_key=aws_secret_access_key,
                        region_name=AWS_REGION_NAME)
main_watchtower_log_handler = CloudWatchLogHandler(
    boto3_session=boto3_session,
    # use_queues=False, # Because this forked process is quite transient
    log_group=log_group_name,
    stream_name='tX-PDF-Job-Handler')
logger.addHandler(main_watchtower_log_handler)
logger.setLevel(logging.DEBUG if debug_mode_flag else logging.INFO)
# Change these loggers to only report errors:
logging.getLogger('boto3').setLevel(logging.ERROR)
logging.getLogger('botocore').setLevel(logging.ERROR)
# Only the last two characters of the key are logged, to avoid leaking it.
logger.debug(
    f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'."
)

# Get the Graphite URL from the environment, otherwise use a local test instance
graphite_url = os.getenv('GRAPHITE_HOSTNAME', 'localhost')
stats_client = StatsClient(host=graphite_url, port=8125)
def from_chemsys(cls, chemsys, prefix="proto-dft-2/runs",
                 n_max_atoms=20, agent=None, analyzer=None,
                 experiment=None, log_file="campaign.log",
                 cloudwatch_group="/camd/worker/dev/"):
    """
    Class factory method for constructing campaign from chemsys.

    Args:
        chemsys (str): chemical system for the campaign
        prefix (str): prefix for s3
        n_max_atoms (int): number of maximum atoms
        agent (Agent): agent for stability campaign
        analyzer (Analyzer): analyzer for stability campaign
        experiment (Agent): experiment for stability campaign
        log_file (str): log filename
        cloudwatch_group (str): cloudwatch group to log to

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from the
            chemical system
    """
    # Log to a file, CloudWatch, and stderr simultaneously.
    logger = logging.Logger("camd")
    logger.setLevel("INFO")
    file_handler = logging.FileHandler(log_file)
    cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                      stream_name=chemsys)
    logger.addHandler(file_handler)
    logger.addHandler(cw_handler)
    logger.addHandler(logging.StreamHandler())

    logger.info(
        "Starting campaign factory from_chemsys {}".format(chemsys))
    s3_prefix = "{}/{}".format(prefix, chemsys)

    # Initialize s3
    dumpfn({
        "started": datetime.now().isoformat(),
        "version": __version__
    }, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    element_list = chemsys.split('-')
    max_coeff, charge_balanced = heuristic_setup(element_list)
    domain = StructureDomain.from_bounds(element_list,
                                         charge_balanced=charge_balanced,
                                         n_max_atoms=n_max_atoms,
                                         **{'grid': range(1, max_coeff)})
    candidate_data = domain.candidates()

    # Dump structure/candidate data
    with open('candidate_data.pickle', 'wb') as f:
        pickle.dump(candidate_data, f)
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')
    logger.info("Candidates generated")

    # Set up agents and loop parameters
    agent = agent or AgentStabilityAdaBoost(
        model=MLPRegressor(hidden_layer_sizes=(84, 50)),
        n_query=10,
        hull_distance=0.2,
        exploit_fraction=1.0,
        uncertainty=True,
        alpha=0.5,
        diversify=True,
        n_estimators=20)
    analyzer = analyzer or StabilityAnalyzer(hull_distance=0.2)
    experiment = experiment or OqmdDFTonMC1(timeout=30000)
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Construct and start loop
    return cls(candidate_data=candidate_data,
               agent=agent,
               experiment=experiment,
               analyzer=analyzer,
               seed_data=seed_data,
               heuristic_stopper=5,
               s3_prefix=s3_prefix,
               logger=logger)
def from_chemsys(cls, chemsys, prefix="proto-dft-2/runs", n_max_atoms=20,
                 agent=None, analyzer=None, experiment=None,
                 log_file="campaign.log", cloudwatch_group="/camd/worker/dev/"):
    """
    Class factory method for constructing campaign from chemsys.

    Args:
        chemsys (str): chemical system for the campaign
        prefix (str): prefix for s3
        n_max_atoms (int): number of maximum atoms
        agent (Agent): agent for stability campaign
        analyzer (Analyzer): analyzer for stability campaign
        experiment (Agent): experiment for stability campaign
        log_file (str): log filename
        cloudwatch_group (str): cloudwatch group to log to

    Returns:
        (ProtoDFTCampaign): Standard proto-dft campaign from the
            chemical system
    """
    # Log to a file, CloudWatch, and stderr simultaneously.
    logger = logging.Logger("camd")
    logger.setLevel("INFO")
    file_handler = logging.FileHandler(log_file)
    cw_handler = CloudWatchLogHandler(log_group=cloudwatch_group,
                                      stream_name=chemsys)
    logger.addHandler(file_handler)
    logger.addHandler(cw_handler)
    logger.addHandler(logging.StreamHandler())

    logger.info(
        "Starting campaign factory from_chemsys {}".format(chemsys))
    s3_prefix = "{}/{}".format(prefix, chemsys)

    # Initialize s3
    dumpfn({
        "started": datetime.now().isoformat(),
        "version": __version__
    }, "start.json")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Get structure domain
    # Check cache — the candidate set for a (chemsys, n_max_atoms) pair is
    # expensive to generate, so it is memoized in S3.
    cache_key = "protosearch_cache/v1/{}/{}/candidates.pickle".format(
        chemsys, n_max_atoms)
    # TODO: create test of isfile
    if s3_key_exists(bucket=CAMD_S3_BUCKET, key=cache_key):
        logger.info("Found cached protosearch domain.")
        candidate_data = pd.read_pickle("s3://{}/{}".format(
            CAMD_S3_BUCKET, cache_key))
        logger.info("Loaded cached {}.".format(cache_key))
    else:
        logger.info(
            "Generating domain with max {} atoms.".format(n_max_atoms))
        element_list = chemsys.split('-')
        max_coeff, charge_balanced = heuristic_setup(element_list)
        domain = StructureDomain.from_bounds(
            element_list,
            charge_balanced=charge_balanced,
            n_max_atoms=n_max_atoms,
            **{'grid': range(1, max_coeff)})
        candidate_data = domain.candidates()
        logger.info("Candidates generated")
        # Write the freshly generated candidates back to the S3 cache.
        candidate_data.to_pickle("s3://{}/{}".format(
            CAMD_S3_BUCKET, cache_key))
        logger.info("Cached protosearch domain at {}.".format(cache_key))

    # Dump structure/candidate data
    candidate_data.to_pickle("candidate_data.pickle")
    s3_sync(s3_bucket=CAMD_S3_BUCKET, s3_prefix=s3_prefix, sync_path='.')

    # Set up agents and loop parameters
    agent = agent or AgentStabilityAdaBoost(
        model=MLPRegressor(hidden_layer_sizes=(84, 50)),
        n_query=10,
        hull_distance=0.2,
        exploit_fraction=1.0,
        uncertainty=True,
        alpha=0.5,
        diversify=True,
        n_estimators=20)
    analyzer = analyzer or StabilityAnalyzer(hull_distance=0.2)
    experiment = experiment or OqmdDFTonMC1(timeout=30000,
                                            prefix_append="proto-dft")
    seed_data = load_dataframe("oqmd1.2_exp_based_entries_featurized_v2")

    # Load cached experiments
    logger.info("Loading cached experiments")
    cached_experiments = experiment.fetch_cached(candidate_data)
    logger.info("Found {} experiments.".format(len(cached_experiments)))
    if len(cached_experiments) > 0:
        summary, seed_data = analyzer.analyze(cached_experiments, seed_data)
        # Remove cached experiments from candidate_data
        candidate_space = candidate_data.index.difference(
            cached_experiments.index, sort=False).tolist()
        candidate_data = candidate_data.loc[candidate_space]
        logger.info("Cached experiments added to seed.")

    # Construct and start loop
    return cls(candidate_data=candidate_data,
               agent=agent,
               experiment=experiment,
               analyzer=analyzer,
               seed_data=seed_data,
               heuristic_stopper=5,
               s3_prefix=s3_prefix,
               logger=logger)
# Log to stdout as well as CloudWatch.
logger = logging.getLogger(prefixed_our_name)
sh = logging.StreamHandler(sys.stdout)
sh.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s: %(message)s'))
logger.addHandler(sh)

aws_access_key_id = environ['AWS_ACCESS_KEY_ID']
boto3_session = Session(aws_access_key_id=aws_access_key_id,
                        aws_secret_access_key=environ['AWS_SECRET_ACCESS_KEY'],
                        region_name='us-west-2')
test_mode_flag = getenv('TEST_MODE', '')
travis_flag = getenv('TRAVIS_BRANCH', '')
# Log group is "tX" plus suffixes; the deployment prefix is dropped when
# running under test or Travis CI.
log_group_name = f"{'' if test_mode_flag or travis_flag else prefix}tX" \
                 f"{'_DEBUG' if debug_mode_flag else ''}" \
                 f"{'_TEST' if test_mode_flag else ''}" \
                 f"{'_TravisCI' if travis_flag else ''}"
watchtower_log_handler = CloudWatchLogHandler(boto3_session=boto3_session,
                                              log_group=log_group_name,
                                              stream_name=prefixed_our_name)
logger.addHandler(watchtower_log_handler)
# Enable DEBUG logging for dev- instances (but less logging for production)
logger.setLevel(logging.DEBUG if prefix else logging.INFO)
# Only the last two characters of the key are logged, to avoid leaking it.
logger.debug(
    f"Logging to AWS CloudWatch group '{log_group_name}' using key '…{aws_access_key_id[-2:]}'."
)

# Setup queue variables
QUEUE_NAME_SUFFIX = ''  # Used to switch to a different queue, e.g., '_1'
if prefix not in ('', DEV_PREFIX):
    logger.critical(
        f"Unexpected prefix: '{prefix}' — expected '' or '{DEV_PREFIX}'")
if prefix:  # don't use production queue
    our_adjusted_webhook_queue_name = prefixed_our_name + QUEUE_NAME_SUFFIX  # Will become our main queue name