def check_existence(self):
    """ Check for the existence of the corresponding data and config in s3. """
    exists_errmsg = " [Internal (check_existence)] INPUT ERROR: S3 Bucket does not contain {}"
    if type(self.data_name) is str:
        check_data_exists = utilsparams3.exists(self.bucket_name,self.data_name)
    elif type(self.data_name) is list:
        check_data_exists = all([utilsparams3.exists(self.bucket_name,name) for name in self.data_name])
    else:
        raise TypeError("[JOB TERMINATE REASON] 'dataname' field is not the right type. Should be string or list.")

    if not check_data_exists:
        msg = exists_errmsg.format(self.data_name)
        self.logger.append(msg)
        self.logger.printlatest()
        self.logger.write()
        raise ValueError("[JOB TERMINATE REASON] 'dataname' field refers to data that cannot be found. Be sure this is a full path to the data, without the bucket name.")
    elif not utilsparams3.exists(self.bucket_name,self.config_name):
        msg = exists_errmsg.format(self.config_name)
        self.logger.append(msg)
        self.logger.printlatest()
        self.logger.write()
        raise ValueError("[JOB TERMINATE REASON] 'configname' field refers to a configuration file that cannot be found. Be sure this is a full path to the config file, without the bucket name.")
    ###########################
    ## Now get the actual paths to relevant data from the foldername:
    if type(self.data_name) is str:
        self.filenames = utilsparams3.extract_files(self.bucket_name,self.data_name,ext = None)
    elif type(self.data_name) is list:
        self.filenames = self.data_name
    assert len(self.filenames) > 0, "[JOB TERMINATE REASON] The folder indicated is empty, or does not contain analyzable data."
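## Usage sketch (illustrative, not part of the module): check_existence reads
## bucket_name, data_name, config_name, and a logger off of self, so it can be
## exercised against a duck-typed stub. _PrintLogger, _StubJob, and the
## bucket/key values below are hypothetical, and the call only succeeds if
## utilsparams3 can reach a real bucket containing those keys.
class _PrintLogger:
    """Hypothetical stand-in exposing the three logger methods used above."""
    def append(self, msg): print(msg)
    def printlatest(self): pass
    def write(self): pass

def _example_check_existence():
    job = type("_StubJob", (), {})()               # bare object to hang attributes on
    job.bucket_name = "example-bucket"             # placeholder bucket
    job.data_name = ["group/inputs/day1.zip", "group/inputs/day2.zip"]  # list form; a single string also works
    job.config_name = "group/configs/config.yaml"  # placeholder config key
    job.logger = _PrintLogger()
    check_existence(job)                           # unbound call: the stub plays the role of self
    return job.filenames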
def monitor_updater(event,context):
    """ Newest version of events monitoring that updates pre-existing logs. """
    ## 1. First, find the instance id.
    ## 2. Go find the appropriate log folder in the bucket [bucket available through os.environ]
    ## 3. Now figure out if this is a "running" or "shutting-down" state change.
    ## 4. Accordingly, either update the log [running] or update the log and move it to the appropriate folder [given by the log contents.]
    ## Include exception handling for the case where one of the fields is not completed.
    try:
        print(event)
        print(context)
        time = event['time']
        instanceid = event['detail']['instance-id']
        logname = "{}.json".format(instanceid)
        statechange = event['detail']['state']
        bucket_name = os.environ["BUCKET_NAME"]
        if statechange in ["running","shutting-down"]:
            log = utilsparams3.update_monitorlog(bucket_name,logname,statechange,time)
            if type(log["datapath"]) is str:
                path_to_data = log["datapath"]
            elif type(log["datapath"]) is list:
                assert type(log["datapath"][0]) == str
                path_to_data = log["datapath"][0]
            else:
                print("datapath type unsupported, exiting.")
                raise TypeError("datapath must be a string or a list of strings.")
            jobname = os.path.basename(log["jobpath"]).replace(":","_") ## Monitoring names cannot have colons.
            print(path_to_data,"path_to_data")
            groupname = re.findall('.+?(?=/'+os.environ["INDIR"]+')',path_to_data)[0]
            ## Log name for the group that made this job:
            current_job_log = os.path.join("logs","active",logname)
            completed_job_log = os.path.join("logs",groupname,logname)
            if statechange == "shutting-down":
                utilsparams3.mv(bucket_name,current_job_log,completed_job_log)
                timepkg.sleep(5)
                ## Now check if we can delete this rule and send the end signal to the user:
                rulename = "Monitor{}".format(jobname)
                instances_under_rule = utilsparamevents.get_monitored_instances(rulename)
                condition = [utilsparams3.exists(bucket_name,os.path.join("logs","active","{}.json".format(inst))) for inst in instances_under_rule]
                ## Delete the rule once no monitored instances remain active:
                if not any(condition):
                    ## get the target:
                    response = utilsparamevents.full_delete_rule(rulename)
                    terminated = utilsparams3.write_endfile(log["databucket"],log["jobpath"])
                else:
                    pass
        else:
            print("unhandled state change. quitting")
            raise ValueError("statechange {} not expected".format(statechange))
        exitcode = 0
    except Exception as e:
        print("error: {}".format(e))
        exitcode = 99
    return exitcode
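## Local invocation sketch: the event shape below follows the standard
## CloudWatch/EventBridge "EC2 Instance State-change Notification" payload,
## but every value is a placeholder, and the call only completes against real
## S3 objects and event rules reachable through utilsparams3/utilsparamevents.
def _example_invoke_monitor_updater():
    os.environ.setdefault("BUCKET_NAME", "example-analysis-bucket")  # placeholder
    os.environ.setdefault("INDIR", "inputs")                         # placeholder
    example_event = {
        "time": "2020-01-01T00:00:00Z",
        "detail": {
            "instance-id": "i-0123456789abcdef0",   # placeholder instance
            "state": "shutting-down",               # or "running"
        },
    }
    ## context is only printed above, so None suffices for a dry run:
    return monitor_updater(example_event, None)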
def __init__(self, bucket_name, key, time):
    ## Initialize as before:
    # Get Upload Location Information
    self.bucket_name = bucket_name
    ## Get directory above the input directory.
    self.path = re.findall('.+?(?=/' + os.environ["SUBMITDIR"] + ')', key)[0]
    ## Now add in the time parameter:
    self.time = time
    ## We will index by the submit file name prefix if it exists:
    submit_search = re.findall('.+?(?=/submit.json)', os.path.basename(key))
    try:
        submit_name = submit_search[0]
    except IndexError as e:
        ## If the filename is just "submit.json", we just don't append anything to the job name.
        submit_name = ""

    #### Parse submit file
    submit_file = utilsparams3.load_json(bucket_name, key)

    ## Machine formatted fields (error only available in lambda)
    ## These next three fields check that the submit file is correctly formatted
    try:
        self.timestamp = submit_file["timestamp"]
        ## KEY: Now set up logging in the input folder too:
    except KeyError as ke:
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing timestamp when data was uploaded.")

    ## Initialize s3 directory for this job.
    self.jobname = "job_{}_{}_{}".format(submit_name, bucket_name, self.timestamp)
    jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
    self.jobpath = jobpath
    ## And create a corresponding directory in the submit area.
    create_jobdir = utilsparams3.mkdir(
        self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']), self.jobname)

    ## Create a logging object and write to it: a logger for the submit area.
    self.logger = utilsparams3.JobLogger_demo(self.bucket_name, self.jobpath)
    self.logger.append("Unique analysis version id: {}".format(
        os.environ['versionid'].split("\n")[0]))
    self.logger.append("Initializing analysis.")
    self.logger.write()
    ########################
    ## Now parse the rest of the file.
    try:
        self.instance_type = submit_file['instance_type']  # TODO default option from config
    except KeyError as ke:
        msg = "Using default instance type {} from config file".format(os.environ["INSTANCE_TYPE"])
        self.instance_type = os.environ["INSTANCE_TYPE"]
        # Log this message
        self.logger.append(msg)
        self.logger.write()

    ## Check that we have a dataname field:
    submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
    try:
        self.data_name = submit_file['dataname']  # TODO validate extensions
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.logger.append(submit_errmsg.format(ke))
        self.logger.write()
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing data name to analyze")

    try:
        self.config_name = submit_file["configname"]
        self.logger.assign_config(self.config_name)
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.logger.append(submit_errmsg.format(ke))
        self.logger.write()
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

    self.logger.append(
        "Analysis request detected with dataset(s): {}, config file {}. Reading analysis blueprint."
        .format(self.data_name, self.config_name))
    self.logger.write()
    ##########################
    ## Check for the existence of the corresponding data and config in s3.
    ## Check that we have the actual data in the bucket.
    exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
    if type(self.data_name) is str:
        check_data_exists = utilsparams3.exists(self.bucket_name, self.data_name)
    elif type(self.data_name) is list:
        check_data_exists = all([
            utilsparams3.exists(self.bucket_name, name) for name in self.data_name
        ])
    else:
        raise TypeError("dataname should be string or list.")

    if not check_data_exists:
        msg = exists_errmsg.format(self.data_name)
        self.logger.append(msg)
        self.logger.write()
        raise ValueError("dataname given does not exist in bucket.")
    elif not utilsparams3.exists(self.bucket_name, self.config_name):
        msg = exists_errmsg.format(self.config_name)
        self.logger.append(msg)
        self.logger.write()
        raise ValueError("configname given does not exist in bucket.")
    ###########################
    ## Now get the actual paths to relevant data from the foldername:
    if type(self.data_name) is str:
        self.filenames = utilsparams3.extract_files(self.bucket_name, self.data_name, ext=None)
    elif type(self.data_name) is list:
        self.filenames = self.data_name
    assert len(self.filenames) > 0, "we must have data to analyze."
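## A minimal submit.json that this parser accepts, written as the equivalent
## Python dict (all values are placeholders): timestamp, dataname, and
## configname are required; instance_type is optional and falls back to the
## INSTANCE_TYPE environment variable. dataname may also be a list of keys.
_example_submit_file = {
    "timestamp": "2020-01-01-00-00-00",         # required: set when the data was uploaded
    "dataname": "group/inputs/dataset.zip",     # required: key(s) relative to the bucket root
    "configname": "group/configs/config.yaml",  # required: config file key
    "instance_type": "m5.xlarge",               # optional: overrides the INSTANCE_TYPE default
}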
def __init__(self, bucket_name, key, time):
    #### Declare basic parameters:
    # Get Upload Location Information
    self.bucket_name = bucket_name
    ## Important paths:
    ## Get directory above the input directory where the job was submitted.
    self.path = re.findall('.+?(?=/' + os.environ["INDIR"] + ')', key)[0]
    ## The other important directory is the actual base directory of the input bucket itself.
    ## Now add in the time parameter:
    self.time = time

    #### Set up basic logging so we can get a trace when errors happen.
    ## We will index by the submit file name prefix if it exists:
    submit_search = re.findall('.+?(?=/submit.json)', os.path.basename(key))
    try:
        submit_name = submit_search[0]
    except IndexError as e:
        ## If the filename is just "submit.json", we just don't append anything to the job name.
        submit_name = ""

    #### Parse submit file
    submit_file = utilsparams3.load_json(bucket_name, key)

    ## These next three fields check that the submit file is correctly formatted
    try:
        self.timestamp = submit_file["timestamp"]
        ## KEY: Now set up logging in the input folder too:
    except KeyError as ke:
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing timestamp when data was uploaded.")

    ## Now we're going to get the path to the results directory in the submit folder:
    self.jobname = "job_{}_{}_{}".format(submit_name, bucket_name, self.timestamp)
    jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
    self.jobpath_submit = jobpath
    ## And create a corresponding directory in the submit area.
    create_jobdir = utilsparams3.mkdir(
        self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']), self.jobname)

    ## a logger for the submit area.
    self.submitlogger = utilsparams3.JobLogger_demo(self.bucket_name, self.jobpath_submit)
    self.submitlogger.append("Initializing EPI analysis: Parameter search for 2D LDS.")
    self.submitlogger.write()

    try:
        self.instance_type = submit_file['instance_type']  # TODO default option from config
    except KeyError as ke:
        msg = "Using default instance type {} from config file".format(os.environ["INSTANCE_TYPE"])
        self.instance_type = os.environ["INSTANCE_TYPE"]
        ## Log this message.
        self.submitlogger.append(msg)
        self.submitlogger.write()

    ## Check that we have a dataname field:
    submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
    try:
        self.input_bucket_name = submit_file["bucketname"]
        ## KEY: Now set up logging in the input folder too:
        self.inputlogger = utilsparams3.JobLogger(
            self.input_bucket_name, os.path.join(os.environ['OUTDIR'], self.jobname)
        )  ##TODO: this relies upon "OUTDIR" being the same in the submit and input buckets. Make sure to alter this later.
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.submitlogger.append(submit_errmsg.format(ke))
        self.submitlogger.write()
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing bucket name where data is located.")

    try:
        self.data_name = submit_file['dataname']  # TODO validate extensions
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.submitlogger.append(submit_errmsg.format(ke))
        self.submitlogger.write()
        self.inputlogger.append(submit_errmsg.format(ke))
        self.inputlogger.write()
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing data name to analyze")

    try:
        self.config_name = submit_file["configname"]
        self.submitlogger.assign_config(self.config_name)
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.submitlogger.append(submit_errmsg.format(ke))
        self.submitlogger.write()
        self.inputlogger.append(submit_errmsg.format(ke))
        self.inputlogger.write()
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

    ## Check that we have the actual data in the bucket.
    exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
    if not utilsparams3.exists(self.input_bucket_name, self.data_name):
        msg = exists_errmsg.format(self.data_name)
        self.submitlogger.append(msg)
        self.submitlogger.write()
        self.inputlogger.append(msg)
        self.inputlogger.write()
        raise ValueError("dataname given does not exist in bucket.")
    elif not utilsparams3.exists(self.input_bucket_name, self.config_name):
        msg = exists_errmsg.format(self.config_name)
        self.submitlogger.append(msg)
        self.submitlogger.write()
        self.inputlogger.append(msg)
        self.inputlogger.write()
        raise ValueError("configname given does not exist in bucket.")

    ## Check what instance we should use.
    try:
        self.instance_type = submit_file['instance_type']
    except KeyError as ke:
        msg = "Using default instance type {} from config file".format(os.environ["INSTANCE_TYPE"])
        self.instance_type = os.environ["INSTANCE_TYPE"]
        ## Log this message.
        self.submitlogger.append(msg)
        self.submitlogger.write()
        self.inputlogger.append(msg)
        self.inputlogger.write()
    ###########################
    ## Now get the actual paths to relevant data from the foldername:
    self.filenames = utilsparams3.extract_files(self.input_bucket_name, self.data_name, ext=None)
    assert len(self.filenames) > 0, "we must have data to analyze."
def __init__(self,bucket_name,key,time):
    ## Initialize as before:
    # Get Upload Location Information
    self.bucket_name = bucket_name
    ## Get directory above the input directory.
    self.path = re.findall('.+?(?=/'+os.environ["INDIR"]+')',key)[0]
    ## Now add in the time parameter:
    self.time = time
    ## We will index by the submit file name prefix if it exists:
    submit_search = re.findall('.+?(?=/submit.json)',os.path.basename(key))
    try:
        submit_name = submit_search[0]
    except IndexError as e:
        ## If the filename is just "submit.json", we just don't append anything to the job name.
        submit_name = ""

    ## Now we're going to get the path to the results directory:
    self.jobname = "job"+submit_name+self.time
    jobpath = os.path.join(self.path,os.environ['OUTDIR'],self.jobname)
    self.jobpath = jobpath
    create_jobdir = utilsparams3.mkdir(self.bucket_name,os.path.join(self.path,os.environ['OUTDIR']),self.jobname)
    print(self.path,'path')

    self.logger = utilsparams3.JobLogger(self.bucket_name, self.jobpath)
    #self.out_path = utilsparams3.mkdir(self.bucket_name, self.path, config.OUTDIR)
    #self.in_path = utilsparams3.mkdir(self.bucket_name, self.path, config.INDIR)

    # Load Content Of Submit File
    submit_file = utilsparams3.load_json(bucket_name, key)

    ## Check what instance we should use.
    try:
        self.instance_type = submit_file['instance_type']  # TODO default option from config
    except KeyError as ke:
        msg = "Using default instance type {} from config file".format(os.environ["INSTANCE_TYPE"])
        self.instance_type = os.environ["INSTANCE_TYPE"]
        ## Log this message.
        self.logger.append(msg)
        self.logger.write()

    ## These next two check that the submit file is correctly formatted
    ## Check that we have a dataname field:
    submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
    try:
        self.data_name = submit_file['dataname']  # TODO validate extensions
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.logger.append(submit_errmsg.format(ke))
        self.logger.write()
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing data name to analyze")

    try:
        self.config_name = submit_file["configname"]
        self.logger.assign_config(self.config_name)
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.logger.append(submit_errmsg.format(ke))
        self.logger.write()
        ## Now raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

    ## Check that we have the actual data in the bucket.
    exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
    if not utilsparams3.exists(self.bucket_name,self.data_name):
        msg = exists_errmsg.format(self.data_name)
        self.logger.append(msg)
        self.logger.write()
        raise ValueError("dataname given does not exist in bucket.")
    elif not utilsparams3.exists(self.bucket_name,self.config_name):
        msg = exists_errmsg.format(self.config_name)
        self.logger.append(msg)
        self.logger.write()
        raise ValueError("configname given does not exist in bucket.")
    ###########################
    ## Now get the actual paths to relevant data from the foldername:
    self.filenames = utilsparams3.extract_files(self.bucket_name,self.data_name,ext = None)
    assert len(self.filenames) > 0, "we must have data to analyze."
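## Environment this single-bucket parser assumes (a sketch with placeholder
## values; the variable names come from the code above, the values do not):
def _example_environment():
    os.environ.setdefault("INDIR", "inputs")              # placeholder input directory name
    os.environ.setdefault("OUTDIR", "results")            # placeholder results directory name
    os.environ.setdefault("INSTANCE_TYPE", "m5.xlarge")   # placeholder default instance
    os.environ.setdefault("MISSING_CONFIG_ERROR", "Missing config file name to analyze.")  # placeholder error text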