def __init__(self, bucket_name, key, time):
    #### Declare basic parameters:
    # Get Upload Location Information
    self.bucket_name = bucket_name
    ## Important paths:
    ## Get directory above the input directory where the job was submitted.
    self.path = re.findall('.+?(?=/' + os.environ["INDIR"] + ')', key)[0]
    ## The other important directory is the actual base directory of the input bucket itself.
    ## Now add in the time parameter:
    self.time = time

    #### Set up basic logging so we can get a trace when errors happen.
    ## We will index by the submit file name prefix if it exists:
    submit_search = re.findall('.+?(?=submit.json)', os.path.basename(key))
    try:
        submit_name = submit_search[0]
    except IndexError:
        ## If the filename is just "submit.json", we just don't append anything to the job name.
        submit_name = ""

    #### Parse submit file
    submit_file = utilsparams3.load_json(bucket_name, key)

    ## These next fields check that the submit file is correctly formatted
    try:
        self.timestamp = submit_file["timestamp"]
    except KeyError:
        ## Raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing timestamp when data was uploaded.")

    ## Now we're going to get the path to the results directory in the submit folder:
    self.jobname = "job_{}_{}_{}".format(submit_name, bucket_name, self.timestamp)
    jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
    self.jobpath_submit = jobpath
    ## And create a corresponding directory in the submit area.
    create_jobdir = utilsparams3.mkdir(
        self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']), self.jobname)

    ## A logger for the submit area.
    self.submitlogger = utilsparams3.JobLogger_demo(self.bucket_name, self.jobpath_submit)
    self.submitlogger.append("Initializing EPI analysis: Parameter search for 2D LDS.")
    self.submitlogger.write()

    try:
        self.instance_type = submit_file['instance_type']  # TODO default option from config
    except KeyError:
        msg = "Using default instance type {} from config file".format(os.environ["INSTANCE_TYPE"])
        self.instance_type = os.environ["INSTANCE_TYPE"]

    ## Check that we have a dataname field:
    submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
    try:
        self.input_bucket_name = submit_file["bucketname"]
        ## KEY: Now set up logging in the input folder too:
        self.inputlogger = utilsparams3.JobLogger(
            self.input_bucket_name, os.path.join(os.environ['OUTDIR'], self.jobname))
        ## TODO: this relies upon "OUTDIR" being the same in the submit and input buckets. Make sure to alter this later.
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.submitlogger.append(submit_errmsg.format(ke))
        self.submitlogger.write()
        ## Raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing bucket name where data is located.")

    try:
        self.data_name = submit_file['dataname']  # TODO validate extensions
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to both loggers
        self.submitlogger.append(submit_errmsg.format(ke))
        self.submitlogger.write()
        self.inputlogger.append(submit_errmsg.format(ke))
        self.inputlogger.write()
        ## Raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing data name to analyze")

    try:
        self.config_name = submit_file["configname"]
        self.submitlogger.assign_config(self.config_name)
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to both loggers
        self.submitlogger.append(submit_errmsg.format(ke))
        self.submitlogger.write()
        self.inputlogger.append(submit_errmsg.format(ke))
        self.inputlogger.write()
        ## Raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

    ## Check that we have the actual data in the bucket.
    exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
    if not utilsparams3.exists(self.input_bucket_name, self.data_name):
        msg = exists_errmsg.format(self.data_name)
        self.submitlogger.append(msg)
        self.submitlogger.write()
        self.inputlogger.append(msg)
        self.inputlogger.write()
        raise ValueError("dataname given does not exist in bucket.")
    elif not utilsparams3.exists(self.input_bucket_name, self.config_name):
        msg = exists_errmsg.format(self.config_name)
        self.submitlogger.append(msg)
        self.submitlogger.write()
        self.inputlogger.append(msg)
        self.inputlogger.write()
        raise ValueError("configname given does not exist in bucket.")

    ## Check what instance we should use.
    try:
        self.instance_type = submit_file['instance_type']
    except KeyError as ke:
        msg = "Instance type {} does not exist, using default from config file".format(ke)
        self.instance_type = os.environ["INSTANCE_TYPE"]
        ## Log this message.
        self.submitlogger.append(msg)
        self.submitlogger.write()
        self.inputlogger.append(msg)
        self.inputlogger.write()

    ###########################
    ## Now get the actual paths to relevant data from the foldername:
    self.filenames = utilsparams3.extract_files(self.input_bucket_name, self.data_name, ext=None)
    assert len(self.filenames) > 0, "we must have data to analyze."
def __init__(self, bucket_name, key, time):
    ## Initialize as before:
    # Get Upload Location Information
    self.bucket_name = bucket_name
    ## Get directory above the input directory.
    self.path = re.findall('.+?(?=/' + os.environ["INDIR"] + ')', key)[0]
    ## Now add in the time parameter:
    self.time = time

    ## We will index by the submit file name prefix if it exists:
    submit_search = re.findall('.+?(?=submit.json)', os.path.basename(key))
    try:
        submit_name = submit_search[0]
    except IndexError:
        ## If the filename is just "submit.json", we just don't append anything to the job name.
        submit_name = ""

    ## Now we're going to get the path to the results directory:
    self.jobname = "job" + submit_name + self.time
    jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
    self.jobpath = jobpath
    create_jobdir = utilsparams3.mkdir(
        self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']), self.jobname)
    print(self.path, 'path')
    self.logger = utilsparams3.JobLogger(self.bucket_name, self.jobpath)
    # self.out_path = utilsparams3.mkdir(self.bucket_name, self.path, config.OUTDIR)
    # self.in_path = utilsparams3.mkdir(self.bucket_name, self.path, config.INDIR)

    # Load Content Of Submit File
    submit_file = utilsparams3.load_json(bucket_name, key)

    ## Check what instance we should use.
    try:
        self.instance_type = submit_file['instance_type']  # TODO default option from config
    except KeyError as ke:
        msg = "Instance type {} does not exist, using default from config file".format(ke)
        self.instance_type = os.environ["INSTANCE_TYPE"]
        ## Log this message.
        self.logger.append(msg)
        self.logger.write()

    ## These next two checks ensure that the submit file is correctly formatted.
    ## Check that we have a dataname field:
    submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
    try:
        self.data_name = submit_file['dataname']  # TODO validate extensions
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.logger.append(submit_errmsg.format(ke))
        self.logger.write()
        ## Raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError("Missing data name to analyze")

    try:
        self.config_name = submit_file["configname"]
        self.logger.assign_config(self.config_name)
    except KeyError as ke:
        print(submit_errmsg.format(ke))
        ## Write to logger
        self.logger.append(submit_errmsg.format(ke))
        self.logger.write()
        ## Raise an exception to halt processing, because this is a catastrophic error.
        raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

    ## Check that we have the actual data in the bucket.
    exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
    if not utilsparams3.exists(self.bucket_name, self.data_name):
        msg = exists_errmsg.format(self.data_name)
        self.logger.append(msg)
        self.logger.write()
        raise ValueError("dataname given does not exist in bucket.")
    elif not utilsparams3.exists(self.bucket_name, self.config_name):
        msg = exists_errmsg.format(self.config_name)
        self.logger.append(msg)
        self.logger.write()
        raise ValueError("configname given does not exist in bucket.")

    ###########################
    ## Now get the actual paths to relevant data from the foldername:
    self.filenames = utilsparams3.extract_files(self.bucket_name, self.data_name, ext=None)
    assert len(self.filenames) > 0, "we must have data to analyze."
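## A minimal usage sketch, assuming these initializers belong to a submission
## class constructed from an S3 put event inside a Lambda handler. The class
## name `Submission` and the handler below are placeholders for illustration,
## not part of this module; only bucket_name, key, and time are required by
## __init__, with everything else read from environment variables and submit.json.
## The field paths into `event` follow the standard AWS S3 event notification layout.
#
# def handler(event, context):
#     record = event["Records"][0]
#     bucket_name = record["s3"]["bucket"]["name"]  # bucket that received submit.json
#     key = record["s3"]["object"]["key"]           # must contain "/" + INDIR and end in submit.json
#     time = record["eventTime"]                    # upload timestamp from the event
#     submission = Submission(bucket_name, key, time)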