Example #1
    def check_existence(self):
        """
        Check for the existence of the corresponding data and config in S3.
        """
        exists_errmsg = "        [Internal (check_existence)] INPUT ERROR: S3 Bucket does not contain {}"
        if type(self.data_name) is str:
            check_data_exists = utilsparams3.exists(self.bucket_name,self.data_name)
        elif type(self.data_name) is list:
            check_data_exists = all([utilsparams3.exists(self.bucket_name,name) for name in self.data_name])
        else:
            raise TypeError("[JOB TERMINATE REASON] 'dataname' field is not the right type. Should be string or list.")

        if not check_data_exists: 
            msg = exists_errmsg.format(self.data_name)
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()
            raise ValueError("[JOB TERMINATE REASON] 'dataname' field refers to data that cannot be found. Be sure this is a full path to the data, without the bucket name.")
        elif not utilsparams3.exists(self.bucket_name,self.config_name): 
            msg = exists_errmsg.format(self.config_name)
            self.logger.append(msg)
            self.logger.printlatest()
            self.logger.write()
            raise ValueError("[JOB TERMINATE REASON] 'configname' field refers to a configuration file that cannot be found. Be sure this is a fill path to the data, without the bucket name.")
        ###########################

        ## Now get the actual paths to relevant data from the foldername: 
        if type(self.data_name) is str:
            self.filenames = utilsparams3.extract_files(self.bucket_name,self.data_name,ext = None) 
        elif type(self.data_name) is list:
            self.filenames = self.data_name
        assert len(self.filenames) > 0, "[JOB TERMINATE REASON] The folder indicated is empty, or does not contain analyzable data."
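
A minimal sketch of the str/list dispatch used in check_existence above; exists_stub and its fake key set are hypothetical stand-ins for utilsparams3.exists and the bucket contents, for illustration only:

def exists_stub(bucket_name, key):
    ## Hypothetical stand-in for utilsparams3.exists: pretend the bucket
    ## contains exactly these two keys.
    fake_keys = {"groupA/inputs/data.txt", "groupA/configs/config.yaml"}
    return key in fake_keys

def check_data_exists(bucket_name, data_name):
    ## Mirror check_existence: accept one key (str) or several keys (list).
    if type(data_name) is str:
        return exists_stub(bucket_name, data_name)
    elif type(data_name) is list:
        return all([exists_stub(bucket_name, name) for name in data_name])
    else:
        raise TypeError("'dataname' field is not the right type. Should be string or list.")

print(check_data_exists("my-bucket", "groupA/inputs/data.txt"))                    # True
print(check_data_exists("my-bucket", ["groupA/inputs/data.txt", "missing.txt"]))   # False
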
Example #2
def monitor_updater(event,context):
    """
    Newest version of events monitoring that updates pre-existing logs. 

    """
    ## 1. First, find the instance id.
    ## 2. Go find the appropriate log folder in the bucket [bucket name available through os.environ].
    ## 3. Now figure out if this is a "running" or "shutting-down" state change.
    ## 4. Accordingly, either update the log [running] or update the log and move it to the appropriate folder [given by the log contents].
    ## Include exception handling for the case where one of the fields is not completed.
    try: 
        print(event)
        print(context)
        time = event['time']
        instanceid = event['detail']['instance-id']
        logname = "{}.json".format(instanceid)
        statechange = event['detail']['state']
        bucket_name = os.environ["BUCKET_NAME"]
        
        if statechange in ["running","shutting-down"]:
            log = utilsparams3.update_monitorlog(bucket_name,logname,statechange,time) 
            if type(log["datapath"]) is str:
                path_to_data = log["datapath"]
            elif type(log["datapath"]) is list:
                assert type(log["datapath"][0]) == str
                path_to_data = log["datapath"][0]
            else:
                print("datapath type unsupported, exiting.")
            jobname = os.path.basename(log["jobpath"]).replace(":","_") ## Monitoring rule names cannot contain colons.

            print(path_to_data,"path_to_data")
            groupname = re.findall('.+?(?=/'+os.environ["INDIR"]+')',path_to_data)[0]
            ## Log name for the group that made this job:
            current_job_log = os.path.join("logs","active",logname)
            completed_job_log =  os.path.join("logs",groupname,logname)
            if statechange == "shutting-down":
                utilsparams3.mv(bucket_name,current_job_log,completed_job_log)
                timepkg.sleep(5)
                ## Now check if we can delete this rule and send the end signal to the user:
                rulename = "Monitor{}".format(jobname)
                instances_under_rule = utilsparamevents.get_monitored_instances(rulename) 
                condition = [utilsparams3.exists(bucket_name,os.path.join("logs","active","{}.json".format(inst))) for inst in instances_under_rule]
                ## Delete the rule if no monitored instances remain active:
                if not any(condition):
                    ## get the target:
                    response = utilsparamevents.full_delete_rule(rulename)
                    terminated = utilsparams3.write_endfile(log["databucket"],log["jobpath"])
                    
                else:
                    pass

        else:
            print("unhandled state change. quitting")
            raise ValueError("statechange {} not expected".format(statechange))
        exitcode = 0 
    except Exception as e:
        print("error: {}".format(e))
        exitcode = 99 
    return exitcode
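
The group name above is recovered with a non-greedy match plus a lookahead. A short, self-contained demonstration, using "inputs" as a hypothetical stand-in for os.environ["INDIR"]:

import re

indir = "inputs"  ## hypothetical stand-in for os.environ["INDIR"]
path_to_data = "groupA/inputs/session1/data.bin"

## '.+?' grabs the shortest prefix that is immediately followed by '/inputs',
## i.e. everything above the input directory.
groupname = re.findall('.+?(?=/' + indir + ')', path_to_data)[0]
print(groupname)  # groupA
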
Example #3
    def __init__(self, bucket_name, key, time):
        ## Initialize as before:
        # Get Upload Location Information
        self.bucket_name = bucket_name
        ## Get directory above the input directory.
        self.path = re.findall('.+?(?=/' + os.environ["SUBMITDIR"] + ')',
                               key)[0]
        ## Now add in the time parameter:
        self.time = time
        ## We will index by the submit file name prefix if it exists:
        submit_search = re.findall('.+?(?=/submit.json)',
                                   os.path.basename(key))
        try:
            submit_name = submit_search[0]
        except IndexError as e:
            ## If the filename is just "submit.json", we just don't append anything to the job name.
            submit_name = ""

        #### Parse submit file
        submit_file = utilsparams3.load_json(bucket_name, key)

        ## Machine-formatted fields (errors here are only visible in the lambda logs)
        ## These next three fields check that the submit file is correctly formatted
        try:
            self.timestamp = submit_file["timestamp"]
            ## KEY: Now set up logging in the input folder too:
        except KeyError as ke:
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing timestamp when data was uploaded.")

        ## Initialize s3 directory for this job.
        self.jobname = "job_{}_{}_{}".format(submit_name, bucket_name,
                                             self.timestamp)
        jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
        self.jobpath = jobpath
        ## And create a corresponding directory in the submit area.
        create_jobdir = utilsparams3.mkdir(
            self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']),
            self.jobname)

        ## Create a logging object and write to it.
        ## a logger for the submit area.
        self.logger = utilsparams3.JobLogger_demo(self.bucket_name,
                                                  self.jobpath)
        self.logger.append("Unique analysis version id: {}".format(
            os.environ['versionid'].split("\n")[0]))
        self.logger.append("Initializing analysis.")
        self.logger.write()
        ########################
        ## Now parse the rest of the file.
        try:
            self.instance_type = submit_file[
                'instance_type']  # TODO default option from config
        except KeyError as ke:
            msg = "Using default instance type {} from config file".format(
                os.environ["INSTANCE_TYPE"])
            self.instance_type = os.environ["INSTANCE_TYPE"]
            # Log this message
            self.logger.append(msg)
            self.logger.write()

        ## Check that we have a dataname field:
        submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
        try:
            self.data_name = submit_file[
                'dataname']  # TODO validate extensions
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing data name to analyze")

        try:
            self.config_name = submit_file["configname"]
            self.logger.assign_config(self.config_name)
        except KeyError as ke:
            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

        self.logger.append(
            "Analysis request detected with dataset(s): {}, config file {}. Reading analysis blueprint."
            .format(self.data_name, self.config_name))

        self.logger.write()
        ##########################
        ## Check for the existence of the corresponding data and config in s3.
        ## Check that we have the actual data in the bucket.
        exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
        if type(self.data_name) is str:
            check_data_exists = utilsparams3.exists(self.bucket_name,
                                                    self.data_name)
        elif type(self.data_name) is list:
            check_data_exists = all([
                utilsparams3.exists(self.bucket_name, name)
                for name in self.data_name
            ])
        else:
            raise TypeError("dataname should be string or list.")

        if not check_data_exists:
            msg = exists_errmsg.format(self.data_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("dataname given does not exist in bucket.")
        elif not utilsparams3.exists(self.bucket_name, self.config_name):
            msg = exists_errmsg.format(self.config_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("configname given does not exist in bucket.")
        ###########################

        ## Now get the actual paths to relevant data from the foldername:
        if type(self.data_name) is str:
            self.filenames = utilsparams3.extract_files(self.bucket_name,
                                                        self.data_name,
                                                        ext=None)
        elif type(self.data_name) is list:
            self.filenames = self.data_name
        assert len(self.filenames) > 0, "we must have data to analyze."
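
The constructor above handles submit-file fields in two ways: optional fields fall back to a default, while required fields halt the job. A compact sketch of that convention; the literal submit_file dict and the default value are hypothetical:

submit_file = {"timestamp": "1614042000", "dataname": "groupA/inputs/data.bin"}

## Optional field: fall back to a default when absent.
try:
    instance_type = submit_file["instance_type"]
except KeyError:
    instance_type = "t2.micro"  ## hypothetical stand-in for os.environ["INSTANCE_TYPE"]

## Required field: a missing key is a catastrophic error, so halt processing.
try:
    data_name = submit_file["dataname"]
except KeyError as ke:
    raise ValueError("Missing data name to analyze") from ke

print(instance_type, data_name)
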
Example #4
    def __init__(self, bucket_name, key, time):
        #### Declare basic parameters:
        # Get Upload Location Information
        self.bucket_name = bucket_name

        ## Important paths:
        ## Get directory above the input directory where the job was submitted.
        self.path = re.findall('.+?(?=/' + os.environ["INDIR"] + ')', key)[0]
        ## The other important directory is the actual base directory of the input bucket itself.

        ## Now add in the time parameter:
        self.time = time

        #### Set up basic logging so we can get a trace when errors happen.
        ## We will index by the submit file name prefix if it exists:
        submit_search = re.findall('.+?(?=/submit.json)',
                                   os.path.basename(key))
        try:
            submit_name = submit_search[0]
        except IndexError as e:
            ## If the filename is just "submit.json", we just don't append anything to the job name.
            submit_name = ""
        #### Parse submit file
        submit_file = utilsparams3.load_json(bucket_name, key)

        ## These next three fields check that the submit file is correctly formatted
        try:
            self.timestamp = submit_file["timestamp"]
            ## KEY: Now set up logging in the input folder too:
        except KeyError as ke:
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing timestamp when data was uploaded.")

        ## Now we're going to get the path to the results directory in the submit folder:
        self.jobname = "job_{}_{}_{}".format(submit_name, bucket_name,
                                             self.timestamp)
        jobpath = os.path.join(self.path, os.environ['OUTDIR'], self.jobname)
        self.jobpath_submit = jobpath
        ## And create a corresponding directory in the submit area.
        create_jobdir = utilsparams3.mkdir(
            self.bucket_name, os.path.join(self.path, os.environ['OUTDIR']),
            self.jobname)
        ## a logger for the submit area.
        self.submitlogger = utilsparams3.JobLogger_demo(self.bucket_name,
                                                        self.jobpath_submit)
        self.submitlogger.append(
            "Initializing EPI analysis: Parameter search for 2D LDS.")
        self.submitlogger.write()

        try:
            self.instance_type = submit_file[
                'instance_type']  # TODO default option from config
        except KeyError as ke:
            msg = "Using default instance type {} from config file".format(
                os.environ["INSTANCE_TYPE"])
            self.instance_type = os.environ["INSTANCE_TYPE"]

        ## Check that we have a dataname field:
        submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
        try:
            self.input_bucket_name = submit_file["bucketname"]
            ## KEY: Now set up logging in the input folder too:
            self.inputlogger = utilsparams3.JobLogger(
                self.input_bucket_name,
                os.path.join(os.environ['OUTDIR'], self.jobname)
            )  ##TODO: this relies upon "OUTDIR" being the same in the submit and input buckets. Make sure to alter this later.
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.submitlogger.append(submit_errmsg.format(ke))
            self.submitlogger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing bucket name where data is located.")

        try:
            self.data_name = submit_file[
                'dataname']  # TODO validate extensions
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.submitlogger.append(submit_errmsg.format(ke))
            self.submitlogger.write()
            self.inputlogger.append(submit_errmsg.format(ke))
            self.inputlogger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError("Missing data name to analyze")

        try:
            self.config_name = submit_file["configname"]
            self.submitlogger.assign_config(self.config_name)
        except KeyError as ke:
            print(submit_errmsg.format(ke))
            ## Write to logger
            self.submitlogger.append(submit_errmsg.format(ke))
            self.submitlogger.write()
            self.inputlogger.append(submit_errmsg.format(ke))
            self.inputlogger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.
            raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

        ## Check that we have the actual data in the bucket.
        exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
        if not utilsparams3.exists(self.input_bucket_name, self.data_name):
            msg = exists_errmsg.format(self.data_name)
            self.submitlogger.append(msg)
            self.submitlogger.write()
            self.inputlogger.append(msg)
            self.inputlogger.write()
            raise ValueError("dataname given does not exist in bucket.")
        elif not utilsparams3.exists(self.input_bucket_name, self.config_name):
            msg = exists_errmsg.format(self.config_name)
            self.submitlogger.append(msg)
            self.submitlogger.write()
            self.inputlogger.append(msg)
            self.inputlogger.write()
            raise ValueError("configname given does not exist in bucket.")

        ## Check what instance we should use.
        try:
            self.instance_type = submit_file['instance_type']
        except KeyError as ke:
            msg = "Instance type {} does not exist, using default from config file".format(
                ke)
            self.instance_type = os.environ["INSTANCE_TYPE"]
            ## Log this message.
            self.submitlogger.append(msg)
            self.submitlogger.write()
            self.inputlogger.append(msg)
            self.inputlogger.write()
        ###########################

        ## Now get the actual paths to relevant data from the foldername:

        self.filenames = utilsparams3.extract_files(self.input_bucket_name,
                                                    self.data_name,
                                                    ext=None)
        assert len(self.filenames) > 0, "we must have data to analyze."
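
A minimal sketch of the dual-logger pattern above, where every message is appended and flushed to both the submit-side and input-side logs; MemoryLogger is a hypothetical in-memory stand-in for utilsparams3.JobLogger:

class MemoryLogger:
    ## Hypothetical in-memory stand-in for utilsparams3.JobLogger.
    def __init__(self, name):
        self.name = name
        self.lines = []
    def append(self, msg):
        self.lines.append(msg)
    def write(self):
        ## The real logger flushes to S3; here we just print the latest line.
        print("[{}] {}".format(self.name, self.lines[-1]))

submitlogger = MemoryLogger("submit")
inputlogger = MemoryLogger("input")

## Mirror the repeated append/write pairs in the constructor above.
msg = "INPUT ERROR: S3 Bucket does not contain groupA/inputs/data.bin"
for logger in (submitlogger, inputlogger):
    logger.append(msg)
    logger.write()
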
Example #5
    def __init__(self,bucket_name,key,time):
        ## Initialize as before:
        # Get Upload Location Information
        self.bucket_name = bucket_name
        ## Get directory above the input directory. 
        self.path = re.findall('.+?(?=/'+os.environ["INDIR"]+')',key)[0] 
        ## Now add in the time parameter: 
        self.time = time
        ## We will index by the submit file name prefix if it exists: 
        submit_search = re.findall('.+?(?=/submit.json)',os.path.basename(key))
        try:
            submit_name = submit_search[0]
        except IndexError as e:
            ## If the filename is just "submit.json", we just don't append anything to the job name.
            submit_name = ""
            
        ## Now we're going to get the path to the results directory: 
        self.jobname = "job"+submit_name+self.time
        jobpath = os.path.join(self.path,os.environ['OUTDIR'],self.jobname)
        self.jobpath = jobpath
        create_jobdir  = utilsparams3.mkdir(self.bucket_name, os.path.join(self.path,os.environ['OUTDIR']),self.jobname)
        
        print(self.path,'path')
        self.logger = utilsparams3.JobLogger(self.bucket_name, self.jobpath)
        #self.out_path = utilsparams3.mkdir(self.bucket_name, self.path, config.OUTDIR)
        #self.in_path = utilsparams3.mkdir(self.bucket_name, self.path, config.INDIR)

        # Load Content Of Submit File 
        submit_file = utilsparams3.load_json(bucket_name, key)
        ## Check what instance we should use. 
        try:
            self.instance_type = submit_file['instance_type'] # TODO default option from config
        except KeyError as ke: 
            msg = "Instance type {} does not exist, using default from config file".format(ke)
            self.instance_type = os.environ["INSTANCE_TYPE"]
            ## Log this message.
            self.logger.append(msg)
            self.logger.write()

        ## These next two check that the submit file is correctly formatted
        ## Check that we have a dataname field:
        submit_errmsg = "INPUT ERROR: Submit file does not contain field {}, needed to analyze data."
        try: 
            self.data_name = submit_file['dataname'] # TODO validate extensions 
        except KeyError as ke:

            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.  
            raise ValueError("Missing data name to analyze")

        try:
            self.config_name = submit_file["configname"] 
            self.logger.assign_config(self.config_name)
        except KeyError as ke:
            print(submit_errmsg.format(ke))
            ## Write to logger
            self.logger.append(submit_errmsg.format(ke))
            self.logger.write()
            ## Now raise an exception to halt processing, because this is a catastrophic error.  
            raise ValueError(os.environ["MISSING_CONFIG_ERROR"])

        ## Check that we have the actual data in the bucket.  
        exists_errmsg = "INPUT ERROR: S3 Bucket does not contain {}"
        if not utilsparams3.exists(self.bucket_name,self.data_name): 
            msg = exists_errmsg.format(self.data_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("dataname given does not exist in bucket.")
        elif not utilsparams3.exists(self.bucket_name,self.config_name): 
            msg = exists_errmsg.format(self.config_name)
            self.logger.append(msg)
            self.logger.write()
            raise ValueError("configname given does not exist in bucket.")
        ###########################

        ## Now get the actual paths to relevant data from the foldername: 

        self.filenames = utilsparams3.extract_files(self.bucket_name,self.data_name,ext = None) 
        assert len(self.filenames) > 0, "we must have data to analyze."
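
A sketch of the path bookkeeping in this constructor: recover the directory above the input directory from the submit key, then build the job path under the output directory. All concrete values ("inputs", "results", the key, and the time string) are hypothetical stand-ins for the environment variables and event data:

import os
import re

indir, outdir = "inputs", "results"  ## stand-ins for os.environ["INDIR"] / os.environ["OUTDIR"]
key = "groupA/inputs/submit.json"
time = "20210101T000000"

## Directory above the input directory, recovered as in __init__ above.
path = re.findall('.+?(?=/' + indir + ')', key)[0]   # "groupA"

submit_name = ""  ## empty when the file is named exactly "submit.json"
jobname = "job" + submit_name + time
jobpath = os.path.join(path, outdir, jobname)
print(jobpath)  # groupA/results/job20210101T000000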