Ejemplo n.º 1
0
def disk_usage_alert(proj, max_disk, emails):
    """Send an e-mail alert when the latest logged disk usage exceeds a limit.

    The check only runs when the current wall-clock minute is a multiple of
    five; at any other time the function returns immediately.

    Args:
        proj: Indexable collection of log entries. The last element must
            provide ``get_log_dict()``.
        max_disk: Threshold (in percent) above which the alert is sent.
        emails: Receiver(s), forwarded unchanged to ``pub_smtp``.

    Returns:
        ``None`` normally, or the string ``'failed import of datetime'`` if
        the ``datetime`` module cannot be imported.
    """
    try:
        import datetime
    except ImportError:
        return 'failed import of datetime'

    # Only act when we are at a round 5-minute interval.
    if datetime.datetime.now().minute % 5 != 0:
        return

    # An empty log history has nothing to evaluate; do not crash on it.
    try:
        last_entry = proj[-1]
    except IndexError:
        return

    log_dict = last_entry.get_log_dict()

    # Keys are matched through str(); the logged value is scaled by 100
    # (presumably it is stored as a fraction -- confirm against the logger).
    lastDISK = 0
    for key in log_dict:
        if str(key) == 'DISK_USAGE_DATA':
            lastDISK = float(log_dict[key]) * 100

    if lastDISK > max_disk:
        pub_smtp(
            receiver=emails,
            subject='PUBS ALERT: Disk usage on %s above %i percent!' %
            (pub_env.kSERVER_NAME, max_disk),
            text='Current disk usage is at %.02f percent. Please take action and clear disk space!'
            % lastDISK)

    return
Ejemplo n.º 2
0
    def _jobstat_from_log(self, submit_time):
        """Read the batch job log file and evaluate its contents.

        Args:
            submit_time: Time of the most recent submission in seconds since
                the epoch; compared with the log file's modification time.

        Returns:
            A ``(status, contents)`` tuple. ``(False, '')`` is returned when
            the log file does not exist (experts are e-mailed), when the log
            was modified earlier than ``submit_time + 60`` seconds, or when
            the log age plus 10 seconds exceeds ``self._period``. Otherwise
            the tuple is ``(self._check_jobstat_str(contents), contents)``.
        """
        result = (False,'')
        if not os.path.isfile(self.JOBSUB_LOG):
            subject = 'Failed fetching job log'
            text  = 'Batch job log file not available... (check daemon, should not happen)'
            text += '\n\n'
            pub_smtp(receiver = self._experts,
                     subject = subject,
                     text = text)
            return result

        # Make sure log has been updated more recently than most recent submit.
        mod_time = os.path.getmtime(self.JOBSUB_LOG)
        self.info('Job log modification time: %s' % time.ctime(mod_time))
        self.info('Submit time: %s' % time.ctime(submit_time))
        if mod_time < submit_time + 60:
            return result

        # Reject a log that is too old relative to the polling period.
        log_age = time.time() - mod_time
        if log_age + 10 > self._period:
            return result

        # Use a context manager so the file handle is always released.
        with open(self.JOBSUB_LOG,'r') as log_file:
            contents = log_file.read()
        return (self._check_jobstat_str(contents),contents)
Ejemplo n.º 3
0
    def check_db( self ):
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs( self._project, 2 ):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' % (run,subrun))

            statusCode = 2
            in_file_name = self._infile_format % ( run, subrun )
            in_file = '%s/%s' % ( self._in_dir, in_file_name )

            # Get status object
            status = self._api.get_status(ds_status(self._project,
                                                    x[0],x[1],x[2]))

            self._data = status._data
            self._data = str( self._data )

            if self._data:
               statusCode = 0
            else:
                subject = 'Checksum of the file %s not in database' % in_file
                text = """File: %s
Checksum is not in database
                """ % ( in_file )

                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = 0,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 4
0
def disk_usage_alert(proj,max_disk,emails):
    """E-mail an alert if the most recent disk-usage log entry is over limit.

    Nothing is checked unless the current minute is divisible by five.

    Args:
        proj: Indexable sequence of log entries; only the last entry is
            used, through its ``get_log_dict()`` method.
        max_disk: Alert threshold in percent.
        emails: Receiver(s) passed straight to ``pub_smtp``.

    Returns:
        ``None``, or ``'failed import of datetime'`` when ``datetime`` cannot
        be imported.
    """
    try:
        import datetime
    except ImportError:
        return 'failed import of datetime'

    # if we are at a round 5-minute interval
    timenow = datetime.datetime.now()
    if timenow.minute % 5 != 0:
        return

    # No entries logged yet: nothing to check (avoids IndexError).
    try:
        last_entry = proj[-1]
    except IndexError:
        return

    log_dict = last_entry.get_log_dict()

    # The value under 'DISK_USAGE_DATA' is multiplied by 100 before the
    # comparison (presumably a fraction -> percent; confirm with the logger).
    lastDISK = 0
    for key in log_dict:
        if str(key) == 'DISK_USAGE_DATA':
            lastDISK = float(log_dict[key])*100

    if lastDISK > max_disk:
        pub_smtp(receiver = emails,
                 subject  = 'PUBS ALERT: Disk usage on %s above %i percent!' %(pub_env.kSERVER_NAME, max_disk) ,
                 text     = 'Current disk usage is at %.02f percent. Please take action and clear disk space!'%(lastDISK))

    return
Ejemplo n.º 5
0
    def find_checksum( self ):
        """Look up the SAM checksum of each pending file and log a status.

        For every run/subrun returned by ``get_xtable_runs`` the file's SAM
        metadata is queried. Status 0 is logged (and the checksum value is
        stored in ``self._data``) when the first checksum entry is of type
        'enstore', status 1 when it is of another type, and status 100 (with
        an e-mail to the experts) when SAM does not know the file.
        """
        # Without a DB connection there is nothing to do.
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # Lazily load the resource information.
        if self._nruns is None:
            self.get_resource()

        remaining = self._nruns
        projects = [self._project, self._parent_project]
        for entry in self.get_xtable_runs( projects, [2, 0] ):

            remaining -= 1

            run = int(entry[0])
            subrun = int(entry[1])

            self.info('Checking tape: run=%d, subrun=%d ...' % (run,subrun))

            file_name = self._infile_format % ( run, subrun )
            client = samweb_cli.SAMWebClient(experiment="uboone")

            try:
                metadata = client.getMetadata(filenameorid=file_name)
                # Checksum entries look like '<type>:<value>'.
                parts = metadata['checksum'][0].split(':')
                if parts[0] != 'enstore':
                    code = 1
                else:
                    self._data = parts[1]
                    code = 0

            except samweb_cli.exceptions.FileNotFound:
                subject = 'Failed to locate file %s at SAM' % file_name
                text = 'File %s is not found at SAM!' % file_name

                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                code = 100

            # Record the outcome for this run/subrun in the DB.
            self.log_status( ds_status( project = self._project,
                                        run     = run,
                                        subrun  = subrun,
                                        seq     = 0,
                                        status  = code,
                                        data    = self._data ) )

            # Stop once the per-call quota is used up.
            if not remaining:
                break
Ejemplo n.º 6
0
    def execute(self):
        """Start the project command as a subprocess.

        ``self._info._command`` is split on whitespace and launched through
        ``Popen`` with stdout and stderr piped. On success ``self._proc``
        holds the process handle and ``self._running`` is set to True.

        Raises:
            DSException: If ``Popen`` raises ``OSError``. The error is logged
                and an e-mail report is attempted before raising.
        """
        try:
            self._proc = Popen(
                self._info._command.split(None),
                stdout=PIPE,
                stderr=PIPE)
            self._running = True
        except OSError as e:
            # strerror can be None for some OSError instances.
            reason = e.strerror or str(e)

            self._logger.error(reason)
            self._logger.error('Error executing %s! Sending e-mail report...' %
                               self._info._project)
            try:
                pub_smtp(receiver=self._info._email,
                         subject='Failed execution of project %s' %
                         self.name(),
                         text=reason)
            except BaseException as be:
                self._logger.critical(
                    'Project %s error could not be reported via email!' %
                    self._info._project)
                # Not every exception carries a ``v`` attribute; fall back to
                # str(be) so an AttributeError here cannot mask the
                # DSException raised below.
                detail = getattr(be, 'v', None)
                if detail is None:
                    detail = str(be)
                for line in str(detail).split('\n'):
                    self._logger.error(line)
            raise DSException(reason)
Ejemplo n.º 7
0
    def execute(self):
        """Launch ``self._info._command`` as a child process.

        The command string is split on whitespace and passed to ``Popen``
        with both stdout and stderr piped. ``self._proc`` and
        ``self._running`` are updated when the launch succeeds.

        Raises:
            DSException: When ``Popen`` fails with ``OSError``; the failure
                is logged and reported by e-mail (best effort) first.
        """
        try:
            self._proc = Popen(self._info._command.split(None),
                               stdout = PIPE,
                               stderr = PIPE)
            self._running = True
        except OSError as e:
            # Fall back to str(e) because strerror may be None.
            reason = e.strerror or str(e)

            self._logger.error(reason)
            self._logger.error('Error executing %s! Sending e-mail report...' % self._info._project)
            try:
                pub_smtp(receiver = self._info._email,
                         subject  = 'Failed execution of project %s' % self.name(),
                         text     = reason)
            except BaseException as be:
                self._logger.critical('Project %s error could not be reported via email!' % self._info._project)
                # Only some exception types expose ``v``; use str(be)
                # otherwise so logging never raises AttributeError and hides
                # the DSException below.
                detail = getattr(be, 'v', None)
                if detail is None:
                    detail = str(be)
                for line in str(detail).split('\n'):
                    self._logger.error(line)
            raise DSException(reason)
Ejemplo n.º 8
0
 def email(cls, owner, subject, text):
     """Send an e-mail to the address registered for ``owner``.

     Args:
         owner: Key into ``cls._address`` and ``cls._sub_prefix``.
         subject: Subject line; prefixed with ``<<prefix>>`` when a non-empty
             prefix is registered for ``owner``.
         text: Message body.

     Returns:
         The result of ``pub_smtp`` when it is truthy, otherwise ``False``.
         ``False`` is also returned when ``owner`` is unknown or when any
         exception is raised while sending (the exception is printed).
     """
     if owner not in cls._address:
         return False
     try:
         if cls._sub_prefix[owner]:
             subject = '<<%s>> %s' % (cls._sub_prefix[owner], subject)
         res = pub_smtp(receiver=cls._address[owner],
                        subject=subject,
                        text=text)
         # Normalise any falsy result to False.
         if not res:
             res = False
         return res
     except BaseException as e:
         # Call form of print: identical output on Python 2 for a single
         # argument, and valid syntax on Python 3.
         print(e)
         return False
Ejemplo n.º 9
0
 def email(cls,owner,subject,text):
     """E-mail ``text`` to the address registered under ``owner``.

     Args:
         owner: Lookup key for ``cls._address`` and ``cls._sub_prefix``.
         subject: Subject line. When ``cls._sub_prefix[owner]`` is non-empty
             it is prepended as ``<<prefix>>``.
         text: Body of the message.

     Returns:
         ``pub_smtp``'s return value if truthy, else ``False``. ``False`` is
         also returned for an unregistered ``owner`` and whenever sending
         raises (the exception is printed).
     """
     if owner not in cls._address:
         return False
     try:
         if cls._sub_prefix[owner]:
             subject  = '<<%s>> %s' % (cls._sub_prefix[owner],subject)
         res = pub_smtp(receiver = cls._address[owner],
                        subject = subject,
                        text = text)
         # Any falsy result is reported as False.
         if not res:
             res = False
         return res
     except BaseException as e:
         # print(e) is valid on both Python 2 and Python 3; the statement
         # form ``print e`` is a SyntaxError on Python 3.
         print(e)
         return False
Ejemplo n.º 10
0
    def _jobstat_from_log(self, submit_time):
        """Return the job status derived from the batch job log file.

        Args:
            submit_time: Most recent submission time (seconds since the
                epoch), compared with the log's modification time.

        Returns:
            ``(False, '')`` if the log file is missing (an e-mail is sent to
            the experts), if the log was last modified before
            ``submit_time + 60``, or if its age plus 10 seconds exceeds
            ``self._period``. Otherwise
            ``(self._check_jobstat_str(contents), contents)``.
        """
        result = (False, '')
        if not os.path.isfile(self.JOBSUB_LOG):
            subject = 'Failed fetching job log'
            text = 'Batch job log file not available... (check daemon, should not happen)'
            text += '\n\n'
            pub_smtp(receiver=self._experts, subject=subject, text=text)
            return result

        # Make sure log has been updated more recently than most recent submit.
        mod_time = os.path.getmtime(self.JOBSUB_LOG)
        self.info('Job log modification time: %s' % time.ctime(mod_time))
        self.info('Submit time: %s' % time.ctime(submit_time))
        if mod_time < submit_time + 60:
            return result

        # A log older than the polling period (with 10 s margin) is stale.
        log_age = time.time() - mod_time
        if log_age + 10 > self._period:
            return result

        # Close the file deterministically instead of leaking the handle.
        with open(self.JOBSUB_LOG, 'r') as log_file:
            contents = log_file.read()
        return (self._check_jobstat_str(contents), contents)
Ejemplo n.º 11
0
    def recover( self, statusCode, istage, run, subrun ):
        """Resubmit the jobs of one stage and return the new status code.

        Returns ``statusCode + istage`` unchanged when fetching the stage
        objects or submitting raises, ``statusCode + istage + 1000`` (with an
        e-mail to the experts) when no job id is returned, and
        ``istage + self.kSUBMITTED`` after a successful submission. On
        success the job id and submit time are appended to ``self._data``.
        """
        current_status = statusCode + istage
        error_status   = current_status + 1000

        def report_failure( header ):
            # Log the active exception: type, value, traceback object, then
            # each formatted traceback line.
            self.error(header)
            exc = sys.exc_info()
            for part in exc:
                self.error(part)
            for tb_line in traceback.format_tb(exc[2]):
                self.error(tb_line)

        self._data = str( self._data )
        stage = self._digit_to_name[istage]

        # Get project and stage object.
        try:
            probj, stobj = project.get_pubs_stage(self._xml_file, '', stage, run, subrun, self._version)
        except:
            report_failure('Exception raied by project.get_pubs_stage:')
            return current_status

        # Submit job.
        jobid = ''
        try:
            jobid = project.dosubmit(probj, stobj)
        except:
            report_failure('Exception raied by project.dosubmit:')
            return current_status
        self.info( 'Resubmit jobs: xml: %s, stage: %s' %( self._xml_file, stage ) )

        # A missing job id is treated as an error.
        if not jobid:
            self.error('Failed to fetch job log id...')
            subject = 'Failed to fetch job log id while submitting project %s stage %s.' % (
                probj.name, stobj.name)
            text = '%s\nStatus code is set to %d!\n\n' % (subject, error_status)
            pub_smtp(receiver = self._experts,
                     subject = subject,
                     text = text)
            return error_status

        # Record '<jobid>+<submit time>', starting a fresh record when there
        # is no usable previous data.
        record = '%s+%f' % (jobid, time.time())
        if self._data == "None" or not self._data:
            self._data = record
        else:
            self._data += ':' + record

        statusCode = istage + self.kSUBMITTED
        self.info( "Resubmitted jobs, job id: %s, status: %d" % ( self._data, statusCode ) )

        # Pretend I'm doing something
        time.sleep(5)

        return statusCode
Ejemplo n.º 12
0
    def declare_to_sam(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns))
        self._project_requirement[0] = 1

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs(self._project_list,
                                      self._project_requirement):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Declaring a file to SAM: run=%d, subrun=%d ...' %
                      (run, subrun))

            status = 1

            # Check input file exists. Otherwise report error
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if (len(filelist) < 1):
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                status_code = 100
                status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=status_code)
                self.log_status(status)
                continue

            if (len(filelist) > 1):
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_json = '%s.json' % in_file

            self.info('Declaring ' + in_file + ' to SAM: using ' + in_json)

            if os.path.isfile(in_file) and os.path.isfile(in_json):
                self.info('Found %s' % (in_file))
                self.info('Found %s' % (in_json))
                json_dict = json.load(open(in_json))

                # native SAM python call, instead of a system call
                # make sure you've done get-cert
                # Perhaps we want a try block for samweb?
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                # Check if the file already exists at SAM
                try:
                    in_file_base = os.path.basename(in_file)
                    samweb.getMetadata(filenameorid=in_file_base)
                    status = 101
                    # Email the experts
                    subject = 'File %s Existing at SAM' % in_file_base
                    text = """
File %s has already exists at SAM!
                    """ % in_file_base

                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)

                except samweb_cli.exceptions.FileNotFound:
                    # metadata already validated in get_assembler_metadata_file.py
                    try:
                        samweb.declareFile(md=json_dict)
                        status = 2
                    except Exception as e:
                        #                        print "Unexpected error: samweb declareFile problem: "
                        self.error(
                            "Unexpected error: samweb declareFile problem: ")
                        self.error("%s" % e)
                        subject = "samweb declareFile problem: %s" % in_file_base
                        text = """
File %s failed to be declared to SAM!
%s
                        """ % (in_file_base, traceback.print_exc())

                        pub_smtp(os.environ['PUB_SMTP_ACCT'],
                                 os.environ['PUB_SMTP_SRVR'],
                                 os.environ['PUB_SMTP_PASS'], self._experts,
                                 subject, text)

                        # print "Give some null properties to this meta data"
                        self.error("Give this file a status 102")
                        status = 102

            else:
                status = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=int(x[0]),
                               subrun=int(x[1]),
                               seq=0,
                               status=status)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 13
0
    def isRunning(self, statusCode, istage, run, subrun):
        """Determine whether the most recently submitted job is still active.

        The last ':'-separated record of ``self._data`` is read as
        ``<jobid>+<submit time>``. The job status text is taken from the log
        file, or from the command fallback when the log yields nothing.

        Returns:
            ``statusCode + istage`` unchanged (after e-mailing the experts)
            when no job status could be fetched; otherwise the updated status
            plus ``istage``: ``self.kRUNNING`` when a matching line reports
            state 'R', ``self.kFINISHED`` when every matching line is in
            state 'X' or none matches, and the incoming ``statusCode``
            otherwise.
        """
        current_status = statusCode + istage

        self._data = str(self._data)
        fields = self._data.strip().split(':')[-1].split('+')
        jobid = fields[0]
        submit_time = float(fields[1]) if len(fields) > 1 else float(0)

        # Prefer the log file; fall back to querying by command.
        jobstat = self._jobstat_from_log(submit_time)
        if not jobstat[0]:
            self.warning(
                'Fetching job status from log file failed! Try running cmd...')
            jobstat = self._jobstat_from_cmd(jobid)

        if not jobstat[0]:
            subject = 'Failed to fetch job status!'
            if jobstat[1]:
                text = 'Job log indicates query has failed (see below).\n %s' % jobstat[
                    1]
            else:
                text = 'No job log found...'
            text = text + '\n' + ('PUBS status remains same (%d)' % current_status)
            self.error(subject)
            self.error(text)
            pub_smtp(receiver=self._experts, subject=subject, text=text)
            return current_status

        # Scan the lines that belong to this job id; column 5 is the state.
        alive = False
        for entry in jobstat[1].split('\n'):
            if not entry.startswith(jobid):
                continue
            state = entry.split()[5]
            if state != 'X':
                alive = True
                if state == 'R':
                    statusCode = self.kRUNNING
                    break

        if not alive:
            statusCode = self.kFINISHED

        if statusCode == self.kRUNNING:
            label = 'RUNNING'
        elif statusCode == self.kFINISHED:
            label = 'FINISHED'
        elif statusCode == self.kSUBMITTED:
            label = 'SUBMITTED'
        else:
            label = ''

        statusCode += istage
        self.info('jobid: %s ... status: ' % jobid + label +
                  ' (%d)' % statusCode)

        return statusCode
Ejemplo n.º 14
0
    def check(self, statusCode, istage, run, subrun):
        self._data = str(self._data)
        nSubmit = None

        # Get the number of job submissions.
        if self._data != None and len(self._data) > 0:

            holder = self._data.split(':')
            nSubmit = len(holder)

        # Check the finished jobs
        stage = self._digit_to_name[istage]

        # Get project and stage object.
        try:
            probj, stobj = project.get_pubs_stage(self._xml_file, '', stage,
                                                  run, subrun, self._version)
        except:
            self.error('Exception raied by project.get_pubs_stage:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage

        # Do check.
        try:
            real_stdout = sys.stdout
            real_stderr = sys.stderr
            sys.stdout = StringIO.StringIO()
            sys.stderr = StringIO.StringIO()
            project.doshorten(stobj)
            check_status = project.docheck(probj, stobj, ana=False)
            strout = sys.stdout.getvalue()
            strerr = sys.stderr.getvalue()
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            if strout:
                self.info(strout)
            if strerr:
                self.warning(strerr)
        except:
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            self.error('Exception raied by project.docheck:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage

        # Update pubs status.
        if check_status == 0:
            statusCode = self.kREADYFORSAM
            self._data = ''

        elif nSubmit > self._nresubmission:
            # If the sample has been submitted more than a certain number
            # of times, email the expert, and move on to the next stage
            subject = "MCC jobs fails after %d resubmissions" % nSubmit
            text = """
Sample     : %s
Stage      : %s
Job IDs    : %s
           """ % (self._project, self._digit_to_name[istage],
                  self._data.split(':')[2:])

            pub_smtp(os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'],
                     os.environ['PUB_SMTP_PASS'], self._experts, subject, text)

            #statusCode = self.kDONE
            #istage += 10
            statusCode += 1000
        else:
            statusCode = self.kTOBERECOVERED

        statusCode += istage
        self.info("Checked job, status: %d" % statusCode)

        # Pretend I'm doing something
        time.sleep(5)

        # Here we may need some checks

        return statusCode
Ejemplo n.º 15
0
    def recover(self, statusCode, istage, run, subrun):
        """Submit the stage's jobs again and report the resulting status.

        Returns:
            ``statusCode + istage`` when obtaining the stage objects or the
            submission itself raises; that value plus 1000 (and an e-mail to
            the experts) when the submission yields no job id; otherwise
            ``istage + self.kSUBMITTED``, with ``<jobid>+<time>`` appended to
            ``self._data``.
        """
        unchanged = statusCode + istage
        failed = unchanged + 1000

        def log_exception(title):
            # Dump the exception being handled: the exc_info triple first,
            # then the formatted traceback lines.
            self.error(title)
            info = sys.exc_info()
            for piece in info:
                self.error(piece)
            for row in traceback.format_tb(info[2]):
                self.error(row)

        self._data = str(self._data)

        # Main command
        stage = self._digit_to_name[istage]

        try:
            probj, stobj = project.get_pubs_stage(self._xml_file, '', stage,
                                                  run, subrun, self._version)
        except:
            log_exception('Exception raied by project.get_pubs_stage:')
            return unchanged

        jobid = ''
        try:
            jobid = project.dosubmit(probj, stobj)
        except:
            log_exception('Exception raied by project.dosubmit:')
            return unchanged
        self.info('Resubmit jobs: xml: %s, stage: %s' %
                  (self._xml_file, stage))

        # Tentatively do so; need to change!!!
        if not jobid:
            self.error('Failed to fetch job log id...')
            subject = 'Failed to fetch job log id while submitting project %s stage %s.' % (
                probj.name, stobj.name)
            body = subject + '\n' + 'Status code is set to %d!\n\n' % failed
            pub_smtp(receiver=self._experts, subject=subject, text=body)
            return failed

        # Append the new job id and submit time, or start a new record when
        # no previous data is stored.
        stamp = '%s+%f' % (jobid, time.time())
        has_history = self._data != "None" and len(self._data) != 0
        if has_history:
            self._data += ':' + stamp
        else:
            self._data = stamp

        statusCode = istage + self.kSUBMITTED
        self.info("Resubmitted jobs, job id: %s, status: %d" %
                  (self._data, statusCode))

        # Pretend I'm doing something
        time.sleep(5)

        return statusCode
Ejemplo n.º 16
0
    def store(self, statusCode, istage, run, subrun):

        # Only store the final stage.
        # If this is not the final stage, advance to the next stage.

        if istage != self._stage_digits[-1]:
            statusCode = self.kDONE
            istage += 10
            return statusCode + istage

        # Get stage name.
        stage = self._digit_to_name[istage]

        # Get project and stage object.
        try:
            probj, stobj = project.get_pubs_stage(self._xml_file, '', stage,
                                                  run, subrun, self._version)
        except:
            self.error('Exception raied by project.get_pubs_stage:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage

        # Do store.
        try:
            real_stdout = sys.stdout
            real_stderr = sys.stderr
            sys.stdout = StringIO.StringIO()
            sys.stderr = StringIO.StringIO()

            # Store files.

            dim = project_utilities.dimensions(probj, stobj, ana=False)
            store_status = project.docheck_locations(dim,
                                                     stobj.outdir,
                                                     add=False,
                                                     clean=False,
                                                     remove=False,
                                                     upload=True)

            if store_status == 0:
                dim = project_utilities.dimensions(probj, stobj, ana=True)
                store_status = project.docheck_locations(dim,
                                                         stobj.outdir,
                                                         add=False,
                                                         clean=False,
                                                         remove=False,
                                                         upload=True)

            strout = sys.stdout.getvalue()
            strerr = sys.stderr.getvalue()
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            if strout:
                self.info(strout)
            if strerr:
                self.warning(strerr)
        except:
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            self.error('Exception raied by project.docheck_locations:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage

        # Update pubs status.
        if store_status == 0:
            statusCode = self.kDONE
            istage += 10

        # Pretend I'm doing something
        time.sleep(5)

        # If all the stages complete, send an email to experts
        if not istage in self._stage_digits:
            subject = "Completed: MCC sample %s" % self._project
            text = """
Sample     : %s
Stage      : %s
               """ % (self._project, self._digit_to_name[istage - 10])

            pub_smtp(os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'],
                     os.environ['PUB_SMTP_PASS'], self._experts, subject, text)

        statusCode += istage
        self.info("SAM store, status: %d" % statusCode)

        # Pretend I'm doing something
        time.sleep(5)

        # Here we may need some checks

        return statusCode
Ejemplo n.º 17
0
    def calculate_checksum(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 1):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))
            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' %
                      (run, subrun))

            statusCode = 1

            in_file_name = self._infile_format % (run, subrun)
            in_file_holder = '%s/%s' % (self._in_dir, in_file_name)
            filelist = glob.glob(in_file_holder)
            if (len(filelist) > 1):
                self.error(
                    'ERROR: There is more than one file matching that pattern: %s'
                    % in_file_name)
            if (len(filelist) < 1):
                errorMessage = "Failed to find file%s" % in_file_holder
                subject = "get_checksum_temp Failed to find file%s" % in_file_holder
                text = """File: %s
Error message:
%s
                """ % (in_file, errorMessage)
                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'], self._experts, subject,
                         text)
                statusCode = 200
            else:
                in_file = filelist[0]
                metadata = {}
                try:
                    metadata[
                        'crc'] = samweb_client.utility.fileEnstoreChecksum(
                            in_file)
                    self._data = metadata['crc']['crc_value']
                    statusCode = 0
                except Exception:
                    errorMessage = traceback.print_exc()
                    subject = 'Failed to obtain the checksum of the file %s' % in_file
                    text = """File: %s
Error message:
%s
                """ % (in_file, errorMessage)

                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)

                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=statusCode,
                               data=self._data)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 18
0
    def isRunning( self, statusCode, istage, run, subrun ):
        """Look up the most recently submitted job and derive the stage status.

        The job record is the last ':'-separated entry of self._data and has
        the form '<jobid>[+<submit_time>]'.  The job table is obtained from
        self._jobstat_from_log() first and, if that fails, from
        self._jobstat_from_cmd().

        Args:
            statusCode: status code of the stage, without the stage digit.
            istage: stage digit, added to the status code on return.
            run: not used by this method.
            subrun: not used by this method.

        Returns:
            statusCode + istage, where statusCode is
              * self.kRUNNING if a matching line reports state 'R',
              * self.kFINISHED if no matching line is in a state other
                than 'X',
              * the incoming statusCode otherwise.
            If the job table cannot be fetched, the experts are e-mailed and
            the unchanged statusCode + istage is returned.
        """
        current_status = statusCode + istage

        self._data = str( self._data )
        last_job_data = self._data.strip().split(':')[-1]
        job_data_list = last_job_data.split('+')
        jobid = job_data_list[0]
        if len(job_data_list) > 1:
            submit_time = float(job_data_list[1])
        else:
            # No submit time recorded with the job id.
            submit_time = float(0)

        # Main command
        jobstat = self._jobstat_from_log(submit_time)
        if not jobstat[0]:
            self.warning('Fetching job status from log file failed! Try running cmd...')
            jobstat = self._jobstat_from_cmd(jobid)

        if not jobstat[0]:
            subject = 'Failed to fetch job status!'
            if not jobstat[1]:
                text = 'No job log found...'
            else:
                text = 'Job log indicates query has failed (see below).\n %s' % jobstat[1]
            text += '\n'
            text += 'PUBS status remains same (%d)' % current_status
            self.error(subject)
            self.error(text)
            pub_smtp(receiver = self._experts,
                     subject = subject,
                     text = text)
            return current_status

        is_running = False
        for line in jobstat[1].split('\n'):
            if not line.startswith(jobid):
                continue
            words = line.split()
            # The job state is read from the sixth column.  Lines that are too
            # short to carry it (e.g. blank lines, which match when the job id
            # is empty) are skipped instead of raising IndexError.
            if len(words) < 6:
                continue
            job_state = words[5]
            # NOTE(review): 'X' presumably marks a removed job -- confirm
            # against the batch system's state codes.
            if job_state == 'X':
                continue
            is_running = True
            if job_state == 'R':
                statusCode = self.kRUNNING
                break

        if not is_running:
            statusCode = self.kFINISHED

        msg = 'jobid: %s ... status: ' % jobid
        if statusCode == self.kRUNNING:
            msg += 'RUNNING'
        elif statusCode == self.kFINISHED:
            msg += 'FINISHED'
        elif statusCode == self.kSUBMITTED:
            msg += 'SUBMITTED'
        statusCode += istage
        msg += ' (%d)' % statusCode
        self.info(msg)

        return statusCode
Ejemplo n.º 19
0
    def check( self, statusCode, istage, run, subrun ):
        self._data = str( self._data )
        nSubmit     = None

        # Get the number of job submissions.
        if self._data != None and len(self._data) > 0:

            holder = self._data.split(':')
            nSubmit = len(holder)

        # Check the finished jobs
        stage = self._digit_to_name[istage]

        # Get project and stage object.
        try:
            probj, stobj = project.get_pubs_stage(self._xml_file, '', stage, run, subrun, self._version)
        except:
            self.error('Exception raied by project.get_pubs_stage:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage 

        # Do check.
        try:
            real_stdout = sys.stdout
            real_stderr = sys.stderr
            sys.stdout = StringIO.StringIO()
            sys.stderr = StringIO.StringIO()
            project.doshorten(stobj)
            check_status = project.docheck(probj, stobj, ana=False)
            strout = sys.stdout.getvalue()
            strerr = sys.stderr.getvalue()
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            if strout:
                self.info(strout)
            if strerr:
                self.warning(strerr)
        except:
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            self.error('Exception raied by project.docheck:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage

        # Update pubs status.
        if check_status == 0:
           statusCode = self.kREADYFORSAM
           self._data = ''

        elif nSubmit > self._nresubmission:
           # If the sample has been submitted more than a certain number
           # of times, email the expert, and move on to the next stage
           subject = "MCC jobs fails after %d resubmissions" % nSubmit 
           text = """
Sample     : %s
Stage      : %s
Job IDs    : %s
           """ % ( self._project, self._digit_to_name[istage], self._data.split(':')[2:] )

           pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

           #statusCode = self.kDONE
           #istage += 10
           statusCode += 1000
        else:
           statusCode = self.kTOBERECOVERED

        statusCode += istage
        self.info("Checked job, status: %d" % statusCode)

        # Pretend I'm doing something
        time.sleep(5)

        # Here we may need some checks

        return statusCode
Ejemplo n.º 20
0
    def check_db(self):
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs(self._project, 2):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' %
                      (run, subrun))

            statusCode = 2
            in_file_name = self._infile_format % (run, subrun)
            in_file = '%s/%s' % (self._in_dir, in_file_name)

            # Get status object
            status = self._api.get_status(
                ds_status(self._project, x[0], x[1], x[2]))

            self._data = status._data
            self._data = str(self._data)

            if self._data:
                statusCode = 0
            else:
                subject = 'Checksum of the file %s not in database' % in_file
                text = """File: %s
Checksum is not in database
                """ % (in_file)

                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'], self._experts, subject,
                         text)

                statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=statusCode,
                               data=self._data)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 21
0
    def store( self, statusCode, istage, run, subrun ):

        # Only store the final stage.
        # If this is not the final stage, advance to the next stage.

        if istage != self._stage_digits[-1]:
            statusCode = self.kDONE
            istage += 10
            return statusCode + istage

        # Get stage name.
        stage = self._digit_to_name[istage]

        # Get project and stage object.
        try:
            probj, stobj = project.get_pubs_stage(self._xml_file, '', stage, run, subrun, self._version)
        except:
            self.error('Exception raied by project.get_pubs_stage:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage 

        # Do store.
        try:
            real_stdout = sys.stdout
            real_stderr = sys.stderr
            sys.stdout = StringIO.StringIO()
            sys.stderr = StringIO.StringIO()

            # Store files.

            dim = project_utilities.dimensions(probj, stobj, ana=False)
            store_status = project.docheck_locations(dim, stobj.outdir, 
                                                     add=False,
                                                     clean=False,
                                                     remove=False,
                                                     upload=True)

            if store_status == 0:
                dim = project_utilities.dimensions(probj, stobj, ana=True)
                store_status = project.docheck_locations(dim, stobj.outdir, 
                                                         add=False,
                                                         clean=False,
                                                         remove=False,
                                                         upload=True)

            strout = sys.stdout.getvalue()
            strerr = sys.stderr.getvalue()
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            if strout:
                self.info(strout)
            if strerr:
                self.warning(strerr)
        except:
            sys.stdout = real_stdout
            sys.stderr = real_stderr
            self.error('Exception raied by project.docheck_locations:')
            e = sys.exc_info()
            for item in e:
                self.error(item)
            for line in traceback.format_tb(e[2]):
                self.error(line)
            return statusCode + istage

        # Update pubs status.
        if store_status == 0:
           statusCode = self.kDONE
           istage += 10

        # Pretend I'm doing something
        time.sleep(5)

        # If all the stages complete, send an email to experts
        if not istage in self._stage_digits:
            subject = "Completed: MCC sample %s" % self._project
            text = """
Sample     : %s
Stage      : %s
               """ % ( self._project, self._digit_to_name[istage-10] )

            pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

        statusCode += istage
        self.info("SAM store, status: %d" % statusCode)

        # Pretend I'm doing something
        time.sleep(5)

        # Here we may need some checks

        return statusCode
Ejemplo n.º 22
0
    def compare(self):
        """Compare the checksums recorded by the reference and parent projects.

        For each (run, subrun) returned by get_xtable_runs(), the data fields
        of the two projects' status objects are compared.  Equal values give
        status 0; a mismatch e-mails the experts, gives status 1000 and stores
        both values in self._data.  The outcome is logged through
        log_status().  At most self._nruns entries are processed per call.
        """
        # Nothing can be done without a DB connection.
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        self.get_resource()

        # Number of runs this call is still allowed to process.
        remaining = self._nruns

        for row in self.get_xtable_runs(
            [self._project, self._ref_project, self._parent_project],
            [1, 0, 0]):

            remaining -= 1

            # The sequence number is currently hard-coded to 0.
            run = int(row[0])
            subrun = int(row[1])
            seq = 0

            # Report starting
            stamp = time.strftime('%Y-%m-%d %H:%M:%S')
            self.info('Comparing checksums: run=%d, subrun=%d @ %s' %
                      (run, subrun, stamp))

            # Fetch the status objects holding the two checksums.
            ref_status = self._api.get_status(
                ds_status(self._ref_project, run, subrun, seq))
            parent_status = self._api.get_status(
                ds_status(self._parent_project, run, subrun, seq))

            checksums_match = ref_status._data == parent_status._data
            if checksums_match:
                statusCode = 0
            else:
                subject = 'Checksum different in run %d, subrun %d between %s and %s' % (
                    run, subrun, self._ref_project, self._parent_project)

                text = ''.join([
                    '%s\n' % subject,
                    'Run %d, subrun %d\n' % (run, subrun),
                    '%s checksum: %s\n' % (self._ref_project,
                                           ref_status._data),
                    '%s checksum: %s\n' % (self._parent_project,
                                           parent_status._data),
                ])

                pub_smtp(os.environ['PUB_SMTP_ACCT'],
                         os.environ['PUB_SMTP_SRVR'],
                         os.environ['PUB_SMTP_PASS'], self._experts, subject,
                         text)
                statusCode = 1000
                # Keep both values so the mismatch is recorded with the status.
                self._data = '%s:%s;%s:%s' % (
                    self._ref_project, ref_status._data, self._parent_project,
                    parent_status._data)

            # Pretend I'm doing something
            time.sleep(0.5)

            # Report finishing
            stamp = time.strftime('%Y-%m-%d %H:%M:%S')
            self.info('Finished comparing checksums: run=%d, subrun=%d @ %s' %
                      (run, subrun, stamp))

            # Log the outcome for this run/subrun.
            self.log_status(ds_status(project=self._project,
                                      run=run,
                                      subrun=subrun,
                                      seq=seq,
                                      status=statusCode,
                                      data=self._data))

            # Stop once the allowed number of runs has been processed.
            if not remaining:
                break
Ejemplo n.º 23
0
    def compare( self ):
        """Compare the checksums recorded by the reference and parent projects.

        For each (run, subrun) returned by get_xtable_runs(), the data fields
        of the two projects' status objects are compared.  Equal values give
        status 0; a mismatch e-mails the experts, gives status 1000 and stores
        both values in self._data.  The outcome is logged through
        log_status().  At most self._nruns entries are processed per call.
        """
        # Attempt to connect DB. If failure, abort
        # (indentation below uses spaces only; the previous tab-indented
        # lines are rejected by Python 3 as inconsistent)
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        self.get_resource()

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns

        for x in self.get_xtable_runs( [self._project, self._ref_project, self._parent_project],
                                       [1, 0, 0] ):

            # Counter decreases by 1
            ctr -= 1

            # Currently hard-coded seq = 0
            (run, subrun, seq) = (int(x[0]), int(x[1]), 0)

            # Report starting
            now_str = time.strftime('%Y-%m-%d %H:%M:%S')
            self.info('Comparing checksums: run=%d, subrun=%d @ %s' % ( run, subrun, now_str ))

            # Get status objects
            RefStatus = self._api.get_status( ds_status( self._ref_project, run, subrun, seq ))
            ParentStatus = self._api.get_status( ds_status( self._parent_project, run, subrun, seq ))

            if RefStatus._data == ParentStatus._data:
                statusCode = 0
            else:
                subject = 'Checksum different in run %d, subrun %d between %s and %s' % ( run, subrun, self._ref_project, self._parent_project )

                text = '%s\n' % subject
                text += 'Run %d, subrun %d\n' % ( run, subrun )
                text += '%s checksum: %s\n' % ( self._ref_project, RefStatus._data )
                text += '%s checksum: %s\n' % ( self._parent_project, ParentStatus._data )

                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )
                statusCode = 1000
                # Keep both values so the mismatch is recorded with the status.
                self._data = '%s:%s;%s:%s' % ( self._ref_project, RefStatus._data, self._parent_project, ParentStatus._data )

            # Pretend I'm doing something
            time.sleep(0.5)

            # Report finishing
            now_str = time.strftime('%Y-%m-%d %H:%M:%S')
            self.info('Finished comparing checksums: run=%d, subrun=%d @ %s' % ( run, subrun, now_str ))

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = seq,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 24
0
    def compare_dropbox_checksum( self ):
        """Compare the reference project's checksum with the dropbox (PNFS) copy.

        For each (run, subrun) returned by get_xtable_runs():
          * input file not found            -> status 100 (logged without data)
          * converted checksums agree       -> status 0
          * converted checksums differ      -> experts e-mailed, status 1000
          * PNFS lookup raises LookupError  -> the file's SAM metadata is
            consulted instead: an 'enstore' checksum gives status 0, any other
            kind status 10, and a file unknown to SAM status 100 plus e-mail.
        At most self._nruns entries are processed per call.
        """
        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs( [self._project, self._parent_project], [1, 0] ):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' % (run,subrun))

            statusCode = 1
            in_file_holder = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if (len(filelist)<1):
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                status_code=100
                status = ds_status( project = self._project,
                                    run     = run,
                                    subrun  = subrun,
                                    seq     = 0,
                                    status  = status_code )
                self.log_status( status )
                # Enforce the run limit on this path as well: a bare
                # 'continue' would skip the check at the bottom of the loop
                # and let the counter run past zero.
                if not ctr: break
                continue

            if (len(filelist)>1):
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.error('ERROR: List of files found %s' % filelist)

            # With several matches the first one is used.
            in_file = filelist[0]
            in_file_name = os.path.basename(in_file)
            out_file = '%s/%s' % ( self._out_dir, in_file_name )

            #Note that this has the sequence number hard coded as number 0
            RefStatus = self._api.get_status( ds_status(self._ref_project, run, subrun, 0))
            near1_checksum = RefStatus._data

            try:
                pnfs_adler32_1, pnfs_size = get_pnfs_1_adler32_and_size( out_file )
                near1_adler32_1 = convert_0_adler32_to_1_adler32(near1_checksum, pnfs_size)

                if near1_adler32_1 == pnfs_adler32_1:
                    statusCode = 0
                else:
                    subject = 'Checksum different in run %d, subrun %d between %s and PNFS' % ( run, subrun, self._ref_project )
                    text = '%s\n' % subject
                    text += 'Run %d, subrun %d\n' % ( run, subrun )
                    text += 'Converted %s checksum: %s\n' % ( self._ref_project, near1_adler32_1 )
                    text += 'Converted PNFS checksum: %s\n' % ( pnfs_adler32_1 )

                    pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )
                    statusCode = 1000
                    self._data = '%s:%s;PNFS:%s' % ( self._ref_project, near1_adler32_1, pnfs_adler32_1 )

            except LookupError:

                self.warning("Could not find file in the dropbox %s" % out_file)
                self.warning("Gonna go looking on tape %s" % in_file_name)
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                try:
                    meta = samweb.getMetadata(filenameorid=in_file_name)
                    # The first checksum entry is split as '<kind>:<value>'.
                    checksum_info = meta['checksum'][0].split(':')
                    if checksum_info[0] == 'enstore':
                        self._data = checksum_info[1]
                        statusCode = 0
                    else:
                        statusCode = 10

                except samweb_cli.exceptions.FileNotFound:
                    subject = 'Failed to locate file %s at SAM' % in_file
                    text = 'File %s is not found at SAM!' % in_file
                    pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )
                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = 0,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 25
0
    def declare_to_sam( self ):
        
        # Attempt to connect DB. If failure, abort
        if not self.connect():
	    self.error('Cannot connect to DB! Aborting...')
	    return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        # self.info('Here, self._nruns=%d ... ' % (self._nruns))
        self._project_requirement[0] = 1

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs(self._project_list,
                                      self._project_requirement):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Declaring a file to SAM: run=%d, subrun=%d ...' % (run,subrun) )

            status = 1
            
            # Check input file exists. Otherwise report error
            in_file_holder = '%s/%s' % (self._in_dir,self._infile_format % (run,subrun))
            filelist = glob.glob( in_file_holder )
            if (len(filelist)<1):
                self.error('ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!' % (run,subrun))
                status_code=100
                status = ds_status( project = self._project,
                                    run     = run,
                                    subrun  = subrun,
                                    seq     = 0,
                                    status  = status_code )
                self.log_status( status )                
                continue

            if (len(filelist)>1):
                self.error('ERROR: Found too many files for (run,subrun) = %s @ %s !!!' % (run,subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_json = '%s.json' % in_file

            self.info('Declaring ' + in_file + ' to SAM: using ' + in_json  )
            

            if os.path.isfile( in_file ) and os.path.isfile( in_json ):
                self.info('Found %s' % (in_file) )
                self.info('Found %s' % (in_json) )
                json_dict = json.load( open( in_json ) )

                # native SAM python call, instead of a system call
                # make sure you've done get-cert
                # Perhaps we want a try block for samweb?
                samweb = samweb_cli.SAMWebClient(experiment="uboone")

                # Check if the file already exists at SAM
                try:
                    in_file_base=os.path.basename(in_file)
                    samweb.getMetadata(filenameorid=in_file_base)
                    status = 101
                    # Email the experts
                    subject = 'File %s Existing at SAM' % in_file_base
                    text = """
File %s has already exists at SAM!
                    """ % in_file_base

                    pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                except samweb_cli.exceptions.FileNotFound:
                    # metadata already validated in get_assembler_metadata_file.py
                    try:
                        samweb.declareFile(md=json_dict)
                        status = 2
                    except Exception as e:
#                        print "Unexpected error: samweb declareFile problem: "
                        self.error( "Unexpected error: samweb declareFile problem: ")
                        self.error( "%s" % e)
                        subject = "samweb declareFile problem: %s" % in_file_base
                        text = """
File %s failed to be declared to SAM!
%s
                        """ % ( in_file_base, traceback.print_exc() )

                        pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                        # print "Give some null properties to this meta data"
                        self.error( "Give this file a status 102")
                        status = 102

            else:
                status = 100

            # Pretend I'm doing something
            time.sleep(1)

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = int(x[0]),
                                subrun  = int(x[1]),
                                seq     = 0,
                                status  = status )
            
            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 26
0
    def calculate_checksum( self ):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_runs( self._project, 1 ):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))
            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' % (run,subrun))

            statusCode = 1

            in_file_name = self._infile_format % ( run, subrun )
            in_file_holder = '%s/%s' % ( self._in_dir, in_file_name )
            filelist = glob.glob( in_file_holder )
            if (len(filelist)>1):
                self.error('ERROR: There is more than one file matching that pattern: %s' % in_file_name)
            if (len(filelist)<1):
                errorMessage = "Failed to find file%s"%in_file_holder
                subject = "get_checksum_temp Failed to find file%s"%in_file_holder
                text = """File: %s
Error message:
%s
                """ % ( in_file, errorMessage )
                pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )
                statusCode = 200
            else:
                in_file = filelist[0]
                metadata = {}
                try:
                    metadata['crc'] = samweb_client.utility.fileEnstoreChecksum( in_file )
                    self._data = metadata['crc']['crc_value']
                    statusCode = 0
                except Exception:
                    errorMessage = traceback.print_exc()
                    subject = 'Failed to obtain the checksum of the file %s' % in_file
                    text = """File: %s
Error message:
%s
                """ % ( in_file, errorMessage )
                    
                    pub_smtp( os.environ['PUB_SMTP_ACCT'], os.environ['PUB_SMTP_SRVR'], os.environ['PUB_SMTP_PASS'], self._experts, subject, text )

                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status( project = self._project,
                                run     = run,
                                subrun  = subrun,
                                seq     = 0,
                                status  = statusCode,
                                data    = self._data )

            # Log status
            self.log_status( status )

            # Break from loop if counter became 0
            if not ctr: break
Ejemplo n.º 27
0
    def compare_dropbox_checksum(self):

        # Attempt to connect DB. If failure, abort
        if not self.connect():
            self.error('Cannot connect to DB! Aborting...')
            return

        # If resource info is not yet read-in, read in.
        if self._nruns is None:
            self.get_resource()

        self.info('Here, self._nruns=%d ... ' % (self._nruns))

        # Fetch runs from DB and process for # runs specified for this instance.
        ctr = self._nruns
        for x in self.get_xtable_runs([self._project, self._parent_project],
                                      [1, 0]):

            # Counter decreases by 1
            ctr -= 1

            (run, subrun) = (int(x[0]), int(x[1]))

            # Report starting
            self.info('Calculating the file checksum: run=%d, subrun=%d ...' %
                      (run, subrun))

            statusCode = 1
            in_file_holder = '%s/%s' % (self._in_dir, self._infile_format %
                                        (run, subrun))
            filelist = glob.glob(in_file_holder)
            if (len(filelist) < 1):
                self.error(
                    'ERROR: Failed to find the file for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                status_code = 100
                status = ds_status(project=self._project,
                                   run=run,
                                   subrun=subrun,
                                   seq=0,
                                   status=status_code)
                self.log_status(status)
                continue

            if (len(filelist) > 1):
                self.error(
                    'ERROR: Found too many files for (run,subrun) = %s @ %s !!!'
                    % (run, subrun))
                self.error('ERROR: List of files found %s' % filelist)

            in_file = filelist[0]
            in_file_name = os.path.basename(in_file)
            out_file = '%s/%s' % (self._out_dir, in_file_name)

            #Note that this has the sequence number hard coded as number 0
            RefStatus = self._api.get_status(
                ds_status(self._ref_project, run, subrun, 0))
            near1_checksum = RefStatus._data

            try:
                pnfs_adler32_1, pnfs_size = get_pnfs_1_adler32_and_size(
                    out_file)
                near1_adler32_1 = convert_0_adler32_to_1_adler32(
                    near1_checksum, pnfs_size)

                if near1_adler32_1 == pnfs_adler32_1:
                    statusCode = 0
                else:
                    subject = 'Checksum different in run %d, subrun %d between %s and PNFS' % (
                        run, subrun, self._ref_project)
                    text = '%s\n' % subject
                    text += 'Run %d, subrun %d\n' % (run, subrun)
                    text += 'Converted %s checksum: %s\n' % (self._ref_project,
                                                             near1_adler32_1)
                    text += 'Converted PNFS checksum: %s\n' % (pnfs_adler32_1)

                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)
                    statusCode = 1000
                    self._data = '%s:%s;PNFS:%s' % (
                        self._ref_project, near1_adler32_1, pnfs_adler32_1)

            except LookupError:

                self.warning("Could not find file in the dropbox %s" %
                             out_file)
                self.warning("Gonna go looking on tape %s" % in_file_name)
                samweb = samweb_cli.SAMWebClient(experiment="uboone")
                meta = {}

                try:
                    meta = samweb.getMetadata(filenameorid=in_file_name)
                    checksum_info = meta['checksum'][0].split(':')
                    if checksum_info[0] == 'enstore':
                        self._data = checksum_info[1]
                        statusCode = 0
                    else:
                        statusCode = 10

                except samweb_cli.exceptions.FileNotFound:
                    subject = 'Failed to locate file %s at SAM' % in_file
                    text = 'File %s is not found at SAM!' % in_file
                    pub_smtp(os.environ['PUB_SMTP_ACCT'],
                             os.environ['PUB_SMTP_SRVR'],
                             os.environ['PUB_SMTP_PASS'], self._experts,
                             subject, text)
                    statusCode = 100

            # Create a status object to be logged to DB (if necessary)
            status = ds_status(project=self._project,
                               run=run,
                               subrun=subrun,
                               seq=0,
                               status=statusCode,
                               data=self._data)

            # Log status
            self.log_status(status)

            # Break from loop if counter became 0
            if not ctr: break