Example #1
    def __init__(self):

        self.raw_data = {
            "tegner-login-1": "/cfs/klemming/projects/xenon/xenon1t/raw/",
            "midway-login1": "/project2/lgrandi/xenon1t/raw/"
        }

        self.proc_data = {
            "tegner-login-1":
            "/cfs/klemming/projects/xenon/xenon1t/processed/",
            "midway-login1": "/project/lgrandi/xenon1t/processed/"
        }

        self.chown_user = {
            "tegner-login-1": "bobau",
            "midway-login1": "pdeperio"
        }

        self.chown_group = {
            "tegner-login-1": "xenon-users",
            "midway-login1": "xenon1t-admins"
        }

        self.chmod = {"tegner-login-1": '750', "midway-login1": '755'}

        Task.__init__(self)
        self.hostname_config = config.get_config(config.get_hostname())
        self.hostname = config.get_hostname()
Example #2
    def each_location(self, data_doc):
        # Only data waiting to be verified
        if data_doc['status'] != 'verifying':  # and data_doc['status'] != 'transferred':
            self.log.debug('Location ' + data_doc['host'] + ' does not need a new checksum')
            return

        if data_doc['status'] == 'transferred' and \
           (config.get_hostname() == 'xe1t-datamanager' or config.get_hostname() == 'login'):
            return

        # Data must be hosted somewhere
        if 'host' not in data_doc:
            return

        # Data must be here locally
        if data_doc['host'] != config.get_hostname():

            # Special case of midway-srm accessible via POSIX on midway-login1
            if not (data_doc['host']  == "midway-srm" and config.get_hostname() == "midway-login1"):
                self.log.debug('Location not here')
                return

        # This status is given after checksumming
        status = 'transferred'

        # Find file and perform checksum
        if os.path.isdir(data_doc['location']):
            value = checksumdir.dirhash(data_doc['location'],
                                        'sha512')
        elif os.path.isfile(data_doc['location']):
            value = checksumdir._filehash(data_doc['location'],
                                          hashlib.sha512)
        else:
            # Data not actually found
            self.log.error("Location %s not found." % data_doc['location'])
            value = None
            status = 'error'

        if config.DATABASE_LOG:
            if data_doc['status'] == 'verifying':
                self.log.info("Adding a checksum to run "
                              "%d %s" % (self.run_doc['number'],
                                         data_doc['type']))
                self.collection.update({'_id' : self.run_doc['_id'],
                                        'data': {'$elemMatch': data_doc}},
                                       {'$set': {'data.$.status'  : status,
                                                 'data.$.checksum': value}})
            elif data_doc['checksum'] != value or status == 'error':
                self.log.info("Checksum fail "
                              "%d %s" % (self.run_doc['number'],
                                         data_doc['type']))
                self.collection.update({'_id' : self.run_doc['_id'],
                                        'data': {'$elemMatch': data_doc}},
                                       {'$set': {'data.$.checksumproblem': True}})
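
For reference, the file-checksum step above can be approximated with the standard library alone. This is a minimal sketch; the real code relies on the checksumdir package, and the path below is hypothetical.

import hashlib

def sha512_of_file(path, chunk_size=1 << 20):
    """Return the hex sha512 digest of a single file, read in chunks."""
    digest = hashlib.sha512()
    with open(path, 'rb') as f:
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()

print(sha512_of_file('/tmp/example_raw_file'))  # hypothetical path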
Example #3
    def each_run(self):
        """Run over the requested data types according to the json config file"""

        if 'data_type' not in config.get_config(config.get_hostname()):
            logging.info("Error: Define a data_type in your configuration file")
            logging.info("       (e.g. 'data_type': ['raw'])")
            exit()

        for data_type in config.get_config(config.get_hostname())['data_type']:
            self.log.debug("%s" % data_type)
            self.do_possible_transfers(option_type=self.option_type,
                                       data_type=data_type)
Example #4
    def each_run(self):
        """Run over the requested data types according to the json config file"""

        if 'data_type' not in config.get_config(config.get_hostname()):
            logging.info(
                "Error: Define a data_type in your configuration file")
            logging.info("       (e.g. 'data_type': ['raw'])")
            exit()

        for data_type in config.get_config(config.get_hostname())['data_type']:
            self.log.debug("%s" % data_type)
            self.do_possible_transfers(option_type=self.option_type,
                                       data_type=data_type)
Example #5
    def each_location(self, data_doc):
        """
        Check every location with data whether it should be purged.
        """
        self.log.debug("Checking purge logic")

        # Skip places where we can't locally access data
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return

        # See if a purge version is specified, otherwise don't purge
        if not config.purge_version():
            self.log.debug("No processed version specified for purge, skipping")
            return

        # Do not purge raw data
        if data_doc['type'] == 'raw':
            self.log.debug("Do not purge raw data")
            return

        # Check pax version of processed run
        if data_doc['pax_version'] != config.purge_version():
            self.log.debug("Don't purge this version: %s" % data_doc['pax_version'])
            return

        # Purge the data
        self.log.info("Purging %s" % data_doc['location'])
        self.purge(data_doc)

        return
Example #6
    def each_location(self, data_doc):
        """Check every location with data whether it should be purged.
        """
        # Skip places where we can't locally access data
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return

        # Do not purge processed data (use PurgeProcessed below)
        if data_doc['type'] == 'processed':
            self.log.debug("Do not purge processed data")
            return

        self.log.debug("Checking purge logic")

        # Only purge transferred data
        if data_doc["status"] != "transferred":
            self.log.debug("Not transferred")
            return

        # Require at least three copies of the data since we are deleting the third.
        num_copies = self.check(data_doc['type'], warn=False)
        if num_copies < 3:
            self.log.debug("Not enough copies (%d)" % num_copies)
            return

        if self.check_purge_requirements():
            self.log.info("Purging %s" % data_doc['location'])
            self.purge(data_doc)
        else:
            self.log.debug("Not enough time elapsed")
Example #7
    def each_run(self):
        # For each data location, see if this filename is in it
        for data_doc in self.run_doc['data']:
            # Not local, skip
            if 'host' not in data_doc or \
                            data_doc['host'] != config.get_hostname():
                continue

            if data_doc['location'] != self.location:
                continue

            # Notify run database
            if config.DATABASE_LOG is True:
                self.collection.update({'_id': self.run_doc['_id']},
                                       {'$pull': {
                                           'data': data_doc
                                       }})

            # Perform operation
            self.log.info("Removing %s" % (self.location))
            if os.path.isdir(data_doc['location']):
                shutil.rmtree(data_doc['location'])
            else:
                os.remove(self.location)

            break
Example #8
def get_queue(host=config.get_hostname(), partition=''):
    """Get list of jobs in queue"""

    if host == "midway-login1":
        args = {'partition': 'sandyb',
                'user' : config.get_user()}
    elif host == 'tegner-login-1':
        args = {'partition': 'main',
                'user' : 'bobau'}
    else:
        return []

    if partition == '':
        command = 'squeue --user={user} -o "%.30j"'.format(**args)

    else:
        args['partition'] = partition
        command = 'squeue --partition={partition} --user={user} -o "%.30j"'.format(**args)

    try:
        queue = subprocess.check_output(command,
                                        shell=True,
                                        timeout=120)
    except subprocess.TimeoutExpired as e:
        logging.error("Process timeout")
        return []
    except Exception as e:
        logging.exception(e)
        return []


    queue_list = queue.rstrip().decode('ascii').split()
    if len(queue_list) > 1:
        return queue_list[1:]
    return []
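
The slice at the end drops the header row that squeue prints before the job names. A minimal sketch of that parsing, assuming -o "%.30j" yields a one-column listing; the sample output is made up.

sample_output = b"  NAME\n  run_170101_0123\n  run_170101_0456\n"  # hypothetical squeue output

queue_list = sample_output.rstrip().decode('ascii').split()
job_names = queue_list[1:] if len(queue_list) > 1 else []
print(job_names)  # ['run_170101_0123', 'run_170101_0456']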
Example #9
    def each_location(self, data_doc):
        """
        Check every location with data whether it should be purged.
        """
        self.log.debug("Checking purge logic")

        # Skip places where we can't locally access data
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return

        # See if a purge version is specified, otherwise don't purge
        if not config.purge_version():
            self.log.debug(
                "No processed version specified for purge, skipping")
            return

        # Do not purge raw data
        if data_doc['type'] == 'raw':
            self.log.debug("Do not purge raw data")
            return

        # Check pax version of processed run
        if data_doc['pax_version'] != config.purge_version():
            self.log.debug("Don't purge this version: %s" %
                           data_doc['pax_version'])
            return

        # Purge the data
        self.log.info("Purging %s" % data_doc['location'])
        self.purge(data_doc)

        return
Example #10
    def each_run(self):

        thishost = config.get_hostname()

        if thishost != 'midway-login1':
            return

        version = 'v%s' % pax.__version__
        have_processed, have_raw = self.local_data_finder(thishost, version)

        # Skip if no processed data
        if not have_processed:
            self.log.debug("Skipping %s with no processed data",
                           self.run_doc['name'])
            return

        in_location = config.get_processing_dir(thishost, version)
        out_location = config.get_minitrees_dir(thishost, version)

        queue_list = qsub.get_queue(thishost)
        # Should check version here too
        if self.run_doc['name'] in queue_list:
            self.log.debug("Skipping %s currently in queue",
                           self.run_doc['name'])
            return

        self.log.info("Processing %s with hax_%s, output to %s",
                      self.run_doc['name'], version, out_location)

        _process_hax(self.run_doc['name'], in_location, thishost, version,
                     out_location, self.run_doc['detector'])
Example #11
    def each_run(self):
        # For each data location, see if this filename is in it
        for data_doc in self.run_doc['data']:
            # Not local, skip
            if 'host' not in data_doc or \
                            data_doc['host'] != config.get_hostname():
                continue

            if data_doc['location'] != self.input:
                continue

            self.log.info("Moving %s to %s" % (self.input, self.output))
            # Perform renaming
            try:
                shutil.move(self.input, self.output)
            except Exception as e:
                print(e)

            ## Notify run database
            if config.DATABASE_LOG is True:
                self.collection.update(
                    {
                        '_id': self.run_doc['_id'],
                        'data': {
                            '$elemMatch': data_doc
                        }
                    }, {'$set': {
                        'data.$.location': self.output
                    }})
            break
Example #12
    def each_run(self):

        thishost = config.get_hostname()

        hax_version = 'v%s' % hax.__version__
        pax_version = 'v%s' % pax.__version__
        have_processed, have_raw = self.local_data_finder(thishost,
                                                          pax_version)

        # Skip if no processed data
        if not have_processed:
            self.log.debug("Skipping %s with no processed data", self.run_doc['name'])
            return

        in_location = os.path.dirname(have_processed['location'])
        out_location = config.get_minitrees_dir(thishost, pax_version)

        queue_list = qsub.get_queue(thishost)

        # Should check version here too
        if self.run_doc['name'] in queue_list:
            self.log.debug("Skipping %s currently in queue",
                           self.run_doc['name'])
            return

        self.log.info("Processing %s (%s) with hax_%s, output to %s",
                      self.run_doc['name'], pax_version, hax_version,
                      out_location)

        _process_hax(self.run_doc['name'], in_location, thishost,
                     pax_version, out_location,
                     self.run_doc['detector'])
Example #13
File: qsub.py Project: rynge/cax
def get_queue(host=config.get_hostname(), partition=''):
    """Get list of jobs in queue"""

    if host == "midway-login1":
        args = {'partition': 'sandyb', 'user': config.get_user()}
    elif host == 'tegner-login-1':
        args = {'partition': 'main', 'user': '******'}
    else:
        return []

    if partition == '':
        command = 'squeue --user={user} -o "%.30j"'.format(**args)

    else:
        args['partition'] = partition
        command = 'squeue --partition={partition} --user={user} -o "%.30j"'.format(
            **args)

    try:
        queue = subprocess.check_output(command, shell=True, timeout=120)
    except subprocess.TimeoutExpired as e:
        logging.error("Process timeout")
        return []
    except Exception as e:
        logging.exception(e)
        return []

    queue_list = queue.rstrip().decode('ascii').split()
    if len(queue_list) > 1:
        return queue_list[1:]
    return []
Example #14
    def each_run(self):
        # For each data location, see if this filename is in it
        for data_doc in self.run_doc['data']:
            # Not local, skip
            if 'host' not in data_doc or \
                            data_doc['host'] != config.get_hostname():
                continue

            if data_doc['location'] != self.input:
                continue

            self.log.info("Moving %s to %s" % (self.input,
                                               self.output))
            # Perform renaming
            try:
                shutil.move(self.input, self.output)
            except Exception as e:
                print(e)

            ## Notify run database
            if config.DATABASE_LOG is True:
                self.collection.update({'_id' : self.run_doc['_id'],
                                        'data': {'$elemMatch': data_doc}},
                                       {'$set': {
                                           'data.$.location': self.output}})
            break
Example #15
    def each_location(self, data_doc):
        """Check every location with data whether it should be purged.
        """
        # Skip places where we can't locally access data
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return

        # Do not purge processed data (use PurgeProcessed below)
        if data_doc['type'] == 'processed':
            self.log.debug("Do not purge processed data")
            return

        self.log.debug("Checking purge logic")

        # Only purge transferred data
        if data_doc["status"] != "transferred":
            self.log.debug("Not transferred")
            return

        # Require at least three copies of the data since we are deleting the third.
        num_copies = self.check(data_doc['type'], warn=False)
        if num_copies < 3:
            self.log.debug("Not enough copies (%d)" % num_copies)
            return

        if self.check_purge_requirements():
            self.log.info("Purging %s" % data_doc['location'])
            self.purge(data_doc)
        else:
            self.log.debug("Not enough time elapsed")
Example #16
    def purge(self, data_doc, delete_data=True):

        if delete_data is True:

            self.log.info("Deleting %s" % data_doc['location'])

            # Temporary hardcoded check for gfal-rm removal
            if config.get_hostname() == 'login' and 'raw' in data_doc['location']:
                config_original = config.get_config('login')
                server = config_original['hostname']
                if config.get_cert() is None:
                    grid_cert = ''
                else:
                    grid_cert = config.get_cert()

                full_command = "gfal-rm -v -r --cert %s " % grid_cert + \
                               server+data_doc['location']

                self.log.info(full_command)

                try:
                    gfal_out = subprocess.check_output(
                        full_command, stderr=subprocess.STDOUT, shell=True)

                except subprocess.CalledProcessError as gfal_exec:
                    self.log.error(gfal_exec.output.rstrip().decode('ascii'))
                    self.log.error("Error: gfal-rm status = %d\n" %
                                   gfal_exec.returncode)
                    raise

                gfal_out_ascii = gfal_out.rstrip().decode('ascii')
                if "error" in gfal_out_ascii.lower(
                ):  # Some errors don't get caught above
                    self.log.error(gfal_out_ascii)
                    raise

                else:
                    self.log.info(gfal_out_ascii)  # To print timing

            # Default POSIX removal
            else:
                if os.path.isdir(data_doc['location']):
                    shutil.rmtree(data_doc['location'])
                    self.log.info('Deleted, notify run database.')
                elif os.path.isfile(data_doc['location']):
                    os.remove(data_doc['location'])
                else:
                    self.log.error('did not exist, notify run database.')

        if config.DATABASE_LOG:
            resp = self.collection.update({'_id': self.run_doc['_id']},
                                          {'$pull': {
                                              'data': data_doc
                                          }})
            self.log.info('Removed from run database: %s' %
                          data_doc['location'])
            self.log.debug(resp)
Example #17
File: task.py Project: rynge/cax
    def get_daq_buffer(self):
        for data_doc in self.run_doc['data']:
            if data_doc['type'] == 'untriggered':
                if data_doc['host'] == 'reader':
                    if config.get_hostname() == 'eb0':
                        return data_doc

        # Not found
        return None
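
A hypothetical run_doc illustrating the lookup above; the field names follow the example, the values are made up. The untriggered entry hosted on 'reader' is the one returned when running on 'eb0'.

run_doc = {
    'data': [
        {'type': 'raw', 'host': 'midway-login1',
         'location': '/project2/lgrandi/xenon1t/raw/run_x'},   # hypothetical
        {'type': 'untriggered', 'host': 'reader',
         'location': '/data/xenon/run_x'},                     # hypothetical
    ]
}

daq_buffer = None
for data_doc in run_doc['data']:
    if data_doc['type'] == 'untriggered' and data_doc['host'] == 'reader':
        daq_buffer = data_doc   # would be returned when the local host is 'eb0'
        break
print(daq_buffer)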
Example #18
    def __init__(self):

        self.raw_data = {"tegner-login-1": "/cfs/klemming/projects/xenon/xenon1t/raw/",
                         "midway-login1": "/project2/lgrandi/xenon1t/raw/"}
        
        self.proc_data = {"tegner-login-1": "/cfs/klemming/projects/xenon/xenon1t/processed/",
                          "midway-login1": "/project/lgrandi/xenon1t/processed/"}
        
        self.chown_user = {"tegner-login-1": "bobau",
                           "midway-login1": "pdeperio"}

        self.chown_group = {"tegner-login-1": "xenon-users",
                            "midway-login1": "xenon1t-admins"}

        self.chmod = {"tegner-login-1": '750',
                      "midway-login1": '755'}

        Task.__init__(self)
        self.hostname_config = config.get_config(config.get_hostname())
        self.hostname = config.get_hostname()
Example #19
    def purge(self, data_doc, delete_data=True):

        if delete_data is True:

            self.log.info("Deleting %s" % data_doc['location'])

            # Temporary hardcoded check for gfal-rm removal
            if config.get_hostname() == 'login' and 'raw' in data_doc['location']:
                config_original = config.get_config('login')
                server = config_original['hostname']
                if config.get_cert() is None:
                    grid_cert = ''
                else:
                    grid_cert = config.get_cert()

                full_command = "gfal-rm -v -r --cert %s " % grid_cert + \
                               server+data_doc['location']

                self.log.info(full_command)

                try:
                    gfal_out = subprocess.check_output(full_command, stderr=subprocess.STDOUT, shell=True)

                except subprocess.CalledProcessError as gfal_exec:
                    self.log.error(gfal_exec.output.rstrip().decode('ascii'))
                    self.log.error("Error: gfal-rm status = %d\n" % gfal_exec.returncode)
                    raise

                gfal_out_ascii = gfal_out.rstrip().decode('ascii')
                if "error" in gfal_out_ascii.lower(): # Some errors don't get caught above
                    self.log.error(gfal_out_ascii)
                    raise

                else:
                    self.log.info(gfal_out_ascii) # To print timing

            # Default POSIX removal        
            else:
                if os.path.isdir(data_doc['location']):
                    shutil.rmtree(data_doc['location'])
                    self.log.info('Deleted, notify run database.')
                elif os.path.isfile(data_doc['location']):
                    os.remove(data_doc['location'])
                else:
                    self.log.error('did not exist, notify run database.')

        if config.DATABASE_LOG:
            resp = self.collection.update({'_id': self.run_doc['_id']},
                                              {'$pull': {'data': data_doc}})
            self.log.info('Removed from run database: %s' % data_doc['location'])
            self.log.debug(resp)
Example #20
    def each_location(self, data_doc):
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return  # Skip places where we can't locally access data

        if 'creation_time' not in data_doc:
            self.log.warning("No creation time for %s" % str(data_doc))
            return

        # How long has transfer been ongoing
        try:
            time_modified = os.stat(data_doc['location']).st_mtime
        except FileNotFoundError:
            time_modified = 0
        time_modified = datetime.datetime.fromtimestamp(time_modified)
        time_made = data_doc['creation_time']

        # Some RunsDB entries are different format for some reason (#40)
        if isinstance(time_made, list):
            # Assume only one list entry that contains the time
            time_made = time_made[0]

        difference = datetime.datetime.utcnow() - max(time_modified, time_made)

        if data_doc["status"] == "transferred" or data_doc[
                "status"] == "verifying":
            return  # Transfer went fine

        self.log.debug(difference)

        if difference > datetime.timedelta(hours=2):  # If stale transfer
            self.give_error("Transfer %s from run %d (%s) lasting more than "
                            "2 hours" %
                            (data_doc['type'], self.run_doc['number'],
                             self.run_doc['name']))

        # Do not delete stalled or failed raw data transfers to recover with rsync
        # (Warning: do not use scp, which may create nested directories)
        delete_data = (data_doc['type'] == 'processed'
                       and 'v%s' % pax.__version__ == data_doc['pax_version'])

        if difference > datetime.timedelta(hours=24):
            self.give_error("Transfer lasting more than 24 hours, retry.")
            self.purge(data_doc, delete_data)

        elif data_doc["status"] == 'error' and data_doc[
                'host'] != 'xe1t-datamanager':
            self.give_error("Transfer or process errored, retry.")
            self.purge(data_doc, delete_data)
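
A minimal sketch of the staleness arithmetic above with made-up timestamps: the transfer counts as stale once neither the file mtime nor the database creation_time has advanced for more than two hours, and is purged only after 24 hours.

import datetime

# Hypothetical timestamps: last file modification 3 hours ago, DB entry 5 hours old
time_modified = datetime.datetime.utcnow() - datetime.timedelta(hours=3)
time_made = datetime.datetime.utcnow() - datetime.timedelta(hours=5)

difference = datetime.datetime.utcnow() - max(time_modified, time_made)
print(difference > datetime.timedelta(hours=2))   # True: flagged as stale
print(difference > datetime.timedelta(hours=24))  # False: not yet purged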
Example #21
    def each_location(self, data_doc):
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return  # Skip places where we can't locally access data

        if 'creation_time' not in data_doc:
            self.log.warning("No creation time for %s" % str(data_doc))
            return

        # How long has transfer been ongoing
        try:
            time_modified = os.stat(data_doc['location']).st_mtime
        except FileNotFoundError:
            time_modified = 0
        time_modified = datetime.datetime.fromtimestamp(time_modified)
        time_made = data_doc['creation_time']

        # Some RunsDB entries are different format for some reason (#40)
        if isinstance(time_made, list):
            # Assume only one list entry that contains the time
            time_made = time_made[0]

        difference = datetime.datetime.utcnow() - max(time_modified,
                                                      time_made)

        if data_doc["status"] == "transferred" or data_doc["status"] == "verifying":
            return  # Transfer went fine

        self.log.debug(difference)

        if difference > datetime.timedelta(hours=2):  # If stale transfer
            self.give_error("Transfer %s from run %d (%s) lasting more than "
                            "2 hours" % (data_doc['type'],
                                          self.run_doc['number'],
                                          self.run_doc['name']))

        # Do not delete stalled or failed raw data transfers to recover with rsync 
        # (Warning: do not use scp, which may create nested directories)
        delete_data = (data_doc['type'] == 'processed' and 'v%s' % pax.__version__ == data_doc['pax_version'])

        if difference > datetime.timedelta(hours=24):
            self.give_error("Transfer lasting more than 24 hours, retry.")
            self.purge(data_doc, delete_data)
            
        elif data_doc["status"] == 'error' and data_doc['host'] != 'xe1t-datamanager':
            self.give_error("Transfer or process errored, retry.")
            self.purge(data_doc, delete_data)
Example #22
    def each_location(self, data_doc):
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return  # Skip places where we can't locally access data

        if data_doc["status"] != "transferred":
            return

        comparison = self.get_main_checksum(**data_doc)

        if comparison is None:
            return

        if data_doc['checksum'] != comparison:
            self.give_error("Bad checksum %d, %s, %s, %s" % (self.run_doc['number'], data_doc['host'], \
                            data_doc['type'], data_doc['pax_version']))

            if self.check(data_doc['type'], warn=False) > 1:
                self.purge(data_doc)
Example #23
    def each_run(self):
        """Set ownership and permissions for files/folders"""
        for data_doc in self.run_doc['data']:
            # Not local, skip
            if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
                continue

            # Extract path and type
            f_path = data_doc['location']
            f_type = data_doc['type']

            # Apply changes according to processed/raw and analysis facility
            if f_type == 'processed':
              logging.info('Change ownership and permission for %s', f_path)
              logging.info('Change to username %s and group %s', self.chown_user[self.hostname], self.chown_group[self.hostname])
              logging.info('Set permission: %s', self.chmod[self.hostname] )
              logging.info('Set ownership and permissions at %s', config.get_hostname() )
              if config.get_hostname() == "midway-login1":
                subprocess.call(['chmod', self.chmod[self.hostname], f_path])
                subprocess.call(['chown', str(self.chown_user[self.hostname]+":"+self.chown_group[self.hostname]), f_path])
              elif config.get_hostname() == "tegner-login-1":
                subprocess.call(['chmod', self.chmod[self.hostname], f_path])
                subprocess.call(['chown', str(self.chown_user[self.hostname]+":"+self.chown_group[self.hostname]), f_path])
                subprocess.call(['setfacl', '-R', '-M', '/cfs/klemming/projects/xenon/misc/basic_file', f_path])
              else:
                logging.info('Analysis facility does not match')
            elif f_type == 'raw':
              logging.info('Change ownership and permission for %s', f_path)
              logging.info('Change to username %s and group %s', self.chown_user[self.hostname], self.chown_group[self.hostname])
              logging.info('Set permission: %s', self.chmod[self.hostname] )
              logging.info('Set ownership and permissions at %s', config.get_hostname() )
              if config.get_hostname() == "midway-login1":
                subprocess.call(['chmod', '-R', self.chmod[self.hostname], f_path])
                subprocess.call(['chown', '-R', str(self.chown_user[self.hostname]+":"+self.chown_group[self.hostname]), f_path])
              elif config.get_hostname() == "tegner-login-1":
                subprocess.call(['chmod', self.chmod[self.hostname], f_path])
                subprocess.call(['chown', str(self.chown_user[self.hostname]+":"+self.chown_group[self.hostname]), f_path])
                subprocess.call(['setfacl', '-R', '-M', '/cfs/klemming/projects/xenon/misc/basic', f_path])
              else:
                logging.info('Analysis facility does not match')

            else:
              logging.info("Nothing to change: Ownership/Permission")
Example #24
    def tsm_commands(self, method=None):

        host_xe1t_datamanager = """#!/bin/bash
echo "Basic Config@xe1tdatamanager"
source /home/xe1ttransfer/tsm_config/init_tsm.sh   
        """

        host_tegner = """#!/bin/bash
echo "Basic Config@Tegner"
export PATH="/cfs/klemming/projects/xenon/.adm/xenon-tsm/:$PATH"
        """

        general = {"xe1t-datamanager": host_xe1t_datamanager,
                   "tegner-login-1": host_teger}

        check_for_raw_data = """
dsmc query ba {path}    
        """

        check_method = """
echo "No method is selected: Do nothing"
        """

        incr_upload = """
dsmc incr {path}/
        """

        restore_path = """
dsmc rest {path_tsm}/ {path_restore}/ -followsymbolic=yes
        """

        check_install = """
dsmc
        """

        if method == "check-for-raw-data":
            return general[config.get_hostname()] + check_for_raw_data
        elif method is None:
            return general[config.get_hostname()]
        elif method == "incr-upload-path":
            return general[config.get_hostname()] + incr_upload
        elif method == "restore-path":
            return general[config.get_hostname()] + restore_path
        elif method == "check-installation":
            return general[config.get_hostname()] + check_install
        else:
            return general[config.get_hostname()] + check_method
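
The returned templates still contain {path}-style placeholders, so the caller presumably fills them with str.format before handing the script to a shell. A small sketch with a hypothetical path:

template = """
dsmc incr {path}/
"""
print(template.format(path="/home/xe1ttransfer/tsm/run_170101_0123"))  # hypothetical path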
Example #25
    def check(self, type='raw',
              warn=True):
        """Returns number of verified data locations

        Return the number of sites that have the same checksum as the master
        site.
        """
        n = 0

        for data_doc in self.run_doc['data']:
            # Only look at transferred data that is not untriggered
            if 'host' not in data_doc or \
                            data_doc['status'] != 'transferred' or \
                            data_doc['type'] == 'untriggered' or \
                            data_doc['type'] != type or \
                            'checksum' not in data_doc:
                continue

            # Rucio stores its own checksum, assume "transferred" is 1 good copy
            if data_doc['host'] == 'rucio-catalogue':
                n += 1

            # Grab main checksum and compare
            elif data_doc['checksum'] != self.get_main_checksum(**data_doc):

                if data_doc['host'] == config.get_hostname():
                    error = "Local checksum error " \
                            "run %d, %s %s" % (self.run_doc['number'], data_doc['type'], \
                                            data_doc['pax_version'])
                    if warn:
                        self.give_error(error)

            # Comparison did not fail so add 1 good copy
            else:
                n += 1

        return n
Example #26
    def local_data_finder(self, data_type, option_type, remote_host):
        datum_here = None  # Information about data here
        datum_there = None  # Information about data there

        version = 'v%s' % pax.__version__

        # Iterate over data locations to know status
        for datum in self.run_doc['data']:

            # Is host known?
            if 'host' not in datum or datum['type'] != data_type:
                continue

            transferred = (datum['status'] == 'transferred')

            # If the location refers to here
            if datum['host'] == config.get_hostname():
                # If uploading, we should have data
                if option_type == 'upload' and not transferred:
                    continue

                if datum['type'] == 'processed' and not version == datum['pax_version']:
                    continue

                datum_here = datum.copy()

            elif datum['host'] == remote_host:  # Is this the remote host?
                # If downloading, they should have data
                if option_type == 'download' and not transferred:
                    continue

                if datum['type'] == 'processed' and not version == datum['pax_version']:
                    continue

                datum_there = datum.copy()

        return datum_here, datum_there
Example #27
    def check(self, type='raw', warn=True):
        """Returns number of verified data locations

        Return the number of sites that have the same checksum as the master
        site.
        """
        n = 0

        for data_doc in self.run_doc['data']:
            # Only look at transferred data that is not untriggered
            if 'host' not in data_doc or \
                            data_doc['status'] != 'transferred' or \
                            data_doc['type'] == 'untriggered' or \
                            data_doc['type'] != type or \
                            'checksum' not in data_doc:
                continue

            # Rucio stores its own checksum, assume "transferred" is 1 good copy
            if data_doc['host'] == 'rucio-catalogue':
                n += 1

            # Grab main checksum and compare
            elif data_doc['checksum'] != self.get_main_checksum(**data_doc):

                if data_doc['host'] == config.get_hostname():
                    error = "Local checksum error " \
                            "run %d, %s %s" % (self.run_doc['number'], data_doc['type'], \
                                            data_doc['pax_version'])
                    if warn:
                        self.give_error(error)

            # Comparison did not fail so add 1 good copy
            else:
                n += 1

        return n
Example #28
    def each_run(self):
        # For each data location, see if this filename is in it
        for data_doc in self.run_doc['data']:
            # Not local, skip
            if 'host' not in data_doc or \
                            data_doc['host'] != config.get_hostname():
                continue

            if data_doc['location'] != self.location:
                continue

            # Notify run database
            if config.DATABASE_LOG is True:
                self.collection.update({'_id': self.run_doc['_id']},
                                       {'$pull': {'data': data_doc}})

            # Perform operation
            self.log.info("Removing %s" % (self.location))
            if os.path.isdir(data_doc['location']):
                shutil.rmtree(data_doc['location'])
            else:
                os.remove(self.location)

            break
Example #29
    def each_location(self, data_doc):
        if 'host' not in data_doc or data_doc['host'] != config.get_hostname():
            return  # Skip places where we can't locally access data

        if data_doc["status"] != "transferred":
            return

        comparison = self.get_main_checksum(**data_doc)

        if comparison is None:
            return

        if data_doc['checksum'] != comparison:
            self.give_error("Bad checksum %d, %s, %s" % (self.run_doc['number'], data_doc['host'], \
                            data_doc['type']))

            if data_doc['type'] == 'processed':
                self.give_error("Bad checksum %s" % data_doc['pax_version'])
                self.purge(data_doc)
                return

            # Check for 2 or more copies with raw data
            if self.check(data_doc['type'], warn=False) > 1:
                self.purge(data_doc)
Example #30
def get_number_in_queue(host=config.get_hostname(), partition=''):
    # print (len(get_queue(host, partition)), host, partition)
    return len(get_queue(host, partition))
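
A hedged sketch of how this helper might be used to throttle submissions; the threshold below is an assumption, not a value taken from the cax configuration.

MAX_QUEUED_JOBS = 50  # hypothetical limit

if get_number_in_queue('midway-login1', 'sandyb') >= MAX_QUEUED_JOBS:
    print("Queue full, not submitting more jobs for now")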
Example #31
    def each_location(self, data_doc):
        #print("each location")
        hostname = config.get_hostname()
        destination = config.get_config("tsm-server")
        self.variables()

        if data_doc['host'] == "xe1t-datamanager":
            self.checksum_xe1t = data_doc['checksum']
            logging.info("Found checksum for xe1t-datamanger: %s",
                         self.checksum_xe1t)
            return

        if destination['name'] == data_doc['host'] and data_doc['checksum'] is None and data_doc['status'] == 'transferred':
            # Add a checksum to the database entry in case there is none for
            # the tsm-server entry even though the status says transferred.
            logging.info(
                "There is a database entry for %s (transferred) but no checksum", data_doc['location'])

            # Init the TSMclient class:
            self.tsm = TSMclient()

            raw_data_location = data_doc['location']
            raw_data_filename = data_doc['location'].split('/')[-1]
            raw_data_path = config.get_config(config.get_hostname())['dir_raw']
            raw_data_tsm = config.get_config(config.get_hostname())['dir_tsm']
            tmp_data_path = raw_data_tsm + "tmp_checksum_test/"
            logging.info("Raw data location @xe1t-datamanager: %s",
                         raw_data_location)
            logging.info("Path to raw data: %s", raw_data_path)
            logging.info("Path to tsm data: %s", raw_data_tsm)
            logging.info("Path to temp. data: %s", tmp_data_path)
            logging.info("File/Folder for backup: %s", raw_data_filename)

            # Sanity Check
            if not self.tsm.check_client_installation():
                logging.info("There is a problem with your dsmc client")
                return

            # Make sure that temp. download directory exists:
            if not os.path.exists(tmp_data_path):
                os.makedirs(tmp_data_path)

            # Download it to a temp directory
            dfolder = tmp_data_path + "/" + raw_data_filename
            if os.path.exists(dfolder):
                logging.info(
                    "Temp. directory %s already exists -> Delete it now", dfolder)
                shutil.rmtree(dfolder)

            tsm_download_result = self.tsm.download(
                raw_data_tsm + raw_data_filename, tmp_data_path, raw_data_filename)
            if not os.path.exists(tmp_data_path + raw_data_filename):
                logging.info("Download to %s failed.", raw_data_path)

            # Do the checksum
            checksum_after = self.tsm.get_checksum_folder(
                tmp_data_path + "/" + raw_data_filename)
            logging.info("Summary of the download for checksum comparison:")
            logging.info("Number of downloaded files: %s",
                         tsm_download_result["tno_restored_objects"])
            logging.info("Transferred amount of data: %s",
                         tsm_download_result["tno_restored_bytes"])
            logging.info("Network transfer rate: %s",
                         tsm_download_result["tno_network_transfer_rate"])
            logging.info("Download time: %s",
                         tsm_download_result["tno_data_transfer_time"])
            logging.info("Number of failed downloads: %s",
                         tsm_download_result["tno_failed_objects"])
            logging.info("MD5 Hash (database entry|TSM-SERVER): %s",
                         data_doc['checksum'])
            logging.info(
                "MD5 Hash (database entry|xe1t-datamanager): %s", self.checksum_xe1t)
            logging.info("MD5 Hash (downloaded data): %s", checksum_after)

            # Add to runDB and compare
            # if data_doc['checksum'] == None and self.checksum_xe1t == checksum_after:
            if data_doc['checksum'] is None and self.checksum_xe1t == "no_checksum_xe1tdatam":
                logging.info("No checksum for database entry TSM-server")
                logging.info("Checksum for xe1t-datamanager is verified")

                if config.DATABASE_LOG:
                    logging.info("Notify the runDB to add checksum")
                    self.collection.update({'_id': self.run_doc['_id'],
                                            'data': {'$elemMatch': data_doc}},
                                           {'$set': {'data.$.checksum': checksum_after}})

                # Delete from temp directory
            # if data_doc['checksum'] == None and self.checksum_xe1t == checksum_after:
                logging.info(
                    "Delete temp. directory for checksum verification: %s", dfolder)
                shutil.rmtree(dfolder)
Example #32
    def copy_tsm(self, datum, destination, method, option_type):

        # Hard-coded sha512 checksum of an empty directory
        # (used to verify the goodness of the uploaded data)
        checksum_empty_dir = "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e"

        # Init the TSM client for tape backup from an extern class
        self.tsm = TSMclient()

        logging.info('Tape Backup to PDC STOCKHOLM')
        print(datum, destination, method, option_type)

        logging.debug("Notifying run database")
        datum_new = {'type': datum['type'],
                     'host': destination,
                     'status': 'transferring',
                     'location': "n/a",
                     'checksum': None,
                     'creation_time': datetime.datetime.utcnow(),
                     }
        logging.info("new entry for rundb: %s", datum_new)

        if config.DATABASE_LOG:
            result = self.collection.update_one({'_id': self.run_doc['_id'],
                                                 },
                                                {'$push': {'data': datum_new}})

            if result.matched_count == 0:
                self.log.error("Race condition!  Could not copy because another "
                               "process seemed to already start.")
                return

        raw_data_location = datum['location']
        raw_data_filename = datum['location'].split('/')[-1]
        raw_data_path = raw_data_location.replace(raw_data_filename, "")
        raw_data_tsm = config.get_config(config.get_hostname())['dir_tsm']
        logging.info("Raw data location @xe1t-datamanager: %s",
                     raw_data_location)
        logging.info("Path to raw data: %s", raw_data_path)
        logging.info("Path to tsm data: %s", raw_data_tsm)
        logging.info("File/Folder for backup: %s", raw_data_filename)

        # Do a simple pretest to analyse the directory that is going to be backed up;
        # continue only if there are files in the directory and no subfolders
        list_files = []
        list_folders = []
        for root, dirs, files in os.walk(raw_data_path + raw_data_filename):
            for name in files:
                list_files.append(name)
            for name in dirs:
                list_folders.append(name)

        # Sanity check if raw data folder contains a subfolder (mostly important for old raw data sets)
        if len(list_files) == 0 or len(list_folders) > 0:
            logging.info("ERROR: There are %s files in %s", len(
                list_files), raw_data_path + raw_data_filename)
            if len(list_folders) > 0:
                logging.info("ERROR: These folders are found in %s:",
                             raw_data_path + raw_data_filename)
                for i_folders in list_folders:
                    logging.info("  <> %s", i_folders)
                logging.info("Check the error(s) and start again")

                if config.DATABASE_LOG:
                    self.collection.update({'_id': self.run_doc['_id'],
                                            'data': {'$elemMatch': datum_new}},
                                           {'$set': {'data.$.status': "error",
                                                     'data.$.location': "n/a",
                                                     'data.$.checksum': "n/a"}})
            return
        else:
            logging.info("Pre-test of %s counts %s files for tape upload [successful]",
                         raw_data_path + raw_data_filename, len(list_files))

        # Do a checksum pre-test for double counts:
        checksum_pretest_list = []
        for i_file in files:
            f_path = os.path.join(raw_data_path, raw_data_filename, i_file)
            pre_test_checksum = ChecksumMethods.get_crc32(self, f_path)
            checksum_pretest_list.append(pre_test_checksum)

        double_counts = set(
            [x for x in checksum_pretest_list if checksum_pretest_list.count(x) > 1])

        if len(double_counts) > 0:
            logging.info("Pre checksum test: [failed]")
            logging.info("There are two or more identical checksums observed in %s", os.path.join(
                raw_data_path, raw_data_filename))
            if config.DATABASE_LOG:
                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {'$elemMatch': datum_new}},
                                       {'$set': {'data.$.status': "error",
                                                 'data.$.location': "n/a",
                                                 'data.$.checksum': "n/a"}})
            return
        else:
            logging.info("Pre checksum test: [succcessful]")

        # Check first if everything is fine with the dsmc client
        if not self.tsm.check_client_installation():
            logging.info("There is a problem with your dsmc client")
            if config.DATABASE_LOG:
                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {'$elemMatch': datum_new}},
                                       {'$set': {'data.$.status': "error",
                                                 'data.$.location': "n/a",
                                                 'data.$.checksum': "n/a"}})
            return

        logging.info("Start tape upload")

        # Prepare a copy from raw data location to tsm location ( including renaming)
        checksum_before_raw = self.tsm.get_checksum_folder(
            raw_data_path + raw_data_filename)
        file_list = []
        for (dirpath, dirnames, filenames) in os.walk(raw_data_path + raw_data_filename):
            file_list.extend(filenames)
            break

        if not os.path.exists(raw_data_tsm + raw_data_filename):
            os.makedirs(raw_data_tsm + raw_data_filename)

        for i_file in file_list:
            path_old = raw_data_path + raw_data_filename + "/" + i_file
            path_new = raw_data_tsm + raw_data_filename + \
                "/" + raw_data_filename + "_" + i_file
            if not os.path.exists(path_new):
                shutil.copy2(path_old, path_new)

        checksum_before_tsm = self.tsm.get_checksum_folder(
            raw_data_tsm + raw_data_filename)

        if checksum_before_raw != checksum_before_tsm:
            logging.info("Something went wrong during copy & rename")
            if config.DATABASE_LOG:
                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {'$elemMatch': datum_new}},
                                       {'$set': {'data.$.status': "error",
                                                 'data.$.location': "n/a",
                                                 'data.$.checksum': "n/a"}})

            return
        else:
            logging.info("Copy & rename: [successful] -> Checksums agree")

        tsm_upload_result = self.tsm.upload(raw_data_tsm + raw_data_filename)
        logging.info("Number of uploaded files: %s",
                     tsm_upload_result["tno_backedup"])
        logging.info("Number of inspected files: %s",
                     tsm_upload_result["tno_inspected"])
        logging.info("Number of failed files: %s",
                     tsm_upload_result["tno_failed"])
        logging.info("Transferred amount of data: %s",
                     tsm_upload_result["tno_bytes_transferred"])
        logging.info("Inspected amount of data: %s",
                     tsm_upload_result["tno_bytes_inspected"])
        logging.info("Upload time: %s",
                     tsm_upload_result["tno_data_transfer_time"])
        logging.info("Network transfer rate: %s",
                     tsm_upload_result["tno_network_transfer_rate"])
        logging.info("MD5 Hash (raw data): %s", checksum_before_tsm)

        test_download = os.path.join(raw_data_tsm, "tsm_verify_download")
        # Make sure that temp. download directory exists:
        if not os.path.exists(test_download):
            os.makedirs(test_download)
        logging.info("Start the re-download to %s", test_download)
        tsm_download_result = self.tsm.download(
            raw_data_tsm + raw_data_filename, test_download, raw_data_filename)
        logging.info("Finished the re-download")
        if not os.path.exists(test_download + "/" + raw_data_filename):
            logging.info("Download to %s failed. Checksum will not match",
                         test_download + "/" + raw_data_filename)
        else:
            logging.info("Download to %s successful. Folder exists",
                         test_download + "/" + raw_data_filename)

        checksum_after = self.tsm.get_checksum_folder(
            test_download + "/" + raw_data_filename)
        logging.info("Summary of the download for checksum comparison:")
        logging.info("Number of downloaded files: %s",
                     tsm_download_result["tno_restored_objects"])
        logging.info("Transferred amount of data: %s",
                     tsm_download_result["tno_restored_bytes"])
        logging.info("Network transfer rate: %s",
                     tsm_download_result["tno_network_transfer_rate"])
        logging.info("Download time: %s",
                     tsm_download_result["tno_data_transfer_time"])
        logging.info("Number of failed downloads: %s",
                     tsm_download_result["tno_failed_objects"])
        logging.info("MD5 Hash (raw data): %s", checksum_after)

        status = ""
        if checksum_before_tsm == checksum_after and checksum_empty_dir != checksum_before_tsm and checksum_empty_dir != checksum_after:
            logging.info("Upload to tape: [succcessful]")
            status = "transferred"
        else:
            logging.info("Upload to tape: [failed]")
            status = "error"

        # Print a warning if the checksum crosscheck fails!
        if checksum_empty_dir == checksum_before_tsm or checksum_empty_dir == checksum_after:
            logging.info(
                "Checksum test indicates an empty folder before or after the tape upload")
            logging.info("Check your raw data directory %s for files",
                         raw_data_tsm + raw_data_filename)

        # Delete check folder
        shutil.rmtree(raw_data_tsm + raw_data_filename)
        shutil.rmtree(test_download + "/" + raw_data_filename)
        logging.info("Finished to delete temp. directories: %s and %s",
                     raw_data_tsm + raw_data_filename, test_download + "/" + raw_data_filename)

        if config.DATABASE_LOG:
            self.collection.update({'_id': self.run_doc['_id'],
                                    'data': {'$elemMatch': datum_new}},
                                   {'$set': {'data.$.status': status,
                                             'data.$.location': raw_data_tsm + raw_data_filename,
                                             'data.$.checksum': checksum_after}})
            logging.info("Update database")

        return 0
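
A minimal standalone sketch of the pre-test used above: walk the raw data folder and refuse to continue if it is empty or contains subfolders (the path is hypothetical).

import os

def pretest(folder):
    list_files, list_folders = [], []
    for root, dirs, files in os.walk(folder):
        list_files.extend(files)
        list_folders.extend(dirs)
    return len(list_files) > 0 and len(list_folders) == 0

print(pretest('/home/xe1ttransfer/raw/run_170101_0123'))  # hypothetical path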
Example #33
    def copy_handshake(self, datum, destination, method, option_type, data_type):
        """Perform all the handshaking required with the run DB.

        :param datum: The dictionary data location describing data to be
                      transferred
        :type datum: dict
        :param destination: The host name where data should go to.
        :type destination: str
        :return:
        """

        # Get information about this destination
        destination_config = config.get_config(destination)

        self.log.info(option_type + "ing run %d to: %s" % (self.run_doc['number'],
                                                           destination))

        # Determine where data should be copied to
        if destination_config['dir_%s' % datum['type']] is not None:
            base_dir = destination_config['dir_%s' % datum['type']]
            if base_dir is None:
                self.log.info("no directory specified for %s" % datum['type'])
                return

            if datum['type'] == 'processed':
                self.log.info(datum)
                base_dir = os.path.join(base_dir, 'pax_%s' %
                                        datum['pax_version'])

            # Check directory existence on local host for download only
            if option_type == 'download' and not os.path.exists(base_dir):
                if destination != config.get_hostname():
                    raise NotImplementedError("Cannot create directory on another "
                                              "machine.")

                # Recursively make directories
                os.makedirs(base_dir)
        else:
            base_dir = "none"

        # Directory or filename to be copied
        filename = datum['location'].split('/')[-1]

        self.log.debug("Notifying run database")
        datum_new = {'type': datum['type'],
                     'host': destination,
                     'status': 'transferring',
                     'location': os.path.join(base_dir,
                                              filename),
                     'checksum': None,
                     'creation_time': datetime.datetime.utcnow(),
                     }

        if datum['type'] == 'processed':
            for variable in ('pax_version', 'pax_hash', 'creation_place'):
                datum_new[variable] = datum.get(variable)

        if method == "rucio" and option_type == "upload":
            # Init the rucio module when method==rucio is requested
            self.log.info(
                "Init rucio_mover module for Rucio transfers (upload)")
            self.rucio = RucioBase(self.run_doc)
            self.rucio.set_host(config.get_hostname())
            self.rucio.set_remote_host(destination)
            # Sanity check for rucio client
            if not self.rucio.sanity_checks():
                logging.info("The rucio sanity checks failed")
                return 0
            # Add two further database entries for rucio related uploads
            datum_new['rse'] = []
            datum_new['location'] = "n/a"
            datum_new['rule_info'] = "no_rule"

        if method == "rucio" and option_type == "download":
            rucio_catalogue_config = config.get_config("rucio-catalogue")

            self.log.info(
                "Init rucio_mover module for Rucio transfers (download)")

            # Load and config the download module of rucio/ruciax
            self.ruciodw = RucioDownload()
            self.ruciodw.SetDatabaseEntry(self.run_doc)
            self.ruciodw.ExternalDatabaseEntry()
            self.ruciodw.SetDownloadConfig(
                rucio_catalogue_config, destination_config)
            # Use a placeholder ("NA") path for the download destination
            datum_new['location'] = "NA"

        if config.DATABASE_LOG:
            result = self.collection.update_one({'_id': self.run_doc['_id'],
                                                 },
                                                {'$push': {'data': datum_new}})

            if result.matched_count == 0:
                self.log.error("Race condition!  Could not copy because another "
                               "process seemed to already start.")
                return

        self.log.info('Starting ' + method)

        try:  # try to copy
            self.copy(datum,
                      datum_new,
                      method,
                      option_type, data_type)
            # Checksumming follows on the local site for scp/rsync transfers.
            # On GRID sites cax cannot checksum, so the gfal-copy/lcg-cp
            # checksum is assumed to be sufficient; the status is set to
            # 'verifying' in both cases.
            # TO DO: Manually copy checksum to DB entry here
            status = 'verifying'

        except scp.SCPException as e:
            self.log.exception(e)
            status = 'error'

        # WARNING: This needs to be extended to catch gfal-copy errors
        except Exception:
            self.log.exception("Unexpected copy error")
            status = 'error'

        self.log.debug(method + " done, telling run database")

        if config.DATABASE_LOG:
            if method == "rucio" and option_type == "upload":
                logging.info("Following entries are added to the runDB:")
                logging.info("  * Status: %s",
                             self.rucio.get_rucio_info()['status'])
                logging.info("  * Location: %s",
                             self.rucio.get_rucio_info()['location'])
                logging.info("  * Checksum: %s",
                             self.rucio.get_rucio_info()['checksum'])
                logging.info("  * RSE: %s", self.rucio.get_rucio_info()['rse'])
                logging.info("  * Preliminary rule information: %s",
                             self.rucio.get_rucio_info()['rule_info'])

                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {'$elemMatch': datum_new}},
                                       {'$set': {
                                           'data.$.status': self.rucio.get_rucio_info()['status'],
                                           'data.$.location': self.rucio.get_rucio_info()['location'],
                                           'data.$.checksum': self.rucio.get_rucio_info()['checksum'],
                                           'data.$.rse': self.rucio.get_rucio_info()['rse'],
                                           'data.$.rule_info': self.rucio.get_rucio_info()['rule_info']
                                       }})

            elif method == "rucio" and option_type == "download":
                logging.info("Following entries are added to the runDB:")
                logging.info("  * Status: %s",
                             self.ruciodw.get_rucio_info()['status'])
                logging.info("  * Location: %s",
                             self.ruciodw.get_rucio_info()['location'])

                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {'$elemMatch': datum_new}},
                                       {'$set': {
                                           'data.$.status': self.ruciodw.get_rucio_info()['status'],
                                           'data.$.location': self.ruciodw.get_rucio_info()['location']
                                       }})

            else:
                # Fill the data entry if the method is not rucio
                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {'$elemMatch': datum_new}},
                                       {'$set': {'data.$.status': status}})

        if method == "rucio" and option_type == "upload":
            # Rucio 'side load' to set the transfer rules directly after the file upload
            if self.rucio.get_rucio_info()['status'] == "transferred":
                logging.info(
                    "Initiate the RucioRule for a first set of transfer rules")
                # Add the outcome of the rucio transfers to the new database
                # entry without reading the runDB again.
                datum_new['status'] = self.rucio.get_rucio_info()['status']
                datum_new['location'] = self.rucio.get_rucio_info()['location']
                datum_new['checksum'] = self.rucio.get_rucio_info()['checksum']
                datum_new['rse'] = self.rucio.get_rucio_info()['rse']
                datum_new['rule_info'] = self.rucio.get_rucio_info()[
                    'rule_info']

                # Init the RucioRule module and set its runDB entry manually
                self.rucio_rule = RucioRule()
                self.rucio_rule.set_db_entry_manually(self.run_doc)
                # Perform the initial rule setting:
                self.rucio_rule.set_possible_rules(
                    data_type=datum['type'], dbinfo=datum_new)
                logging.info("Status: transferred -> Transfer rules are set for %s",
                             self.rucio.get_rucio_info()['rse'])

                # Commented out due to upload section (rucio_mover) option 3!
                # No need to delete single file rules manually after upload.
                # let it sleep for 5 seconds:
                # logging.info("Sleep")
                # time.sleep(15)
                # logging.info("Sleep time finished")
                # self.log.info("Delete individual rules of the uploaded files:")
                # for i_file in self.rucio.get_rucio_info()['file_list']:
                #     i_location = "{iscope}:{ifile}".format(iscope=self.rucio.get_rucio_info()['scope_upload'],
                #                                            ifile=i_file.split("/")[-1])
                #     self.log.info("Time out for %s", i_location)
                #     self.rucio_rule.update_rule(i_location, self.rucio.get_rucio_info()['rse'][0], "10")
            else:
                logging.info(
                    "Something went wrong during the upload (error). No rules are set")

        elif method == "rucio" and option_type == "download":
            logging.info("<-- Finished download %s to location %s with status %s -->",
                         datum['location'], self.ruciodw.get_rucio_info()['location'], self.ruciodw.get_rucio_info()['status'])

        logging.debug(method + " done, telling run database")

        logging.info("End of " + option_type + "\n")
Beispiel #34
0
    def each_location(self, data_doc):
        if data_doc['host'] == config.get_hostname():
            self.locations.append(data_doc['location'])
Beispiel #35
0
    def copy_tsm_download(self, datum, destination, method, option_type):
        """A dedicated download function for downloads from tape storage"""
        self.tsm = TSMclient()

        logging.info('Tape Backup to PDC STOCKHOLM (Download)')

        raw_data_location = datum['location']
        raw_data_filename = datum['location'].split('/')[-1]
        raw_data_path = config.get_config(config.get_hostname())['dir_raw']
        raw_data_tsm = config.get_config(config.get_hostname())['dir_tsm']
        logging.info("Raw data location @xe1t-datamanager: %s",
                     raw_data_location)
        logging.info("Path to raw data: %s", raw_data_path)
        logging.info("Path to tsm data: %s", raw_data_tsm)
        logging.info("File/Folder for backup: %s", raw_data_filename)

        self.log.debug("Notifying run database")
        datum_new = {'type': datum['type'],
                     'host': destination,
                     'status': 'transferring',
                     'location': "n/a",
                     'checksum': None,
                     'creation_time': datetime.datetime.utcnow(),
                     }
        logging.info("new entry for rundb: %s", datum_new)

        if config.DATABASE_LOG:
            result = self.collection.update_one({'_id': self.run_doc['_id'],
                                                 },
                                                {'$push': {'data': datum_new}})

            if result.matched_count == 0:
                self.log.error("Race condition!  Could not copy because another "
                               "process seemed to already start.")
                return

        logging.info("Start tape download")

        # Sanity Check
        if not self.tsm.check_client_installation():
            logging.info("There is a problem with your dsmc client")
            return

        # Do download:
        tsm_download_result = self.tsm.download(
            raw_data_location, raw_data_path, raw_data_filename)
        if not os.path.exists(raw_data_path + raw_data_filename):
            logging.info("Download to %s failed.", raw_data_path)
            if config.DATABASE_LOG:
                # Notify the database if something went wrong during the download:
                logging.info("Notify the runDB: error")
                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {'$elemMatch': datum_new}},
                                       {'$set': {'data.$.status': "error",
                                                 'data.$.location': "n/a",
                                                 'data.$.checksum': "n/a",
                                                 }})

        # Rename
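        # The restored files carry a prefix in their names; the slice below assumes
        # it is exactly 12 characters long and strips it to recover the original
        # filenames.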
        file_list = []
        for (dirpath, dirnames, filenames) in os.walk(raw_data_path + raw_data_filename):
            file_list.extend(filenames)
            break

        for i_file in file_list:
            path_old = raw_data_path + raw_data_filename + "/" + i_file
            path_new = raw_data_path + raw_data_filename + "/" + i_file[12:]
            if not os.path.exists(path_new):
                os.rename(path_old, path_new)

        # Do checksum and summarize it:
        checksum_after = self.tsm.get_checksum_folder(
            raw_data_path + "/" + raw_data_filename)
        logging.info("Summary of the download for checksum comparison:")
        logging.info("Number of downloaded files: %s",
                     tsm_download_result["tno_restored_objects"])
        logging.info("Transferred amount of data: %s",
                     tsm_download_result["tno_restored_bytes"])
        logging.info("Network transfer rate: %s",
                     tsm_download_result["tno_network_transfer_rate"])
        logging.info("Download time: %s",
                     tsm_download_result["tno_data_transfer_time"])
        logging.info("Number of failed downloads: %s",
                     tsm_download_result["tno_failed_objects"])
        logging.info("MD5 Hash (database entry): %s", datum['checksum'])
        logging.info("MD5 Hash (downloaded data): %s", checksum_after)

        if checksum_after == datum['checksum']:
            logging.info(
                "The download/restore of the raw data set %s was [SUCCESSFUL]", raw_data_filename)
            logging.info("Raw data set located at: %s",
                         raw_data_path + raw_data_filename)
        elif checksum_after != datum['checksum']:
            logging.info(
                "The download/restore of the raw data set %s [FAILED]", raw_data_filename)
            logging.info("Checksums do not agree!")

        # Notify the database for final registration
        if checksum_after == datum['checksum']:

            if config.DATABASE_LOG:
                # Notify the database if everything was fine:
                logging.info("Notifiy the runDB: transferred")
                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {
                    '$elemMatch': datum_new}},
                    {'$set': {'data.$.status': "transferred",
                              'data.$.location': raw_data_path + raw_data_filename,
                              'data.$.checksum': checksum_after,
                              }
                     })
            else:
                logging.info("Database is not notified")

        elif checksum_after != datum['checksum']:
            if config.DATABASE_LOG:
                # Notify the database if something went wrong during the download:
                logging.info("Notifiy the runDB: error")
                self.collection.update({'_id': self.run_doc['_id'],
                                        'data': {
                    '$elemMatch': datum_new}},
                    {'$set': {'data.$.status': "error",
                              'data.$.location': "n/a",
                                                 'data.$.checksum': "n/a",
                              }
                     })
            else:
                logging.info("Database is not notified")

        return 0
Beispiel #36
0
    def each_location(self, data_doc):
        # print("each location")
        hostname = config.get_hostname()
        destination = config.get_config("tsm-server")
Beispiel #37
0
    def do_possible_transfers(self,
                              option_type='upload',
                              data_type='raw'):
        """Determine candidate transfers.
        :param option_type: 'upload' or 'download'
         :type str
        :param data_type: 'raw' or 'processed'
         :type str
        :return:
        """

        # Get the 'upload' or 'download' options.
        options = config.get_transfer_options(option_type)

        # If no options, can't do anything
        if options is None:
            return None, None

        # If should be purged, don't pull
        PurgeObj = BufferPurger()
        PurgeObj.run_doc = self.run_doc
        if option_type == 'download' and data_type == 'raw' and PurgeObj.check_purge_requirements():
            self.log.info("Skip raw download that would be purged")
            return None, None

        start = time.time()

        # For this run, where do we have transfer access?
        datum_there = None
        datum_here = None
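        # Loop over the configured remote hosts; at most one transfer is started
        # per call, since every matching branch breaks out of the loop.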
        for remote_host in options:
            self.log.debug(remote_host)

            # Get transfer protocol
            method = config.get_config(remote_host)['method']

            if not method:
                raise ValueError("Must specify transfer protocol (method) for " +
                                 remote_host)

            datum_here, datum_there = self.local_data_finder(data_type,
                                                             option_type,
                                                             remote_host)

            # Delete the old database entry if rucio transfers are requested
            # and a previous upload failed with a connection error.
            if method == "rucio" and datum_there is not None and \
                    datum_there['status'] == 'RSEreupload' and config.DATABASE_LOG:
                self.log.info(
                    "Former upload of %s failed with error", datum_here['location'])
                self.log.info(
                    "[('Connection aborted.', BadStatusLine('',))] -> Delete runDB status and start again")

                self.collection.update({'_id': self.run_doc['_id']},
                                       {'$pull': {'data': datum_there}})

            # Upload logic for everything except tape
            if option_type == 'upload' and method != "tsm" and datum_here and \
                    (datum_there is None or datum_there['status'] == 'RSEreupload'):
                self.copy_handshake(datum_here, remote_host,
                                    method, option_type, data_type)
                break

            # Download logic for everything except tape
            if option_type == 'download' and datum_there and datum_here is None and method != "tsm":
                self.copy_handshake(
                    datum_there, config.get_hostname(), method, option_type, data_type)
                break

            # Upload tsm:
            if option_type == 'upload' and datum_here and datum_there is None and method == "tsm":
                self.copy_tsm(datum_here, config.get_config(
                    remote_host)['name'], method, option_type)
                break

            # Download tsm:
            if option_type == 'download' and datum_there and datum_here is None and method == "tsm":
                self.copy_tsm_download(
                    datum_there, config.get_hostname(), method, option_type)
                break

        dataset = None
        if datum_there is not None:
            dataset = datum_there['location'].split('/').pop()
        elif datum_here is not None:
            dataset = datum_here['location'].split('/').pop()

        if dataset is not None:  # Not sure why it does this sometimes
            end = time.time()
            elapsed = end - start
            self.log.info(method + " " + option_type +
                          " dataset " + dataset + " took %d seconds" % elapsed)
Beispiel #38
0
    def each_location(self, data_doc):
        if data_doc['host'] == config.get_hostname():
            self.locations.append(data_doc['location'])
Beispiel #39
0
    def copyGFAL(self, datum_original, datum_destination, server, option_type, nstreams, grid_cert):
        """Copy data via GFAL function
        WARNING: Only SRM<->Local implemented (not yet SRM<->SRM)
        """
        dataset = datum_original['location'].split('/').pop()

        # gfal-copy arguments:
        #   -f: overwrite
        #   -r: recursive
        #   -n: number of streams (4 for now, but doesn't work on xe1t-datamanager so use lcg-cp instead)
        #   -t: timeout in seconds
        #   -K: specify checksum algorithm
        # --cert: path to initialized GRID certificate (voms-proxy-init  -voms xenon.biggrid.nl -valid 168:00 -out user_cert)
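        #   -p: create the destination directory if it does not already exist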
        command = "time gfal-copy -v -f -r -p -t 32400 -K adler32 --cert %s -n %d " % (
            grid_cert, nstreams)

        status = -1

        if option_type == 'upload':
            logging.info(option_type + ": %s to %s" % (datum_original['location'],
                                                       server + datum_destination['location']))

            # Simultaneous LFC registration
            #lfc_config = config.get_config("lfc")

            # Warning: Processed data dir not implemented for LFC here
            #lfc_address = lfc_config['hostname']+lfc_config['dir_'+datum_original['type']]

            # Use GSIFTP address instead of POSIX from Stash (to avoid login node)
            if config.get_hostname() == 'login':
                config_original = config.get_config(datum_original['host'])
                server_original = config_original['hostname']
                full_command = command + \
                    server_original + datum_original['location'] + " " + \
                    server + datum_destination['location']  # +" "+ \
                # lfc_address+"/"+dataset

            # Use SRM address instead of POSIX from Midway (to avoid worker nodes)
            # elif config.get_hostname() == 'midway-login1':
            #    server_original = 'srm://srm1.rcc.uchicago.edu:8443/srm/v2/server?SFN='
            #    full_command = command+ \
            #               server_original+datum_original['location']+" "+ \
            #               server+datum_destination['location'] #+" "+ \
                # lfc_address+"/"+dataset

            else:
                full_command = command + \
                    "file://" + datum_original['location'] + " " + \
                    server + datum_destination['location']  # +" "+ \
                # lfc_address+"/"+dataset

        else:  # download
            logging.info(option_type + ": %s to %s" % (server + datum_original['location'],
                                                       datum_destination['location']))

            full_command = command + \
                server + datum_original['location'] + " " + \
                "file://" + datum_destination['location']

        self.log.info(full_command)

        try:
            gfal_out = subprocess.check_output(
                full_command, stderr=subprocess.STDOUT, shell=True)

        except subprocess.CalledProcessError as gfal_exec:
            self.log.error(gfal_exec.output.rstrip().decode('ascii'))
            self.log.error("Error: gfal-copy status = %d\n" %
                           gfal_exec.returncode)
            raise

        gfal_out_ascii = gfal_out.rstrip().decode('ascii')
        if "error" in gfal_out_ascii.lower():  # Some errors don't get caught above
            self.log.error(gfal_out_ascii)
            raise RuntimeError("gfal-copy reported an error")

        else:
            self.log.info(gfal_out_ascii)  # To print timing
Beispiel #40
0
    def each_run(self):
        if self.has_tag('donotprocess'):
            self.log.debug("Do not process tag found, skip processing")
            return

        if 'processor' not in self.run_doc or \
                'DEFAULT' not in self.run_doc['processor']:
            self.log.debug("processor or DEFAUT tag not in run_doc, skip processing")
            return

        processing_parameters = self.run_doc['processor']['DEFAULT']
        if 'gains' not in processing_parameters or \
            'drift_velocity_liquid' not in processing_parameters or \
            'electron_lifetime_liquid' not in processing_parameters:
            self.log.info("gains or e-lifetime not in run_doc, skip processing")
            return

        thishost = config.get_hostname()

        versions = ['v%s' % pax.__version__]

        have_processed, have_raw = self.local_data_finder(thishost,
                                                          versions)

        # Skip if no raw data
        if not have_raw:
            self.log.debug("Skipping %s with no raw data",
                           self.run_doc['name'])
            return

        if self.run_doc['reader']['ini']['write_mode'] != 2:
            self.log.debug("write_mode != 2, skip processing")
            return

        # Get number of events in data set (not set for early runs <1000)
        events = self.run_doc.get('trigger', {}).get('events_built', 0)

        # Skip if 0 events in dataset
        if events == 0:
            self.log.debug("Skipping %s with 0 events", self.run_doc['name'])
            return

        # Specify number of cores for pax multiprocess
        if events < 1000:
            # Reduce to 1 CPU for small number of events (sometimes pax stalls
            # with too many CPU)
            ncpus = 1
        else:
            ncpus = config.NCPU - 1 # 4 based on Figure 2 here https://xecluster.lngs.infn.it/dokuwiki/doku.php?id=xenon:xenon1t:shockley:performance#automatic_processing
                                    # -1 for pax I/O worker

        # Process all specified versions
        for version in versions:
            pax_hash = "n/a"

            out_location = config.get_processing_dir(thishost,
                                                     version)

            if have_processed[version]:
                self.log.debug("Skipping %s already processed with %s",
                               self.run_doc['name'],
                               version)
                continue

            queue_list = qsub.get_queue(thishost)
            # Should check version here too
            if self.run_doc['name'] in queue_list:
                self.log.debug("Skipping %s currently in queue",
                               self.run_doc['name'])
                continue

            self.log.info("Processing %s with pax_%s (%s) and %d cores, output to %s",
                          self.run_doc['name'], version, pax_hash, ncpus,
                          out_location)


            _process(self.run_doc['name'], have_raw['location'], thishost,
                     version, pax_hash, out_location,
                     self.run_doc['detector'],
                     ncpus)
Beispiel #41
0
    def each_run(self):
        if self.has_tag('donotprocess'):
            self.log.debug("Do not process tag found, skip processing")
            return

        if 'processor' not in self.run_doc or \
                'DEFAULT' not in self.run_doc['processor']:
            self.log.debug(
                "processor or DEFAUT tag not in run_doc, skip processing")
            return

        processing_parameters = self.run_doc['processor']['DEFAULT']
        if 'gains' not in processing_parameters or \
            'drift_velocity_liquid' not in processing_parameters or \
            'electron_lifetime_liquid' not in processing_parameters:
            self.log.info(
                "gains or e-lifetime not in run_doc, skip processing")
            return

        thishost = config.get_hostname()

        if thishost != 'midway-login1':
            return

        versions = ['v%s' % pax.__version__]

        have_processed, have_raw = self.local_data_finder(thishost, versions)

        # Skip if no raw data
        if not have_raw:
            self.log.debug("Skipping %s with no raw data",
                           self.run_doc['name'])
            return

        if self.run_doc['reader']['ini']['write_mode'] != 2:
            self.log.debug("write_mode != 2, skip processing")
            return

        # Get number of events in data set (not set for early runs <1000)
        events = self.run_doc.get('trigger', {}).get('events_built', 0)

        # Skip if 0 events in dataset
        if events == 0:
            self.log.debug("Skipping %s with 0 events", self.run_doc['name'])
            return

        # Specify number of cores for pax multiprocess
        if events < 1000:
            # Reduce to 1 CPU for small number of events (sometimes pax stalls
            # with too many CPU)
            ncpus = 1
        else:
            ncpus = config.NCPU - 1  # 4 based on Figure 2 here https://xecluster.lngs.infn.it/dokuwiki/doku.php?id=xenon:xenon1t:shockley:performance#automatic_processing
            # -1 for pax I/O worker

        # Process all specified versions
        for version in versions:
            pax_hash = "n/a"

            out_location = config.get_processing_dir(thishost, version)

            if have_processed[version]:
                self.log.debug("Skipping %s already processed with %s",
                               self.run_doc['name'], version)
                continue

            queue_list = qsub.get_queue(thishost)
            # Should check version here too
            if self.run_doc['name'] in queue_list:
                self.log.debug("Skipping %s currently in queue",
                               self.run_doc['name'])
                continue

            self.log.info(
                "Processing %s with pax_%s (%s) and %d cores, output to %s",
                self.run_doc['name'], version, pax_hash, ncpus, out_location)

            _process(self.run_doc['name'], have_raw['location'], thishost,
                     version, pax_hash, out_location, self.run_doc['detector'],
                     ncpus)
Beispiel #42
0
    def each_run(self):
        """Set ownership and permissons for files/folders"""
        for data_doc in self.run_doc['data']:
            # Is not local, skip
            if 'host' not in data_doc or data_doc[
                    'host'] != config.get_hostname():
                continue

            #extract path:
            f_path = data_doc['location']
            f_type = data_doc['type']

            #apply changes according to processed/raw and analysis facility
            if f_type == 'processed':
                logging.info('Change ownership and permission for %s', f_path)
                logging.info('Change to username %s and group %s',
                             self.chown_user[self.hostname],
                             self.chown_group[self.hostname])
                logging.info('Set permission: %s', self.chmod[self.hostname])
                logging.info('Set ownership and permissions at %s',
                             config.get_hostname())
                if config.get_hostname() == "midway-login1":
                    subprocess.call(
                        ['chmod', self.chmod[self.hostname], f_path])
                    subprocess.call([
                        'chown',
                        str(self.chown_user[self.hostname] + ":" +
                            self.chown_group[self.hostname]), f_path
                    ])
                elif config.get_hostname() == "tegner-login-1":
                    subprocess.call(
                        ['chmod', self.chmod[self.hostname], f_path])
                    subprocess.call([
                        'chown',
                        str(self.chown_user[self.hostname] + ":" +
                            self.chown_group[self.hostname]), f_path
                    ])
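                    # Additionally apply the project's ACL template on tegner
                    # (setfacl -M reads the ACL entries to apply from a file).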
                    subprocess.call([
                        'setfacl', '-R', '-M',
                        '/cfs/klemming/projects/xenon/misc/basic_file', f_path
                    ])
                else:
                    logging.info('Analysis facility does not match')
            elif f_type == 'raw':
                logging.info('Change ownership and permission for %s', f_path)
                logging.info('Change to username %s and group %s',
                             self.chown_user[self.hostname],
                             self.chown_group[self.hostname])
                logging.info('Set permission: %s', self.chmod[self.hostname])
                logging.info('Set ownership and permissions at %s',
                             config.get_hostname())
                if config.get_hostname() == "midway-login1":
                    subprocess.call(
                        ['chmod', '-R', self.chmod[self.hostname], f_path])
                    subprocess.call([
                        'chown', '-R',
                        str(self.chown_user[self.hostname] + ":" +
                            self.chown_group[self.hostname]), f_path
                    ])
                elif config.get_hostname() == "tegner-login-1":
                    subprocess.call(
                        ['chmod', self.chmod[self.hostname], f_path])
                    subprocess.call([
                        'chown',
                        str(self.chown_user[self.hostname] + ":" +
                            self.chown_group[self.hostname]), f_path
                    ])
                    subprocess.call([
                        'setfacl', '-R', '-M',
                        '/cfs/klemming/projects/xenon/misc/basic', f_path
                    ])
                else:
                    logging.info('Analysis facility does not match')

            else:
                logging.info("Nothing to change: Ownership/Permission")
Beispiel #43
0
File: qsub.py  Project: rynge/cax
def get_number_in_queue(host=config.get_hostname(), partition=''):
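    # Note: the default for 'host' is evaluated only once, when this module is
    # first imported.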
    # print (len(get_queue(host, partition)), host, partition)
    return len(get_queue(host, partition))
Beispiel #44
0
    def each_location(self, data_doc):
        # Only data waiting to be verified
        if data_doc[
                'status'] != 'verifying':  # and data_doc['status'] != 'transferred':
            self.log.debug('Location ' + data_doc['host'] +
                           ' does not need to add new checksum')
            return

        if data_doc['status'] == 'transferred' and \
           (config.get_hostname() == 'xe1t-datamanager' or config.get_hostname() == 'login'):
            return

        # Data must be hosted somewhere
        if 'host' not in data_doc:
            return

        # Data must be here locally
        if data_doc['host'] != config.get_hostname():

            # Special case of midway-srm accessible via POSIX on midway-login1
            if not (data_doc['host'] == "midway-srm"
                    and config.get_hostname() == "midway-login1"):
                self.log.debug('Location not here')
                return

        # This status is given after checksumming
        status = 'transferred'

        # Find file and perform checksum
        if os.path.isdir(data_doc['location']):
            value = checksumdir.dirhash(data_doc['location'], 'sha512')
        elif os.path.isfile(data_doc['location']):
            value = checksumdir._filehash(data_doc['location'], hashlib.sha512)
        else:
            # Data not actually found
            self.log.error("Location %s not found." % data_doc['location'])
            value = None
            status = 'error'

        if config.DATABASE_LOG:
            if data_doc['status'] == 'verifying':
                self.log.info("Adding a checksum to run "
                              "%d %s" %
                              (self.run_doc['number'], data_doc['type']))
                self.collection.update(
                    {
                        '_id': self.run_doc['_id'],
                        'data': {
                            '$elemMatch': data_doc
                        }
                    }, {
                        '$set': {
                            'data.$.status': status,
                            'data.$.checksum': value
                        }
                    })
            elif data_doc['checksum'] != value or status == 'error':
                self.log.info("Checksum fail "
                              "%d %s" %
                              (self.run_doc['number'], data_doc['type']))
                self.collection.update(
                    {
                        '_id': self.run_doc['_id'],
                        'data': {
                            '$elemMatch': data_doc
                        }
                    }, {'$set': {
                        'data.$.checksumproblem': True
                    }})