def update_dmfilestat_diskusage(resultpk):
    '''
    Task to update DMFileStat.diskspace for all filesets associated with this resultpk.
    This task is launched at the end of pipeline execution.
    NOTE: This can be a long-lived task
    '''
    logid = {'logid': "%s" % ('tasks')}
    try:
        result = Results.objects.get(pk=resultpk)
        search_dirs = [result.get_report_dir(), result.experiment.expDir]
        # Walk the filesystem once and reuse the cached listing for every fileset type.
        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=result.get_report_dir(), save_list=True)
        for dmtype in FILESET_TYPES:
            dmfilestat = result.get_filestat(dmtype)
            dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
    except SoftTimeLimitExceeded:
        # NOTE(review): assumes the time limit trips during the walk, after
        # 'result' is bound; Results.objects.get() is a fast lookup.
        logger.warn("Time exceeded update_diskusage for (%d) %s" % (resultpk, result.resultsName), extra=logid)
    # Any other exception propagates to the caller (the original had a
    # redundant bare 'except: raise' here).

    try:
        disk_total = 0
        for dmfilestat in [result.get_filestat(dmtype) for dmtype in FILESET_TYPES]:
            if dmfilestat.dmfileset.type == dmactions_types.SIG:
                # Signal-processing files are accounted against the Experiment.
                dmfilestat.result.experiment.diskusage = dmfilestat.diskspace if dmfilestat.diskspace is not None else 0
                dmfilestat.result.experiment.save()
            else:
                partial = dmfilestat.diskspace
                disk_total += int(partial) if partial is not None else 0
        result.diskusage = disk_total
        result.save()
        # See dmaction._update_related_objects() which also updates Exp & Results diskusage fields
    except Exception:
        logger.error(traceback.format_exc(), extra=logid)
        raise
def update_dmfilestats_diskspace(dmfilestat):
    ''' Task to update DMFileStat.diskspace for a single dmfilestat '''
    # Define logid locally; the original referenced an undefined name inside
    # the exception handler, which would raise NameError when the handler fired.
    logid = {'logid': "%s" % ('tasks')}
    search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]
    try:
        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=dmfilestat.result.get_report_dir())
        dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
    except Exception:
        # logger.exception already records the traceback; message kept for log parity
        logger.exception(traceback.format_exc(), extra=logid)
def update_dmfilestats_diskspace(dmfilestat):
    """ Task to update DMFileStat.diskspace for a single dmfilestat """
    # Define logid locally; the original referenced an undefined name inside
    # the exception handler, which would raise NameError when the handler fired.
    logid = {"logid": "%s" % ("tasks")}
    search_dirs = [
        dmfilestat.result.get_report_dir(),
        dmfilestat.result.experiment.expDir,
    ]
    try:
        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=dmfilestat.result.get_report_dir())
        dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
    except Exception:
        # logger.exception already records the traceback; message kept for log parity
        logger.exception(traceback.format_exc(), extra=logid)
def backfill_dmfilestats_diskspace():
    '''
    Backfill records with DMFileStat.diskspace = None, one at a time.
    These could be older data sets or new ones where update_diskusage task failed.
    '''
    # Define logid locally; the original referenced an undefined name inside
    # the exception handler, which would raise NameError when the handler fired.
    logid = {'logid': "%s" % ('tasks')}
    dmfilestats = DMFileStat.objects.filter(
        diskspace=None, action_state='L', files_in_use='').order_by('-created')
    # exists() avoids the full COUNT(*) the original issued via count() > 0
    if dmfilestats.exists():
        dmfilestat = dmfilestats[0]
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]
        try:
            cached_file_list = dm_utils.get_walk_filelist(
                search_dirs, list_dir=dmfilestat.result.get_report_dir(), save_list=True)
            dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
        except Exception:
            logger.error(traceback.format_exc(), extra=logid)
            raise
def main(cl_args):
    '''List (and optionally repair) DMFileStat records missing diskspace, per fileset type.'''
    for dmtype in FILESET_TYPES:
        sys.stdout.write("%s\n" % dmtype)
        stats = models.DMFileStat.objects.filter(dmfileset__type=dmtype) \
            .filter(action_state__in=['L', 'S', 'N', 'A']) \
            .filter(diskspace=None) \
            .order_by('created')
        num_datasets = 0
        for index, item in enumerate([stat for stat in stats if stat.diskspace is None]):
            if index == 0:
                sys.stdout.write("Missing disk space:\n")
            # One-based count; the original stored the zero-based index, which
            # mis-reported "No datasets found" when exactly one dataset existed.
            num_datasets = index + 1
            if cl_args.repair:
                sys.stdout.write("\t%s..." % (item.result.resultsName))
                try:
                    if DEBUG:
                        sys.stdout.write("\nWe would run update_diskspace() here, if not in debug mode")
                    else:
                        sys.stdout.write("%d\n" % update_diskspace(item))
                except Exception:
                    # print() form is valid in both Python 2 and 3
                    print(traceback.format_exc())
            else:
                sys.stdout.write("\t%s\n" % item.result.resultsName)
        if num_datasets == 0:
            sys.stdout.write("...No datasets found\n")
    sys.exit(0)
def main(cl_args):
    """List (and optionally repair) DMFileStat records missing diskspace, per fileset type."""
    for dmtype in FILESET_TYPES:
        sys.stdout.write("%s\n" % dmtype)
        stats = (
            models.DMFileStat.objects.filter(dmfileset__type=dmtype)
            .filter(action_state__in=["L", "S", "N", "A"])
            .filter(diskspace=None)
            .order_by("created")
        )
        num_datasets = 0
        for index, item in enumerate(
                [stat for stat in stats if stat.diskspace is None]):
            if index == 0:
                sys.stdout.write("Missing disk space:\n")
            # One-based count; the original stored the zero-based index, which
            # mis-reported "No datasets found" when exactly one dataset existed.
            num_datasets = index + 1
            if cl_args.repair:
                sys.stdout.write("\t%s..." % (item.result.resultsName))
                try:
                    if DEBUG:
                        sys.stdout.write(
                            "\nWe would run update_diskspace() here, if not in debug mode"
                        )
                    else:
                        sys.stdout.write("%d\n" % update_diskspace(item))
                except Exception:
                    print(traceback.format_exc())
            else:
                sys.stdout.write("\t%s\n" % item.result.resultsName)
        if num_datasets == 0:
            sys.stdout.write("...No datasets found\n")
    sys.exit(0)
def update_dmfilestat_diskusage(resultpk):
    """
    Task to update DMFileStat.diskspace for all filesets associated with this resultpk.
    This task is launched at the end of pipeline execution.
    NOTE: This can be a long-lived task
    """
    logid = {"logid": "%s" % ("tasks")}
    try:
        result = Results.objects.get(pk=resultpk)
        search_dirs = [result.get_report_dir(), result.experiment.expDir]
        # Walk the filesystem once and reuse the cached listing for every fileset type.
        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=result.get_report_dir(), save_list=True)
        for dmtype in FILESET_TYPES:
            dmfilestat = result.get_filestat(dmtype)
            dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
    except SoftTimeLimitExceeded:
        # NOTE(review): assumes the time limit trips during the walk, after
        # 'result' is bound; Results.objects.get() is a fast lookup.
        logger.warn(
            "Time exceeded update_diskusage for (%d) %s" % (resultpk, result.resultsName),
            extra=logid,
        )
    # Any other exception propagates to the caller (the original had a
    # redundant bare 'except: raise' here).

    try:
        disk_total = 0
        for dmfilestat in [result.get_filestat(dmtype) for dmtype in FILESET_TYPES]:
            if dmfilestat.dmfileset.type == dmactions_types.SIG:
                # Signal-processing files are accounted against the Experiment.
                dmfilestat.result.experiment.diskusage = (
                    dmfilestat.diskspace if dmfilestat.diskspace is not None else 0)
                dmfilestat.result.experiment.save()
            else:
                partial = dmfilestat.diskspace
                disk_total += int(partial) if partial is not None else 0
        result.diskusage = disk_total
        result.save()
        # See dmaction._update_diskspace_and_diskusage() which also updates Exp & Results diskusage fields
    except Exception:
        logger.error(traceback.format_exc(), extra=logid)
        raise
def backfill_dmfilestats_diskspace():
    '''
    Backfill records with DMFileStat.diskspace = None, one at a time.
    These could be older data sets or new ones where update_diskusage task failed.
    '''
    # Define logid locally; the original referenced an undefined name inside
    # the exception handler, which would raise NameError when the handler fired.
    logid = {'logid': "%s" % ('tasks')}
    dmfilestats = DMFileStat.objects.filter(
        diskspace=None, action_state='L', files_in_use='').order_by('-created')
    # exists() avoids the full COUNT(*) the original issued via count() > 0
    if dmfilestats.exists():
        dmfilestat = dmfilestats[0]
        search_dirs = [
            dmfilestat.result.get_report_dir(),
            dmfilestat.result.experiment.expDir,
        ]
        try:
            cached_file_list = dm_utils.get_walk_filelist(
                search_dirs,
                list_dir=dmfilestat.result.get_report_dir(),
                save_list=True)
            dmfilestat_utils.update_diskspace(dmfilestat, cached=cached_file_list)
        except Exception:
            logger.error(traceback.format_exc(), extra=logid)
            raise
def _action_complete_update(user, user_comment, dmfilestat, action):
    '''
    Finalize a completed DM action: pick the new action_state and user-facing
    message for the action, refresh diskspace/diskusage bookkeeping, and update
    related objects.
    '''
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    if action == ARCHIVE:
        action_state = 'AD'
        msg = "%0.1f MB %s archived to %s." % (dmfilestat.diskspace, dmfilestat.dmfileset.type, dmfilestat.archivepath)
    elif action == DELETE:
        action_state = 'DD'
        msg = "%0.1f MB %s deleted." % (dmfilestat.diskspace, dmfilestat.dmfileset.type)
    elif action == EXPORT:
        action_state = 'L'
        msg = "%0.1f MB %s exported to %s." % (dmfilestat.diskspace, dmfilestat.dmfileset.type, dmfilestat.archivepath)
        # Export leaves data local, so clear the archive path marker.
        dmfilestat.archivepath = None
        dmfilestat.save()
    elif action == TEST:
        return
    else:
        # Fail fast with a clear error; the original fell through and crashed
        # with NameError on 'msg'/'action_state' at the call below.
        raise ValueError("Unknown dm action: %s" % action)

    # Update disk usage values in dmfilestat objects after a delete or archive action. Should be zero or close.
    # update diskusage on Experiment or Results object if data was deleted/moved
    if action != EXPORT:
        diskspace = update_diskspace(dmfilestat)
        if dmfilestat.dmfileset.type == dmactions_types.SIG:
            dmfilestat.result.experiment.diskusage = diskspace if diskspace is not None else 0
            dmfilestat.result.experiment.save()
        else:
            result = dmfilestat.result
            disk_total = 0
            mylist = [
                result.get_filestat(dmactions_types.BASE).diskspace,
                result.get_filestat(dmactions_types.OUT).diskspace,
                result.get_filestat(dmactions_types.INTR).diskspace,
            ]
            for partial in mylist:
                disk_total += int(partial) if partial is not None else 0
            result.diskusage = disk_total
            result.save()
    # See data/tasks.update_dmfilestat_diskusage() which also updates Exp & Results diskusage fields
    _update_related_objects(user, user_comment, dmfilestat, action, msg, action_state)
def _update_diskspace_and_diskusage(dmfilestat):
    '''
    Recompute dmfilestat.diskspace and propagate the new value to the owning
    Experiment (for signal-processing filesets) or Results object (all others).
    '''
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    diskspace = update_diskspace(dmfilestat)

    # update diskusage on Experiment or Results object if data was deleted/moved
    # See data/tasks.update_dmfilestat_diskusage() which also updates Exp & Results diskusage fields
    if dmfilestat.dmfileset.type == dmactions_types.SIG:
        dmfilestat.result.experiment.diskusage = diskspace if diskspace is not None else 0
        dmfilestat.result.experiment.save()
    else:
        result = dmfilestat.result
        partials = [
            result.get_filestat(dmactions_types.BASE).diskspace,
            result.get_filestat(dmactions_types.OUT).diskspace,
            result.get_filestat(dmactions_types.INTR).diskspace,
        ]
        # Sum the known sizes; None means the size has not been computed yet.
        result.diskusage = sum(int(partial) for partial in partials if partial is not None)
        result.save()
def destination_validation(dmfilestat, backup_directory=None, manual_action=False):
    '''
    Tests to validate destination directory:
    Does destination directory exist.
    Is there sufficient disk space.
    Write permissions.
    '''
    def _skipdiskspacecheck(directory):
        '''
        The hidden file .no_size_check should be placed into the root directory of the
        scratch drive mounted on the local system for the tape drive system.
        '''
        if os.path.exists(os.path.join(directory, ".no_size_check")):
            logger.info("%s: Exists: %s" % (sys._getframe().f_code.co_name,
                                            os.path.join(directory, ".no_size_check")), extra=logid)
            return True
        else:
            logger.info("%s: Not Found: %s" % (sys._getframe().f_code.co_name,
                                               os.path.join(directory, ".no_size_check")), extra=logid)
            return False

    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    if backup_directory in [None, 'None', '']:
        backup_directory = dmfilestat.dmfileset.backup_directory

    # check for valid destination
    try:
        if backup_directory in [None, 'None', '', '/']:
            raise DMExceptions.MediaNotSet(
                "Backup media for %s is not configured. Please use Data Management Configuration page."
                % dmfilestat.dmfileset.type)
        if not os.path.isdir(backup_directory):
            raise DMExceptions.MediaNotAvailable(
                "Backup media for %s is not a directory: %s" % (dmfilestat.dmfileset.type, backup_directory))
        # check if destination is external filesystem
        # ie, catch the error of writing to a mountpoint which is unmounted.
        # Use the tool 'mountpoint' which returns 0 if its mountpoint and mounted
        # If its a subdirectory to a mountpoint, then the isdir() test will fail when not mounted.
        if not is_mounted(backup_directory):
            raise DMExceptions.MediaNotAvailable(
                "Backup media for %s is not mounted: %s" % (dmfilestat.dmfileset.type, backup_directory))
    except Exception as e:
        logger.error("%s" % e, extra=logid)
        raise

    # check for sufficient disk space (units: kilobytes)
    if _skipdiskspacecheck(backup_directory):
        logger.warn("%s - skipping destination disk space check" % (sys._getframe().f_code.co_name), extra=logid)
    else:
        if dmfilestat.diskspace is None:
            diskspace = update_diskspace(dmfilestat)
        else:
            diskspace = dmfilestat.diskspace

        try:
            freespace = getSpaceMB(backup_directory)
            pending = 0
            if manual_action:
                # add up all objects that will be processed before this one
                exp_ids = []
                for obj in DMFileStat.objects.filter(action_state__in=['AG', 'EG', 'SA', 'SE']):
                    try:
                        # skip objects destined for a different backup directory
                        if obj.archivepath and not os.path.normpath(obj.archivepath).startswith(
                                os.path.normpath(backup_directory)):
                            continue
                        elif not os.path.normpath(obj.dmfileset.backup_directory).startswith(
                                os.path.normpath(backup_directory)):
                            continue
                        if obj.dmfileset.type == dmactions_types.SIG:
                            # count signal-processing data once per experiment
                            if obj.result.experiment_id not in exp_ids:
                                exp_ids.append(obj.result.experiment_id)
                                pending += obj.diskspace
                        else:
                            pending += obj.diskspace
                    except Exception:
                        # best-effort accounting: skip objects with missing data
                        pass

            logger.debug("Required %dMB, pending %dMB, free %dMB" % (diskspace, pending, freespace), extra=logid)
            if diskspace >= freespace:
                raise DMExceptions.InsufficientDiskSpace(
                    "Not enough space to write files at %s (free=%dMB)" % (backup_directory, freespace))
            elif diskspace >= (freespace - pending):
                raise DMExceptions.InsufficientDiskSpace(
                    "Not enough space to write files at %s (free=%dMB, pending=%dMB)"
                    % (backup_directory, freespace, pending))
        except Exception as e:
            logger.debug("%s" % str(e), extra=logid)
            raise

    # check for write permission.
    cmd = ['sudo', '/opt/ion/iondb/bin/sudo_utils.py', 'check_write_permission', backup_directory]
    try:
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        _, stderrdata = process.communicate()
    except Exception as err:
        logger.debug("Sub Process execution failed %s" % err, extra=logid)
        # Re-raise: the original fell through here with 'process' unbound and
        # crashed with NameError on the returncode test below.
        raise

    if process.returncode:
        raise DMExceptions.FilePermission(stderrdata)

    # check for existence of source directory - needed first to create destination folder
    # and, of course, as source of files to copy. but we check here instead of failing halfway
    # thru the setup. This shows inconsistency between dmfilestat action_status and filesystem.
    if dmfilestat.dmfileset.type == dmactions_types.SIG:
        src_dir = dmfilestat.result.experiment.expDir
    else:
        src_dir = dmfilestat.result.get_report_dir()
    if not os.path.exists(src_dir):
        raise DMExceptions.SrcDirDoesNotExist(src_dir)
def destination_validation(dmfilestat, backup_directory=None, manual_action=False):
    '''
    Tests to validate destination directory:
    Does destination directory exist.
    Is there sufficient disk space.
    Write permissions.
    '''
    def _skipdiskspacecheck(directory):
        '''
        The hidden file .no_size_check should be placed into the root directory of the
        scratch drive mounted on the local system for the tape drive system.
        '''
        if os.path.exists(os.path.join(directory, ".no_size_check")):
            # extra=logid added for consistency with the rest of this function's logging
            logger.info("%s: Exists: %s" % (sys._getframe().f_code.co_name,
                                            os.path.join(directory, ".no_size_check")), extra=logid)
            return True
        else:
            logger.info("%s: Not Found: %s" % (sys._getframe().f_code.co_name,
                                               os.path.join(directory, ".no_size_check")), extra=logid)
            return False

    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    if backup_directory in [None, 'None', '']:
        backup_directory = dmfilestat.dmfileset.backup_directory

    # check for valid destination
    try:
        if backup_directory in [None, 'None', '', '/']:
            raise DMExceptions.MediaNotSet(
                "Backup media for %s is not configured. Please use Data Management Configuration page."
                % dmfilestat.dmfileset.type)
        if not os.path.isdir(backup_directory):
            raise DMExceptions.MediaNotAvailable(
                "Backup media for %s is not a directory: %s" % (dmfilestat.dmfileset.type, backup_directory))
        # check if destination is external filesystem
        # ie, catch the error of writing to a mountpoint which is unmounted.
        # Use the tool 'mountpoint' which returns 0 if its mountpoint and mounted
        # If its a subdirectory to a mountpoint, then the isdir() test will fail when not mounted.
        if not is_mounted(backup_directory):
            raise DMExceptions.MediaNotAvailable(
                "Backup media for %s is not mounted: %s" % (dmfilestat.dmfileset.type, backup_directory))
    except Exception as e:
        logger.error("%s" % e, extra=logid)
        raise

    # check for sufficient disk space (units: kilobytes)
    if _skipdiskspacecheck(backup_directory):
        logger.warn("%s - skipping destination disk space check" % (sys._getframe().f_code.co_name), extra=logid)
    else:
        if dmfilestat.diskspace is None:
            diskspace = update_diskspace(dmfilestat)
        else:
            diskspace = dmfilestat.diskspace

        try:
            freespace = getSpaceMB(backup_directory)
            pending = 0
            if manual_action:
                # add up all objects that will be processed before this one
                exp_ids = []
                for obj in DMFileStat.objects.filter(action_state__in=['AG', 'EG', 'SA', 'SE']):
                    try:
                        # skip objects destined for a different backup directory
                        if obj.archivepath and not os.path.normpath(obj.archivepath).startswith(
                                os.path.normpath(backup_directory)):
                            continue
                        elif not os.path.normpath(obj.dmfileset.backup_directory).startswith(
                                os.path.normpath(backup_directory)):
                            continue
                        if obj.dmfileset.type == dmactions_types.SIG:
                            # count signal-processing data once per experiment
                            if obj.result.experiment_id not in exp_ids:
                                exp_ids.append(obj.result.experiment_id)
                                pending += obj.diskspace
                        else:
                            pending += obj.diskspace
                    except Exception:
                        # best-effort accounting: skip objects with missing data
                        pass

            logger.debug("Required %dMB, pending %dMB, free %dMB" % (diskspace, pending, freespace), extra=logid)
            if diskspace >= freespace:
                raise DMExceptions.InsufficientDiskSpace(
                    "Not enough space to write files at %s (free=%dMB)" % (backup_directory, freespace))
            elif diskspace >= (freespace - pending):
                raise DMExceptions.InsufficientDiskSpace(
                    "Not enough space to write files at %s (free=%dMB, pending=%dMB)"
                    % (backup_directory, freespace, pending))
        except Exception as e:
            logger.debug("%s" % str(e), extra=logid)
            raise

    # check for write permission. NOTE: this function is called by apache2 user while the action function
    # will be executed within a celery task which takes the uid/gid of celeryd process - in our case that is currently root.
    # This test is too restrictive.
    try:
        foo = tempfile.NamedTemporaryFile(dir=backup_directory)
        foo.close()
    except Exception as e:
        # Not every Exception carries .errno; getattr avoids a masking AttributeError.
        if getattr(e, 'errno', None) in [errno.EPERM, errno.EACCES]:  # Operation not permitted
            errmsg = "Insufficient write permission in %s" % backup_directory
        else:
            errmsg = e
        logger.error(errmsg, extra=logid)
        raise DMExceptions.FilePermission(errmsg)

    # check for existence of source directory - needed first to create destination folder
    # and, of course, as source of files to copy. but we check here instead of failing halfway
    # thru the setup. This shows inconsistency between dmfilestat action_status and filesystem.
    if dmfilestat.dmfileset.type == dmactions_types.SIG:
        src_dir = dmfilestat.result.experiment.expDir
    else:
        src_dir = dmfilestat.result.get_report_dir()
    if not os.path.exists(src_dir):
        raise DMExceptions.SrcDirDoesNotExist(src_dir)