def show_keepers(): for dmtype in FILESET_TYPES: # filename stem storename = "%s_%s_keepers" % (slugify(dmtype), gethostname()) # All SigProc Stat objects sigprocstats = models.DMFileStat.objects.filter( dmfileset__type=dmtype).order_by('created') print "All %s objects count: %d" % (dmtype, sigprocstats.count()) # Limit to objects marked keep sigprocstats = sigprocstats.filter( result__experiment__storage_options='KI') print "All %s objects marked Keep count: %d" % (dmtype, sigprocstats.count()) # Limit to objects with files on filesystem sigprocstats = sigprocstats.filter(action_state='L') print "All %s objects marked Keep and Local count: %d" % ( dmtype, sigprocstats.count()) _write_csv(sigprocstats, storename) _write_excel(sigprocstats, storename)
def archive(user, user_comment, dmfilestat, lockfile, msg_banner, backup_directory=None, confirmed=False):
    '''DM Action which copies files then deletes them.

    Validates the action and destination, creates the destination directory,
    then marks the fileset 'AG' (archive in progress) and hands the file list
    to the fileset task.  Validation errors propagate to the caller untouched;
    any failure after validation marks the fileset 'E' (error) and re-raises.
    '''
    logid = {'logid': "%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)
    msg = "Archiving %s - %s Using v.%s" % (
        dmfilestat.dmfileset.type, dmfilestat.result.resultsName, dmfilestat.dmfileset.version)
    logger.info(msg, extra=logid)
    _update_related_objects(user, user_comment, dmfilestat, ARCHIVE, msg)

    # Validation failures propagate directly; nothing has been modified yet so
    # no cleanup is needed.  (Previously wrapped in a no-op ``try/except: raise``.)
    action_validation(dmfilestat, ARCHIVE, confirmed)
    destination_validation(dmfilestat, backup_directory)
    _create_destination(dmfilestat, ARCHIVE, dmfilestat.dmfileset.type, backup_directory)

    try:
        if dmfilestat.dmfileset.type == dmactions_types.OUT:
            _create_archival_files(dmfilestat)
        filelistdict = _get_file_list_dict(dmfilestat, ARCHIVE, user, user_comment, msg_banner)
        locallockid = "%s_%s" % (dmfilestat.result.resultsName,
                                 dm_utils.slugify(dmfilestat.dmfileset.type))
        set_action_state(dmfilestat, 'AG', ARCHIVE)
        _process_fileset_task(filelistdict, lockfile, locallockid)
        prepare_for_data_import(dmfilestat)
    except Exception:
        # Record the error state before propagating so the UI shows the failure.
        set_action_state(dmfilestat, 'E', ARCHIVE)
        raise
def project_msg_banner(user, project_msg, action):
    '''Build and post a message banner summarizing per-report DM action status.

    ``project_msg`` maps a Results pk to a dict of {category: status}.  The
    banner severity (info/success/error) is chosen from the last status seen.
    Returns the created Message object, routed to ``user``.

    NOTE(review): a duplicate definition of this function exists in this file;
    in Python the later definition wins — the duplication should be resolved.
    '''
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=d)
    # Default severity up front so ``func`` is always bound at the return site,
    # even if project_msg is empty or an exception fires before assignment.
    func = Message.info
    msg = ""
    thistag = ""
    grpstatus = ""
    category = ""
    try:
        for pk, status_list in project_msg.items():
            report = Results.objects.get(id=pk)
            msg += "(%s) %s " % (report.resultsName, action.title())
            for category, status in status_list.items():
                msg += " %s, " % str(category)
                msg += status
                grpstatus = status
                thistag = "%s_%s_%s" % (str(pk), action, slugify(category))
            try:
                # urlresolvers is currently throwing an exception, unknown why. TS-
                # url = reverse('dm_log', args=(pk,))
                url = "/data/datamanagement/log/%s/" % pk
                msg += (
                    " <a href='%s' data-toggle='modal' data-target='#modal_report_log'>%s</a></br>"
                    % (url, "View Report Log")
                )  # TODO: i18n - Can we change text since it's persisted into DB?
            except Exception:
                logger.error(traceback.format_exc(), extra=d)
            logger.debug("MESSAGE: %s" % msg, extra=d)
        # If message already exists (ie, scheduled task) delete it.  Guard on a
        # non-empty tag so an empty project_msg cannot delete untagged messages.
        if thistag:
            Message.objects.filter(tags=thistag).delete()
        if len(project_msg) > 1:
            thistag = "%s_%s_%s" % ("project", action, slugify(category))
        if grpstatus.lower() == "scheduled":
            func = Message.info
        elif grpstatus.lower() == "success":
            func = Message.success
        else:
            func = Message.error
    except Exception:
        func = Message.error
        logger.error(traceback.format_exc(), extra=d)
    return func(msg, route=user, tags=thistag)
def project_msg_banner(user, project_msg, action):
    '''Build and post a message banner summarizing per-report DM action status.

    ``project_msg`` maps a Results pk to a dict of {category: status}.  Banner
    severity (info/success/error) follows the last status seen; the banner is
    routed to ``user``.

    NOTE(review): this is a duplicate definition of project_msg_banner within
    the same file; the duplication should be removed.
    '''
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=d)
    # Bind a default up front: previously ``func`` was only assigned inside
    # the loop, so an empty project_msg raised NameError at the return.
    func = Message.info
    msg = ''
    thistag = ''
    grpstatus = ''
    category = ''
    try:
        for pk, status_list in project_msg.iteritems():
            report = Results.objects.get(id=pk)
            msg += "(%s) %s " % (report.resultsName, action.title())
            for category, status in status_list.iteritems():
                msg += " %s, " % str(category)
                msg += status
                grpstatus = status
                thistag = "%s_%s_%s" % (str(pk), action, slugify(category))
            try:
                # urlresolvers is currently throwing an exception, unknown why. TS-
                # url = reverse('dm_log', args=(pk,))
                url = '/data/datamanagement/log/%s/' % pk
                msg += " <a href='%s' data-toggle='modal' data-target='#modal_report_log'>View Report Log</a></br>" % (
                    url)
            except Exception:
                # Narrowed from a bare ``except:`` which also swallowed
                # SystemExit/KeyboardInterrupt.
                logger.error(traceback.format_exc(), extra=d)
            logger.debug("MESSAGE: %s" % msg, extra=d)
        # If message already exists (ie, scheduled task) delete it.
        if thistag:
            Message.objects.filter(tags=thistag).delete()
        if len(project_msg) > 1:
            thistag = "%s_%s_%s" % ('project', action, slugify(category))
        if grpstatus.lower() == 'scheduled':
            func = Message.info
        elif grpstatus.lower() == 'success':
            func = Message.success
        else:
            func = Message.error
    except Exception:
        func = Message.error
        logger.error(traceback.format_exc(), extra=d)
    return func(msg, route=user, tags=thistag)
def test(user, user_comment, dmfilestat, lockfile, msg_banner, backup_directory=None):
    '''DM Action which prints to log file only. Used for testing only.

    Logs the action, updates related objects, and hands the file list to the
    fileset task.  Exceptions propagate directly to the caller (the original
    wrapped the work in a no-op ``try/except: raise``, removed here).
    '''
    logid = {'logid': "%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)
    msg = "Testing %s - %s Using v.%s" % (
        dmfilestat.dmfileset.type, dmfilestat.result.resultsName, dmfilestat.dmfileset.version)
    logger.info(msg, extra=logid)
    _update_related_objects(user, user_comment, dmfilestat, TEST, msg)

    filelistdict = _get_file_list_dict(dmfilestat, TEST, user, user_comment, msg_banner)
    locallockid = "%s_%s" % (dmfilestat.result.resultsName,
                             dm_utils.slugify(dmfilestat.dmfileset.type))
    _process_fileset_task(filelistdict, lockfile, locallockid)
def show_keepers(): for dmtype in FILESET_TYPES: # filename stem storename = "%s_%s_keepers" % (slugify(dmtype), gethostname()) # All SigProc Stat objects sigprocstats = models.DMFileStat.objects.filter(dmfileset__type=dmtype).order_by('created') print "All %s objects count: %d" % (dmtype, sigprocstats.count()) # Limit to objects marked keep sigprocstats = sigprocstats.filter(result__experiment__storage_options = 'KI') print "All %s objects marked Keep count: %d" % (dmtype, sigprocstats.count()) # Limit to objects with files on filesystem sigprocstats = sigprocstats.filter(action_state='L') print "All %s objects marked Keep and Local count: %d" % (dmtype, sigprocstats.count()) _write_csv(sigprocstats, storename) _write_excel(sigprocstats, storename)
def delete(user, user_comment, dmfilestat, lockfile, msg_banner, confirmed=False):
    '''DM Action which deletes files.

    Validates the action (validation errors propagate to the caller untouched),
    then marks the fileset 'DG' (delete in progress) and hands the file list to
    the fileset task.  Any failure during processing marks the fileset 'E'
    (error) and re-raises.
    '''
    logid = {'logid': "%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)
    msg = "Deleting %s - %s Using v.%s" % (
        dmfilestat.dmfileset.type, dmfilestat.result.resultsName, dmfilestat.dmfileset.version)
    logger.info(msg, extra=logid)
    _update_related_objects(user, user_comment, dmfilestat, DELETE, msg)

    # Previously wrapped in a no-op ``try/except: raise``; the exception
    # propagates identically without it.
    action_validation(dmfilestat, DELETE, confirmed)

    try:
        if dmfilestat.dmfileset.type == dmactions_types.OUT:
            _create_archival_files(dmfilestat)
        filelistdict = _get_file_list_dict(dmfilestat, DELETE, user, user_comment, msg_banner)
        locallockid = "%s_%s" % (dmfilestat.result.resultsName,
                                 dm_utils.slugify(dmfilestat.dmfileset.type))
        set_action_state(dmfilestat, 'DG', DELETE)
        _process_fileset_task(filelistdict, lockfile, locallockid)
    except Exception:
        # Record the error state before propagating so the UI shows the failure.
        set_action_state(dmfilestat, 'E', DELETE)
        raise
def manage_data(deviceid, dmfileset, pathlist, auto_acknowledge_enabled, auto_action_enabled):
    '''Auto-action driver for one (device, fileset-type) pair.

    Narrows DMFileStat objects of ``dmfileset['type']`` to those that are
    active, older than the auto-trigger age, stored under one of ``pathlist``,
    and not preserved, then performs the configured auto action ('ARC' archive
    or 'DEL' delete) on the first eligible candidate.

    A TaskLock named from the device id and fileset type ensures only one
    worker processes this partition/type at a time; every exit path below is
    responsible for releasing it.
    '''
    logid = {'logid': "%s" % ('manage_data')}
    logger.debug("manage_data: %s %s" % (dmfileset['auto_action'], dmfileset['type']), extra=logid)

    def getfirstnotpreserved(dmfilestats, action, threshdate):
        '''QuerySet of DMFileStat objects.  Returns first instance of an object
        with preserve_data set to False and passes the action_validation test.
        Raises DMExceptions.NoDMFileStat when no candidate qualifies.'''
        logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)
        logger.debug("Looking at %d dmfilestat objects" % dmfilestats.count(), extra=logid)
        for archiveme in dmfilestats:
            if not archiveme.getpreserved():
                try:
                    dmactions.action_validation(archiveme, action)
                    return archiveme
                except (DMExceptions.FilesInUse, DMExceptions.FilesMarkedKeep):
                    logger.debug("%s Failed action_validation. Try next fileset" %
                                 archiveme.result.resultsName, extra=logid)
                except DMExceptions.BaseInputLinked:
                    # want to allow Basecalling Input delete if all results are expired
                    related_objs = DMFileStat.objects.filter(
                        result__experiment=archiveme.result.experiment,
                        dmfileset__type=dmactions_types.BASE)
                    if related_objs.count() == related_objs.filter(created__lt=threshdate).count():
                        # all related Basecalling Input filesets are past the
                        # age threshold; flag so delete may proceed confirmed
                        archiveme.allow_delete = True
                        return archiveme
                    else:
                        logger.debug("%s Failed action_validation. Try next fileset" %
                                     archiveme.result.resultsName, extra=logid)
                except:
                    logger.error(traceback.format_exc(), extra=logid)
            else:
                logger.debug("Skipped a preserved fileset: %s" %
                             archiveme.result.resultsName, extra=logid)
        logger.info("%d filestat objects are preserved." % dmfilestats.count(), extra=logid)
        raise DMExceptions.NoDMFileStat("NONE FOUND")

    try:
        # logger.debug("manage_data lock for %s (%d)" % (dmfileset['type'], os.getpid()), extra = logid)
        # Create lock file to prevent more than one celery task for each process_type and partition
        lock_id = "%s_%s" % (hex(deviceid), slugify(dmfileset['type']))
        logid = {'logid': "%s" % (lock_id)}
        applock = TaskLock(lock_id)
        if not applock.lock():
            # another worker already owns this partition/type; bail out
            logger.info("Currently processing: %s(%s)" % (lock_id, applock.get()), extra=logid)
            return
        logger.debug("lock file created: %s(%s)" % (lock_id, applock.get()), extra=logid)
    except Exception as e:
        logger.error(traceback.format_exc(), extra=logid)

    try:
        #---------------------------------------------------------------------------
        # Database object filtering
        #---------------------------------------------------------------------------
        actiondmfilestat = None
        user_comment = "Auto Action"
        # Order by incrementing pk. This puts them in chronological order.
        # Select DMFileStat objects of category DMFileSet.type (1/4th of all objects)
        dmfilestats = DMFileStat.objects.filter(dmfileset__type=dmfileset['type']).order_by('created')
        tot_obj = dmfilestats.count()
        # Select objects not yet processed
        dmfilestats = dmfilestats.filter(action_state__in=['L', 'S', 'N', 'A'])
        tot_act = dmfilestats.count()
        # Select objects that are old enough
        threshdate = datetime.now(pytz.UTC) - timedelta(days=dmfileset['auto_trigger_age'])
        dmfilestats = dmfilestats.filter(created__lt=threshdate)
        tot_exp = dmfilestats.count()
        # Select objects stored on the deviceid
        query = Q()
        for path in pathlist:
            if dmfileset['type'] == dmactions_types.SIG:
                query |= Q(result__experiment__expDir__startswith=path)
            # These categories have files in both directory paths.
            elif dmfileset['type'] in [dmactions_types.INTR, dmactions_types.BASE]:
                query |= Q(result__experiment__expDir__startswith=path)
                query |= Q(result__reportstorage__dirPath__startswith=path)
            else:
                query |= Q(result__reportstorage__dirPath__startswith=path)
        dmfilestats = dmfilestats.filter(query)
        tot_onpath = dmfilestats.count()
        # Exclude objects marked 'Keep' upfront to optimize db access
        if dmfileset['type'] == dmactions_types.SIG:
            dmfilestats = dmfilestats.exclude(result__experiment__storage_options="KI")
        else:
            dmfilestats = dmfilestats.exclude(preserve_data=True)
        tot_notpreserved = dmfilestats.count()
        # Compress to single log entry
        logger.info("Total: %d Active: %d Expired: %d On Path: %d Not Preserved: %d" %
                    (tot_obj, tot_act, tot_exp, tot_onpath, tot_notpreserved), extra=logid)

        #---------------------------------------------------------------------------
        # Archive
        #---------------------------------------------------------------------------
        if dmfileset['auto_action'] == 'ARC':
            '''
            Rules:
            1) archive a fileset as soon as age threshold reached, regardless of disk threshold
            2) select oldest fileset
            3) select unprocessed fileset
            4) Archiving does not require 'S' -> 'N' -> 'A' progression before action
            5) Do not include filesets marked 'E' in auto-action - can get stuck on that fileset forever
            '''
            logger.info("Exceed %d days. Eligible to archive: %d" %
                        (dmfileset['auto_trigger_age'], dmfilestats.count()), extra=logid)
            actiondmfilestat = None
            # Bail out if disabled
            if auto_action_enabled != True:
                logger.info("Data management auto-action is disabled.", extra=logid)
                applock.unlock()
                logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                return
            # Select first object stored on the deviceid
            try:
                actiondmfilestat = getfirstnotpreserved(dmfilestats, dmactions.ARCHIVE, threshdate)
                logger.info("Picked: %s" % (actiondmfilestat.result.resultsName), extra=logid)
            except DMExceptions.NoDMFileStat:
                logger.debug("No filesets to archive on this device", extra=logid)
                applock.unlock()
                logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
            except:
                logger.error(traceback.format_exc(), extra=logid)
                applock.unlock()
                logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
            else:
                # candidate found: record it on the lock, then archive
                applock.update(actiondmfilestat.result.resultsName)
                archive_action('dm_agent', user_comment, actiondmfilestat, lock_id)

        #---------------------------------------------------------------------------
        # Delete
        #---------------------------------------------------------------------------
        elif dmfileset['auto_action'] == 'DEL':
            '''
            Rules:
            If auto-acknowledge is True:
                promote to Acknowledged and delete when age and disk threshold reached.
            If auto-acknowledge is False:
                If fileset type is SIG:
                    'S' -> 'N' -> 'A' progression
                Else:
                    promote to 'A'
                delete an 'A' fileset
            '''
            logger.info("Exceed %d days. Eligible to delete: %d" %
                        (dmfileset['auto_trigger_age'], dmfilestats.count()), extra=logid)
            if auto_acknowledge_enabled:
                if dmfileset['type'] == dmactions_types.SIG:
                    logger.debug("Sig Proc Input Files auto acknowledge enabled", extra=logid)
                '''
                Do not require 'S' -> 'N' -> 'A' progression before action; mark 'A' and process
                '''
                a_list = dmfilestats.filter(action_state='A')
                if a_list.count() > 0:
                    # there are filesets acknowledged and ready to be deleted. Covers situation where user has
                    # already acknowledged but recently enabled auto-acknowledge as well.
                    # deleteme = a_list[0]
                    try:
                        actiondmfilestat = getfirstnotpreserved(a_list, dmactions.DELETE, threshdate)
                        logger.info("Picked: %s" % (actiondmfilestat.result.resultsName), extra=logid)
                    except DMExceptions.NoDMFileStat:
                        logger.info("No filesets to delete on this device", extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                    except:
                        logger.error(traceback.format_exc(), extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                if actiondmfilestat == None:
                    # Select oldest fileset regardless if its 'L','S','N','A'. This covers situation where user
                    # recently enabled auto-acknowledge
                    try:
                        actiondmfilestat = getfirstnotpreserved(dmfilestats, dmactions.DELETE, threshdate)
                        logger.info("Picked: %s" % (actiondmfilestat.result.resultsName), extra=logid)
                    except DMExceptions.NoDMFileStat:
                        logger.info("No filesets to delete on this device", extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                    except:
                        logger.error(traceback.format_exc(), extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
            else:
                if dmfileset['type'] == dmactions_types.SIG:
                    logger.debug("Sig Proc Input Files auto acknowledge disabled", extra=logid)
                    '''
                    Need to select # of filesets and mark 'S'
                    Need to find 'S' filesets and notify (set to 'N')
                    Set queue length to 5
                    sum(S,N,A) == queue length
                    '''
                    # Get email recipient
                    try:
                        recipient = User.objects.get(username='******').email
                    except:
                        recipient = None
                    # Need to select Experiments to process with appropriate dmfilestats action states.
                    exps = Experiment.objects.exclude(
                        storage_options='KI').exclude(expDir='').order_by('date')
                    l_list = exps.filter(
                        results_set__dmfilestat__in=dmfilestats.filter(action_state='L')).distinct()
                    s_list = exps.filter(
                        results_set__dmfilestat__in=dmfilestats.filter(action_state='S')).distinct()
                    n_list = exps.filter(
                        results_set__dmfilestat__in=dmfilestats.filter(action_state='N')).distinct()
                    a_list = exps.filter(
                        results_set__dmfilestat__in=dmfilestats.filter(action_state='A')).distinct()
                    logger.info("Experiments (Keep=False) L:%d S:%d N:%d A:%d" %
                                (l_list.count(), s_list.count(), n_list.count(), a_list.count()), extra=logid)
                    queue_length = 5
                    to_select = queue_length - (a_list.count() + n_list.count() + s_list.count())
                    if to_select > 0:
                        # Mark to_select number of oldest Experiments, from 'L', to 'S'
                        promoted = dmfilestats.filter(
                            result__experiment__id__in=list(l_list[:to_select].values_list('id', flat=True)))
                        if auto_action_enabled:
                            promoted.update(action_state='S')
                            for dmfilestat in promoted:
                                EventLog.objects.add_entry(
                                    dmfilestat.result, "Signal Processing Input Selected for Deletion",
                                    username='******')
                    # Get updated list of Selected items
                    selected = dmfilestats.filter(action_state='S')
                    # Send Selected items to be notified
                    if selected.count() > 0 and auto_action_enabled:
                        logger.debug("notify recipient %s" % (recipient), extra=logid)
                        if notify([dmfilestat.result.resultsName for dmfilestat in selected], recipient):
                            selected.update(action_state='N')
                            for dmfilestat in selected:
                                EventLog.objects.add_entry(
                                    dmfilestat.result, "Notification for Deletion Sent", username='******')
                    try:
                        # only Acknowledged filesets are eligible for deletion here
                        actiondmfilestat = getfirstnotpreserved(
                            dmfilestats.filter(action_state='A'), dmactions.DELETE, threshdate)
                        logger.info("Picked: %s" % (actiondmfilestat.result.resultsName), extra=logid)
                    except DMExceptions.NoDMFileStat:
                        logger.info("No filesets to delete on this device", extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                    except:
                        logger.error(traceback.format_exc(), extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                else:
                    # non-SIG categories are promoted to 'A' implicitly: just
                    # pick the first deletable candidate
                    try:
                        actiondmfilestat = getfirstnotpreserved(dmfilestats, dmactions.DELETE, threshdate)
                        logger.info("Picked: %s" % (actiondmfilestat.result.resultsName), extra=logid)
                    except DMExceptions.NoDMFileStat:
                        logger.info("No filesets to delete on this device", extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                    except:
                        logger.error(traceback.format_exc(), extra=logid)
                        applock.unlock()
                        logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)

            # Bail out if disabled
            if auto_action_enabled != True:
                logger.info("Data management auto-action is disabled.", extra=logid)
                applock.unlock()
                logger.debug("Worker PID %d lock_id destroyed %s" % (os.getpid(), lock_id), extra=logid)
                return

            if actiondmfilestat is not None:
                applock.update(actiondmfilestat.result.resultsName)
                # allow_delete is set by getfirstnotpreserved for the expired
                # Basecalling-Input case; defaults to unconfirmed otherwise
                delete_action('dm_agent', user_comment, actiondmfilestat, lock_id,
                              confirmed=getattr(actiondmfilestat, 'allow_delete', False))

        else:
            logger.error("Unknown or unhandled action: %s" % dmfileset['auto_action'], extra=logid)
            applock.unlock()

    except (DMExceptions.FilePermission,
            DMExceptions.InsufficientDiskSpace,
            DMExceptions.MediaNotSet,
            DMExceptions.MediaNotAvailable,
            DMExceptions.FilesInUse) as e:
        applock.unlock()
        message = Message.objects.filter(tags__contains=e.tag)
        if not message:
            Message.error(e.message, tags=e.tag)
            # TODO: TS-6525: This logentry will repeat for an Archive action every 30 seconds until the cause of the exception is fixed.
            # at least, while the message banner is raised, suppress additional Log Entries.
            EventLog.objects.add_entry(actiondmfilestat.result, "%s - %s" %
                                       (dmfileset['type'], e.message), username='******')
    except DMExceptions.SrcDirDoesNotExist as e:
        applock.unlock()
        msg = "Src Dir not found: %s. Setting action_state to Deleted" % e.message
        EventLog.objects.add_entry(actiondmfilestat.result, msg, username='******')
        actiondmfilestat.setactionstate('DD')
        logger.info(msg, extra=logid)
    except Exception as inst:
        applock.unlock()
        msg = ''
        if actiondmfilestat:
            msg = "Auto-action error on %s " % actiondmfilestat.result.resultsName
        msg += " Error: %s" % str(inst)
        logger.error("%s - %s" % (dmfileset['type'], msg), extra=logid)
        logger.error(traceback.format_exc(), extra=logid)
        if actiondmfilestat:
            EventLog.objects.add_entry(actiondmfilestat.result, "%s - %s" %
                                       (dmfileset['type'], msg), username='******')
    return
def _process_fileset_task(dmfilestat, action, user, user_comment, lockfile, msg_banner):
    '''
    This function generates a list of files to process, then hands the list to a recursive
    celery task function.  The recursion continues until the list is empty.  The calling
    function exits immediately.
    '''
    logid = {'logid': "%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    # Decide which directories to scan based on the fileset's current state.
    if dmfilestat.isdeleted():
        errmsg = "The %s for %s are deleted" % (dmfilestat.dmfileset.type, dmfilestat.result.resultsName)
        logger.warn(errmsg, extra=logid)
        raise Exception(errmsg)
    elif dmfilestat.isarchived():
        if not os.path.exists(dmfilestat.archivepath):
            errmsg = "Cannot access backup location %s" % dmfilestat.archivepath
            logger.warn(errmsg, extra=logid)
            raise Exception(errmsg)
        else:
            # search archived directory
            search_dirs = [dmfilestat.archivepath]
    else:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

    # Create a lock file here to block any other actions on this report (see TS-8411)
    lock_id = "%s_%s" % (dmfilestat.result.resultsName, dm_utils.slugify(dmfilestat.dmfileset.type))
    # short timeout in case lock release code doesn't get called
    locallock = TaskLock(lock_id, timeout=60)
    if not(locallock.lock()):
        logger.warn("lock file exists: %s(%s)" % (lock_id, locallock.get()), extra=logid)
        # Release the task lock
        try:
            applock = TaskLock(lockfile)
            applock.unlock()
        except:
            logger.error(traceback.format_exc(), extra=logid)
        return
    logger.info("lock file created: %s(%s)" % (lock_id, locallock.get()), extra=logid)

    # Record the in-progress state for this action type.
    if action == ARCHIVE:
        dmfilestat.setactionstate('AG')
    elif action == DELETE:
        dmfilestat.setactionstate('DG')
    elif action == EXPORT:
        dmfilestat.setactionstate('EG')

    # List of all files associated with the report
    cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

    # Determine if this file type is eligible to use a keep list
    kpatterns = _get_keeper_list(dmfilestat, action)

    # Create a list of files eligible to process
    list_of_file_dict = []
    is_thumbnail = dmfilestat.result.isThumbnail
    # linked sigproc files are excluded for deletes and intermediate filesets
    add_linked_sigproc = False if (action == DELETE or dmfilestat.dmfileset.type == dmactions_types.INTR) else True
    for start_dir in search_dirs:
        logger.debug("Searching: %s" % start_dir, extra=logid)
        to_process = []
        to_keep = []
        if os.path.isdir(start_dir):
            to_process, to_keep = dm_utils._file_selector(start_dir,
                                                          dmfilestat.dmfileset.include,
                                                          dmfilestat.dmfileset.exclude,
                                                          kpatterns,
                                                          is_thumbnail,
                                                          add_linked_sigproc,
                                                          cached=cached_file_list)
        logger.info("%d files to process at %s" %
                    (len(list(set(to_process) - set(to_keep))), start_dir), extra=logid)
        # one work unit per searched directory; consumed by _process_task
        list_of_file_dict.append(
            {
                'pk': dmfilestat.id,
                'action': action,
                'archivepath': dmfilestat.archivepath,
                'start_dir': start_dir,
                'to_process': to_process,
                'to_keep': to_keep,
                'total_cnt': len(list(set(to_process) - set(to_keep))),
                'processed_cnt': 0,
                'total_size': 0,
                'user': user,
                'user_comment': user_comment,
                'lockfile': lockfile,
                'msg_banner': msg_banner,
            }
        )

    try:
        # persist the work list to a parameter file for the celery task
        pfilename = set_action_param_var(list_of_file_dict)
        # Call the recursive celery task function to process the list
        _process_task.delay(pfilename)
    except:
        logger.error("We got an error here, _process_fileset_task", extra=logid)
        raise
    finally:
        if locallock:
            locallock.unlock()

    return