Ejemplo n.º 1
0
def update_diskspace(dmfilestat, cached=None):
    """Update diskspace field (in MB) in dmfilestat object.

    Walks the report directory and the raw data directory, selects the files
    belonging to this DMFileStat's fileset and sums their sizes.  The result
    is always persisted on the dmfilestat: on failure diskspace is saved as
    None and the exception is re-raised.
    """
    try:
        # search both results directory and raw data directory
        search_dirs = [
            dmfilestat.result.get_report_dir(),
            dmfilestat.result.experiment.expDir,
        ]

        if not cached:
            cached = dm_utils.get_walk_filelist(
                search_dirs, list_dir=dmfilestat.result.get_report_dir())

        total_size = 0

        # Create a list of files eligible to process
        # exclude onboard_results folder if thumbnail or if fullchip was reanalyzed from signal processing
        sigproc_results_dir = os.path.join(dmfilestat.result.get_report_dir(),
                                           "sigproc_results")
        exclude_onboard_results = dmfilestat.result.isThumbnail or (
            "onboard_results" not in os.path.realpath(sigproc_results_dir))

        for start_dir in search_dirs:
            to_process = []
            if os.path.isdir(start_dir):
                to_process, _ = dm_utils._file_selector(
                    start_dir,
                    dmfilestat.dmfileset.include,
                    dmfilestat.dmfileset.exclude,
                    [],
                    exclude_onboard_results,
                    add_linked_sigproc=True,
                    cached=cached,
                )

                # process files in list
                # NOTE(review): [1:] skips the first selected entry; looks like a
                # conversion of "enumerate(to_process, start=1)" (see the commented
                # logger.debug using "j") -- confirm intent before changing.
                for path in to_process[1:]:
                    try:
                        # logger.debug("%d %s %s" % (j, 'diskspace', path), extra = logid)
                        # skip symlinks so linked files are not counted twice
                        if not os.path.islink(path):
                            total_size += os.lstat(path)[6]  # st_size

                    except Exception as inst:
                        # BUGFIX: not every exception carries an errno attribute;
                        # accessing inst.errno directly could raise AttributeError
                        # and mask the original error.  A vanished file (ENOENT)
                        # is expected during cleanup and silently skipped.
                        if getattr(inst, "errno", None) == errno.ENOENT:
                            pass
                        else:
                            errmsg = "update_diskspace %s" % (inst)
                            logger.error(errmsg, extra=logid)

        diskspace = float(total_size) / (1024 * 1024)
    except:
        diskspace = None
        raise
    finally:
        # always persist the result, even on failure (diskspace == None)
        dmfilestat.diskspace = diskspace
        dmfilestat.save()
    return diskspace
Ejemplo n.º 2
0
def copy_files_to_destination(source_dir, destination, dmfileset,
                              cached_file_list, log, add_warning):
    """Copy the fileset's selected files from source_dir to destination.

    Copy errors on plugin output files are logged and summarized as warnings
    (one per plugin); any other copy error is re-raised.  After copying,
    performs fileset-specific fixups: for OUT filesets ensure plugin_out
    exists and drop the pdf folder; for BASE filesets create the
    sigproc_results symlink for onboard results.
    """

    # to_keep is unused here; the import copies everything selected
    to_process, _to_keep = dm_utils._file_selector(source_dir,
                                                   dmfileset.include,
                                                   dmfileset.exclude, [],
                                                   cached=cached_file_list)
    logger.info('[Data Import] Importing %d files from %s to %s' %
                (len(to_process), source_dir, destination))
    log('Copy files to destination: %d files, source=%s destination=%s' %
        (len(to_process), source_dir, destination))

    plugin_warnings = {}
    for filepath in to_process:  # index from enumerate was unused
        log('%s' % filepath, flush=True)
        try:
            _copy_to_dir(filepath, source_dir, destination)
        except Exception:
            # log and ignore errors from plugin files
            if 'plugin_out' in filepath:
                plugin_name = filepath.split('plugin_out/')[1].split('_out')[0]
                plugin_warnings[plugin_name] = plugin_warnings.get(
                    plugin_name, 0) + 1
                log(traceback.format_exc())
            else:
                raise

    for plugin, count in plugin_warnings.items():
        add_warning('Unable to copy %d files for plugin %s' % (count, plugin))

    if dmfileset.type == dmactions_types.OUT:
        # make sure we have plugin_out folder
        plugin_out = os.path.join(destination, 'plugin_out')
        if not os.path.isdir(plugin_out):
            oldmask = os.umask(0000)  # grant write permission to plugin user
            os.mkdir(plugin_out)
            os.umask(oldmask)

        # remove pdf folder, it may have incorrect permissions
        pdf_dir = os.path.join(destination, 'pdf')
        if os.path.exists(pdf_dir):
            shutil.rmtree(pdf_dir, ignore_errors=True)

    # for onboard results need to create sigproc_results link
    if dmfileset.type == dmactions_types.BASE:
        if os.path.exists(os.path.join(destination, 'onboard_results')):
            os.symlink(
                os.path.join(destination, 'onboard_results',
                             'sigproc_results'),
                os.path.join(destination, 'sigproc_results'))

    log('Copy files to destination %s done.' % dmfileset.type)
Ejemplo n.º 3
0
def get_diskspace(source_dir, dmfileset, cached_file_list, add_warning):
    """Return total size in MB of the fileset's files under source_dir,
    or None (after adding a warning) if the calculation fails."""
    try:
        to_process, to_keep = dm_utils._file_selector(source_dir, dmfileset.include, dmfileset.exclude, [], cached=cached_file_list)
        total_size = 0
        for path in to_process:
            # skip symlinks so linked files are not double-counted
            if not os.path.islink(path):
                total_size += os.lstat(path)[6]  # st_size
        diskspace = float(total_size) / (1024 * 1024)
    except Exception:
        # BUGFIX: was a bare "except:", which also traps SystemExit and
        # KeyboardInterrupt; narrow to Exception and keep the best-effort
        # behavior of returning None with a warning.
        logger.error(traceback.format_exc())
        add_warning('Error calculating diskspace for %s' % dmfileset.type)
        diskspace = None

    return diskspace
Ejemplo n.º 4
0
def update_diskspace(dmfilestat, cached=None):
    '''Update diskspace field (in MB) in dmfilestat object.

    Sums the sizes of the fileset's files found under the report directory
    and the raw data directory.  On failure, diskspace is saved as None and
    the exception is re-raised; the value is always persisted.
    '''
    try:
        # search both results directory and raw data directory
        search_dirs = [
            dmfilestat.result.get_report_dir(),
            dmfilestat.result.experiment.expDir
        ]

        if not cached:
            cached = dm_utils.get_walk_filelist(
                search_dirs, list_dir=dmfilestat.result.get_report_dir())

        total_size = 0

        # Create a list of files eligible to process
        is_thumbnail = dmfilestat.result.isThumbnail
        for start_dir in search_dirs:
            to_process = []
            if os.path.isdir(start_dir):
                to_process, _ = dm_utils._file_selector(
                    start_dir,
                    dmfilestat.dmfileset.include,
                    dmfilestat.dmfileset.exclude, [],
                    is_thumbnail,
                    add_linked_sigproc=True,
                    cached=cached)

                # process files in list
                # NOTE(review): [1:] skips the first selected entry; looks like a
                # conversion of "enumerate(to_process, start=1)" -- confirm intent.
                for path in to_process[1:]:
                    try:
                        # skip symlinks so linked files are not counted twice
                        if not os.path.islink(path):
                            total_size += os.lstat(path)[6]  # st_size

                    except Exception as inst:
                        # BUGFIX: use getattr -- not every exception carries an
                        # errno attribute; ENOENT (file vanished) is skipped.
                        if getattr(inst, "errno", None) == errno.ENOENT:
                            pass
                        else:
                            errmsg = "update_diskspace %s" % (inst)
                            logger.error(errmsg, extra=logid)

        diskspace = float(total_size) / (1024 * 1024)
    except:
        diskspace = None
        raise
    finally:
        # always persist, even on failure (diskspace == None)
        dmfilestat.diskspace = diskspace
        dmfilestat.save()
    return diskspace
Ejemplo n.º 5
0
def update_diskspace(dmfilestat, cached=None):
    '''Update diskspace field (in MB) in dmfilestat object.

    Walks the report and raw data directories, sums the sizes of the files
    selected for this fileset, and always persists the result: on failure
    diskspace is saved as None and the exception is re-raised.
    '''
    try:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

        if not cached:
            cached = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

        total_size = 0

        # Create a list of files eligible to process
        # exclude onboard_results folder if thumbnail or if fullchip was reanalyzed from signal processing
        sigproc_results_dir = os.path.join(dmfilestat.result.get_report_dir(), 'sigproc_results')
        exclude_onboard_results = dmfilestat.result.isThumbnail or ('onboard_results' not in os.path.realpath(sigproc_results_dir))

        for start_dir in search_dirs:
            to_process = []
            if os.path.isdir(start_dir):
                to_process, _ = dm_utils._file_selector(start_dir,
                                                        dmfilestat.dmfileset.include,
                                                        dmfilestat.dmfileset.exclude,
                                                        [],
                                                        exclude_onboard_results,
                                                        add_linked_sigproc=True,
                                                        cached=cached)

                # process files in list
                # NOTE(review): [1:] skips the first selected entry; looks like a
                # conversion of "enumerate(to_process, start=1)" -- confirm intent.
                for path in to_process[1:]:
                    try:
                        # skip symlinks so linked files are not counted twice
                        if not os.path.islink(path):
                            total_size += os.lstat(path)[6]  # st_size

                    except Exception as inst:
                        # BUGFIX: use getattr -- not every exception carries an
                        # errno attribute; ENOENT (file vanished) is skipped.
                        if getattr(inst, "errno", None) == errno.ENOENT:
                            pass
                        else:
                            errmsg = "update_diskspace %s" % (inst)
                            logger.error(errmsg, extra=logid)

        diskspace = float(total_size) / (1024 * 1024)
    except:
        diskspace = None
        raise
    finally:
        # always persist, even on failure (diskspace == None)
        dmfilestat.diskspace = diskspace
        dmfilestat.save()
    return diskspace
Ejemplo n.º 6
0
def get_diskspace(source_dir, dmfileset, cached_file_list, add_warning):
    """Return total size in MB of the fileset's files under source_dir,
    or None (after adding a warning) if the calculation fails."""
    try:
        to_process, to_keep = dm_utils._file_selector(
            source_dir, dmfileset.include, dmfileset.exclude, [], cached=cached_file_list)
        total_size = 0
        for path in to_process:
            # skip symlinks so linked files are not double-counted
            if not os.path.islink(path):
                total_size += os.lstat(path)[6]  # st_size
        diskspace = float(total_size) / (1024 * 1024)
    except Exception:
        # BUGFIX: was a bare "except:", which also traps SystemExit and
        # KeyboardInterrupt; narrow to Exception, keep best-effort None.
        logger.error(traceback.format_exc())
        add_warning('Error calculating diskspace for %s' % dmfileset.type)
        diskspace = None

    return diskspace
Ejemplo n.º 7
0
def copy_files_to_destination(source_dir, destination, dmfileset, cached_file_list, log, add_warning):
    """Copy the fileset's selected files from source_dir to destination.

    Copy errors on plugin output files are logged and summarized as warnings
    (one per plugin); any other copy error is re-raised.  After copying,
    performs fileset-specific fixups: OUT filesets get a plugin_out folder
    and lose the pdf folder; BASE filesets get the sigproc_results symlink
    for onboard results.
    """

    # to_keep is unused here; the import copies everything selected
    to_process, _to_keep = dm_utils._file_selector(
        source_dir, dmfileset.include, dmfileset.exclude, [], cached=cached_file_list)
    logger.info('[Data Import] Importing %d files from %s to %s' %
                (len(to_process), source_dir, destination))
    log('Copy files to destination: %d files, source=%s destination=%s' %
        (len(to_process), source_dir, destination))

    plugin_warnings = {}
    for filepath in to_process:  # index from enumerate was unused
        log('%s' % filepath, flush=True)
        try:
            _copy_to_dir(filepath, source_dir, destination)
        except Exception:
            # log and ignore errors from plugin files
            if 'plugin_out' in filepath:
                plugin_name = filepath.split('plugin_out/')[1].split('_out')[0]
                plugin_warnings[plugin_name] = plugin_warnings.get(plugin_name, 0) + 1
                log(traceback.format_exc())
            else:
                raise

    for plugin, count in plugin_warnings.items():
        add_warning('Unable to copy %d files for plugin %s' % (count, plugin))

    if dmfileset.type == dmactions_types.OUT:
        # make sure we have plugin_out folder
        plugin_out = os.path.join(destination, 'plugin_out')
        if not os.path.isdir(plugin_out):
            oldmask = os.umask(0000)  # grant write permission to plugin user
            os.mkdir(plugin_out)
            os.umask(oldmask)

        # remove pdf folder, it may have incorrect permissions
        pdf_dir = os.path.join(destination, 'pdf')
        if os.path.exists(pdf_dir):
            shutil.rmtree(pdf_dir, ignore_errors=True)

    # for onboard results need to create sigproc_results link
    if dmfileset.type == dmactions_types.BASE:
        if os.path.exists(os.path.join(destination, 'onboard_results')):
            os.symlink(os.path.join(destination, 'onboard_results', 'sigproc_results'),
                       os.path.join(destination, 'sigproc_results'))

    log('Copy files to destination %s done.' % dmfileset.type)
Ejemplo n.º 8
0
def get_file_list(dmfilestat):
    """Return list of files selected by this DMFileStat record and list of files to not process.
    There are some cases in which the list of selected files contains files which should not be
    processed.  Those are in the to_keep list."""
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    to_process = []
    to_keep = []
    try:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

        cached_file_list = dm_utils.get_walk_filelist(
            search_dirs, list_dir=dmfilestat.result.get_report_dir())
    except Exception:
        # If this function has an error, this file set should be marked 'E'
        dmfilestat.setactionstate('E')
        logger.error(traceback.format_exc(), extra=logid)
        return (to_process, to_keep)

    try:
        # Determine if this file type is eligible to use a keep list
        kpatterns = _get_keeper_list(dmfilestat, '')

        # Create a list of files eligible to process
        is_thumbnail = dmfilestat.result.isThumbnail
        for start_dir in search_dirs:
            if os.path.isdir(start_dir):
                tmp_process, tmp_keep = dm_utils._file_selector(start_dir,
                                                                dmfilestat.dmfileset.include,
                                                                dmfilestat.dmfileset.exclude,
                                                                kpatterns,
                                                                is_thumbnail,
                                                                cached=cached_file_list)
                to_process += tmp_process
                to_keep += tmp_keep
            else:
                # BUGFIX: no exception is active in this branch, so
                # traceback.format_exc() logged a stale/"None" traceback.
                # Log the actual condition instead.
                logger.error("Directory not found: %s" % start_dir, extra=logid)
    except Exception:
        logger.error(traceback.format_exc(), extra=logid)

    return (to_process, to_keep)
Ejemplo n.º 9
0
def get_file_list(dmfilestat):
    """Return list of files selected by this DMFileStat record and list of files to not process.
    There are some cases in which the list of selected files contains files which should not be
    processed.  Those are in the to_keep list."""
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    to_process = []
    to_keep = []
    try:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

        cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())
    except Exception:
        # If this function has an error, this file set should be marked 'E'
        dmfilestat.setactionstate('E')
        logger.error(traceback.format_exc(), extra=logid)
        return (to_process, to_keep)

    try:
        # Determine if this file type is eligible to use a keep list
        kpatterns = _get_keeper_list(dmfilestat, '')

        # Create a list of files eligible to process
        is_thumbnail = dmfilestat.result.isThumbnail
        for start_dir in search_dirs:
            if os.path.isdir(start_dir):
                tmp_process, tmp_keep = dm_utils._file_selector(start_dir,
                                                     dmfilestat.dmfileset.include,
                                                     dmfilestat.dmfileset.exclude,
                                                     kpatterns,
                                                     is_thumbnail,
                                                     cached=cached_file_list)
                to_process += tmp_process
                to_keep += tmp_keep
            else:
                # BUGFIX: no exception is active here, so format_exc() logged a
                # stale/"None" traceback; log the actual condition instead.
                logger.error("Directory not found: %s" % start_dir, extra=logid)
    except Exception:
        logger.error(traceback.format_exc(), extra=logid)

    return (to_process, to_keep)
Ejemplo n.º 10
0
def search_for_files(dmfilestats, reset, report):
    '''Look for files for the given DM category still in the filesystem.
    This is the long-lived function so we enable ctrl-c interrupt to
    exit the loop and still write the log file.
    '''
    try:
        print("Ctrl-C to exit")
        tracking = []
        num_dmfs = len(dmfilestats)
        for i, dmfs in enumerate(dmfilestats):
            sys.stdout.write("\r%05d/%05d %s" %
                             (i + 1, num_dmfs, progress[i % 7]))
            sys.stdout.flush()
            to_process = []
            to_keep = []
            # For each dmfilestat object, check if files still exist in filesystem
            # 1. Do not rely on cache.filelist
            dirs = [
                dmfs.result.get_report_dir(), dmfs.result.experiment.expDir
            ]
            for start_dir in [dir for dir in dirs if os.path.isdir(dir)]:
                tmp_process, tmp_keep = _file_selector(
                    start_dir,
                    dmfs.dmfileset.include,
                    dmfs.dmfileset.exclude,
                    _get_keeper_list(dmfs, 'delete'),
                    dmfs.result.isThumbnail,
                    False,
                    cached=get_walk_filelist(dirs))
                to_process += tmp_process
                to_keep += tmp_keep

            orphans = list(set(to_process) - set(to_keep))
            logs = models.EventLog.objects.for_model(dmfs.result)
            # We only want to track those datasets with lots of files displaced.
            if len(orphans) > 10:
                #if dmfs.action_state in ['DD', 'AD']:   # Is it marked Deleted?
                if dmfs.action_state in ['DD']:  # Is it marked Deleted?
                    print "\nReport: %s" % (dmfs.result.resultsName)
                    print "Report Directory: %s" % dmfs.result.get_report_dir()
                    print "Status: %s" % 'Deleted' if dmfs.action_state == 'DD' else 'Archived'
                    print "Category: %s" % dmfs.dmfileset.type
                    print "Raw Data Directory: %s" % dmfs.result.experiment.expDir
                    print "No. files: %d" % len(orphans)
                    print "Action Date: %s" % logs[len(logs) - 1].created
                    print "Action Log: %s" % logs[len(logs) - 1].text
                    tracking.append({
                        'report':
                        dmfs.result.resultsName,
                        'report_dir':
                        dmfs.result.get_report_dir(),
                        'state':
                        'Deleted' if dmfs.action_state == 'DD' else 'Archived',
                        'rawdatadir':
                        dmfs.result.experiment.expDir,
                        'num_files':
                        len(orphans),
                        'reset':
                        reset,
                        'action_state':
                        dmfs.action_state,
                        'action_date':
                        '%s' % logs[len(logs) - 1].created,
                        'action_text':
                        logs[len(logs) - 1].text
                    })
                    if reset:
                        try:
                            print "Deleting the cached.filelist file"
                            cachefilename = os.path.join(
                                dmfs.result.get_report_dir(),
                                "cached.filelist")
                            if os.path.exists(cachefilename):
                                #os.unlink(cachefilename)
                                os.rename(cachefilename,
                                          cachefilename + ".hide")
                        except OSError:
                            print traceback.format_exc()
                        dmfs.action_state = "L" if dmfs.action_state == 'DD' else "SA"
                        dmfs.save()
                        print "Reset to %s: %s" % (dmfs.action_state,
                                                   dmfs.result.resultsName)

                    if not report:
                        for entry in orphans:
                            print entry
            elif len(orphans) > 0:
                if not report:
                    print "\rLeft-overs Report: %s" % dmfs.result.resultsName
                    for entry in orphans:
                        print entry

        sys.stdout.write("\n ")
    except (KeyboardInterrupt):
        pass
    except:
        print traceback.format_exc()
    finally:
        return tracking
Ejemplo n.º 11
0
def search_for_files(dmfilestats, reset, report):
    '''Look for files for the given DM category still in the filesystem.
    This is the long-lived function so we enable ctrl-c interrupt to
    exit the loop and still write the log file.
    '''
    try:
        print ("Ctrl-C to exit")
        tracking = []
        num_dmfs = len(dmfilestats)
        for i, dmfs in enumerate(dmfilestats):
            sys.stdout.write("\r%05d/%05d %s" % (i + 1, num_dmfs, progress[i % 7]))
            sys.stdout.flush()
            to_process = []
            to_keep = []
            # For each dmfilestat object, check if files still exist in filesystem
            # 1. Do not rely on cache.filelist
            dirs = [dmfs.result.get_report_dir(), dmfs.result.experiment.expDir]
            for start_dir in [dir for dir in dirs if os.path.isdir(dir)]:
                tmp_process, tmp_keep = _file_selector(start_dir,
                                                       dmfs.dmfileset.include,
                                                       dmfs.dmfileset.exclude,
                                                       _get_keeper_list(dmfs, 'delete'),
                                                       dmfs.result.isThumbnail,
                                                       False,
                                                       cached=get_walk_filelist(dirs))
                to_process += tmp_process
                to_keep += tmp_keep

            orphans = list(set(to_process) - set(to_keep))
            logs = models.EventLog.objects.for_model(dmfs.result)
            # We only want to track those datasets with lots of files displaced.
            if len(orphans) > 10:
                # if dmfs.action_state in ['DD', 'AD']:   # Is it marked Deleted?
                if dmfs.action_state in ['DD']:   # Is it marked Deleted?
                    print "\nReport: %s" % (dmfs.result.resultsName)
                    print "Report Directory: %s" % dmfs.result.get_report_dir()
                    print "Status: %s" % 'Deleted' if dmfs.action_state == 'DD' else 'Archived'
                    print "Category: %s" % dmfs.dmfileset.type
                    print "Raw Data Directory: %s" % dmfs.result.experiment.expDir
                    print "No. files: %d" % len(orphans)
                    print "Action Date: %s" % logs[len(logs) - 1].created
                    print "Action Log: %s" % logs[len(logs) - 1].text
                    tracking.append({'report': dmfs.result.resultsName,
                                     'report_dir': dmfs.result.get_report_dir(),
                                     'state': 'Deleted' if dmfs.action_state == 'DD' else 'Archived',
                                     'rawdatadir': dmfs.result.experiment.expDir,
                                     'num_files': len(orphans),
                                     'reset': reset,
                                     'action_state': dmfs.action_state,
                                     'action_date': '%s' % logs[len(logs) - 1].created,
                                     'action_text': logs[len(logs) - 1].text})
                    if reset:
                        try:
                            print "Deleting the cached.filelist file"
                            cachefilename = os.path.join(dmfs.result.get_report_dir(), "cached.filelist")
                            if os.path.exists(cachefilename):
                                # os.unlink(cachefilename)
                                os.rename(cachefilename, cachefilename + ".hide")
                        except OSError:
                            print traceback.format_exc()
                        dmfs.action_state = "L" if dmfs.action_state == 'DD' else "SA"
                        dmfs.save()
                        print "Reset to %s: %s" % (dmfs.action_state, dmfs.result.resultsName)

                    if not report:
                        for entry in orphans:
                            print entry
            elif len(orphans) > 0:
                if not report:
                    print "\rLeft-overs Report: %s" % dmfs.result.resultsName
                    for entry in orphans:
                        print entry

        sys.stdout.write("\n ")
    except (KeyboardInterrupt):
        pass
    except:
        print traceback.format_exc()
    finally:
        return tracking
Ejemplo n.º 12
0
def _get_file_list_dict(dmfilestat, action, user, user_comment, msg_banner):
    '''
    Build one work-item dict per search directory describing the files to
    process for the given dmfilestat and action.
    '''
    logid = {'logid': "%s" % ('dmactions')}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    # Choose the directories to search, refusing deleted filesets and
    # unreachable archive locations.
    if dmfilestat.isdeleted():
        errmsg = "The %s for %s are deleted" % (dmfilestat.dmfileset.type, dmfilestat.result.resultsName)
        logger.warn(errmsg, extra=logid)
        raise Exception(errmsg)
    elif dmfilestat.isarchived():
        if os.path.exists(dmfilestat.archivepath):
            # search archived directory
            search_dirs = [dmfilestat.archivepath]
        else:
            errmsg = "Cannot access backup location %s" % dmfilestat.archivepath
            logger.warn(errmsg, extra=logid)
            raise Exception(errmsg)
    else:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

    # List of all files associated with the report
    cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

    # Keep-list patterns, when this file type is eligible for one
    kpatterns = _get_keeper_list(dmfilestat, action)

    is_thumbnail = dmfilestat.result.isThumbnail
    # linked sigproc files are skipped when deleting or for intermediate files
    add_linked_sigproc = not (action == DELETE or dmfilestat.dmfileset.type == dmactions_types.INTR)

    # One work-item dict per search directory
    list_of_file_dict = []
    for start_dir in search_dirs:
        logger.debug("Searching: %s" % start_dir, extra=logid)
        to_process, to_keep = [], []
        if os.path.isdir(start_dir):
            to_process, to_keep = dm_utils._file_selector(start_dir,
                                                          dmfilestat.dmfileset.include,
                                                          dmfilestat.dmfileset.exclude,
                                                          kpatterns,
                                                          is_thumbnail,
                                                          add_linked_sigproc,
                                                          cached=cached_file_list)
        total_cnt = len(list(set(to_process) - set(to_keep)))
        logger.info("%d files to process at %s" % (total_cnt, start_dir), extra=logid)
        list_of_file_dict.append({
            'pk': dmfilestat.id,
            'action': action,
            'archivepath': dmfilestat.archivepath,
            'start_dir': start_dir,
            'to_process': to_process,
            'to_keep': to_keep,
            'total_cnt': total_cnt,
            'processed_cnt': 0,
            'total_size': 0,
            'user': user,
            'user_comment': user_comment,
            'lockfile': '',
            'msg_banner': msg_banner,
        })
    return list_of_file_dict
Ejemplo n.º 13
0
def _process_fileset_task(dmfilestat, action, user, user_comment, lockfile, msg_banner):
    '''
    This function generates a list of files to process, then hands the list to a recursive
    celery task function.  The recursion continues until the list is empty.  The calling
    function exits immediately.

    dmfilestat   -- DMFileStat record whose fileset is acted upon
    action       -- ARCHIVE, DELETE or EXPORT (sets action state AG/DG/EG)
    user, user_comment -- audit info carried through in the task payload
    lockfile     -- name of the application-level TaskLock released on early exit
    msg_banner   -- flag passed through unchanged in the task payload
    '''
    logid = {'logid':"%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra = logid)

    # Deleted filesets cannot be processed; archived ones are processed from
    # the backup location when it is still reachable.
    if dmfilestat.isdeleted():
        errmsg = "The %s for %s are deleted" % (dmfilestat.dmfileset.type, dmfilestat.result.resultsName)
        logger.warn(errmsg, extra = logid)
        raise Exception(errmsg)
    elif dmfilestat.isarchived():
        if not os.path.exists(dmfilestat.archivepath):
            errmsg = "Cannot access backup location %s" % dmfilestat.archivepath
            logger.warn(errmsg, extra = logid)
            raise Exception(errmsg)
        else:
            # search archived directory
            search_dirs = [dmfilestat.archivepath]
    else:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

    # Create a lock file here to block any other actions on this report (see TS-8411)
    lock_id = "%s_%s" % (dmfilestat.result.resultsName, dm_utils.slugify(dmfilestat.dmfileset.type))
    locallock = TaskLock(lock_id, timeout=60) # short timeout in case lock release code doesn't get called

    if not(locallock.lock()):
        # Another action already holds the per-report lock: release the
        # application-level task lock and bail out quietly (no exception).
        logger.warn("lock file exists: %s(%s)" % (lock_id, locallock.get()), extra = logid)
        # Release the task lock
        try:
            applock = TaskLock(lockfile)
            applock.unlock()
        except:
            logger.error(traceback.format_exc(), extra = logid)
        return

    logger.info("lock file created: %s(%s)" % (lock_id, locallock.get()), extra = logid)

    # Mark the fileset as "in progress" for the chosen action
    if action == ARCHIVE:
        dmfilestat.setactionstate('AG')
    elif action == DELETE:
        dmfilestat.setactionstate('DG')
    elif action == EXPORT:
        dmfilestat.setactionstate('EG')

    # List of all files associated with the report
    cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

    # Determine if this file type is eligible to use a keep list
    kpatterns = _get_keeper_list(dmfilestat, action)

    # Create a list of files eligible to process: one work-item dict per search dir
    list_of_file_dict = []
    is_thumbnail = dmfilestat.result.isThumbnail
    # linked sigproc files are skipped when deleting or for intermediate files
    add_linked_sigproc = False if (action == DELETE or dmfilestat.dmfileset.type == dmactions_types.INTR) else True
    for start_dir in search_dirs:
        logger.debug("Searching: %s" % start_dir, extra = logid)
        to_process = []
        to_keep = []
        if os.path.isdir(start_dir):
            to_process, to_keep = dm_utils._file_selector(start_dir,
                                                 dmfilestat.dmfileset.include,
                                                 dmfilestat.dmfileset.exclude,
                                                 kpatterns,
                                                 is_thumbnail,
                                                 add_linked_sigproc,
                                                 cached=cached_file_list)
        logger.info("%d files to process at %s" % (len(list(set(to_process) - set(to_keep))), start_dir), extra = logid)
        list_of_file_dict.append(
            {
                'pk':dmfilestat.id,
                'action':action,
                'archivepath':dmfilestat.archivepath,
                'start_dir':start_dir,
                'to_process':to_process,
                'to_keep':to_keep,
                'total_cnt':len(list(set(to_process) - set(to_keep))),
                'processed_cnt':0,
                'total_size':0,
                'user':user,
                'user_comment':user_comment,
                'lockfile':lockfile,
                'msg_banner':msg_banner,
            }
        )

    try:
        # Serialize the work list, then hand it to the recursive celery task
        pfilename = set_action_param_var(list_of_file_dict)

        # Call the recursive celery task function to process the list
        _process_task.delay(pfilename)

    except:
        logger.error("We got an error here, _process_fileset_task", extra = logid)
        raise
    finally:
        # The per-report lock is always released; the celery task chain takes
        # over responsibility for the fileset from here.
        if locallock:
            locallock.unlock()

    return
Ejemplo n.º 14
0
def _process_fileset_task(dmfilestat, action, user, user_comment, lockfile, msg_banner):
    '''
    Build the list of files eligible for the given action (archive, delete or
    export) and hand that list to a recursive celery task (_process_task).
    The recursion continues until the list is empty; this function returns
    immediately after dispatching.

    Arguments:
        dmfilestat   -- DMFileStat model instance identifying report/fileset
        action       -- one of ARCHIVE, DELETE, EXPORT
        user         -- username recorded with the action
        user_comment -- free-text comment recorded with the action
        lockfile     -- name of the application-level TaskLock for this action
        msg_banner   -- flag passed through to the worker task

    Raises:
        Exception if the fileset is already deleted, or if it is archived and
        the backup location is inaccessible.
    '''
    logid = {'logid': "%s" % (lockfile)}
    logger.debug("Function: %s()" % sys._getframe().f_code.co_name, extra=logid)

    if dmfilestat.isdeleted():
        errmsg = "The %s for %s are deleted" % (dmfilestat.dmfileset.type, dmfilestat.result.resultsName)
        logger.warning(errmsg, extra=logid)
        raise Exception(errmsg)
    elif dmfilestat.isarchived():
        if not os.path.exists(dmfilestat.archivepath):
            errmsg = "Cannot access backup location %s" % dmfilestat.archivepath
            logger.warning(errmsg, extra=logid)
            raise Exception(errmsg)
        else:
            # search archived directory
            search_dirs = [dmfilestat.archivepath]
    else:
        # search both results directory and raw data directory
        search_dirs = [dmfilestat.result.get_report_dir(), dmfilestat.result.experiment.expDir]

    # Create a lock file here to block any other actions on this report (see TS-8411)
    lock_id = "%s_%s" % (dmfilestat.result.resultsName, dm_utils.slugify(dmfilestat.dmfileset.type))
    locallock = TaskLock(lock_id, timeout=60)  # short timeout in case lock release code doesn't get called

    if not locallock.lock():
        logger.warning("lock file exists: %s(%s)" % (lock_id, locallock.get()), extra=logid)
        # Another action already holds the report lock; release the
        # application-level task lock and bail out quietly.
        try:
            applock = TaskLock(lockfile)
            applock.unlock()
        except Exception:
            logger.error(traceback.format_exc(), extra=logid)
        return

    logger.info("lock file created: %s(%s)" % (lock_id, locallock.get()), extra=logid)

    # Record the "in progress" action state on the dmfilestat object
    if action == ARCHIVE:
        dmfilestat.setactionstate('AG')
    elif action == DELETE:
        dmfilestat.setactionstate('DG')
    elif action == EXPORT:
        dmfilestat.setactionstate('EG')

    # List of all files associated with the report
    cached_file_list = dm_utils.get_walk_filelist(search_dirs, list_dir=dmfilestat.result.get_report_dir())

    # Determine if this file type is eligible to use a keep list
    kpatterns = _get_keeper_list(dmfilestat, action)

    # Create a list of files eligible to process
    list_of_file_dict = []
    is_thumbnail = dmfilestat.result.isThumbnail
    # Linked sigproc files are excluded for deletes and for intermediate filesets
    add_linked_sigproc = not (action == DELETE or dmfilestat.dmfileset.type == dmactions_types.INTR)
    for start_dir in search_dirs:
        logger.debug("Searching: %s" % start_dir, extra=logid)
        to_process = []
        to_keep = []
        if os.path.isdir(start_dir):
            to_process, to_keep = dm_utils._file_selector(start_dir,
                                                          dmfilestat.dmfileset.include,
                                                          dmfilestat.dmfileset.exclude,
                                                          kpatterns,
                                                          is_thumbnail,
                                                          add_linked_sigproc,
                                                          cached=cached_file_list)
        # Files to act on are those selected minus those on the keep list;
        # compute the count once (original computed it twice per iteration).
        total_cnt = len(set(to_process) - set(to_keep))
        logger.info("%d files to process at %s" % (total_cnt, start_dir), extra=logid)
        list_of_file_dict.append(
            {
                'pk': dmfilestat.id,
                'action': action,
                'archivepath': dmfilestat.archivepath,
                'start_dir': start_dir,
                'to_process': to_process,
                'to_keep': to_keep,
                'total_cnt': total_cnt,
                'processed_cnt': 0,
                'total_size': 0,
                'user': user,
                'user_comment': user_comment,
                'lockfile': lockfile,
                'msg_banner': msg_banner,
            }
        )

    try:
        # Serialize the work list, then dispatch the recursive celery task
        pfilename = set_action_param_var(list_of_file_dict)
        _process_task.delay(pfilename)
    except Exception:
        logger.error("We got an error here, _process_fileset_task", extra=logid)
        raise
    finally:
        # Always release the per-report lock; the worker task re-acquires as needed
        if locallock:
            locallock.unlock()

    return