Example 1
def get_closed_projects(projs, pj_con, seconds):
    """Takes list of project and gives project list that are closed
    more than given time(as seconds)

    :param list projs: list of projects to check
    :param obj pj_con: connection object to project database
    :param int seconds: threshold age in seconds (days/hours converted to seconds)
    """
    closed_projs = []
    for proj in projs:
        if proj not in pj_con.name_view.keys():
            logger.warn(
                "Project {} is not in database, so SKIPPING it..".format(proj))
            continue
        proj_db_obj = pj_con.get_entry(proj)
        try:
            proj_close_date = proj_db_obj['close_date']
        except KeyError:
            logger.warn(
                "Project {} is either open or too old, so SKIPPING it..".
                format(proj))
            continue
        if misc.to_seconds(days=misc.days_old(
                proj_close_date, date_format='%Y-%m-%d')) > seconds:
            closed_projs.append(proj)
    return closed_projs
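
A hedged usage sketch for the function above, assuming the statusdb.ProjectSummaryConnection and misc.to_seconds helpers that appear in the other examples; the root directory and the 30-day window are hypothetical:

pcon = statusdb.ProjectSummaryConnection()            # project database connection, as in cleanup_uppmax
threshold = misc.to_seconds(days=30)                  # hypothetical 30-day window expressed in seconds
candidates = os.listdir('/path/to/projects')          # hypothetical directory of project folders
for proj in get_closed_projects(candidates, pcon, threshold):
    logger.info("Project {} has been closed for more than 30 days".format(proj))
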
Example 2
def get_closed_proj_info(prj, pdoc):
    """check and return a dict if project is closed"""
    pdict = None
    if not pdoc:
        logger.warn(
            "Seems like project {} doesn't have a proper statusdb document, skipping it"
            .format(prj))
    elif "close_date" in pdoc:
        closed_date = pdoc['close_date']
        closed_days = misc.days_old(closed_date, "%Y-%m-%d")
        if closed_days is not None and isinstance(closed_days, int):
            pdict = {
                'name': pdoc.get('project_name'),
                'pid': pdoc.get('project_id'),
                'closed_date': closed_date,
                'closed_days': closed_days,
                'bioinfo_responsible': pdoc.get('project_summary', {}).get(
                    'bioinfo_responsible', '').encode('ascii', 'ignore')
            }
        else:
            logger.warn(
                "Problem calculating closed days for project {} with close date {}. Skipping it"
                .format(pdoc.get('project_name'), closed_date))

    return pdict
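
A minimal sketch of how get_closed_proj_info might be driven from the same project-database connection; the project names are hypothetical and the loop simply collects the returned dicts:

pcon = statusdb.ProjectSummaryConnection()
closed_info = []
for prj in ['P1234', 'P5678']:                        # hypothetical project names
    pdoc = pcon.get_entry(prj)                        # statusdb document for the project
    pinfo = get_closed_proj_info(prj, pdoc)
    if pinfo:                                         # None means missing document or no usable close_date
        closed_info.append(pinfo)
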
Example 3
def cleanup_uppmax(site, days, dry_run=False):
    """Remove project/run that have been closed more than 'days'
    from the given 'site' on uppmax

    :param str site: site where the cleanup should be performed
    :param int days: number of days to check for closed projects
    """
    days = check_days(site, days, config)
    if not days:
        return
    root_dir = CONFIG.get("cleanup").get(site).get("root")
    deleted_log = CONFIG.get("cleanup").get("deleted_log")
    assert os.path.exists(os.path.join(root_dir, deleted_log)), "Log directory {} doesn't exist in {}".format(
        deleted_log, root_dir
    )
    log_file = os.path.join(root_dir, "{fl}/{fl}.log".format(fl=deleted_log))

    # make a connection for project db #
    pcon = statusdb.ProjectSummaryConnection()
    assert pcon, "Could not connect to project database in StatusDB"

    if site != "archive":
        ## workflow for cleaning up illumina/analysis ##
        projects = [p for p in os.listdir(root_dir) if re.match(filesystem.PROJECT_RE, p)]
        list_to_delete = get_closed_projects(projects, pcon, days)
    else:
        ## workflow for cleaning archive ##
        list_to_delete = []
        archived_in_swestore = filesystem.list_runs_in_swestore(
            path=CONFIG.get("cleanup").get("swestore").get("root"), no_ext=True
        )
        runs = [r for r in os.listdir(root_dir) if re.match(filesystem.RUN_RE, r)]
        with filesystem.chdir(root_dir):
            for run in runs:
                fc_date = run.split("_")[0]
                if misc.days_old(fc_date) > days:
                    if run in archived_in_swestore:
                        list_to_delete.append(run)
                    else:
                        logger.warn(
                            "Run {} is older than {} days but not in " "swestore, so SKIPPING".format(run, days)
                        )

    ## delete and log
    for item in list_to_delete:
        if dry_run:
            logger.info("Will remove {} from {}".format(item, root_dir))
            continue
        try:
            shutil.rmtree(os.path.join(root_dir, item))
            logger.info("Removed project {} from {}".format(item, root_dir))
            with open(log_file, "a") as to_log:
                to_log.write("{}\t{}\n".format(item, datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M")))
        except OSError:
            logger.warn("Could not remove path {} from {}".format(item, root_dir))
            continue
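
cleanup_uppmax only reads a few keys from the 'cleanup' section of CONFIG. A sketch of the layout those CONFIG.get(...) calls imply; the site names follow the comments above (illumina/analysis/archive/swestore) and all paths are hypothetical:

CONFIG = {
    'cleanup': {
        'deleted_log': 'cleaned',                     # subdirectory holding <name>/<name>.log
        'illumina': {'root': '/path/to/illumina'},
        'analysis': {'root': '/path/to/analysis'},
        'archive': {'root': '/path/to/archive'},
        'swestore': {'root': '/path/to/swestore'},
    }
}
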
Example 4
def cleanup_uppmax(site, days, dry_run=False):
    """Remove project/run that have been closed more than 'days'
    from the given 'site' on uppmax

    :param str site: site where the cleanup should be performed
    :param int days: number of days to check for closed projects
    """
    days = check_days(site, days, config)
    if not days:
        return
    root_dir = CONFIG.get('cleanup').get(site).get('root')
    deleted_log = CONFIG.get('cleanup').get('deleted_log')
    assert os.path.exists(os.path.join(root_dir,deleted_log)), "Log directory {} doesn't exist in {}".format(deleted_log,root_dir)
    log_file = os.path.join(root_dir,"{fl}/{fl}.log".format(fl=deleted_log))

    # make a connection for project db #
    pcon = statusdb.ProjectSummaryConnection()
    assert pcon, "Could not connect to project database in StatusDB"

    if site != "archive":
        ## workflow for cleaning up illumina/analysis ##
        projects = [ p for p in os.listdir(root_dir) if re.match(filesystem.PROJECT_RE,p) ]
        list_to_delete = get_closed_projects(projects, pcon, days)
    else:
        ## workflow for cleaning archive ##
        list_to_delete = []
        archived_in_swestore = filesystem.list_runs_in_swestore(path=CONFIG.get('cleanup').get('swestore').get('root'), no_ext=True)
        runs = [ r for r in os.listdir(root_dir) if re.match(filesystem.RUN_RE,r) ]
        with filesystem.chdir(root_dir):
            for run in runs:
                fc_date = run.split('_')[0]
                if misc.days_old(fc_date) > days:
                    if run in archived_in_swestore:
                        list_to_delete.append(run)
                    else:
                        logger.warn("Run {} is older than {} days but not in "
                                    "swestore, so SKIPPING".format(run, days))

    ## delete and log
    for item in list_to_delete:
        if dry_run:
            logger.info('Will remove {} from {}'.format(item,root_dir))
            continue
        try:
            shutil.rmtree(os.path.join(root_dir,item))
            logger.info('Removed project {} from {}'.format(item,root_dir))
            with open(log_file,'a') as to_log:
                to_log.write("{}\t{}\n".format(item,datetime.strftime(datetime.now(),'%Y-%m-%d %H:%M')))
        except OSError:
            logger.warn("Could not remove path {} from {}"
                        .format(item,root_dir))
            continue
Example 5
def cleanup_swestore(days, dry_run=False):
    """Remove archived runs from swestore

    :param int days: threshold age in days; runs older than this are removed
    """
    days = check_days('swestore', days, config)
    if not days:
        return
    runs = filesystem.list_runs_in_swestore(path=CONFIG.get('cleanup').get('swestore').get('root'))
    for run in runs:
        date = run.split('_')[0]
        if misc.days_old(date) > days:
            if dry_run:
                logger.info('Will remove file {} from swestore'.format(run))
                continue
            misc.call_external_command('irm -f {}'.format(run))
            logger.info('Removed file {} from swestore'.format(run))
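
Every example here leans on misc.days_old to turn a date string into an age in days, but its implementation is not shown. A minimal sketch consistent with how it is called (a date string, an optional format, and a None return when parsing fails, which get_closed_proj_info checks for); the default date format is a guess based on the run-folder names split on '_':

from datetime import datetime

def days_old(date_string, date_format='%y%m%d'):
    """Sketch: number of days since date_string, or None if it cannot be parsed."""
    try:
        then = datetime.strptime(date_string, date_format)
    except (TypeError, ValueError):
        return None
    return (datetime.now() - then).days
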
Example 6
def cleanup_swestore(seconds, dry_run=False):
    """Remove archived runs from swestore

    :param int seconds: threshold age in seconds (days/hours converted to seconds)
    """
    seconds = check_default('swestore', seconds, CONFIG)
    if not seconds:
        return
    runs = filesystem.list_runs_in_swestore(path=CONFIG.get('cleanup').get('swestore').get('root'))
    for run in runs:
        date = run.split('_')[0]
        if misc.to_seconds(misc.days_old(date)) > seconds:
            if dry_run:
                logger.info('Will remove file {} from swestore'.format(run))
                continue
            misc.call_external_command('irm -f {}'.format(run))
            logger.info('Removed file {} from swestore'.format(run))
Example 7
def cleanup_swestore(days, dry_run=False):
    """Remove archived runs from swestore

    :param int days: threshold age in days; runs older than this are removed
    """
    days = check_days('swestore', days, config)
    if not days:
        return
    runs = filesystem.list_runs_in_swestore(path=CONFIG.get('cleanup').get('swestore').get('root'))
    for run in runs:
        date = run.split('_')[0]
        if misc.days_old(date) > days:
            if dry_run:
                logger.info('Will remove file {} from swestore'.format(run))
                continue
            misc.call_external_command('irm -f {}'.format(run))
            logger.info('Removed file {} from swestore'.format(run))
Example 8
def cleanup_swestore(seconds, dry_run=False):
    """Remove archived runs from swestore

    :param int seconds: threshold age in seconds (days/hours converted to seconds)
    """
    seconds = check_default('swestore', seconds, CONFIG)
    if not seconds:
        return
    runs = filesystem.list_runs_in_swestore(
        path=CONFIG.get('cleanup').get('swestore').get('root'))
    for run in runs:
        date = run.split('_')[0]
        if misc.to_seconds(misc.days_old(date)) > seconds:
            if dry_run:
                logger.info('Will remove file {} from swestore'.format(run))
                continue
            misc.call_external_command('irm -f {}'.format(run))
            logger.info('Removed file {} from swestore'.format(run))
Example 9
def get_closed_proj_info(prj, pdoc):
    """check and return a dict if project is closed"""
    pdict = None
    if not pdoc:
        logger.warn("Seems like project {} dont have a proper statudb document, skipping it".format(prj))
    elif "close_date" in pdoc:
        closed_date = pdoc['close_date']
        closed_days = misc.days_old(closed_date, "%Y-%m-%d")
        if closed_days is not None and isinstance(closed_days, int):
            pdict = {'name' : pdoc.get('project_name'),
                     'pid' : pdoc.get('project_id'),
                     'closed_date' : closed_date,
                     'closed_days' : closed_days,
                     'bioinfo_responsible' : pdoc.get('project_summary',{}).get('bioinfo_responsible','')}
        else:
            logger.warn("Problem calculating closed days for project {} with close data {}. Skipping it".format(
                        pdoc.get('project_name'), closed_date))
                     
    return pdict
Example 10
def get_closed_projects(projs, pj_con, days):
    """Takes list of project and gives project list that are closed
    more than given check 'days'

    :param list projs: list of projects to check
    :param obj pj_con: connection object to project database
    :param int days: number of days to check
    """
    closed_projs = []
    for proj in projs:
        if proj not in pj_con.name_view.keys():
            logger.warn("Project {} is not in database, so SKIPPING it..".format(proj))
            continue
        proj_db_obj = pj_con.get_entry(proj)
        try:
            proj_close_date = proj_db_obj["close_date"]
        except KeyError:
            logger.warn("Project {} is either open or too old, so SKIPPING it..".format(proj))
            continue
        if misc.days_old(proj_close_date, date_format="%Y-%m-%d") > days:
            closed_projs.append(proj)
    return closed_projs
Example 11
def get_closed_projects(projs, pj_con, seconds):
    """Takes list of project and gives project list that are closed
    more than given time(as seconds)

    :param list projs: list of projects to check
    :param obj pj_con: connection object to project database
    :param int seconds: threshold age in seconds (days/hours converted to seconds)
    """
    closed_projs = []
    for proj in projs:
        if proj not in pj_con.name_view.keys():
            logger.warn("Project {} is not in database, so SKIPPING it.."
                        .format(proj))
            continue
        proj_db_obj = pj_con.get_entry(proj)
        try:
            proj_close_date = proj_db_obj['close_date']
        except KeyError:
            logger.warn("Project {} is either open or too old, so SKIPPING it..".format(proj))
            continue
        if misc.to_seconds(days=misc.days_old(
                proj_close_date, date_format='%Y-%m-%d')) > seconds:
            closed_projs.append(proj)
    return closed_projs
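
The seconds-based variants also call misc.to_seconds to express a days/hours threshold in seconds before comparing. A minimal sketch, assuming it takes days and hours keyword arguments, matching how the seconds-based docstrings describe the threshold (days/hours converted to seconds):

def to_seconds(days=0, hours=0):
    """Sketch: express a days/hours threshold as seconds."""
    return days * 24 * 60 * 60 + hours * 60 * 60
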