def get_closed_projects(projs, pj_con, seconds):
    """Takes list of project and gives project list that are closed
    more than given time(as seconds)

    :param list projs: list of projects to check
    :param obj pj_con: connection object to project database
    :param int seconds: Days/hours converted as seconds to check
    """
    selected = []
    for candidate in projs:
        # Unknown projects cannot be checked against the database at all.
        if candidate not in pj_con.name_view.keys():
            logger.warn("Project {} is not in database, so SKIPPING it..".format(candidate))
            continue
        entry = pj_con.get_entry(candidate)
        try:
            close_date = entry['close_date']
        except KeyError:
            # No close_date means the project is still open (or predates the field).
            logger.warn("Project {} is either open or too old, so SKIPPING it..".format(candidate))
            continue
        # Age since closing, converted to seconds for comparison with the threshold.
        closed_for = misc.to_seconds(days=misc.days_old(close_date, date_format='%Y-%m-%d'))
        if closed_for > seconds:
            selected.append(candidate)
    return selected
def get_closed_proj_info(prj, pdoc):
    """check and return a dict if project is closed"""
    # Guard clause: no statusdb document at all — nothing to report.
    if not pdoc:
        logger.warn(
            "Seems like project {} dont have a proper statudb document, skipping it"
            .format(prj))
        return None
    # Projects without a close_date are still open; silently skip them.
    if "close_date" not in pdoc:
        return None
    close_date = pdoc['close_date']
    age_days = misc.days_old(close_date, "%Y-%m-%d")
    # De Morgan of the original validity check: bail unless we got a real int.
    if age_days is None or not isinstance(age_days, int):
        logger.warn(
            "Problem calculating closed days for project {} with close data {}. Skipping it"
            .format(pdoc.get('project_name'), close_date))
        return None
    responsible = pdoc.get('project_summary', {}).get('bioinfo_responsible', '').encode('ascii', 'ignore')
    return {
        'name': pdoc.get('project_name'),
        'pid': pdoc.get('project_id'),
        'closed_date': close_date,
        'closed_days': age_days,
        'bioinfo_responsible': responsible,
    }
def cleanup_uppmax(site, days, dry_run=False):
    """Remove project/run that have been closed more than 'days'
    from the given 'site' on uppmax

    :param str site: site where the cleanup should be performed
    :param int days: number of days to check for closed projects
    :param bool dry_run: if True, only log what would be removed
    """
    # Normalize/validate the threshold; a falsy result aborts the cleanup.
    # NOTE(review): lowercase 'config' here vs 'CONFIG' below — presumably a
    # module-level name; confirm it is defined and intended.
    days = check_days(site, days, config)
    if not days:
        return
    root_dir = CONFIG.get("cleanup").get(site).get("root")
    deleted_log = CONFIG.get("cleanup").get("deleted_log")
    # The deletion log directory must pre-exist; refuse to run otherwise.
    assert os.path.exists(os.path.join(root_dir, deleted_log)), "Log directory {} doesn't exist in {}".format(
        deleted_log, root_dir
    )
    log_file = os.path.join(root_dir, "{fl}/{fl}.log".format(fl=deleted_log))
    # make a connection for project db #
    pcon = statusdb.ProjectSummaryConnection()
    assert pcon, "Could not connect to project database in StatusDB"
    if site != "archive":
        ## work flow for cleaning up illumina/analysis ##
        # Candidates are directory entries matching the project name pattern;
        # only those closed in statusdb for more than 'days' are kept.
        projects = [p for p in os.listdir(root_dir) if re.match(filesystem.PROJECT_RE, p)]
        list_to_delete = get_closed_projects(projects, pcon, days)
    else:
        ##work flow for cleaning archive ##
        list_to_delete = []
        # Runs already archived in swestore are safe to delete locally.
        archived_in_swestore = filesystem.list_runs_in_swestore(
            path=CONFIG.get("cleanup").get("swestore").get("root"), no_ext=True
        )
        runs = [r for r in os.listdir(root_dir) if re.match(filesystem.RUN_RE, r)]
        with filesystem.chdir(root_dir):
            for run in runs:
                # Run names start with the flowcell date (e.g. YYMMDD_...).
                fc_date = run.split("_")[0]
                if misc.days_old(fc_date) > days:
                    if run in archived_in_swestore:
                        list_to_delete.append(run)
                    else:
                        # Old but not archived: never delete the only copy.
                        logger.warn(
                            "Run {} is older than {} days but not in "
                            "swestore, so SKIPPING".format(run, days)
                        )
    ## delete and log
    for item in list_to_delete:
        if dry_run:
            logger.info("Will remove {} from {}".format(item, root_dir))
            continue
        try:
            shutil.rmtree(os.path.join(root_dir, item))
            logger.info("Removed project {} from {}".format(item, root_dir))
            # Append a timestamped record of each successful removal.
            with open(log_file, "a") as to_log:
                to_log.write("{}\t{}\n".format(item, datetime.strftime(datetime.now(), "%Y-%m-%d %H:%M")))
        except OSError:
            # Best-effort: log the failure and keep going with the rest.
            logger.warn("Could not remove path {} from {}".format(item, root_dir))
            continue
def cleanup_uppmax(site, days, dry_run=False):
    """Remove project/run that have been closed more than 'days'
    from the given 'site' on uppmax

    :param str site: site where the cleanup should be performed
    :param int days: number of days to check for closed projects
    """
    days = check_days(site, days, config)
    if not days:
        return
    root_dir = CONFIG.get('cleanup').get(site).get('root')
    deleted_log = CONFIG.get('cleanup').get('deleted_log')
    assert os.path.exists(os.path.join(root_dir, deleted_log)), \
        "Log directory {} doesn't exist in {}".format(deleted_log, root_dir)
    log_file = os.path.join(root_dir, "{fl}/{fl}.log".format(fl=deleted_log))
    # make a connection for project db #
    pcon = statusdb.ProjectSummaryConnection()
    assert pcon, "Could not connect to project database in StatusDB"
    if site == "archive":
        ## work flow for cleaning archive: keep only runs that are both old
        ## enough and already safely archived in swestore ##
        to_remove = []
        in_swestore = filesystem.list_runs_in_swestore(
            path=CONFIG.get('cleanup').get('swestore').get('root'), no_ext=True)
        run_names = [entry for entry in os.listdir(root_dir)
                     if re.match(filesystem.RUN_RE, entry)]
        with filesystem.chdir(root_dir):
            for run in run_names:
                # run names begin with the flowcell date
                flowcell_date = run.split('_')[0]
                if misc.days_old(flowcell_date) <= days:
                    continue
                if run in in_swestore:
                    to_remove.append(run)
                else:
                    logger.warn("Run {} is older than {} days but not in "
                                "swestore, so SKIPPING".format(run, days))
    else:
        ## work flow for cleaning up illumina/analysis: projects closed in
        ## statusdb for longer than the threshold ##
        candidates = [entry for entry in os.listdir(root_dir)
                      if re.match(filesystem.PROJECT_RE, entry)]
        to_remove = get_closed_projects(candidates, pcon, days)
    ## delete and log
    for target in to_remove:
        if dry_run:
            logger.info('Will remove {} from {}'.format(target, root_dir))
            continue
        try:
            shutil.rmtree(os.path.join(root_dir, target))
            logger.info('Removed project {} from {}'.format(target, root_dir))
            stamp = datetime.strftime(datetime.now(), '%Y-%m-%d %H:%M')
            with open(log_file, 'a') as log_handle:
                log_handle.write("{}\t{}\n".format(target, stamp))
        except OSError:
            logger.warn("Could not remove path {} from {}".format(target, root_dir))
            continue
def cleanup_swestore(days, dry_run=False):
    """Remove archived runs from swestore

    :param int days: Threshold days to check and remove
    """
    days = check_days('swestore', days, config)
    if not days:
        return
    swestore_root = CONFIG.get('cleanup').get('swestore').get('root')
    for run in filesystem.list_runs_in_swestore(path=swestore_root):
        # run names start with the flowcell date; young runs are kept
        run_date = run.split('_')[0]
        if misc.days_old(run_date) <= days:
            continue
        if dry_run:
            logger.info('Will remove file {} from swestore'.format(run))
            continue
        misc.call_external_command('irm -f {}'.format(run))
        logger.info('Removed file {} from swestore'.format(run))
def cleanup_swestore(seconds, dry_run=False):
    """Remove archived runs from swestore

    :param int seconds: Days/hours converted as seconds to check
    :param bool dry_run: if True, only log what would be removed
    """
    # BUG FIX: the original passed an undefined name 'site' to check_default()
    # (this function has no 'site' parameter), which raised NameError on every
    # call. The swestore cleanup always targets the 'swestore' site, matching
    # the days-based variant's check_days('swestore', ...).
    seconds = check_default('swestore', seconds, CONFIG)
    if not seconds:
        return
    runs = filesystem.list_runs_in_swestore(path=CONFIG.get('cleanup').get('swestore').get('root'))
    for run in runs:
        # run names start with the flowcell date prefix
        date = run.split('_')[0]
        # pass days= explicitly, consistent with get_closed_projects()
        if misc.to_seconds(days=misc.days_old(date)) > seconds:
            if dry_run:
                logger.info('Will remove file {} from swestore'.format(run))
                continue
            misc.call_external_command('irm -f {}'.format(run))
            logger.info('Removed file {} from swestore'.format(run))
def cleanup_swestore(seconds, dry_run=False):
    """Remove archived runs from swestore

    :param int seconds: Days/hours converted as seconds to check
    :param bool dry_run: if True, only log what would be removed
    """
    # BUG FIX: 'site' was referenced here but is not defined anywhere in this
    # function (no such parameter), so the call raised NameError. Use the
    # literal 'swestore' key, as the days-based variant does with check_days.
    seconds = check_default('swestore', seconds, CONFIG)
    if not seconds:
        return
    runs = filesystem.list_runs_in_swestore(
        path=CONFIG.get('cleanup').get('swestore').get('root'))
    for run in runs:
        # flowcell date is the leading underscore-separated field
        date = run.split('_')[0]
        # keyword form matches the to_seconds(days=...) usage elsewhere
        if misc.to_seconds(days=misc.days_old(date)) > seconds:
            if dry_run:
                logger.info('Will remove file {} from swestore'.format(run))
                continue
            misc.call_external_command('irm -f {}'.format(run))
            logger.info('Removed file {} from swestore'.format(run))
def get_closed_proj_info(prj, pdoc):
    """check and return a dict if project is closed"""
    # No usable statusdb document: warn and report nothing.
    if not pdoc:
        logger.warn("Seems like project {} dont have a proper statudb document, skipping it".format(prj))
        return None
    # Still-open projects carry no close_date; skip quietly.
    if "close_date" not in pdoc:
        return None
    when_closed = pdoc["close_date"]
    days_closed = misc.days_old(when_closed, "%Y-%m-%d")
    if not (days_closed is not None and isinstance(days_closed, int)):
        logger.warn("Problem calculating closed days for project {} with close data {}. Skipping it".format(
            pdoc.get('project_name'), when_closed))
        return None
    return {
        'name': pdoc.get('project_name'),
        'pid': pdoc.get('project_id'),
        'closed_date': when_closed,
        'closed_days': days_closed,
        'bioinfo_responsible': pdoc.get('project_summary', {}).get('bioinfo_responsible', ''),
    }
def get_closed_projects(projs, pj_con, days):
    """Takes list of project and gives project list that are closed
    more than given check 'days'

    :param list projs: list of projects to check
    :param obj pj_con: connection object to project database
    :param int days: number of days to check
    """
    old_enough = []
    for name in projs:
        # Skip anything the project database has never heard of.
        if name not in pj_con.name_view.keys():
            logger.warn("Project {} is not in database, so SKIPPING it..".format(name))
            continue
        record = pj_con.get_entry(name)
        try:
            closed_on = record["close_date"]
        except KeyError:
            # Missing close_date: project is open (or predates the field).
            logger.warn("Project {} is either open or too old, so SKIPPING it..".format(name))
            continue
        if misc.days_old(closed_on, date_format="%Y-%m-%d") > days:
            old_enough.append(name)
    return old_enough
def get_closed_projects(projs, pj_con, seconds):
    """Takes list of project and gives project list that are closed
    more than given time(as seconds)

    :param list projs: list of projects to check
    :param obj pj_con: connection object to project database
    :param int seconds: Days/hours converted as seconds to check
    """
    result = []
    for name in projs:
        # Only projects present in the database can be evaluated.
        if name not in pj_con.name_view.keys():
            logger.warn("Project {} is not in database, so SKIPPING it.."
                        .format(name))
            continue
        record = pj_con.get_entry(name)
        try:
            closed_on = record['close_date']
        except KeyError:
            logger.warn("Project {} is either open or too old, so SKIPPING it..".format(name))
            continue
        # Convert the closed-for duration to seconds before comparing.
        closed_age_days = misc.days_old(closed_on, date_format='%Y-%m-%d')
        if misc.to_seconds(days=closed_age_days) > seconds:
            result.append(name)
    return result