def process_log(muted, sims_dir=''):
    '''
    Reduce the simulations referenced by a log to those usable in `sims_dir`.

    The candidate list comes from `get_available_muts(muted)` and is then
    passed through two filters, both pointed at `sims_dir`:

    1. `check_availability` - presumably drops entries with no matching
       directory under `sims_dir` (confirm against that helper).
    2. `clean_empty` - called with `requested=['.vcf.gz']`; presumably drops
       entries lacking that file (confirm against that helper).

    Parameters
    ----------
    muted : forwarded untouched to `get_available_muts`.
    sims_dir : directory handed to both filters as `dir_check`.

    Returns
    -------
    The first element returned by `clean_empty`, i.e. the surviving entries.
    The rejected entries reported by each filter are discarded here.
    '''
    candidates = get_available_muts(muted)

    # Clean the data set, i.e. account for aborted runs.
    present, _missing = check_availability(candidates, dir_check=sims_dir)
    usable, _empty = clean_empty(
        present,
        str_format='',
        dir_check=sims_dir,
        requested=['.vcf.gz'],
    )

    return usable
def process_dir(sims_dir=''):
    '''
    Reduce the entries found in `sims_dir` to those usable as simulations.

    Unlike a log-driven lookup, the candidate list here is simply every name
    returned by `os.listdir(sims_dir)`. It is then passed through two
    filters, both pointed at `sims_dir`:

    1. `check_availability` - presumably drops entries that are not
       available under `sims_dir` (confirm against that helper).
    2. `clean_empty` - called with `requested=['.vcf.gz', 'fa.gz']`;
       presumably drops entries lacking those files (confirm against that
       helper).

    A one-line summary with the number of entries rejected by each filter
    is printed to stdout.

    Parameters
    ----------
    sims_dir : directory to list; also handed to both filters as `dir_check`.
        NOTE(review): the default '' is passed straight to `os.listdir` -
        confirm callers always supply a real path.

    Returns
    -------
    The first element returned by `clean_empty`, i.e. the surviving entries.
    '''
    # os.listdir already returns a fresh list of names.
    entries = os.listdir(sims_dir)

    # Clean the data set, i.e. account for aborted runs.
    present, missing = check_availability(entries, dir_check=sims_dir)
    usable, incomplete = clean_empty(
        present,
        str_format='',
        dir_check=sims_dir,
        requested=['.vcf.gz', 'fa.gz'],
    )

    n_missing = len(missing)
    n_incomplete = len(incomplete)
    print('missing: {}, no vcf: {}'.format(n_missing, n_incomplete))

    return usable