def vprint(*text, **kwa):
    """
    Print through v_print; use similar to the python3 print function.

    Additional keyword arguments: mv or min_verbosity, and append (to know
    whether to append to the file or not).  The "file" and "verbosity"
    keywords are always overwritten with file_to_print_to and verbose before
    the call is forwarded.
    """
    forwarded = dict(kwa)
    forwarded["file"] = file_to_print_to
    forwarded["verbosity"] = verbose
    v_print(*text, **forwarded)
def stage(file_ls, source_base, target_base, mode, run_type='auto',
          ignore_ls=None, job_fn=None, out_fn=None, name=None, afterok=None,
          afterany=None, startonhold=False, verbose=0, file_to_print_to=None):
    """
    Work out which files below source_base have to be staged to target_base.

    This function should only run on dmn (the data mover nodes), where all
    file systems are seen; a UserWarning is issued on any other host.

    Arguments:
    file_ls -- paths relative to source_base; directories are walked
               recursively and every file found below them is considered
    source_base, target_base -- base directories ("~" is expanded)
    mode -- 'non-exist': stage a file only if it is missing on the target
            'newer':     stage a file if it is missing on the target or if
                         its modification time on the source is later
                         (compared in whole seconds)
            'force':     stage every file
    ignore_ls -- list of substrings; a file whose relative path contains any
                 of them is never staged
    job_fn, out_fn -- job script / log paths; default to time-stamped files
                      below ~/vervet_project/staging/
    verbose, file_to_print_to -- handed to v_print with every message; if
                      file_to_print_to is given the file is emptied first
    run_type, name, afterok, afterany, startonhold -- accepted, but not used
                      by the code visible in this block

    Returns (None, None, 0) when no file needs staging.
    Raises ValueError if mode is not one of the three modes above.
    """
    # all staging modes should preserve timestamp
    modes = ['non-exist', 'newer', 'force']
    # Validate first, so that a bad call neither empties the log file nor
    # walks the file system.
    if mode not in modes:
        raise ValueError('stage_mode must be in {}'.format(modes))

    # empty the file to print to (usually things are appended)
    if file_to_print_to is not None:
        v_print("", file=file_to_print_to, append=False)

    # prints to stdout if file_to_print_to is None
    def vprint(*text, **kwa):
        """
        use similar to python3 print function
        additional argument mv or min_verbosity
        and append (to know whether append to file or not)
        """
        kwa.update({"file": file_to_print_to, "verbosity": verbose})
        v_print(*text, **kwa)

    def add_if_newer(file, file_ls):
        """Append file if it is missing on the target or newer on the source."""
        source_time = os.path.getmtime(os.path.join(source_base, file))
        try:
            target_time = os.path.getmtime(os.path.join(target_base, file))
        except OSError:
            file_ls.append(file)
            vprint("Staging, not exist on target:" + file, mv=1)
            return
        # Only whole seconds are compared.  NOTE(review): the original
        # comment explaining why was cut off; presumably the file systems
        # store timestamps with different resolution - confirm.
        if int(source_time) > int(target_time):
            file_ls.append(file)
            vprint("Staging, source newer:" + file, mv=1)
        else:
            vprint("Not staging, not newer on source: " + file, mv=1)

    def add_if_not_exist(file, file_ls):
        """Append file if it does not exist below target_base."""
        if not os.path.exists(os.path.join(os.path.expanduser(target_base), file)):
            file_ls.append(file)

    def add_if(file, file_ls):
        """Append file unless an ignore pattern or the staging mode rules it out."""
        for ign in ignore_ls:
            if ign in file:
                vprint("Not staging, matching ignore pattern {}: ".format(ign) + file, mv=1)
                return
        if mode == "newer":
            add_if_newer(file, file_ls)
        elif mode == "non-exist":
            add_if_not_exist(file, file_ls)
        elif mode == "force":
            # was compared against the undefined name `force` (NameError)
            file_ls.append(file)

    vprint("output of", __file__, mv=10)

    if afterany is None:
        afterany = []
    if afterok is None:
        afterok = []
    if ignore_ls is None:
        ignore_ls = []

    host = socket.gethostname()
    if 'dmn' not in host:
        warnings.warn("This script should be run on mendel data mover nodes where all file-systems are seen. "
                      "However, host name is {}".format(host), UserWarning)

    source_base = os.path.expanduser(source_base)
    target_base = os.path.expanduser(target_base)

    # attention, this is now duplicated (here and in the write_jobscript
    # function), find a solution for this
    # NOTE(review): the "." between %H and %M looks like a typo; it is left
    # unchanged because it determines the default file names.
    time = datetime.datetime.now().strftime("%Y%m%d-%H.%M%S")
    if job_fn is None:
        job_fn = os.path.expanduser("~/vervet_project/staging/jobscript/stage_{}.sh".format(time))
    if out_fn is None:
        out_fn = os.path.expanduser("~/vervet_project/staging/log/stage_{}".format(time))

    # walk through the directories and decide whether to add the files
    # TODO: incorporate the size check into the function here!
    # Then only one loop is necessary...
    n_files = 0
    retained_files = []
    for file in file_ls:
        full_path = os.path.join(source_base, file)
        if os.path.isdir(full_path):
            for root, _, fs in os.walk(full_path):
                # relpath instead of slicing by len(source_base)+1, which
                # drops a character when source_base ends with a separator
                rel_path = os.path.relpath(root, source_base)
                if rel_path == os.curdir:
                    rel_path = ""
                for f in fs:
                    n_files += 1
                    add_if(os.path.join(rel_path, f), retained_files)
        else:
            n_files += 1
            add_if(file, retained_files)
    vprint("Staging " + str(len(retained_files)) + " out of " + str(n_files) + " files.", mv=1)
    file_ls = retained_files

    if not file_ls:
        vprint("Nothing to stage in mode {0}".format(mode))
        return (None, None, 0)

    sizes = []
    for file in file_ls:
        try:
            sizes.append(os.path.getsize(os.path.join(source_base, file)))
        except OSError as e:
            # warnings.warn returns None, so log the message text itself
            # rather than the return value of the call.
            message = ("Can't check size of file. Does not exist. "
                       "Copy operation might not be optimised. " + str(e))
            warnings.warn(message, UserWarning)
            vprint(message, mv=1)
    # NOTE(review): the function ends here without using sizes, job_fn or
    # out_fn and without an explicit return value, although the early exit
    # above returns a 3-tuple - the remainder of the function appears to be
    # missing; confirm against the full file.
def local_prepare_staging(file_ls_or_fnfname, partner, direction, mode,
                          run_type='auto', afterok=None, afterany=None,
                          startonhold=False, job_fn=None, out_fn=None,
                          job_name=None, verbose=0, project='vervet',
                          file_to_print_to=None):
    """
    Build the dmn_stage.py command line and run it on the data mover node.

    this is run anywhere (lws12, mendel login or dmn) and establishes an ssh
    connection to dmn where staging proceeds; if the local host name already
    contains 'dmn' the command is run directly instead.

    Arguments:
    file_ls_or_fnfname -- either a list (or tuple) of files to stage or the
                          path to a file that contains the filenames to
                          stage, one per line (blank lines are skipped)
    partner -- 'lab' or 'scratch'; selects ~/<project>_<partner>
    direction -- 'in':  from ~/<project>_project to ~/<project>_<partner>
                 'out': from ~/<project>_<partner> to ~/<project>_project
    mode -- 'non-exist', 'newer' or 'force'; with 'non-exist' files that
            already exist below the local destination are dropped here
    run_type, job_fn, out_fn, job_name, afterok, afterany, startonhold,
    verbose -- translated into the -t, -j, -o, -n, --afterok, --afterany,
               -H and -v options of dmn_stage.py
    file_to_print_to -- log file for the messages of this function (emptied
                        first); dmn_stage.py gets the same name with the
                        suffix ".still_using_stagepy" via -l

    Returns (out, err, rc) of the command, or (None, None, 0) if there is
    nothing to stage.
    Raises TypeError for a wrong first argument, ValueError for an invalid
    partner, direction or mode, and Exception if partner is 'scratch' while
    the host name contains neither 'login' nor 'dmn'.
    """
    # --- sanity check for input; done before the log file is emptied so
    # --- that a bad call has no side effects
    if isinstance(file_ls_or_fnfname, str):
        list_fn = file_ls_or_fnfname
    elif isinstance(file_ls_or_fnfname, (list, tuple)):
        list_fn = None
    else:
        raise TypeError('First argument should be list of filenames or path to a file that contains filenames. '
                        'But it is {0}:{1}'.format(type(file_ls_or_fnfname), file_ls_or_fnfname))

    partners = ['lab', 'scratch']
    if partner not in partners:
        # the message used to format the undefined name `stage_partners`
        raise ValueError('staging partner (source/destination other than /project) must be in {}'.format(partners))
    directions = ['in', 'out']
    if direction not in directions:
        raise ValueError('direction must be in {}'.format(directions))
    modes = ['non-exist', 'newer', 'force']
    if mode not in modes:
        raise ValueError('stage_mode must be in {}'.format(modes))

    host = socket.gethostname()
    if partner == 'scratch' and 'login' not in host and 'dmn' not in host:
        raise Exception('staging to scratch only implemented when running on mendel. Your host name is {}'.format(host))

    # empty the file to print to (usually things are appended)
    if file_to_print_to is not None:
        v_print("", file=file_to_print_to, append=False)

    # prints to stdout if file_to_print_to is None
    def vprint(*text, **kwa):
        """
        use similar to python3 print function
        additional argument mv or min_verbosity
        and append (to know whether append to file or not)
        """
        kwa.update({"file": file_to_print_to, "verbosity": verbose})
        v_print(*text, **kwa)

    vprint(datetime.datetime.now().strftime("%Y%m%d-%H%M%S") + " - running local_prepare_staging in stage.py", mv=1)

    if afterany is None:
        afterany = []
    if afterok is None:
        afterok = []

    if list_fn is None:
        file_ls = list(file_ls_or_fnfname)
    else:
        with open(list_fn, 'r') as f:
            # skip blank lines, they would end up as empty file names
            file_ls = [line.strip() for line in f if line.strip()]

    project_base = os.path.join("~/", project + '_project')
    scratch_base = os.path.join("~/", project + '_' + partner)
    if direction == 'in':
        source_base = project_base
        destination_base = scratch_base
    else:  # direction == 'out'
        source_base = scratch_base
        destination_base = project_base

    if mode == "non-exist":
        # check whether files exist locally
        local_destination = os.path.expanduser(destination_base)
        file_ls = [file for file in file_ls
                   if not os.path.exists(os.path.join(local_destination, file))]

    if not file_ls:
        vprint("Nothing to stage in mode {0}".format(mode), mv=0)
        return (None, None, 0)

    # NOTE(review): the command is a plain string handed to a shell (and to
    # a second shell on the remote side when going through ssh); file names
    # are not quoted, so names containing spaces or shell metacharacters
    # will break it.  The "~/" prefixes rely on this shell expansion.
    parts = ["dmn_stage.py", "-m", mode, "-t", run_type,
             source_base, destination_base, ' '.join(file_ls),
             "-v", str(verbose)]
    if job_fn is not None:
        parts += ["-j", job_fn]
    if out_fn is not None:
        parts += ["-o", out_fn]
    if job_name is not None:
        parts += ["-n", job_name]
    if afterok:
        parts += ["--afterok", ' '.join(str(job) for job in afterok)]
    if afterany:
        parts += ["--afterany", ' '.join(str(job) for job in afterany)]
    if startonhold:
        parts.append("-H")
    if file_to_print_to is not None:
        parts += ["-l", "{}.still_using_stagepy".format(file_to_print_to)]
    command = ' '.join(parts)

    if 'dmn' in host:
        vprint('command:', mv=1)
        to_run = command
    else:
        vprint('command submitted to dmn via ssh dmn.mendel.gmi.oeaw.ac.at nohup <command>:', mv=1)
        to_run = "ssh dmn.mendel.gmi.oeaw.ac.at nohup {0}".format(command)
    # logged before the process starts, so it is on record even if the
    # command hangs
    vprint(command, mv=1)

    p = subprocess.Popen(to_run, shell=True,
                         stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = p.communicate()
    rc = p.returncode

    # On Python 3 the pipes deliver bytes, which cannot be concatenated
    # with the str prefixes below; on Python 2 they already are str.
    if not isinstance(out, str):
        out = out.decode("utf-8", "replace")
    if not isinstance(err, str):
        err = err.decode("utf-8", "replace")

    vprint('dmn_stage.py out: ' + out.strip(), mv=1)
    vprint('dmn_stage.py err: ' + err.strip(), mv=1)
    return out, err, rc