Ejemplo n.º 1
0
 def vprint(*text, **kwa):
     """
     Print-like helper that forwards everything to v_print.

     Any keyword arguments given by the caller (the original note mentions
     mv / min_verbosity and append) are passed through unchanged, except
     that "file" and "verbosity" are always overwritten with the enclosing
     scope's file_to_print_to and verbose.
     """
     # Force the destination and verbosity level taken from the outer scope.
     kwa["file"] = file_to_print_to
     kwa["verbosity"] = verbose
     v_print(*text, **kwa)
Ejemplo n.º 2
0
def stage(file_ls, source_base, target_base, mode, run_type='auto',
          ignore_ls=None, job_fn=None, out_fn=None, name=None, afterok=None,
          afterany=None, startonhold=False, verbose=0, file_to_print_to=None):
    """
    Work out which of the requested files need to be staged from
    source_base to target_base and collect their sizes.

    This function should only run on a data mover node ('dmn' in the host
    name), where all file systems are seen; a UserWarning is issued on any
    other host.

    Arguments:
    file_ls           -- paths relative to source_base; directories are
                         walked recursively and every file below them is
                         considered
    source_base       -- base directory to stage from ('~' is expanded)
    target_base       -- base directory to stage to ('~' is expanded)
    mode              -- 'non-exist' (keep files missing on the target),
                         'newer' (keep files that are newer on the source or
                         missing on the target) or 'force' (keep everything)
    ignore_ls         -- substrings; a file whose relative path contains any
                         of them is never staged (default: no patterns)
    job_fn, out_fn    -- when None, default to time-stamped paths under
                         ~/vervet_project/staging/jobscript and
                         ~/vervet_project/staging/log
    afterok, afterany -- default to empty lists
    verbose, file_to_print_to
                      -- passed to v_print by the nested vprint helper; if
                         file_to_print_to is given, that file is emptied first
    run_type, name, startonhold
                      -- accepted but not used in this part of the function

    Returns (None, None, 0) when no file is left to stage.
    Raises ValueError if mode is not one of the supported modes.
    """
    # Empty the file to print to (usually things are appended).
    if file_to_print_to is not None:
        v_print("",file=file_to_print_to,append=False)

    # Prints to stdout if file_to_print_to is None (per the original note).
    def vprint(*text,**kwa):
        """
        use similar to python3 print function
        additional argument mv or min_verbosity
        and append (to know whether append to file or not)
        """
        kwa.update({"file":file_to_print_to,"verbosity":verbose})
        v_print(*text,**kwa)

    def add_if_newer(file,file_ls):
        """Append file to file_ls if it is newer on the source or missing on the target."""
        source_time = os.path.getmtime(os.path.join(source_base,file))
        # Only the target lookup is guarded: an OSError here means the file
        # cannot be stat'ed on the target, so it has to be staged.
        try:
            target_time = os.path.getmtime(os.path.join(target_base,file))
        except OSError:
            file_ls.append(file)
            vprint("Staging, not exist on target:"+file,mv=1)
            return
        # Compare at whole-second resolution; presumably because sub-second
        # mtime precision differs between file systems -- TODO confirm.
        if int(source_time) > int(target_time):
            file_ls.append(file)
            vprint("Staging, source newer:"+file,mv=1)
        else:
            vprint("Not staging, not newer on source: "+file,mv=1)

    def add_if_not_exist(file,file_ls):
        """Append file to file_ls if it does not exist below target_base."""
        if not os.path.exists(os.path.join(os.path.expanduser(target_base),file)):
            file_ls.append(file)

    def add_if(file,file_ls):
        """Apply the ignore patterns, then the mode-specific rule, to one file."""
        for ign in ignore_ls:
            if ign in file:
                vprint("Not staging, matching ignore pattern {}: ".format(ign)+file,mv=1)
                return
        if mode == "newer":
            add_if_newer(file,file_ls)
        elif mode == "non-exist":
            add_if_not_exist(file,file_ls)
        elif mode == "force":
            # Compare against the string "force"; a bare name here would
            # raise NameError as soon as force mode is used.
            file_ls.append(file)

    vprint("output of",__file__,mv=10)

    if afterany is None:
        afterany = []
    if afterok is None:
        afterok = []
    if ignore_ls is None:
        ignore_ls = []

    host = socket.gethostname()
    if 'dmn' not in host:
        warnings.warn("This script should be run on mendel data mover nodes where all file-systems are seen. However, host name is {}".format(host),UserWarning)

    source_base = os.path.expanduser(source_base)
    target_base = os.path.expanduser(target_base)

    # attention, this is now duplicated (here and in the write_jobscript function), find a solution for this
    time = datetime.datetime.now().strftime("%Y%m%d-%H.%M%S")
    if job_fn is None:
        job_fn = os.path.expanduser("~/vervet_project/staging/jobscript/stage_{}.sh".format(time))
    if out_fn is None:
        out_fn = os.path.expanduser("~/vervet_project/staging/log/stage_{}".format(time))

    # all staging modes should preserve timestamp
    modes = ['non-exist','newer','force']
    if mode not in modes:
        raise ValueError('stage_mode must be in {}'.format(modes))

    # Walk through the directories and decide whether to add the files.
    # The per-mode filtering happens in add_if, which is applied to every
    # file found.
    #TODO: incorporate the size check into the function here! Then only one loop is necessary...
    n_files = 0
    retained_files = []
    for file in file_ls:
        if os.path.isdir(os.path.join(source_base,file)):
            for root, _, fs in os.walk(os.path.join(source_base,file)):
                # Path of the walked directory relative to source_base.
                # relpath (instead of slicing by len(source_base)+1) stays
                # correct when source_base ends with a path separator.
                rel_path = os.path.relpath(root,source_base)
                if rel_path == os.curdir:
                    rel_path = ""
                for f in fs:
                    n_files += 1
                    add_if(os.path.join(rel_path,f),retained_files)
        else:
            n_files += 1
            add_if(file,retained_files)
    vprint("Staging " + str(len(retained_files)) + " out of " + str(n_files) + " files." ,mv=1)
    file_ls = retained_files

    if not file_ls:
        vprint("Nothing to stage in mode {0}".format(mode))
        return (None, None, 0)

    # Collect the size of every file that will be staged; files whose size
    # cannot be read are reported and left out of the list.
    sizes = []
    for file in file_ls:
        try:
            sizes.append(os.path.getsize(os.path.join(source_base,file)))
        except OSError as e:
            # warnings.warn returns None, so issue the warning and log the
            # message text separately.
            msg = "Can't check size of file. Does not exist. Copy operation might not be optimised. "+str(e)
            warnings.warn(msg,UserWarning)
            vprint(msg,mv=1)
Ejemplo n.º 3
0
def local_prepare_staging(file_ls_or_fnfname, partner, direction, mode,
                          run_type='auto', afterok=None, afterany=None,
                          startonhold=False, job_fn=None, out_fn=None,
                          job_name=None, verbose=0, project='vervet',
                          file_to_print_to=None):
    """
    Build a dmn_stage.py command line and run it on the data mover node.

    This is run anywhere (lws12, mendel login or dmn). When the local host
    name contains 'dmn' the command is executed directly, otherwise it is
    sent through "ssh dmn.mendel.gmi.oeaw.ac.at nohup <command>".

    Arguments:
    file_ls_or_fnfname -- either a list (or tuple) of files to stage or the
                          path to a file that contains one filename per line
    partner            -- 'lab' or 'scratch'; the staging partner of the
                          project directory
    direction          -- 'in' (project -> partner) or 'out'
                          (partner -> project)
    mode               -- 'non-exist', 'newer' or 'force'; in 'non-exist'
                          mode files already present at the destination are
                          dropped before the command is built
    run_type, job_fn, out_fn, job_name, afterok, afterany, startonhold,
    verbose            -- translated into dmn_stage.py options
                          (-t, -j, -o, -n, --afterok, --afterany, -H, -v)
    project            -- prefix of the base directories
                          (~/<project>_project and ~/<project>_<partner>)
    file_to_print_to   -- if given, this file is emptied and used for the
                          messages of this function; dmn_stage.py gets
                          "<file_to_print_to>.still_using_stagepy" via -l

    Returns (out, err, rc) of the executed command, or (None, None, 0) if
    there is nothing to stage.

    Raises TypeError for an unsupported first argument, ValueError for an
    invalid partner, direction or mode, and Exception when staging to
    scratch is requested from a host that is neither a login node nor dmn.
    """
    # Empty the file to print to (usually things are appended).
    if file_to_print_to is not None:
        v_print("",file=file_to_print_to,append=False)

    # Prints to stdout if file_to_print_to is None (per the original note).
    def vprint(*text,**kwa):
        """
        use similar to python3 print function
        additional argument mv or min_verbosity
        and append (to know whether append to file or not)
        """
        kwa.update({"file":file_to_print_to,"verbosity":verbose})
        v_print(*text,**kwa)

    vprint(datetime.datetime.now().strftime("%Y%m%d-%H%M%S")+" - running local_prepare_staging in stage.py",mv=1)

    if afterany is None:
        afterany = []
    if afterok is None:
        afterok = []

    if isinstance(file_ls_or_fnfname, str):
        # A string is the path of a file listing one filename per line.
        with open(file_ls_or_fnfname,'r') as f:
            file_ls = [s.strip() for s in f]
    elif isinstance(file_ls_or_fnfname, (list, tuple)):
        file_ls = list(file_ls_or_fnfname)
    else:
        raise TypeError('First argument should be list of filenames or path to a file that contains filenames. But it is {0}:{1}'.format(type(file_ls_or_fnfname),file_ls_or_fnfname))

    # sanity check for input
    partners = ['lab','scratch']
    if partner not in partners:
        raise ValueError('staging partner (source/destination other than /project) must be in {}'.format(partners))
    host = socket.gethostname()

    directions = ['in','out']
    if direction not in directions:
        raise ValueError('direction must be in {}'.format(directions))

    modes = ['non-exist','newer','force']
    if mode not in modes:
        raise ValueError('stage_mode must be in {}'.format(modes))

    project_base = os.path.join("~/",project + '_project')
    scratch_base = os.path.join("~/",project + '_'  +partner)

    # direction was validated above, so exactly one branch applies.
    if direction == 'in':
        source_base = project_base
        destination_base = scratch_base
    elif direction == 'out':
        source_base = scratch_base
        destination_base = project_base

    if partner == 'scratch' and 'login' not in host and 'dmn' not in host:
        raise Exception('staging to scratch only implemented when running on mendel. Your host name is {}'.format(host))

    if mode == "non-exist":
        # Check whether files exist locally and keep only the missing ones.
        expanded_destination = os.path.expanduser(destination_base)
        file_ls = [file for file in file_ls
                   if not os.path.exists(os.path.join(expanded_destination,file))]

    if not file_ls:
        vprint("Nothing to stage in mode {0}".format(mode),mv=0)
        return (None, None,0)

    # NOTE(review): the command is assembled as a single shell string and
    # run with shell=True; file names containing spaces or shell
    # metacharacters are not quoted.
    command = "dmn_stage.py -m {mode} -t {run_type}  {source_base} {target_base} {files}".format(mode=mode,run_type=run_type,source_base=source_base,target_base=destination_base,files=' '.join(file_ls))

    command += " -v " + str(verbose)

    if job_fn is not None:
        command += " -j " + job_fn

    if out_fn is not None:
        command += " -o " + out_fn

    if job_name is not None:
        command += " -n " + job_name

    if afterok:
        command += " --afterok {}".format(' '.join(afterok))
    if afterany:
        command += " --afterany {}".format(' '.join(afterany))
    if startonhold:
        command += " -H"
    if file_to_print_to is not None:
        command += " -l {}.still_using_stagepy".format(file_to_print_to)

    if 'dmn' in host:
        # Already on the data mover node: run the command directly.
        vprint('command:',mv=1)
        p = subprocess.Popen(command, shell=True,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    else:
        vprint('command submitted to dmn via ssh dmn.mendel.gmi.oeaw.ac.at nohup <command>:',mv=1)
        p = subprocess.Popen("ssh dmn.mendel.gmi.oeaw.ac.at nohup {0}".format(command), shell=True, stdout=subprocess.PIPE,stderr=subprocess.PIPE)
    out, err = p.communicate()
    rc = p.returncode

    # The command text completes the 'command...' label printed above.
    vprint(command,mv=1)

    vprint('dmn_stage.py out: ' + out.strip(),mv=1)
    vprint('dmn_stage.py err: ' + err.strip(),mv=1)
    return out, err, rc