Example #1
0
def compare_files(db,rds,v,admin):
    ''' Compare files of remote and local version of a dataset
        :argument db: database session
        :argument rds: dictionary of remote dataset object selected attributes
        :argument v:  local version object
        :argument admin: if True write changes to the database, otherwise only log them
        :return: result set, NB updating VersionFile objects in database if calculating checksums
    '''
    extra=None
    local_files_num=len(v.files)
    # if there are no files on db for local version add them
    if v.files==[]:
        rows=[]
        for f in v.build_filepaths():
            rows.append(dict(filename=f.split("/")[-1], version_id=v.id))
        if admin:
            add_bulk_items(db, VersionFile, rows)
        else:
            for r in rows:
                write_log("new file "+ str(r) + "\n")
        local_files_num=len(rows)
    # first compare tracking_ids if all are present in local version
    # if a file is INVALID or missing skip both tracking-id and checksums comparison
    local_ids=[x for x in v.tracking_ids() if x not in [None,""]]
    # BUG FIX: the original tested '"INVALID" not in local_ids', which returned
    # early in the normal (all-valid) case and ran the comparison when a file
    # was invalid -- the opposite of the intent stated in the comment above.
    if "INVALID" in local_ids or local_files_num != len(rds['files']):
        return extra
    if len(local_ids)>0:
        extra = compare_tracking_ids(rds['tracking_ids'],local_ids)
    # calculate checksums and update local db if necessary
    # uncomment this to check also if tracking_ids are the same
    #if extra is None or  extra==set([]):
    # if tracking_ids are not present compare checksums
    if extra is None:
        local_sums=[]
        cktype=str(rds['checksum_type']).lower()
        for f in v.files:
            try:
                cksum=f.__dict__[cktype]
            except (TypeError, KeyError):
                # checksum attribute missing for this file: recompute below
                cksum=None
            if cksum in ["",None]:
                cksum=check_hash(v.path+"/"+f.filename,cktype)
                if admin:
                    update_item(db,VersionFile,f.id,{cktype:cksum})
                else:
                    write_log(" ".join([cktype,str(f.id),cksum,"\n"]))
            local_sums.append(cksum)
        extra = compare_checksums(rds['checksums'],local_sums)
    return extra
def compare_files(db,rds,v,admin):
    ''' Compare files of remote and local version of a dataset
        :argument db: database session
        :argument rds: dictionary of remote dataset object selected attributes
        :argument v:  local version object
        :argument admin: if True write changes to the database, otherwise only log them
        :return: result set, NB updating VersionFile objects in database if calculating checksums
    '''
    extra=set([])
    # if there are no files on db for local version add them
    if v.filenames()==[]:
        rows=[]
        for f in v.build_filepaths():
            checksum=check_hash(f,'sha256')
            rows.append(dict(filename=f.split("/")[-1], sha256=checksum, version_id=v.id))
        if admin:
            add_bulk_items(db, VersionFile, rows)
        else:
            for r in rows:
                write_log("new file "+ str(r) + "\n")
    # first compare tracking_ids if all are present in local version
    local_ids=v.tracking_ids()
    if (local_ids and "" not in local_ids):
        extra = compare_tracking_ids(rds['tracking_ids'],local_ids)
    # if tracking_ids are the same or if they are not present compare checksums
    # calculate checksums and update local db if necessary
    if extra==set([]):
        # the md5 and sha256 branches were copy-paste duplicates differing only
        # in the attribute name: handle both with one parameterized loop
        cktype = 'md5' if rds['checksum_type'] in ['md5','MD5'] else 'sha256'
        local_sums=[]
        for f in v.files:
            if getattr(f, cktype) in ["", None]:
                # checksum missing locally: compute it and persist (or log)
                setattr(f, cktype, check_hash(v.path+"/"+f.filename, cktype))
                if admin:
                    update_item(db,VersionFile,f.id,{cktype: getattr(f, cktype)})
                else:
                    write_log(" ".join([cktype,str(f.id),getattr(f, cktype),"\n"]))
            local_sums.append(getattr(f, cktype))
        extra = compare_checksums(rds['checksums'],local_sums)
    return extra
        print(inst_obj.id,new)
        #P use following two lines  if tmp/tree
        #kw_version['version'] = find_version(bits[:-1], version)
        #kw_version['path'] = '/'.join(bits[:-1])
        kw_version['instance_id'] = inst_obj.id
        # NOTE(review): fragment -- the enclosing function's def line is outside
        # this view; inst_obj, versions, kw_version, kw_files, inst come from
        # earlier code not shown here.
        for v in versions:
            # add version to db if not already existing
            kw_version['version'] = v
            files = list_drs_files(inst+"/"+v) 
            # version directory path derived from the first listed file's tree path
            kw_version['path'] = tree_path("/".join([inst,v,files[0]])) 
            #print(kw_version.items())
            v_obj,new = insert_unique(db, Version, **kw_version)
            print(v)
            print(v_obj.id,new)
            # NOTE(review): v_obj.filenames is compared without being called --
            # a bound method is never == [], so the else branch always runs;
            # probably should be v_obj.filenames() -- confirm against the model API
            if v_obj.filenames==[]: 
                rows=[]
                for f in files:
                    checksum=check_hash(v_obj.path+"/"+f,'md5')
                    rows.append(dict(filename=f, md5=checksum, version_id=v_obj.id))
                    # NOTE(review): add_bulk_items is inside the loop, so the growing
                    # rows list is re-inserted every iteration -- looks like it was
                    # meant to be dedented to run once after the loop; confirm
                    add_bulk_items(db, VersionFile, rows)
            else:
                kw_files['version_id']=v_obj.id
                for f in files:
                    kw_files['filename']=f
                    kw_files['md5']=check_hash(v_obj.path+"/"+f,'md5')
                    insert_unique(db, VersionFile, **kw_files)

# need to have function to map bits of path to db instance fields!!
    #model,experiment,variable,mip,ensemble
    #kwargs[k]=
Example #4
0
        fversion=check_version(vers_path+"/"+fpaths[0])
        # NOTE(review): fragment -- the enclosing function's def line is outside
        # this view; vers_path, fpaths, kw_version, kw_instance, kw_files, ctype
        # and db come from earlier code not shown here.
        if fversion: 
            kw_version['version']= fversion
        else:
            # no version could be derived from the path: fall back to "NA"
            kw_version['version']= "NA" 
# add instance to database if does not exist yet
    inst_obj,new = insert_unique(db, Instance, **kw_instance)
# create dictionary of fields for new version
    kw_version['instance_id'] = inst_obj.id
# add version to database if does not exist yet
    v_obj,new = insert_unique(db, Version, **kw_version)
# check if files objects exist already if not add from files dictionary 
# add both tracking-ids and checksums, if checksums are "None" calculate them
# (hash type taken from ctype, defined earlier -- presumably sha256; confirm)
    for i,f in enumerate(kw_files):
        if f['checksum']=="None":
            # checksum unknown remotely: compute it locally under the ctype key
            kw_files[i][ctype]=check_hash(v_obj.path+"/"+f['filename'],ctype)
            f.pop('checksum')
        else:
            # rename the generic 'checksum' key to the concrete hash-type key
            kw_files[i][ctype]=f.pop('checksum')
        if f['tracking_id']=="":
            kw_files[i]['tracking_id']=get_trackid(v_obj.path+"/"+f['filename'])
        kw_files[i]['version_id']=v_obj.id
# add files to database with bulk insert
    if v_obj.filenames()==[]: 
        add_bulk_items(db, VersionFile, kw_files)
# if some files exist already use insert_unique instead 
    else:
        for i,f in enumerate(kw_files):
            insert_unique(db, VersionFile, **f)