def compare_files(db,rds,v,admin):
    '''Compare files of the remote and local versions of a dataset.

    :argument db: database session used to update VersionFile records
    :argument rds: dictionary of remote dataset object selected attributes
                   (keys used: 'files', 'tracking_ids', 'checksums', 'checksum_type')
    :argument v: local version object
    :argument admin: if True write changes to the database, otherwise only log them
    :return: result set of differences (None when the comparison is skipped),
             NB updates VersionFile objects in the database if calculating checksums
    '''
    extra=None
    local_files_num=len(v.files)
    # if there are no files on db for local version add them
    if v.files==[]:
        rows=[]
        for f in v.build_filepaths():
            rows.append(dict(filename=f.split("/")[-1], version_id=v.id))
        if admin:
            add_bulk_items(db, VersionFile, rows)
        else:
            for r in rows:
                write_log("new file "+ str(r) + "\n")
        local_files_num=len(rows)
    # first compare tracking_ids if all are present in local version
    # if a file is INVALID or missing skip both tracking-id and checksums comparison
    local_ids=[x for x in v.tracking_ids() if x not in [None,""]]
    # BUG FIX: the original tested `"INVALID" not in local_ids`, which skipped the
    # comparison in the normal (all-valid) case and ran it when a file was INVALID —
    # the opposite of what the comment above describes.
    if "INVALID" in local_ids or local_files_num != len(rds['files']):
        return extra
    if len(local_ids)>0:
        extra = compare_tracking_ids(rds['tracking_ids'],local_ids)
    # if tracking_ids are not present compare checksums instead,
    # calculating checksums and updating the local db where necessary
    if extra is None:
        local_sums=[]
        cktype=str(rds['checksum_type']).lower()
        for f in v.files:
            try:
                cksum=f.__dict__[cktype]
            except (TypeError, KeyError):
                cksum=None
            if cksum in ["",None]:
                # checksum missing locally: compute it, then persist (admin) or log it
                cksum=check_hash(v.path+"/"+f.filename,cktype)
                if admin:
                    update_item(db,VersionFile,f.id,{cktype:cksum})
                else:
                    write_log(" ".join([cktype,str(f.id),cksum,"\n"]))
            local_sums.append(cksum)
        extra = compare_checksums(rds['checksums'],local_sums)
    return extra
def compare_files(db,rds,v,admin):
    '''Compare files of the remote and local versions of a dataset.

    NOTE(review): this file contains another definition of compare_files;
    whichever is defined last wins — confirm which one is intended.

    :argument db: database session used to update VersionFile records
    :argument rds: dictionary of remote dataset object selected attributes
                   (keys used: 'tracking_ids', 'checksums', 'checksum_type')
    :argument v: local version object
    :argument admin: if True write changes to the database, otherwise only log them
    :return: result set, NB updates VersionFile objects in the database if
             calculating checksums
    '''
    extra=set([])
    # if there are no files on db for local version add them
    if v.filenames()==[]:
        rows=[]
        for f in v.build_filepaths():
            checksum=check_hash(f,'sha256')
            rows.append(dict(filename=f.split("/")[-1], sha256=checksum, version_id=v.id))
        if admin:
            add_bulk_items(db, VersionFile, rows)
        else:
            for r in rows:
                write_log("new file "+ str(r) + "\n")
    # first compare tracking_ids if all are present in local version
    local_ids=v.tracking_ids()
    if local_ids and "" not in local_ids:
        extra = compare_tracking_ids(rds['tracking_ids'],local_ids)
    # if the tracking_ids are the same (or are not present) compare checksums,
    # calculating checksums and updating the local db where necessary
    if extra==set([]):
        # the original's duplicated md5/sha256 branches are folded into one loop
        # driven by the checksum attribute name; case of checksum_type normalized
        cktype='md5' if str(rds['checksum_type']).lower()=='md5' else 'sha256'
        local_sums=[]
        for f in v.files:
            cksum=getattr(f, cktype)
            if cksum in ["",None]:
                cksum=check_hash(v.path+"/"+f.filename,cktype)
                setattr(f, cktype, cksum)
                if admin:
                    update_item(db,VersionFile,f.id,{cktype:cksum})
                else:
                    write_log(" ".join([cktype,str(f.id),cksum,"\n"]))
            local_sums.append(cksum)
        extra = compare_checksums(rds['checksums'],local_sums)
    return extra
# NOTE(review): this chunk appears to be the interior of a larger ingest routine —
# inst_obj, new, kw_version, versions, inst, db and kw_files come from the
# enclosing scope.
print(inst_obj.id,new)
#P use following two lines if tmp/tree
#kw_version['version'] = find_version(bits[:-1], version)
#kw_version['path'] = '/'.join(bits[:-1])
kw_version['instance_id'] = inst_obj.id
for v in versions:
    # add version to db if not already existing
    kw_version['version'] = v
    files = list_drs_files(inst+"/"+v)
    kw_version['path'] = tree_path("/".join([inst,v,files[0]]))
    v_obj,new = insert_unique(db, Version, **kw_version)
    print(v)
    print(v_obj.id,new)
    # BUG FIX: the original tested `v_obj.filenames==[]`, comparing the bound
    # method object itself (always False), so the bulk-insert branch was
    # unreachable; call the method, as the sibling ingest code does.
    if v_obj.filenames()==[]:
        # no files recorded yet for this version: bulk-insert them with their md5
        rows=[]
        for f in files:
            checksum=check_hash(v_obj.path+"/"+f,'md5')
            rows.append(dict(filename=f, md5=checksum, version_id=v_obj.id))
        add_bulk_items(db, VersionFile, rows)
    else:
        # some files already exist: insert one by one to avoid duplicates
        kw_files['version_id']=v_obj.id
        for f in files:
            kw_files['filename']=f
            kw_files['md5']=check_hash(v_obj.path+"/"+f,'md5')
            insert_unique(db, VersionFile, **kw_files)
# need to have function to map bits of path to db instance fields!!
#model,experiment,variable,mip,ensemble
#kwargs[k]=
# NOTE(review): fragment of a larger ingest routine — vers_path, fpaths,
# kw_version, kw_instance, kw_files, ctype and db come from the enclosing scope.
# determine the dataset version from the first file; fall back to "NA"
fversion=check_version(vers_path+"/"+fpaths[0])
if fversion:
    kw_version['version']= fversion
else:
    kw_version['version']= "NA"
# add instance to database if it does not exist yet
inst_obj,new = insert_unique(db, Instance, **kw_instance)
# link the new version record to its instance
kw_version['instance_id'] = inst_obj.id
# add version to database if it does not exist yet
v_obj,new = insert_unique(db, Version, **kw_version)
# check if files objects exist already, if not add from the files dictionaries;
# add both tracking-ids and checksums: if a checksum is the literal string "None"
# compute it from the file on disk
# (ctype presumably names the checksum field, e.g. 'md5'/'sha256' — TODO confirm)
for i,f in enumerate(kw_files):
    if f['checksum']=="None":
        kw_files[i][ctype]=check_hash(v_obj.path+"/"+f['filename'],ctype)
        f.pop('checksum')
    else:
        # rename the generic 'checksum' key to the concrete checksum-type key
        kw_files[i][ctype]=f.pop('checksum')
    if f['tracking_id']=="":
        # tracking-id missing: read it from the file itself
        kw_files[i]['tracking_id']=get_trackid(v_obj.path+"/"+f['filename'])
    kw_files[i]['version_id']=v_obj.id
# no files recorded yet for this version: add them all with one bulk insert
if v_obj.filenames()==[]:
    add_bulk_items(db, VersionFile, kw_files)
# if some files exist already use insert_unique instead to avoid duplicates
else:
    for i,f in enumerate(kw_files):
        insert_unique(db, VersionFile, **f)