def save_to(self, path, mirror_prefix=None):
    """Download this file into the directory *path* and verify its md5.

    Parameters:
        path: destination directory.
        mirror_prefix: optional URL prefix; when given, the file is
            fetched from ``mirror_prefix + self.file`` instead of
            ``self.url``.

    Returns:
        True when the file already exists (or was downloaded) with a
        matching checksum; False when the download failed or was
        interrupted.

    Raises:
        Exception: a pre-existing file's checksum does not match.
    """
    fname = path + "/" + self.file
    if os.path.exists(fname):
        if md5sum(fname) == self.md5:
            # file already exists and checksum matches
            print(fname + " " + self.md5 + " exists, skipping")
            return True
        else:
            raise Exception(
                "{} exists but checksum does not match {}".format(
                    fname, self.md5))
    while 1:
        # use mirror if specified
        url = self.url if mirror_prefix is None else mirror_prefix + self.file
        try:
            suc = wget(url, path, retry=5, timeout=5)
        except KeyboardInterrupt:
            # FIX: the partial file may never have been created if the
            # interrupt arrived before wget wrote anything; only unlink
            # what actually exists so the interrupt path cannot raise.
            if os.path.exists(fname):
                os.unlink(fname)
            return False
        if suc:
            if md5sum(fname) == self.md5:
                return True
            else:
                print("md5 not match, retrying")
                continue  # md5 not match
        else:
            # FIX: same guard here — a failed wget may have left no file.
            if os.path.exists(fname):
                os.unlink(fname)
            return False
def __init__(self, pathin=None, get_EXIF_data=True,
             get_os_data=True, calc_md5=True,
             get_PIL_size=True, basepath=None, dictin=None):
    """Build the record either from a ready-made dict or from a path.

    When *dictin* is given, its key/value pairs become attributes
    verbatim.  Otherwise metadata is gathered from the file at
    *pathin*, with each expensive step gated by its boolean flag.
    """
    if dictin is not None:
        # Fast path: restore every attribute straight from the dict.
        for key, value in dictin.iteritems():
            setattr(self, key, value)
    else:
        self.pathin = pathin
        self.folder, self.filename = os.path.split(pathin)
        self.find_relpath()
        if get_os_data:
            self.get_os_data()
        if get_EXIF_data:
            self.read_EXIF_data()
            self.EXIF_date_to_attrs()
        if get_PIL_size:
            self.get_PIL_size()
        if calc_md5:
            self.md5sum = md5sum.md5sum(self.pathin)
        # Placeholders for attributes no gatherer has filled in yet.
        for attr in empty_attrs:
            setattr(self, attr, None)
def upload(self, container, folder): cont = self.conn.get_container(container) cont.enable_static_web('index.html') local_files = Sync.list_files(folder) total = len(local_files) for i,local_file in enumerate(local_files): print "#%d of %d"%(i+1,total) name = os.path.relpath(local_file, folder) # print name obj = cont.create_object(name) retry = 5 try: etag = obj.etag logging.debug("Checking md5sum of " + name) checksum = md5sum.md5sum(local_file) logging.debug("Etag: " + etag) logging.debug("MD5SUM: " + checksum) if etag == checksum: print name + " matches! Skipping..." else: obj.load_from_filename(local_file, True, callback) except SSLError, e: retry -= 1 print "Retries left: %d"%(retry) if retry < 0: raise cloudfiles.errors.ResponseError(e)
def putDictlist(self, dictlist): """ Answer to a PUT dictionary command. Used to store a dictionary. """ dictpath = "%s/dictionaries" % (home) files = os.listdir(dictpath) files = [f for f in files if os.path.isfile(os.path.join(dictpath, f))] sdicts = dict() for f in files: md5digest = md5sum(os.path.join(dictpath, f)) sdicts[md5digest] = f toGet = [] print "Downloading dictionaries:" for key in dictlist: if key in sdicts: print " [ ]", dictlist[key] if not sdicts[key][:-4] == dictlist[key]: old = os.path.join(dictpath, sdicts[key]) new = os.path.join(dictpath, dictlist[key]) + ".txt" os.rename(old, new) if not key in sdicts: print " [X]", dictlist[key] toGet.append(dictlist[key]) val = sdicts.values() for dname in toGet: dname += ".txt" check = os.path.join(dictpath, dname) if os.path.isfile(check): os.remove(check) self.sendGet("dictionaries", [toGet])
def getDictlist(self):
    """Send a PUT mapping each dictionary's md5 digest to its name."""
    dictlist = dict()
    listing = "%s/dictionaries" % (dictpath)
    with open(listing, "r") as f:
        for raw in f:
            name = raw.replace("\n", "")
            digest = md5sum("%s/%s.txt" % (dictpath, name))
            dictlist[digest] = name
    self.sendPut("dictlist", [dictlist])
def _compare_checksums(session, vdi, backup):
    """Checksum *backup* locally while the server checksums *vdi*,
    then assert the two digests agree."""
    print("Starting to checksum VDI on server side")
    # Kick off the server-side checksum asynchronously so the local
    # md5 can be computed while it runs.
    remote_task = session.xenapi.Async.VDI.checksum(vdi)
    print("Checksumming local backup")
    local_digest = md5sum.md5sum(backup)
    print("Waiting for server-side checksum to finish...")
    remote_digest = _wait_for_task_result(session=session, task=remote_task)
    assert local_digest == remote_digest
def get_gen_md5(filepath):
    """Return the md5 digest of *filepath*, or None when the file is
    trivially empty (zero bytes, or a single 0x0A as left by ``echo >``)."""
    ZERO_BYTE_HASH = 'd41d8cd98f00b204e9800998ecf8427e'
    ONE_BYTE_HASH = '68b329da9893e34099c7d8ad5cb9c940'  #!!!just one byte: 0x0A, result of echo > file
    digest = md5sum(filepath)
    if digest in (ZERO_BYTE_HASH, ONE_BYTE_HASH):
        return None
    return digest
def _compare_checksums(session, vdi, backup):
    """Verify the local *backup* file matches the server's checksum of *vdi*."""
    print("Starting to checksum VDI on server side")
    # VDI.checksum is a hidden call, and therefore should not be used by
    # clients - it's output, or the checksum algorithm it uses, is not
    # guaranteed to remain the same
    task = session.xenapi.Async.VDI.checksum(vdi)
    print("Checksumming local backup")
    local_md5 = md5sum.md5sum(backup)
    print("Waiting for server-side checksum to finish...")
    server_md5 = _wait_for_task_result(session=session, task=task)
    assert local_md5 == server_md5
def __init__(self, chmpath): assert isinstance(chmpath, unicode) self.md5 = md5sum(chmpath) self.cfghome = os.path.join(home, '.pychmviewer') try: os.mkdir(self.cfghome, 0700) except exceptions.OSError: pass self.cfghome = os.path.join(self.cfghome, self.md5) try: os.mkdir(self.cfghome, 0700) except exceptions.OSError: pass bookmarkpath = os.path.join(self.cfghome, 'bookmark.db') lastconfpath = os.path.join(self.cfghome, 'last.db') self.bookmarkdb = bsddb.hashopen(bookmarkpath) self.lastconfdb = bsddb.hashopen(lastconfpath)
def __init__(self, chmpath): assert isinstance(chmpath, unicode) self.md5 = md5sum(chmpath) self.cfghome = os.path.join(home, ".pychmviewer") try: os.mkdir(self.cfghome, 0700) except exceptions.OSError: pass self.cfghome = os.path.join(self.cfghome, self.md5) try: os.mkdir(self.cfghome, 0700) except exceptions.OSError: pass bookmarkpath = os.path.join(self.cfghome, "bookmark.db") lastconfpath = os.path.join(self.cfghome, "last.db") self.bookmarkdb = bsddb.hashopen(bookmarkpath) self.lastconfdb = bsddb.hashopen(lastconfpath)
def hash_file(dirpath, filename, log):
    """Write a ``.md5`` sidecar file next to dirpath/filename if absent.

    Skips files that already have a sidecar or are at most one byte.
    On success records the digest in ``log['md5']``, marks
    ``log['fsop'] = 'HASH'`` and bumps the global task counter; if the
    user declines via ``ok_to_process('hash')`` a warning is appended
    to ``log['warnings']`` instead.
    """
    filepath = dirpath + '/' + filename
    md5filename = filepath + '.md5'
    if (not os.path.exists(md5filename)
            and os.path.getsize(filepath) > 1):
        if ok_to_process('hash'):
            filemd5 = md5sum(filepath)
            # FIX: context manager guarantees the sidecar is closed
            # even if the write raises (the original leaked the handle
            # on error).
            with open(md5filename, 'w') as md5file:
                md5file.write(filemd5)
            log['md5'] = filemd5
            log['fsop'] = 'HASH'
            global total_tasks
            total_tasks += 1
            print(' md5=' + filemd5)
            print(md5filename)
        else:
            log['warnings'].append(
                'user prevented further processing. exiting.')
import os import sys from md5sum import md5sum filedict = {} for p, dirs, files in os.walk(sys.argv[1]): for f in files: fp = os.path.join(p,f) md5 = md5sum(fp) if md5 in filedict: filedict[md5].append(fp) else: filedict[md5] = [fp] for k in filedict: if len(filedict[k]) > 1: print k,','.join(filedict[k])