def hashAndAdd(file):
    """Hash an MP3 file and insert or update its record in the database.

    The ID3 tags are read first; if that raises, the exception is printed
    and the file is skipped.  A file that already has a row whose stored
    mtime is not older than the on-disk mtime is skipped as well.
    Otherwise two digests are computed with ``hash.sha512file`` -- one of
    the temporary copy produced by ``mp3.stripid3`` and one of the
    original file -- and stored together with the tags, the absolute path
    and the mtime.  The connection is committed after the write.

    NOTE(review): another ``hashAndAdd`` is defined later in this file; if
    both sit at module level the later definition replaces this one.

    Args:
        file: path of the file to process.

    Returns:
        None.
    """
    # Check if it's a valid MP3 file first by trying to get the ID3 info
    try:
        title, artist, album = mp3.getid3(file)
    except Exception as e:
        # So far the only exception is an invalid ID3 header found, so not much to grab
        print(e)
        return

    mtime = os.path.getmtime(file)
    # Convert the absolute path once; it is both the lookup key and a
    # field of the stored record.
    abspath = unicode(str(os.path.abspath(file)).decode('utf-8'))

    # Gets back a tuple with (count of rows, mtime)
    (exists, dbmtime) = db.checkIfExists(dbcursor, abspath)
    update = False
    # Check if the file has already been hashed
    if exists > 0:
        # If the file hasn't been modified since it was checked, don't bother hashing it
        if dbmtime >= mtime:
            return
        print("Updating", file)
        update = True

    # stripid3's result is indexed as [0] = OS-level descriptor and
    # [1] = path (presumably a tempfile.mkstemp-style pair -- confirm).
    stripped = mp3.stripid3(file)
    try:
        strippedhash = hash.sha512file(stripped[1])
    finally:
        # Release the temporary copy even when hashing fails, so failed
        # runs do not leave descriptors and temp files behind.
        os.close(stripped[0])
        os.remove(stripped[1])

    originalhash = hash.sha512file(file)
    info = mp3info(title, artist, album, unicode(strippedhash),
                   unicode(originalhash), abspath, mtime)
    if not update:
        print(info, "Ins")
        db.insertIntoDB(dbcursor, info)
    else:
        db.updateDB(dbcursor, info)
    dbconn.commit()
def hashMP4(filename):
    """Return the ``hash.sha512file`` digest of a file, ignoring MP4 metadata.

    If ``mp4.isMP4`` reports that the file is not an MP4, the digest of
    the file as-is is returned.  Otherwise ``mp4.stripMetadata`` produces
    a temporary file, which is hashed by name and then deleted.

    Args:
        filename: path of the file to hash.

    Returns:
        Whatever ``hash.sha512file`` returns for the hashed content.
    """
    # The context manager guarantees the input handle is closed on every
    # path out of the function, including the early return and errors.
    with open(filename, 'rb') as source:
        try:
            if not mp4.isMP4(source):
                return hash.sha512file(source)
        except Exception as e:
            # So far the only exception is an invalid MP4 header found, so not much to grab
            # NOTE(review): after printing, execution still falls through
            # to the metadata-stripping path below -- confirm intended.
            print(e)

        stripped = mp4.stripMetadata(source)
        try:
            return hash.sha512file(stripped.name)
        finally:
            # Close and delete the temporary file whether or not hashing
            # succeeded.
            stripped.close()
            os.remove(stripped.name)
def hashAndAdd(file):
    """Hash a file and record it in the database, inserting or updating.

    The database is asked whether the file's absolute path already has a
    row.  If it does and the stored mtime is not older than the on-disk
    mtime, nothing is done.  Otherwise the file is hashed with
    ``hash.sha512file`` and the resulting record is inserted (new path)
    or updated (known path), followed by a commit.

    Args:
        file: path of the file to process.

    Returns:
        None.
    """
    modified = os.path.getmtime(file)

    # checkIfExists gives back a tuple with (count of rows, mtime).
    row_count, stored_mtime = db.checkIfExists(
        dbcursor, unicode(str(os.path.abspath(file)).decode('utf-8')))
    already_known = row_count > 0

    # Unchanged since it was last recorded: no need to hash it again.
    if already_known and stored_mtime >= modified:
        return
    if already_known:
        print("Updating", file)

    digest = hash.sha512file(file)
    record = fileinfo(
        unicode(digest),
        unicode(str(os.path.abspath(file)).decode('utf-8')),
        modified)

    if already_known:
        print(record,"upd")
        db.updateDB(dbcursor, record)
    else:
        print(record,"Ins")
        db.insertIntoDB(dbcursor, record)
    dbconn.commit()
if hash1 == hash2: print "Removing " + dup + "!" os.remove(dup) """ src = os.path.abspath(join(root,filename)) # Need to find some way to recurse directories in sync with src dst = os.path.abspath(join(sys.argv[2],filename)) if not os.path.isfile(dst): shutil.move(src, dst) print("Moved {0} to {1}").format(src, dst) else: if re.search(".mp4",filename,re.IGNORECASE): srchash = hashMP4(src) dsthash = hashMP4(dst) else: srchash = hash.sha512file(src) dsthash = hash.sha512file(dst) if srchash == dsthash: print("{0} and {1} are identical. Deleting {2}.").format(src, dst, src) os.remove(src) else: filebase = os.path.splitext(os.path.split(src)[1])[0] fileext = os.path.splitext(os.path.split(src)[1])[1] count = 2 while os.path.isfile(os.path.abspath(join(sys.argv[2],filebase + " " + str(count) + fileext))): count = count + 1 newdst = os.path.abspath(join(sys.argv[2],filebase + " " + str(count) + fileext)) print("{0} and {1} are not identical. Renaming {2} to {3} and moving.").format(src, dst, src, newdst) #shutil.move(src, newdst) # Close the cursor & commit the DB one last time just for good measure #dbcursor.close()
# Walk source_dir and, for every file that also exists at the mirrored path
# under compare_dir, delete the compare_dir copy when both files hash equal.
# deleted_files and space_saved are running totals initialised before this
# loop (not shown here).
for root, subfolders, files in os.walk(source_dir):
    # Since root contains the working folder, and we'll move onto subfolders later,
    # we only care about the part of root that follows source_dir.
    # Plain concatenation is used on purpose: pathsuffix may begin with a
    # separator, which would make join() discard compare_dir.
    (null, path, pathsuffix) = root.rpartition(source_dir)
    dup_folder = os.path.normpath(compare_dir + "/" + pathsuffix)

    # Mention what path we're working in.
    print("Comparing: %s" % os.path.abspath(root).encode("utf-8"))
    print("To: %s" % os.path.abspath(dup_folder).encode("utf-8"))

    for filename in files:
        dup = os.path.abspath(dup_folder + "/" + filename)
        filename = join(root,filename)

        # Never compare a file with itself: the hashes would trivially
        # match and the only copy would be deleted (happens when the
        # mirrored path resolves to the source file, e.g. compare_dir
        # equals source_dir).
        if dup == os.path.abspath(filename):
            continue
        if not os.path.exists(dup):
            continue

        # Use the metadata-insensitive hash only when both names carry an
        # extension matching the m4a pattern; otherwise hash raw content.
        if re.search(".m4a",os.path.splitext(filename)[1],re.IGNORECASE) and re.search(".m4a",os.path.splitext(dup)[1],re.IGNORECASE):
            hash1 = hashMP4(filename)
            hash2 = hashMP4(dup)
        else:
            hash1 = hash.sha512file(filename)
            hash2 = hash.sha512file(dup)

        if hash1 == hash2:
            print("%s:\n %s" % (os.path.abspath(filename).encode("utf-8"), hash1))
            print("%s:\n %s" % (os.path.abspath(dup).encode("utf-8"), hash2))
            deleted_files = deleted_files + 1
            # Size must be read before the file is removed.
            space_saved = space_saved + os.path.getsize(dup)
            print("[%s] Removing %s" % (str(deleted_files), dup.encode("utf-8")))
            os.remove(dup)

    # Drop the mirrored folder once it has been emptied.
    if os.path.exists(dup_folder) and not os.listdir(dup_folder):
        os.rmdir(dup_folder)

print("Deleted " + str(deleted_files) + ", saving " + str(space_saved) + " bytes of space")