def dedup(self, notebook=None):
    """Find and remove duplicate notes, optionally limited to one notebook.

    Three passes:
      1. Group notes by cheap metadata (title, contentLength, largest
         resource mime/size) so no per-note content download is needed yet.
         This key can produce false positives, resolved in pass 2.
      2. For each candidate group, download content and group by MD5 of the
         content plus the title, confirming true duplicates.
      3. Delete every note in a confirmed group except the last one.

    :param notebook: optional notebook name used to scope the search.
    """
    logging.debug("Retrieving note metadata")
    request = self._createSearchRequest(None, None, notebook, None, None, None)
    logging.debug(request)
    evernote = self.getEvernote()
    out.preloader.setMessage("Retrieving metadata...")
    result = evernote.findNotes(request, EDAM_USER_NOTES_MAX, False, 0)
    notes = result.notes
    logging.debug("First pass, comparing metadata of "
                  + str(len(result.notes)) + " notes")

    # Pass 1: bucket by metadata only. Anything more unique would require
    # inspecting the content, which costs one API call per note.
    notes_dict = {}
    for note in notes:
        noteId = (note.title + " (" + str(note.contentLength) + ") with "
                  + str(note.largestResourceMime) + " ("
                  + str(note.largestResourceSize) + ")")
        if noteId in notes_dict:
            notes_dict[noteId].append(note)
            logging.debug(" note: " + noteId + "\" with guid " + note.guid
                          + " potentially duplicated "
                          + str(len(notes_dict[noteId])))
        else:
            notes_dict[noteId] = [note]

    all_dups = [dups for _, dups in notes_dict.iteritems()
                if len(dups) > 1]  # list of lists
    total_dups = sum(map(len, all_dups))  # count total
    logging.debug("Second pass, testing content among " + str(len(all_dups))
                  + " groups, " + str(total_dups) + " notes")

    # Pass 2: confirm candidates by hashing the actual note content.
    notes_dict = {}
    for dup_group in all_dups:
        for note in dup_group:
            out.preloader.setMessage("Retrieving content...")
            # Reuse the client fetched above; the original re-fetched it
            # via self.getEvernote() on every iteration.
            evernote.loadNoteContent(note)
            md5 = hashlib.md5()
            md5.update(note.content)
            noteId = md5.hexdigest() + " " + note.title
            if noteId in notes_dict:
                notes_dict[noteId].append(note)
                logging.debug("duplicate \"" + noteId + "\" with guid "
                              + note.guid + ", duplicated "
                              + str(len(notes_dict[noteId])))
            else:
                notes_dict[noteId] = [note]
                logging.debug("new note \"" + noteId + "\" with guid "
                              + note.guid)

    all_dups = [dups for _, dups in notes_dict.iteritems()
                if len(dups) > 1]  # list of lists
    total_dups = sum(map(len, all_dups))  # count total
    logging.debug("Third pass, deleting " + str(len(all_dups))
                  + " groups, " + str(total_dups) + " notes")

    # Pass 3: delete confirmed duplicates, keeping one note per group.
    removed_count = 0
    for dup_group in all_dups:
        dup_group.pop()  # spare the last one, delete the rest
        for note in dup_group:
            removed_count += 1
            logging.debug("Deleting \"" + note.title + "\" created "
                          + out.printDate(note.created) + " with guid "
                          + note.guid + " (" + str(removed_count) + "/"
                          + str(total_dups) + ")")
            out.preloader.setMessage("Removing note...")
            evernote.removeNote(note.guid)

    out.successMessage("Removed " + str(removed_count) + " duplicates within "
                       + str(len(result.notes)) + " total notes")
def dedup(self,
          search=None,
          tags=None,
          notebooks=None,
          date=None,
          exact_entry=None,
          content_search=None,
          with_url=None,
          count=None, ):
    """Find and remove duplicate notes matching a search.

    Notes are grouped by title + contentHash; within each group of
    duplicates, every note is deleted.

    Fixes: the original concatenated ints (lengths, counters) directly onto
    strings, raising TypeError at runtime; every numeric value is now passed
    through str(). The final summary was also missing a space before
    "duplicates".

    :param search: search phrase ("*" enables a saved search filter).
    :param count: maximum notes to fetch per request (default 20).
    Other parameters are forwarded to the search-request builder.
    """
    request = self._createSearchRequest(search, tags, notebooks, date,
                                        exact_entry, content_search)
    if not count:
        count = 20
    else:
        count = int(count)
    logging.debug("Search count: %s", count)
    createFilter = True if search == "*" else False
    notes = []
    evernote = self.getEvernote()
    stillDownloadingResults = True
    while stillDownloadingResults:
        offset = len(notes)
        result = evernote.findNotes(request, count, createFilter, offset)
        notes += result.notes
        total = result.totalNotes
        limit = min(total, count)
        stillDownloadingResults = len(notes) < total and len(notes) < count
        # str() is required: "str" + int raises TypeError.
        out.printLine("Downloaded metadata for " + str(len(result.notes))
                      + " notes (" + str(len(notes)) + "/" + str(limit)
                      + " of " + str(count) + ")")
    if total == 0:
        out.failureMessage("Notes have not been found.")
        return tools.exitErr()

    # Group by title + server-side content hash; identical pairs are dups.
    notes_dict = {}
    for note in notes:
        noteId = note.title + ":" + note.contentHash
        if noteId in notes_dict:
            notes_dict[noteId].append(note)
            out.printLine("found dup! \"" + note.title + "\" with guid "
                          + note.guid + ", duplicated "
                          + str(len(notes_dict[noteId])))
        else:
            notes_dict[noteId] = [note]
            out.printLine("new note \"" + note.title + "\" with guid "
                          + note.guid)

    all_dups = [dups for _, dups in notes_dict.iteritems()
                if len(dups) > 1]  # list of lists
    total_dups = sum(map(len, all_dups))  # count total

    removed_total = 0
    for dup_group in all_dups:
        group_size = len(dup_group)
        out.printLine("Deleting " + str(group_size) + " notes titled \""
                      + dup_group[0].title + "\"")
        for note in dup_group:
            removed_total += 1
            out.printLine("Deleting \"" + note.title + "\" created "
                          + out.printDate(note.created) + " with guid "
                          + note.guid + " (" + str(removed_total) + "/"
                          + str(total_dups) + ")")
            evernote.removeNote(note.guid)
    out.printLine("removed " + str(removed_total) + " duplicates")
def dedup(self,
          search=None,
          tag=None,
          notebook=None,
          date=None,
          exact_entry=None,
          content_search=None,
          with_url=None,
          count=None):
    """Find and remove duplicate notes matching a search.

    Notes are grouped by title + contentHash; within each group of
    duplicates, every note is deleted.

    Fixes: three remaining int-to-str concatenations raised TypeError at
    runtime (duplicate counter, group size, and the final summary); the
    final summary was also missing a space before "duplicates".

    :param search: search phrase ("*" enables a saved search filter).
    :param count: maximum notes to fetch per request (default 20).
    Other parameters are forwarded to the search-request builder.
    """
    request = self._createSearchRequest(search, tag, notebook, date,
                                        exact_entry, content_search)
    if not count:
        count = 20
    else:
        count = int(count)
    logging.debug("Search count: %s", count)
    createFilter = True if search == "*" else False
    notes = []
    evernote = self.getEvernote()
    stillDownloadingResults = True
    while stillDownloadingResults:
        offset = len(notes)
        result = evernote.findNotes(request, count, createFilter, offset)
        notes += result.notes
        total = result.totalNotes
        limit = min(total, count)
        stillDownloadingResults = len(notes) < total and len(notes) < count
        out.printLine("Downloaded metadata for " + str(len(result.notes))
                      + " notes (" + str(len(notes)) + "/" + str(limit)
                      + " of " + str(count) + ")")
    if total == 0:
        out.failureMessage("Notes have not been found.")
        return tools.exitErr()

    # Group by title + server-side content hash; identical pairs are dups.
    notes_dict = {}
    for note in notes:
        noteId = note.title + ":" + note.contentHash
        if noteId in notes_dict:
            notes_dict[noteId].append(note)
            # str() is required: "str" + int raises TypeError.
            out.printLine("found dup! \"" + note.title + "\" with guid "
                          + note.guid + ", duplicated "
                          + str(len(notes_dict[noteId])))
        else:
            notes_dict[noteId] = [note]
            out.printLine("new note \"" + note.title + "\" with guid "
                          + note.guid)

    all_dups = [dups for _, dups in notes_dict.iteritems()
                if len(dups) > 1]  # list of lists
    total_dups = sum(map(len, all_dups))  # count total

    removed_total = 0
    for dup_group in all_dups:
        group_size = len(dup_group)
        out.printLine("Deleting " + str(group_size) + " notes titled \""
                      + dup_group[0].title + "\"")
        for note in dup_group:
            removed_total += 1
            out.printLine("Deleting \"" + note.title + "\" created "
                          + out.printDate(note.created) + " with guid "
                          + note.guid + " (" + str(removed_total) + "/"
                          + str(total_dups) + ")")
            evernote.removeNote(note.guid)
    out.printLine("removed " + str(removed_total) + " duplicates")
def dedup(self, notebook=None):
    """Find and remove duplicate notes, optionally limited to one notebook.

    Three passes:
      1. Group notes by cheap metadata (title, contentLength, largest
         resource mime/size) so no per-note content download is needed yet.
         This key can produce false positives, resolved in pass 2.
      2. For each candidate group, download content and group by MD5 of the
         content plus the title, confirming true duplicates.
      3. Delete every note in a confirmed group except the last one.

    :param notebook: optional notebook name used to scope the search.
    """
    logging.debug("Retrieving note metadata")
    request = self._createSearchRequest(None, None, notebook, None, None, None)
    logging.debug(request)
    evernote = self.getEvernote()
    out.preloader.setMessage("Retrieving metadata...")
    result = evernote.findNotes(request, EDAM_USER_NOTES_MAX, False, 0)
    notes = result.notes
    logging.debug("First pass, comparing metadata of "
                  + str(len(result.notes)) + " notes")

    # Pass 1: bucket by metadata only. Anything more unique would require
    # inspecting the content, which costs one API call per note.
    notes_dict = {}
    for note in notes:
        noteId = (note.title + " (" + str(note.contentLength) + ") with "
                  + str(note.largestResourceMime) + " ("
                  + str(note.largestResourceSize) + ")")
        if noteId in notes_dict:
            notes_dict[noteId].append(note)
            logging.debug(" note: " + noteId + "\" with guid " + note.guid
                          + " potentially duplicated "
                          + str(len(notes_dict[noteId])))
        else:
            notes_dict[noteId] = [note]

    all_dups = [dups for _, dups in notes_dict.iteritems()
                if len(dups) > 1]  # list of lists
    total_dups = sum(map(len, all_dups))  # count total
    logging.debug("Second pass, testing content among " + str(len(all_dups))
                  + " groups, " + str(total_dups) + " notes")

    # Pass 2: confirm candidates by hashing the actual note content.
    notes_dict = {}
    for dup_group in all_dups:
        for note in dup_group:
            out.preloader.setMessage("Retrieving content...")
            # Reuse the client fetched above; the original re-fetched it
            # via self.getEvernote() on every iteration.
            evernote.loadNoteContent(note)
            md5 = hashlib.md5()
            md5.update(note.content)
            noteId = md5.hexdigest() + " " + note.title
            if noteId in notes_dict:
                notes_dict[noteId].append(note)
                logging.debug("duplicate \"" + noteId + "\" with guid "
                              + note.guid + ", duplicated "
                              + str(len(notes_dict[noteId])))
            else:
                notes_dict[noteId] = [note]
                logging.debug("new note \"" + noteId + "\" with guid "
                              + note.guid)

    all_dups = [dups for _, dups in notes_dict.iteritems()
                if len(dups) > 1]  # list of lists
    total_dups = sum(map(len, all_dups))  # count total
    logging.debug("Third pass, deleting " + str(len(all_dups))
                  + " groups, " + str(total_dups) + " notes")

    # Pass 3: delete confirmed duplicates, keeping one note per group.
    removed_count = 0
    for dup_group in all_dups:
        dup_group.pop()  # spare the last one, delete the rest
        for note in dup_group:
            removed_count += 1
            logging.debug("Deleting \"" + note.title + "\" created "
                          + out.printDate(note.created) + " with guid "
                          + note.guid + " (" + str(removed_count) + "/"
                          + str(total_dups) + ")")
            out.preloader.setMessage("Removing note...")
            evernote.removeNote(note.guid)

    out.successMessage("Removed " + str(removed_count) + " duplicates within "
                       + str(len(result.notes)) + " total notes")