queue1.append({ '_op_type': 'delete', '_index': dbname, '_type': 'mbox', '_id': _id }) queue2.append({ '_op_type': 'delete', '_index': dbname, '_type': 'mbox_source', '_id': _id }) print("deleting: " + mid) while len(queue1) > 0: es.bulk(queue1[0:1024]) del queue1[0:1024] while len(queue2) > 0: es.bulk(queue2[0:1024]) del queue2[0:1024] # add new items to elasticsearch from imap uids = [] for mid, uid in mail.items(): if not mid in db: uids.append(uid) lists.append([uids, listname, imap4]) else: # File based import??
body['from'] = hit['_source']['from'].replace(obfuscate, "...") if targetLID: body['list_raw'] = targetLID body['list'] = targetLID if makePrivate: body['private'] = True if makePublic: body['private'] = False if not dryrun: js_arr.append({ '_op_type': 'delete' if deleteEmails else 'update', '_index': dbname, '_type': 'mbox', '_id': doc, 'doc': body }) count += 1 if (count % 500 == 0): print("Processed %u emails..." % count) if not dryrun: es.bulk(js_arr) js_arr = [] if len(js_arr) > 0: if not dryrun: es.bulk(js_arr) print("All done, processed %u docs in %u seconds" % (count, time.time() - then))
def main():
    """Apply the list edits requested on the command line.

    Reads the parsed arguments from ``options()``, prints a summary of the
    requested actions, optionally stores a list description in the
    ``mailinglists`` doc type, and then — if any per-email action was
    requested — scrolls over the matching documents, collects the changes
    proposed by ``process_hits()`` and submits them to ElasticSearch with
    ``es.bulk()`` in batches of 500.

    When ``args.dryrun`` is set, nothing is written: neither the description
    nor the bulk changes are sent, but the progress output is still printed.
    """
    # Get config and set up the default database. A single client suffices.
    es = Elastic()
    # Default database name
    dbname = es.getdbname()

    args = options()

    print("Beginning list edit:")
    if args.sourceLID:
        print(" - List ID: %s" % args.sourceLID)
    else:
        print(" - MID: %s" % args.mid)
    if args.targetLID:
        print(" - Target ID: %s" % args.targetLID)
    if args.makePublic:
        print(" - Action: Mark all emails public")
    if args.makePrivate:
        print(" - Action: Mark all emails private")
    if args.deleteEmails:
        print(" - Action: Delete emails (sources will be kept!)")
    if args.obfuscate:
        print(" - Action: Obfuscate parts of email containing: %s" % args.obfuscate)

    if args.desc:
        print(" - Action: add description: %s" % args.desc)
        if args.dryrun:
            print("DRY RUN - NO CHANGES WILL BE MADE")
        else:
            # The description is attached to the target list when one is
            # given, otherwise to the source list.
            list_id = args.targetLID if args.targetLID else args.sourceLID
            es.index(
                doc_type="mailinglists",
                id=list_id,
                body={
                    'list': list_id,
                    'name': list_id,
                    'description': args.desc,
                },
            )
            print("All done, updated description.")

    wants_doc_changes = (
        args.targetLID
        or args.makePrivate
        or args.makePublic
        or args.deleteEmails
        or args.mid
        or args.obfuscate
    )
    if wants_doc_changes:
        if args.dryrun:
            print("DRY RUN - NO CHANGES WILL BE MADE")
        print("Updating docs...")
        then = time.time()

        # A message id takes precedence over the list id as the selector.
        if args.mid:
            terms = {'term': {'mid': args.mid}}
        else:
            match_kind = 'wildcard' if args.wildcard else 'term'
            terms = {match_kind: {'list_raw': args.sourceLID}}

        query = {
            # The document source is only fetched when obfuscating.
            '_source': ['body', 'subject', 'from'] if args.obfuscate else False,
            'query': {
                'bool': {
                    'must': [terms]
                }
            }
        }

        proposed_changes = []
        for page in es.scan_and_scroll(body=query):
            prop = process_hits(page, args, dbname)
            if prop:
                proposed_changes.extend(prop)

        batch_size = 500
        count = len(proposed_changes)
        processed = 0
        # Handle proposed changes in batches of 500. Slicing keeps this linear
        # in the number of changes; the final slice carries any stragglers.
        for start in range(0, count, batch_size):
            batch = proposed_changes[start:start + batch_size]
            if not args.dryrun:
                es.bulk(batch)
            processed += len(batch)
            print("Processed %u documents..." % processed)

        print("All done, processed %u docs in %u seconds" % (count, time.time() - then))