예제 #1
0
            queue1.append({
                '_op_type': 'delete',
                '_index': dbname,
                '_type': 'mbox',
                '_id': _id
            })
            queue2.append({
                '_op_type': 'delete',
                '_index': dbname,
                '_type': 'mbox_source',
                '_id': _id
            })
            print("deleting: " + mid)

    while len(queue1) > 0:
        es.bulk(queue1[0:1024])
        del queue1[0:1024]

    while len(queue2) > 0:
        es.bulk(queue2[0:1024])
        del queue2[0:1024]

    # add new items to elasticsearch from imap

    uids = []
    for mid, uid in mail.items():
        if not mid in db:
            uids.append(uid)
    lists.append([uids, listname, imap4])
else:
    # File based import??
예제 #2
0
                body['from'] = hit['_source']['from'].replace(obfuscate, "...")
            if targetLID:
                body['list_raw'] = targetLID
                body['list'] = targetLID
            if makePrivate:
                body['private'] = True
            if makePublic:
                body['private'] = False
            if not dryrun:
                js_arr.append({
                    '_op_type': 'delete' if deleteEmails else 'update',
                    '_index': dbname,
                    '_type': 'mbox',
                    '_id': doc,
                    'doc': body
                })

            count += 1
            if (count % 500 == 0):
                print("Processed %u emails..." % count)
                if not dryrun:
                    es.bulk(js_arr)
                    js_arr = []

    if len(js_arr) > 0:
        if not dryrun:
            es.bulk(js_arr)

    print("All done, processed %u docs in %u seconds" %
          (count, time.time() - then))
예제 #3
0
def main():
    """Run the list-edit command: apply the requested changes to a list's emails.

    Reads the command-line arguments via ``options()``, prints a summary of
    the requested actions, optionally stores a list description, and then
    collects the per-document changes produced by ``process_hits`` for every
    page of matching documents and submits them in batches through
    ``es.bulk``. When ``args.dryrun`` is set, nothing is written; only the
    progress output is produced.
    """
    # Set up the database wrapper and look up the default database name.
    es = Elastic()
    dbname = es.getdbname()

    args = options()

    print("Beginning list edit:")
    if args.sourceLID:
        print("  - List ID: %s" % args.sourceLID)
    else:
        print("  - MID: %s" % args.mid)
    if args.targetLID:
        print("  - Target ID: %s" % args.targetLID)
    if args.makePublic:
        print("  - Action: Mark all emails public")
    if args.makePrivate:
        print("  - Action: Mark all emails private")
    if args.deleteEmails:
        print("  - Action: Delete emails (sources will be kept!)")
    if args.obfuscate:
        print("  - Action: Obfuscate parts of email containing: %s" % args.obfuscate)

    if args.desc:
        print("  - Action: add description: %s" % args.desc)
        if args.dryrun:
            print("DRY RUN - NO CHANGES WILL BE MADE")
        else:
            # The description is stored under the target list ID when one is
            # given, otherwise under the source list ID.
            # NOTE(review): LID is None when only --mid was supplied; confirm
            # whether a description without a list ID should be rejected.
            LID = args.targetLID if args.targetLID else args.sourceLID
            es.index(
                doc_type="mailinglists",
                id=LID,
                body={
                    'list': LID,
                    'name': LID,
                    'description': args.desc
                }
            )
            print("All done, updated description.")

    if args.targetLID or args.makePrivate or args.makePublic or args.deleteEmails or args.mid or args.obfuscate:
        if args.dryrun:
            print("DRY RUN - NO CHANGES WILL BE MADE")
        print("Updating docs...")
        then = time.time()

        # Select documents by message ID when one is given; otherwise by the
        # source list ID (as a wildcard match if requested).
        if args.mid:
            terms = {
                'term': {
                    'mid': args.mid
                }
            }
        else:
            terms = {
                'wildcard' if args.wildcard else 'term': {
                    'list_raw': args.sourceLID
                }
            }

        query = {
            # Document contents are only fetched when obfuscating.
            '_source': ['body', 'subject', 'from'] if args.obfuscate else False,
            'query': {
                'bool': {
                    'must': [
                        terms
                    ]
                }
            }
        }

        # Gather every proposed change first, then submit them afterwards.
        proposed_changes = []
        for page in es.scan_and_scroll(body=query):
            prop = process_hits(page, args, dbname)
            if prop:
                proposed_changes.extend(prop)

        count = len(proposed_changes)
        processed = 0
        batch_size = 500
        # Submit in batches of 500 by slicing; the final, possibly shorter
        # batch carries the stragglers. Slicing avoids the quadratic cost of
        # repeatedly popping from the front of a list.
        for start in range(0, count, batch_size):
            batch = proposed_changes[start:start + batch_size]
            if not args.dryrun:
                es.bulk(batch)
            processed += len(batch)
            print("Processed %u documents..." % processed)

        print("All done, processed %u docs in %u seconds" % (count, time.time() - then))