def main():
    """Scan JarMetadata documents and record repeated group/artifact/version keys.

    Each document yielded by ``MongoDocumentIterator`` is reduced to a
    ``group_id||artifact_id||version`` key. The first time a key is seen it
    is remembered; every later occurrence is reported to ``dup_versions``
    via ``incr``. Progress is printed per document, a summary is printed at
    the end, and ``dup_versions.get_series()`` is serialised as JSON and
    handed to ``save_to_file`` under the name ``duplicates.json``.
    """
    # A set instead of a list: the collection is only used for membership
    # tests and len(), so ordering is irrelevant and lookups become O(1)
    # rather than a linear scan per document.
    versions = set()
    # NOTE(review): ArrayCount is defined elsewhere; presumably it keeps a
    # per-key occurrence counter -- confirm against its definition.
    dup_versions = ArrayCount()
    miter = MongoDocumentIterator(fields=[
        'JarMetadata.group_id',
        'JarMetadata.artifact_id',
        'JarMetadata.version',
    ])

    # Single parenthesised expressions print identically under the Python 2
    # print statement and also parse as a Python 3 print() call.
    print('Found %d Documents' % (miter.total(),))

    while miter.has_next():
        d = miter.next()

        # The iterator may hand back None; such entries are skipped
        # without printing a progress line.
        if d is None:
            continue

        metadata = d['JarMetadata']
        ga = '%s||%s||%s' % (
            metadata['group_id'],
            metadata['artifact_id'],
            metadata['version'],
        )

        if ga in versions:
            dup_versions.incr(ga)
        else:
            versions.add(ga)

        print('[%d:%d:%d]: Processed %s' % (
            dup_versions.item_count(), len(versions), miter.count(), ga))

    print('Total documents: %d, dups: %d, versions: %d' % (
        miter.total(), dup_versions.item_count(), len(versions)))
    save_to_file('duplicates.json', json.dumps(dup_versions.get_series()))