Example no. 1
0
 def test_get_doc_count_by_type(self):
     """Each doc type is counted only in the database where it lives."""
     cases = [
         (None, 'Application', 2),
         ('users', 'CommCareUser', 2),
         (None, 'CommCareUser', 0),
         ('users', 'Application', 0),
     ]
     for db_name, doc_type, expected in cases:
         self.assertEqual(
             get_doc_count_by_type(get_db(db_name), doc_type), expected)
Example no. 2
0
    def forwards(apps, schema_editor):
        """Run the blob migration for this slug when the doc count is small.

        Skipped under unit tests. Raises MigrationError when the blob db
        is unavailable, and MigrationNotComplete when the migration must
        be run manually (> 500 docs) or when documents were skipped.
        """
        # Tests configure their own blob state; nothing to do here.
        if settings.UNIT_TESTING:
            return

        # Fail fast with operator instructions if the blob db is broken.
        try:
            get_blob_db()
        except Exception:
            raise MigrationError(
                "Cannot get blob db:\n{error}{message}".format(
                    error=traceback.format_exc(),
                    message=BLOB_DB_NOT_CONFIGURED,
                ))

        try:
            BlobMigrationState.objects.get(slug=slug)
            # A state row means a previous run already finished.
            return
        except BlobMigrationState.DoesNotExist:
            pass

        migrator = MIGRATIONS[slug]
        doc_types = doc_type_tuples_to_dict(migrator.doc_types)
        total = sum(
            get_doc_count_by_type(model_class.get_db(), doc_type)
            for doc_type, model_class in doc_types.items()
        )
        if total > 500:
            raise MigrationNotComplete(
                MIGRATION_INSTRUCTIONS.format(slug=slug, total=total))

        # Small enough to migrate inline with the django migration.
        migrated, skipped = migrator.migrate()
        if skipped:
            raise MigrationNotComplete(DOCS_SKIPPED_WARNING.format(skipped))
    def handle(self, **options):
        """Migrate each couch doc of this type to SQL and/or verify it.

        ``verify_only`` checks without migrating; ``skip_verify``
        migrates without checking. The two flags are mutually exclusive.
        """
        verify_only = options.get("verify_only", False)
        skip_verify = options.get("skip_verify", False)

        # The two modes contradict each other; reject the combination.
        if verify_only and skip_verify:
            raise CommandError("verify_only and skip_verify are mutually exclusive")

        self.doc_count = get_doc_count_by_type(self.couch_db(), self.couch_doc_type())
        self.diff_count = 0
        self.doc_index = 0

        logger.info("Found {} {} docs and {} {} models".format(
            self.doc_count,
            self.couch_doc_type(),
            self.sql_class().objects.count(),
            self.sql_class().__name__,
        ))
        docs = get_all_docs_with_doc_types(self.couch_db(), [self.couch_doc_type()])
        for self.doc_index, doc in enumerate(docs, start=1):
            if verify_only:
                # verify_only implies skip_verify is False (checked above)
                self._verify_doc(doc, exit=False)
            else:
                self._migrate_doc(doc)
                if not skip_verify:
                    self._verify_doc(doc, exit=True)

        logger.info(f"Processed {self.doc_index} documents")
        if not skip_verify:
            logger.info(f"Found {self.diff_count} differences")
Example no. 4
0
    def forwards(apps, schema_editor):
        """Run the blob migration for this slug as part of a django migration.

        Skipped entirely under unit tests. Raises MigrationError if the
        blob db cannot be obtained, and MigrationNotComplete if there are
        too many documents to migrate inline (> 500) or if any document
        was skipped during migration.
        """
        if settings.UNIT_TESTING:
            return

        # Fail fast with instructions when the blob db is unreachable.
        try:
            get_blob_db()
        except Exception:
            raise MigrationError(
                "Cannot get blob db:\n{error}{message}".format(
                    error=traceback.format_exc(),
                    message=BLOB_DB_NOT_CONFIGURED,
                ))

        try:
            BlobMigrationState.objects.get(slug=slug)
            return  # already migrated
        except BlobMigrationState.DoesNotExist:
            pass

        # Count all docs of every type covered by this migration.
        migrator = MIGRATIONS[slug]
        total = 0
        for doc_type, model_class in doc_type_tuples_to_dict(migrator.doc_types).items():
            total += get_doc_count_by_type(model_class.get_db(), doc_type)
        if total > 500:
            # Too many docs to migrate inline; tell the operator how to
            # run it manually.
            message = MIGRATION_INSTRUCTIONS.format(slug=slug, total=total)
            raise MigrationNotComplete(message)

        # just do the migration if the number of documents is small
        migrated, skipped = migrator.migrate()
        if skipped:
            raise MigrationNotComplete(DOCS_SKIPPED_WARNING.format(skipped))
Example no. 5
0
def get_number_of_forms_in_all_domains():
    """Count 'XFormInstance' docs in couch across every domain.

    Includes logs but excludes error forms. Must keep reading from couch
    (not ES or SQL) because callers compare this figure against row
    counts in those other stores.
    """
    db = XFormInstance.get_db()
    return get_doc_count_by_type(db, 'XFormInstance')
Example no. 6
0
 def get_total_document_count(self):
     """Total docs across all mapped doc types, domain-scoped when a
     domain is set."""
     from corehq.dbaccessors.couchapps.all_docs import get_doc_count_by_type, get_doc_count_by_domain_type
     if not self.domain:
         return sum(
             get_doc_count_by_type(self.couchdb, doc_type)
             for doc_type in self.doc_type_map
         )
     return sum(
         get_doc_count_by_domain_type(self.couchdb, self.domain, doc_type)
         for doc_type in self.doc_type_map
     )
Example no. 7
0
 def get_total_document_count(self):
     """Return the total number of couch docs across all doc types in
     ``self.doc_type_map``, restricted to ``self.domain`` when one is set."""
     # Local import, presumably to avoid a circular import — TODO confirm.
     from corehq.dbaccessors.couchapps.all_docs import get_doc_count_by_type, get_doc_count_by_domain_type
     if self.domain:
         return sum(
             get_doc_count_by_domain_type(self.couchdb, self.domain, doc_type)
             for doc_type in self.doc_type_map
         )
     else:
         return sum(
             get_doc_count_by_type(self.couchdb, doc_type)
             for doc_type in self.doc_type_map
         )
Example no. 8
0
    def handle(self, **options):
        """Sync every couch doc of the configured type into its SQL model,
        logging progress per document."""
        doc_count = get_doc_count_by_type(self.couch_db(),
                                          self.couch_doc_type())
        logger.info("Found {} {} docs and {} {} models".format(
            doc_count,
            self.couch_doc_type(),
            self.sql_class().objects.count(),
            self.sql_class().__name__,
        ))
        all_docs = get_all_docs_with_doc_types(self.couch_db(),
                                               [self.couch_doc_type()])
        for doc_index, doc in enumerate(all_docs, start=1):
            logger.info("Looking at {} doc #{} of {} with id {}".format(
                self.couch_doc_type(), doc_index, doc_count, doc["_id"]))
            # Each doc's create/update happens in its own transaction.
            with transaction.atomic():
                model, created = self.update_or_create_sql_object(doc)
                verb = "Creating" if created else "Updated"
                logger.info("{} model for doc with id {}".format(
                    verb, doc["_id"]))
Example no. 9
0
    def handle(self, doc_type, **options):
        """Report, for each requested attribute of every ``doc_type`` doc,
        whether the attribute is ever blank and the longest value seen.

        Reads docs from the couch db named by ``options['db']`` (the
        default db when omitted) and prints one summary line per
        attribute in ``options['attrs']``.
        """
        attrs = options.get('attrs', [])
        db = couch_config.get_db(options.get('db', None))
        # defaultdict(int) is the idiomatic zero-initialized counter
        # (equivalent to lambda: 0, without the lambda).
        blank_counts = defaultdict(int)
        max_lengths = defaultdict(int)

        print("Found {} {} docs\n".format(get_doc_count_by_type(db, doc_type),
                                          doc_type))

        for doc in get_all_docs_with_doc_types(db, [doc_type]):
            for attr in attrs:
                # Single lookup instead of doc.get(attr) + doc[attr].
                value = doc.get(attr)
                if value:  # missing, None, and '' all count as blank
                    max_lengths[attr] = max(len(value), max_lengths[attr])
                else:
                    blank_counts[attr] += 1

        for attr in attrs:
            print("{} is {} blank and has max length of {}".format(
                attr, 'sometimes' if blank_counts[attr] else 'never',
                max_lengths[attr]))
    def handle(self, dry_run=False, **options):
        """Create or update a SQL model for every couch doc of this type;
        a dry run rolls back any model it created."""
        log_prefix = "[DRY RUN] " if dry_run else ""

        logger.info("{}Found {} {} docs and {} {} models".format(
            log_prefix,
            get_doc_count_by_type(self.couch_db(), self.couch_doc_type()),
            self.couch_doc_type(),
            self.sql_class().objects.count(),
            self.sql_class().__name__,
        ))
        for doc in get_all_docs_with_doc_types(self.couch_db(), [self.couch_doc_type()]):
            logger.info("{}Looking at doc with key {}".format(log_prefix, self.doc_key(doc)))
            with transaction.atomic():
                model, created = self.update_or_create_sql_object(doc)
                if dry_run:
                    # Undo any creation so a dry run leaves no trace.
                    if created:
                        model.delete()
                else:
                    action = "Created" if created else "Updated"
                    logger.info("{}{} model for doc with key {}".format(
                        log_prefix, action, self.doc_key(doc)))
                    model.save()
 def count_items_to_be_migrated(cls):
     """Number of couch docs not yet mirrored in SQL
     (couch count minus SQL count)."""
     in_couch = get_doc_count_by_type(cls.couch_db(), cls.couch_doc_type())
     in_sql = cls.sql_class().objects.count()
     return in_couch - in_sql
 def _get_couch_doc_count_for_type(self):
     """Count couch docs of this command's doc type."""
     db, doc_type = self.couch_db(), self.couch_doc_type()
     return get_doc_count_by_type(db, doc_type)
Example no. 13
0
def get_doc_counts_per_doc_type(db, doc_types):
    """Map each doc type in ``doc_types`` to its document count in ``db``."""
    counts = {}
    for doc_type in doc_types:
        counts[doc_type] = get_doc_count_by_type(db, doc_type)
    return counts
Example no. 14
0
 def __len__(self):
     """Doc count for this type, computed lazily and cached on first use."""
     try:
         return self._len
     except AttributeError:
         self._len = get_doc_count_by_type(self.db, self.doc_type)
         return self._len
Example no. 15
0
 def assertNumLocations(self, number):
     """Assert that both the SQL table and couch hold ``number`` locations."""
     self.assertEqual(SQLLocation.objects.count(), number)
     couch_count = get_doc_count_by_type(self.db, 'Location')
     self.assertEqual(couch_count, number)
Example no. 16
0
 def test_get_doc_count_by_type(self):
     """Counts are per-database: a type is only found in its own db."""
     expectations = [
         (None, 'Application', 1),
         ('users', 'CommCareUser', 1),
         (None, 'CommCareUser', 0),
         ('users', 'Application', 0),
     ]
     for db_name, doc_type, expected in expectations:
         self.assertEqual(
             get_doc_count_by_type(get_db(db_name), doc_type), expected)
Example no. 17
0
def migrate(slug, doc_type_map, doc_migrator_class, filename=None, reset=False,
            max_retry=2):
    """Migrate blobs

    :param slug: Migration name.
    :param doc_type_map: Dict of `doc_type_name: model_class` pairs.
    :param doc_migrator_class: A `BaseDocMigrator` subclass used to
    migrate documents.
    :param filename: File path for intermediate storage of migration
    data. A temporary file is created (and removed on completion) when
    omitted.
    :param reset: Reset existing migration state (if any), causing all
    documents to be reconsidered for migration, if this is true. This
    does not reset the django migration flag, which is set when the
    migration completes successfully.
    :param max_retry: Number of times to retry migrating a document
    before giving up.
    :returns: A tuple `(<num migrated>, <num skipped>)`
    """
    # All models must share a single couch db handle; BlobHelper and the
    # resumable iterator below operate on that one db.
    couchdb = next(iter(doc_type_map.values())).get_db()
    assert all(m.get_db() is couchdb for m in doc_type_map.values()), \
        "documents must live in same couch db: %s" % repr(doc_type_map)

    dirpath = None
    if filename is None:
        dirpath = mkdtemp()
        filename = os.path.join(dirpath, "export.txt")

    def encode_content(data):
        # Base64-encode attachment content so it survives the JSON text
        # dump below. NOTE(review): `unicode` implies Python 2-era code.
        if isinstance(data, unicode):
            data = data.encode("utf-8")
        return b64encode(data)

    total = sum(get_doc_count_by_type(couchdb, doc_type)
                for doc_type in doc_type_map)
    print("Migrating {} documents: {}...".format(
        total,
        ", ".join(sorted(doc_type_map))
    ))
    migrated = 0
    skipped = 0
    visited = 0
    start = datetime.now()
    doc_migrator = doc_migrator_class()
    load_attachments = doc_migrator.load_attachments
    blobs_key = doc_migrator.blobs_key
    # Iteration state is keyed by slug so an interrupted run can resume
    # where it left off.
    iter_key = slug + "-blob-migration"
    docs_by_type = ResumableDocsByTypeIterator(couchdb, doc_type_map, iter_key)
    if reset:
        docs_by_type.discard_state()

    # NOTE(review): file opened in binary mode but written with str —
    # assumes Python 2 (str is bytes); this would fail on Python 3.
    with open(filename, 'wb') as f:
        for doc in docs_by_type:
            visited += 1
            # Only docs that actually reference blobs need migrating.
            if doc.get(blobs_key):
                if load_attachments:
                    obj = BlobHelper(doc, couchdb)
                    doc["_attachments"] = {
                        name: {
                            "content_type": meta["content_type"],
                            "content": obj.fetch_attachment(name),
                        }
                        for name, meta in doc["_attachments"].items()
                    }
                    # make copy with encoded attachments for JSON dump
                    backup_doc = dict(doc)
                    backup_doc["_attachments"] = {
                        name: {
                            "content_type": meta["content_type"],
                            "content": encode_content(meta["content"]),
                        }
                        for name, meta in doc["_attachments"].items()
                    }
                else:
                    backup_doc = doc
                # Persist a backup line before migrating the doc so a
                # failed run can be recovered from the dump file.
                f.write('{}\n'.format(json.dumps(backup_doc)))
                f.flush()
                ok = doc_migrator.migrate(doc, couchdb)
                if ok:
                    migrated += 1
                else:
                    # Re-queue the doc up to max_retry times before
                    # counting it as skipped.
                    try:
                        docs_by_type.retry(doc, max_retry)
                    except TooManyRetries:
                        print("Skip: {doc_type} {_id}".format(**doc))
                        skipped += 1
                if (migrated + skipped) % 100 == 0:
                    # Rough ETA from average elapsed time per visited doc.
                    elapsed = datetime.now() - start
                    remaining = elapsed / visited * total
                    print("Migrated {}/{} of {} documents in {} ({} remaining)"
                          .format(migrated, visited, total, elapsed, remaining))

    doc_migrator.after_migration()

    if dirpath is not None:
        # Clean up the auto-created temp file and directory.
        os.remove(filename)
        os.rmdir(dirpath)

    # total - visited: docs the resumable iterator did not revisit —
    # presumably handled on an earlier run; verify against its semantics.
    print("Migrated {}/{} of {} documents ({} previously migrated, {} had no attachments)."
        .format(
            migrated,
            visited,
            total,
            total - visited,
            visited - (migrated + skipped)
        ))
    if skipped:
        print(MIGRATIONS_SKIPPED_WARNING.format(skipped))
    else:
        # Record completion so future runs (and the django migration
        # check) treat this slug as done.
        BlobMigrationState.objects.get_or_create(slug=slug)[0].save()
    return migrated, skipped
Example no. 18
0
 def assertNumLocations(self, number):
     """Check that SQL and couch both report ``number`` locations."""
     self.assertEqual(SQLLocation.objects.count(), number)
     couch_count = get_doc_count_by_type(Location.get_db(), 'Location')
     self.assertEqual(couch_count, number)
Example no. 19
0
 def docs_count(self):
     """Total number of couch docs across all app-migration doc types."""
     total = 0
     for doc_type in apps_migration.doc_types:
         total += get_doc_count_by_type(Application.get_db(), doc_type)
     return total