Beispiel #1
0
 def test_get_db_for_doc_type(self):
     """Check the database URI that each doc type resolves to.

     ``CommCareCase`` is expected in the main remote database and
     ``CommCareUser`` in the ``__users`` database.
     """
     couch_config = CouchConfig(config=self._config)
     expected_uri_by_doc_type = (
         ('CommCareCase', self.remote_db_uri),
         ('CommCareUser', '{}__users'.format(self.remote_db_uri)),
     )
     for doc_type, expected_uri in expected_uri_by_doc_type:
         resolved_db = couch_config.get_db_for_doc_type(doc_type)
         self.assertEqual(resolved_db.uri, expected_uri)
Beispiel #2
0
    def handle(self, *args, **options):
        """Copy a case, its indexed cases and its forms from another couch.

        Positional arguments are ``<source> <case_id> [<domain>]``. ``source``
        is passed to ``CouchConfig`` to locate the source databases. When
        ``domain`` is given, every copied case and form is re-assigned to it
        before being saved with ``force_update=True``.

        If ``options['postgres_db']`` is set, the collected document IDs are
        also handed to ``copy_postgres_data_for_docs``.

        Raises:
            CommandError: if fewer than two positional arguments are given,
                or if ``should_use_sql_backend(domain)`` is true.
        """
        if len(args) < 2:
            raise CommandError('Usage is copy_case, %s' % self.args)
        source_couch = CouchConfig(args[0])
        case_id = args[1]
        # Every document copied below is recorded here for the postgres step.
        doc_ids = [case_id]

        domain = args[2] if len(args) > 2 else None
        if should_use_sql_backend(domain):
            raise CommandError('This command only works for couch-based domains.')

        def _migrate_case(case_id):
            """Fetch one case from the source couch and save it; return (case, original domain)."""
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('getting case %s' % case_id)
            case = CommCareCase.wrap(source_couch.get_db_for_class(CommCareCase).get(case_id))
            original_domain = case.domain
            if domain is not None:
                case.domain = domain
            case.save(force_update=True)
            return case, original_domain

        case, orig_domain = _migrate_case(case_id)
        print('copying %s parent cases' % len(case.indices))
        for index in case.indices:
            _migrate_case(index.referenced_id)
            doc_ids.append(index.referenced_id)

        # hack, set the domain back to make sure we get the reverse indices correctly
        case.domain = orig_domain
        with OverrideDB(CommCareCase, source_couch.get_db_for_class(CommCareCase)):
            child_indices = get_reverse_indices(case)
        print('copying %s child cases' % len(child_indices))
        for index in child_indices:
            _migrate_case(index.referenced_id)
            doc_ids.append(index.referenced_id)

        print('copying %s xforms' % len(case.xform_ids))

        def form_wrapper(row):
            """Wrap a view row's doc as an XFormInstance, dropping attachment metadata."""
            doc = row['doc']
            doc.pop('_attachments', None)
            doc.pop('external_blobs', None)
            return XFormInstance.wrap(doc)

        xforms = source_couch.get_db_for_class(XFormInstance).all_docs(
            keys=case.xform_ids,
            include_docs=True,
            wrapper=form_wrapper,
        ).all()
        for form in xforms:
            if domain is not None:
                form.domain = domain
            form.save(force_update=True)
            print('saved %s' % form._id)
            doc_ids.append(form._id)

        if options['postgres_db']:
            copy_postgres_data_for_docs(options['postgres_db'], doc_ids)
Beispiel #3
0
 def _get_designs(self):
     """Return a one-element list holding this app's ``DesignInfo``.

     The database is opened (``create=True``) at the URI that ``CouchConfig``
     reports for the app label.
     """
     # CouchConfig is built at call time, not import time, so that it sees
     # the dbs settings after they are patched for tests.
     config = CouchConfig()
     label = self.app_config.label
     database = Database(config.get_db_uri_for_app_label(label), create=True)
     design = DesignInfo(
         app_label=label,
         db=database,
         design_path=self.dir,
     )
     return [design]
Beispiel #4
0
    def handle(self, *args, **options):
        """Copy a case, its indexed cases and its forms from another couch.

        Positional arguments are ``<source> <case_id> [<domain>]``. ``source``
        is passed to ``CouchConfig`` to locate the source databases. When
        ``domain`` is given, every copied case and form is re-assigned to it
        before being saved with ``force_update=True``.

        If ``options["postgres_db"]`` is set, the collected document IDs are
        also handed to ``copy_postgres_data_for_docs``.

        Raises:
            CommandError: if fewer than two positional arguments are given.
        """
        if len(args) < 2:
            raise CommandError("Usage is copy_case, %s" % self.args)
        source_couch = CouchConfig(args[0])
        case_id = args[1]
        # Every document copied below is recorded here for the postgres step.
        doc_ids = [case_id]

        domain = args[2] if len(args) > 2 else None

        def _migrate_case(case_id):
            """Fetch one case from the source couch and save it; return (case, original domain)."""
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("getting case %s" % case_id)
            case = CommCareCase.wrap(source_couch.get_db_for_class(CommCareCase).get(case_id))
            original_domain = case.domain
            if domain is not None:
                case.domain = domain
            case.save(force_update=True)
            return case, original_domain

        case, orig_domain = _migrate_case(case_id)
        print("copying %s parent cases" % len(case.indices))
        for index in case.indices:
            _migrate_case(index.referenced_id)
            doc_ids.append(index.referenced_id)

        # hack, set the domain back to make sure we get the reverse indices correctly
        case.domain = orig_domain
        with OverrideDB(CommCareCase, source_couch.get_db_for_class(CommCareCase)):
            child_indices = get_reverse_indices(case)
        print("copying %s child cases" % len(child_indices))
        for index in child_indices:
            _migrate_case(index.referenced_id)
            doc_ids.append(index.referenced_id)

        print("copying %s xforms" % len(case.xform_ids))

        def form_wrapper(row):
            """Wrap a view row's doc as an XFormInstance, dropping attachment metadata."""
            doc = row["doc"]
            doc.pop("_attachments", None)
            return XFormInstance.wrap(doc)

        xforms = (
            source_couch.get_db_for_class(XFormInstance)
            .all_docs(keys=case.xform_ids, include_docs=True, wrapper=form_wrapper)
            .all()
        )
        for form in xforms:
            if domain is not None:
                form.domain = domain
            form.save(force_update=True)
            print("saved %s" % form._id)
            doc_ids.append(form._id)

        if options["postgres_db"]:
            copy_postgres_data_for_docs(options["postgres_db"], doc_ids)
Beispiel #5
0
 def _get_designs(self):
     """Build the list of design documents to sync for this app.

     Returns a list with a single ``DesignInfo`` that points at the app's
     database (opened with ``create=True``) and at ``self.dir``.
     """
     # Instantiated inside the method so the config is created after the
     # dbs settings have been patched for tests.
     couch = CouchConfig()
     db_uri = couch.get_db_uri_for_app_label(self.app_config.label)
     app_db = Database(db_uri, create=True)
     return [
         DesignInfo(
             app_label=self.app_config.label,
             db=app_db,
             design_path=self.dir,
         ),
     ]
Beispiel #6
0
 def test_all_db_uris_by_slug(self):
     """Check that ``all_db_uris_by_slug`` contains the expected slug/URI pairs.

     Only the listed slugs are checked; extra entries in the mapping are
     allowed.
     """
     config = CouchConfig(config=self._config)
     expected = {
         None: self.remote_db_uri,
         'users': '{}__users'.format(self.remote_db_uri),
         'fixtures': '{}__fixtures'.format(self.remote_db_uri),
         'meta': '{}__meta'.format(self.remote_db_uri),
     }
     uris_by_slug = config.all_db_uris_by_slug
     # assertDictContainsSubset is deprecated and was removed in Python 3.12,
     # so the subset is checked key by key instead.
     for slug, expected_uri in expected.items():
         self.assertIn(slug, uris_by_slug)
         self.assertEqual(uris_by_slug[slug], expected_uri,
                          'key: {!r}'.format(slug))
 def test_all_db_uris_by_slug(self):
     """Check the URI stored under each expected slug of ``all_db_uris_by_slug``."""
     config = CouchConfig(config=self._config)
     base_uri = self.remote_db_uri
     expected_uris = {
         None: base_uri,
         'users': '{}__users'.format(base_uri),
         'fixtures': '{}__fixtures'.format(base_uri),
         'meta': '{}__meta'.format(base_uri),
     }
     for key in expected_uris:
         # The key is included in the failure message to identify the slug.
         self.assertEqual(config.all_db_uris_by_slug[key], expected_uris[key],
                          f"key: {key!r}")
Beispiel #8
0
 def _get_couch_database_configs_from_string(self, db_string):
     """Build a ``CouchConfig`` whose 'default' entry is parsed from a URL.

     The scheme, host, credentials and path of ``db_string`` are mapped onto
     the ``COUCH_*`` keys of the 'default' database settings.
     """
     url = urlparse(db_string)
     # NOTE(review): ``hostname`` excludes any port given in the URL --
     # confirm that COUCH_SERVER_ROOT is meant to be set without it.
     default_settings = {
         'COUCH_HTTPS': url.scheme == 'https',
         'COUCH_SERVER_ROOT': url.hostname,
         'COUCH_USERNAME': url.username,
         'COUCH_PASSWORD': url.password,
         'COUCH_DATABASE_NAME': url.path.lstrip('/'),
     }
     return CouchConfig({'default': default_settings})
Beispiel #9
0
    def handle(self, sourcedb, case_id, domain, **options):
        """Set up copying of a case from the couch described by ``sourcedb``.

        Raises:
            CommandError: if ``should_use_sql_backend(domain)`` is true.
        """
        # FIXME broken b/c https://github.com/dimagi/commcare-hq/pull/15896
        source_couch = CouchConfig(sourcedb)
        doc_ids = [case_id]

        if should_use_sql_backend(domain):
            raise CommandError('This command only works for couch-based domains.')

        def _migrate_case(case_id):
            """Fetch one case from the source couch and save it; return (case, original domain)."""
            print('getting case %s' % case_id)
            source_db = source_couch.get_db_for_class(CommCareCase)
            migrated = CommCareCase.wrap(source_db.get(case_id))
            source_domain = migrated.domain
            if domain is not None:
                # Re-home the case in the requested domain before saving.
                migrated.domain = domain
            migrated.save(force_update=True)
            return migrated, source_domain
Beispiel #10
0
    def handle(self, *args, **options):
        """Set up copying of a case from another couch.

        Positional arguments are ``<source> <case_id> [<domain>]``. ``source``
        is passed to ``CouchConfig`` to locate the source databases.

        Raises:
            CommandError: if fewer than two positional arguments are given,
                or if ``should_use_sql_backend(domain)`` is true.
        """
        if len(args) < 2:
            raise CommandError('Usage is copy_case, %s' % self.args)
        source_couch = CouchConfig(args[0])
        case_id = args[1]
        doc_ids = [case_id]

        domain = args[2] if len(args) > 2 else None
        if should_use_sql_backend(domain):
            raise CommandError('This command only works for couch-based domains.')

        def _migrate_case(case_id):
            """Fetch one case from the source couch and save it; return (case, original domain)."""
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print('getting case %s' % case_id)
            case = CommCareCase.wrap(source_couch.get_db_for_class(CommCareCase).get(case_id))
            original_domain = case.domain
            if domain is not None:
                case.domain = domain
            case.save(force_update=True)
            return case, original_domain
Beispiel #11
0
 def test_get_db_for_db_name(self):
     """Check that database names resolve to the matching remote URIs."""
     couch_config = CouchConfig(config=self._config)

     main_db = couch_config.get_db_for_db_name('test_cchq')
     self.assertEqual(self.remote_db_uri, main_db.uri)

     users_db = couch_config.get_db_for_db_name('test_cchq__users')
     self.assertEqual('{}__users'.format(self.remote_db_uri), users_db.uri)
Beispiel #12
0
 def test_get_db_for_db_name_not_found(self):
     """Check that looking up the name 'missing' raises ``DatabaseNotFound``."""
     couch_config = CouchConfig(config=self._config)
     self.assertRaises(
         DatabaseNotFound, couch_config.get_db_for_db_name, 'missing')
Beispiel #13
0
    def handle(self, *args, **options):
        """Copy a domain's couch documents from a source couch to a target couch.

        Positional arguments are ``<sourcedb> <domain> [<targetdb>]``.
        ``sourcedb`` and ``targetdb`` are passed to ``CouchConfig``; without
        ``targetdb`` a default ``CouchConfig()`` is the target.

        Which documents are copied depends on the options:

        * ``doc_types``: only the listed types, optionally limited by ``since``
        * ``id_file``: only the IDs listed in that file, one per line
        * otherwise: everything in the domain except the excluded types

        With ``list_types`` set, the types are listed and the process exits
        with status 0 without copying.

        Raises:
            CommandError: if the number of positional arguments is not 2 or 3.
            SystemExit: after ``list_types``, or when the id file is missing
                or contains no IDs.
        """
        if len(args) not in [2, 3]:
            raise CommandError('Usage is copy_domain %s' % self.args)
        self.exclude_dbs = (
            # these have data we don't want to copy
            'receiverwrapper',
            'couchlog',
            'auditcare',
            'fluff-bihar',
            'fluff-opm',
            'fluff-mc',
            'fluff-cvsu',
            'mvp-indicators',
            'm4change',
            # todo: missing domain/docs, but probably want to add back
            'meta',
        )
        self.source_couch = source_couch = CouchConfig(args[0])
        domain = args[1].strip()
        simulate = options['simulate']
        exclude_attachments = options['exclude_attachments']
        self.run_multi_process = options['run_multi_process']

        since = json_format_date(iso_string_to_date(
            options['since'])) if options['since'] else None

        if options['list_types']:
            for _, sourcedb in self.iter_source_dbs():
                self.list_types(sourcedb, domain, since)
            sys.exit(0)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        if simulate:
            print("\nSimulated run, no data will be copied.\n")

        if options['postgres_db'] and options['postgres_password']:
            settings.DATABASES[options['postgres_db']]['PASSWORD'] = options[
                'postgres_password']

        self.targetdb = CouchConfig(
            args[2]) if len(args) == 3 else CouchConfig()

        try:
            domain_doc = Domain.get_by_name(domain)
        except ResourceNotFound:
            domain_doc = None

        # Copy the domain document itself first when it is not found locally.
        if domain_doc is None:
            self.copy_domain(source_couch, domain)

        if options['doc_types']:
            doc_types = options['doc_types'].split(',')
            for doc_type in doc_types:
                sourcedb = source_couch.get_db_for_doc_type(doc_type)
                # ``since`` is omitted from the start key when it is None.
                startkey = [
                    x for x in [domain, doc_type, since] if x is not None
                ]
                endkey = [x for x in [domain, doc_type, {}] if x is not None]
                self.copy_docs(sourcedb,
                               domain,
                               simulate,
                               startkey,
                               endkey,
                               doc_type=doc_type,
                               since=since,
                               postgres_db=options['postgres_db'],
                               exclude_attachments=exclude_attachments)
        elif options['id_file']:
            path = options['id_file']
            if not os.path.isfile(path):
                print("Path '%s' does not exist or is not a file" % path)
                sys.exit(1)

            with open(path) as id_file:
                # Strip line endings (including '\r') and skip blank lines so
                # an empty string is never treated as a document ID.
                doc_ids = [line.strip() for line in id_file if line.strip()]

            if not doc_ids:
                print("Path '%s' does not contain any document ID's" % path)
                sys.exit(1)

            for _, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb,
                               domain,
                               simulate,
                               doc_ids=doc_ids,
                               postgres_db=options['postgres_db'],
                               exclude_attachments=exclude_attachments)
        else:
            startkey = [domain]
            endkey = [domain, {}]
            exclude_types = DEFAULT_EXCLUDE_TYPES + options[
                'doc_types_exclude'].split(',')
            for _, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb,
                               domain,
                               simulate,
                               startkey,
                               endkey,
                               exclude_types=exclude_types,
                               postgres_db=options['postgres_db'],
                               exclude_attachments=exclude_attachments)
Beispiel #14
0
class Command(BaseCommand):
    """Management command that copies one domain's couch documents between couches.

    The source is described by ``<sourcedb>`` and the target by the optional
    ``<targetdb>``; both are passed to ``CouchConfig``.
    """
    help = "Copies the contents of a domain to another database. " \
           "If targetdb is not specified, the target is the database " \
           "specified by COUCH_DATABASE in your settings."
    args = '<sourcedb> <domain> [<targetdb>]'
    option_list = BaseCommand.option_list + (
        make_option('--include',
                    action='store',
                    dest='doc_types',
                    default='',
                    help='Comma-separated list of Document Types to copy'),
        make_option(
            '--exclude',
            action='store',
            dest='doc_types_exclude',
            default='',
            help='Comma-separated list of Document Types to NOT copy.'),
        make_option(
            '--exclude-attachments',
            action='store_true',
            dest='exclude_attachments',
            default=False,
            help="Don't copy document attachments, just the docs themselves."),
        make_option(
            '--since',
            action='store',
            dest='since',
            default='',
            help=
            'Only copy documents newer than this date. Format: yyyy-MM-dd. '
            'Only applies with --include or --list-types.'),
        make_option(
            '--list-types',
            action='store_true',
            dest='list_types',
            default=False,
            help=
            'Don\'t copy anything, just list all the available document types.'
        ),
        make_option('--simulate',
                    action='store_true',
                    dest='simulate',
                    default=False,
                    help='Don\'t copy anything, print what would be copied.'),
        make_option(
            '--id-file',
            action='store',
            dest='id_file',
            default='',
            help=
            "File containing one document ID per line. Only docs with these ID's will be copied"
        ),
        make_option(
            '--postgres-db',
            action='store',
            dest='postgres_db',
            default='',
            help=
            "Name of postgres database to pull additional data from. This should map to a "
            "key in settings.DATABASES. If not specified no additional postgres data will be "
            "copied. This is currently used to pull CommCare Supply models."),
        make_option(
            '--postgres-password',
            action='store',
            dest='postgres_password',
            default='',
            help=
            "Password for postgres database to pull additional data from. If not specified will "
            "default to the value in settings.DATABASES"),
        make_option(
            '--dont-run-multi-process',
            action='store_false',
            dest='run_multi_process',
            default=True,
            help=
            "Copy in a single process. By default multiple processes are spawned, which should "
            "speed up the copy. Pass this flag when running in a supervised process"))

    def iter_source_dbs(self):
        """Yield ``(slug, db)`` for every source database not in ``self.exclude_dbs``.

        Relies on ``self.source_couch`` and ``self.exclude_dbs``, which are
        set by ``handle``.
        """
        for sourcedb_name, sourcedb in self.source_couch.all_dbs_by_slug.items():
            if sourcedb_name not in self.exclude_dbs:
                # A falsy slug is reported as "the main" db.
                print("In {} db".format(sourcedb_name or "the main"))
                yield sourcedb_name, sourcedb

    def handle(self, *args, **options):
        """Copy a domain's couch documents from the source to the target couch.

        Positional arguments are ``<sourcedb> <domain> [<targetdb>]``. Without
        ``targetdb`` a default ``CouchConfig()`` is the target.

        Which documents are copied depends on the options:

        * ``doc_types``: only the listed types, optionally limited by ``since``
        * ``id_file``: only the IDs listed in that file, one per line
        * otherwise: everything in the domain except the excluded types

        With ``list_types`` set, the types are listed and the process exits
        with status 0 without copying.

        Raises:
            CommandError: if the number of positional arguments is not 2 or 3.
            SystemExit: after ``list_types``, or when the id file is missing
                or contains no IDs.
        """
        if len(args) not in [2, 3]:
            raise CommandError('Usage is copy_domain %s' % self.args)
        self.exclude_dbs = (
            # these have data we don't want to copy
            'receiverwrapper',
            'couchlog',
            'auditcare',
            'fluff-bihar',
            'fluff-opm',
            'fluff-mc',
            'fluff-cvsu',
            'mvp-indicators',
            'm4change',
            # todo: missing domain/docs, but probably want to add back
            'meta',
        )
        self.source_couch = source_couch = CouchConfig(args[0])
        domain = args[1].strip()
        simulate = options['simulate']
        exclude_attachments = options['exclude_attachments']
        self.run_multi_process = options['run_multi_process']

        since = json_format_date(iso_string_to_date(
            options['since'])) if options['since'] else None

        if options['list_types']:
            for _, sourcedb in self.iter_source_dbs():
                self.list_types(sourcedb, domain, since)
            sys.exit(0)

        # Single-argument print(...) behaves the same on Python 2 and 3.
        if simulate:
            print("\nSimulated run, no data will be copied.\n")

        if options['postgres_db'] and options['postgres_password']:
            settings.DATABASES[options['postgres_db']]['PASSWORD'] = options[
                'postgres_password']

        self.targetdb = CouchConfig(
            args[2]) if len(args) == 3 else CouchConfig()

        try:
            domain_doc = Domain.get_by_name(domain)
        except ResourceNotFound:
            domain_doc = None

        # Copy the domain document itself first when it is not found locally.
        if domain_doc is None:
            self.copy_domain(source_couch, domain)

        if options['doc_types']:
            doc_types = options['doc_types'].split(',')
            for doc_type in doc_types:
                sourcedb = source_couch.get_db_for_doc_type(doc_type)
                # ``since`` is omitted from the start key when it is None.
                startkey = [
                    x for x in [domain, doc_type, since] if x is not None
                ]
                endkey = [x for x in [domain, doc_type, {}] if x is not None]
                self.copy_docs(sourcedb,
                               domain,
                               simulate,
                               startkey,
                               endkey,
                               doc_type=doc_type,
                               since=since,
                               postgres_db=options['postgres_db'],
                               exclude_attachments=exclude_attachments)
        elif options['id_file']:
            path = options['id_file']
            if not os.path.isfile(path):
                print("Path '%s' does not exist or is not a file" % path)
                sys.exit(1)

            with open(path) as id_file:
                # Strip line endings (including '\r') and skip blank lines so
                # an empty string is never treated as a document ID.
                doc_ids = [line.strip() for line in id_file if line.strip()]

            if not doc_ids:
                print("Path '%s' does not contain any document ID's" % path)
                sys.exit(1)

            for _, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb,
                               domain,
                               simulate,
                               doc_ids=doc_ids,
                               postgres_db=options['postgres_db'],
                               exclude_attachments=exclude_attachments)
        else:
            startkey = [domain]
            endkey = [domain, {}]
            exclude_types = DEFAULT_EXCLUDE_TYPES + options[
                'doc_types_exclude'].split(',')
            for _, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb,
                               domain,
                               simulate,
                               startkey,
                               endkey,
                               exclude_types=exclude_types,
                               postgres_db=options['postgres_db'],
                               exclude_attachments=exclude_attachments)

    def list_types(self, sourcedb, domain, since):
        """Print the document types found for ``domain`` in ``sourcedb`` with counts.

        When ``since`` is given, each line also shows the count returned for
        the key range starting at ``since``, next to the total.
        """
        doc_types = sourcedb.view("by_domain_doc_type_date/view",
                                  startkey=[domain],
                                  endkey=[domain, {}],
                                  reduce=True,
                                  group=True,
                                  group_level=2)

        # Each row key is indexed at [1] for the doc type; the value is the count.
        doc_count = {row['key'][1]: row['value'] for row in doc_types}
        if since:
            for doc_type in sorted(doc_count):
                num_since = sourcedb.view("by_domain_doc_type_date/view",
                                          startkey=[domain, doc_type, since],
                                          endkey=[domain, doc_type, {}],
                                          reduce=True).all()
                num = num_since[0]['value'] if num_since else 0
                print("{0:<30}- {1:<6} total {2}".format(
                    doc_type, num, doc_count[doc_type]))
        else:
            for doc_type in sorted(doc_count):
                print("{0:<30}- {1}".format(doc_type, doc_count[doc_type]))

    def copy_docs(self,
                  sourcedb,
                  domain,
                  simulate,
                  startkey=None,
                  endkey=None,
                  doc_ids=None,
                  doc_type=None,
                  since=None,
                  exclude_types=None,
                  postgres_db=None,
                  exclude_attachments=False):
        """Copy documents from ``sourcedb`` to the target couch.

        When ``doc_ids`` is empty or None, the IDs are looked up in the
        ``by_domain_doc_type_date/view`` view between ``startkey`` and
        ``endkey``. Documents are copied either through ``Worker`` processes
        (when ``self.run_multi_process`` is set) or one by one in this
        process. ``doc_type`` and ``since`` are only used in the summary
        message.

        If ``postgres_db`` is given, the same IDs are then passed to
        ``copy_postgres_data_for_docs``.
        """
        if not doc_ids:
            doc_ids = [
                result["id"]
                for result in sourcedb.view("by_domain_doc_type_date/view",
                                            startkey=startkey,
                                            endkey=endkey,
                                            reduce=False)
            ]
        total = len(doc_ids)
        msg = "Found %s matching documents in domain: %s" % (total, domain)
        msg += " of type: %s" % (doc_type) if doc_type else ""
        msg += " since: %s" % (since) if since else ""
        print(msg)

        err_log = self._get_err_log()
        try:
            if self.run_multi_process:
                queue = Queue(150)
                for _ in range(NUM_PROCESSES):
                    Worker(queue, sourcedb, self.targetdb, exclude_types, total,
                           simulate, err_log, exclude_attachments).start()

                for count, doc in enumerate(
                        iter_docs(sourcedb, doc_ids, chunksize=100), 1):
                    queue.put((doc, count))

                # shutdown workers: one None sentinel per worker
                for _ in range(NUM_PROCESSES):
                    queue.put(None)
                # NOTE(review): the workers are started but never joined here,
                # and the error log is closed right after the sentinels are
                # queued -- confirm the workers do not rely on this handle.
            else:
                for count, doc in enumerate(
                        iter_docs(sourcedb, doc_ids, chunksize=100), 1):
                    target = self.targetdb.get_db_for_doc_type(doc['doc_type'])
                    copy_doc(doc, count, sourcedb, target, exclude_types, total,
                             simulate, exclude_attachments)
        finally:
            # Close the log even if copying fails part-way through.
            err_log.close()

        # Keep the error log only if something was written to it.
        if os.path.getsize(err_log.name) == 0:
            os.remove(err_log.name)
        else:
            print('Failed document IDs written to %s' % err_log.name)

        if postgres_db:
            copy_postgres_data_for_docs(postgres_db,
                                        doc_ids=doc_ids,
                                        simulate=simulate)

    def copy_domain(self, source_couch, domain):
        """Copy the ``Domain`` document for ``domain`` into the target couch.

        Prints a message instead of copying when the ``domain/domains`` view
        returns no document.
        """
        print("Copying domain doc")
        sourcedb = source_couch.get_db_for_class(Domain)
        result = sourcedb.view("domain/domains",
                               key=domain,
                               reduce=False,
                               include_docs=True).first()

        if result and 'doc' in result:
            domain_doc = Domain.wrap(result['doc'])
            dt = DocumentTransform(domain_doc._obj, sourcedb)
            save(dt, self.targetdb.get_db_for_doc_type(domain_doc['doc_type']))
        else:
            print("Domain doc not found for domain %s." % domain)

    def _get_err_log(self):
        """Open a new line-buffered error log in the current directory.

        The first name of the form ``copy_domain.err.<n>`` (n from 0 to 999)
        that is not an existing file is opened in append mode.

        Raises:
            CommandError: if all 1000 candidate names already exist.
        """
        name = 'copy_domain.err.%s'
        for i in range(1000):  # arbitrarily large number
            candidate = name % i
            if not os.path.isfile(candidate):
                return open(candidate, 'a', buffering=1)
        # Previously this fell through and returned None, which only
        # surfaced later as an AttributeError on the log handle.
        raise CommandError(
            'No unused error log name available (%s through %s all exist)'
            % (name % 0, name % 999))
Beispiel #15
0
 def test_remote_db_uri(self):
     """Check that a config built from ``self._config`` reports the remote URI."""
     actual_uri = CouchConfig(config=self._config).db_uri
     self.assertEqual(actual_uri, self.remote_db_uri)
Beispiel #16
0
 def test_default_db_uri(self):
     """Check that a default config reports ``settings.COUCH_DATABASE`` as its URI."""
     default_uri = CouchConfig().db_uri
     self.assertEqual(default_uri, settings.COUCH_DATABASE)
 def test_get_db_for_db_name(self):
     """Check that database names resolve to the matching remote URIs."""
     couch_config = CouchConfig(db_uri=self.remote_db_uri)

     main_db = couch_config.get_db_for_db_name('cchq')
     self.assertEqual(self.remote_db_uri, main_db.uri)

     users_db = couch_config.get_db_for_db_name('cchq__users')
     self.assertEqual('{}__users'.format(self.remote_db_uri), users_db.uri)
 def test_get_db_for_doc_type(self):
     """Check the database URI that each doc type resolves to.

     ``CommCareCase`` is expected in the main remote database and
     ``CommCareUser`` in the ``__users`` database.
     """
     couch_config = CouchConfig(db_uri=self.remote_db_uri)
     expected_uri_by_doc_type = (
         ('CommCareCase', self.remote_db_uri),
         ('CommCareUser', '{}__users'.format(self.remote_db_uri)),
     )
     for doc_type, expected_uri in expected_uri_by_doc_type:
         resolved_db = couch_config.get_db_for_doc_type(doc_type)
         self.assertEqual(resolved_db.uri, expected_uri)
Beispiel #19
0
class Command(BaseCommand):
    """Management command that copies a domain's documents between databases.

    Positional arguments are ``<sourcedb> <domain> [<targetdb>]``; the options
    declared in ``option_list`` narrow what is copied and how.
    """
    help = "Copies the contents of a domain to another database. " \
           "If targetdb is not specified, the target is the database " \
           "specified by COUCH_DATABASE in your settings."
    args = '<sourcedb> <domain> [<targetdb>]'
    option_list = BaseCommand.option_list + (
        make_option('--include',
                    action='store',
                    dest='doc_types',
                    default='',
                    help='Comma-separated list of Document Types to copy'),
        make_option('--exclude',
                    action='store',
                    dest='doc_types_exclude',
                    default='',
                    help='Comma-separated list of Document Types to NOT copy.'),
        make_option('--exclude-attachments',
                    action='store_true',
                    dest='exclude_attachments',
                    default=False,
                    help="Don't copy document attachments, just the docs themselves."),
        # The date is only consulted by the --include and --list-types code paths.
        make_option('--since',
                    action='store',
                    dest='since',
                    default='',
                    help='Only copy documents newer than this date. Format: yyyy-MM-dd. Only '
                         'applies with --include or --list-types.'),
        make_option('--list-types',
                    action='store_true',
                    dest='list_types',
                    default=False,
                    help='Don\'t copy anything, just list all the available document types.'),
        make_option('--simulate',
                    action='store_true',
                    dest='simulate',
                    default=False,
                    help='Don\'t copy anything, print what would be copied.'),
        make_option('--id-file',
                    action='store',
                    dest='id_file',
                    default='',
                    help="File containing one document ID per line. Only docs with these ID's will be copied"),
        make_option('--postgres-db',
                    action='store',
                    dest='postgres_db',
                    default='',
                    help="Name of postgres database to pull additional data from. This should map to a "
                         "key in settings.DATABASES. If not specified no additional postgres data will be "
                         "copied. This is currently used to pull CommCare Supply models."),
        make_option('--postgres-password',
                    action='store',
                    dest='postgres_password',
                    default='',
                    help="Password for postgres database to pull additional data from. If not specified will "
                         "default to the value in settings.DATABASES"),
        # store_false: passing the flag turns run_multi_process (default True) off.
        make_option('--dont-run-multi-process',
                    action='store_false',
                    dest='run_multi_process',
                    default=True,
                    help="Copy in a single process instead of spawning multiple processes. Multiple "
                         "processes (the default) should speed up the time taken to copy. This flag "
                         "must be passed if running in a supervised process")
    )

    def iter_source_dbs(self):
        """Yield ``(slug, db)`` for each source database that is not excluded.

        Iterates ``self.source_couch.all_dbs_by_slug`` and skips any slug that
        appears in ``self.exclude_dbs``.  A line naming the database is
        printed just before each pair is yielded; a falsy slug is reported as
        "the main".
        """
        for slug, db in self.source_couch.all_dbs_by_slug.items():
            if slug in self.exclude_dbs:
                continue
            label = slug if slug else "the main"
            print("In {} db".format(label))
            yield slug, db

    def handle(self, *args, **options):
        """Copy a domain's documents from the source database to the target.

        Positional ``args`` are ``<sourcedb> <domain> [<targetdb>]``.  When
        ``options['list_types']`` is set, the per-type counts are printed for
        every non-excluded source database and the process exits.  Otherwise
        the domain document is copied first if ``Domain.get_by_name`` does not
        find it, and documents are then selected in one of three ways:

        * ``options['doc_types']``: each listed doc type is copied from that
          type's source database, with ``since`` in the start key when given;
        * ``options['id_file']``: the IDs read from the file are copied from
          every non-excluded source database;
        * otherwise: the whole ``[domain] .. [domain, {}]`` key range is
          copied from every non-excluded source database, passing
          ``DEFAULT_EXCLUDE_TYPES`` plus ``options['doc_types_exclude']`` as
          the types to exclude.

        Raises:
            CommandError: if the number of positional args is not 2 or 3.
        """
        if len(args) not in [2, 3]:
            raise CommandError('Usage is copy_domain %s' % self.args)
        self.exclude_dbs = (
            # these have data we don't want to copy
            'receiverwrapper', 'couchlog', 'auditcare', 'fluff-bihar', 'fluff-opm',
            'fluff-mc', 'fluff-cvsu', 'mvp-indicators', 'm4change',
            # todo: missing domain/docs, but probably want to add back
            'meta',
        )
        self.source_couch = source_couch = CouchConfig(args[0])
        domain = args[1].strip()
        simulate = options['simulate']
        exclude_attachments = options['exclude_attachments']
        postgres_db = options['postgres_db']
        self.run_multi_process = options['run_multi_process']

        since = json_format_date(iso_string_to_date(options['since'])) if options['since'] else None

        if options['list_types']:
            for sourcedb_name, sourcedb in self.iter_source_dbs():
                self.list_types(sourcedb, domain, since)
            sys.exit(0)

        if simulate:
            print("\nSimulated run, no data will be copied.\n")

        # Let the command line override the configured postgres password.
        if postgres_db and options['postgres_password']:
            settings.DATABASES[postgres_db]['PASSWORD'] = options['postgres_password']

        self.targetdb = CouchConfig(args[2]) if len(args) == 3 else CouchConfig()

        try:
            domain_doc = Domain.get_by_name(domain)
        except ResourceNotFound:
            domain_doc = None

        if domain_doc is None:
            self.copy_domain(source_couch, domain)

        if options['doc_types']:
            for doc_type in options['doc_types'].split(','):
                sourcedb = source_couch.get_db_for_doc_type(doc_type)
                # ``since`` is simply left out of the start key when it is None.
                startkey = [x for x in [domain, doc_type, since] if x is not None]
                endkey = [x for x in [domain, doc_type, {}] if x is not None]
                self.copy_docs(sourcedb, domain, simulate, startkey, endkey, doc_type=doc_type, since=since,
                               postgres_db=postgres_db, exclude_attachments=exclude_attachments)
        elif options['id_file']:
            path = options['id_file']
            if not os.path.isfile(path):
                print("Path '%s' does not exist or is not a file" % path)
                sys.exit(1)

            with open(path) as id_file:
                # strip() also removes the '\r' of CRLF files and stray spaces;
                # blank lines are skipped so they never become empty document IDs.
                stripped = (line.strip() for line in id_file)
                doc_ids = [doc_id for doc_id in stripped if doc_id]

            if not doc_ids:
                print("Path '%s' does not contain any document ID's" % path)
                sys.exit(1)

            for sourcedb_name, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb, domain, simulate, doc_ids=doc_ids, postgres_db=postgres_db,
                               exclude_attachments=exclude_attachments)
        else:
            startkey = [domain]
            endkey = [domain, {}]
            exclude_types = DEFAULT_EXCLUDE_TYPES + options['doc_types_exclude'].split(',')
            for sourcedb_name, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb, domain, simulate, startkey, endkey, exclude_types=exclude_types,
                               postgres_db=postgres_db, exclude_attachments=exclude_attachments)

    def list_types(self, sourcedb, domain, since):
        """Print how many documents of each type ``domain`` has in ``sourcedb``.

        Totals come from the "by_domain_doc_type_date/view" view, reduced and
        grouped at level 2, and are printed in sorted doc-type order.  When
        ``since`` is truthy, each line also shows the reduced count for the
        key range ``[domain, doc_type, since] .. [domain, doc_type, {}]``
        (0 when the view returns no row) ahead of the total.
        """
        view_name = "by_domain_doc_type_date/view"
        grouped_rows = sourcedb.view(view_name, startkey=[domain], endkey=[domain, {}],
                                     reduce=True, group=True, group_level=2)
        # key[1] is the doc type, value is the reduced count for it
        totals = {row['key'][1]: row['value'] for row in grouped_rows}

        for doc_type in sorted(totals):
            if not since:
                print("{0:<30}- {1}".format(doc_type, totals[doc_type]))
                continue
            recent = sourcedb.view(view_name, startkey=[domain, doc_type, since],
                                   endkey=[domain, doc_type, {}], reduce=True).all()
            num = recent[0]['value'] if recent else 0
            print("{0:<30}- {1:<6} total {2}".format(doc_type, num, totals[doc_type]))

    def copy_docs(self, sourcedb, domain, simulate, startkey=None, endkey=None, doc_ids=None,
                  doc_type=None, since=None, exclude_types=None, postgres_db=None, exclude_attachments=False):
        """Copy documents from ``sourcedb`` into the target database.

        Args:
            sourcedb: source database the documents are read from.
            domain: domain name; only used in the summary message here.
            simulate: passed through to the copy helpers.
            startkey, endkey: key range for "by_domain_doc_type_date/view",
                queried only when ``doc_ids`` is empty or not given.
            doc_ids: explicit document IDs to copy.
            doc_type, since: only used in the summary message here.
            exclude_types, exclude_attachments: passed through to the copy
                helpers.
            postgres_db: when truthy, ``copy_postgres_data_for_docs`` is
                called afterwards for the same document IDs.

        Depending on ``self.run_multi_process`` the documents are either fed
        to ``NUM_PROCESSES`` ``Worker`` objects through a bounded queue, or
        copied one at a time with ``copy_doc``.  An error log is opened for
        the run; afterwards it is removed if empty, otherwise its name is
        printed.
        """
        if not doc_ids:
            rows = sourcedb.view("by_domain_doc_type_date/view", startkey=startkey,
                                 endkey=endkey, reduce=False)
            doc_ids = [row["id"] for row in rows]
        total = len(doc_ids)

        msg = "Found %s matching documents in domain: %s" % (total, domain)
        if doc_type:
            msg += " of type: %s" % (doc_type)
        if since:
            msg += " since: %s" % (since)
        print(msg)

        err_log = self._get_err_log()
        # try/finally so this process's handle on the log is closed even when copying raises.
        try:
            if self.run_multi_process:
                queue = Queue(150)
                for _ in range(NUM_PROCESSES):
                    Worker(queue, sourcedb, self.targetdb, exclude_types, total, simulate,
                           err_log, exclude_attachments).start()

                # count is 1-based: the position of the doc within this run
                for count, doc in enumerate(iter_docs(sourcedb, doc_ids, chunksize=100), 1):
                    queue.put((doc, count))

                # shutdown workers: one None sentinel per worker
                for _ in range(NUM_PROCESSES):
                    queue.put(None)
                # NOTE(review): the workers are not waited for here, so they may
                # still be writing to the log when it is inspected below -- confirm.
            else:
                for count, doc in enumerate(iter_docs(sourcedb, doc_ids, chunksize=100), 1):
                    target = self.targetdb.get_db_for_doc_type(doc['doc_type'])
                    copy_doc(doc, count, sourcedb, target, exclude_types, total, simulate, exclude_attachments)
        finally:
            err_log.close()

        # An empty log means nothing was recorded as failed; don't leave it lying around.
        if os.path.getsize(err_log.name) == 0:
            os.remove(err_log.name)
        else:
            print('Failed document IDs written to %s' % err_log.name)

        if postgres_db:
            copy_postgres_data_for_docs(postgres_db, doc_ids=doc_ids, simulate=simulate)

    def copy_domain(self, source_couch, domain):
        """Copy the domain document for ``domain`` into the target database.

        The document is looked up through the "domain/domains" view of the
        source database that holds ``Domain`` documents.  When a row carrying
        a ``doc`` comes back, it is wrapped as a ``Domain``, turned into a
        ``DocumentTransform`` and handed to ``save`` together with the target
        database for the document's ``doc_type``.  Otherwise a "not found"
        notice is printed.
        """
        print("Copying domain doc")
        domain_db = source_couch.get_db_for_class(Domain)
        view_kwargs = dict(key=domain, reduce=False, include_docs=True)
        row = domain_db.view("domain/domains", **view_kwargs).first()

        # Guard clause: nothing to copy without a row that carries a doc.
        if not row or 'doc' not in row:
            print("Domain doc not found for domain %s." % domain)
            return

        wrapped = Domain.wrap(row['doc'])
        transform = DocumentTransform(wrapped._obj, domain_db)
        target = self.targetdb.get_db_for_doc_type(wrapped['doc_type'])
        save(transform, target)

    def _get_err_log(self):
        name = 'copy_domain.err.%s'
        for i in range(1000):  # arbitrarily large number
            candidate = name % i
            if not os.path.isfile(candidate):
                return open(candidate, 'a', buffering=1)
 def test_get_db_for_db_name_not_found(self):
     """Looking up the database name 'missing' raises ``DatabaseNotFound``."""
     couch = CouchConfig(db_uri=self.remote_db_uri)
     # Callable form of assertRaises: the lookup runs inside the assertion.
     self.assertRaises(DatabaseNotFound, couch.get_db_for_db_name, 'missing')
Beispiel #21
0
    def handle(self, *args, **options):
        """Copy a domain's documents from the source database to the target.

        Positional ``args`` are the source database, the domain and an
        optional target database.  When ``options['list_types']`` is set, the
        per-type counts are printed for every non-excluded source database
        and the process exits.  Otherwise the domain document is copied first
        if ``Domain.get_by_name`` does not find it, and documents are then
        selected in one of three ways:

        * ``options['doc_types']``: each listed doc type is copied from that
          type's source database, with ``since`` in the start key when given;
        * ``options['id_file']``: the IDs read from the file are copied from
          every non-excluded source database;
        * otherwise: the whole ``[domain] .. [domain, {}]`` key range is
          copied from every non-excluded source database, passing
          ``DEFAULT_EXCLUDE_TYPES`` plus ``options['doc_types_exclude']`` as
          the types to exclude.

        Raises:
            CommandError: if the number of positional args is not 2 or 3.
        """
        if len(args) not in [2, 3]:
            raise CommandError('Usage is copy_domain %s' % self.args)
        self.exclude_dbs = (
            # these have data we don't want to copy
            'receiverwrapper', 'couchlog', 'auditcare', 'fluff-bihar', 'fluff-opm',
            'fluff-mc', 'fluff-cvsu', 'mvp-indicators', 'm4change',
            # todo: missing domain/docs, but probably want to add back
            'meta',
        )
        self.source_couch = source_couch = CouchConfig(args[0])
        domain = args[1].strip()
        simulate = options['simulate']
        exclude_attachments = options['exclude_attachments']
        postgres_db = options['postgres_db']
        self.run_multi_process = options['run_multi_process']

        since = json_format_date(iso_string_to_date(options['since'])) if options['since'] else None

        if options['list_types']:
            for sourcedb_name, sourcedb in self.iter_source_dbs():
                self.list_types(sourcedb, domain, since)
            sys.exit(0)

        if simulate:
            print("\nSimulated run, no data will be copied.\n")

        # Let the command line override the configured postgres password.
        if postgres_db and options['postgres_password']:
            settings.DATABASES[postgres_db]['PASSWORD'] = options['postgres_password']

        self.targetdb = CouchConfig(args[2]) if len(args) == 3 else CouchConfig()

        try:
            domain_doc = Domain.get_by_name(domain)
        except ResourceNotFound:
            domain_doc = None

        if domain_doc is None:
            self.copy_domain(source_couch, domain)

        if options['doc_types']:
            for doc_type in options['doc_types'].split(','):
                sourcedb = source_couch.get_db_for_doc_type(doc_type)
                # ``since`` is simply left out of the start key when it is None.
                startkey = [x for x in [domain, doc_type, since] if x is not None]
                endkey = [x for x in [domain, doc_type, {}] if x is not None]
                self.copy_docs(sourcedb, domain, simulate, startkey, endkey, doc_type=doc_type, since=since,
                               postgres_db=postgres_db, exclude_attachments=exclude_attachments)
        elif options['id_file']:
            path = options['id_file']
            if not os.path.isfile(path):
                print("Path '%s' does not exist or is not a file" % path)
                sys.exit(1)

            with open(path) as id_file:
                # strip() also removes the '\r' of CRLF files and stray spaces;
                # blank lines are skipped so they never become empty document IDs.
                stripped = (line.strip() for line in id_file)
                doc_ids = [doc_id for doc_id in stripped if doc_id]

            if not doc_ids:
                print("Path '%s' does not contain any document ID's" % path)
                sys.exit(1)

            for sourcedb_name, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb, domain, simulate, doc_ids=doc_ids, postgres_db=postgres_db,
                               exclude_attachments=exclude_attachments)
        else:
            startkey = [domain]
            endkey = [domain, {}]
            exclude_types = DEFAULT_EXCLUDE_TYPES + options['doc_types_exclude'].split(',')
            for sourcedb_name, sourcedb in self.iter_source_dbs():
                self.copy_docs(sourcedb, domain, simulate, startkey, endkey, exclude_types=exclude_types,
                               postgres_db=postgres_db, exclude_attachments=exclude_attachments)