def _get_db_for_doc_type(self, doc_type):
    """Return a cached, already-entered IterDB for *doc_type*, creating one on first use."""
    try:
        return self._dbs[doc_type]
    except KeyError:
        pass
    iter_db = IterDB(
        get_db_by_doc_type(doc_type),
        new_edits=False,
        callback=LoaderCallback(self.success_counter, self.stdout),
    )
    # Entered manually; the IterDB stays open for reuse and is closed elsewhere.
    iter_db.__enter__()
    self._dbs[doc_type] = iter_db
    return iter_db
def _get_db_for_doc_type(self, doc_type):
    """Return a cached, already-entered IterDB for *doc_type*, creating one on first use.

    Raises DocumentClassNotFound when no couch database maps to the doc type.
    """
    try:
        return self._dbs[doc_type]
    except KeyError:
        pass
    couch_db = get_db_by_doc_type(doc_type)
    if couch_db is None:
        raise DocumentClassNotFound(
            'No Document class with name "{}" could be found.'.format(doc_type))
    iter_db = IterDB(
        couch_db,
        new_edits=False,
        callback=LoaderCallback(self.success_counter, self.stdout),
    )
    # Entered manually; the IterDB stays open for reuse and is closed elsewhere.
    iter_db.__enter__()
    self._dbs[doc_type] = iter_db
    return iter_db
def fix_xforms(log_file, dry_run):
    """Backfill the correct xmlns on submissions that are missing one.

    Forms that cannot be fixed are logged to *log_file* with a warning; save
    failures (collected by IterDB after it flushes) are logged as errors.
    """
    unfixable_builds = set()
    total, submissions = get_submissions_without_xmlns()
    xform_db = IterDB(XFormInstance.get_db())
    with xform_db as xform_db:
        for i, xform_instance in enumerate(submissions):
            Command._print_progress(i, total)
            try:
                xmlns = get_correct_xmlns(xform_instance)
            except MultiplePreviouslyFixedForms as e:
                # Announce each unfixable build on stdout only once.
                if xform_instance.build_id not in unfixable_builds:
                    unfixable_builds.add(xform_instance.build_id)
                    print(str(e))
                _log(log_file, WARNING, MULTI_MATCH, xform_instance)
                continue
            except CantMatchAForm:
                _log(log_file, WARNING, CANT_MATCH, xform_instance)
                continue
            except BuildHasFormsWithUndefinedXmlns:
                _log(log_file, WARNING, FORM_HAS_UNDEFINED_XMLNS, xform_instance)
                continue
            if xmlns:
                set_xmlns_on_submission(
                    xform_instance,
                    xmlns,
                    xform_db,
                    log_file,
                    dry_run,
                )
    # error_ids is populated once the IterDB context exits and flushes.
    for error_id in xform_db.error_ids:
        # Fix: the original call omitted log_file, so ERROR was passed as the
        # stream argument; every other _log call passes log_file first.
        _log(log_file, ERROR, ERROR_SAVING, xform_id=error_id)
def handle(self, ids_file, **options):
    """Re-save every doc listed in *ids_file* and write a per-doc status CSV."""
    with open(ids_file, encoding='utf-8') as f:
        raw_ids = [line.strip() for line in f]
    unique_ids = set(raw_ids)
    print("{} total doc ids, {} unique".format(len(raw_ids), len(unique_ids)))

    db = XFormInstance.get_db()  # Both forms and cases are in here
    with IterDB(db) as iter_db:
        for doc in iter_docs(db, with_progress_bar(unique_ids)):
            iter_db.save(doc)

    print("{} docs saved".format(len(iter_db.saved_ids)))
    print("{} docs errored".format(len(iter_db.error_ids)))
    not_found = len(unique_ids) - len(iter_db.saved_ids) - len(iter_db.error_ids)
    print("{} docs not found".format(not_found))

    filename = '{}_{}.csv'.format(
        ids_file.split('/')[-1], datetime.datetime.now().isoformat())
    with open(filename, 'w', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['doc_id', 'status'])
        for doc_id in unique_ids:
            if doc_id in iter_db.saved_ids:
                status = "saved"
            elif doc_id in iter_db.error_ids:
                status = "errored"
            else:
                status = "not_found"
            writer.writerow([doc_id, status])
    print("Saved results to {}".format(filename))
def delete_already_successful_records(self, redundant_records):
    """Delete records whose payload was already sent; return log rows for each."""
    log_rows = []
    with IterDB(RepeatRecord.get_db()) as iter_db:
        for rec in redundant_records:
            iter_db.delete(rec)
            log_rows.append(
                (rec._id, rec.payload_id, rec.failure_reason, 'Yes', 'Already Sent'))
    return log_rows
def set_default_engine_ids(apps, schema_editor):
    """Backfill engine_id on any data source config doc that lacks one."""
    if settings.UNIT_TESTING:
        return
    get_preindex_plugin('userreports').sync_design_docs()
    ucr_db = DataSourceConfiguration.get_db()
    with IterDB(ucr_db) as iter_db:
        for doc in iter_docs(ucr_db, DataSourceConfiguration.all_ids()):
            # Only touch docs that don't already have an engine_id set.
            if not doc.get('engine_id'):
                doc['engine_id'] = DEFAULT_ENGINE_ID
                iter_db.save(doc)
def resolve_duplicates(self, records_by_payload_id):
    """Keep the first record per payload, delete the rest; return log rows."""
    log_rows = []
    with IterDB(RepeatRecord.get_db()) as iter_db:
        for payload_id, records in records_by_payload_id.items():
            keeper = records[0]
            log_rows.append((keeper._id, payload_id, keeper.failure_reason, 'No', ''))
            # Everything past the first record is a duplicate.
            for dup in records[1:]:
                iter_db.delete(dup)
                log_rows.append(
                    (dup._id, payload_id, dup.failure_reason, 'Yes', 'Duplicate'))
    return log_rows
def bulk_migrate(source_db, target_db, doc_types):
    """Copy every doc of the given types from source_db into target_db."""
    with IterDB(target_db, new_edits=False, chunksize=25) as iter_db:
        for raw_doc in get_all_docs_with_doc_types(source_db, doc_types):
            # It turns out that Cloudant does not support attachments=true
            # on views or on _all_docs, only on single doc gets, so we have
            # to manually re-query for the full doc + attachments.
            # (And I think there's literally no other way.)
            iter_db.save(_insert_attachments(source_db, raw_doc))
def reconcile_repeat_records(self, voucher_updates):
    """
    Mark updated records as "succeeded", all others as "cancelled"
    Delete duplicate records if any exist
    """
    # Fix: Python 2 print statements converted to print() so this runs under
    # Python 3, matching the rest of the codebase.
    print("Reconciling repeat records")
    chemist_voucher_repeater_id = 'be435d3f407bfb1016cc89ebbf8146b1'
    lab_voucher_repeater_id = 'be435d3f407bfb1016cc89ebbfc42a47'

    already_seen = set()
    updates_by_voucher_id = {update.id: update for update in voucher_updates}

    headers = ['record_id', 'voucher_id', 'status']
    rows = []

    # Dry-run safety: without --commit, MagicMock stands in for IterDB so no
    # saves or deletes actually reach couch.
    get_db = (lambda: IterDB(RepeatRecord.get_db())) if self.commit else MagicMock
    with get_db() as iter_db:
        for repeater_id in [chemist_voucher_repeater_id, lab_voucher_repeater_id]:
            print("repeater {}".format(repeater_id))
            records = iter_repeat_records_by_domain(self.domain, repeater_id=repeater_id)
            record_count = get_repeat_record_count(self.domain, repeater_id=repeater_id)
            for record in with_progress_bar(records, record_count):
                if record.payload_id in already_seen:
                    status = "deleted"
                    iter_db.delete(record)
                elif record.payload_id in updates_by_voucher_id:
                    # add successful attempt
                    status = "succeeded"
                    attempt = RepeatRecordAttempt(
                        cancelled=False,
                        datetime=datetime.datetime.utcnow(),
                        failure_reason=None,
                        success_response="Paid offline via import_voucher_confirmations",
                        next_check=None,
                        succeeded=True,
                    )
                    record.add_attempt(attempt)
                    iter_db.save(record)
                else:
                    # mark record as canceled
                    # Fix: status was never assigned on this branch, so the
                    # rows.append below either raised NameError or reused the
                    # previous record's status.
                    status = "cancelled"
                    record.add_attempt(RepeatRecordAttempt(
                        cancelled=True,
                        datetime=datetime.datetime.utcnow(),
                        failure_reason="Cancelled during import_voucher_confirmations",
                        success_response=None,
                        next_check=None,
                        succeeded=False,
                    ))
                    iter_db.save(record)
                already_seen.add(record.payload_id)
                rows.append([record._id, record.payload_id, status])

    self.write_csv('repeat_records', headers, rows)
def delete_from_file(self):
    """Delete the device-log XFormInstance docs listed (one JSON doc per line)
    in self.filename."""
    # First pass only counts lines so the progress bar has a total.
    with open(self.filename) as f:
        doc_count = sum(1 for line in f)
    with open(self.filename) as f:
        with IterDB(XFormInstance.get_db(), throttle_secs=2, chunksize=100) as iter_db:
            for line in with_progress_bar(f, length=doc_count):
                doc = json.loads(line)
                # Safety guards: refuse to delete anything that isn't a device log.
                assert doc['xmlns'] == DEVICE_LOG_XMLNS
                assert doc['doc_type'] == 'XFormInstance'
                iter_db.delete(doc)
    if iter_db.errors_by_type:
        # Fix: Python 2 print statement converted to print(); two positional
        # args reproduce the original space-separated output.
        print('There were some errors', iter_db.errors_by_type)
def _create_or_unarchive_users(location_type):
    """Ensure each location of *location_type* has an active location user."""
    users_by_loc = _get_users_by_loc_id(location_type)
    with IterDB(CommCareUser.get_db()) as iter_db:
        for location in SQLLocation.objects.filter(location_type=location_type):
            # Reuse the existing user for this location if there is one,
            # otherwise create a fresh location user.
            user = users_by_loc.get(location.location_id) or make_location_user(location)
            user.is_active = True
            user.user_location_id = location.location_id
            user.set_location(location, commit=False)
            iter_db.save(user)
            location.user_id = user._id
            location.save()
def delete_docs(target_db, doc_ids):
    """ delete docs from database by doc _id and _rev """
    if not doc_ids:
        return
    with IterDB(target_db, new_edits=False) as iter_db:
        for pair in _bulk_get_revs(target_db, doc_ids):
            the_id, the_rev = pair
            iter_db.delete({'_id': the_id, '_rev': the_rev})
    if iter_db.errors_by_type:
        logging.error('errors bulk saving in delete_docs: {!r}'.format(
            iter_db.errors_by_type))
def undelete_docs(db, doc_ids):
    """Restore deleted docs by id; return (Results, iter_db) for inspection."""
    outcome = Results(set(), set(), set())
    with IterDB(db) as iter_db:
        for batch in chunked(set(doc_ids), 100):
            for row in send_keys_to_couch(db, keys=set(batch)):
                key = row['key']
                if row.get('error', None) == 'not_found':
                    outcome.not_found.add(key)
                elif row.get('value', {}).get('deleted', False):
                    # Re-save the last revision's content to undo the deletion.
                    iter_db.save(
                        get_deleted_doc(db, key, row['value']['rev']))
                    outcome.restored.add(key)
                else:
                    outcome.not_deleted.add(key)
    # iter_db is returned after the context exits so callers can check
    # saved_ids / error_ids.
    return outcome, iter_db
def copy_docs(source_db, target_db, doc_ids):
    """ copy docs from source_db to target_db by doc_id """
    if not doc_ids:
        return
    with IterDB(target_db, new_edits=False) as iter_db:
        for raw_doc in iter_docs(source_db, doc_ids, attachments=True):
            # see comment bulk_migrate on bulk migrate
            # explaining discrepancy between CouchDB and Cloudant that necessitates this
            iter_db.save(_insert_attachments(source_db, raw_doc))
    if iter_db.errors_by_type:
        logging.error('errors bulk saving in copy_docs: {!r}'.format(
            iter_db.errors_by_type))
def handle(self, *args, **options):
    """Flag every wire-billing invoice PDF doc with is_wire=True, then report
    save/error counts."""
    invoice_ids = WireBillingRecord.objects.values_list('pdf_data_id', flat=True)
    db = InvoicePdf.get_db()
    with IterDB(db) as iter_db:
        for doc in iter_docs(db, invoice_ids):
            doc['is_wire'] = True
            iter_db.save(doc)
    # saved_ids / error_ids are populated after IterDB flushes on exit.
    # Fix: Python 2 print statements converted to print() so this runs under
    # Python 3; message strings are unchanged.
    if iter_db.saved_ids:
        print('{}/{} docs saved correctly!'.format(
            len(iter_db.saved_ids), len(invoice_ids)))
    if iter_db.error_ids:
        print('There were {} errors. There were errors when saving the following:'.format(
            len(iter_db.error_ids)))
        for error_id in iter_db.error_ids:
            print(error_id)
def _import_fixtures(domain):
    """Load the four nutrition CSV fixtures into *domain* as fixture data."""
    fixture_sources = [
        ('recipes', 'recipes.csv'),
        ('conv_factors', 'conv_factors.csv'),
        ('food_list', 'food_list.csv'),
        ('food_composition_table', 'food_composition_table.csv'),
    ]
    for tag, filename in fixture_sources:
        fields, rows = _read_csv(filename)
        # One FixtureDataType per CSV, with a field per column header.
        data_type = FixtureDataType(
            domain=domain,
            tag=tag,
            fields=[FixtureTypeField(field_name=name) for name in fields],
        )
        data_type.save()
        with IterDB(FixtureDataItem.get_db(), chunksize=1000) as iter_db:
            for row_vals in rows:
                iter_db.save(_mk_fixture_data_item(
                    domain, data_type._id, fields, row_vals))
def fix_xforms(unique_id_to_xmlns_map, app_to_unique_ids_map, log_file, dry_run):
    """Backfill xmlns on submissions missing one, resolving each form's xmlns
    via its build's unique form id.

    Both map arguments are mutated as a cache across calls; progress and
    failures are written to *log_file*.
    """
    total, submissions = get_submissions_without_xmlns()
    xform_db = IterDB(XFormInstance.get_db())
    with xform_db as xform_db:
        for i, xform_instance in enumerate(submissions):
            Command._print_progress(i, total)
            try:
                unique_id = get_form_unique_id(xform_instance)
            except (MultipleFormsMissingXmlns, FormNameMismatch) as e:
                # Fix: e.message does not exist on Python 3 exceptions, and
                # `print x` is a syntax error there — use str(e) and print().
                log_file.write(str(e))
                print(str(e))
                continue
            if unique_id:
                if unique_id not in unique_id_to_xmlns_map:
                    xmlns = get_xmlns(unique_id, xform_instance.app_id,
                                      xform_instance.domain)
                    log_file.write(xmlns_map_log_message(xmlns, unique_id))
                    unique_id_to_xmlns_map[unique_id] = xmlns
                set_xmlns_on_submission(
                    xform_instance,
                    unique_id_to_xmlns_map[unique_id],
                    xform_db,
                    log_file,
                    dry_run,
                )
                key = (xform_instance.app_id, xform_instance.domain)
                val = unique_id
                if val not in app_to_unique_ids_map[key]:
                    log_file.write(
                        unique_ids_map_log_message(key[0], key[1], unique_id))
                    app_to_unique_ids_map[key].add(val)
    # error_ids is populated once the IterDB context exits and flushes.
    for error_id in xform_db.error_ids:
        log_file.write("Failed to save xform {}\n".format(error_id))