def test_dict_differ():
    """Check the paths DictDiffer reports as removed, changed and added.

    The fixtures exercise top-level keys, list values and nested dicts;
    nested keys are reported as "/"-joined paths (e.g. "f/ff/2").
    """
    original = {
        "a": "value",
        "b": ["value", "value"],
        "c": {
            "cc": {
                "1": "value"
            }
        },
        "d": "value",
        "e": "value",
        "f": {
            "ff": {
                "1": "value",
                "2": "value",
                "3": "value"
            }
        }
    }
    new = {
        "a": "value",
        "b": ["value"],
        "c": {
            "cc": {
                "1": "changed value",
                "2": "added value"
            },
            "dd": "added",
            "ee": {
                "eee": {
                    "1": "added",
                    "2": {
                        "eeeee": "added"
                    }
                }
            }
        },
        "d": "changed",
        "f": {
            "ff": {
                "1": "value",
                "3": "changed"
            }
        }
    }

    diff = DictDiffer(new, original)

    # Compare sets directly: the order of a list built from a set is an
    # implementation detail, so two equal sets may yield unequal lists.
    assert set(["e", "f/ff/2"]) == set(diff.removed())
    assert set(["b", "c/cc/1", "d", "f/ff/3"]) == set(diff.changed())
    assert set(["c/cc/2", "c/dd", "c/ee/eee/1",
                "c/ee/eee/2/eeeee"]) == set(diff.added())
def sync_views(self, db_name):
    """Sync the design documents of one database, then build their views.

    Every file in ``self.views_directory`` whose name starts with
    ``db_name`` and is listed in the build whitelist is parsed as a design
    document. The document is saved to the database when it differs from
    the stored copy, and each of its views (except the ``lib`` entry) is
    queried once with ``limit=0`` to build it.

    ``db_name`` is one of "dpla", "dashboard" or "bulk_download". Any other
    value is logged as an error and nothing is synced.
    """
    build_views_from_file = ["dpla_db_all_provider_docs.js",
                             "dashboard_db_all_provider_docs.js",
                             "dashboard_db_all_ingestion_docs.js",
                             "dpla_db_export_database.js",
                             "bulk_download_db_all_contributor_docs.js"]

    if db_name == "dpla":
        db = self.dpla_db
        if self.sync_qa_views:
            self.logger.debug("QA views will be synced.")
            build_views_from_file.append("dpla_db_qa_reports.js")
        else:
            self.logger.debug("QA views will NOT be synced.")
    elif db_name == "dashboard":
        db = self.dashboard_db
    elif db_name == "bulk_download":
        db = self.bulk_download_db
    else:
        # Without this guard ``db`` would be unbound further down whenever
        # a file name happens to start with the unrecognised name.
        self.logger.error("Unknown database name: %s" % db_name)
        return

    for filename in os.listdir(self.views_directory):
        if not (filename.startswith(db_name) and
                filename in build_views_from_file):
            continue

        fname = os.path.join(self.views_directory, filename)
        with open(fname, "r") as f:
            # Newlines are stripped from the file before it is parsed.
            s = f.read().replace("\n", "")
        design_doc = json.loads(s)

        # Check if the design doc has changed. The stored revision is
        # removed before comparing and remembered for the save below.
        prev_design_doc = db.get(design_doc["_id"], {})
        prev_revision = prev_design_doc.pop("_rev", None)
        diff = DictDiffer(design_doc, prev_design_doc)
        if diff.differences():
            # Save the design document, reusing the stored revision when
            # the document already exists.
            if prev_revision:
                design_doc["_rev"] = prev_revision
            db[design_doc["_id"]] = design_doc

        # Build views: iterating a limit=0 query forces the request
        # without fetching any rows.
        design_doc_name = design_doc["_id"].split("_design/")[-1]
        real_views = (v for v in design_doc["views"] if v != "lib")
        for view in real_views:
            view_path = "%s/%s" % (design_doc_name, view)
            start = time.time()
            try:
                for doc in db.view(view_path, limit=0):
                    pass
                self.logger.debug("Built %s view %s in %s seconds" %
                                  (db.name, view_path, time.time() - start))
            except Exception as e:
                # Best effort: a failing view must not stop the others.
                self.logger.error("Error building %s view %s: %s" %
                                  (db.name, view_path, e))
def _sync_views(self, db_name):
    """Sync the design documents of one database, then build their views.

    Every file in ``self.views_directory`` whose name starts with
    ``db_name`` is parsed as a design document and saved to the database
    when it differs from the stored copy. For files listed in the build
    whitelist, each view (except the ``lib`` entry) is then queried once
    with ``limit=0`` to build it.

    ``db_name`` is "dpla" or "dashboard". Any other value is logged as an
    error and nothing is synced.
    """
    build_views_from_file = ["dpla_db_all_provider_docs.js",
                             # Uncomment when QA views have been built
                             #"dpla_db_qa_reports.js",
                             "dashboard_db_all_provider_docs.js",
                             "dashboard_db_all_ingestion_docs.js"]

    if db_name == "dpla":
        db = self.dpla_db
    elif db_name == "dashboard":
        db = self.dashboard_db
    else:
        # Without this guard ``db`` would be unbound further down whenever
        # a file name happens to start with the unrecognised name.
        self.logger.error("Unknown database name: %s" % db_name)
        return

    for filename in os.listdir(self.views_directory):
        if not filename.startswith(db_name):
            continue

        fname = os.path.join(self.views_directory, filename)
        with open(fname, "r") as f:
            design_doc = json.load(f)

        # Check if the design doc has changed. The stored revision is
        # removed before comparing and remembered for the save below.
        prev_design_doc = db.get(design_doc["_id"], {})
        prev_revision = prev_design_doc.pop("_rev", None)
        diff = DictDiffer(design_doc, prev_design_doc)
        if diff.differences():
            # Save the design document, reusing the stored revision when
            # the document already exists.
            if prev_revision:
                design_doc["_rev"] = prev_revision
            db[design_doc["_id"]] = design_doc

        # Build views, only for the whitelisted files.
        if filename not in build_views_from_file:
            continue

        design_doc_name = design_doc["_id"].split("_design/")[-1]
        # Skip the "lib" entry, as sync_views does, rather than querying
        # it like a view.
        real_views = (v for v in design_doc["views"] if v != "lib")
        for view in real_views:
            view_path = "%s/%s" % (design_doc_name, view)
            start = time.time()
            try:
                # Iterating a limit=0 query forces the request without
                # fetching any rows.
                for doc in db.view(view_path, limit=0):
                    pass
                self.logger.debug("Built %s view %s in %s seconds" %
                                  (db.name, view_path, time.time() - start))
            except Exception as e:
                # Best effort: a failing view must not stop the others.
                self.logger.error("Error building %s view %s: %s" %
                                  (db.name, view_path, e))
def _get_fields_changed(self, harvested_doc, database_doc):
    """Compare harvested_doc with database_doc and return the changed fields.

    Returns a dict that may contain the keys "added", "removed" and
    "changed", each holding what the corresponding DictDiffer method
    reports. A key is present only when its result is non-empty, so the
    dict is empty when nothing differs.
    """
    fields_changed = {}
    diff = DictDiffer(harvested_doc, database_doc)

    # Compute each category once instead of once to test and once to store.
    added = diff.added()
    if added:
        fields_changed["added"] = added

    removed = diff.removed()
    if removed:
        fields_changed["removed"] = removed

    changed = diff.changed()
    if changed:
        fields_changed["changed"] = changed

    return fields_changed