def test_dict_differ():
    """Check the removed, changed and added paths DictDiffer reports.

    Two nested dictionaries are diffed and the slash-joined key paths
    returned by removed(), changed() and added() are compared with the
    expected ones, ignoring order.
    """
    original = {
        "a": "value",
        "b": ["value", "value"],
        "c": {
            "cc": {
                "1": "value"
            }
        },
        "d": "value",
        "e": "value",
        "f": {
            "ff": {
                "1": "value",
                "2": "value",
                "3": "value"
            }
        }
    }
    new = {
        "a": "value",
        "b": ["value"],
        "c": {
            "cc": {
                "1": "changed value",
                "2": "added value"
            },
            "dd": "added",
            "ee": {
                "eee": {
                    "1": "added",
                    "2": {
                        "eeeee": "added"
                    }
                }
            }
        },
        "d": "changed",
        "f": {
            "ff": {
                "1": "value",
                "3": "changed"
            }
        }
    }

    diff = DictDiffer(new, original)

    # Compare sets directly: converting each set to a list first makes the
    # assertion depend on set iteration order, which is not guaranteed to be
    # the same for two equal sets.
    expected_removed = set(["e", "f/ff/2"])
    expected_changed = set(["b", "c/cc/1", "d", "f/ff/3"])
    expected_added = set(["c/cc/2", "c/dd", "c/ee/eee/1",
                          "c/ee/eee/2/eeeee"])

    assert expected_removed == set(diff.removed())
    assert expected_changed == set(diff.changed())
    assert expected_added == set(diff.added())
Example #2
0
    def sync_views(self, db_name):
        """Sync the design documents of one database, then build their views.

        Every file in self.views_directory whose name starts with db_name and
        is listed in the build whitelist is loaded as a JSON design document.
        The document is saved to the database only when it differs from the
        stored copy, and each of its views (except "lib") is then queried so
        that it gets built.

        db_name -- "dpla", "dashboard" or "bulk_download". Any other value is
                   logged as an error and nothing is synced.

        Errors raised while building a view are logged, not propagated.
        """
        build_views_from_file = ["dpla_db_all_provider_docs.js",
                                 "dashboard_db_all_provider_docs.js",
                                 "dashboard_db_all_ingestion_docs.js",
                                 "dpla_db_export_database.js",
                                 "bulk_download_db_all_contributor_docs.js"]
        if db_name == "dpla":
            db = self.dpla_db
            if self.sync_qa_views:
                self.logger.debug("QA views will be synced.")
                build_views_from_file.append("dpla_db_qa_reports.js")
            else:
                self.logger.debug("QA views will NOT be synced.")
        elif db_name == "dashboard":
            db = self.dashboard_db
        elif db_name == "bulk_download":
            db = self.bulk_download_db
        else:
            # Without this branch `db` would be unbound further down.
            self.logger.error("Unknown database name: %s" % db_name)
            return

        for filename in os.listdir(self.views_directory):
            if not (filename.startswith(db_name) and
                    filename in build_views_from_file):
                continue

            fname = os.path.join(self.views_directory, filename)
            with open(fname, "r") as f:
                # Newlines are stripped before parsing the file as JSON.
                s = f.read().replace("\n", "")
                design_doc = json.loads(s)

            # Check if the design doc has changed. "_rev" is removed from the
            # stored copy first so it does not take part in the comparison.
            prev_design_doc = db.get(design_doc["_id"], {})
            prev_revision = prev_design_doc.pop("_rev", None)
            diff = DictDiffer(design_doc, prev_design_doc)
            if diff.differences():
                # Save the design document, reusing the stored revision
                # when there is one.
                if prev_revision:
                    design_doc["_rev"] = prev_revision
                db[design_doc["_id"]] = design_doc

            # Build views
            design_doc_name = design_doc["_id"].split("_design/")[-1]
            real_views = (v for v in design_doc["views"] if v != "lib")
            for view in real_views:
                view_path = "%s/%s" % (design_doc_name, view)
                start = time.time()
                try:
                    # NOTE(review): presumably iterating the limit=0 result
                    # is what triggers the view build -- confirm against the
                    # database client in use.
                    for _ in db.view(view_path, limit=0):
                        pass
                    self.logger.debug("Built %s view %s in %s seconds"
                                      % (db.name, view_path,
                                         time.time() - start))
                except Exception as e:
                    self.logger.error("Error building %s view %s: %s" %
                                      (db.name, view_path, e))
def test_dict_differ():
    """Check the removed, changed and added paths DictDiffer reports.

    Two nested dictionaries are diffed and the slash-joined key paths
    returned by removed(), changed() and added() are compared with the
    expected ones, ignoring order.
    """
    original = {
        "a": "value",
        "b": ["value", "value"],
        "c": {
            "cc": {
                "1": "value"
            }
        },
        "d": "value",
        "e": "value",
        "f": {
            "ff": {
                "1": "value",
                "2": "value",
                "3": "value"
            }
        }
    }
    new = {
        "a": "value",
        "b": ["value"],
        "c": {
            "cc": {
                "1": "changed value",
                "2": "added value"
            },
            "dd": "added",
            "ee": {
                "eee": {
                    "1": "added",
                    "2": {
                        "eeeee": "added"
                    }
                }
            }
        },
        "d": "changed",
        "f": {
            "ff": {
                "1": "value",
                "3": "changed"
            }
        }
    }

    diff = DictDiffer(new, original)

    # Compare sets directly: converting each set to a list first makes the
    # assertion depend on set iteration order, which is not guaranteed to be
    # the same for two equal sets.
    expected_removed = set(["e", "f/ff/2"])
    expected_changed = set(["b", "c/cc/1", "d", "f/ff/3"])
    expected_added = set(["c/cc/2", "c/dd", "c/ee/eee/1",
                          "c/ee/eee/2/eeeee"])

    assert expected_removed == set(diff.removed())
    assert expected_changed == set(diff.changed())
    assert expected_added == set(diff.added())
Example #4
0
    def _sync_views(self, db_name):
        """Sync the design documents of one database, then build their views.

        Every file in self.views_directory whose name starts with db_name is
        loaded as a JSON design document and saved to the database when it
        differs from the stored copy. Views are then queried, so that they
        get built, only for files listed in the build whitelist.

        db_name -- "dpla" or "dashboard". Any other value is logged as an
                   error and nothing is synced.

        Errors raised while building a view are logged, not propagated.
        """
        build_views_from_file = ["dpla_db_all_provider_docs.js",
                                 # Uncomment when QA views have been built
                                 #"dpla_db_qa_reports.js",
                                 "dashboard_db_all_provider_docs.js",
                                 "dashboard_db_all_ingestion_docs.js"]
        if db_name == "dpla":
            db = self.dpla_db
        elif db_name == "dashboard":
            db = self.dashboard_db
        else:
            # Without this branch `db` would be unbound as soon as a file
            # name matched db_name.
            self.logger.error("Unknown database name: %s" % db_name)
            return

        for filename in os.listdir(self.views_directory):
            if not filename.startswith(db_name):
                continue

            fname = os.path.join(self.views_directory, filename)
            with open(fname, "r") as f:
                design_doc = json.load(f)

            # Check if the design doc has changed. "_rev" is removed from the
            # stored copy first so it does not take part in the comparison.
            prev_design_doc = db.get(design_doc["_id"], {})
            prev_revision = prev_design_doc.pop("_rev", None)
            diff = DictDiffer(design_doc, prev_design_doc)
            if diff.differences():
                # Save the design document, reusing the stored revision
                # when there is one.
                if prev_revision:
                    design_doc["_rev"] = prev_revision
                db[design_doc["_id"]] = design_doc

            # Build views (whitelisted files only)
            if filename not in build_views_from_file:
                continue
            design_doc_name = design_doc["_id"].split("_design/")[-1]
            for view in design_doc["views"]:
                view_path = "%s/%s" % (design_doc_name, view)
                start = time.time()
                try:
                    # NOTE(review): presumably iterating the limit=0 result
                    # is what triggers the view build -- confirm against the
                    # database client in use.
                    for _ in db.view(view_path, limit=0):
                        pass
                    self.logger.debug("Built %s view %s in %s seconds"
                                      % (db.name, view_path,
                                         time.time() - start))
                except Exception as e:
                    self.logger.error("Error building %s view %s: %s" %
                                      (db.name, view_path, e))
    def _get_fields_changed(self, harvested_doc, database_doc):
        """Return the differences between harvested_doc and database_doc.

        The two documents are compared with
        DictDiffer(harvested_doc, database_doc). The returned dict has up to
        three keys -- "added", "removed" and "changed" -- each mapped to the
        result of the DictDiffer method of the same name. A key is omitted
        when its result is falsy, so an empty dict means no differences were
        reported.
        """
        diff = DictDiffer(harvested_doc, database_doc)
        fields_changed = {}
        # Call each diff method once and reuse the result for both the
        # truth test and the stored value.
        for label, compute in (("added", diff.added),
                               ("removed", diff.removed),
                               ("changed", diff.changed)):
            fields = compute()
            if fields:
                fields_changed[label] = fields

        return fields_changed
Example #6
0
 def _get_fields_changed(self, harvested_doc, database_doc):
     """Return the differences between harvested_doc and database_doc.

     The two documents are compared with
     DictDiffer(harvested_doc, database_doc). The returned dict has up to
     three keys -- "added", "removed" and "changed" -- each mapped to the
     result of the DictDiffer method of the same name. A key is omitted
     when its result is falsy, so an empty dict means no differences were
     reported.
     """
     diff = DictDiffer(harvested_doc, database_doc)
     fields_changed = {}
     # Call each diff method once and reuse the result for both the
     # truth test and the stored value.
     for label, compute in (("added", diff.added),
                            ("removed", diff.removed),
                            ("changed", diff.changed)):
         fields = compute()
         if fields:
             fields_changed[label] = fields

     return fields_changed