def updateJson(self, file_path, file=None, cur=None):
        """Load the json file at file_path if it matches a mapping in the schema.

        Only the path check, the schema matching and the json loading are
        part of this excerpt; the code ends once ``data`` has been loaded.

        Args:
            file_path: Path of the json file; must start with ``self.db_dir``.
            file: ``None`` to open ``file_path`` here, ``False`` to mark the
                file as deleted (``data`` becomes ``{}``), otherwise an
                already opened binary file object to read the json from.
            cur: Not used by the visible part of the method.

        Returns:
            ``False`` if the file is outside ``self.db_dir`` or no pattern in
            ``self.schema["maps"]`` matches its relative path.
        """
        if not file_path.startswith(self.db_dir):
            return False  # Not from the db dir: Skipping
        relative_path = file_path[len(self.db_dir):]  # File path relative to db file

        # Check if filename matches any of mappings in schema
        matched_maps = []
        for match, map_settings in self.schema["maps"].items():
            try:
                if SafeRe.match(match, relative_path):
                    matched_maps.append(map_settings)
            except SafeRe.UnsafePatternError as err:
                # An unsafe pattern is logged and skipped; other maps still apply
                self.log.error(err)

        # No match found for the file
        if not matched_maps:
            return False

        # Load the json file
        opened_file = None  # Set only if the handle is opened here, so only that one gets closed
        try:
            if file is None:  # Open file if no file object passed
                file = opened_file = open(file_path, "rb")

            if file is False:  # File deleted
                data = {}
            elif file_path.endswith("json.gz"):
                data = json.load(helper.limitedGzipFile(fileobj=file))
            else:
                data = json.load(file)
        except Exception as err:
            # Best effort: an unreadable or invalid file is handled as empty data
            self.log.debug("Json file %s load error: %s" % (file_path, err))
            data = {}
        finally:
            # Handles passed in by the caller are left open for the caller to manage
            if opened_file is not None:
                opened_file.close()
Example #2
0
    def updateJson(self, file_path, file=None, cur=None):
        """Load the json file at file_path if it matches a mapping in the schema.

        Only the path check, the schema matching and the json loading are
        part of this excerpt; the code ends once ``data`` has been loaded.

        Args:
            file_path: Path of the json file; must start with ``self.db_dir``.
            file: ``None`` to open ``file_path`` here, ``False`` to mark the
                file as deleted (``data`` becomes ``{}``), otherwise an
                already opened binary file object to read the json from.
            cur: Not used by the visible part of the method.

        Returns:
            ``False`` if the file is outside ``self.db_dir`` or no pattern in
            ``self.schema["maps"]`` matches its relative path.
        """
        if not file_path.startswith(self.db_dir):
            return False  # Not from the db dir: Skipping
        relative_path = file_path[len(self.db_dir):]  # File path relative to db file

        # Check if filename matches any of mappings in schema
        matched_maps = []
        for match, map_settings in self.schema["maps"].items():
            try:
                if SafeRe.match(match, relative_path):
                    matched_maps.append(map_settings)
            except SafeRe.UnsafePatternError as err:
                # An unsafe pattern is logged and skipped; other maps still apply
                self.log.error(err)

        # No match found for the file
        if not matched_maps:
            return False

        # Load the json file
        opened_file = None  # Set only if the handle is opened here, so only that one gets closed
        try:
            if file is None:  # Open file if no file object passed
                file = opened_file = open(file_path, "rb")

            if file is False:  # File deleted
                data = {}
            elif file_path.endswith("json.gz"):
                data = json.load(helper.limitedGzipFile(fileobj=file))
            else:
                data = json.load(file)
        except Exception as err:
            # Best effort: an unreadable or invalid file is handled as empty data
            self.log.debug("Json file %s load error: %s" % (file_path, err))
            data = {}
        finally:
            # Handles passed in by the caller are left open for the caller to manage
            if opened_file is not None:
                opened_file.close()
Example #3
0
    def updateJson(self, file_path, file=None, cur=None):
        """Sync the database rows derived from one json file with its content.

        The file's path relative to ``self.db_dir`` is matched against the
        patterns in ``self.schema["maps"]``. For every matching map the
        ``to_keyvalue``, ``to_json_table`` and ``to_table`` settings are
        applied. If the loaded data is empty (deleted, unreadable or empty
        file) the file's row in the ``json`` table is deleted at the end.

        Args:
            file_path: Path of the json file; must start with ``self.db_dir``.
            file: ``None`` to open ``file_path`` here, ``False`` to mark the
                file as deleted, otherwise an already opened binary file
                object to read the json from.
            cur: Cursor used for the queries. If falsy, the cursor returned
                by ``self.getSharedCursor()`` is used with its ``logging``
                attribute set to ``False``.

        Returns:
            ``False`` if the file is outside ``self.db_dir`` or no map
            pattern matches it, ``True`` once the maps have been applied.
        """
        if not file_path.startswith(self.db_dir):
            return False  # Not from the db dir: Skipping
        relative_path = file_path[len(self.db_dir):]  # File path relative to db file

        # Check if filename matches any of mappings in schema
        matched_maps = []
        for match, map_settings in self.schema["maps"].items():
            try:
                if SafeRe.match(match, relative_path):
                    matched_maps.append(map_settings)
            except SafeRe.UnsafePatternError as err:
                # An unsafe pattern is logged and skipped; other maps still apply
                self.log.error(err)

        # No match found for the file
        if not matched_maps:
            return False

        # Load the json file
        opened_file = None  # Set only if the handle is opened here, so only that one gets closed
        try:
            if file is None:  # Open file if no file object passed
                file = opened_file = open(file_path, "rb")

            if file is False:  # File deleted
                data = {}
            else:
                if file_path.endswith("json.gz"):
                    file = helper.limitedGzipFile(fileobj=file)

                # Python 3 before 3.6 cannot parse bytes input, so decode it first
                if sys.version_info.major == 3 and sys.version_info.minor < 6:
                    data = json.loads(file.read().decode("utf8"))
                else:
                    data = json.load(file)
        except Exception as err:
            # Best effort: an unreadable or invalid file is handled as empty data
            self.log.debug("Json file %s load error: %s" % (file_path, err))
            data = {}
        finally:
            # Handles passed in by the caller are left open for the caller to manage
            if opened_file is not None:
                opened_file.close()

        # No cursor specified
        if not cur:
            cur = self.getSharedCursor()
            cur.logging = False

        # Row for current json file if required: needed by the keyvalue/table
        # imports below and by the cleanup at the end when there is no data
        if not data or any("to_keyvalue" in dbmap or "to_table" in dbmap for dbmap in matched_maps):
            json_row = cur.getJsonRow(relative_path)

        # Check matched mappings in schema
        for dbmap in matched_maps:
            # Insert non-relational key values
            if dbmap.get("to_keyvalue"):
                # Get current values
                res = cur.execute("SELECT * FROM keyvalue WHERE json_id = ?", (json_row["json_id"],))
                current_keyvalue = {}
                current_keyvalue_id = {}
                for row in res:
                    current_keyvalue[row["key"]] = row["value"]
                    current_keyvalue_id[row["key"]] = row["keyvalue_id"]

                for key in dbmap["to_keyvalue"]:
                    if key not in current_keyvalue:  # Keyvalue not exist yet in the db
                        cur.execute(
                            "INSERT INTO keyvalue ?",
                            {"key": key, "value": data.get(key), "json_id": json_row["json_id"]}
                        )
                    elif data.get(key) != current_keyvalue[key]:  # Keyvalue different value
                        cur.execute(
                            "UPDATE keyvalue SET value = ? WHERE keyvalue_id = ?",
                            (data.get(key), current_keyvalue_id[key])
                        )

            # Insert data to json table for easier joins
            if dbmap.get("to_json_table"):
                directory, file_name = re.match("^(.*?)/*([^/]*)$", relative_path).groups()
                data_json_row = dict(cur.getJsonRow(directory + "/" + dbmap.get("file_name", file_name)))
                # Only rewrite the row if at least one of the mapped cols differs
                changed = any(data.get(key) != data_json_row.get(key) for key in dbmap["to_json_table"])
                if changed:
                    # Add the custom col values
                    data_json_row.update({key: val for key, val in data.items() if key in dbmap["to_json_table"]})
                    cur.execute("INSERT OR REPLACE INTO json ?", data_json_row)

            # Insert data to tables
            for table_settings in dbmap.get("to_table", []):
                if isinstance(table_settings, dict):  # Custom settings
                    table_name = table_settings["table"]  # Table name to insert datas
                    node = table_settings.get("node", table_name)  # Node keyname in data json file
                    key_col = table_settings.get("key_col")  # Map dict key as this col
                    val_col = table_settings.get("val_col")  # Map dict value as this col
                    import_cols = table_settings.get("import_cols")
                    replaces = table_settings.get("replaces")
                else:  # Simple settings
                    table_name = table_settings
                    node = table_settings
                    key_col = None
                    val_col = None
                    import_cols = None
                    replaces = None

                # Fill import cols from table cols
                if not import_cols:
                    import_cols = {item[0] for item in self.schema["tables"][table_name]["cols"]}

                # Drop the rows imported earlier from this file, then re-insert the current ones
                cur.execute("DELETE FROM %s WHERE json_id = ?" % table_name, (json_row["json_id"],))

                if node not in data:
                    continue

                if key_col:  # Map as dict
                    for key, val in data[node].items():
                        if val_col:  # Single value
                            cur.execute(
                                "INSERT OR REPLACE INTO %s ?" % table_name,
                                {key_col: key, val_col: val, "json_id": json_row["json_id"]}
                            )
                        else:  # Multi value
                            if isinstance(val, dict):  # Single row
                                row = val
                                if import_cols:
                                    row = {col: row[col] for col in row if col in import_cols}  # Filter row by import_cols
                                row[key_col] = key
                                # Replace in value if necessary
                                if replaces:
                                    for replace_key, replace in replaces.items():
                                        if replace_key in row:
                                            for replace_from, replace_to in replace.items():
                                                row[replace_key] = row[replace_key].replace(replace_from, replace_to)

                                row["json_id"] = json_row["json_id"]
                                cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row)
                            elif isinstance(val, list):  # Multi row
                                for row in val:
                                    row[key_col] = key
                                    row["json_id"] = json_row["json_id"]
                                    cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row)
                else:  # Map as list
                    for row in data[node]:
                        # NOTE(review): json_id is added before the import_cols filter here
                        # (the dict branch adds it after), so it is dropped unless
                        # import_cols contains "json_id" - confirm this is intended
                        row["json_id"] = json_row["json_id"]
                        if import_cols:
                            row = {col: row[col] for col in row if col in import_cols}  # Filter row by import_cols
                        cur.execute("INSERT OR REPLACE INTO %s ?" % table_name, row)

        # Cleanup json row
        if not data:
            self.log.debug("Cleanup json row for %s" % file_path)
            # Bind json_id as a parameter like the other queries instead of formatting it into the SQL
            cur.execute("DELETE FROM json WHERE json_id = ?", (json_row["json_id"],))

        return True