Esempio n. 1
0
    def post_dump(self, *args, **kwargs):
        """Post-process the freshly dumped data folder.

        For an "incremental" build, verify the md5 checksum of every diff
        file listed in metadata.json. For a "full" build backed by a plain
        filesystem ("fs") repository, uncompress any archives in place.

        Raises:
            DumperException: when a diff file fails its md5 check (the
                dumper status is registered as "failed" before raising).
        """
        if not self.release:
            # wasn't set before, means no need to post-process
            # (ie. up-to-date, already done)
            return
        # use context managers so file handles are closed deterministically
        # (the previous json.load(open(...)) pattern leaked descriptors)
        build_file = os.path.join(self.new_data_folder,
                                  "%s.json" % self.release)
        with open(build_file) as fp:
            build_meta = json.load(fp)
        if build_meta["type"] == "incremental":
            self.logger.info("Checking md5sum for files in '%s'" %
                             self.new_data_folder)
            meta_file = os.path.join(self.new_data_folder, "metadata.json")
            with open(meta_file) as fp:
                metadata = json.load(fp)
            for md5_fname in metadata["diff"]["files"]:
                spec_md5 = md5_fname["md5sum"]
                fname = md5_fname["name"]
                compute_md5 = md5sum(os.path.join(self.new_data_folder, fname))
                if compute_md5 != spec_md5:
                    self.logger.error(
                        "md5 check failed for file '%s', it may be corrupted" %
                        fname)
                    e = DumperException("Bad md5sum for file '%s'" % fname)
                    # record the failure in the dumper status before raising
                    self.register_status("failed", download={"err": repr(e)})
                    raise e
                else:
                    self.logger.debug("md5 check success for file '%s'" %
                                      fname)
        elif build_meta["type"] == "full":
            # if type=fs, check if archive must be uncompressed
            # TODO
            # repo_name = list(build_meta["metadata"]["repository"].keys())[0]
            if build_meta["metadata"]["repository"]["type"] == "fs":
                uncompressall(self.new_data_folder)
Esempio n. 2
0
def diff_worker_old_vs_new(id_list_old, new_db_col_names, batch_num,
                           diff_folder):
    """Compute the "delete" side of a diff for one batch of ids.

    Every id in *id_list_old* that is no longer present in the new backend
    is marked for deletion. When at least one deletion is found, the diff
    payload is dumped to ``<diff_folder>/<batch_num>.pyobj`` and its md5
    checksum is recorded so downloaded files can be verified.

    Returns:
        dict: summary with "add"/"update"/"delete" counts, plus a
        "diff_file" entry (name + md5sum) when a file was written.
    """
    backend_new = create_backend(new_db_col_names)
    # ids still alive in the new collection
    surviving_ids = {doc['_id']
                     for doc in backend_new.mget_from_ids(id_list_old)}
    deleted_ids = list(set(id_list_old) - surviving_ids)
    diff_path = os.path.join(diff_folder, "%s.pyobj" % str(batch_num))
    payload = {
        'delete': deleted_ids,
        'add': [],
        'update': [],
        'source': backend_new.target_name,
        'timestamp': get_timestamp()
    }
    summary = {"add": 0, "update": 0, "delete": len(deleted_ids)}
    if deleted_ids:
        dump(payload, diff_path)
        # compute md5 so users can check integrity after download
        summary["diff_file"] = {
            "name": os.path.basename(diff_path),
            "md5sum": md5sum(diff_path)
        }

    return summary
Esempio n. 3
0
 def post_dump(self):
     """Verify md5 checksums of diff files after an incremental dump.

     Reads "<release>.json" to determine the build type; for incremental
     builds, checks every file listed in metadata.json against its
     expected md5sum.

     Raises:
         DumperException: when a file's computed md5 does not match the
             expected one (status is registered as "failed" first).
     """
     # context managers close the files deterministically
     # (json.load(open(...)) previously leaked file handles)
     with open(os.path.join(self.new_data_folder,
                            "%s.json" % self.release)) as fp:
         build_meta = json.load(fp)
     if build_meta["type"] == "incremental":
         self.logger.info("Checking md5sum for files in '%s'" % self.new_data_folder)
         with open(os.path.join(self.new_data_folder, "metadata.json")) as fp:
             metadata = json.load(fp)
         for md5_fname in metadata["diff"]["files"]:
             spec_md5 = md5_fname["md5sum"]
             fname = md5_fname["name"]
             compute_md5 = md5sum(os.path.join(self.new_data_folder, fname))
             if compute_md5 != spec_md5:
                 self.logger.error("md5 check failed for file '%s', it may be corrupted" % fname)
                 e = DumperException("Bad md5sum for file '%s'" % fname)
                 # record the failure before propagating it
                 self.register_status("failed", download={"err": repr(e)})
                 raise e
             else:
                 self.logger.debug("md5 check success for file '%s'" % fname)
Esempio n. 4
0
 def post_dump(self):
     """Check md5 integrity of incremental diff files after a dump.

     Loads "<release>.json" build metadata; when the build type is
     "incremental", every file declared in metadata.json is checked
     against its expected md5sum.

     Raises:
         DumperException: on any md5 mismatch; the dumper status is set
             to "failed" with the error before raising.
     """
     # open files via "with" so handles are always closed
     # (bare json.load(open(...)) leaked file descriptors)
     with open(os.path.join(self.new_data_folder,
                            "%s.json" % self.release)) as fp:
         build_meta = json.load(fp)
     if build_meta["type"] == "incremental":
         self.logger.info("Checking md5sum for files in '%s'" %
                          self.new_data_folder)
         with open(os.path.join(self.new_data_folder,
                                "metadata.json")) as fp:
             metadata = json.load(fp)
         for md5_fname in metadata["diff"]["files"]:
             spec_md5 = md5_fname["md5sum"]
             fname = md5_fname["name"]
             compute_md5 = md5sum(os.path.join(self.new_data_folder, fname))
             if compute_md5 != spec_md5:
                 self.logger.error(
                     "md5 check failed for file '%s', it may be corrupted" %
                     fname)
                 e = DumperException("Bad md5sum for file '%s'" % fname)
                 # register the failure before raising it to the caller
                 self.register_status("failed", download={"err": repr(e)})
                 raise e
             else:
                 self.logger.debug("md5 check success for file '%s'" %
                                   fname)
Esempio n. 5
0
 def diff_mapping(old, new, diff_folder):
     """Diff the mappings of two builds.

     Looks up the build documents for *old* and *new* target collections;
     when both exist, the jsondiff of their "mapping" fields is dumped to
     ``<diff_folder>/mapping.pyobj`` along with its md5 checksum.

     Returns:
         dict: {"mapping_file": {"name": ..., "md5sum": ...}} when a diff
         was produced, otherwise an empty dict.
     """
     summary = {}
     old_build = get_src_build().find_one(
         {"_id": old.target_collection.name})
     new_build = get_src_build().find_one(
         {"_id": new.target_collection.name})
     if old_build and new_build:
         # mapping diff always in jsondiff
         mapping_diff = jsondiff(old_build["mapping"],
                                 new_build["mapping"])
         if mapping_diff:
             file_name = os.path.join(diff_folder, "mapping.pyobj")
             dump(mapping_diff, file_name)
             md5 = md5sum(file_name)
             summary["mapping_file"] = {
                 "name": os.path.basename(file_name),
                 "md5sum": md5
             }
     else:
         # BUG FIX: this is a module-level function with no "self"
         # parameter — the original "self.logger.info(...)" raised
         # NameError whenever this branch was reached. Use a module
         # logger instead (imported locally; file header not in view).
         import logging
         logging.getLogger(__name__).info(
             "Neither '%s' nor '%s' have mappings associated to them, skip" %
             (old.target_collection.name, new.target_collection.name))
     return summary
Esempio n. 6
0
def diff_worker_new_vs_old(id_list_new,
                           old_db_col_names,
                           new_db_col_names,
                           batch_num,
                           diff_folder,
                           diff_func,
                           exclude=None,
                           selfcontained=False):
    """Compute the "add"/"update" side of a diff for one batch of ids.

    Ids from *id_list_new* missing in the old backend become "add"
    entries; ids present in both are compared with *diff_func* to produce
    "update" entries. When anything changed, the diff payload is dumped to
    ``<diff_folder>/<batch_num>.pyobj`` with its md5 checksum recorded.

    Args:
        exclude: optional list of attribute names ignored by *diff_func*
            (defaults to no exclusions).
        selfcontained: when True, "add" entries carry the full documents
            instead of bare ids, so the diff can be applied without
            access to the new collection.

    Returns:
        dict: summary with "add"/"update"/"delete" counts, plus a
        "diff_file" entry (name + md5sum) when a file was written.
    """
    # BUG FIX: "exclude=[]" was a mutable default argument shared across
    # calls; use a None sentinel instead (backward-compatible).
    if exclude is None:
        exclude = []
    new = create_backend(new_db_col_names)
    old = create_backend(old_db_col_names)
    docs_common = old.mget_from_ids(id_list_new)
    ids_common = [_doc['_id'] for _doc in docs_common]
    id_in_new = list(set(id_list_new) - set(ids_common))
    _updates = []
    if ids_common:
        _updates = diff_func(old, new, list(ids_common), exclude_attrs=exclude)
    file_name = os.path.join(diff_folder, "%s.pyobj" % str(batch_num))
    _result = {
        'add': id_in_new,
        'update': _updates,
        'delete': [],
        'source': new.target_name,
        'timestamp': get_timestamp()
    }
    if selfcontained:
        _result["add"] = new.mget_from_ids(id_in_new)
    summary = {"add": len(id_in_new), "update": len(_updates), "delete": 0}
    if _updates or id_in_new:
        dump(_result, file_name)
        # compute md5 so when downloaded, users can check integrity
        md5 = md5sum(file_name)
        summary["diff_file"] = {
            "name": os.path.basename(file_name),
            "md5sum": md5
        }

    return summary