Example #1
    def hash(self, filepath):
        tmp_filename = str(uuid.uuid4())
        total_size = 0

        with open(os.path.join("/tmp", tmp_filename), 'wb') as fw:
            with open(filepath, 'rb') as fr:
                # read the file block by block, hashing and copying as we go
                while True:
                    buf = fr.read(self.BLOCKSIZE)
                    if not buf:
                        break
                    total_size += len(buf)
                    self.hash_algo.update(buf)
                    fw.write(buf)

        # figure out the digest of the file
        digest = self.hash_algo.hexdigest()
        (directory, filename) = Helmspoint.digest_filepath(digest)

        # move the temp file into the content-addressed object store
        # (note: os.rename requires src and dst to be on the same
        # filesystem; shutil.move would also handle cross-device moves)
        obj_src = os.path.join("/tmp", tmp_filename)
        obj_dst = os.path.join(Helmspoint.REPO_OBJ_PATH, directory, filename)
        os.rename(obj_src, obj_dst)

        return digest, total_size
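This example (and every one below) leans on Helmspoint.digest_filepath, which is not shown. A minimal standalone sketch of what it plausibly does, assuming a git-style layout that shards objects by the first two hex characters of the digest (an assumption, not confirmed by the source):

    import hashlib
    import os

    def digest_filepath(digest):
        # Assumed behavior: first two hex characters become the directory,
        # the remainder the filename, git-style. The real
        # Helmspoint.digest_filepath may differ.
        return digest[:2], digest[2:]

    digest = hashlib.sha256(b'example').hexdigest()
    directory, filename = digest_filepath(digest)
    print(os.path.join(directory, filename))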
Example #2
    def write(self, tree):
        tree_json = json.dumps(tree).encode('UTF-8')
        digest = hashlib.sha256(tree_json).hexdigest()

        (directory, filename) = Helmspoint.digest_filepath(digest)

        # write it to object directory
        dst_path = os.path.join(Helmspoint.REPO_OBJ_PATH, directory, filename)
        with open(dst_path, 'wb') as fw:
            fw.write(tree_json)

        return digest
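One caveat worth noting for write(): SHA-256 is computed over the exact JSON bytes, so the digest is only stable if serialization is deterministic. A hedged sketch (the tree payload below is made up):

    import hashlib
    import json

    tree = {'name': 'munge', 'links': []}

    # sort_keys=True yields canonical JSON, so semantically equal trees
    # built with different key insertion orders still hash to the same
    # digest. The code above uses plain json.dumps, which depends on
    # insertion order instead.
    canonical = json.dumps(tree, sort_keys=True).encode('UTF-8')
    print(hashlib.sha256(canonical).hexdigest())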
Example #3
    def write(self):
        stage_data = self.initial_data()
        self.digest = hashlib.sha256(stage_data).hexdigest()

        (directory, filename) = Helmspoint.digest_filepath(self.digest)

        # write it to object directory
        dst_path = os.path.join(Helmspoint.REPO_OBJ_PATH, directory, filename)
        with open(dst_path, 'wb') as fw:
            fw.write(stage_data)

        return self.digest
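initial_data() is not shown, but it must return bytes, since they are hashed and written in binary mode. Because Example #7 deserializes stage objects with cloudpickle.loads, one plausible sketch is that the stage pickles its wrapped function; this is a guess, not the confirmed implementation, and self.func is a hypothetical attribute:

    import cloudpickle

    def initial_data(self):
        # Hypothetical: serialize the wrapped function so it round-trips
        # through cloudpickle.loads in get() (Example #7)
        return cloudpickle.dumps(self.func)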
Example #4
    def write(self):
        dag_data = self.build()
        dag_json = json.dumps(dag_data).encode('UTF-8')
        self.digest = hashlib.sha256(dag_json).hexdigest()

        (directory, filename) = Helmspoint.digest_filepath(self.digest)

        dst_path = os.path.join(Helmspoint.REPO_OBJ_PATH, directory, filename)
        with open(dst_path, 'wb') as fw:
            fw.write(dag_json)

        return self.digest
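build() is not shown either, but Example #5 reads dag_json['data']['name'] and dag_json['links'] entries named 'func' and 'parent', so the node it returns plausibly looks like this (the field values are placeholders):

    dag_data = {
        'data': {'name': 'munge'},
        'links': [
            {'name': 'func', 'hash': '<stage digest>'},
            {'name': 'parent', 'hash': '<parent dag digest>'},
        ],
    }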
Example #5
    def run(self, arg_map, data_digests):
        # get the dag
        dag_json = Dag.get(self.digest)

        # get the stage
        func_link = next(link for link in dag_json['links'] if link['name'] == 'func')

        # deserialize the func
        stage_func = Stage.get(func_link['hash'])

        # use every parent dag hash to look up data.
        parent_links = [link for link in dag_json['links'] if link['name'] == 'parent']
        # materialize the digests as a list; a lazy map object would print
        # as <map object ...> below instead of showing its contents
        parent_dag_digests = [link['hash'] for link in parent_links]

        print("arg_map %s" % arg_map)
        print("data_digests %s" % data_digests)
        print("parent_dag_digests %s" % parent_dag_digests)

        # build up data arguments to go into this stage
        arg_names = arg_map[dag_json['data']['name']]
        input_data = []
        for parent_data_digest in parent_dag_digests:
            data_digest = data_digests[parent_data_digest]
            (directory, filename) = Helmspoint.digest_filepath(data_digest)
            datapath = os.path.join(Helmspoint.REPO_OBJ_PATH, directory, filename)
            with open(datapath, 'r') as f:
                raw_data = f.read()
                json_data = json.loads(raw_data)
                input_data.append(json_data)

        # run it
        print("running stage: %s" % dag_json['data']['name'])
        parents_mapping = dict(zip(arg_names, input_data))
        output_data = stage_func(**parents_mapping)

        # hash data and write the data to disk
        pipe_result_path = os.path.join("datasource", "pipeline")
        os.makedirs(pipe_result_path, exist_ok=True)
        datapath = os.path.join(pipe_result_path, dag_json['data']['name'])
        with open(datapath, 'w') as f:
            json_data = json.dumps(output_data)
            f.write(json_data)
        (data_digest, data_size) = Blob().hash(datapath)

        print("----------")

        return data_digest
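A hypothetical call, inferred from how run() indexes its arguments: arg_map maps a stage name to the keyword-argument names its function expects, and data_digests maps each parent dag digest to the digest of that parent's output blob. Every identifier and value below is made up:

    # dag is a hypothetical Dag instance whose digest was written earlier
    arg_map = {'munge': ['flights', 'weather']}
    data_digests = {
        '<flights dag digest>': '<flights output blob digest>',
        '<weather dag digest>': '<weather output blob digest>',
    }
    result_digest = dag.run(arg_map, data_digests)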
Example #6
    def write(self, commit):
        commit_json = json.dumps(commit).encode('UTF-8')
        digest = hashlib.sha256(commit_json).hexdigest()

        (directory, filename) = Helmspoint.digest_filepath(digest)

        # write it to object directory
        dst_path = os.path.join(Helmspoint.REPO_OBJ_PATH, directory, filename)
        with open(dst_path, 'wb') as fw:
            fw.write(commit_json)

        # write the digest of the new commit to refs/heads/master, so the
        # branch head now points at this commit
        with open(os.path.join(Helmspoint.REPO_HEADS_PATH, "master"), 'w') as fw:
            fw.write(digest)

        return digest
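The natural counterpart, reading the branch head back to find the latest commit, is not shown in these examples; a hypothetical sketch using the same constant:

    import os

    def head_digest():
        # Hypothetical helper: resolve the current commit by reading the
        # digest stored in refs/heads/master by write() above
        with open(os.path.join(Helmspoint.REPO_HEADS_PATH, "master"), 'r') as f:
            return f.read().strip()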
Example #7
    def get(digest):
        (directory, filename) = Helmspoint.digest_filepath(digest)
        filepath = os.path.join(Helmspoint.REPO_OBJ_PATH, directory, filename)
        with open(filepath, 'rb') as f:
            raw = f.read()
        return cloudpickle.loads(raw)
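Usage mirrors Example #5: fetch the pickled stage function by digest, then call it with keyword arguments. The digest and input variables below are made-up placeholders:

    stage_func = Stage.get('<stage digest>')
    output = stage_func(flights=flights_data, weather=weather_data)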