def _compute_u_sorted(a, sorting):
    u_blocks = [[] for _ in range(a._n_blocks[1])]
    hbsize = a._reg_shape[1]

    for i, vblock in enumerate(a._iterator("columns")):
        u_block = [object() for _ in range(a._n_blocks[1])]
        _compute_u_block_sorted(vblock._blocks, i, hbsize, sorting, u_block)

        for j in range(len(u_block)):
            u_blocks[j].append(u_block[j])

    vbsize = a._reg_shape[0]
    final_blocks = Array._get_out_blocks(a._n_blocks)

    for i, u_block in enumerate(u_blocks):
        new_block = [object() for _ in range(a._n_blocks[0])]
        _merge_svd_block(u_block, i, hbsize, vbsize, sorting, new_block)

        for j in range(len(new_block)):
            final_blocks[j][i] = new_block[j]

        for elem in u_block:
            compss_delete_object(elem)

    return Array(final_blocks, a._top_left_shape, a._reg_shape, a.shape,
                 a._sparse)
def _sort_v(v, sorting):
    v_blocks = [[] for _ in range(v._n_blocks[1])]
    hbsize = v._reg_shape[1]

    for i, vblock in enumerate(v._iterator("columns")):
        out_blocks = [[] for _ in range(v._n_blocks[1])]
        _sort_v_block(vblock._blocks, i, hbsize, sorting, out_blocks)

        for j in range(len(out_blocks)):
            v_blocks[j].append(out_blocks[j])

    vbsize = v._reg_shape[0]
    final_blocks = Array._get_out_blocks(v._n_blocks)

    for i, v_block in enumerate(v_blocks):
        new_block = [object() for _ in range(v._n_blocks[0])]
        _merge_svd_block(v_block, i, hbsize, vbsize, sorting, new_block)

        for j in range(len(new_block)):
            final_blocks[j][i] = new_block[j]

        for elem in v_block:
            compss_delete_object(elem)

    return Array(final_blocks, v._top_left_shape, v._reg_shape, v.shape,
                 v._sparse)
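# Illustrative usage sketch (added; not part of the original module). The two
# helpers above are internal pieces of dislib's blocked SVD; the assumed
# public entry point is `dislib.math.svd`. A call would look roughly like:

import dislib as ds
from dislib.math import svd

x = ds.random_array((1000, 100), (100, 50))
u, s, v = svd(x)  # u and v are assembled by helpers like the ones above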
def _load_mdcrd(path, block_size, n_cols, n_blocks, bytes_per_snap,
                bytes_per_block):
    blocks = []
    file_size = os.stat(path).st_size - _CRD_LINE_SIZE

    # open outside the try block so `fid` is always bound in `finally`
    fid = open(path, "rb")
    try:
        fid.read(_CRD_LINE_SIZE)  # skip header

        for _ in range(0, file_size, bytes_per_block):
            data = fid.read(bytes_per_block)
            out_blocks = [object() for _ in range(n_blocks)]
            _read_crd_bytes(data, block_size[1], n_cols, out_blocks)
            compss_delete_object(data)
            blocks.append(out_blocks)
    finally:
        fid.close()

    n_samples = int(file_size / bytes_per_snap)
    return Array(blocks, top_left_shape=block_size, reg_shape=block_size,
                 shape=(n_samples, n_cols), sparse=False)
def testDeleteObject2(self):
    obj_1 = [0]
    for i in range(10):
        obj_1[0] = i - 1
        obj_2 = increment_object(obj_1)
        obj_2 = compss_wait_on(obj_2)
        compss_delete_object(obj_1)
        self.assertEqual(i, obj_2[0])
def fit(self, x, y=None):
    """Estimate model parameters with the EM algorithm.

    Iterates between E-steps and M-steps until convergence or until
    `max_iter` iterations are reached. It estimates the model parameters
    `weights_`, `means_` and `covariances_`.

    Parameters
    ----------
    x : ds-array, shape=(n_samples, n_features)
        Data points.
    y : ignored
        Not used, present here for API consistency by convention.

    Warns
    -----
    ConvergenceWarning
        If `tol` is not None and `max_iter` iterations are reached without
        convergence.
    """
    self._check_initial_parameters()

    self.converged_ = False
    self.n_iter = 0

    random_state = validation.check_random_state(self.random_state)

    self._initialize_parameters(x, random_state)
    self.lower_bound_ = -np.infty
    if self.verbose:
        print("GaussianMixture EM algorithm start")
    for self.n_iter in range(1, self.max_iter + 1):
        prev_lower_bound = self.lower_bound_

        self.lower_bound_, resp = self._e_step(x)
        self._m_step(x, resp)
        for resp_block in resp._blocks:
            compss_delete_object(resp_block)

        if self.check_convergence:
            self.lower_bound_ = compss_wait_on(self.lower_bound_)
            diff = abs(self.lower_bound_ - prev_lower_bound)

            if self.verbose:
                iter_msg_template = "Iteration %s - Convergence crit. = %s"
                print(iter_msg_template % (self.n_iter, diff))

            if diff < self.tol:
                self.converged_ = True
                break

    if self.check_convergence and not self.converged_:
        warnings.warn('The algorithm did not converge. '
                      'Try different init parameters, '
                      'or increase max_iter, tol '
                      'or check for degenerate data.',
                      ConvergenceWarning)
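# Illustrative usage sketch (added; assumes dislib's public
# `dislib.cluster.GaussianMixture` API, to which this `fit` belongs):

import numpy as np
import dislib as ds
from dislib.cluster import GaussianMixture

x = ds.array(np.random.rand(100, 2), block_size=(25, 2))
gm = GaussianMixture(n_components=3, random_state=0)
gm.fit(x)  # runs the EM loop above; `resp` blocks are freed each iteration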
def testDeleteObject3(self):
    obj_1 = [0]
    obj_list = []
    for i in range(10):
        obj_1[0] = i - 1
        obj_2 = increment_object(obj_1)
        obj_list.append(obj_2)
        compss_delete_object(obj_1)
    obj_list = compss_wait_on(obj_list)
    self.assertEqual([[i] for i in range(10)], obj_list)
def _do_iteration(self, x, y, ids_list):
    q = []
    pars = self._clf_params
    arity = self._arity

    # first level
    for partition, id_bk in zip(_paired_partition(x, y), ids_list):
        x_data = partition[0]._blocks
        y_data = partition[1]._blocks
        ids = [id_bk]

        if self._svs is not None:
            x_data.append(self._svs)
            y_data.append([self._sv_labels])
            ids.append([self._sv_ids])

        _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
        sv, sv_labels, sv_ids, self._clf = _tmp
        q.append((sv, sv_labels, sv_ids))

    # reduction
    while len(q) > arity:
        data = q[:arity]
        del q[:arity]

        x_data = [tup[0] for tup in data]
        y_data = [[tup[1]] for tup in data]
        ids = [[tup[2]] for tup in data]

        _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
        sv, sv_labels, sv_ids, self._clf = _tmp
        q.append((sv, sv_labels, sv_ids))

        # delete partial results
        for partial in data:
            compss_delete_object(partial)

    # last layer
    x_data = [tup[0] for tup in q]
    y_data = [[tup[1]] for tup in q]
    ids = [[tup[2]] for tup in q]

    _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
    self._svs, self._sv_labels, self._sv_ids, self._clf = _tmp

    self.iterations += 1
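# Minimal, self-contained sketch (added) of the arity-based cascade reduction
# used in `_do_iteration` above; `combine` stands in for `_train` and plain
# values stand in for the (sv, sv_labels, sv_ids) tuples:

def cascade_reduce(items, combine, arity):
    """Reduce `items` in chunks of `arity`, mirroring the queue logic."""
    q = list(items)
    while len(q) > arity:
        chunk = q[:arity]         # take the next `arity` partial results
        del q[:arity]
        q.append(combine(chunk))  # merged result re-enters the queue
    return combine(q)             # last layer

assert cascade_reduce([1, 2, 3, 4, 5], sum, arity=2) == 15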
def fit(self, dataset):
    """Fits the DecisionTreeClassifier.

    Parameters
    ----------
    dataset : dislib.classification.rf._data.RfDataset
    """
    self.n_features = dataset.get_n_features()
    self.n_classes = dataset.get_n_classes()
    samples_path = dataset.samples_path
    features_path = dataset.features_path
    n_samples = dataset.get_n_samples()
    y_codes = dataset.get_y_codes()

    seed = self.random_state.randint(np.iinfo(np.int32).max)

    sample, y_s = _sample_selection(n_samples, y_codes, self.bootstrap, seed)

    self.tree = _Node()
    self.nodes_info = []
    self.subtrees = []
    tree_traversal = [(self.tree, sample, y_s, 0)]
    while tree_traversal:
        node, sample, y_s, depth = tree_traversal.pop()
        if depth < self.distr_depth:
            split = _split_node_wrapper(sample, self.n_features, y_s,
                                        self.n_classes, self.try_features,
                                        self.random_state,
                                        samples_file=samples_path,
                                        features_file=features_path)
            node_info, left_group, y_l, right_group, y_r = split
            compss_delete_object(sample)
            compss_delete_object(y_s)
            node.content = len(self.nodes_info)
            self.nodes_info.append(node_info)
            node.left = _Node()
            node.right = _Node()
            depth = depth + 1
            tree_traversal.append((node.right, right_group, y_r, depth))
            tree_traversal.append((node.left, left_group, y_l, depth))
        else:
            subtree = _build_subtree_wrapper(
                sample, y_s, self.n_features, self.max_depth - depth,
                self.n_classes, self.try_features, self.sklearn_max,
                self.random_state, samples_path, features_path)
            node.content = len(self.subtrees)
            self.subtrees.append(subtree)
            compss_delete_object(sample)
            compss_delete_object(y_s)
    self.nodes_info = _merge(*self.nodes_info)
def testDeleteObject1(self):
    obj_1 = [0]
    obj_2 = increment_object(obj_1)
    obj_2 = compss_wait_on(obj_2)
    obj_1_id = OT.get_object_id(obj_1)
    deletion_result = compss_delete_object(obj_1)
    self.assertTrue(deletion_result)
    self.assertFalse(obj_1_id in OT.pending_to_synchronize)
    # compare by value: identity checks against a string literal are fragile
    self.assertEqual(OT.get_object_id(obj_1), "")
def testDeleteObject1(self):
    from pycompss.runtime.binding import pending_to_synchronize
    from pycompss.runtime.binding import objid_to_filename
    from pycompss.runtime.binding import get_object_id
    obj_1 = [0]
    obj_2 = increment_object(obj_1)
    obj_2 = compss_wait_on(obj_2)
    obj_1_id = get_object_id(obj_1, False, False)
    deletion_result = compss_delete_object(obj_1)
    self.assertTrue(deletion_result)
    self.assertFalse(obj_1_id in pending_to_synchronize)
    self.assertTrue(get_object_id(obj_1, False, False) is None)
def remove_last_rows(a: Array, n_rows):
    """ Removes the last rows from the bottom blocks of the ds-array.

    Parameters
    ----------
    a : ds-array
        The array to remove rows from.
    n_rows : int
        The number of rows to remove.
    """
    if n_rows <= 0:
        return

    right_bottom_shape = compute_bottom_right_shape(a)

    if n_rows >= right_bottom_shape[0]:
        # removing whole blocks
        removed_blocks = int(n_rows / right_bottom_shape[0])
        removed_rows = removed_blocks * right_bottom_shape[0]
        for i in reversed(
                range(a._n_blocks[0] - removed_blocks, a._n_blocks[0])):
            compss_delete_object(a._blocks[i])
            del a._blocks[i]
        a._n_blocks = (a._n_blocks[0] - removed_blocks, a._n_blocks[1])
        a._shape = (a._shape[0] - removed_rows, a._shape[1])
        n_rows = n_rows - removed_rows

    if n_rows <= 0:
        return

    for col_block_idx in range(a._n_blocks[1]):
        # removing remaining rows
        padded_block = _remove_bottom_rows(a._blocks[-1][col_block_idx],
                                           n_rows)
        a._blocks[-1][col_block_idx] = padded_block

    a._shape = (a._shape[0] - n_rows, a._shape[1])
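# Illustrative usage sketch (added; assumes `dislib.array` for construction):

import numpy as np
import dislib as ds

a = ds.array(np.arange(50).reshape(10, 5), block_size=(4, 5))
remove_last_rows(a, 3)  # in-place: a.shape becomes (7, 5)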
def shuffle(x, y=None, random_state=None):
    """ Randomly shuffles the rows of data.

    Parameters
    ----------
    x : ds-array
        Data to be shuffled.
    y : ds-array, optional (default=None)
        Additional array to shuffle using the same permutation, usually for
        labels or values. It is required that y.shape[0] == x.shape[0].
    random_state : int or RandomState, optional (default=None)
        Seed or numpy.random.RandomState instance to use in the generation
        of random numbers.

    Returns
    -------
    x_shuffled : ds-array
        A new ds-array containing the rows of x shuffled.
    y_shuffled : ds-array, optional
        A new ds-array containing the rows of y shuffled using the same
        permutation. Only provided if y is not None.
    """
    if y is not None:
        assert y.shape[0] == x.shape[0]
    np.random.seed(random_state)

    block_n_rows = x._reg_shape[0]
    sizes_out = [block_n_rows for _ in range(x._shape[0] // block_n_rows)]
    remainder = x._shape[0] % block_n_rows
    if remainder != 0:
        sizes_out.append(remainder)

    # Matrix of subsets of rows (subsamples) going from part_in_i to
    # part_out_j
    mapped_subsamples = []

    # For each part_in, get the parts going to each part_out
    if y is None:
        partition = x._iterator(axis=0)
    else:
        partition = _paired_partition(x, y)
    for part_in in partition:
        # part can be an array x_part or a tuple (x_part, y_part)
        part_sizes, part_in_subsamples = _partition_arrays(part_in, sizes_out)
        mapped_subsamples.append(part_in_subsamples)
        sizes_out -= part_sizes

    x_shuffled_blocks = []
    y_shuffled_blocks = []
    for j in range(len(sizes_out)):
        part_out_subsamples = [
            part_in_subsamples[j]
            for part_in_subsamples in mapped_subsamples
        ]
        seed = np.random.randint(np.iinfo(np.int32).max)
        part_out_x_blocks = [{} for _ in range(x._n_blocks[1])]
        if y is None:
            _merge_shuffle_x(seed, part_out_subsamples, part_out_x_blocks,
                             x._reg_shape[1])
        else:
            part_out_y_blocks = [{} for _ in range(y._n_blocks[1])]
            _merge_shuffle_xy(seed, part_out_subsamples, part_out_x_blocks,
                              part_out_y_blocks, x._reg_shape[1],
                              y._reg_shape[1])
            y_shuffled_blocks.append(part_out_y_blocks)
        x_shuffled_blocks.append(part_out_x_blocks)

        # Clean parts to save disk space
        for part in part_out_subsamples:
            compss_delete_object(part)

    x_shuffled = Array(blocks=x_shuffled_blocks,
                       top_left_shape=x._reg_shape,
                       reg_shape=x._reg_shape,
                       shape=x.shape, sparse=x._sparse)
    if y is None:
        return x_shuffled
    else:
        y_shuffled = Array(blocks=y_shuffled_blocks,
                           top_left_shape=(x._reg_shape[0], y._reg_shape[1]),
                           reg_shape=(x._reg_shape[0], y._reg_shape[1]),
                           shape=y.shape, sparse=y._sparse)
        return x_shuffled, y_shuffled
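# Illustrative usage sketch (added; assumes `dislib.random_array`):

import dislib as ds

x = ds.random_array((100, 10), (20, 10))
y = ds.random_array((100, 1), (20, 1))
x_shuf, y_shuf = shuffle(x, y, random_state=42)  # same row permutation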
def fit(self, dataset, feature_types):
    count_num_attr = len([a for a in feature_types if a == "num"])
    if count_num_attr > 0:
        self.epsilon = self.privacy_budget / ((
            (2 + count_num_attr) * self.max_depth) + 2)
    self.n_features = dataset.get_n_features()
    self.n_classes = dataset.get_n_classes()
    samples_path = dataset.samples_path
    features_path = dataset.features_path
    n_samples = dataset.get_n_samples()
    y_codes = dataset.get_y_codes()

    seed = self.random_state.randint(np.iinfo(np.int32).max)

    sample, y_s = _sample_selection(n_samples, y_codes, self.bootstrap, seed)

    self.tree = _Node()
    self.nodes_info = []
    self.subtrees = []
    tree_traversal = [(self.tree, sample, y_s, 0)]
    while tree_traversal:
        node, sample, y_s, depth = tree_traversal.pop()
        if depth < self.distr_depth:
            split = _split_node_wrapper(sample, self.n_features,
                                        feature_types, y_s, self.n_classes,
                                        self.try_features, self.random_state,
                                        self.epsilon,
                                        samples_file=samples_path,
                                        features_file=features_path)
            node_info, left_group, y_l, right_group, y_r = split
            compss_delete_object(sample)
            compss_delete_object(y_s)
            node.content = len(self.nodes_info)
            self.nodes_info.append(node_info)
            node.left = _Node()
            node.right = _Node()
            depth = depth + 1
            tree_traversal.append((node.right, right_group, y_r, depth))
            tree_traversal.append((node.left, left_group, y_l, depth))
        else:
            subtree = _build_subtree_wrapper(
                sample, y_s, self.n_features, feature_types,
                self.max_depth - depth, self.n_classes, self.try_features,
                self.sklearn_max, self.random_state, samples_path,
                features_path, self.epsilon)
            node.content = len(self.subtrees)
            self.subtrees.append(subtree)
            compss_delete_object(sample)
            compss_delete_object(y_s)
    self.nodes_info = _merge(*self.nodes_info)
def test_dummy_api():
    from pycompss.api.dummy.api import compss_start
    from pycompss.api.dummy.api import compss_stop
    from pycompss.api.dummy.api import compss_file_exists
    from pycompss.api.dummy.api import compss_open
    from pycompss.api.dummy.api import compss_delete_file
    from pycompss.api.dummy.api import compss_wait_on_file
    from pycompss.api.dummy.api import compss_wait_on_directory
    from pycompss.api.dummy.api import compss_delete_object
    from pycompss.api.dummy.api import compss_barrier
    from pycompss.api.dummy.api import compss_barrier_group
    from pycompss.api.dummy.api import compss_wait_on
    from pycompss.api.dummy.api import compss_get_number_of_resources
    from pycompss.api.dummy.api import compss_request_resources
    from pycompss.api.dummy.api import compss_free_resources
    from pycompss.api.dummy.api import TaskGroup

    file_name = "simulated_file.txt"
    file_names = ["simulated_file1.txt", "simulated_file2.txt"]
    directory_name = "simulated_directory"
    directory_names = ["simulated_directory1", "simulated_directory2"]
    group_name = "simulated_group"
    obj = [1, 2, 3]
    num_resources = 1

    with open(file_name, "w") as f:
        f.write("some content")
    os.mkdir(directory_name)
    for f_name in file_names:
        with open(f_name, "w") as f:
            f.write("some content")
    for d_name in directory_names:
        os.mkdir(d_name)

    compss_start(log_level="off", interactive=False)
    compss_stop(code=0)
    compss_file_exists(file_name)
    compss_file_exists(*file_names)
    compss_open(file_name, mode="r")
    compss_delete_file(file_name)
    compss_delete_file(*file_names)
    compss_wait_on_file(file_name)
    compss_wait_on_file(*file_names)
    compss_wait_on_directory(directory_name)
    compss_wait_on_directory(*directory_names)
    compss_delete_object(obj)
    compss_delete_object(*obj)
    compss_barrier(no_more_tasks=False)
    compss_barrier_group(group_name)
    compss_wait_on(obj)
    compss_wait_on(*obj)
    compss_get_number_of_resources()
    compss_request_resources(num_resources, group_name)
    compss_free_resources(num_resources, group_name)
    with TaskGroup(group_name, implicit_barrier=True):
        # Empty task group check
        pass

    os.remove(file_name)
    os.rmdir(directory_name)
    for f_name in file_names:
        os.remove(f_name)
    for d_name in directory_names:
        os.rmdir(d_name)
def delete_object(obj):
    # Release
    compss_delete_object(obj)
def _qr_full(r):
    b_size = r._reg_shape
    q, q_type = _gen_identity(r.shape[0], r.shape[0], r._reg_shape,
                              r._n_blocks[0], r._n_blocks[0])

    r_type = full((r._n_blocks[0], r._n_blocks[1]), (1, 1), OTHER)

    for i in range(r._n_blocks[1]):
        act_q_type, act_q, r_type_block, r_block = _qr(
            r._blocks[i][i], r_type._blocks[i][i], r._reg_shape, t=True)
        r_type.replace_block(i, i, r_type_block)
        r.replace_block(i, i, r_block)

        for j in range(r._n_blocks[0]):
            q_type_block, q_block = _dot(q._blocks[j][i],
                                         q_type._blocks[j][i],
                                         act_q, act_q_type, b_size,
                                         transpose_b=True)
            q_type.replace_block(j, i, q_type_block)
            q.replace_block(j, i, q_block)

        for j in range(i + 1, r._n_blocks[1]):
            r_type_block, r_block = _dot(act_q, act_q_type, r._blocks[i][j],
                                         r_type._blocks[i][j], b_size)
            r_type.replace_block(i, j, r_type_block)
            r.replace_block(i, j, r_block)

        compss_delete_object(act_q_type)
        compss_delete_object(act_q)

        sub_q = [[np.array([0]), np.array([0])],
                 [np.array([0]), np.array([0])]]
        sub_q_type = [[_type_block(OTHER), _type_block(OTHER)],
                      [_type_block(OTHER), _type_block(OTHER)]]

        # Update values of the respective column
        for j in range(i + 1, r._n_blocks[0]):
            sub_q[0][0], sub_q[0][1], sub_q[1][0], sub_q[1][1], \
                r_type_block1, r_block1, r_type_block2, r_block2 = _little_qr(
                    r._blocks[i][i], r_type._blocks[i][i],
                    r._blocks[j][i], r_type._blocks[j][i],
                    r._reg_shape, transpose=True)
            r_type.replace_block(i, i, r_type_block1)
            r.replace_block(i, i, r_block1)
            r_type.replace_block(j, i, r_type_block2)
            r.replace_block(j, i, r_block2)

            # Update values of the row for the value updated in the column
            for k in range(i + 1, r._n_blocks[1]):
                [[r_type_block1], [r_type_block2]], \
                    [[r_block1], [r_block2]] = _multiply_blocked(
                        sub_q, sub_q_type,
                        [[r._blocks[i][k]], [r._blocks[j][k]]],
                        [[r_type._blocks[i][k]], [r_type._blocks[j][k]]],
                        r._reg_shape)
                r_type.replace_block(i, k, r_type_block1)
                r.replace_block(i, k, r_block1)
                r_type.replace_block(j, k, r_type_block2)
                r.replace_block(j, k, r_block2)

            for k in range(r._n_blocks[0]):
                [[q_type_block1, q_type_block2]], \
                    [[q_block1, q_block2]] = _multiply_blocked(
                        [[q._blocks[k][i], q._blocks[k][j]]],
                        [[q_type._blocks[k][i], q_type._blocks[k][j]]],
                        sub_q, sub_q_type, r._reg_shape, transpose_b=True)
                q_type.replace_block(k, i, q_type_block1)
                q.replace_block(k, i, q_block1)
                q_type.replace_block(k, j, q_type_block2)
                q.replace_block(k, j, q_block2)

            compss_delete_object(sub_q[0][0])
            compss_delete_object(sub_q[0][1])
            compss_delete_object(sub_q[1][0])
            compss_delete_object(sub_q[1][1])

    return q, r
def _initialize_parameters(self, x, random_state):
    """Initialization of the Gaussian mixture parameters.

    Parameters
    ----------
    x : ds-array, shape=(n_samples, n_features)
        Data points.
    random_state : RandomState
        A random number generator instance.
    """
    if self.weights_init is not None:
        self.weights_ = self.weights_init / np.sum(self.weights_init)
    if self.means_init is not None:
        self.means_ = self.means_init
    if self.precisions_init is not None:
        if self.covariance_type == 'full':
            self.precisions_cholesky_ = np.array(
                [linalg.cholesky(prec_init, lower=True)
                 for prec_init in self.precisions_init])
        elif self.covariance_type == 'tied':
            self.precisions_cholesky_ = linalg.cholesky(
                self.precisions_init, lower=True)
        else:
            self.precisions_cholesky_ = self.precisions_init
    initialize_params = (self.weights_init is None or
                         self.means_init is None or
                         self.precisions_init is None)
    if initialize_params:
        n_components = self.n_components
        resp_blocks = []
        if self.init_params == 'kmeans':
            if self.verbose:
                print("KMeans initialization start")
            seed = random_state.randint(0, int(1e8))
            kmeans = KMeans(n_clusters=n_components, random_state=seed,
                            verbose=self.verbose)
            y = kmeans.fit_predict(x)
            self.kmeans = kmeans
            for y_part in y._iterator(axis=0):
                resp_blocks.append([_resp_subset(y_part._blocks,
                                                 n_components)])
        elif self.init_params == 'random':
            chunks = x._n_blocks[0]
            seeds = random_state.randint(np.iinfo(np.int32).max,
                                         size=chunks)
            for i, x_row in enumerate(x._iterator(axis=0)):
                resp_blocks.append([_random_resp_subset(x_row.shape[0],
                                                        n_components,
                                                        seeds[i])])
        else:
            raise ValueError("Unimplemented initialization method '%s'"
                             % self.init_params)
        resp = Array(blocks=resp_blocks,
                     top_left_shape=(x._top_left_shape[0], n_components),
                     reg_shape=(x._reg_shape[0], n_components),
                     shape=(x.shape[0], n_components), sparse=False)
        weights, nk, means = self._estimate_parameters(x, resp)
        if self.means_init is None:
            self.means_ = means
        if self.weights_init is None:
            self.weights_ = weights
        if self.precisions_init is None:
            cov, p_c = _estimate_covariances(x, resp, nk, self.means_,
                                             self.reg_covar,
                                             self.covariance_type,
                                             self.arity)
            self.covariances_ = cov
            self.precisions_cholesky_ = p_c
        for resp_block in resp._blocks:
            compss_delete_object(resp_block)
def main(num_blocks, elems_per_block, check_result, seed, use_storage):
    """
    Matmul main.
    :param num_blocks: <Integer> Number of blocks
    :param elems_per_block: <Integer> Number of elements per block
    :param check_result: <Boolean> Check results against sequential version
                         of matmul
    :param seed: <Integer> Random seed
    :param use_storage: <Boolean> Use storage
    :return: None
    """
    start_time = time.time()

    # Generate the dataset in a distributed manner,
    # i.e: avoid having the master hold a whole matrix
    A, B, C = [], [], []
    matrix_name = ["A", "B"]
    for i in range(num_blocks):
        for l in [A, B, C]:
            l.append([])
        # Keep track of blockId to initialize with different random seeds
        bid = 0
        for j in range(num_blocks):
            for ix, l in enumerate([A, B]):
                psco_name = matrix_name[ix] + str(i) + 'g' + str(j)
                l[-1].append(generate_block(elems_per_block,
                                            num_blocks,
                                            seed=seed + bid,
                                            use_storage=use_storage,
                                            psco_name=psco_name))
                bid += 1
            C[-1].append(generate_block(elems_per_block,
                                        num_blocks,
                                        set_to_zero=True,
                                        use_storage=False))
    compss_barrier()
    initialization_time = time.time()

    # Do matrix multiplication
    dot(A, B, C)

    compss_barrier()
    multiplication_time = time.time()

    if use_storage:
        # Persist the result in a distributed manner (i.e: exploit data
        # locality & avoid memory flooding)
        for i in range(num_blocks):
            for j in range(num_blocks):
                psco_name = 'C_' + str(i) + '_' + str(j)
                persist_result(C[i][j], psco_name)
                # If we are not going to check the result, we can safely
                # delete the Cij intermediate matrices
                if not check_result:
                    compss_delete_object(C[i][j])
        compss_barrier()
        persist_c_time = time.time()
    else:
        persist_c_time = multiplication_time

    # Check if we get the same result if multiplying sequentially (no tasks).
    # Note that this implies having the whole A and B matrices in the master,
    # so it is advisable to set --check_result only with small matrices.
    # Explicit correctness (i.e: an actual dot product is performed) must be
    # checked manually
    if check_result:
        for i in range(num_blocks):
            for j in range(num_blocks):
                A[i][j] = compss_wait_on(A[i][j])
                B[i][j] = compss_wait_on(B[i][j])
        for i in range(num_blocks):
            for j in range(num_blocks):
                Cij = compss_wait_on(C[i][j])
                Dij = generate_block(elems_per_block,
                                     num_blocks,
                                     use_storage=False,
                                     set_to_zero=True)
                Dij = compss_wait_on(Dij)
                import numpy as np
                for k in range(num_blocks):
                    Dij.block += np.dot(A[i][k].block, B[k][j].block)
                if not np.allclose(Cij.block, Dij.block):
                    print('Block %d-%d gives different products!' % (i, j))
                    return
        print('Distributed and sequential results coincide!')

    print("-----------------------------------------")
    print("-------------- RESULTS ------------------")
    print("-----------------------------------------")
    print("Initialization time: %f" % (initialization_time - start_time))
    print("Multiplication time: %f" % (multiplication_time -
                                       initialization_time))
    print("Persist C time     : %f" % (persist_c_time - multiplication_time))
    print("Total time: %f" % (persist_c_time - start_time))
    print("-----------------------------------------")
def files():
    """ Test FILE_IN """
    fin = "infile"
    content = "IN FILE CONTENT"
    with open(fin, 'w') as f:
        f.write(content)
    res = file_in(fin)
    res = compss_wait_on(res)
    assert res == content, \
        "strings are not equal: {}, {}".format(res, content)
    # Check if file exists:
    file_checker(fin, "IN")
    # Remove object
    compss_delete_object(res)

    """ Test Multiple FILE_IN """
    fin_1 = "infile_1"
    fin_2 = "infile_2"
    fin_3 = "infile_3"
    fins = [fin_1, fin_2, fin_3]
    content = "IN FILE CONTENT"
    results = []
    for fin in fins:
        with open(fin, 'w') as f:
            f.write(content)
        results.append(file_in(fin))
    results = compss_wait_on(results)
    for res in results:
        assert res == content, \
            "strings are not equal: {}, {}".format(res, content)
    # Check if file exists:
    multiple_file_checker(fins, "IN")
    # Remove objects
    compss_delete_object(*results)

    """ Test FILE_INOUT """
    finout = "inoutfile"
    content = "INOUT FILE CONTENT"
    with open(finout, 'w') as f:
        f.write(content)
    res = file_inout(finout)
    res = compss_wait_on(res)
    compss_wait_on_file(finout)
    with compss_open(finout, 'r') as finout_r:
        content_r = finout_r.read()
    content += "\n===> INOUT FILE ADDED CONTENT"
    assert res == content, \
        "strings are not equal: {}, {}".format(res, content)
    assert content_r == content, \
        "strings are not equal: {}, {}".format(content_r, content)
    # Check if file exists:
    file_checker(finout, "INOUT")
    # Remove object
    compss_delete_object(res)

    """ Test Multiple FILE_INOUT """
    finout_1 = "inoutfile_1"
    finout_2 = "inoutfile_2"
    finout_3 = "inoutfile_3"
    finouts = [finout_1, finout_2, finout_3]
    content = "INOUT FILE CONTENT"
    results = []
    for finout in finouts:
        with open(finout, 'w') as f:
            f.write(content)
        results.append(file_inout(finout))
    results = compss_wait_on(results)
    compss_wait_on_file(*finouts)
    i = 0
    content += "\n===> INOUT FILE ADDED CONTENT"
    for finout in finouts:
        with compss_open(finout, 'r') as finout_r:
            content_r = finout_r.read()
        assert results[i] == content, \
            "strings are not equal: {}, {}".format(results[i], content)
        assert content_r == content, \
            "strings are not equal: {}, {}".format(content_r, content)
        i += 1
    # Check if file exists:
    multiple_file_checker(finouts, "INOUT")
    # Remove objects
    compss_delete_object(*results)

    """ Test FILE_OUT """
    fout = "outfile"
    content = "OUT FILE CONTENT"
    res = file_out(fout, content)
    res = compss_wait_on(res)
    compss_wait_on_file(fout)
    with compss_open(fout, 'r') as fout_r:
        content_r = fout_r.read()
    # The final file is only stored after the execution.
    # During the execution, you have to use the compss_open, which will
    # provide the real file where the output file is.
    # fileInFolder = os.path.exists(fout)
    # assert fileInFolder is True, "FILE_OUT is not in the final location"
    assert res == content, \
        "strings are not equal: {}, {}".format(res, content)
    assert content_r == content, \
        "strings are not equal: {}, {}".format(content_r, content)
    # Check if file exists:
    file_checker(fout, "OUT")
    # Remove object
    compss_delete_object(res)

    """ Test Multiple FILE_OUT """
    fout_1 = "outfile_1"
    fout_2 = "outfile_2"
    fout_3 = "outfile_3"
    fouts = [fout_1, fout_2, fout_3]
    content = "OUT FILE CONTENT"
    results = []
    for fout in fouts:
        results.append(file_out(fout, content))
    results = compss_wait_on(results)
    compss_wait_on_file(*fouts)
    i = 0
    for fout in fouts:
        with compss_open(fout, 'r') as fout_r:
            content_r = fout_r.read()
        # The final file is only stored after the execution.
        # During the execution, you have to use the compss_open, which will
        # provide the real file where the output file is.
        # fileInFolder = os.path.exists(fout)
        # assert fileInFolder is True, "FILE_OUT is not in the final location"
        assert results[i] == content, \
            "strings are not equal: {}, {}".format(results[i], content)
        assert content_r == content, \
            "strings are not equal: {}, {}".format(content_r, content)
        i += 1
        # Check if file exists:
        file_checker(fout, "OUT")
    # Remove object
    compss_delete_object(*results)
def delete_object(*objs):
    # Release
    for obj in objs:
        compss_delete_object(obj)
def _qr_economic(r):
    a_shape = (r.shape[0], r.shape[1])
    a_n_blocks = (r._n_blocks[0], r._n_blocks[1])
    b_size = r._reg_shape

    q, q_type = _gen_identity(r.shape[0], a_shape[1], b_size,
                              r._n_blocks[0], r._n_blocks[1])

    r_type = full((r._n_blocks[0], r._n_blocks[1]), (1, 1), OTHER)

    act_q_list = []
    sub_q_list = {}

    for i in range(a_n_blocks[1]):
        act_q_type, act_q, r_type_block, r_block = _qr(
            r._blocks[i][i], r_type._blocks[i][i], b_size, t=True)
        r_type.replace_block(i, i, r_type_block)
        r.replace_block(i, i, r_block)

        act_q_list.append((act_q_type, act_q))

        for j in range(i + 1, a_n_blocks[1]):
            r_type_block, r_block = _dot(act_q, act_q_type,
                                         r._blocks[i][j],
                                         r_type._blocks[i][j], b_size)
            r_type.replace_block(i, j, r_type_block)
            r.replace_block(i, j, r_block)

        # Update values of the respective column
        for j in range(i + 1, r._n_blocks[0]):
            sub_q = [[np.array([0]), np.array([0])],
                     [np.array([0]), np.array([0])]]
            sub_q_type = [[_type_block(OTHER), _type_block(OTHER)],
                          [_type_block(OTHER), _type_block(OTHER)]]

            sub_q[0][0], sub_q[0][1], sub_q[1][0], sub_q[1][1], \
                r_type_block1, r_block1, \
                r_type_block2, r_block2 = _little_qr(
                    r._blocks[i][i], r_type._blocks[i][i],
                    r._blocks[j][i], r_type._blocks[j][i],
                    b_size, transpose=True)
            r_type.replace_block(i, i, r_type_block1)
            r.replace_block(i, i, r_block1)
            r_type.replace_block(j, i, r_type_block2)
            r.replace_block(j, i, r_block2)

            sub_q_list[(j, i)] = (sub_q_type, sub_q)

            # Update values of the row for the value updated in the column
            for k in range(i + 1, a_n_blocks[1]):
                [[r_type_block1], [r_type_block2]], \
                    [[r_block1], [r_block2]] = _multiply_blocked(
                        sub_q, sub_q_type,
                        [[r._blocks[i][k]], [r._blocks[j][k]]],
                        [[r_type._blocks[i][k]], [r_type._blocks[j][k]]],
                        b_size)
                r_type.replace_block(i, k, r_type_block1)
                r.replace_block(i, k, r_block1)
                r_type.replace_block(j, k, r_type_block2)
                r.replace_block(j, k, r_block2)

    for i in reversed(range(len(act_q_list))):
        for j in reversed(range(i + 1, r._n_blocks[0])):
            for k in range(q._n_blocks[1]):
                [[q_type_block1], [q_type_block2]], \
                    [[q_block1], [q_block2]] = _multiply_blocked(
                        sub_q_list[(j, i)][1], sub_q_list[(j, i)][0],
                        [[q._blocks[i][k]], [q._blocks[j][k]]],
                        [[q_type._blocks[i][k]], [q_type._blocks[j][k]]],
                        b_size, transpose_a=True)
                q_type.replace_block(i, k, q_type_block1)
                q.replace_block(i, k, q_block1)
                q_type.replace_block(j, k, q_type_block2)
                q.replace_block(j, k, q_block2)

            compss_delete_object(sub_q_list[(j, i)][0][0])
            compss_delete_object(sub_q_list[(j, i)][0][1])
            compss_delete_object(sub_q_list[(j, i)][1][0])
            compss_delete_object(sub_q_list[(j, i)][1][1])
            del sub_q_list[(j, i)]

        for k in range(q._n_blocks[1]):
            q_type_block, q_block = _dot(act_q_list[i][1], act_q_list[i][0],
                                         q._blocks[i][k],
                                         q_type._blocks[i][k],
                                         b_size, transpose_a=True)
            q_type.replace_block(i, k, q_type_block)
            q.replace_block(i, k, q_block)

        compss_delete_object(act_q_list[i][0])
        compss_delete_object(act_q_list[i][1])

    # removing last rows of r to make it n x n instead of m x n
    remove_last_rows(r, r.shape[0] - r.shape[1])

    return q, r
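# Illustrative usage sketch (added): `_qr_full` and `_qr_economic` are
# internal; the assumed public wrapper is a `qr(a, mode=...)` function as in
# dislib's decomposition module (import path is an assumption). Economic mode
# yields an (m, n) q and a square (n, n) r, which is why `_qr_economic`
# trims r at the end:

import dislib as ds
from dislib.decomposition import qr  # assumed import path

a = ds.random_array((12, 4), (2, 2))
q, r = qr(a, mode="economic")  # q: (12, 4), r: (4, 4); q @ r approximates a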
def kron(a, b, block_size=None):
    """ Kronecker product of two ds-arrays.

    Parameters
    ----------
    a, b : ds-arrays
        Input ds-arrays.
    block_size : tuple of two ints, optional
        Block size of the resulting array. Defaults to the block size of `b`.

    Returns
    -------
    out : ds-array

    Raises
    ------
    NotImplementedError
        If a or b are sparse.
    """
    if a._sparse or b._sparse:
        raise NotImplementedError("Sparse ds-arrays not supported.")

    k_n_blocks = ((a.shape[0] * b._n_blocks[0]),
                  a.shape[1] * b._n_blocks[1])
    k_blocks = Array._get_out_blocks(k_n_blocks)

    # compute the kronecker product by multiplying b by each element in a.
    # The resulting array keeps the block structure of b repeated many
    # times. This is why we need to rechunk it at the end.
    offseti = 0

    for i in range(a._n_blocks[0]):
        offsetj = 0

        for j in range(a._n_blocks[1]):
            bshape_a = a._get_block_shape(i, j)

            for k in range(b._n_blocks[0]):
                for q in range(b._n_blocks[1]):
                    out_blocks = Array._get_out_blocks(bshape_a)
                    _kron(a._blocks[i][j], b._blocks[k][q], out_blocks)

                    for m in range(bshape_a[0]):
                        for n in range(bshape_a[1]):
                            bi = (offseti + m) * b._n_blocks[0] + k
                            bj = (offsetj + n) * b._n_blocks[1] + q
                            k_blocks[bi][bj] = out_blocks[m][n]

            offsetj += bshape_a[1]
        offseti += bshape_a[0]

    shape = (a.shape[0] * b.shape[0], a.shape[1] * b.shape[1])

    if not block_size:
        bsize = b._reg_shape
    else:
        bsize = block_size

    # rechunk the array unless all blocks of b are of the same size and
    # block_size is None
    if (not block_size or block_size == b._reg_shape) and (
            b.shape[0] % b._reg_shape[0] == 0 and
            b.shape[1] % b._reg_shape[1] == 0 and
            b._is_regular()):
        return Array(k_blocks, bsize, bsize, shape, False)
    else:
        out = Array._rechunk(k_blocks, shape, bsize, _kron_shape_f, b)

        for blocks in k_blocks:
            for block in blocks:
                compss_delete_object(block)

        return out
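# Illustrative usage sketch (added; assumes `dislib.array` and
# `Array.collect` for verification against NumPy):

import numpy as np
import dislib as ds

a = ds.array(np.array([[1, 2], [3, 4]]), block_size=(2, 2))
b = ds.array(np.eye(2), block_size=(2, 2))
k = kron(a, b)  # shape (4, 4)
assert np.allclose(k.collect(), np.kron(a.collect(), b.collect()))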