Example #1
def _compute_u_sorted(a, sorting):
    u_blocks = [[] for _ in range(a._n_blocks[1])]
    hbsize = a._reg_shape[1]

    for i, vblock in enumerate(a._iterator("columns")):
        u_block = [object() for _ in range(a._n_blocks[1])]  # placeholders filled in by the task
        _compute_u_block_sorted(vblock._blocks, i, hbsize, sorting, u_block)

        for j in range(len(u_block)):
            u_blocks[j].append(u_block[j])

    vbsize = a._reg_shape[0]
    final_blocks = Array._get_out_blocks(a._n_blocks)

    for i, u_block in enumerate(u_blocks):
        new_block = [object() for _ in range(a._n_blocks[0])]
        _merge_svd_block(u_block, i, hbsize, vbsize, sorting, new_block)

        for j in range(len(new_block)):
            final_blocks[j][i] = new_block[j]

        for elem in u_block:
            compss_delete_object(elem)

    return Array(final_blocks, a._top_left_shape, a._reg_shape, a.shape,
                 a._sparse)
Example #2
def _sort_v(v, sorting):
    v_blocks = [[] for _ in range(v._n_blocks[1])]
    hbsize = v._reg_shape[1]

    for i, vblock in enumerate(v._iterator("columns")):
        out_blocks = [[] for _ in range(v._n_blocks[1])]
        _sort_v_block(vblock._blocks, i, hbsize, sorting, out_blocks)

        for j in range(len(out_blocks)):
            v_blocks[j].append(out_blocks[j])

    vbsize = v._reg_shape[0]
    final_blocks = Array._get_out_blocks(v._n_blocks)

    for i, v_block in enumerate(v_blocks):
        new_block = [object() for _ in range(v._n_blocks[0])]
        _merge_svd_block(v_block, i, hbsize, vbsize, sorting, new_block)

        for j in range(len(new_block)):
            final_blocks[j][i] = new_block[j]

        for elem in v_block:
            compss_delete_object(elem)

    return Array(final_blocks, v._top_left_shape, v._reg_shape, v.shape,
                 v._sparse)
Example #3
File: io.py Project: vibhatha/dislib
def _load_mdcrd(path, block_size, n_cols, n_blocks, bytes_per_snap,
                bytes_per_block):
    blocks = []

    file_size = os.stat(path).st_size - _CRD_LINE_SIZE

    with open(path, "rb") as fid:
        fid.read(_CRD_LINE_SIZE)  # skip header

        for _ in range(0, file_size, bytes_per_block):
            data = fid.read(bytes_per_block)
            out_blocks = [object() for _ in range(n_blocks)]
            _read_crd_bytes(data, block_size[1], n_cols, out_blocks)
            compss_delete_object(data)
            blocks.append(out_blocks)

    n_samples = int(file_size / bytes_per_snap)

    return Array(blocks,
                 top_left_shape=block_size,
                 reg_shape=block_size,
                 shape=(n_samples, n_cols),
                 sparse=False)
Example #4
 def testDeleteObject2(self):
     obj_1 = [0]
     for i in range(10):
         obj_1[0] = i - 1
         obj_2 = increment_object(obj_1)
         obj_2 = compss_wait_on(obj_2)
         compss_delete_object(obj_1)
         self.assertEqual(i, obj_2[0])
Example #5
    def fit(self, x, y=None):
        """Estimate model parameters with the EM algorithm.

        Iterates between E-steps and M-steps until convergence or until
        `max_iter` iterations are reached. It estimates the model parameters
        `weights_`, `means_` and `covariances_`.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            Data points.
        y : ignored
            Not used, present here for API consistency by convention.

        Warns
        -----
        ConvergenceWarning
            If `tol` is not None and `max_iter` iterations are reached without
            convergence.
        """
        self._check_initial_parameters()

        self.converged_ = False
        self.n_iter = 0

        random_state = validation.check_random_state(self.random_state)

        self._initialize_parameters(x, random_state)
        self.lower_bound_ = -np.inf
        if self.verbose:
            print("GaussianMixture EM algorithm start")
        for self.n_iter in range(1, self.max_iter + 1):
            prev_lower_bound = self.lower_bound_

            self.lower_bound_, resp = self._e_step(x)
            self._m_step(x, resp)
            for resp_block in resp._blocks:
                compss_delete_object(resp_block)

            if self.check_convergence:
                self.lower_bound_ = compss_wait_on(self.lower_bound_)
                diff = abs(self.lower_bound_ - prev_lower_bound)

                if self.verbose:
                    iter_msg_template = "Iteration %s - Convergence crit. = %s"
                    print(iter_msg_template % (self.n_iter, diff))

                if diff < self.tol:
                    self.converged_ = True
                    break

        if self.check_convergence and not self.converged_:
            warnings.warn('The algorithm did not converge. '
                          'Try different init parameters, '
                          'or increase max_iter, tol '
                          'or check for degenerate data.',
                          ConvergenceWarning)
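For context, here is a minimal usage sketch of this fit method through dislib's public API (assuming dislib is installed and a COMPSs runtime is available; the toy data and block size are illustrative):

import numpy as np
import dislib as ds
from dislib.cluster import GaussianMixture

# Illustrative toy data: 300 two-dimensional points in 100-row blocks.
x = ds.array(np.random.rand(300, 2), block_size=(100, 2))

gm = GaussianMixture(n_components=3, max_iter=100, tol=1e-3)
gm.fit(x)  # runs the EM loop shown above
print(gm.converged_, gm.n_iter)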
Example #6
 def testDeleteObject3(self):
     obj_1 = [0]
     obj_list = []
     for i in range(10):
         obj_1[0] = i - 1
         obj_2 = increment_object(obj_1)
         obj_list.append(obj_2)
         compss_delete_object(obj_1)
     obj_list = compss_wait_on(obj_list)
     self.assertEqual([[i] for i in range(10)], obj_list)
Example #7
File: base.py Project: sravyasri/dislib
    def _do_iteration(self, x, y, ids_list):
        q = []
        pars = self._clf_params
        arity = self._arity

        # first level
        for partition, id_bk in zip(_paired_partition(x, y), ids_list):
            x_data = partition[0]._blocks
            y_data = partition[1]._blocks
            ids = [id_bk]

            if self._svs is not None:
                x_data.append(self._svs)
                y_data.append([self._sv_labels])
                ids.append([self._sv_ids])

            _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
            sv, sv_labels, sv_ids, self._clf = _tmp
            q.append((sv, sv_labels, sv_ids))

        # reduction
        while len(q) > arity:
            data = q[:arity]
            del q[:arity]

            x_data = [tup[0] for tup in data]
            y_data = [[tup[1]] for tup in data]
            ids = [[tup[2]] for tup in data]

            _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
            sv, sv_labels, sv_ids, self._clf = _tmp
            q.append((sv, sv_labels, sv_ids))

            # delete partial results
            for partial in data:
                compss_delete_object(partial)

        # last layer
        x_data = [tup[0] for tup in q]
        y_data = [[tup[1]] for tup in q]
        ids = [[tup[2]] for tup in q]

        _tmp = _train(x_data, y_data, ids, self._random_state, **pars)
        self._svs, self._sv_labels, self._sv_ids, self._clf = _tmp

        self.iterations += 1
Example #8
    def fit(self, dataset):
        """Fits the DecisionTreeClassifier.

        Parameters
        ----------
        dataset : dislib.classification.rf._data.RfDataset

        """

        self.n_features = dataset.get_n_features()
        self.n_classes = dataset.get_n_classes()
        samples_path = dataset.samples_path
        features_path = dataset.features_path
        n_samples = dataset.get_n_samples()
        y_codes = dataset.get_y_codes()

        seed = self.random_state.randint(np.iinfo(np.int32).max)

        sample, y_s = _sample_selection(n_samples, y_codes, self.bootstrap,
                                        seed)

        self.tree = _Node()
        self.nodes_info = []
        self.subtrees = []
        tree_traversal = [(self.tree, sample, y_s, 0)]
        while tree_traversal:
            node, sample, y_s, depth = tree_traversal.pop()
            if depth < self.distr_depth:
                split = _split_node_wrapper(sample,
                                            self.n_features,
                                            y_s,
                                            self.n_classes,
                                            self.try_features,
                                            self.random_state,
                                            samples_file=samples_path,
                                            features_file=features_path)
                node_info, left_group, y_l, right_group, y_r = split
                compss_delete_object(sample)
                compss_delete_object(y_s)
                node.content = len(self.nodes_info)
                self.nodes_info.append(node_info)
                node.left = _Node()
                node.right = _Node()
                depth = depth + 1
                tree_traversal.append((node.right, right_group, y_r, depth))
                tree_traversal.append((node.left, left_group, y_l, depth))
            else:
                subtree = _build_subtree_wrapper(
                    sample, y_s, self.n_features, self.max_depth - depth,
                    self.n_classes, self.try_features, self.sklearn_max,
                    self.random_state, samples_path, features_path)
                node.content = len(self.subtrees)
                self.subtrees.append(subtree)
                compss_delete_object(sample)
                compss_delete_object(y_s)
        self.nodes_info = _merge(*self.nodes_info)
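Since this fit takes an internal RfDataset, user code would normally reach it through the forest-level estimator. A sketch of that public path (assuming dislib's RandomForestClassifier, which builds the dataset and its decision trees internally; data and parameters are illustrative):

import numpy as np
import dislib as ds
from dislib.classification import RandomForestClassifier

# Toy data: 100 samples with 4 features and binary labels.
x = ds.array(np.random.rand(100, 4), block_size=(50, 2))
y = ds.array(np.random.randint(0, 2, size=(100, 1)), block_size=(50, 1))

forest = RandomForestClassifier(n_estimators=2, distr_depth=1)
forest.fit(x, y)  # each tree is fitted with a traversal like the one above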
Example #9
 def testDeleteObject1(self):
     obj_1 = [0]
     obj_2 = increment_object(obj_1)
     obj_2 = compss_wait_on(obj_2)
     obj_1_id = OT.get_object_id(obj_1)
     deletion_result = compss_delete_object(obj_1)
     self.assertTrue(deletion_result)
     self.assertFalse(obj_1_id in OT.pending_to_synchronize)
     self.assertEqual(OT.get_object_id(obj_1), "")
Example #10
 def testDeleteObject1(self):
     from pycompss.runtime.binding import pending_to_synchronize
     from pycompss.runtime.binding import objid_to_filename
     from pycompss.runtime.binding import get_object_id
     obj_1 = [0]
     obj_2 = increment_object(obj_1)
     obj_2 = compss_wait_on(obj_2)
     obj_1_id = get_object_id(obj_1, False, False)
     deletion_result = compss_delete_object(obj_1)
     self.assertTrue(deletion_result)
     self.assertFalse(obj_1_id in pending_to_synchronize)
     self.assertTrue(get_object_id(obj_1, False, False) is None)
Example #11
def remove_last_rows(a: Array, n_rows):
    """
        Removes last rows from the bottom blocks of the ds-array.
        Parameters
        ----------
        a : ds-array
            The array to pad.
        n_rows : int
            The array to pad.
    """
    if n_rows <= 0:
        return

    right_bottom_shape = compute_bottom_right_shape(a)

    if n_rows >= right_bottom_shape[0]:
        # removing whole blocks
        removed_blocks = int(n_rows / right_bottom_shape[0])
        removed_rows = removed_blocks * right_bottom_shape[0]
        for i in reversed(
                range(a._n_blocks[0] - removed_blocks, a._n_blocks[0])):
            compss_delete_object(a._blocks[i])
            del a._blocks[i]

        a._n_blocks = (a._n_blocks[0] - removed_blocks, a._n_blocks[1])
        a._shape = (a._shape[0] - removed_rows, a._shape[1])
        n_rows = n_rows - removed_rows

    if n_rows <= 0:
        return

    for col_block_idx in range(a._n_blocks[1]):
        # removing remaining rows
        padded_block = _remove_bottom_rows(a._blocks[-1][col_block_idx],
                                           n_rows)
        a._blocks[-1][col_block_idx] = padded_block

    a._shape = (a._shape[0] - n_rows, a._shape[1])
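A small sketch of the in-place trimming this helper performs (assuming remove_last_rows and its helpers from the listing are in scope; the array is illustrative):

import numpy as np
import dislib as ds

# 10x2 array in 5-row blocks; removing 6 rows drops the whole bottom
# block (5 rows) and then trims 1 row from the new bottom block.
a = ds.array(np.arange(20).reshape(10, 2), block_size=(5, 2))
remove_last_rows(a, 6)
print(a.shape)  # expected: (4, 2)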
Example #12
def shuffle(x, y=None, random_state=None):
    """ Randomly shuffles the rows of data.

    Parameters
    ----------
    x : ds-array
        Data to be shuffled.
    y : ds-array, optional (default=None)
        Additional array to shuffle using the same permutation, usually for
        labels or values. It is required that y.shape[0] == x.shape[0].
    random_state : int or RandomState, optional (default=None)
        Seed or numpy.random.RandomState instance to use in the generation of
        random numbers.

    Returns
    -------
    x_shuffled : ds-array
        A new ds-array containing the rows of x shuffled.
    y_shuffled : ds-array, optional
        A new ds-array containing the rows of y shuffled using the same
        permutation. Only provided if y is not None.
    """
    if y is not None:
        assert y.shape[0] == x.shape[0]

    np.random.seed(random_state)
    block_n_rows = x._reg_shape[0]
    sizes_out = [block_n_rows for _ in range(x._shape[0] // block_n_rows)]
    remainder = x._shape[0] % block_n_rows
    if remainder != 0:
        sizes_out.append(remainder)

    # Matrix of subsets of rows (subsamples) going from part_in_i to part_out_j
    mapped_subsamples = []

    # For each part_in, get the parts going to each part_out
    if y is None:
        partition = x._iterator(axis=0)
    else:
        partition = _paired_partition(x, y)
    for part_in in partition:
        # part can be an array x_part or a tuple (x_part, y_part)
        part_sizes, part_in_subsamples = _partition_arrays(part_in, sizes_out)
        mapped_subsamples.append(part_in_subsamples)
        sizes_out -= part_sizes

    x_shuffled_blocks = []
    y_shuffled_blocks = []
    for j in range(len(sizes_out)):
        part_out_subsamples = [
            part_in_subsamples[j] for part_in_subsamples in mapped_subsamples
        ]
        seed = np.random.randint(np.iinfo(np.int32).max)
        part_out_x_blocks = [{} for _ in range(x._n_blocks[1])]
        if y is None:
            _merge_shuffle_x(seed, part_out_subsamples, part_out_x_blocks,
                             x._reg_shape[1])
        else:
            part_out_y_blocks = [{} for _ in range(y._n_blocks[1])]
            _merge_shuffle_xy(seed, part_out_subsamples, part_out_x_blocks,
                              part_out_y_blocks, x._reg_shape[1],
                              y._reg_shape[1])
            y_shuffled_blocks.append(part_out_y_blocks)
        x_shuffled_blocks.append(part_out_x_blocks)

        # Clean parts to save disk space
        for part in part_out_subsamples:
            compss_delete_object(part)

    x_shuffled = Array(blocks=x_shuffled_blocks,
                       top_left_shape=x._reg_shape,
                       reg_shape=x._reg_shape,
                       shape=x.shape,
                       sparse=x._sparse)
    if y is None:
        return x_shuffled
    else:
        y_shuffled = Array(blocks=y_shuffled_blocks,
                           top_left_shape=(x._reg_shape[0], y._reg_shape[1]),
                           reg_shape=(x._reg_shape[0], y._reg_shape[1]),
                           shape=y.shape,
                           sparse=y._sparse)
        return x_shuffled, y_shuffled
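A usage sketch for shuffle (assuming a running COMPSs runtime; data and block sizes are illustrative):

import numpy as np
import dislib as ds
from dislib.utils import shuffle

x = ds.array(np.arange(40).reshape(10, 4), block_size=(4, 4))
y = ds.array(np.arange(10).reshape(10, 1), block_size=(4, 1))

# The same permutation is applied to both arrays; the seed makes it
# repeatable.
x_s, y_s = shuffle(x, y, random_state=42)
print(x_s.shape, y_s.shape)  # (10, 4) (10, 1)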
Example #13
    def fit(self, dataset, feature_types):

        count_num_attr = len([a for a in feature_types if a == "num"])

        if count_num_attr > 0:
            self.epsilon = self.privacy_budget / ((
                (2 + count_num_attr) * self.max_depth) + 2)

        self.n_features = dataset.get_n_features()
        self.n_classes = dataset.get_n_classes()
        samples_path = dataset.samples_path
        features_path = dataset.features_path
        n_samples = dataset.get_n_samples()
        y_codes = dataset.get_y_codes()

        seed = self.random_state.randint(np.iinfo(np.int32).max)

        sample, y_s = _sample_selection(n_samples, y_codes, self.bootstrap,
                                        seed)

        self.tree = _Node()
        self.nodes_info = []
        self.subtrees = []
        tree_traversal = [(self.tree, sample, y_s, 0)]

        while tree_traversal:
            node, sample, y_s, depth = tree_traversal.pop()

            # print("depth=", depth, ", self.distr_depth=", self.distr_depth)

            if depth < self.distr_depth:
                # print("depth < self.distr_depth = True >>>> _split_node_wrapper")

                split = _split_node_wrapper(sample,
                                            self.n_features,
                                            feature_types,
                                            y_s,
                                            self.n_classes,
                                            self.try_features,
                                            self.random_state,
                                            self.epsilon,
                                            samples_file=samples_path,
                                            features_file=features_path)

                node_info, left_group, y_l, right_group, y_r = split
                compss_delete_object(sample)
                compss_delete_object(y_s)
                node.content = len(self.nodes_info)
                self.nodes_info.append(node_info)
                node.left = _Node()
                node.right = _Node()
                depth = depth + 1
                tree_traversal.append((node.right, right_group, y_r, depth))
                tree_traversal.append((node.left, left_group, y_l, depth))
            else:
                # print("depth < self.distr_depth = False >>>> _build_subtree_wrapper")

                subtree = _build_subtree_wrapper(
                    sample, y_s, self.n_features, feature_types,
                    self.max_depth - depth, self.n_classes, self.try_features,
                    self.sklearn_max, self.random_state, samples_path,
                    features_path, self.epsilon)

                node.content = len(self.subtrees)
                self.subtrees.append(subtree)
                compss_delete_object(sample)
                compss_delete_object(y_s)

        self.nodes_info = _merge(*self.nodes_info)
Example #14
File: test_api.py Project: bsc-wdc/compss
def test_dummy_api():
    from pycompss.api.dummy.api import compss_start
    from pycompss.api.dummy.api import compss_stop
    from pycompss.api.dummy.api import compss_file_exists
    from pycompss.api.dummy.api import compss_open
    from pycompss.api.dummy.api import compss_delete_file
    from pycompss.api.dummy.api import compss_wait_on_file
    from pycompss.api.dummy.api import compss_wait_on_directory
    from pycompss.api.dummy.api import compss_delete_object
    from pycompss.api.dummy.api import compss_barrier
    from pycompss.api.dummy.api import compss_barrier_group
    from pycompss.api.dummy.api import compss_wait_on
    from pycompss.api.dummy.api import compss_get_number_of_resources
    from pycompss.api.dummy.api import compss_request_resources
    from pycompss.api.dummy.api import compss_free_resources
    from pycompss.api.dummy.api import TaskGroup

    file_name = "simulated_file.txt"
    file_names = ["simulated_file1.txt", "simulated_file2.txt"]
    directory_name = "simulated_directory"
    directory_names = ["simulated_directory1", "simulated_directory2"]
    group_name = "simulated_group"
    obj = [1, 2, 3]
    num_resources = 1

    with open(file_name, "w") as f:
        f.write("some content")
    os.mkdir(directory_name)

    for f_name in file_names:
        with open(f_name, "w") as f:
            f.write("some content")
    for d_name in directory_names:
        os.mkdir(d_name)

    compss_start(log_level="off", interactive=False)
    compss_stop(code=0)
    compss_file_exists(file_name)
    compss_file_exists(*file_names)
    compss_open(file_name, mode="r")
    compss_delete_file(file_name)
    compss_delete_file(*file_names)
    compss_wait_on_file(file_name)
    compss_wait_on_file(*file_names)
    compss_wait_on_directory(directory_name)
    compss_wait_on_directory(*directory_names)
    compss_delete_object(obj)
    compss_delete_object(*obj)
    compss_barrier(no_more_tasks=False)
    compss_barrier_group(group_name)
    compss_wait_on(obj)
    compss_wait_on(*obj)
    compss_get_number_of_resources()
    compss_request_resources(num_resources, group_name)
    compss_free_resources(num_resources, group_name)

    with TaskGroup(group_name, implicit_barrier=True):
        # Empty task group check
        pass

    os.remove(file_name)
    os.rmdir(directory_name)

    for f_name in file_names:
        os.remove(f_name)
    for d_name in directory_names:
        os.rmdir(d_name)
Example #15
def delete_object(obj):  # Release
    compss_delete_object(obj)
Example #16
def _qr_full(r):
    b_size = r._reg_shape
    q, q_type = _gen_identity(r.shape[0], r.shape[0], r._reg_shape,
                              r._n_blocks[0], r._n_blocks[0])

    r_type = full((r._n_blocks[0], r._n_blocks[1]), (1, 1), OTHER)

    for i in range(r._n_blocks[1]):
        act_q_type, act_q, r_type_block, r_block = _qr(r._blocks[i][i],
                                                       r_type._blocks[i][i],
                                                       r._reg_shape,
                                                       t=True)
        r_type.replace_block(i, i, r_type_block)
        r.replace_block(i, i, r_block)

        for j in range(r._n_blocks[0]):
            q_type_block, q_block = _dot(q._blocks[j][i],
                                         q_type._blocks[j][i],
                                         act_q,
                                         act_q_type,
                                         b_size,
                                         transpose_b=True)
            q_type.replace_block(j, i, q_type_block)
            q.replace_block(j, i, q_block)

        for j in range(i + 1, r._n_blocks[1]):
            r_type_block, r_block = _dot(act_q, act_q_type, r._blocks[i][j],
                                         r_type._blocks[i][j], b_size)
            r_type.replace_block(i, j, r_type_block)
            r.replace_block(i, j, r_block)

        compss_delete_object(act_q_type)
        compss_delete_object(act_q)

        sub_q = [[np.array([0]), np.array([0])],
                 [np.array([0]), np.array([0])]]
        sub_q_type = [[_type_block(OTHER),
                       _type_block(OTHER)],
                      [_type_block(OTHER),
                       _type_block(OTHER)]]

        # Update values of the respective column
        for j in range(i + 1, r._n_blocks[0]):
            sub_q[0][0], sub_q[0][1], sub_q[1][0], sub_q[1][1], \
              r_type_block1, r_block1, r_type_block2, r_block2 = _little_qr(
                r._blocks[i][i],
                r_type._blocks[i][i],
                r._blocks[j][i],
                r_type._blocks[j][i],
                r._reg_shape,
                transpose=True
            )
            r_type.replace_block(i, i, r_type_block1)
            r.replace_block(i, i, r_block1)
            r_type.replace_block(j, i, r_type_block2)
            r.replace_block(j, i, r_block2)

            # Update values of the row for the value updated in the column
            for k in range(i + 1, r._n_blocks[1]):
                [[r_type_block1], [r_type_block2]], \
                  [[r_block1], [r_block2]] = _multiply_blocked(
                    sub_q,
                    sub_q_type,
                    [[r._blocks[i][k]], [r._blocks[j][k]]],
                    [[r_type._blocks[i][k]], [r_type._blocks[j][k]]],
                    r._reg_shape
                )
                r_type.replace_block(i, k, r_type_block1)
                r.replace_block(i, k, r_block1)
                r_type.replace_block(j, k, r_type_block2)
                r.replace_block(j, k, r_block2)

            for k in range(r._n_blocks[0]):
                [[q_type_block1, q_type_block2]], \
                  [[q_block1, q_block2]] = _multiply_blocked(
                    [[q._blocks[k][i], q._blocks[k][j]]],
                    [[q_type._blocks[k][i], q_type._blocks[k][j]]],
                    sub_q,
                    sub_q_type,
                    r._reg_shape,
                    transpose_b=True
                )
                q_type.replace_block(k, i, q_type_block1)
                q.replace_block(k, i, q_block1)
                q_type.replace_block(k, j, q_type_block2)
                q.replace_block(k, j, q_block2)

            compss_delete_object(sub_q[0][0])
            compss_delete_object(sub_q[0][1])
            compss_delete_object(sub_q[1][0])
            compss_delete_object(sub_q[1][1])

    return q, r
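This helper backs the full QR mode of dislib's public qr routine (the economic variant appears in Example #21 below). A sketch, assuming the dislib.math.qr entry point with square, regular blocks:

import numpy as np
import dislib as ds
from dislib.math import qr

x = ds.array(np.random.rand(8, 4), block_size=(4, 4))
q, r = qr(x, mode="full")            # dispatches to a helper like _qr_full
# q_e, r_e = qr(x, mode="economic")  # economic variant (see Example #21)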
Example #17
    def _initialize_parameters(self, x, random_state):
        """Initialization of the Gaussian mixture parameters.

        Parameters
        ----------
        x : ds-array, shape=(n_samples, n_features)
            Data points.

        random_state : RandomState
            A random number generator instance.
        """
        if self.weights_init is not None:
            self.weights_ = self.weights_init / np.sum(self.weights_init)
        if self.means_init is not None:
            self.means_ = self.means_init
        if self.precisions_init is not None:
            if self.covariance_type == 'full':
                self.precisions_cholesky_ = np.array(
                    [linalg.cholesky(prec_init, lower=True)
                     for prec_init in self.precisions_init])
            elif self.covariance_type == 'tied':
                self.precisions_cholesky_ = linalg.cholesky(
                    self.precisions_init, lower=True)
            else:
                self.precisions_cholesky_ = self.precisions_init
        initialize_params = (self.weights_init is None or
                             self.means_init is None or
                             self.precisions_init is None)
        if initialize_params:
            n_components = self.n_components
            resp_blocks = []
            if self.init_params == 'kmeans':
                if self.verbose:
                    print("KMeans initialization start")
                seed = random_state.randint(0, int(1e8))
                kmeans = KMeans(n_clusters=n_components, random_state=seed,
                                verbose=self.verbose)
                y = kmeans.fit_predict(x)
                self.kmeans = kmeans
                for y_part in y._iterator(axis=0):
                    resp_blocks.append([_resp_subset(y_part._blocks,
                                                     n_components)])

            elif self.init_params == 'random':
                chunks = x._n_blocks[0]
                seeds = random_state.randint(np.iinfo(np.int32).max,
                                             size=chunks)
                for i, x_row in enumerate(x._iterator(axis=0)):
                    resp_blocks.append([_random_resp_subset(x_row.shape[0],
                                                            n_components,
                                                            seeds[i])])
            else:
                raise ValueError("Unimplemented initialization method '%s'"
                                 % self.init_params)
            resp = Array(blocks=resp_blocks,
                         top_left_shape=(x._top_left_shape[0], n_components),
                         reg_shape=(x._reg_shape[0], n_components),
                         shape=(x.shape[0], n_components), sparse=False)
            weights, nk, means = self._estimate_parameters(x, resp)
            if self.means_init is None:
                self.means_ = means
            if self.weights_init is None:
                self.weights_ = weights

            if self.precisions_init is None:
                cov, p_c = _estimate_covariances(x, resp, nk,
                                                 self.means_, self.reg_covar,
                                                 self.covariance_type,
                                                 self.arity)
                self.covariances_ = cov
                self.precisions_cholesky_ = p_c

            for resp_block in resp._blocks:
                compss_delete_object(resp_block)
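The two branches above implement the public init_params choices. A brief sketch of how they are selected (parameter values are illustrative):

from dislib.cluster import GaussianMixture

# 'kmeans' seeds the responsibilities from a KMeans clustering;
# 'random' draws them uniformly per row block, as in the code above.
gm_kmeans = GaussianMixture(n_components=4, init_params='kmeans')
gm_random = GaussianMixture(n_components=4, init_params='random',
                            random_state=7)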
Example #18
File: matmul.py Project: bsc-wdc/apps
def main(num_blocks, elems_per_block, check_result, seed, use_storage):
    """
    Matmul main.
    :param num_blocks: <Integer> Number of blocks
    :param elems_per_block: <Integer> Number of elements per block
    :param check_result: <Boolean> Check results against sequential version
                         of matmul
    :param seed: <Integer> Random seed
    :param use_storage: <Boolean> Use storage
    :return: None
    """
    start_time = time.time()

    # Generate the dataset in a distributed manner,
    # i.e., avoid materializing whole matrices in the master
    A, B, C = [], [], []
    matrix_name = ["A", "B"]
    for i in range(num_blocks):
        for l in [A, B, C]:
            l.append([])
        # Keep track of blockId to initialize with different random seeds
        bid = 0
        for j in range(num_blocks):
            for ix, l in enumerate([A, B]):
                psco_name = matrix_name[ix] + str(i) + 'g' + str(j)
                l[-1].append(
                    generate_block(elems_per_block,
                                   num_blocks,
                                   seed=seed + bid,
                                   use_storage=use_storage,
                                   psco_name=psco_name))
                bid += 1
            C[-1].append(
                generate_block(elems_per_block,
                               num_blocks,
                               set_to_zero=True,
                               use_storage=False))
    compss_barrier()
    initialization_time = time.time()

    # Do matrix multiplication
    dot(A, B, C)

    compss_barrier()
    multiplication_time = time.time()

    if use_storage:
        # Persist the result in a distributed manner (i.e: exploit data
        # locality & avoid memory flooding)
        for i in range(num_blocks):
            for j in range(num_blocks):
                psco_name = 'C_' + str(i) + '_' + str(j)
                persist_result(C[i][j], psco_name)
                # If we are not going to check the result, we can safely delete
                # the Cij intermediate matrices
                if not check_result:
                    compss_delete_object(C[i][j])
        compss_barrier()
        persist_c_time = time.time()
    else:
        persist_c_time = multiplication_time

    # Check that we get the same result when multiplying sequentially (no
    # tasks). Note that this implies having the whole A and B matrices in
    # the master, so it is advisable to set --check_result only with small
    # matrices. Explicit correctness (i.e., that an actual dot product is
    # performed) must be checked manually.
    if check_result:
        for i in range(num_blocks):
            for j in range(num_blocks):
                A[i][j] = compss_wait_on(A[i][j])
                B[i][j] = compss_wait_on(B[i][j])
        for i in range(num_blocks):
            for j in range(num_blocks):
                Cij = compss_wait_on(C[i][j])
                Dij = generate_block(elems_per_block,
                                     num_blocks,
                                     use_storage=False,
                                     set_to_zero=True)
                Dij = compss_wait_on(Dij)
                import numpy as np
                for k in range(num_blocks):
                    Dij.block += np.dot(A[i][k].block, B[k][j].block)
                if not np.allclose(Cij.block, Dij.block):
                    print('Block %d-%d gives different products!' % (i, j))
                    return
        print('Distributed and sequential results coincide!')

    print("-----------------------------------------")
    print("-------------- RESULTS ------------------")
    print("-----------------------------------------")
    print("Initialization time: %f" % (initialization_time - start_time))
    print("Multiplication time: %f" %
          (multiplication_time - initialization_time))
    print("Persist C time     : %f" % (persist_c_time - multiplication_time))
    print("Total time: %f" % (persist_c_time - start_time))
    print("-----------------------------------------")
Example #19
def files():
    """ Test FILE_IN """
    fin = "infile"
    content = "IN FILE CONTENT"
    with open(fin, 'w') as f:
        f.write(content)
    res = file_in(fin)
    res = compss_wait_on(res)
    assert res == content, "strings are not equal: {}, {}".format(res, content)

    # Check if file exists:
    file_checker(fin, "IN")
    # Remove object
    compss_delete_object(res)

    """ Test Multiple FILE_IN """
    fin_1 = "infile_1"
    fin_2 = "infile_2"
    fin_3 = "infile_3"
    fins = [fin_1, fin_2, fin_3]
    content = "IN FILE CONTENT"
    results = []
    for fin in fins:
        with open(fin, 'w') as f:
            f.write(content)
        results.append(file_in(fin))
    results = compss_wait_on(results)
    for res in results:
        assert res == content, "strings are not equal: {}, {}".format(res, content)

    # Check if file exists:
    multiple_file_checker(fins, "IN")
    # Remove objects
    compss_delete_object(*results)

    """ Test FILE_INOUT """
    finout = "inoutfile"
    content = "INOUT FILE CONTENT"
    with open(finout, 'w') as f:
        f.write(content)
    res = file_inout(finout)
    res = compss_wait_on(res)
    compss_wait_on_file(finout)
    with compss_open(finout, 'r') as finout_r:
        content_r = finout_r.read()
    content += "\n===> INOUT FILE ADDED CONTENT"
    assert res == content, "strings are not equal: {}, {}".format(res, content)
    assert content_r == content, "strings are not equal: {}, {}".format(content_r, content)

    # Check if file exists:
    file_checker(finout, "INOUT")
    # Remove object
    compss_delete_object(res)

    """ Test Multiple FILE_INOUT """
    finout_1 = "inoutfile_1"
    finout_2 = "inoutfile_2"
    finout_3 = "inoutfile_3"
    finouts = [finout_1, finout_2, finout_3]
    content = "INOUT FILE CONTENT"
    results = []
    for finout in finouts:
        with open(finout, 'w') as f:
            f.write(content)
        results.append(file_inout(finout))
    results = compss_wait_on(results)
    compss_wait_on_file(*finouts)
    i = 0
    content += "\n===> INOUT FILE ADDED CONTENT"
    for finout in finouts:
        with compss_open(finout, 'r') as finout_r:
            content_r = finout_r.read()
        assert results[i] == content, "strings are not equal: {}, {}".format(results[i], content)
        assert content_r == content, "strings are not equal: {}, {}".format(content_r, content)
        i += 1

    # Check if file exists:
    multiple_file_checker(finouts, "INOUT")
    # Remove objects
    compss_delete_object(*results)

    """ Test FILE_OUT """
    fout = "outfile"
    content = "OUT FILE CONTENT"
    res = file_out(fout, content)
    res = compss_wait_on(res)
    compss_wait_on_file(fout)
    with compss_open(fout, 'r') as fout_r:
        content_r = fout_r.read()
    # The final file is only stored after the execution.
    # During the execution, you have to use the compss_open, which will
    # provide the real file where the output file is.
    # fileInFolder = os.path.exists(fout)
    # assert fileInFolder is True, "FILE_OUT is not in the final location"
    assert res == content, "strings are not equal: {}, {}".format(res, content)
    assert content_r == content, "strings are not equal: {}, {}".format(content_r, content)

    # Check if file exists:
    file_checker(fout, "OUT")
    # Remove object
    compss_delete_object(res)

    """ Test Multiple FILE_OUT """
    fout_1 = "outfile_1"
    fout_2 = "outfile_2"
    fout_3 = "outfile_3"
    fouts = [fout_1, fout_2, fout_3]
    content = "OUT FILE CONTENT"
    results = []
    for fout in fouts:
        results.append(file_out(fout, content))
    results = compss_wait_on(results)
    compss_wait_on_file(*fouts)
    i = 0
    for fout in fouts:
        with compss_open(fout, 'r') as fout_r:
            content_r = fout_r.read()
        # The final file is only stored after the execution.
        # During the execution, you have to use the compss_open, which will
        # provide the real file where the output file is.
        # fileInFolder = os.path.exists(fout)
        # assert fileInFolder is True, "FILE_OUT is not in the final location"
        assert results[i] == content, "strings are not equal: {}, {}".format(results[i], content)
        assert content_r == content, "strings are not equal: {}, {}".format(content_r, content)
        i += 1

    # Check if files exist:
    multiple_file_checker(fouts, "OUT")
    # Remove objects
    compss_delete_object(*results)
Example #20
def delete_object(*objs):  # Release
    for obj in objs:
        compss_delete_object(obj)
Example #21
def _qr_economic(r):
    a_shape = (r.shape[0], r.shape[1])
    a_n_blocks = (r._n_blocks[0], r._n_blocks[1])
    b_size = r._reg_shape

    q, q_type = _gen_identity(r.shape[0], a_shape[1], b_size, r._n_blocks[0],
                              r._n_blocks[1])

    r_type = full((r._n_blocks[0], r._n_blocks[1]), (1, 1), OTHER)

    act_q_list = []
    sub_q_list = {}

    for i in range(a_n_blocks[1]):
        act_q_type, act_q, r_type_block, r_block = _qr(r._blocks[i][i],
                                                       r_type._blocks[i][i],
                                                       b_size,
                                                       t=True)
        r_type.replace_block(i, i, r_type_block)
        r.replace_block(i, i, r_block)
        act_q_list.append((act_q_type, act_q))

        for j in range(i + 1, a_n_blocks[1]):
            r_type_block, r_block = _dot(act_q, act_q_type, r._blocks[i][j],
                                         r_type._blocks[i][j], b_size)
            r_type.replace_block(i, j, r_type_block)
            r.replace_block(i, j, r_block)

        # Update values of the respective column
        for j in range(i + 1, r._n_blocks[0]):
            sub_q = [[np.array([0]), np.array([0])],
                     [np.array([0]), np.array([0])]]
            sub_q_type = [[_type_block(OTHER),
                           _type_block(OTHER)],
                          [_type_block(OTHER),
                           _type_block(OTHER)]]

            sub_q[0][0], sub_q[0][1], sub_q[1][0], sub_q[1][1], \
                r_type_block1, r_block1, \
                r_type_block2, r_block2 = _little_qr(
                    r._blocks[i][i], r_type._blocks[i][i],
                    r._blocks[j][i], r_type._blocks[j][i],
                    b_size, transpose=True
            )
            r_type.replace_block(i, i, r_type_block1)
            r.replace_block(i, i, r_block1)
            r_type.replace_block(j, i, r_type_block2)
            r.replace_block(j, i, r_block2)

            sub_q_list[(j, i)] = (sub_q_type, sub_q)

            # Update values of the row for the value updated in the column
            for k in range(i + 1, a_n_blocks[1]):
                [[r_type_block1], [r_type_block2]], \
                  [[r_block1], [r_block2]] = _multiply_blocked(
                    sub_q,
                    sub_q_type,
                    [[r._blocks[i][k]], [r._blocks[j][k]]],
                    [[r_type._blocks[i][k]], [r_type._blocks[j][k]]],
                    b_size
                )
                r_type.replace_block(i, k, r_type_block1)
                r.replace_block(i, k, r_block1)
                r_type.replace_block(j, k, r_type_block2)
                r.replace_block(j, k, r_block2)

    for i in reversed(range(len(act_q_list))):
        for j in reversed(range(i + 1, r._n_blocks[0])):
            for k in range(q._n_blocks[1]):
                [[q_type_block1], [q_type_block2]], \
                  [[q_block1], [q_block2]] = _multiply_blocked(
                    sub_q_list[(j, i)][1],
                    sub_q_list[(j, i)][0],
                    [[q._blocks[i][k]], [q._blocks[j][k]]],
                    [[q_type._blocks[i][k]], [q_type._blocks[j][k]]],
                    b_size,
                    transpose_a=True
                )
                q_type.replace_block(i, k, q_type_block1)
                q.replace_block(i, k, q_block1)
                q_type.replace_block(j, k, q_type_block2)
                q.replace_block(j, k, q_block2)

            compss_delete_object(sub_q_list[(j, i)][0][0])
            compss_delete_object(sub_q_list[(j, i)][0][1])
            compss_delete_object(sub_q_list[(j, i)][1][0])
            compss_delete_object(sub_q_list[(j, i)][1][1])
            del sub_q_list[(j, i)]

        for k in range(q._n_blocks[1]):
            q_type_block, q_block = _dot(act_q_list[i][1],
                                         act_q_list[i][0],
                                         q._blocks[i][k],
                                         q_type._blocks[i][k],
                                         b_size,
                                         transpose_a=True)
            q_type.replace_block(i, k, q_type_block)
            q.replace_block(i, k, q_block)

        compss_delete_object(act_q_list[i][0])
        compss_delete_object(act_q_list[i][1])

    # removing last rows of r to make it n x n instead of m x n
    remove_last_rows(r, r.shape[0] - r.shape[1])

    return q, r
Example #22
def kron(a, b, block_size=None):
    """ Kronecker product of two ds-arrays.

    Parameters
    ----------
    a, b : ds-arrays
        Input ds-arrays.
    block_size : tuple of two ints, optional
        Block size of the resulting array. Defaults to the block size of `b`.

    Returns
    -------
    out : ds-array

    Raises
    ------
    NotImplementedError
        If a or b are sparse.
    """
    if a._sparse or b._sparse:
        raise NotImplementedError("Sparse ds-arrays not supported.")

    k_n_blocks = ((a.shape[0] * b._n_blocks[0]), a.shape[1] * b._n_blocks[1])
    k_blocks = Array._get_out_blocks(k_n_blocks)

    # Compute the Kronecker product by multiplying b by each element in a.
    # The resulting array keeps the block structure of b repeated many
    # times, which is why we need to rechunk it at the end.
    offseti = 0

    for i in range(a._n_blocks[0]):
        offsetj = 0

        for j in range(a._n_blocks[1]):
            bshape_a = a._get_block_shape(i, j)

            for k in range(b._n_blocks[0]):
                for q in range(b._n_blocks[1]):
                    out_blocks = Array._get_out_blocks(bshape_a)
                    _kron(a._blocks[i][j], b._blocks[k][q], out_blocks)

                    for m in range(bshape_a[0]):
                        for n in range(bshape_a[1]):
                            bi = (offseti + m) * b._n_blocks[0] + k
                            bj = (offsetj + n) * b._n_blocks[1] + q
                            k_blocks[bi][bj] = out_blocks[m][n]

            offsetj += bshape_a[1]
        offseti += bshape_a[0]

    shape = (a.shape[0] * b.shape[0], a.shape[1] * b.shape[1])

    if not block_size:
        bsize = b._reg_shape
    else:
        bsize = block_size

    # rechunk the array unless all blocks of b are of the same size and
    # block_size is None
    if (not block_size or block_size == b._reg_shape) and (
            b.shape[0] % b._reg_shape[0] == 0
            and b.shape[1] % b._reg_shape[1] == 0 and b._is_regular()):
        return Array(k_blocks, bsize, bsize, shape, False)
    else:
        out = Array._rechunk(k_blocks, shape, bsize, _kron_shape_f, b)

        for blocks in k_blocks:
            for block in blocks:
                compss_delete_object(block)

        return out
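A usage sketch for kron (assuming a running COMPSs runtime; the inputs are illustrative):

import numpy as np
import dislib as ds
from dislib.math import kron

a = ds.array(np.array([[1, 2], [3, 4]]), block_size=(2, 2))
b = ds.array(np.eye(2), block_size=(2, 2))

out = kron(a, b)      # block size defaults to b's (2, 2)
print(out.shape)      # (4, 4)
print(out.collect())  # gather the dense result in the master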