Example 1
    def create_operator(self, coord_format=Utils.CoordinateDefault, storage_level=StorageLevel.MEMORY_AND_DISK):
        """
        Build the shift operator for the walk.

        Parameters
        ----------
        coord_format : int, optional
            Indicate whether the operator must be returned in an appropriate format for multiplications.
            Default value is Utils.CoordinateDefault.
        storage_level : StorageLevel, optional
            The desired storage level when materializing the RDD. Default value is StorageLevel.MEMORY_AND_DISK.

        Returns
        -------
        Operator

        Raises
        ------
        ValueError

        """
        if self._logger:
            self._logger.info("building shift operator...")

        initial_time = datetime.now()

        coin_size = 2
        size = self._size
        num_edges = self._num_edges
        shape = (coin_size * size, coin_size * size)

        if self._broken_links:
            broken_links = self._broken_links.generate(num_edges)

            generation_mode = Utils.get_conf(self._spark_context, 'dtqw.mesh.brokenLinks.generationMode', default='broadcast')

            if generation_mode == 'rdd':
                def __map(e):
                    """e = (edge, (edge, broken or not))"""
                    for i in range(coin_size):
                        l = (-1) ** i

                        # Find the corresponding x coordinate of the vertex from the edge number
                        x = (e[1][0] - i - l) % size

                        if e[1][1]:
                            l = 0

                        yield (i + l) * size + (x + l) % size, (1 - i) * size + x, 1

                rdd = self._spark_context.range(
                    num_edges
                ).map(
                    lambda m: (m, m)
                ).leftOuterJoin(
                    broken_links
                ).flatMap(
                    __map
                )
            elif generation_mode == 'broadcast':
                def __map(e):
                    for i in range(coin_size):
                        l = (-1) ** i

                        # Find the corresponding x coordinate of the vertex from the edge number
                        x = (e - i - l) % size

                        if e in broken_links.value:
                            l = 0

                        yield (i + l) * size + (x + l) % size, (1 - i) * size + x, 1

                rdd = self._spark_context.range(
                    num_edges
                ).flatMap(
                    __map
                )
            else:
                if self._logger:
                    self._logger.error("invalid broken links generation mode")
                raise ValueError("invalid broken links generation mode")
        else:
            def __map(x):
                for i in range(coin_size):
                    l = (-1) ** i
                    yield i * size + (x + l) % size, i * size + x, 1

            rdd = self._spark_context.range(
                size
            ).flatMap(
                __map
            )

        if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
            rdd = Utils.change_coordinate(
                rdd, Utils.CoordinateDefault, new_coord=coord_format
            )

            expected_elems = coin_size * size
            expected_size = Utils.get_size_of_type(int) * expected_elems
            num_partitions = Utils.get_num_partitions(self._spark_context, expected_size)

            if num_partitions:
                rdd = rdd.partitionBy(
                    numPartitions=num_partitions
                )

        operator = Operator(rdd, shape, data_type=int, coord_format=coord_format).materialize(storage_level)

        if self._broken_links:
            broken_links.unpersist()

        self._profile(operator, initial_time)

        return operator
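
The branch without broken links above emits one (row, column, 1) triple per coin value and lattice site, which is exactly a permutation matrix: coin 0 moves every site one step forward along the cycle and coin 1 one step backward. A minimal dense sketch of that index arithmetic, assuming a hypothetical cycle of size 4 (NumPy is used here only for the check, not by the library):

import numpy as np

coin_size, size = 2, 4  # hypothetical small cycle; the real values come from the mesh
S = np.zeros((coin_size * size, coin_size * size), dtype=int)

for x in range(size):
    for i in range(coin_size):
        l = (-1) ** i
        # Same formula as the __map above: row = i * size + (x + l) % size, column = i * size + x
        S[i * size + (x + l) % size, i * size + x] = 1

# Every row and column holds exactly one 1, so the shift operator is a permutation matrix.
assert (S.sum(axis=0) == 1).all() and (S.sum(axis=1) == 1).all()
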
Example 2
    def create_walk_operator(self,
                             coord_format=Utils.CoordinateDefault,
                             storage_level=StorageLevel.MEMORY_AND_DISK):
        """
        Build the walk operator for the walk.

        When performing a multiparticle walk, this method builds a list of n operators,
        where n is the number of particles in the system. In this case, each operator is built by
        applying a tensor product between the evolution operator and n-1 identity matrices, as follows:

            W1 = U (X) I2 (X) ... (X) In
            Wi = I1 (X) ... (X) Ii-1 (X) U (X) Ii+1 (X) ... (X) In
            Wn = I1 (X) ... (X) In-1 (X) U

        Regardless of the number of particles, the walk operators have their (i,j,value) coordinates converted to
        the appropriate coordinates for multiplication, in this case CoordinateMultiplier.

        Parameters
        ----------
        coord_format : int, optional
            Indicate whether the operator must be returned in an appropriate format for multiplications.
            Default value is Utils.CoordinateDefault.
        storage_level : StorageLevel, optional
            The desired storage level when materializing the RDD. Default value is StorageLevel.MEMORY_AND_DISK.

        Raises
        ------
        ValueError

        """
        app_id = self._spark_context.applicationId

        if self._coin_operator is None:
            if self._logger:
                self._logger.info(
                    "no coin operator has been set. A new one will be built")
            self._coin_operator = self._coin.create_operator(
                self._mesh,
                coord_format=Utils.CoordinateMultiplicand,
                storage_level=storage_level)

            if self._profiler:
                if Utils.get_conf(self._spark_context,
                                  'dtqw.profiler.logExecutors',
                                  default='False') == 'True':
                    self._profiler.log_executors(app_id=app_id)

        if self._shift_operator is None:
            if self._logger:
                self._logger.info(
                    "no shift operator has been set. A new one will be built")
            self._shift_operator = self._mesh.create_operator(
                coord_format=Utils.CoordinateMultiplier,
                storage_level=storage_level)

            if self._profiler:
                if Utils.get_conf(self._spark_context,
                                  'dtqw.profiler.logExecutors',
                                  default='False') == 'True':
                    self._profiler.log_executors(app_id=app_id)

        if self._num_particles == 1:
            if self._logger:
                self._logger.info(
                    "with just one particle, the walk operator is the evolution operator"
                )

            t1 = datetime.now()

            evolution_operator = self._shift_operator.multiply(
                self._coin_operator, coord_format=Utils.CoordinateMultiplier)

            eo = evolution_operator.persist(storage_level)

            if Utils.get_conf(self._spark_context,
                              'dtqw.walkOperator.checkpoint',
                              default='False') == 'True':
                eo = eo.checkpoint()

            self._walk_operator = eo.materialize(storage_level)

            self._coin_operator.unpersist()
            self._shift_operator.unpersist()

            if self._profiler:
                self._profiler.profile_resources(app_id)
                self._profiler.profile_executors(app_id)

                info = self._profiler.profile_operator(
                    'walkOperator', self._walk_operator,
                    (datetime.now() - t1).total_seconds())

                if self._logger:
                    self._logger.info("walk operator was built in {}s".format(
                        info['buildingTime']))
                    self._logger.info(
                        "walk operator is consuming {} bytes in memory and {} bytes in disk"
                        .format(info['memoryUsed'], info['diskUsed']))

                if Utils.get_conf(self._spark_context,
                                  'dtqw.profiler.logExecutors',
                                  default='False') == 'True':
                    self._profiler.log_executors(app_id=app_id)
        else:
            if self._logger:
                self._logger.info("building walk operator...")

            t_tmp = datetime.now()

            evolution_operator = self._shift_operator.multiply(
                self._coin_operator,
                coord_format=Utils.CoordinateDefault).persist(
                    storage_level).materialize(storage_level)

            self._coin_operator.unpersist()
            self._shift_operator.unpersist()

            if self._profiler:
                if Utils.get_conf(self._spark_context,
                                  'dtqw.profiler.logExecutors',
                                  default='False') == 'True':
                    self._profiler.log_executors(app_id=app_id)

            shape = evolution_operator.shape
            shape_tmp = shape

            self._walk_operator = []

            kron_mode = Utils.get_conf(self._spark_context,
                                       'dtqw.walkOperator.kroneckerMode',
                                       default='broadcast')

            if kron_mode == 'broadcast':
                eo = Utils.broadcast(self._spark_context,
                                     evolution_operator.data.collect())
                evolution_operator.unpersist()

                for p in range(self._num_particles):
                    if self._logger:
                        self._logger.debug(
                            "building walk operator for particle {}...".format(
                                p + 1))

                    if p == 0:
                        # The first particle's walk operator consists of applying the tensor product between the
                        # evolution operator and the other particles' corresponding identity matrices
                        #
                        # W1 = U (X) I2 (X) ... (X) In
                        rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p),
                                     shape_tmp[1]**(self._num_particles - 1 - p))

                        def __map(m):
                            for i in eo.value:
                                yield i[0] * rdd_shape[0] + m, i[1] * rdd_shape[1] + m, i[2]

                        rdd = self._spark_context.range(
                            rdd_shape[0]).flatMap(__map)

                        shape = (rdd_shape[0] * shape_tmp[0],
                                 rdd_shape[1] * shape_tmp[1])
                    else:
                        t_tmp = datetime.now()

                        # For the other particles, the operator is built by applying the tensor product
                        # between the previous particles' identity matrices and the evolution operator.
                        #
                        # Wi = I1 (X) ... (X) Ii-1 (X) U ...
                        rdd_shape = (shape_tmp[0]**p, shape_tmp[1]**p)

                        def __map(m):
                            for i in eo.value:
                                yield m * shape_tmp[0] + i[0], m * shape_tmp[1] + i[1], i[2]

                        rdd = self._spark_context.range(
                            rdd_shape[0]).flatMap(__map)

                        shape = (rdd_shape[0] * shape_tmp[0],
                                 rdd_shape[1] * shape_tmp[1])

                        # Then, the tensor product is applied with the following particles' identity matrices.
                        #
                        # ... (X) Ii+1 (X) ... (X) In
                        #
                        # If it is the last particle, there are no following identity matrices and the
                        # operator is already the product of the preceding identities and the evolution operator.
                        #
                        # ... (X) Ii-1 (X) U
                        if p < self._num_particles - 1:
                            rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p),
                                         shape_tmp[1]**(self._num_particles - 1 - p))

                            def __map(m):
                                for i in range(rdd_shape[0]):
                                    yield m[0] * rdd_shape[0] + i, m[1] * rdd_shape[1] + i, m[2]

                            rdd = rdd.flatMap(__map)

                            shape = (rdd_shape[0] * shape[0],
                                     rdd_shape[1] * shape[1])

                    if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
                        rdd = Utils.change_coordinate(rdd,
                                                      Utils.CoordinateDefault,
                                                      new_coord=coord_format)

                        expected_elems = evolution_operator.num_nonzero_elements * \
                            evolution_operator.shape[0]**(self._num_particles - 1)
                        expected_size = Utils.get_size_of_type(
                            complex) * expected_elems
                        num_partitions = Utils.get_num_partitions(
                            self._spark_context, expected_size)

                        if num_partitions:
                            rdd = rdd.partitionBy(numPartitions=num_partitions)

                        self._num_partitions = num_partitions

                    wo = Operator(
                        rdd, shape,
                        coord_format=coord_format).persist(storage_level)

                    if Utils.get_conf(self._spark_context,
                                      'dtqw.walkOperator.checkpoint',
                                      default='False') == 'True':
                        wo = wo.checkpoint()

                    self._walk_operator.append(wo.materialize(storage_level))

                    if self._profiler:
                        self._profiler.profile_resources(app_id)
                        self._profiler.profile_executors(app_id)

                        info = self._profiler.profile_operator(
                            'walkOperatorParticle{}'.format(p + 1),
                            self._walk_operator[-1],
                            (datetime.now() - t_tmp).total_seconds())

                        if self._logger:
                            self._logger.info(
                                "walk operator for particle {} was built in {}s"
                                .format(p + 1, info['buildingTime']))
                            self._logger.info(
                                "walk operator for particle {} is consuming {} bytes in memory and {} bytes in disk"
                                .format(p + 1, info['memoryUsed'],
                                        info['diskUsed']))

                        if Utils.get_conf(self._spark_context,
                                          'dtqw.profiler.logExecutors',
                                          default='False') == 'True':
                            self._profiler.log_executors(app_id=app_id)

                eo.unpersist()
            elif kron_mode == 'dump':
                path = Utils.get_temp_path(
                    Utils.get_conf(self._spark_context,
                                   'dtqw.storage.tempPath',
                                   default='./'))

                evolution_operator.dump(path)

                for p in range(self._num_particles):
                    if self._logger:
                        self._logger.debug(
                            "building walk operator for particle {}...".format(
                                p + 1))

                    shape = shape_tmp

                    if p == 0:
                        # The first particle's walk operator consists of applying the tensor product between the
                        # evolution operator and the other particles' corresponding identity matrices
                        #
                        # W1 = U (X) I2 (X) ... (X) In
                        rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p),
                                     shape_tmp[1]**(self._num_particles - 1 - p))

                        def __map(m):
                            with fileinput.input(files=glob(path + '/part-*')) as f:
                                for line in f:
                                    l = line.split()
                                    yield int(l[0]) * rdd_shape[0] + m, int(l[1]) * rdd_shape[1] + m, complex(l[2])

                        rdd = self._spark_context.range(
                            rdd_shape[0]).flatMap(__map)

                        shape = (rdd_shape[0] * shape_tmp[0],
                                 rdd_shape[1] * shape_tmp[1])
                    else:
                        t_tmp = datetime.now()

                        # For the other particles, the operator is built by applying the tensor product
                        # between the previous particles' identity matrices and the evolution operator.
                        #
                        # Wi = I1 (X) ... (X) Ii-1 (X) U ...
                        rdd_shape = (shape_tmp[0]**p, shape_tmp[1]**p)

                        def __map(m):
                            with fileinput.input(files=glob(path + '/part-*')) as f:
                                for line in f:
                                    l = line.split()
                                    yield m * shape_tmp[0] + int(l[0]), m * shape_tmp[1] + int(l[1]), complex(l[2])

                        rdd = self._spark_context.range(
                            rdd_shape[0]).flatMap(__map)

                        shape = (rdd_shape[0] * shape_tmp[0],
                                 rdd_shape[1] * shape_tmp[1])

                        # Then, the tensor product is applied with the following particles' identity matrices.
                        #
                        # ... (X) Ii+1 (X) ... (X) In
                        #
                        # If it is the last particle, there are no following identity matrices and the
                        # operator is already the product of the preceding identities and the evolution operator.
                        #
                        # ... (X) Ii-1 (X) U
                        if p < self._num_particles - 1:
                            rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p),
                                         shape_tmp[1]**(self._num_particles - 1 - p))

                            def __map(m):
                                for i in range(rdd_shape[0]):
                                    yield m[0] * rdd_shape[0] + i, m[1] * rdd_shape[1] + i, m[2]

                            rdd = rdd.flatMap(__map)

                            shape = (rdd_shape[0] * shape[0],
                                     rdd_shape[1] * shape[1])

                    if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
                        rdd = Utils.change_coordinate(rdd,
                                                      Utils.CoordinateDefault,
                                                      new_coord=coord_format)

                        expected_elems = evolution_operator.num_nonzero_elements * \
                            evolution_operator.shape[0]**(self._num_particles - 1)
                        expected_size = Utils.get_size_of_type(
                            complex) * expected_elems
                        num_partitions = Utils.get_num_partitions(
                            self._spark_context, expected_size)

                        if num_partitions:
                            rdd = rdd.partitionBy(numPartitions=num_partitions)

                        self._num_partitions = num_partitions

                    wo = Operator(
                        rdd, shape,
                        coord_format=coord_format).persist(storage_level)

                    if Utils.get_conf(self._spark_context,
                                      'dtqw.walkOperator.checkpoint',
                                      default='False') == 'True':
                        wo = wo.checkpoint()

                    self._walk_operator.append(wo.materialize(storage_level))

                    if self._profiler:
                        self._profiler.profile_resources(app_id)
                        self._profiler.profile_executors(app_id)

                        info = self._profiler.profile_operator(
                            'walkOperatorParticle{}'.format(p + 1),
                            self._walk_operator[-1],
                            (datetime.now() - t_tmp).total_seconds())

                        if self._logger:
                            self._logger.info(
                                "walk operator for particle {} was built in {}s"
                                .format(p + 1, info['buildingTime']))
                            self._logger.info(
                                "walk operator for particle {} is consuming {} bytes in memory and {} bytes in disk"
                                .format(p + 1, info['memoryUsed'],
                                        info['diskUsed']))

                        if Utils.get_conf(self._spark_context,
                                          'dtqw.profiler.logExecutors',
                                          default='False') == 'True':
                            self._profiler.log_executors(app_id=app_id)

                evolution_operator.unpersist()
                Utils.remove_path(path)
            else:
                if self._logger:
                    self._logger.error("invalid kronecker mode")
                raise ValueError("invalid kronecker mode")
Example 3
    def create_operator(self,
                        coord_format=Utils.CoordinateDefault,
                        storage_level=StorageLevel.MEMORY_AND_DISK):
        """
        Build the shift operator for the walk.

        Parameters
        ----------
        coord_format : int, optional
            Indicate whether the operator must be returned in an appropriate format for multiplications.
            Default value is Utils.CoordinateDefault.
        storage_level : StorageLevel, optional
            The desired storage level when materializing the RDD. Default value is StorageLevel.MEMORY_AND_DISK.

        Returns
        -------
        Operator

        Raises
        ------
        ValueError

        """
        if self._logger:
            self._logger.info("building shift operator...")

        initial_time = datetime.now()

        coin_size = 2
        size = self._size
        num_edges = self._num_edges
        size_xy = size[0] * size[1]
        shape = (coin_size * coin_size * size_xy,
                 coin_size * coin_size * size_xy)

        if self._broken_links:
            broken_links = self._broken_links.generate(num_edges)

            generation_mode = Utils.get_conf(
                self._spark_context,
                'dtqw.mesh.brokenLinks.generationMode',
                default='broadcast')

            if generation_mode == 'rdd':

                def __map(e):
                    """e = (edge, (edge, broken or not))"""
                    for i in range(coin_size):
                        l = (-1)**i

                        # Find the corresponding x,y coordinates of the vertex from the edge number
                        if e[1][0] >= size[0] * size[1]:
                            j = i
                            x = int((e[1][0] - size[0] * size[1]) / size[0])
                            y = ((e[1][0] - size[0] * size[1]) % size[1] - i -
                                 l) % size[1]
                        else:
                            j = int(not i)
                            x = (e[1][0] % size[0] - i - l) % size[0]
                            y = int(e[1][0] / size[0])

                        delta = int(not (i ^ j))

                        if e[1][1]:
                            l = 0

                        m = ((i + l) * coin_size + (abs(j + l) % coin_size)) * size_xy + \
                            ((x + l * (1 - delta)) % size[0]) * size[1] + (y + l * delta) % size[1]
                        n = ((1 - i) * coin_size +
                             (1 - j)) * size_xy + x * size[1] + y

                        yield m, n, 1

                rdd = self._spark_context.range(num_edges).map(
                    lambda m: (m, m)).leftOuterJoin(broken_links).flatMap(__map)
            elif generation_mode == 'broadcast':

                def __map(e):
                    """e = (edge, (edge, broken or not))"""
                    for i in range(coin_size):
                        l = (-1)**i

                        # Find the corresponding x,y coordinates of the vertex from the edge number
                        if e >= size[0] * size[1]:
                            j = i
                            delta = int(not (i ^ j))
                            x = int((e - size[0] * size[1]) / size[0])
                            y = ((e - size[0] * size[1]) % size[1] - i -
                                 l) % size[1]
                        else:
                            j = int(not i)
                            delta = int(not (i ^ j))
                            x = (e % size[0] - i - l) % size[0]
                            y = int(e / size[0])

                        if e in broken_links.value:
                            bl = 0
                        else:
                            bl = l

                        m = ((i + bl) * coin_size + (abs(j + bl) % coin_size)) * size_xy + \
                            ((x + bl * (1 - delta)) % size[0]) * size[1] + (y + bl * delta) % size[1]
                        n = ((1 - i) * coin_size +
                             (1 - j)) * size_xy + x * size[1] + y

                        yield m, n, 1

                rdd = self._spark_context.range(num_edges).flatMap(__map)
            else:
                if self._logger:
                    self._logger.error("invalid broken links generation mode")
                raise ValueError("invalid broken links generation mode")
        else:

            def __map(xy):
                x = xy % size[0]
                y = int(xy / size[0])

                for i in range(coin_size):
                    l = (-1)**i
                    for j in range(coin_size):
                        delta = int(not (i ^ j))

                        m = (i * coin_size + j) * size_xy + \
                            ((x + l * (1 - delta)) % size[0]) * size[1] + (y + l * delta) % size[1]
                        n = (i * coin_size + j) * size_xy + x * size[1] + y

                        yield m, n, 1

            rdd = self._spark_context.range(size_xy).flatMap(__map)

        if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
            rdd = Utils.change_coordinate(rdd,
                                          Utils.CoordinateDefault,
                                          new_coord=coord_format)

            expected_elems = coin_size**2 * size_xy
            expected_size = Utils.get_size_of_type(int) * expected_elems
            num_partitions = Utils.get_num_partitions(self._spark_context,
                                                      expected_size)

            if num_partitions:
                rdd = rdd.partitionBy(numPartitions=num_partitions)

        operator = Operator(
            rdd, shape, data_type=int,
            coord_format=coord_format).materialize(storage_level)

        if self._broken_links:
            broken_links.unpersist()

        self._profile(operator, initial_time)

        return operator
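
As in the one-dimensional case, the __map used when there are no broken links is a permutation: delta decides whether the coin pair (i, j) moves the walker along x or along y, and each of the four coin blocks is a cyclic shift of the lattice. A minimal dense sketch of that arithmetic, assuming a hypothetical 3x3 diagonal lattice:

import numpy as np

coin_size, size = 2, (3, 3)  # hypothetical small diagonal lattice
size_xy = size[0] * size[1]
dim = coin_size * coin_size * size_xy
S = np.zeros((dim, dim), dtype=int)

for xy in range(size_xy):
    x, y = xy % size[0], xy // size[0]
    for i in range(coin_size):
        l = (-1) ** i
        for j in range(coin_size):
            delta = int(not (i ^ j))  # 1: shift along y; 0: shift along x
            m = (i * coin_size + j) * size_xy + \
                ((x + l * (1 - delta)) % size[0]) * size[1] + (y + l * delta) % size[1]
            n = (i * coin_size + j) * size_xy + x * size[1] + y
            S[m, n] = 1

# Each coin block is a cyclic shift of the lattice, so S is a permutation matrix.
assert (S.sum(axis=0) == 1).all() and (S.sum(axis=1) == 1).all()
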
Example 4
    def create_interaction_operator(
            self,
            coord_format=Utils.CoordinateDefault,
            storage_level=StorageLevel.MEMORY_AND_DISK):
        """
        Build the particles' interaction operator for the walk.

        Parameters
        ----------
        coord_format : int, optional
            Indicate whether the operator must be returned in an appropriate format for multiplications.
            Default value is Utils.CoordinateDefault.
        storage_level : StorageLevel, optional
            The desired storage level when materializing the RDD. Default value is StorageLevel.MEMORY_AND_DISK.

        Raises
        ------
        ValueError

        """
        if not self._phase:
            if self._logger:
                self._logger.error(
                    "no collision phase was provided or the provided phase is zero")
            raise ValueError(
                "no collision phase was provided or the provided phase is zero")

        if self._logger:
            self._logger.info("building interaction operator...")

        t1 = datetime.now()

        phase = cmath.exp(self._phase * (0.0 + 1.0j))
        num_particles = self._num_particles

        coin_size = 2

        if self._mesh.is_1d():
            size = self._mesh.size
            cs_size = coin_size * size

            rdd_range = cs_size**num_particles
            shape = (rdd_range, rdd_range)

            def __map(m):
                x = []

                for p in range(num_particles):
                    x.append(
                        int(m / (cs_size**(num_particles - 1 - p))) % size)

                for p1 in range(num_particles):
                    for p2 in range(num_particles):
                        if p1 != p2 and x[p1] == x[p2]:
                            return m, m, phase

                return m, m, 1
        elif self._mesh.is_2d():
            size_x = self._mesh.size[0]
            size_y = self._mesh.size[1]
            cs_size_x = coin_size * size_x
            cs_size_y = coin_size * size_y
            cs_size_xy = cs_size_x * cs_size_y

            rdd_range = cs_size_xy**num_particles
            shape = (rdd_range, rdd_range)

            def __map(m):
                xy = []

                for p in range(num_particles):
                    xy.append((int(m / (cs_size_xy**(num_particles - 1 - p) * size_y)) % size_x,
                               int(m / (cs_size_xy**(num_particles - 1 - p))) % size_y))

                for p1 in range(num_particles):
                    for p2 in range(num_particles):
                        if p1 != p2 and xy[p1][0] == xy[p2][0] and xy[p1][1] == xy[p2][1]:
                            return m, m, phase

                return m, m, 1
        else:
            if self._logger:
                self._logger.error("mesh dimension not implemented")
            raise NotImplementedError("mesh dimension not implemented")

        rdd = self._spark_context.range(rdd_range).map(__map)

        if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
            rdd = Utils.change_coordinate(rdd,
                                          Utils.CoordinateDefault,
                                          new_coord=coord_format)

            # The walk operators must be guaranteed to be previously built
            # in order to the number of partitions be already known.
            # Using the same number of partitions is important to avoid shuffle
            # when multiplying the state by the operators.
            num_partitions = self._num_partitions

            if not num_partitions:
                expected_elems = rdd_range
                expected_size = Utils.get_size_of_type(
                    complex) * expected_elems
                num_partitions = Utils.get_num_partitions(
                    self._spark_context, expected_size)

            if num_partitions:
                rdd = rdd.partitionBy(numPartitions=num_partitions)

        io = Operator(rdd, shape,
                      coord_format=coord_format).persist(storage_level)

        if Utils.get_conf(self._spark_context,
                          'dtqw.interactionOperator.checkpoint',
                          default='False') == 'True':
            io = io.checkpoint()

        self._interaction_operator = io.materialize(storage_level)

        app_id = self._spark_context.applicationId

        if self._profiler:
            self._profiler.profile_resources(app_id)
            self._profiler.profile_executors(app_id)

            info = self._profiler.profile_operator(
                'interactionOperator', self._interaction_operator,
                (datetime.now() - t1).total_seconds())

            if self._logger:
                self._logger.info(
                    "interaction operator was built in {}s".format(
                        info['buildingTime']))
                self._logger.info(
                    "interaction operator is consuming {} bytes in memory and {} bytes in disk"
                    .format(info['memoryUsed'], info['diskUsed']))

            if Utils.get_conf(self._spark_context,
                              'dtqw.profiler.logExecutors',
                              default='False') == 'True':
                self._profiler.log_executors(app_id=app_id)
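
The interaction operator built above is diagonal: __map decodes every particle's position from the flat basis index and applies the collision phase only when at least two particles occupy the same site. A minimal dense sketch of that logic for the one-dimensional case, assuming a hypothetical mesh of size 4, two particles and a collision phase of pi:

import cmath
import numpy as np

coin_size, size, num_particles = 2, 4, 2      # hypothetical 1D mesh with two particles
cs_size = coin_size * size
dim = cs_size ** num_particles
phase = cmath.exp(1.0j * cmath.pi)            # stand-in for the configured collision phase

diag = np.ones(dim, dtype=complex)
for m in range(dim):
    # Same decoding as __map: each particle's position is recovered from the flat index m
    x = [int(m / cs_size ** (num_particles - 1 - p)) % size for p in range(num_particles)]
    if any(x[p1] == x[p2]
           for p1 in range(num_particles)
           for p2 in range(num_particles) if p1 != p2):
        diag[m] = phase

interaction = np.diag(diag)  # identity except for the colliding basis states
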
Example 5
    def create_operator(self,
                        mesh,
                        coord_format=Utils.CoordinateDefault,
                        storage_level=StorageLevel.MEMORY_AND_DISK):
        """
        Build the coin operator for the walk.

        Parameters
        ----------
        mesh : Mesh
            A Mesh instance.
        coord_format : int, optional
            Indicate whether the operator must be returned in an appropriate format for multiplications.
            Default value is Utils.CoordinateDefault.
        storage_level : StorageLevel, optional
            The desired storage level when materializing the RDD. Default value is StorageLevel.MEMORY_AND_DISK.

        Returns
        -------
        Operator

        """
        if self._logger:
            self._logger.info("building coin operator...")

        initial_time = datetime.now()

        if not is_mesh(mesh):
            if self._logger:
                self._logger.error("expected mesh, not {}".format(type(mesh)))
            raise TypeError("expected mesh, not {}".format(type(mesh)))

        if not mesh.is_2d():
            if self._logger:
                self._logger.error(
                    "coin and mesh dimensions do not correspond")
            raise ValueError("coin and mesh dimensions do not correspond")

        mesh_size = mesh.size[0] * mesh.size[1]
        shape = (self._data.shape[0] * mesh_size,
                 self._data.shape[1] * mesh_size)
        data = Utils.broadcast(self._spark_context, self._data)

        # The coin operator is built by applying a tensor product between the chosen coin and
        # an identity matrix with the dimensions of the chosen mesh.
        def __map(xy):
            for i in range(data.value.shape[0]):
                for j in range(data.value.shape[1]):
                    yield (i * mesh_size + xy, j * mesh_size + xy,
                           data.value[i][j])

        rdd = self._spark_context.range(mesh_size).flatMap(__map)

        if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
            rdd = Utils.change_coordinate(rdd,
                                          Utils.CoordinateDefault,
                                          new_coord=coord_format)

            expected_elems = self._data.shape[0] * self._data.shape[1] * mesh_size
            expected_size = Utils.get_size_of_type(complex) * expected_elems
            num_partitions = Utils.get_num_partitions(self._spark_context,
                                                      expected_size)

            if num_partitions:
                rdd = rdd.partitionBy(numPartitions=num_partitions)

        operator = Operator(
            rdd, shape, coord_format=coord_format).materialize(storage_level)

        self._profile(operator, initial_time)

        return operator
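
The coin operator above is the tensor product of the chosen coin with an identity whose dimension is the number of mesh sites, built entry by entry without forming the identity. A minimal dense sketch of the index arithmetic in __map, assuming a hypothetical Hadamard coin in place of self._data and a flattened mesh of 4 sites:

import numpy as np

C = np.array([[1, 1], [1, -1]]) / np.sqrt(2)  # hypothetical Hadamard coin standing in for self._data
mesh_size = 4                                 # e.g. a 2x2 mesh flattened

op = np.zeros((C.shape[0] * mesh_size, C.shape[1] * mesh_size))
for xy in range(mesh_size):
    for i in range(C.shape[0]):
        for j in range(C.shape[1]):
            # Same entry as __map: (i * mesh_size + xy, j * mesh_size + xy) = C[i, j]
            op[i * mesh_size + xy, j * mesh_size + xy] = C[i, j]

assert np.allclose(op, np.kron(C, np.eye(mesh_size)))
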