def kron(self, other, coord_format=Utils.CoordinateDefault):
    """
    Perform a tensor (Kronecker) product with another operator.

    Parameters
    ----------
    other : :obj:Operator
        The other operator.
    coord_format : int, optional
        Indicate if the operator must be returned in an appropriate format for multiplications.
        Default value is Utils.CoordinateDefault.

    Returns
    -------
    :obj:Operator
        The resulting operator.

    """
    if not is_operator(other):
        if self._logger:
            self._logger.error(
                'Operator instance expected, not "{}"'.format(type(other)))
        raise TypeError('Operator instance expected, not "{}"'.format(
            type(other)))

    other_shape = other.shape
    new_shape = (self._shape[0] * other_shape[0],
                 self._shape[1] * other_shape[1])
    data_type = Utils.get_precendent_type(self._data_type, other.data_type)

    expected_elems = self._num_nonzero_elements * other.num_nonzero_elements
    expected_size = Utils.get_size_of_type(data_type) * expected_elems
    num_partitions = Utils.get_num_partitions(self.data.context,
                                              expected_size)

    rdd = self.data.map(
        lambda m: (0, m)
    ).join(
        other.data.map(lambda m: (0, m)),
        numPartitions=num_partitions
    ).map(
        lambda m: (m[1][0], m[1][1])
    )

    if coord_format == Utils.CoordinateMultiplier:
        rdd = rdd.map(
            lambda m: (m[0][1] * other_shape[1] + m[1][1],
                       (m[0][0] * other_shape[0] + m[1][0],
                        m[0][2] * m[1][2]))
        ).partitionBy(numPartitions=num_partitions)
    elif coord_format == Utils.CoordinateMultiplicand:
        rdd = rdd.map(
            lambda m: (m[0][0] * other_shape[0] + m[1][0],
                       (m[0][1] * other_shape[1] + m[1][1],
                        m[0][2] * m[1][2]))
        ).partitionBy(numPartitions=num_partitions)
    else:  # Utils.CoordinateDefault
        rdd = rdd.map(
            lambda m: (m[0][0] * other_shape[0] + m[1][0],
                       m[0][1] * other_shape[1] + m[1][1],
                       m[0][2] * m[1][2])
        )

    return Operator(rdd, new_shape, coord_format=coord_format)
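# ---------------------------------------------------------------------------
# Editor's note: a minimal, Spark-free sketch of the coordinate arithmetic used
# by kron() above. It assumes two operators stored as (row, col, value) tuples
# (CoordinateDefault); the helper name below is illustrative only and is not
# part of the library's API.
def _kron_coo_sketch(a, a_shape, b, b_shape):
    """Kronecker product of two sparse matrices in (row, col, value) form."""
    for (i1, j1, v1) in a:
        for (i2, j2, v2) in b:
            # Element (i1, j1) of A is expanded into a block of B's shape.
            yield (i1 * b_shape[0] + i2, j1 * b_shape[1] + j2, v1 * v2)

# Example: kron of a 2x2 identity with a 2x2 Pauli-X gives a 4x4 block matrix.
_identity = [(0, 0, 1), (1, 1, 1)]
_pauli_x = [(0, 1, 1), (1, 0, 1)]
# sorted(_kron_coo_sketch(_identity, (2, 2), _pauli_x, (2, 2)))
#   == [(0, 1, 1), (1, 0, 1), (2, 3, 1), (3, 2, 1)]
# ---------------------------------------------------------------------------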
def __init__(self, rdd, shape, data_type=complex):
    """
    Build a top-level object for some mathematical elements.

    Parameters
    ----------
    rdd : RDD
        The base RDD of this object.
    shape : tuple
        The shape of this matrix object. Must be a 2-dimensional tuple.
    data_type : type, optional
        The Python type of all values in this object. Default is complex.

    """
    if not isinstance(rdd, RDD):
        # self.logger.error("Invalid argument to instantiate an Operator object")
        raise TypeError(
            "invalid argument to instantiate an Operator object")

    if shape is not None:
        if not Utils.is_shape(shape):
            # self.logger.error("Invalid shape")
            raise ValueError("invalid shape")

    self._spark_context = rdd.context
    self._shape = shape
    self._num_elements = self._shape[0] * self._shape[1]
    self._num_nonzero_elements = 0
    self._data_type = data_type

    self.data = rdd

    self._logger = None
    self._profiler = None
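# ---------------------------------------------------------------------------
# Editor's note: a hedged usage sketch of the constructor above. It assumes a
# running SparkContext named `sc` and that `Operator` is the subclass being
# instantiated; the 2x2 identity below is stored as (row, col, value) tuples.
#
#   rdd = sc.parallelize([(0, 0, 1 + 0j), (1, 1, 1 + 0j)])
#   identity = Operator(rdd, (2, 2), data_type=complex)
#
# Passing anything other than an RDD raises TypeError, and a shape rejected by
# Utils.is_shape raises ValueError.
# ---------------------------------------------------------------------------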
def kron(self, other):
    """
    Perform a tensor (Kronecker) product with another system state.

    Parameters
    ----------
    other : :obj:State
        The other system state.

    Returns
    -------
    :obj:State
        The resulting state.

    """
    if not is_state(other):
        if self._logger:
            self._logger.error('State instance expected, not "{}"'.format(
                type(other)))
        raise TypeError('State instance expected, not "{}"'.format(
            type(other)))

    other_shape = other.shape
    new_shape = (self._shape[0] * other_shape[0], 1)

    expected_elems = new_shape[0]
    expected_size = Utils.get_size_of_type(complex) * expected_elems
    num_partitions = Utils.get_num_partitions(self.data.context,
                                              expected_size)

    rdd = self.data.map(
        lambda m: (0, m)
    ).join(
        other.data.map(lambda m: (0, m)),
        numPartitions=num_partitions
    ).map(
        lambda m: (m[1][0], m[1][1])
    )

    rdd = rdd.map(
        lambda m: (m[0][0] * other_shape[0] + m[1][0], m[0][1] * m[1][1]))

    return State(rdd, new_shape, self._mesh, self._num_particles)
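# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the index arithmetic used by the state
# kron() above, assuming states stored as sparse (index, amplitude) pairs.
# The helper name is illustrative only.
def _state_kron_sketch(a, b, b_size):
    """Tensor product of two sparse column vectors."""
    return [(i1 * b_size + i2, v1 * v2) for (i1, v1) in a for (i2, v2) in b]

# Example: |0> (x) |1> == |01>, i.e. amplitude 1 at index 0 * 2 + 1 = 1.
# _state_kron_sketch([(0, 1 + 0j)], [(1, 1 + 0j)], 2) == [(1, (1 + 0j))]
# ---------------------------------------------------------------------------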
def generate(self, num_edges):
    """
    Yield broken links for the mesh based on its probability to have a broken link/edge.

    Parameters
    ----------
    num_edges : int
        The number of edges of the mesh.

    Returns
    -------
    RDD or Broadcast
        The RDD or Broadcast dict whose keys are the numbered edges that are broken.

    Raises
    ------
    ValueError
        If the broken links generation mode is not valid.

    """
    probability = self._probability
    seed = Utils.get_conf(self._spark_context,
                          'dtqw.mesh.randomBrokenLinks.seed',
                          default=None)

    def __map(e):
        random.seed(seed)
        return e, random.random() < probability

    rdd = self._spark_context.range(num_edges).map(__map).filter(
        lambda m: m[1] is True)

    generation_mode = Utils.get_conf(self._spark_context,
                                     'dtqw.mesh.brokenLinks.generationMode',
                                     default='broadcast')

    if generation_mode == 'rdd':
        return rdd
    elif generation_mode == 'broadcast':
        return Utils.broadcast(self._spark_context, rdd.collectAsMap())
    else:
        raise ValueError("invalid broken links generation mode")
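# ---------------------------------------------------------------------------
# Editor's note: a driver-side sketch of what generate() computes, assuming the
# 'broadcast' generation mode. Each edge is kept with the given probability and
# the result is a dict keyed by the broken edge numbers (mirroring the
# collectAsMap() call above). Names are illustrative only.
import random

def _broken_links_sketch(num_edges, probability, seed=None):
    rng = random.Random(seed)
    return {e: True for e in range(num_edges) if rng.random() < probability}

# Example: roughly 10% of 100 edges are marked as broken.
# broken = _broken_links_sketch(100, 0.1, seed=42)
# ---------------------------------------------------------------------------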
def is_unitary(self):
    """
    Check if this operator is unitary by calculating its norm.

    Returns
    -------
    bool
        True if the norm of this operator is 1.0, False otherwise.

    """
    round_precision = int(
        Utils.get_conf(self._spark_context,
                       'dtqw.math.roundPrecision',
                       default='10'))

    return round(self.norm(), round_precision) == 1.0
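# ---------------------------------------------------------------------------
# Editor's note: a minimal sketch of the check above, applied to a state kept
# as a plain list of amplitudes. The L2 norm is rounded to a configurable
# precision before being compared to 1.0, just as is_unitary() does with the
# distributed norm. Names are illustrative only.
import math

def _is_unit_norm_sketch(amplitudes, round_precision=10):
    norm = math.sqrt(sum(abs(a) ** 2 for a in amplitudes))
    return round(norm, round_precision) == 1.0

# Example: the balanced superposition (1/sqrt(2), 1j/sqrt(2)) has norm 1.
# _is_unit_norm_sketch([1 / math.sqrt(2), 1j / math.sqrt(2)])  # True
# ---------------------------------------------------------------------------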
def create_operator(self, coord_format=Utils.CoordinateDefault,
                    storage_level=StorageLevel.MEMORY_AND_DISK):
    """
    Build the shift operator for the walk.

    Parameters
    ----------
    coord_format : int, optional
        Indicate if the operator must be returned in an appropriate format for multiplications.
        Default value is Utils.CoordinateDefault.
    storage_level : StorageLevel, optional
        The desired storage level when materializing the RDD.
        Default value is StorageLevel.MEMORY_AND_DISK.

    Returns
    -------
    Operator

    Raises
    ------
    ValueError
        If the broken links generation mode is not valid.

    """
    if self._logger:
        self._logger.info("building shift operator...")

    initial_time = datetime.now()

    coin_size = 2
    size = self._size
    num_edges = self._num_edges
    shape = (coin_size * size, coin_size * size)

    if self._broken_links:
        broken_links = self._broken_links.generate(num_edges)

        generation_mode = Utils.get_conf(self._spark_context,
                                         'dtqw.mesh.brokenLinks.generationMode',
                                         default='broadcast')

        if generation_mode == 'rdd':
            def __map(e):
                """e = (edge, (edge, broken or not))"""
                for i in range(coin_size):
                    l = (-1) ** i

                    # Finding the corresponding x coordinate of the vertex from the edge number
                    x = (e[1][0] - i - l) % size

                    if e[1][1]:
                        l = 0

                    yield (i + l) * size + (x + l) % size, (1 - i) * size + x, 1

            rdd = self._spark_context.range(
                num_edges
            ).map(
                lambda m: (m, m)
            ).leftOuterJoin(
                broken_links
            ).flatMap(
                __map
            )
        elif generation_mode == 'broadcast':
            def __map(e):
                for i in range(coin_size):
                    l = (-1) ** i

                    # Finding the corresponding x coordinate of the vertex from the edge number
                    x = (e - i - l) % size

                    if e in broken_links.value:
                        l = 0

                    yield (i + l) * size + (x + l) % size, (1 - i) * size + x, 1

            rdd = self._spark_context.range(
                num_edges
            ).flatMap(
                __map
            )
        else:
            if self._logger:
                self._logger.error("invalid broken links generation mode")
            raise ValueError("invalid broken links generation mode")
    else:
        def __map(x):
            for i in range(coin_size):
                l = (-1) ** i
                yield i * size + (x + l) % size, i * size + x, 1

        rdd = self._spark_context.range(
            size
        ).flatMap(
            __map
        )

    if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
        rdd = Utils.change_coordinate(
            rdd, Utils.CoordinateDefault, new_coord=coord_format
        )

        expected_elems = coin_size * size
        expected_size = Utils.get_size_of_type(int) * expected_elems
        num_partitions = Utils.get_num_partitions(self._spark_context,
                                                  expected_size)

        if num_partitions:
            rdd = rdd.partitionBy(
                numPartitions=num_partitions
            )

    operator = Operator(rdd, shape, data_type=int,
                        coord_format=coord_format).materialize(storage_level)

    if self._broken_links:
        broken_links.unpersist()

    self._profile(operator, initial_time)

    return operator
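# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the (row, col, value) entries produced
# by the flatMap above for a cycle (1D mesh) with no broken links. Coin side 0
# shifts the walker one site to the right, coin side 1 one site to the left,
# with periodic boundaries. Names are illustrative only.
def _line_shift_entries_sketch(size, coin_size=2):
    entries = []
    for x in range(size):
        for i in range(coin_size):
            l = (-1) ** i  # +1 for coin side 0, -1 for coin side 1
            entries.append((i * size + (x + l) % size, i * size + x, 1))
    return entries

# Example: for size=4, amplitude at (coin=0, x=0) is moved to (coin=0, x=1),
# i.e. the entry (row=1, col=0, value=1) appears in the list.
# (1, 0, 1) in _line_shift_entries_sketch(4)  # True
# ---------------------------------------------------------------------------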
def walk(self, steps, initial_state,
         storage_level=StorageLevel.MEMORY_AND_DISK):
    """
    Perform a walk.

    Parameters
    ----------
    steps : int
        The number of steps of the walk.
    initial_state : State
        The initial state of the system.
    storage_level : StorageLevel, optional
        The desired storage level when materializing the RDD.

    Returns
    -------
    State
        The final state of the system after performing the walk.

    Raises
    ------
    ValueError
        If the number of steps is not valid for the chosen mesh or if a state is not unitary.

    """
    if not self._mesh.check_steps(steps):
        if self._logger:
            self._logger.error(
                "invalid number of steps for the chosen mesh")
        raise ValueError("invalid number of steps for the chosen mesh")

    if self._logger:
        self._logger.info("steps: {}".format(steps))
        self._logger.info("space size: {}".format(self._mesh.size))
        self._logger.info("number of particles: {}".format(
            self._num_particles))

        if self._num_particles > 1:
            if self._phase is None:
                self._logger.info("no collision phase has been defined")
            elif self._phase == 0.0:
                self._logger.info(
                    "a zeroed collision phase was defined. No interaction operator will be built"
                )
            else:
                self._logger.info("collision phase: {}".format(self._phase))

        if self._mesh.broken_links is None:
            self._logger.info("no broken links have been defined")
        else:
            self._logger.info("broken links probability: {}".format(
                self._mesh.broken_links.probability))

    result = initial_state.materialize(storage_level)

    if not result.is_unitary():
        if self._logger:
            self._logger.error("the initial state is not unitary")
        raise ValueError("the initial state is not unitary")

    app_id = self._spark_context.applicationId

    if self._profiler:
        self._profiler.profile_resources(app_id)
        self._profiler.profile_executors(app_id)

        info = self._profiler.profile_state('initialState', result, 0.0)

        if self._logger:
            self._logger.info(
                "initial state is consuming {} bytes in memory and {} bytes in disk"
                .format(info['memoryUsed'], info['diskUsed']))

    if self._logger:
        self._profiler.log_rdd(app_id=app_id)

    if steps > 0:
        # Build the walk operators once if not simulating decoherence with broken links.
        # When there is a broken links probability, the walk operators are rebuilt in each step of the walk.
        if not self._mesh.broken_links:
            if self._walk_operator is None:
                if self._logger:
                    self._logger.info(
                        "no walk operator has been set. A new one will be built"
                    )
                self.create_walk_operator(
                    coord_format=Utils.CoordinateMultiplier,
                    storage_level=storage_level)

        if self._num_particles > 1 and self._phase and self._interaction_operator is None:
            if self._logger:
                self._logger.info(
                    "no interaction operator has been set. A new one will be built"
                )
            self.create_interaction_operator(
                coord_format=Utils.CoordinateMultiplier,
                storage_level=storage_level)

        t1 = datetime.now()

        if self._logger:
            self._logger.info("starting the walk...")

        checkpoint_states = Utils.get_conf(self._spark_context,
                                           'dtqw.walk.checkpointStates',
                                           default='False')

        if checkpoint_states == 'True':
            checkpoint_frequency = int(
                Utils.get_conf(self._spark_context,
                               'dtqw.walk.checkpointFrequency',
                               default=math.sqrt(steps)))

        for i in range(1, steps + 1, 1):
            if self._mesh.broken_links:
                self.destroy_shift_operator()
                self.destroy_walk_operator()
                self.create_walk_operator(
                    coord_format=Utils.CoordinateMultiplier,
                    storage_level=storage_level)

            t_tmp = datetime.now()

            result_tmp = result

            mul_mode = Utils.get_conf(
                self._spark_context,
                'dtqw.walkOperator.multiplicationMode',
                default='join')

            if self._num_particles == 1:
                result_tmp = self._walk_operator.multiply(result_tmp)
            else:
                if self._interaction_operator is not None:
                    result_tmp = self._interaction_operator.multiply(
                        result_tmp)

                for wo in reversed(self._walk_operator):
                    result_tmp = wo.multiply(result_tmp)

            # In the last step, the resulting state is not materialized
            # because it will be repartitioned to a more appropriate
            # number of partitions and have a partitioner defined.
            if i == steps:
                expected_elems = result_tmp.shape[0]
                expected_size = Utils.get_size_of_type(
                    result_tmp.data_type) * expected_elems
                num_partitions = Utils.get_num_partitions(
                    self._spark_context, expected_size)

                if num_partitions:
                    result_tmp.define_partitioner(num_partitions)

            if checkpoint_states == 'True':
                if i % checkpoint_frequency == 0:
                    result_tmp.persist(storage_level).checkpoint()

            result_tmp.materialize(storage_level)
            result.unpersist()

            result = result_tmp

            if self._profiler:
                self._profiler.profile_resources(app_id)
                self._profiler.profile_executors(app_id)

                info = self._profiler.profile_state(
                    'systemState{}'.format(i), result,
                    (datetime.now() - t_tmp).total_seconds())

                if self._logger:
                    self._logger.info("step was done in {}s".format(
                        info['buildingTime']))
                    self._logger.info(
                        "system state of step {} is consuming {} bytes in memory and {} bytes in disk"
                        .format(i, info['memoryUsed'], info['diskUsed']))

            if self._logger:
                self._profiler.log_rdd(app_id=app_id)

        if self._logger:
            self._logger.info("walk was done in {}s".format(
                (datetime.now() - t1).total_seconds()))

    t1 = datetime.now()

    if self._logger:
        self._logger.debug("checking if the final state is unitary...")

    if not result.is_unitary():
        if self._logger:
            self._logger.error("the final state is not unitary")
        raise ValueError("the final state is not unitary")

    if self._logger:
        self._logger.debug("unitarity check was done in {}s".format(
            (datetime.now() - t1).total_seconds()))

    if self._profiler:
        self._profiler.profile_resources(app_id)
        self._profiler.profile_executors(app_id)

        info = self._profiler.profile_state('finalState', result, 0.0)

        if self._logger:
            self._logger.info(
                "final state is consuming {} bytes in memory and {} bytes in disk"
                .format(info['memoryUsed'], info['diskUsed']))

        if Utils.get_conf(self._spark_context,
                          'dtqw.profiler.logExecutors',
                          default='False') == 'True':
            self._profiler.log_executors(app_id=app_id)

    return result
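# ---------------------------------------------------------------------------
# Editor's note: a dense, single-machine sketch of the evolution loop performed
# by walk() above for one particle: the current state is repeatedly multiplied
# by the walk operator and its norm is checked at the end. Everything here is
# illustrative; the real method works on distributed sparse RDDs and also
# handles interaction operators, checkpointing and profiling.
import math

def _dense_walk_sketch(walk_operator, initial_state, steps):
    state = list(initial_state)
    for _ in range(steps):
        state = [sum(row[j] * state[j] for j in range(len(state)))
                 for row in walk_operator]
    norm = math.sqrt(sum(abs(a) ** 2 for a in state))
    if round(norm, 10) != 1.0:
        raise ValueError("the final state is not unitary")
    return state

# Example: a trivial 2x2 "walk operator" (the identity) leaves the state as-is.
# _dense_walk_sketch([[1, 0], [0, 1]], [1 + 0j, 0j], steps=3)
# ---------------------------------------------------------------------------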
def create_walk_operator(self, coord_format=Utils.CoordinateDefault, storage_level=StorageLevel.MEMORY_AND_DISK): """ Build the walk operator for the walk. When performing a multiparticle walk, this method builds a list with n operators, where n is the number of particles of the system. In this case, each operator is built by applying a tensor product between the evolution operator and n-1 identity matrices as follows: W1 = W1 (X) I2 (X) ... (X) In Wi = I1 (X) ... (X) Ii-1 (X) Wi (X) Ii+1 (X) ... In Wn = I1 (X) ... (X) In-1 (X) Wn Regardless the number of particles, the walk operators have their (i,j,value) coordinates converted to appropriate coordinates for multiplication, in this case, the CoordinateMultiplier. Parameters ---------- coord_format : int, optional Indicate if the operator must be returned in an apropriate format for multiplications. Default value is CoordinateDefault. storage_level : StorageLevel, optional The desired storage level when materializing the RDD. """ app_id = self._spark_context.applicationId if self._coin_operator is None: if self._logger: self._logger.info( "no coin operator has been set. A new one will be built") self._coin_operator = self._coin.create_operator( self._mesh, coord_format=Utils.CoordinateMultiplicand, storage_level=storage_level) if self._profiler: if Utils.get_conf(self._spark_context, 'dtqw.profiler.logExecutors', default='False') == 'True': self._profiler.log_executors(app_id=app_id) if self._shift_operator is None: if self._logger: self._logger.info( "no shift operator has been set. A new one will be built") self._shift_operator = self._mesh.create_operator( coord_format=Utils.CoordinateMultiplier, storage_level=storage_level) if self._profiler: if Utils.get_conf(self._spark_context, 'dtqw.profiler.logExecutors', default='False') == 'True': self._profiler.log_executors(app_id=app_id) if self._num_particles == 1: if self._logger: self._logger.info( "with just one particle, the walk operator is the evolution operator" ) t1 = datetime.now() evolution_operator = self._shift_operator.multiply( self._coin_operator, coord_format=Utils.CoordinateMultiplier) eo = evolution_operator.persist(storage_level) if Utils.get_conf(self._spark_context, 'dtqw.walkOperator.checkpoint', default='False') == 'True': eo = eo.checkpoint() self._walk_operator = eo.materialize(storage_level) self._coin_operator.unpersist() self._shift_operator.unpersist() if self._profiler: self._profiler.profile_resources(app_id) self._profiler.profile_executors(app_id) info = self._profiler.profile_operator( 'walkOperator', self._walk_operator, (datetime.now() - t1).total_seconds()) if self._logger: self._logger.info("walk operator was built in {}s".format( info['buildingTime'])) self._logger.info( "walk operator is consuming {} bytes in memory and {} bytes in disk" .format(info['memoryUsed'], info['diskUsed'])) if Utils.get_conf(self._spark_context, 'dtqw.profiler.logExecutors', default='False') == 'True': self._profiler.log_executors(app_id=app_id) else: if self._logger: self._logger.info("building walk operator...") t_tmp = datetime.now() evolution_operator = self._shift_operator.multiply( self._coin_operator, coord_format=Utils.CoordinateDefault).persist( storage_level).materialize(storage_level) self._coin_operator.unpersist() self._shift_operator.unpersist() if Utils.get_conf(self._spark_context, 'dtqw.profiler.logExecutors', default='False') == 'True': self._profiler.log_executors(app_id=app_id) shape = evolution_operator.shape shape_tmp = shape self._walk_operator = [] 
kron_mode = Utils.get_conf(self._spark_context, 'dtqw.walkOperator.kroneckerMode', default='broadcast') if kron_mode == 'broadcast': eo = Utils.broadcast(self._spark_context, evolution_operator.data.collect()) evolution_operator.unpersist() for p in range(self._num_particles): if self._logger: self._logger.debug( "building walk operator for particle {}...".format( p + 1)) if p == 0: # The first particle's walk operator consists in applying the tensor product between the # evolution operator and the other particles' corresponding identity matrices # # W1 = U (X) I2 (X) ... (X) In rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p), shape_tmp[1]**(self._num_particles - 1 - p)) def __map(m): for i in eo.value: yield i[0] * rdd_shape[0] + m, i[ 1] * rdd_shape[1] + m, i[2] rdd = self._spark_context.range( rdd_shape[0]).flatMap(__map) shape = (rdd_shape[0] * shape_tmp[0], rdd_shape[1] * shape_tmp[1]) else: t_tmp = datetime.now() # For the other particles, each one has its operator built by applying the # tensor product between its previous particles' identity matrices and its evolution operator. # # Wi = I1 (X) ... (X) Ii-1 (X) U ... rdd_shape = (shape_tmp[0]**p, shape_tmp[1]**p) def __map(m): for i in eo.value: yield m * shape_tmp[0] + i[ 0], m * shape_tmp[1] + i[1], i[2] rdd = self._spark_context.range( rdd_shape[0]).flatMap(__map) shape = (rdd_shape[0] * shape_tmp[0], rdd_shape[1] * shape_tmp[1]) # Then, the tensor product is applied between the following particles' identity matrices. # # ... (X) Ii+1 (X) ... In # # If it is the last particle, the tensor product is applied between # the pre-identity and evolution operators # # ... (X) Ii-1 (X) U if p < self._num_particles - 1: rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p), shape_tmp[1]**(self._num_particles - 1 - p)) def __map(m): for i in range(rdd_shape[0]): yield m[0] * rdd_shape[0] + i, m[ 1] * rdd_shape[1] + i, m[2] rdd = rdd.flatMap(__map) shape = (rdd_shape[0] * shape[0], rdd_shape[1] * shape[1]) if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand: rdd = Utils.change_coordinate(rdd, Utils.CoordinateDefault, new_coord=coord_format) expected_elems = evolution_operator.num_nonzero_elements * evolution_operator.shape[ 0]**(self._num_particles - 1) expected_size = Utils.get_size_of_type( complex) * expected_elems num_partitions = Utils.get_num_partitions( self._spark_context, expected_size) if num_partitions: rdd = rdd.partitionBy(numPartitions=num_partitions) self._num_partitions = num_partitions wo = Operator( rdd, shape, coord_format=coord_format).persist(storage_level) if Utils.get_conf(self._spark_context, 'dtqw.walkOperator.checkpoint', default='False') == 'True': wo = wo.checkpoint() self._walk_operator.append(wo.materialize(storage_level)) if self._profiler: self._profiler.profile_resources(app_id) self._profiler.profile_executors(app_id) info = self._profiler.profile_operator( 'walkOperatorParticle{}'.format(p + 1), self._walk_operator[-1], (datetime.now() - t_tmp).total_seconds()) if self._logger: self._logger.info( "walk operator for particle {} was built in {}s" .format(p + 1, info['buildingTime'])) self._logger.info( "walk operator for particle {} is consuming {} bytes in memory and {} bytes in disk" .format(p + 1, info['memoryUsed'], info['diskUsed'])) if Utils.get_conf(self._spark_context, 'dtqw.profiler.logExecutors', default='False') == 'True': self._profiler.log_executors(app_id=app_id) eo.unpersist() elif kron_mode == 'dump': path = Utils.get_temp_path( 
Utils.get_conf(self._spark_context, 'dtqw.storage.tempPath', default='./')) evolution_operator.dump(path) for p in range(self._num_particles): if self._logger: self._logger.debug( "building walk operator for particle {}...".format( p + 1)) shape = shape_tmp if p == 0: # The first particle's walk operator consists in applying the tensor product between the # evolution operator and the other particles' corresponding identity matrices # # W1 = U (X) I2 (X) ... (X) In rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p), shape_tmp[1]**(self._num_particles - 1 - p)) def __map(m): with fileinput.input(files=glob(path + '/part-*')) as f: for line in f: l = line.split() yield int(l[0]) * rdd_shape[0] + m, int( l[1]) * rdd_shape[1] + m, complex(l[2]) rdd = self._spark_context.range( rdd_shape[0]).flatMap(__map) shape = (rdd_shape[0] * shape_tmp[0], rdd_shape[1] * shape_tmp[1]) else: t_tmp = datetime.now() # For the other particles, each one has its operator built by applying the # tensor product between its previous particles' identity matrices and its evolution operator. # # Wi = I1 (X) ... (X) Ii-1 (X) U ... rdd_shape = (shape_tmp[0]**p, shape_tmp[1]**p) def __map(m): with fileinput.input(files=glob(path + '/part-*')) as f: for line in f: l = line.split() yield m * shape_tmp[0] + int( l[0]), m * shape_tmp[1] + int( l[1]), complex(l[2]) rdd = self._spark_context.range( rdd_shape[0]).flatMap(__map) shape = (rdd_shape[0] * shape_tmp[0], rdd_shape[1] * shape_tmp[1]) # Then, the tensor product is applied between the following particles' identity matrices. # # ... (X) Ii+1 (X) ... In # # If it is the last particle, the tensor product is applied between # the pre-identity and evolution operators # # ... (X) Ii-1 (X) U if p < self._num_particles == 1: rdd_shape = (shape_tmp[0]**(self._num_particles - 1 - p), shape_tmp[1]**(self._num_particles - 1 - p)) def __map(m): for i in range(rdd_shape[0]): yield m[0] * rdd_shape[0] + i, m[ 1] * rdd_shape[1] + i, m[2] rdd = rdd.flatMap(__map) shape = (rdd_shape[0] * shape_tmp[0], rdd_shape[1] * shape_tmp[1]) if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand: rdd = Utils.change_coordinate(rdd, Utils.CoordinateDefault, new_coord=coord_format) expected_elems = evolution_operator.num_nonzero_elements * evolution_operator.shape[ 0]**(self._num_particles - 1) expected_size = Utils.get_size_of_type( complex) * expected_elems num_partitions = Utils.get_num_partitions( self._spark_context, expected_size) if num_partitions: rdd = rdd.partitionBy(numPartitions=num_partitions) self._num_partitions = num_partitions wo = Operator( rdd, shape, coord_format=coord_format).persist(storage_level) if Utils.get_conf(self._spark_context, 'dtqw.walkOperator.checkpoint', default='False') == 'True': wo = wo.checkpoint() self._walk_operator.append(wo.materialize(storage_level)) if self._profiler: self._profiler.profile_resources(app_id) self._profiler.profile_executors(app_id) info = self._profiler.profile_operator( 'walkOperatorParticle{}'.format(p + 1), self._walk_operator[-1], (datetime.now() - t_tmp).total_seconds()) if self._logger: self._logger.info( "walk operator for particle {} was built in {}s" .format(p + 1, info['buildingTime'])) self._logger.info( "walk operator for particle {} is consuming {} bytes in memory and {} bytes in disk" .format(p + 1, info['memoryUsed'], info['diskUsed'])) if Utils.get_conf(self._spark_context, 'dtqw.profiler.logExecutors', default='False') == 'True': self._profiler.log_executors(app_id=app_id) 
            evolution_operator.unpersist()
            Utils.remove_path(path)
        else:
            if self._logger:
                self._logger.error("invalid kronecker mode")
            raise ValueError("invalid kronecker mode")
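# ---------------------------------------------------------------------------
# Editor's note: a dense sketch of the construction performed above, i.e. the
# walk operator of particle p is the evolution operator U surrounded by the
# identity matrices of the other particles:
#   W_p = I^(x p) (x) U (x) I^(x (n - 1 - p)).
# The helpers below are illustrative only; the real code builds the same
# product lazily over the distributed sparse representation of U.
def _dense_kron(a, b):
    return [[va * vb for va in ra for vb in rb]
            for ra in a for rb in b]

def _dense_identity(n):
    return [[1 if i == j else 0 for j in range(n)] for i in range(n)]

def _particle_walk_operator_sketch(evolution_operator, num_particles, particle):
    dim = len(evolution_operator)
    op = _dense_identity(1)  # 1x1 neutral element for the Kronecker product
    for p in range(num_particles):
        factor = evolution_operator if p == particle else _dense_identity(dim)
        op = _dense_kron(op, factor)
    return op

# Example: for 2 particles and a 2x2 U, particle 0 gets U (x) I (a 4x4 matrix).
# ---------------------------------------------------------------------------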
def create_interaction_operator( self, coord_format=Utils.CoordinateDefault, storage_level=StorageLevel.MEMORY_AND_DISK): """ Build the particles' interaction operator for the walk. Parameters ---------- coord_format : int, optional Indicate if the operator must be returned in an apropriate format for multiplications. Default value is CoordinateDefault. storage_level : StorageLevel, optional The desired storage level when materializing the RDD. Raises ------ ValueError """ if not self._phase: if self._logger: self._logger.error( 'No collision phase or a zeroed collision phase was informed' ) raise ValueError( 'No collision phase or a zeroed collision phase was informed') if self._logger: self._logger.info("building interaction operator...") t1 = datetime.now() phase = cmath.exp(self._phase * (0.0 + 1.0j)) num_particles = self._num_particles coin_size = 2 if self._mesh.is_1d(): size = self._mesh.size cs_size = coin_size * size rdd_range = cs_size**num_particles shape = (rdd_range, rdd_range) def __map(m): x = [] for p in range(num_particles): x.append( int(m / (cs_size**(num_particles - 1 - p))) % size) for p1 in range(num_particles): for p2 in range(num_particles): if p1 != p2 and x[p1] == x[p2]: return m, m, phase return m, m, 1 elif self._mesh.is_2d(): size_x = self._mesh.size[0] size_y = self._mesh.size[1] cs_size_x = coin_size * size_x cs_size_y = coin_size * size_y cs_size_xy = cs_size_x * cs_size_y rdd_range = cs_size_xy**num_particles shape = (rdd_range, rdd_range) def __map(m): xy = [] for p in range(num_particles): xy.append( (int(m / (cs_size_xy**(num_particles - 1 - p) * size_y)) % size_x, int(m / (cs_size_xy**(num_particles - 1 - p))) % size_y)) for p1 in range(num_particles): for p2 in range(num_particles): if p1 != p2 and xy[p1][0] == xy[p2][0] and xy[p1][ 1] == xy[p2][1]: return m, m, phase return m, m, 1 else: if self._logger: self._logger.error("mesh dimension not implemented") raise NotImplementedError("mesh dimension not implemented") rdd = self._spark_context.range(rdd_range).map(__map) if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand: rdd = Utils.change_coordinate(rdd, Utils.CoordinateDefault, new_coord=coord_format) # The walk operators must be guaranteed to be previously built # in order to the number of partitions be already known. # Using the same number of partitions is important to avoid shuffle # when multiplying the state by the operators. 
        num_partitions = self._num_partitions

        if not num_partitions:
            expected_elems = rdd_range
            expected_size = Utils.get_size_of_type(complex) * expected_elems
            num_partitions = Utils.get_num_partitions(self._spark_context,
                                                      expected_size)

        if num_partitions:
            rdd = rdd.partitionBy(numPartitions=num_partitions)

    io = Operator(rdd, shape,
                  coord_format=coord_format).persist(storage_level)

    if Utils.get_conf(self._spark_context,
                      'dtqw.interactionOperator.checkpoint',
                      default='False') == 'True':
        io = io.checkpoint()

    self._interaction_operator = io.materialize(storage_level)

    app_id = self._spark_context.applicationId

    if self._profiler:
        self._profiler.profile_resources(app_id)
        self._profiler.profile_executors(app_id)

        info = self._profiler.profile_operator(
            'interactionOperator', self._interaction_operator,
            (datetime.now() - t1).total_seconds())

        if self._logger:
            self._logger.info(
                "interaction operator was built in {}s".format(
                    info['buildingTime']))
            self._logger.info(
                "interaction operator is consuming {} bytes in memory and {} bytes in disk"
                .format(info['memoryUsed'], info['diskUsed']))

        if Utils.get_conf(self._spark_context,
                          'dtqw.profiler.logExecutors',
                          default='False') == 'True':
            self._profiler.log_executors(app_id=app_id)
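# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the diagonal built above for a 1D mesh.
# For every basis index m of the composite (coin x position)^n space, the
# position of each particle is decoded and the diagonal entry is the collision
# phase when at least two particles share a site, and 1 otherwise. Names are
# illustrative only.
import cmath

def _interaction_diagonal_sketch(size, num_particles, phase_angle, coin_size=2):
    phase = cmath.exp(1j * phase_angle)
    cs_size = coin_size * size
    entries = []
    for m in range(cs_size ** num_particles):
        x = [(m // (cs_size ** (num_particles - 1 - p))) % size
             for p in range(num_particles)]
        collided = any(x[p1] == x[p2]
                       for p1 in range(num_particles)
                       for p2 in range(num_particles) if p1 != p2)
        entries.append((m, m, phase if collided else 1))
    return entries

# Example: 2 particles on a 2-site mesh give a 16x16 diagonal operator whose
# entries are e^(i*phase_angle) wherever both particles occupy the same site.
# ---------------------------------------------------------------------------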
def create_operator(self, coord_format=Utils.CoordinateDefault, storage_level=StorageLevel.MEMORY_AND_DISK): """ Build the shift operator for the walk. Parameters ---------- coord_format : bool, optional Indicate if the operator must be returned in an apropriate format for multiplications. Default value is Utils.CoordinateDefault. storage_level : StorageLevel, optional The desired storage level when materializing the RDD. Default value is StorageLevel.MEMORY_AND_DISK. Returns ------- Operator Raises ------ ValueError """ if self._logger: self._logger.info("building shift operator...") initial_time = datetime.now() coin_size = 2 size = self._size num_edges = self._num_edges size_xy = size[0] * size[1] shape = (coin_size * coin_size * size_xy, coin_size * coin_size * size_xy) if self._broken_links: broken_links = self._broken_links.generate(num_edges) generation_mode = Utils.get_conf( self._spark_context, 'dtqw.mesh.brokenLinks.generationMode', default='broadcast') if generation_mode == 'rdd': def __map(e): """e = (edge, (edge, broken or not))""" for i in range(coin_size): l = (-1)**i # Finding the correspondent x,y coordinates of the vertex from the edge number if e[1][0] >= size[0] * size[1]: j = i x = int((e[1][0] - size[0] * size[1]) / size[0]) y = ((e[1][0] - size[0] * size[1]) % size[1] - i - l) % size[1] else: j = int(not i) x = (e[1][0] % size[0] - i - l) % size[0] y = int(e[1][0] / size[0]) delta = int(not (i ^ j)) if e[1][1]: l = 0 m = ((i + l) * coin_size + (abs(j + l) % coin_size)) * size_xy + \ ((x + l * (1 - delta)) % size[0]) * size[1] + (y + l * delta) % size[1] n = ((1 - i) * coin_size + (1 - j)) * size_xy + x * size[1] + y yield m, n, 1 rdd = self._spark_context.range(num_edges).map(lambda m: ( m, m)).leftOuterJoin(broken_links).flatMap(__map) elif generation_mode == 'broadcast': def __map(e): """e = (edge, (edge, broken or not))""" for i in range(coin_size): l = (-1)**i # Finding the correspondent x,y coordinates of the vertex from the edge number if e >= size[0] * size[1]: j = i delta = int(not (i ^ j)) x = int((e - size[0] * size[1]) / size[0]) y = ((e - size[0] * size[1]) % size[1] - i - l) % size[1] else: j = int(not i) delta = int(not (i ^ j)) x = (e % size[0] - i - l) % size[0] y = int(e / size[0]) if e in broadcast.value: bl = 0 else: bl = l m = ((i + bl) * coin_size + (abs(j + bl) % coin_size)) * size_xy + \ ((x + bl * (1 - delta)) % size[0]) * size[1] + (y + bl * delta) % size[1] n = ((1 - i) * coin_size + (1 - j)) * size_xy + x * size[1] + y yield m, n, 1 rdd = self._spark_context.range(num_edges).flatMap(__map) else: if self._logger: self._logger.error("invalid broken links generation mode") raise ValueError("invalid broken links generation mode") else: def __map(xy): x = xy % size[0] y = int(xy / size[0]) for i in range(coin_size): l = (-1)**i for j in range(coin_size): delta = int(not (i ^ j)) m = (i * coin_size + j) * size_xy + \ ((x + l * (1 - delta)) % size[0]) * size[1] + (y + l * delta) % size[1] n = (i * coin_size + j) * size_xy + x * size[1] + y yield m, n, 1 rdd = self._spark_context.range(size_xy).flatMap(__map) if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand: rdd = Utils.change_coordinate(rdd, Utils.CoordinateDefault, new_coord=coord_format) expected_elems = coin_size**2 * size_xy expected_size = Utils.get_size_of_type(int) * expected_elems num_partitions = Utils.get_num_partitions(self._spark_context, expected_elems) if num_partitions: rdd = rdd.partitionBy(numPartitions=num_partitions) operator = 
        Operator(rdd, shape, data_type=int,
                 coord_format=coord_format).materialize(storage_level)

    if self._broken_links:
        broken_links.unpersist()

    self._profile(operator, initial_time)

    return operator
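# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the (row, col, value) entries produced
# by the no-broken-links branch above for a 2D torus. For each coin state
# (i, j), the walker is shifted by +/-1 along y when i == j and along x when
# i != j, with periodic boundaries (this mirrors the delta/l arithmetic in
# __map). Names are illustrative only.
def _torus_shift_entries_sketch(size_x, size_y, coin_size=2):
    size_xy = size_x * size_y
    entries = []
    for x in range(size_x):
        for y in range(size_y):
            for i in range(coin_size):
                l = (-1) ** i
                for j in range(coin_size):
                    delta = int(not (i ^ j))  # 1 when i == j, 0 otherwise
                    row = ((i * coin_size + j) * size_xy
                           + ((x + l * (1 - delta)) % size_x) * size_y
                           + (y + l * delta) % size_y)
                    col = (i * coin_size + j) * size_xy + x * size_y + y
                    entries.append((row, col, 1))
    return entries

# Example: a 2x2 torus yields a 16x16 permutation matrix with 16 unit entries.
# len(_torus_shift_entries_sketch(2, 2)) == 16
# ---------------------------------------------------------------------------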
def measure_particle(self, particle, storage_level=StorageLevel.MEMORY_AND_DISK): """ Perform the partial measurement of a particle of the system state. Parameters ---------- particle : int The desired particle to be measured. The particle number starts by 0. storage_level : StorageLevel The desired storage level when materializing the RDD. Returns ------- :obj:MarginalPDF The PDF of each particle. Raises ------ NotImplementedError ValueError """ if particle >= self._num_particles: if self._logger: self._logger.error("invalid particle number") raise ValueError("invalid particle number") if self._logger: self._logger.info( "measuring the state of the system for particle {}...".format( particle + 1)) t1 = datetime.now() coin_size = 2 if self._mesh.is_1d(): num_particles = self._num_particles size = self._mesh.size expected_elems = size cs_size = coin_size * size shape = (size, 1) def __map(m): x = int(m[0] / (cs_size**(num_particles - 1 - particle))) % size return x, (abs(m[1])**2).real def __unmap(m): return m elif self._mesh.is_2d(): num_particles = self._num_particles size_x = self._mesh.size[0] size_y = self._mesh.size[1] expected_elems = size_x * size_y cs_size_x = coin_size * size_x cs_size_y = coin_size * size_y cs_size_xy = cs_size_x * cs_size_y shape = (size_x, size_y) def __map(m): xy = (int( m[0] / (cs_size_xy**(num_particles - 1 - particle) * size_y)) % size_x, int(m[0] / (cs_size_xy**(num_particles - 1 - particle))) % size_y) return xy, (abs(m[1])**2).real def __unmap(m): return m[0][0], m[0][1], m[1] else: if self._logger: self._logger.error("mesh dimension not implemented") raise NotImplementedError("mesh dimension not implemented") expected_size = Utils.get_size_of_type(float) * expected_elems num_partitions = Utils.get_num_partitions(self.data.context, expected_size) data_type = self._data_type() rdd = self.data.filter(lambda m: m[1] != data_type).map( __map).reduceByKey(lambda a, b: a + b, numPartitions=num_partitions).map(__unmap) pdf = MarginalPDF(rdd, shape, self._mesh, self._num_particles).materialize(storage_level) if self._logger: self._logger.info("checking if the probabilities sum one...") round_precision = int( Utils.get_conf(self._spark_context, 'dtqw.math.roundPrecision', default='10')) if round(pdf.sum_values(), round_precision) != 1.0: if self._logger: self._logger.error("PDFs must sum one") raise ValueError("PDFs must sum one") app_id = self._spark_context.applicationId if self._profiler: self._profiler.profile_resources(app_id) self._profiler.profile_executors(app_id) info = self._profiler.profile_pdf( 'partialMeasurementParticle{}'.format(particle + 1), pdf, (datetime.now() - t1).total_seconds()) if self._logger: self._logger.info( "partial measurement for particle {} was done in {}s". format(particle + 1, info['buildingTime'])) self._logger.info( "PDF with partial measurements for particle {} " "are consuming {} bytes in memory and {} bytes in disk". format(particle + 1, info['memoryUsed'], info['diskUsed'])) self._profiler.log_rdd(app_id=app_id) return pdf
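# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the marginal measurement above for a
# 1D mesh. The position of the chosen particle is decoded from each basis
# index and the squared moduli of the amplitudes are accumulated per site
# (the dict plays the role of reduceByKey). Names are illustrative only.
def _marginal_pdf_sketch(amplitudes, size, num_particles, particle,
                         coin_size=2):
    """amplitudes: iterable of (basis_index, amplitude) pairs."""
    cs_size = coin_size * size
    pdf = {}
    for m, amp in amplitudes:
        x = (m // (cs_size ** (num_particles - 1 - particle))) % size
        pdf[x] = pdf.get(x, 0.0) + abs(amp) ** 2
    return pdf

# Example: a single particle equally spread over sites 0 and 1 of a 2-site
# mesh (coin index 0, amplitudes 1/sqrt(2) each) gives {0: 0.5, 1: 0.5}.
# ---------------------------------------------------------------------------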
def measure_collision(self, full_measurement, storage_level=StorageLevel.MEMORY_AND_DISK): """ Filter the measurement of the entire system by checking when all particles are located at the same site of the mesh. Parameters ---------- full_measurement : :obj:PDF The measurement of the entire system. storage_level : StorageLevel The desired storage level when materializing the RDD. Returns ------- :obj:CollisionPDF The PDF of the system when all particles are located at the same site. Raises ------ NotImplementedError """ if self._num_particles <= 1: if self._logger: self._logger.error( "the measurement of collision cannot be performed for quantum walks with only one particle" ) raise NotImplementedError( "the measurement of collision cannot be performed for quantum walks with only one particle" ) if self._logger: self._logger.info( "measuring the state of the system considering that the particles are at the same positions..." ) t1 = datetime.now() if not is_pdf(full_measurement): if self._logger: self._logger.error('PDF instance expected, not "{}"'.format( type(full_measurement))) raise TypeError('PDF instance expected, not "{}"'.format( type(full_measurement))) if self._mesh.is_1d(): ndim = 1 num_particles = self._num_particles ind = ndim * num_particles size = self._mesh.size expected_elems = size shape = (size, 1) def __filter(m): for p in range(num_particles): if m[0] != m[p]: return False return True def __map(m): return m[0], m[ind] elif self._mesh.is_2d(): ndim = 2 num_particles = self._num_particles ind = ndim * num_particles size_x = self._mesh.size[0] size_y = self._mesh.size[1] expected_elems = size_x * size_y shape = (size_x, size_y) def __filter(m): for p in range(0, ind, ndim): if m[0] != m[p] or m[1] != m[p + 1]: return False return True def __map(m): return m[0], m[1], m[ind] else: if self._logger: self._logger.error("mesh dimension not implemented") raise NotImplementedError("mesh dimension not implemented") expected_size = Utils.get_size_of_type(float) * expected_elems num_partitions = Utils.get_num_partitions(self.data.context, expected_size) rdd = full_measurement.data.filter(__filter).map(__map).coalesce( num_partitions) pdf = CollisionPDF(rdd, shape, self._mesh, self._num_particles).materialize(storage_level) app_id = self._spark_context.applicationId if self._profiler: self._profiler.profile_resources(app_id) self._profiler.profile_executors(app_id) info = self._profiler.profile_pdf('collisionMeasurement', pdf, (datetime.now() - t1).total_seconds()) if self._logger: self._logger.info( "collision measurement was done in {}s".format( info['buildingTime'])) self._logger.info( "PDF with collision measurement is consuming {} bytes in memory and {} bytes in disk" .format(info['memoryUsed'], info['diskUsed'])) self._profiler.log_rdd(app_id=app_id) return pdf
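# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the collision filter above for a 1D
# mesh. Each entry of the full (joint) measurement is a tuple with one
# position per particle followed by the probability; only the entries where
# all particles sit on the same site are kept. Names are illustrative only.
def _collision_pdf_sketch(joint_pdf_entries, num_particles):
    collisions = []
    for entry in joint_pdf_entries:
        positions, probability = entry[:num_particles], entry[num_particles]
        if all(x == positions[0] for x in positions):
            collisions.append((positions[0], probability))
    return collisions

# Example: for two particles, (3, 3, 0.25) is kept as (3, 0.25) while
# (2, 3, 0.10) is discarded.
# _collision_pdf_sketch([(3, 3, 0.25), (2, 3, 0.10)], 2) == [(3, 0.25)]
# ---------------------------------------------------------------------------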
def measure_system(self, storage_level=StorageLevel.MEMORY_AND_DISK): """ Perform the measurement of the entire system state. Parameters ---------- storage_level : StorageLevel The desired storage level when materializing the RDD. Returns ------- :obj:JointPDF The PDF of the entire system. Raises ------ NotImplementedError ValueError """ if self._logger: self._logger.info("measuring the state of the system...") t1 = datetime.now() coin_size = 2 if self._mesh.is_1d(): ndim = 1 num_particles = self._num_particles ind = ndim * num_particles size = self._mesh.size expected_elems = size cs_size = coin_size * size dims = [size for p in range(ind)] if self._num_particles == 1: dims.append(1) shape = tuple(dims) def __map(m): x = [] for p in range(num_particles): x.append( int(m[0] / (cs_size**(num_particles - 1 - p))) % size) return tuple(x), (abs(m[1])**2).real def __unmap(m): a = [] for p in range(num_particles): a.append(m[0][p]) a.append(m[1]) return tuple(a) elif self._mesh.is_2d(): ndim = 2 num_particles = self._num_particles ind = ndim * num_particles dims = [] for p in range(0, ind, ndim): dims.append(self._mesh.size[0]) dims.append(self._mesh.size[1]) size_x = self._mesh.size[0] size_y = self._mesh.size[1] expected_elems = size_x * size_y cs_size_x = coin_size * size_x cs_size_y = coin_size * size_y cs_size_xy = cs_size_x * cs_size_y shape = tuple(dims) def __map(m): xy = [] for p in range(num_particles): xy.append( int(m[0] / (cs_size_xy**(num_particles - 1 - p) * size_y)) % size_x) xy.append( int(m[0] / (cs_size_xy**(num_particles - 1 - p))) % size_y) return tuple(xy), (abs(m[1])**2).real def __unmap(m): xy = [] for p in range(0, ind, ndim): xy.append(m[0][p]) xy.append(m[0][p + 1]) xy.append(m[1]) return tuple(xy) else: if self._logger: self._logger.error("mesh dimension not implemented") raise NotImplementedError("mesh dimension not implemented") expected_size = Utils.get_size_of_type(float) * expected_elems num_partitions = Utils.get_num_partitions(self.data.context, expected_size) data_type = self._data_type() rdd = self.data.filter(lambda m: m[1] != data_type).map( __map).reduceByKey(lambda a, b: a + b, numPartitions=num_partitions).map(__unmap) pdf = JointPDF(rdd, shape, self._mesh, self._num_particles).materialize(storage_level) if self._logger: self._logger.info("checking if the probabilities sum one...") round_precision = int( Utils.get_conf(self._spark_context, 'dtqw.math.roundPrecision', default='10')) if round(pdf.sum_values(), round_precision) != 1.0: if self._logger: self._logger.error("PDFs must sum one") raise ValueError("PDFs must sum one") app_id = self._spark_context.applicationId if self._profiler: self._profiler.profile_resources(app_id) self._profiler.profile_executors(app_id) info = self._profiler.profile_pdf( 'fullMeasurement', pdf, (datetime.now() - t1).total_seconds()) if self._logger: self._logger.info("full measurement was done in {}s".format( info['buildingTime'])) self._logger.info( "PDF with full measurement is consuming {} bytes in memory and {} bytes in disk" .format(info['memoryUsed'], info['diskUsed'])) self._profiler.log_rdd(app_id=app_id) return pdf
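# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the full measurement above for a 1D
# mesh. Every particle's position is decoded from the flat basis index and the
# squared moduli of the amplitudes are accumulated per position tuple. Names
# are illustrative only.
def _joint_pdf_sketch(amplitudes, size, num_particles, coin_size=2):
    """amplitudes: iterable of (basis_index, amplitude) pairs."""
    cs_size = coin_size * size
    pdf = {}
    for m, amp in amplitudes:
        positions = tuple((m // (cs_size ** (num_particles - 1 - p))) % size
                          for p in range(num_particles))
        pdf[positions] = pdf.get(positions, 0.0) + abs(amp) ** 2
    return pdf

# Example: two particles, both certainly at site 1 of a 2-site mesh (coin
# indices 0), give {(1, 1): 1.0}.
# ---------------------------------------------------------------------------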
def create_operator(self, mesh, coord_format=Utils.CoordinateDefault,
                    storage_level=StorageLevel.MEMORY_AND_DISK):
    """
    Build the coin operator for the walk.

    Parameters
    ----------
    mesh : Mesh
        A Mesh instance.
    coord_format : int, optional
        Indicate if the operator must be returned in an appropriate format for multiplications.
        Default value is Utils.CoordinateDefault.
    storage_level : StorageLevel, optional
        The desired storage level when materializing the RDD.
        Default value is StorageLevel.MEMORY_AND_DISK.

    Returns
    -------
    Operator

    Raises
    ------
    TypeError
        If mesh is not a Mesh instance.
    ValueError
        If the coin and mesh dimensions do not correspond.

    """
    if self._logger:
        self._logger.info("building coin operator...")

    initial_time = datetime.now()

    if not is_mesh(mesh):
        if self._logger:
            self._logger.error("expected mesh, not {}".format(type(mesh)))
        raise TypeError("expected mesh, not {}".format(type(mesh)))

    if not mesh.is_2d():
        if self._logger:
            self._logger.error(
                "non-corresponding coin and mesh dimensions")
        raise ValueError("non-corresponding coin and mesh dimensions")

    mesh_size = mesh.size[0] * mesh.size[1]
    shape = (self._data.shape[0] * mesh_size,
             self._data.shape[1] * mesh_size)
    data = Utils.broadcast(self._spark_context, self._data)

    # The coin operator is built by applying a tensor product between the chosen coin and
    # an identity matrix with the dimensions of the chosen mesh.
    def __map(xy):
        for i in range(data.value.shape[0]):
            for j in range(data.value.shape[1]):
                yield (i * mesh_size + xy, j * mesh_size + xy,
                       data.value[i][j])

    rdd = self._spark_context.range(mesh_size).flatMap(__map)

    if coord_format == Utils.CoordinateMultiplier or coord_format == Utils.CoordinateMultiplicand:
        rdd = Utils.change_coordinate(rdd, Utils.CoordinateDefault,
                                      new_coord=coord_format)

        expected_elems = len(self._data) * mesh_size
        expected_size = Utils.get_size_of_type(complex) * expected_elems
        num_partitions = Utils.get_num_partitions(self._spark_context,
                                                  expected_size)

        if num_partitions:
            rdd = rdd.partitionBy(numPartitions=num_partitions)

    operator = Operator(rdd, shape,
                        coord_format=coord_format).materialize(storage_level)

    self._profile(operator, initial_time)

    return operator
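# ---------------------------------------------------------------------------
# Editor's note: a Spark-free sketch of the entries produced by the flatMap
# above: the coin operator is the coin matrix C tensored with an identity of
# the mesh size, so each site xy only couples coin components with each other.
# The 2x2 Hadamard below is just an illustration; the real method uses the
# coin matrix stored in self._data (4x4 for a 2D mesh).
import math

def _coin_operator_entries_sketch(coin, mesh_size):
    entries = []
    for xy in range(mesh_size):
        for i in range(len(coin)):
            for j in range(len(coin[i])):
                entries.append((i * mesh_size + xy, j * mesh_size + xy,
                                coin[i][j]))
    return entries

_hadamard = [[1 / math.sqrt(2), 1 / math.sqrt(2)],
             [1 / math.sqrt(2), -1 / math.sqrt(2)]]
# Example: a 5-site mesh gives a 10x10 operator with 4 * 5 = 20 nonzero entries.
# len(_coin_operator_entries_sketch(_hadamard, 5)) == 20
# ---------------------------------------------------------------------------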