def k_means_distance(self, centers, result_name=None):
    """
    Computes the distance between each row and each of the given center vectors for k-means

    Runs a KMeansDistanceJob and then, for every part name returned by the job,
    pops all serialized arrays from the redis list of that name, adds them up
    element-wise and stores the element-wise square root of the total as block
    (p, 0) of the result matrix.

    centers     -- matrix holding the center vectors; must have as many columns as this matrix
    result_name -- name of the result matrix; a random name is generated if omitted

    Returns a Matrix with as many rows as this matrix and centers.shape[0] columns.
    Raises BaseException if the column counts of this matrix and centers differ.
    """
    if centers.shape[1] != self.__cols:
        raise BaseException('Dimensions of matrix and centers do not match')
    if result_name is None:
        result_name = MatrixFactory.getRandomMatrixName()

    redis_master = self.context.redis_master
    redwrap = RedisWrapper(redis_master, self.context.key_manager)
    prefix = 'dist(' + self.__name + ',' + centers.name() + ')'
    dist_job = kmeans_jobs.KMeansDistanceJob(self.context, self, centers, prefix)
    parts = dist_job.run()

    for p, part_name in enumerate(parts):
        m = redis_master.lpop(part_name)
        total = None
        while m is not None:
            # NOTE(review): numpy.loads unpickles the payload and is deprecated in
            # newer NumPy releases -- confirm the NumPy version this runs against
            partial = numpy.loads(m)
            # Identity test on purpose: "== None" on a numpy array is evaluated
            # element-wise and cannot be used as an if-condition
            if total is None:
                total = partial
            else:
                total += partial
            m = redis_master.lpop(part_name)
        redis_master.delete(part_name)
        # NOTE(review): total is still None here if the list was empty -- confirm
        # that the job always pushes at least one array per part
        redwrap.create_block(self.context.key_manager.get_block_name(result_name, p, 0), numpy.sqrt(total))

    return Matrix(self.__rows, centers.shape[0], result_name, self.context)
def from_numpy(mat, context, name=None):
    """
    Creates a matrix from a numpy matrix

    The input is cut into blocks of context.block_size rows and columns and
    every block is sent to the redis server.

    mat     -- two-dimensional numpy matrix to upload
    context -- provides redis_master, key_manager and block_size
    name    -- name of the new matrix; a random name is generated if omitted

    Returns the new Matrix. Raises BaseException if mat is not two-dimensional
    or if a matrix with the given name already exists on the redis server.
    """
    if len(mat.shape) != 2:
        raise BaseException('Shape of input matrix must be of size 2')

    if name is None:
        name = MatrixFactory.getRandomMatrixName()

    # Refuse to overwrite a matrix that is already registered under this name
    info_key = const.INFO_FORMAT.format(name)
    if context.redis_master.exists(info_key):
        raise BaseException('A matrix with this name already exists on the redis server')

    rows = mat.shape[0]
    cols = mat.shape[1]

    redwrap = RedisWrapper(context.redis_master, context.key_manager)
    m = Matrix(rows, cols, name, context)

    # Cut the input into blocks and upload them one by one
    for j in range(0, m.row_blocks()):
        for i in range(0, m.col_blocks()):
            block_name = m.block_name(j, i)
            # The upper bounds use rows + 1 / cols + 1; numpy slicing clips at the
            # array edge, so the extra element has no effect on the result
            row_from = max(j * context.block_size, 0)
            row_to = min((j + 1) * context.block_size, rows + 1)
            col_from = max(i * context.block_size, 0)
            col_to = min((i + 1) * context.block_size, cols + 1)
            redwrap.create_block(block_name, mat[row_from:row_to, col_from:col_to])

    return m
def toScalar(self):
    """
    Returns a scalar if the matrix consists of only one cell

    Raises exceptions.MatrixOperationException if the matrix has more than
    one row or more than one column.
    """
    redwrap = RedisWrapper(self.context.redis_master, self.context.key_manager)

    is_single_cell = self.shape[0] == 1 and self.shape[1] == 1
    if not is_single_cell:
        raise exceptions.MatrixOperationException('Cannot convert a matrix with more than one column and row to a scalar', 'MATRIX2SCALAR')

    # The only cell lives at position [0, 0] of block (0, 0)
    only_block = redwrap.get_block(self.block_name(0, 0))
    return only_block[0, 0]
def print_blocks(self):
    """
    Prints each block

    For every block, in row-major block order, the block name, the string
    form of the block and a '----' separator line are printed.
    """
    redwrap = RedisWrapper(self.context.redis_master, self.context.key_manager)
    for row in range(0, self.row_blocks()):
        for col in range(0, self.col_blocks()):
            name = self.block_name(row, col)
            n = redwrap.get_block(name)
            # Single-argument parenthesized print produces the same output under
            # the Python 2 print statement and the Python 3 print function
            print(name)
            print(str(n))
            print('----')
def get_cell_value(self, row, col):
    """
    Returns the value of a single matrix cell

    row -- row index of the cell within the whole matrix
    col -- column index of the cell within the whole matrix
    """
    redwrap = RedisWrapper(self.context.redis_master, self.context.key_manager)

    # Split each index into the block that holds it and the position inside that block
    block_row, offset_row = divmod(row, self.__block_size)
    block_col, offset_col = divmod(col, self.__block_size)

    block = redwrap.get_block(self.block_name(int(block_row), int(block_col)))
    return block[offset_row, offset_col]
def redis2file(name, redis, key_manager, file_format):
    """
    Writes every block of a matrix stored in redis to its own text file

    name        -- name of the matrix; its info hash is read via const.INFO_FORMAT
    redis       -- redis client used to read the info hash
    key_manager -- provides get_block_name(name, row, col)
    file_format -- format string that receives the block row and block column
                   and yields the path of the file for that block

    Blocks are written with numpy.savetxt using ';' as delimiter and '%f' as format.
    """
    rw = RedisWrapper(redis, key_manager)
    info = redis.hgetall(const.INFO_FORMAT.format(name))
    rows = int(info['rows'])
    cols = int(info['cols'])
    block_size = int(info['block_size'])

    # Round up so that a trailing partial block (rows or cols not divisible by
    # block_size) is exported too; plain division would silently skip it
    row_blocks = (rows + block_size - 1) // block_size
    col_blocks = (cols + block_size - 1) // block_size

    for row in range(0, row_blocks):
        for col in range(0, col_blocks):
            path = file_format.format(row, col)
            matrix = rw.get_block(key_manager.get_block_name(name, row, col))
            numpy.savetxt(path, matrix, delimiter=';', fmt='%f')
def delete_matrix(self, name):
    """
    Deletes all blocks of the named matrix and then its info hash

    name -- name of the matrix; its dimensions and block size are read from
            the info hash stored under const.INFO_FORMAT
    """
    info_key = const.INFO_FORMAT.format(name)
    info = self.redis_master.hgetall(info_key)
    rows = int(info['rows'])
    cols = int(info['cols'])
    block_size = int(info['block_size'])

    # Round up so that trailing partial blocks are removed as well; plain
    # division would leave them behind whenever the dimensions are not a
    # multiple of block_size
    row_blocks = (rows + block_size - 1) // block_size
    col_blocks = (cols + block_size - 1) // block_size

    redwrap = RedisWrapper(self.redis_master, self.key_manager)
    for row in range(0, row_blocks):
        for col in range(0, col_blocks):
            block_name = self.key_manager.get_block_name(name, row, col)
            redwrap.delete_block(block_name)
    self.redis_master.delete(info_key)
def set_cell_value(self, row, col, val):
    """
    Sets the value of a single matrix cell

    The block holding the cell is fetched, modified and written back as a whole.

    row -- row index of the cell within the whole matrix
    col -- column index of the cell within the whole matrix
    val -- new value of the cell
    """
    redwrap = RedisWrapper(self.context.redis_master, self.context.key_manager)

    # Split each index into the block that holds it and the position inside that block
    block_row, offset_row = divmod(row, self.__block_size)
    block_col, offset_col = divmod(col, self.__block_size)

    target_name = self.block_name(int(block_row), int(block_col))
    target = redwrap.get_block(target_name)
    target[offset_row, offset_col] = val
    redwrap.create_block(target_name, target)
def __aggr(self, inner_aggr, outer_aggr, axis=None, expr='x', result_name=None):
    """
    Aggregates the matrix in two stages

    The first stage is delegated to __inner_aggr, which returns the name prefix
    of the intermediate per-block results. The second stage combines those
    intermediate results with outer_aggr and removes them afterwards.

    inner_aggr  -- passed through to __inner_aggr
    outer_aggr  -- expression used to combine intermediate results
    axis        -- 0, 1 or None
    expr        -- passed through to __inner_aggr
    result_name -- name of the result matrix; a random name is generated if omitted

    Returns a Matrix with one row for axis 0, a Matrix with one column for
    axis 1 and a plain value for axis None. For any other axis nothing is
    combined and None is returned.
    """
    if result_name is None:
        result_name = MatrixFactory.getRandomMatrixName()

    prefix = self.__inner_aggr(inner_aggr, expr, axis)
    aggr_job = jobs.Job(self.context)

    def queue_reduction(partial_coords, target_row, target_col):
        # One subjob: combine the listed intermediate blocks into a single
        # result block, then delete the intermediate blocks
        reduce_cb = cmd.CommandBuilder(cmd.BINARYMATRIXOP)
        cleanup_cb = cmd.CommandBuilder(cmd.DELETE)
        for part_col, part_row in partial_coords:
            mname = self.context.key_manager.get_block_name(prefix, part_col, part_row)
            reduce_cb.add_param(mname)
            cleanup_cb.add_param(mname)
        reduce_cb.add_param(outer_aggr)
        reduce_cb.add_param(self.context.key_manager.get_block_name(result_name, target_row, target_col))
        aggr_job.add_subjob(reduce_cb.join(cleanup_cb))

    if axis == 0:
        # Collapse every block column into one block of the single result row
        for col in range(0, self.col_blocks()):
            queue_reduction([(col, row) for row in range(0, self.row_blocks())], 0, col)
        aggr_job.execute()
        return Matrix(1, self.__cols, result_name, self.context)

    if axis == 1:
        # Collapse every block row into one block of the single result column
        for row in range(0, self.row_blocks()):
            queue_reduction([(col, row) for col in range(0, self.col_blocks())], row, 0)
        aggr_job.execute()
        return Matrix(self.__rows, 1, result_name, self.context)

    if axis is None:
        # Fold all intermediate values into one value on the client side
        total = None
        redwrap = RedisWrapper(self.context.redis_master, self.context.key_manager)
        for col in range(0, self.col_blocks()):
            for row in range(0, self.row_blocks()):
                key = self.context.key_manager.get_block_name(prefix, col, row)
                val = float(redwrap.get_value(key))
                if total is None:
                    total = val
                else:
                    # NOTE: outer_aggr is executed with eval, so it must never be
                    # built from untrusted input
                    total = eval(outer_aggr, { 'numpy' : numpy, 'x' : total, 'y' : val })
                redwrap.delete_block(key)
        return total

    # Any other axis value falls through and yields None
def file2redis(name, rows, cols, redis, key_manager, file_format, block_size=None):
    """
    Loads a matrix from one text file per block into redis

    name        -- name of the matrix to create
    rows        -- total number of rows of the matrix
    cols        -- total number of columns of the matrix
    redis       -- redis client used to store the info hash
    key_manager -- provides get_block_name(name, row, col)
    file_format -- format string that receives the block row and block column
                   and yields the path of the file for that block
    block_size  -- block size; if omitted it is taken from the number of rows
                   of block (0, 0)

    Files are parsed with numpy.genfromtxt using ';' as delimiter.
    """
    # Handle 0,0 block first to guess the block size
    rw = RedisWrapper(redis, key_manager)
    first_block = numpy.genfromtxt(file_format.format(0, 0), delimiter=';', dtype=None)
    rw.create_block(key_manager.get_block_name(name, 0, 0), first_block)
    if block_size is None:
        block_size = first_block.shape[0]

    redis.hmset(const.INFO_FORMAT.format(name), { 'block_size': block_size, 'rows' : rows, 'cols' : cols })

    # Round up so that trailing partial blocks are loaded too; plain division
    # would skip them and would load nothing but block (0, 0) whenever rows is
    # smaller than block_size
    row_blocks = (rows + block_size - 1) // block_size
    col_blocks = (cols + block_size - 1) // block_size

    for row in range(0, row_blocks):
        for col in range(0, col_blocks):
            if row == 0 and col == 0:
                # Already stored above
                continue
            path = file_format.format(row, col)
            matrix = numpy.genfromtxt(path, delimiter=';', dtype=None)
            rw.create_block(key_manager.get_block_name(name, row, col), matrix)
def k_means_recalc(self, dist, result_name=None):
    """
    Calculates new k-means centers from a previously computed distance matrix

    Runs a KMeansRecalculationJob and then, for every block column and every
    center, pops all serialized arrays from the corresponding redis list, adds
    them up, divides the total by the record counter of that center and stacks
    the resulting rows into block (0, col) of the result matrix.

    dist        -- distance matrix; its number of columns is the number of centers
    result_name -- name of the result matrix; a random name is generated if omitted

    Returns a Matrix with one row per center and as many columns as this matrix.
    """
    if result_name is None:
        result_name = MatrixFactory.getRandomMatrixName()

    redis_master = self.context.redis_master
    redwrap = RedisWrapper(redis_master, self.context.key_manager)
    num_centers = dist.shape[1]
    prefix = 'center(' + self.__name + ',' + dist.name() + ')'
    cnt_prefix = 'counter(' + self.__name + ',' + dist.name() + ')_'
    recalc_job = kmeans_jobs.KMeansRecalculationJob(self.context, self, dist, prefix, cnt_prefix)
    recalc_job.run()

    for col in range(0, self.col_blocks()):
        conc = None
        for center in range(0, num_centers):
            name = prefix + '_' + str(col) + '_' + str(center)
            m = redis_master.lpop(name)
            total = None
            while m is not None:
                # NOTE(review): numpy.loads unpickles the payload and is deprecated
                # in newer NumPy releases -- confirm the NumPy version in use
                partial = numpy.loads(m)
                # Identity tests on purpose: "== None" on a numpy array is
                # evaluated element-wise and cannot be used as an if-condition
                if total is None:
                    total = partial
                else:
                    total += partial
                m = redis_master.lpop(name)
            redis_master.delete(name)

            # NOTE(review): total is still None here if the list was empty, which
            # makes the next line fail -- confirm the job always pushes data
            # Sum is only a row. To make it a matrix that we can concatenate, we have to wrap it
            if len(total.shape) == 1:
                total = numpy.matrix([total])

            num_records = redis_master.get(cnt_prefix + str(center))
            num_records = float(num_records) if num_records is not None else 1 #TODO: This is an error

            mean = total / num_records
            if conc is None:
                conc = mean
            else:
                conc = numpy.concatenate((conc, mean), axis=0)
        redwrap.create_block(self.context.key_manager.get_block_name(result_name, 0, col), conc)

    return Matrix(num_centers, self.__cols, result_name, self.context)
def get_numpy_matrix(self):
    """
    Concatenates all blocks of this matrix and returns one big numpy matrix

    The blocks of every block row are joined side by side and the resulting
    strips are stacked on top of each other. Returns None if the matrix has no
    block rows.
    """
    redwrap = RedisWrapper(self.context.redis_master, self.context.key_manager)

    strips = []
    for row in range(0, self.row_blocks()):
        blocks = [redwrap.get_block(self.block_name(row, 0))]
        for col in range(1, self.col_blocks()):
            blocks.append(redwrap.get_block(self.block_name(row, col)))
        # Join once per strip instead of re-copying the growing strip for every
        # block; a lone block is used as it is
        if len(blocks) == 1:
            strips.append(blocks[0])
        else:
            strips.append(numpy.concatenate(blocks, axis=1))

    if not strips:
        return None
    if len(strips) == 1:
        return strips[0]
    return numpy.concatenate(strips)
def __delete(self):
    """
    Removes all blocks of this matrix and afterwards its info entry from redis
    """
    context = self.context
    redwrap = RedisWrapper(context.redis_master, context.key_manager)

    for stored_block in self.block_names():
        redwrap.delete_block(stored_block)

    info_key = const.INFO_FORMAT.format(self.__name)
    context.redis_master.delete(info_key)
def slice(self, row, num_rows, col, num_cols, result_name=None):
    """
    Creates a new matrix from a rectangular region of this matrix

    row         -- first row of the region; a negative value counts from the end
    num_rows    -- number of rows; a negative value selects the rows before row
    col         -- first column of the region; a negative value counts from the end
    num_cols    -- number of columns; a negative value selects the columns before col
    result_name -- name of the result matrix; a random name is generated if omitted

    Returns a Matrix with num_rows rows and num_cols columns (absolute values).
    Raises Exception if the region does not lie inside this matrix.
    """
    if result_name is None:
        result_name = MatrixFactory.getRandomMatrixName()

    # Check if given values are valid
    if row + num_rows > self.__rows or (num_rows < 0 and row + num_rows < 0):
        raise Exception('Row index out of bounds')
    if col + num_cols > self.__cols or (num_cols < 0 and col + num_cols < 0):
        raise Exception('Column index out of bounds')

    # Handle negative indices
    if row < 0:
        row = self.__rows + row
    if num_rows < 0:
        row = row + num_rows
        num_rows = -num_rows
    if col < 0:
        col = self.__cols + col
    if num_cols < 0:
        col = col + num_cols
        num_cols = -num_cols

    # The first check runs on the raw arguments, so a negative start index can
    # still yield a region that leaves the matrix once it has been resolved
    if row < 0 or row + num_rows > self.__rows:
        raise Exception('Row index out of bounds')
    if col < 0 or col + num_cols > self.__cols:
        raise Exception('Column index out of bounds')

    redwrap = RedisWrapper(self.context.redis_master, self.context.key_manager)
    block_size = self.__block_size

    # Number of blocks of the new slice, rounded up to cover a partial last block
    row_blocks = (num_rows + block_size - 1) // block_size
    col_blocks = (num_cols + block_size - 1) // block_size

    # Iterate the blocks of the new slice
    for r in range(0, row_blocks):
        for c in range(0, col_blocks):
            # Region of this matrix that becomes block (r, c) of the slice
            start_row = row + r * block_size
            end_row = min(row + num_rows, start_row + block_size)
            start_col = col + c * block_size
            end_col = min(col + num_cols, start_col + block_size)

            first_block_row = start_row // block_size
            last_block_row = (end_row - 1) // block_size
            first_block_col = start_col // block_size
            last_block_col = (end_col - 1) // block_size

            # Iterate the blocks of the current matrix that intersect with the current block of the new slice
            # and patch them together
            a = None
            for i in range(first_block_row, last_block_row + 1):
                row_b = None
                for j in range(first_block_col, last_block_col + 1):
                    n = redwrap.get_block(self.block_name(i, j))
                    # Identity tests on purpose: "== None" on a numpy array is
                    # evaluated element-wise and cannot be used as an if-condition
                    if row_b is None:
                        row_b = n
                    else:
                        row_b = numpy.concatenate((row_b, n), axis=1)
                if a is None:
                    a = row_b
                else:
                    a = numpy.concatenate((a, row_b), axis=0)

            # Now we have a matrix so big that the whole current block of the new slice fits in it.
            # Its top left cell is the top left cell of the first patched block, so the wanted
            # region starts at the offset of start_row / start_col inside that block and has
            # exactly the extent computed above
            sr = start_row - first_block_row * block_size
            sc = start_col - first_block_col * block_size
            block = a[sr:sr + (end_row - start_row), sc:sc + (end_col - start_col)]
            redwrap.create_block(self.context.key_manager.get_block_name(result_name, r, c), block)

    return Matrix(num_rows, num_cols, result_name, self.context)
def get_numpy_block(self, row, col):
    """
    Returns a block as numpy matrix

    row -- block row index
    col -- block column index
    """
    context = self.context
    redwrap = RedisWrapper(context.redis_master, context.key_manager)
    wanted = self.block_name(row, col)
    return redwrap.get_block(wanted)
def delete(cmd_ctx):
    """
    Deletes every block that is named in the arguments of the command

    cmd_ctx -- command context providing redis_master, key_manager and cmdArgs
    """
    wrapper = RedisWrapper(cmd_ctx.redis_master, cmd_ctx.key_manager)
    for block_key in cmd_ctx.cmdArgs:
        wrapper.delete_block(block_key)