def benchmark_convnet(ctx, timer):
    '''Benchmark three stacked convolution + max-pool layers on a batch of ones.

    :param ctx: cluster context; only ``ctx.num_workers`` is read.
    :param timer: object whose ``time_op(name, fn)`` is called twice with the
      forward pass.
    '''
    image_size = BASE_IMG_SIZE
    minibatch = 64
    # Computed but not consumed below; retained so the same calls are made.
    hint = util.divup(image_size, sqrt(ctx.num_workers))

    per_image = (N_COLORS, image_size, image_size)
    # Only the first axis is divided among the workers.
    tile_hint = (util.divup(minibatch, ctx.num_workers),) + per_image
    util.log_info('Hint: %s', tile_hint)

    images = expr.eager(expr.ones((minibatch,) + per_image, tile_hint=tile_hint))
    weights = [
        expr.eager(expr.ones((N_FILTERS, n_inputs) + FILTER_SIZE,
                             tile_hint=ONE_TILE))
        for n_inputs in (N_COLORS, N_FILTERS, N_FILTERS)
    ]

    def _forward():
        layer = images
        for w in weights:
            layer = stencil.maxpool(stencil.stencil(layer, w, 2))
        expr.force(layer)

    # force parakeet functions to compile before timing.
    _forward()
    for _ in range(2):
        timer.time_op('convnet', _forward)
def shortestPath_np(dim, linkMatrix, dist):
    '''Run 1000 relaxation rounds of ``dist`` against ``linkMatrix``.

    Each round adds ``dist`` to ``linkMatrix``, takes the minimum along axis 0
    and reshapes the result to ``(dim, 1)``.

    :param dim: number of rows of the ``(dim, 1)`` distance column.
    :param linkMatrix: array added to ``dist`` in every round.
    :param dist: initial distances.
    :return: the distances after the final round.
    '''
    step = 0
    while step < 1000:
        util.log_info("%s", "enter")
        candidates = dist + linkMatrix
        dist = candidates.min(axis=0).reshape(dim, 1)
        util.log_info("numItersation %s", step)
        step += 1
    return dist
def mark_failed_worker(self, worker_id):
    '''Remove ``worker_id`` from the available pool and flag the tiles it held.

    For every array in ``self._arrays``, each extent whose tile id reports
    ``worker_id`` as its worker is appended to that array's ``bad_tiles``.

    :param worker_id: identifier of the failed worker.
    '''
    util.log_info('Marking worker %s as failed.', worker_id)
    self._available_workers.remove(worker_id)

    for dist_array in self._arrays:
        for extent, tile in dist_array.tiles.iteritems():
            if not tile.worker == worker_id:
                continue
            dist_array.bad_tiles.append(extent)
def bfs(ctx, dim):
    '''Repeatedly multiply the link matrix into the frontier until it stops growing.

    :param ctx: cluster context; ``ctx.num_workers`` sizes the tile hints.
    :param dim: number of vertices (the link matrix is ``dim x dim``).
    :return: ``(generation_seconds, compute_seconds)``.
    '''
    util.log_info("start to computing......")
    sGenerate = time.time()
    per_worker = dim / ctx.num_workers

    frontier_spec = expr.ndarray((dim, 1), dtype=np.int64,
                                 tile_hint=(per_worker, 1))
    current = eager(expr.shuffle(frontier_spec, make_current))

    matrix_spec = expr.ndarray((dim, dim), dtype=np.int64,
                               tile_hint=(dim, per_worker))
    linkMatrix = eager(expr.shuffle(matrix_spec, make_matrix))
    eGenerate = time.time()

    startCompute = time.time()
    while True:
        successor = expr.dot(linkMatrix, current)
        count_before = expr.count_nonzero(current)
        count_after = expr.count_nonzero(successor)
        # Stop once a step leaves the non-zero count unchanged.
        unchanged = expr.equal(count_before, count_after).glom()
        current = successor
        if unchanged:
            break
    # NOTE(review): the source's indentation was lost; this assumes the final
    # evaluate() runs once after the loop rather than every iteration - confirm.
    current.evaluate()
    endCompute = time.time()
    return (eGenerate - sGenerate, endCompute - startCompute)
def test_newaxis(self):
    '''Check that ``expr.newaxis`` indexing produces the same shapes as NumPy.'''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    rows, cols = slice(2, 7), slice(4, 8)
    NEW = object()  # placeholder swapped for each library's newaxis marker

    def _index(template, marker):
        return tuple(marker if item is NEW else item for item in template)

    templates = [
        (NEW, rows, cols),
        (NEW, rows, NEW, cols),
        (NEW, rows, NEW, cols, NEW),
        # Extreme case
        (NEW,) * 4 + (rows,) + (NEW,) * 3 + (cols,) + (NEW,) * 3,
    ]
    for template in templates:
        Assert.all_eq(na[_index(template, np.newaxis)].shape,
                      a[_index(template, expr.newaxis)].shape)

    logged = (NEW, rows, NEW, cols, NEW, NEW, NEW)
    util.log_info('\na.shape: %s \nna.shape: %s',
                  a[_index(logged, expr.newaxis)].shape,
                  na[_index(logged, np.newaxis)].shape)
def _(axis):
    '''Compare a distributed sum of a matrix of twos along ``axis`` with NumPy.

    :param axis: axis passed to both ``sum`` calls.
    '''
    util.log_info('Testing sum over axis %s', axis)
    shape = (TEST_SIZE, TEST_SIZE)
    doubled = expr.ones(shape) + expr.ones(shape)
    actual = doubled.sum(axis=axis).glom()
    expected = 2 * np.ones(shape).sum(axis)
    Assert.all_eq(actual, expected)
def benchmark_convnet(ctx, timer):
    '''Time a three-layer stencil/maxpool network, evaluated via ``evaluate()``.

    :param ctx: cluster context; only ``ctx.num_workers`` is read.
    :param timer: object exposing ``time_op(name, fn)``.
    '''
    image_size = BASE_IMG_SIZE
    minibatch = 64
    # Not used afterwards; the call is kept so behavior is unchanged.
    hint = util.divup(image_size, sqrt(ctx.num_workers))
    images_per_tile = util.divup(minibatch, ctx.num_workers)
    tile_hint = (images_per_tile, N_COLORS, image_size, image_size)
    util.log_info('Hint: %s', tile_hint)

    def _ones(shape, hint_):
        return expr.eager(expr.ones(shape, tile_hint=hint_))

    images = _ones((minibatch, N_COLORS, image_size, image_size), tile_hint)
    filters_1 = _ones((N_FILTERS, N_COLORS) + FILTER_SIZE, ONE_TILE)
    filters_2 = _ones((N_FILTERS, N_FILTERS) + FILTER_SIZE, ONE_TILE)
    filters_3 = _ones((N_FILTERS, N_FILTERS) + FILTER_SIZE, ONE_TILE)

    def _conv_pool(inputs, filters):
        return stencil.maxpool(stencil.stencil(inputs, filters, 2))

    def _run_network():
        first = _conv_pool(images, filters_1)
        second = _conv_pool(first, filters_2)
        third = _conv_pool(second, filters_3)
        third.evaluate()

    # force parakeet functions to compile before timing.
    _run_network()
    for _ in range(2):
        timer.time_op('convnet', _run_network)
def test_newaxis(self):
    '''Shapes produced by ``expr.newaxis`` indexing must match NumPy's.'''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)
    rows, cols = slice(2, 7), slice(4, 8)

    def _index(marker, lead, mid, trail):
        # ``lead``/``mid``/``trail`` new axes before, between and after the slices.
        return ((marker,) * lead + (rows,) + (marker,) * mid + (cols,) +
                (marker,) * trail)

    # The last entry is the extreme case with ten inserted axes.
    for counts in [(1, 0, 0), (1, 1, 0), (1, 1, 1), (4, 3, 3)]:
        Assert.all_eq(na[_index(np.newaxis, *counts)].shape,
                      a[_index(expr.newaxis, *counts)].shape)

    util.log_info('\na.shape: %s \nna.shape: %s',
                  a[_index(expr.newaxis, 1, 1, 3)].shape,
                  na[_index(np.newaxis, 1, 1, 3)].shape)
def test_convnet(ctx):
    '''Build a three-layer stencil/maxpool pipeline and log the output shape.

    :param ctx: cluster context; ``ctx.num_workers`` sizes the image tile hint.
    '''
    hint = util.divup(64, sqrt(ctx.num_workers))
    layer = expr.eager(
        expr.ones((N_IMGS,) + IMG_SIZE,
                  tile_hint=(N_IMGS, N_COLORS, hint, hint)))

    # Each layer creates its weights and then applies convolution + pooling.
    for n_inputs in (N_COLORS, N_FILTERS, N_FILTERS):
        weights = expr.eager(
            expr.ones((N_FILTERS, n_inputs) + FILTER_SIZE, tile_hint=ONE_TILE))
        layer = stencil.maxpool(stencil.stencil(layer, weights, 2))

    util.log_info(layer.shape)
def start_remote_worker(worker, st, ed):
    """Launch worker processes (or threads) for one host.

    With ``FLAGS.use_threads`` set and ``worker == "localhost"``, one daemon
    thread per index in ``range(st, ed)`` is started and nothing is returned.
    Otherwise a single shell command is built and run, over ssh unless the
    host is localhost.

    :param worker: hostname to connect to.
    :param st: first process index to start.
    :param ed: end of the index range; ``ed - st`` is passed as ``--count``.
    :return: the ``subprocess.Popen`` handle, or ``None`` in thread mode.
    """
    if FLAGS.use_threads and worker == "localhost":
        util.log_info("Using threads.")
        for index in range(st, ed):
            thread = threading.Thread(
                target=spartan.worker._start_worker,
                args=((socket.gethostname(), FLAGS.port_base), index))
            thread.daemon = True
            thread.start()
        # NOTE(review): assumed to run once after the loop - confirm.
        time.sleep(0.1)
        return

    util.log_info("Starting worker %d:%d on host %s", st, ed, worker)
    if FLAGS.oprofile:
        os.system("mkdir operf.%s" % worker)

    command = ["cd %s && " % os.path.abspath(os.path.curdir)]
    if FLAGS.xterm:
        command.extend(["xterm", "-e"])
    if FLAGS.oprofile:
        command.extend(["operf -e CPU_CLK_UNHALTED:100000000", "-g", "-d",
                        "operf.%s" % worker])
    command.extend([
        "python",
        "-m spartan.worker",
        "--master=%s:%d" % (socket.gethostname(), FLAGS.port_base),
        "--count=%d" % (ed - st),
        "--heartbeat_interval=%d" % FLAGS.heartbeat_interval,
    ])

    # add flags from config/user
    for name, value in FLAGS:
        if name not in ("worker_list", "print_options"):
            command.append(repr(value))

    util.log_debug("Running worker %s", " ".join(command))
    time.sleep(0.1)
    # TODO: improve this to make log break at newline
    if worker == "localhost":
        return subprocess.Popen(" ".join(command), shell=True,
                                stdin=subprocess.PIPE)
    ssh_prefix = ["ssh", "-oForwardX11=no", worker]
    return subprocess.Popen(ssh_prefix + command, executable="ssh")
def train_smo_1998(self, data, labels):
    '''
    Train an SVM model using the SMO (1998) algorithm.

    Sets ``self.b``, ``self.alpha``, ``self.E``, ``self.w`` and ``self.usew_``.

    Args:
      data(Expr): points to be trained
      labels(Expr): the correct labels of the training data
    '''
    N = data.shape[0]  # Number of instances
    D = data.shape[1]  # Number of features
    rows_per_worker = N / self.ctx.num_workers

    self.b = 0.0
    self.alpha = expr.zeros((N, 1), dtype=np.float64,
                            tile_hint=[rows_per_worker, 1]).force()

    # linear kernel
    kernel_results = expr.dot(data, expr.transpose(data),
                              tile_hint=[rows_per_worker, N])
    labels = expr.force(labels)

    def _alpha_reduce(column):
        # Reduce alpha with margin_mapper against one already-forced column.
        return expr.reduce(self.alpha, axis=None,
                           dtype_fn=lambda input: input.dtype,
                           local_reduce_fn=margin_mapper,
                           accumulate_fn=np.add,
                           fn_kw=dict(label=labels, data=column)).glom()

    self.E = expr.zeros((N, 1), dtype=np.float64,
                        tile_hint=[rows_per_worker, 1]).force()
    for i in xrange(N):
        self.E[i, 0] = (self.b + _alpha_reduce(kernel_results[:, i].force())
                        - labels[i, 0])

    util.log_info("Starting SMO")
    it = 0
    num_changed = 0
    examine_all = True
    while (num_changed > 0 or examine_all) and (it < self.maxiter):
        util.log_info("Iteration:%d", it)
        num_changed = 0
        for i in xrange(N):
            # A full sweep visits every example; otherwise only those whose
            # alpha lies strictly between 0 and C.
            if examine_all or (self.alpha[i, 0] > 0 and
                               self.alpha[i, 0] < self.C):
                num_changed += self.examine_example(i, N, labels,
                                                    kernel_results)
        it += 1
        # A full sweep is followed by a partial one; a partial sweep that
        # changed nothing triggers another full sweep.
        examine_all = (not examine_all) and num_changed == 0

    self.w = expr.zeros((D, 1), dtype=np.float64).force()
    for i in xrange(D):
        self.w[i, 0] = _alpha_reduce(expr.force(data[:, i]))

    self.usew_ = True
    print('iteration finish: %s' % (it,))
    print('b: %s' % (self.b,))
    print('w: %s' % (self.w.glom(),))
def fuzzy_kmeans(points, k=10, num_iter=10, m=2.0, centers=None):
    '''
    clustering data points using fuzzy kmeans clustering method.

    Args:
      points(Expr or DistArray): the input data points matrix.
      k(int): the number of clusters.
      num_iter(int): the max iterations to run.
      m(float): the parameter of fuzzy kmeans.
      centers(Expr or DistArray): the initialized centers of each cluster.

    Returns:
      The labels expression from the last iteration (argmax of the membership
      probabilities per point), or the initial all-zero labels when
      ``num_iter`` is 0.
    '''
    points = expr.force(points)
    num_dim = points.shape[1]
    if centers is None:
        centers = expr.rand(k, num_dim)

    # ``np.int`` was only an alias of the builtin ``int`` and has been removed
    # from recent NumPy releases; the builtin gives the same dtype.
    labels = expr.zeros((points.shape[0],), dtype=int)
    for _ in range(num_iter):
        centers = expr.as_array(centers)
        points_broadcast = expr.reshape(points,
                                        (points.shape[0], 1, points.shape[1]))
        centers_broadcast = expr.reshape(centers,
                                         (1, centers.shape[0], centers.shape[1]))
        distances = expr.sum(expr.square(points_broadcast - centers_broadcast),
                             axis=2)
        # This is used to avoid dividing zero
        distances = distances + 0.00000000001
        util.log_info('distances shape %s' % str(distances.shape))
        distances_broadcast = expr.reshape(distances, (distances.shape[0], 1,
                                                       distances.shape[1]))
        distances_broadcast2 = expr.reshape(distances, (distances.shape[0],
                                                        distances.shape[1], 1))
        prob = 1.0 / expr.sum(expr.power(distances_broadcast / distances_broadcast2,
                                         2.0 / (m - 1)), axis=2)
        prob.force()
        counts = expr.sum(prob, axis=0)
        counts = expr.reshape(counts, (counts.shape[0], 1))
        labels = expr.argmax(prob, axis=1)
        centers = expr.sum(expr.reshape(points, (points.shape[0], 1, points.shape[1])) *
                           expr.reshape(prob, (prob.shape[0], prob.shape[1], 1)),
                           axis=0)

        # We assume that the size of centers are relative small that can be handled
        # on the master.
        counts = counts.glom()
        centers = centers.glom()

        # If any centroids don't have any points assigned to them.
        # Flatten by the actual number of centers so that a caller-supplied
        # ``centers`` whose row count differs from ``k`` still works.
        zcount_indices = (counts == 0).reshape(-1)

        if np.any(zcount_indices):
            # One or more centroids may not have any points assigned to them,
            # which results in their position being the zero-vector. We reseed
            # these centroids with new random values and set their counts to 1
            # in order to get rid of dividing by zero.
            counts[zcount_indices, :] = 1
            centers[zcount_indices, :] = np.random.rand(
                np.count_nonzero(zcount_indices), num_dim)

        centers = centers / counts

    return labels
def start_remote_worker(worker, st, ed):
    '''
    Start worker processes for one host, either as local threads or as a
    single launched command.

    :param worker: hostname to connect to.
    :param st: first process index to start.
    :param ed: end of the index range (``range(st, ed)``); the launched
      command receives ``--count=ed - st``.
    :return: the ``subprocess.Popen`` object, or ``None`` when threads are used.
    '''
    run_in_threads = FLAGS.use_threads and worker == 'localhost'
    if run_in_threads:
        util.log_info('Using threads.')
        for i in range(st, ed):
            master_addr = (socket.gethostname(), FLAGS.port_base)
            t = threading.Thread(target=spartan.worker._start_worker,
                                 args=(master_addr, i))
            t.daemon = True
            t.start()
        # NOTE(review): assumed to run once after the loop - confirm.
        time.sleep(0.1)
        return

    util.log_info('Starting worker %d:%d on host %s', st, ed, worker)
    if FLAGS.oprofile:
        os.system('mkdir operf.%s' % worker)

    ssh_args = ['ssh', '-oForwardX11=no', worker]

    # The command is assembled as shell fragments and joined with spaces.
    args = ['cd %s && ' % os.path.abspath(os.path.curdir)]
    if FLAGS.xterm:
        args += ['xterm', '-e']
    if FLAGS.oprofile:
        args += ['operf -e CPU_CLK_UNHALTED:100000000', '-g', '-d',
                 'operf.%s' % worker]

    master = '--master=%s:%d' % (socket.gethostname(), FLAGS.port_base)
    count = '--count=%d' % (ed - st)
    heartbeat = '--heartbeat_interval=%d' % FLAGS.heartbeat_interval
    args += ['python', '-m spartan.worker', master, count, heartbeat]

    # add flags from config/user
    for (name, value) in FLAGS:
        if name in ['worker_list', 'print_options']:
            continue
        args.append(repr(value))

    util.log_debug('Running worker %s', ' '.join(args))
    time.sleep(0.1)

    if worker != 'localhost':
        p = subprocess.Popen(ssh_args + args, executable='ssh')
    else:
        p = subprocess.Popen(' '.join(args), shell=True, stdin=subprocess.PIPE)
    return p
def make_current(tile, ex):
    '''Build the initial frontier tile for extent ``ex``.

    The tile is an ``(ex.shape[0], 1)`` int64 column of zeros, with a single 1
    at the start vertex when that vertex falls inside this extent.

    The buffer is tile-local, so the module-level ``startVertex`` (a global
    row index) is shifted by ``ex.ul[0]`` before it is used as an index, and
    the upper bound ``ex.lr[0]`` is treated as exclusive. Previously the
    global index was written directly, which is only correct for an extent
    starting at row 0.
    NOTE(review): assumes extents are half-open ``[ul, lr)`` with
    ``shape == lr - ul`` - confirm against the extent class.

    :param tile: unused.
    :param ex: extent exposing ``ul``, ``lr`` and ``shape``.
    :return: a one-element list ``[(ex, column)]``.
    '''
    util.log_info("start to creatting")
    ul = ex.ul
    lr = ex.lr
    dim = ex.shape[0]
    current = np.zeros((dim, 1), dtype=np.int64)
    if ul[0] <= startVertex < lr[0]:
        current[startVertex - ul[0], 0] = 1
    return [(ex, current)]
def test_del_dim(self):
    '''Integer indices must drop a dimension exactly as they do in NumPy.'''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    # (index, compare_values): values are compared when True, shapes otherwise.
    cases = [
        ((slice(2, 7), 8), True),
        ((slice(3, 9), 4), False),
        ((slice(2, 7), -1), True),
        ((-1, slice(3, 9)), False),
    ]
    for index, compare_values in cases:
        if compare_values:
            Assert.all_eq(na[index], a[index].glom())
        else:
            Assert.all_eq(na[index].shape, a[index].shape)

    logged = (slice(3, 9), 4)
    util.log_info('\na.shape: %s \nna.shape %s', a[logged].shape,
                  na[logged].shape)
def bind(self):
    '''Bind the ZeroMQ socket to ``self.addr``.

    The host is resolved with ``socket.gethostbyname``. A port of ``-1``
    requests a random port, after which ``self.addr`` is replaced with the
    resolved host and the chosen port. For any other port a failed bind is
    logged and the ``zmq.ZMQError`` re-raised.
    '''
    hostname, port = self.addr
    host = socket.gethostbyname(hostname)
    util.log_debug('Binding... %s', (host, port))

    if port != -1:
        endpoint = 'tcp://%s:%d' % (host, port)
        try:
            self._zmq.bind(endpoint)
        except zmq.ZMQError:
            util.log_info('Failed to bind (%s, %d)' % (host, port))
            raise
        return

    chosen_port = self._zmq.bind_to_random_port('tcp://%s' % host)
    self.addr = (host, chosen_port)
def test_ndimension(self):
    '''Compare ``expr.sort``/``expr.argsort`` with NumPy on random N-d arrays.

    Five cases are drawn, each with 2-5 dimensions of extent 5-10, and every
    axis is checked.
    '''
    for case_number in xrange(1, 6):
        dim = np.random.randint(low=2, high=6)
        shape = np.random.randint(low=5, high=11, size=dim)
        util.log_info('Test Case #%s: DIM(%s) shape%s', case_number, dim, shape)

        na = new_ndarray(shape)
        a = expr.from_numpy(na)

        for axis in xrange(dim):
            sorted_dist = expr.sort(a, axis).glom()
            Assert.all_eq(sorted_dist, np.sort(na, axis))
            order_dist = expr.argsort(a, axis).glom()
            Assert.all_eq(order_dist, np.argsort(na, axis))
def fake_netflix_mapper(inputs, ex, p_rating=None):
    '''
    Create "Netflix-like" data for the given extent.

    Draws ``max(1, ex.size * p_rating)`` random (row, column) positions inside
    the extent, each with a random float32 rating from ``randint(0, 5)``, and
    yields them as one sparse COO matrix.

    :param inputs: unused.
    :param ex: extent providing ``shape`` and ``size``.
    :param p_rating: Sparsity factor (probability a given cell will have a rating)
    '''
    n_rows, n_cols = ex.shape[0], ex.shape[1]
    n_ratings = int(max(1, ex.size * p_rating))

    # The three draws stay in this order so a seeded run is reproducible.
    uids = np.random.randint(0, n_rows, n_ratings)
    mids = np.random.randint(0, n_cols, n_ratings)
    ratings = np.random.randint(0, 5, n_ratings).astype(np.float32)

    util.log_info('%s %s %s %s', ex, p_rating, ex.size, len(ratings))

    coordinates = (uids, mids)
    yield ex, scipy.sparse.coo_matrix((ratings, coordinates), shape=ex.shape)
def benchmark_jacobi(ctx, timer):
    '''Time ``ITERATION`` rounds of the Jacobi method on a generated system.

    The problem size is ``base * ctx.num_workers``; both module-level names
    are only read here.

    :param ctx: cluster context; only ``ctx.num_workers`` is read.
    :param timer: unused.
    '''
    util.log_warn('util.log_warn: %s', ctx.num_workers)

    A, b = jacobi.jacobi_init(base * ctx.num_workers)
    # Materialize both operands before the clock starts.
    A = A.evaluate()
    b = b.evaluate()

    started = time.time()
    result = jacobi.jacobi_method(A, b, ITERATION).glom()
    cost = time.time() - started

    util.log_info('\nresult =\n%s', result)
    util.log_warn('time cost: %s s', cost)
    util.log_warn('cost per iteration: %s s\n', cost / ITERATION)
def _initialize(self):
    """Send an initialization request to every worker and wait for all replies.

    Afterwards the master context is created and ``self._initialized`` is set.
    """
    util.log_info("Initializing...")

    peer_addrs = dict((worker_id, worker.addr())
                      for worker_id, worker in self._workers.iteritems())
    req = core.InitializeReq(peers=peer_addrs)

    pending = rpc.FutureGroup()
    for worker_id, worker in self._workers.iteritems():
        # The same request object is reused; only its id changes per worker.
        req.id = worker_id
        pending.append(worker.initialize(req))
    pending.wait()

    self._ctx = blob_ctx.BlobCtx(blob_ctx.MASTER_ID, self._workers, self)
    self._initialized = True
    util.log_info("done...")
def shutdown(self):
    '''Ask every worker to shut down, then stop the local server.

    Does nothing when ``self._ctx.active`` is already ``False``.
    '''
    if self._ctx.active is False:
        return
    self._ctx.active = False

    pending = rpc.FutureGroup()
    for worker_id, worker in self._workers.iteritems():
        util.log_info('Shutting down worker %d', worker_id)
        pending.append(worker.shutdown())

    # The futures are not waited on; pause briefly so the shutdown requests
    # have a chance to go out before the server stops.
    time.sleep(1)
    self._server.shutdown()
def test_combo(self):
    '''Mixing newaxis, slices and integer indices must match NumPy.'''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    rows = slice(2, 7)
    NEW = object()  # placeholder swapped for each library's newaxis marker

    def _index(template, marker):
        return tuple(marker if item is NEW else item for item in template)

    templates = [
        (NEW, rows, 4),
        (rows, NEW, -1),
        (-1, NEW, rows),
        (NEW, rows, NEW, NEW, 4, NEW, NEW),
    ]
    for template in templates:
        Assert.all_eq(na[_index(template, np.newaxis)],
                      a[_index(template, expr.newaxis)].glom())

    logged = (NEW, rows, NEW, NEW, -1, NEW, NEW)
    util.log_info('\na.shape: %s \nna.shape: %s',
                  a[_index(logged, expr.newaxis)].shape,
                  na[_index(logged, np.newaxis)].shape)
def test_combo(self):
    '''Values from combined newaxis/slice/integer indexing must equal NumPy's.'''
    na = np.arange(100).reshape(10, 10)
    a = expr.from_numpy(na)

    n, e = np.newaxis, expr.newaxis
    rows = slice(2, 7)

    # Each pair holds the NumPy index and the equivalent expr index.
    index_pairs = [
        ((n, rows, 4), (e, rows, 4)),
        ((rows, n, -1), (rows, e, -1)),
        ((-1, n, rows), (-1, e, rows)),
        ((n, rows, n, n, 4, n, n), (e, rows, e, e, 4, e, e)),
    ]
    for np_index, expr_index in index_pairs:
        Assert.all_eq(na[np_index], a[expr_index].glom())

    dist_shape = a[(e, rows, e, e, -1, e, e)].shape
    local_shape = na[(n, rows, n, n, -1, n, n)].shape
    util.log_info('\na.shape: %s \nna.shape: %s', dist_shape, local_shape)
def _build_mapper(ex, task_array, target_array, X, y, criterion, max_depth,
                  min_samples_split, min_samples_leaf, max_features, bootstrap):
    """
    Mapper kernel that trains one local random forest.

    The kernel fetches the whole of ``X`` and ``y``, reads its tree count from
    ``task_array`` at the extent's first row, fits an ``SKRF`` forest with a
    single job and stores it in that row of ``target_array``. ``criterion``,
    ``max_depth``, ``min_samples_split``, ``min_samples_leaf``,
    ``max_features`` and ``bootstrap`` are forwarded to ``SKRF`` unchanged.

    :return: a ``core.LocalKernelResult`` whose ``result`` is ``None``.
    """
    started = time.time()

    # The extent's first row selects this kernel's task and its output slot.
    row = ex.ul[0]
    forest_options = dict(
        n_estimators=task_array[row],
        criterion=criterion,
        max_depth=max_depth,
        n_jobs=1,
        min_samples_split=min_samples_split,
        min_samples_leaf=min_samples_leaf,
        max_features=max_features,
        bootstrap=bootstrap,
    )

    features = X.glom()
    targets = y.glom()
    forest = SKRF(**forest_options)
    forest.fit(features, targets)

    # Update the target array.
    target_array[row, :] = (forest,)

    kernel_result = core.LocalKernelResult()
    kernel_result.result = None
    util.log_info("Finish construction : %s", time.time() - started)
    return kernel_result
def compile_parakeet_source(src):
    '''Write ``src`` to a temporary ``.py`` file and load it as a module.

    Any exception raised while loading is logged and re-raised as a
    ``CodegenException`` carrying the original message and args.
    NOTE(review): the loaded module is neither used nor returned in the code
    visible here - confirm whether the function continues past this point.

    :param src: source code defining a parakeet function.
    '''
    util.log_debug('Compiling parakeet source.')

    scratch = tempfile.NamedTemporaryFile(delete=True,
                                          prefix='spartan-local-',
                                          suffix='.py')
    scratch.write(src)
    # Flush so the loader sees the complete source on disk.
    scratch.flush()

    try:
        module = imp.load_source('parakeet_temp', scratch.name)
    except Exception as ex:
        util.log_info('Failed to build parakeet wrapper')
        util.log_debug('Source was: %s', src)
        raise CodegenException(ex.message, ex.args)
def _initialize(self):
    '''Initialize all workers, wait for them, then create the master context.'''
    util.log_info('Initializing...')

    peers = {}
    for wid, worker in self._workers.iteritems():
        peers[wid] = worker.addr()
    req = core.InitializeReq(peers=peers)

    futures = rpc.FutureGroup()
    for wid, worker in self._workers.iteritems():
        # One request object is shared; its id is rewritten for each worker.
        req.id = wid
        futures.append(worker.initialize(req))
    futures.wait()

    self._ctx = blob_ctx.BlobCtx(blob_ctx.MASTER_ID, self._workers, self)
    self._initialized = True
    util.log_info('done...')
def benchmark_pagerank(ctx, timer):
    '''Time three matrix-vector products of a generated weight matrix.

    :param ctx: cluster context; ``ctx.num_workers`` scales the page count.
    :param timer: object exposing ``time_op(name, fn)``.
    '''
    num_pages = PAGES_PER_WORKER * ctx.num_workers
    util.log_info('Total pages: %s', num_pages)

    tile_extent = PAGES_PER_WORKER / 8
    weight_spec = expr.ndarray((num_pages, num_pages),
                               dtype=np.float32,
                               tile_hint=(num_pages, tile_extent))
    wts = eager(expr.shuffle(weight_spec, make_weights))
    p = eager(expr.ones((num_pages, 1),
                        tile_hint=(tile_extent, 1),
                        dtype=np.float32))

    def _multiply():
        return expr.dot(wts, p).force()

    for _ in range(3):
        timer.time_op('pagerank', _multiply)
def benchmark_pagerank(ctx, timer):
    '''Benchmark ``dot(weights, ranks)`` three times under the name 'pagerank'.

    :param ctx: cluster context supplying ``num_workers``.
    :param timer: object exposing ``time_op(name, fn)``.
    '''
    num_pages = PAGES_PER_WORKER * ctx.num_workers
    util.log_info('Total pages: %s', num_pages)

    matrix_hint = (num_pages, PAGES_PER_WORKER / 8)
    vector_hint = (PAGES_PER_WORKER / 8, 1)

    empty_matrix = expr.ndarray((num_pages, num_pages), dtype=np.float32,
                                tile_hint=matrix_hint)
    # make_weights fills in the tiles of the empty matrix.
    wts = eager(expr.shuffle(empty_matrix, make_weights))
    p = eager(expr.ones((num_pages, 1), tile_hint=vector_hint,
                        dtype=np.float32))

    repetition = 0
    while repetition < 3:
        timer.time_op('pagerank', lambda: expr.dot(wts, p).force())
        repetition += 1
def load_netflix_mapper(inputs, ex, load_file=None):
    '''Load the ratings covered by extent ``ex`` from a zip of pickled rows.

    For each row ``i`` of the extent, the archive member named
    ``str(i + FILE_START)`` is unpickled, filtered to user ids strictly
    between the extent's column bounds, and written into a tile-local sparse
    matrix.

    The archive is opened in a ``with`` block so it is closed even if reading
    fails; it was previously never closed. ``np.float`` (an alias of the
    builtin ``float`` that recent NumPy releases removed) is replaced by
    ``float``.

    :param inputs: unused.
    :param ex: extent exposing ``ul``, ``lr`` and ``shape``.
    :param load_file: path of the zip archive to read.
    :return: generator yielding a single ``(ex, coo_matrix)`` pair.
    '''
    # first column will load all of the data
    row_start, row_end = ex.ul[0], ex.lr[0]
    col_start, col_end = ex.ul[1], ex.lr[1]
    data = scipy.sparse.dok_matrix(ex.shape, dtype=float)

    with zipfile.ZipFile(load_file, 'r', allowZip64=True) as zf:
        for i in range(row_start, row_end):
            offset = i - row_start
            # NOTE(security): unpickling runs arbitrary code; only use archives
            # from a trusted source.
            row_data = cPickle.loads(zf.read('%d' % (i + FILE_START)))
            # NOTE(review): both bounds are exclusive, so userid == col_start
            # is dropped - confirm this is intended.
            filtered = row_data[row_data['userid'] > col_start]
            filtered = filtered[filtered['userid'] < col_end]
            for uid, rating in filtered:
                # Shift the global user id to a tile-local column index.
                uid -= col_start
                data[(offset, uid)] = rating

    util.log_info('Loaded: %s', ex)
    yield ex, data.tocoo()
def register(self, req, handle):
    '''
    RPC method.

    Register a new worker with the master. The worker is given the next
    sequential id, connected to and added to the available pool; once
    ``self.num_workers`` workers have registered, initialization is started
    on a new thread.

    Args:
      req (RegisterReq): carries ``host``, ``port`` and ``worker_status``.
      handle (PendingRequest): completed with an empty message.
    '''
    worker_id = len(self._workers)
    connection = rpc.connect(req.host, req.port)
    self._workers[worker_id] = connection
    self._available_workers.append(worker_id)
    util.log_info('Registered %s:%s (%d/%d)', req.host, req.port, worker_id,
                  self.num_workers)

    handle.done(core.EmptyMessage())
    self.init_worker_score(worker_id, req.worker_status)

    all_registered = len(self._workers) == self.num_workers
    if all_registered:
        initializer = threading.Thread(target=self._initialize)
        initializer.start()
def benchmark_matmul(ctx, timer):
    """Time one dense ``N x N`` matrix product of ones.

    ``N`` grows with the cube root of the worker count and the operands are
    tiled in squares of side ``divup(N, sqrt(num_workers))``.

    :param ctx: cluster context supplying ``num_workers``.
    :param timer: object exposing ``time_op(name, fn)``.
    """
    n_workers = ctx.num_workers
    N = int(1000 * math.pow(n_workers, 1.0 / 3.0))
    # M is not used below; the call is kept so behavior is unchanged.
    M = util.divup(N, n_workers)
    T = util.divup(N, math.sqrt(n_workers))
    util.log_info("Testing with %d workers, N = %d, tile_size=%s",
                  ctx.num_workers, N, T)

    def _square_ones():
        return expr.eager(expr.ones((N, N), dtype=np.double, tile_hint=(T, T)))

    x = _square_ones()
    y = _square_ones()

    def _step():
        product = expr.dot(x, y)
        expr.evaluate(product)

    timer.time_op("matmul", _step)
def pagerankDistributed(ctx, numPage, numIters, alpha):
    '''Chain ``numIters`` link-matrix products onto a rank vector and time them.

    :param ctx: cluster context supplying ``num_workers``.
    :param numPage: number of pages (the link matrix is ``numPage x numPage``).
    :param numIters: number of multiplications.
    :param alpha: unused by the code below.
    :return: ``(generation_seconds, compute_seconds)``.
    '''
    sGenerate = time.time()
    pages_per_worker = numPage / ctx.num_workers

    rank = eager(expr.ones((numPage, 1),
                           tile_hint=(pages_per_worker, 1),
                           dtype=np.float32))
    empty_links = expr.ndarray((numPage, numPage),
                               dtype=np.float32,
                               tile_hint=(numPage, pages_per_worker))
    linkMatrix = eager(expr.shuffle(empty_links, make_weights))
    eGenerate = time.time()
    util.log_info("**pagerank** rank init finished")

    startCompute = time.time()
    product_hint = (numPage, numPage / 10)
    iteration = 0
    while iteration < numIters:
        rank = expr.dot(linkMatrix, rank, tile_hint=product_hint)
        iteration += 1
    # NOTE(review): the source's indentation was lost; this assumes evaluate()
    # runs once after the loop rather than every iteration - confirm.
    rank.evaluate()
    endCompute = time.time()
    util.log_info("**pagerank** compute finished")
    return (eGenerate - sGenerate, endCompute - startCompute)
def predict_price(ask, bid, t):
    '''Fit a one-coefficient predictor of future mid-price from spread changes.

    :param ask: ask prices.
    :param bid: bid prices (same length as ``ask``).
    :param t: lag, in samples, used for the spread change and the look-ahead.
    :return: mean absolute error of the prediction.
    '''
    # Element-wise spread and mid-price of the two series.
    spread = ask - bid
    midprice = (ask + bid) / 2

    # Change of the spread over ``t`` steps, trimmed by a further ``t`` samples
    # so that it lines up with the mid-price ``2 * t`` steps ahead.
    spread_change = (spread[t:] - spread[:-t])[:-t]
    future_price = midprice[2 * t:]
    util.log_info('D: %s, M: %s', spread_change.shape, future_price.shape)

    # Univariate linear predictor: a single averaged ratio.
    coefficient = mean(future_price / spread_change)
    predicted = coefficient * spread_change
    return mean(abs(predicted - future_price))
def benchmark_matmul(ctx, timer):
    '''Benchmark a single ``dot`` of two ``N x N`` matrices of ones.

    :param ctx: cluster context supplying ``num_workers``.
    :param timer: object exposing ``time_op(name, fn)``.
    '''
    N = int(1000 * math.pow(ctx.num_workers, 1.0 / 3.0))
    # Not referenced below; the call is kept so behavior is unchanged.
    M = util.divup(N, ctx.num_workers)
    # Side length of the square tiles.
    T = util.divup(N, math.sqrt(ctx.num_workers))
    util.log_info('Testing with %d workers, N = %d, tile_size=%s',
                  ctx.num_workers, N, T)

    shape, tile = (N, N), (T, T)
    operands = []
    for _ in range(2):
        operands.append(
            expr.eager(expr.ones(shape, dtype=np.double, tile_hint=tile)))
    x, y = operands

    def _step():
        expr.evaluate(expr.dot(x, y))

    timer.time_op('matmul', _step)
def test_convnet(ctx):
    '''Chain three convolution + max-pool stages and log the final shape.

    :param ctx: cluster context; ``ctx.num_workers`` sizes the image tile hint.
    '''
    hint = util.divup(64, sqrt(ctx.num_workers))
    images = expr.eager(expr.ones((N_IMGS,) + IMG_SIZE,
                                  tile_hint=(N_IMGS, N_COLORS, hint, hint)))

    def _stage(inputs, n_inputs):
        # Weights are created first, then the convolution and the pooling.
        filters = expr.eager(expr.ones((N_FILTERS, n_inputs) + FILTER_SIZE,
                                       tile_hint=ONE_TILE))
        convolved = stencil.stencil(inputs, filters, 2)
        return stencil.maxpool(convolved)

    pool1 = _stage(images, N_COLORS)
    pool2 = _stage(pool1, N_FILTERS)
    pool3 = _stage(pool2, N_FILTERS)
    util.log_info(pool3.shape)
def _evaluate(self, ctx, deps):
    '''Run the SGD mapper over ``V`` one stratum at a time.

    :param ctx: unused here.
    :param deps: mapping providing the ``'V'``, ``'M'`` and ``'U'`` arrays.
    '''
    V = deps['V']
    M = deps['M']
    U = deps['U']

    strata = _compute_strata(V)
    total = len(strata)
    util.log_info('Start eval')

    for index, stratum in enumerate(strata):
        util.log_info('Processing stratum: %d of %d (size = %d)', index, total,
                      len(stratum))
        mapper_kw = {
            'V': lazify(V),
            'M': lazify(M),
            'U': lazify(U),
            'worklist': set(stratum),
        }
        # Force each stratum before starting the next one.
        expr.shuffle(V, sgd_netflix_mapper, kw=mapper_kw).force()

    util.log_info('Eval done.')
def run(filename):
    '''Load a benchmark module and run its ``benchmark_*`` functions.

    For every worker count in ``FLAGS.worker_list`` the cluster is started,
    the benchmarks are run and the cluster is shut down again. Results are
    written as CSV with the header ``num_workers,bench,time``.

    The ``'opt_disabled'`` pass now sets ``FLAGS.optimization`` to 0. It
    previously set it to 1, so with ``FLAGS.test_optimizations`` on, both
    passes measured the same configuration under different labels.

    NOTE(review): the source's indentation was lost. This assumes the
    ``'opt_disabled'`` pass is unconditional and only the ``'opt_enabled'``
    pass is guarded by ``FLAGS.test_optimizations``, and that profile joining
    happens outside the ``if benchmarks`` block - confirm.

    :param filename: path of the Python file containing the benchmarks.
    '''
    signal.signal(signal.SIGQUIT, sig_handler)
    os.system('rm ./_worker_profiles/*')

    mod_name, _ = splitext(basename(filename))
    module = imp.load_source(mod_name, filename)
    util.log_info('Running benchmarks for module: %s (%s)', module, filename)
    benchmarks = [
        k for k in dir(module)
        if (k.startswith('benchmark_') and
            isinstance(getattr(module, k), types.FunctionType))
    ]

    spartan.config.parse(sys.argv)
    if benchmarks:
        # csv header
        print('num_workers,bench,time')
        workers = [int(w) for w in FLAGS.worker_list.split(',')]

        for i in workers:
            # restart the cluster
            FLAGS.num_workers = i
            ctx = spartan.initialize()

            timer = BenchTimer(i)
            util.log_info('Running benchmarks on %d workers', i)
            if FLAGS.test_optimizations:
                timer.prefix = 'opt_enabled'
                FLAGS.optimization = 1
                run_benchmarks(module, benchmarks, ctx, timer)

            timer.prefix = 'opt_disabled'
            # Must be 0 so this pass measures the unoptimized configuration.
            FLAGS.optimization = 0
            run_benchmarks(module, benchmarks, ctx, timer)

            spartan.shutdown()
            time.sleep(1)

    if FLAGS.profile_worker:
        util.log_info('Writing worker profiles...')
        join_profiles('./_worker_profiles')
def run(filename):
    '''Run every ``benchmark_*`` function of a module for each worker count.

    The cluster is re-initialized and shut down for each entry of
    ``FLAGS.worker_list``. A CSV header is printed before the first run.

    NOTE(review): the source's indentation was lost. This assumes the
    ``'opt_disabled'`` pass is unconditional and only the ``'opt_enabled'``
    pass is guarded by ``FLAGS.test_optimizations``, and that profile joining
    happens outside the ``if benchmarks`` block - confirm.

    :param filename: path of the Python file containing the benchmarks.
    '''
    signal.signal(signal.SIGQUIT, sig_handler)
    os.system('rm ./_worker_profiles/*')

    mod_name, _ = splitext(basename(filename))
    module = imp.load_source(mod_name, filename)
    util.log_info('Running benchmarks for module: %s (%s)', module, filename)

    benchmarks = []
    for attr_name in dir(module):
        if (attr_name.startswith('benchmark_') and
                isinstance(getattr(module, attr_name), types.FunctionType)):
            benchmarks.append(attr_name)

    spartan.config.parse(sys.argv)
    if benchmarks:
        # csv header
        print('num_workers,bench,time')
        worker_counts = [int(w) for w in FLAGS.worker_list.split(',')]

        for count in worker_counts:
            # restart the cluster
            FLAGS.num_workers = count
            ctx = spartan.initialize()
            timer = BenchTimer(count)
            util.log_info('Running benchmarks on %d workers', count)

            passes = [('opt_disabled', 0)]
            if FLAGS.test_optimizations:
                passes.insert(0, ('opt_enabled', 1))
            for prefix, optimization in passes:
                timer.prefix = prefix
                FLAGS.optimization = optimization
                run_benchmarks(module, benchmarks, ctx, timer)

            spartan.shutdown()
            time.sleep(1)

    if FLAGS.profile_worker:
        util.log_info('Writing worker profiles...')
        join_profiles('./_worker_profiles')
def _evaluate(self, ctx, deps):
    '''Evaluate the SGD mapper stratum by stratum.

    :param ctx: unused here.
    :param deps: mapping providing the ``'V'``, ``'M'`` and ``'U'`` arrays.
    '''
    V, M, U = (deps[key] for key in ('V', 'M', 'U'))
    strata = _compute_strata(V)
    util.log_info('Start eval')

    position = 0
    for stratum in strata:
        util.log_info('Processing stratum: %d of %d (size = %d)', position,
                      len(strata), len(stratum))
        worklist = set(stratum)
        pending = expr.shuffle(V,
                               sgd_netflix_mapper,
                               kw={
                                   'V': lazify(V),
                                   'M': lazify(M),
                                   'U': lazify(U),
                                   'worklist': worklist,
                               })
        # Each stratum is evaluated before the next one is scheduled.
        pending.evaluate()
        position += 1

    util.log_info('Eval done.')
def start_remote_worker(worker, st, ed):
    '''
    Start the workers with indices ``range(st, ed)`` for host ``worker``.

    Thread mode (``FLAGS.use_threads`` on localhost) starts daemon threads and
    returns ``None``. Otherwise one command is launched: through ssh for a
    remote host, through the local shell for localhost.

    :param worker: hostname to connect to.
    :param st: first process index to start.
    :param ed: end of the index range; ``ed - st`` workers are requested.
    :return: the ``subprocess.Popen`` handle, or ``None`` in thread mode.
    '''
    is_local = worker == 'localhost'

    if FLAGS.use_threads and is_local:
        util.log_info('Using threads.')
        for i in range(st, ed):
            worker_thread = threading.Thread(
                target=spartan.worker._start_worker,
                args=((socket.gethostname(), FLAGS.port_base), i))
            worker_thread.daemon = True
            worker_thread.start()
        # NOTE(review): assumed to run once after the loop - confirm.
        time.sleep(0.1)
        return

    util.log_info('Starting worker %d:%d on host %s', st, ed, worker)
    if FLAGS.oprofile:
        os.system('mkdir operf.%s' % worker)

    pieces = ['cd %s && ' % os.path.abspath(os.path.curdir)]
    if FLAGS.xterm:
        pieces.append('xterm')
        pieces.append('-e')
    if FLAGS.oprofile:
        pieces += [
            'operf -e CPU_CLK_UNHALTED:100000000', '-g', '-d',
            'operf.%s' % worker
        ]
    pieces += [
        'python',
        '-m spartan.worker',
        '--master=%s:%d' % (socket.gethostname(), FLAGS.port_base),
        '--count=%d' % (ed - st),
        '--heartbeat_interval=%d' % FLAGS.heartbeat_interval,
    ]

    # add flags from config/user
    skipped = ['worker_list', 'print_options']
    pieces += [repr(value) for (name, value) in FLAGS if name not in skipped]

    command_line = ' '.join(pieces)
    util.log_debug('Running worker %s', command_line)
    time.sleep(0.1)

    if is_local:
        return subprocess.Popen(command_line, shell=True,
                                stdin=subprocess.PIPE)
    return subprocess.Popen(['ssh', '-oForwardX11=no', worker] + pieces,
                            executable='ssh')
def add_one_extent(v, ex):
    '''Fetch extent ``ex`` of ``v``, add one, and yield ``(ex, result)``.

    :param v: array exposing ``fetch(extent)``.
    :param ex: extent to fetch and to report the result under.
    '''
    fetched = v.fetch(ex)
    incremented = fetched + 1
    util.log_info('AddOne: %s, %s', ex, incremented)
    yield (ex, incremented)
def fuzzy_kmeans(points, k=10, num_iter=10, m=2.0, centers=None):
    '''
    clustering data points using fuzzy kmeans clustering method.

    Args:
      points(Expr or DistArray): the input data points matrix.
      k(int): the number of clusters.
      num_iter(int): the max iterations to run.
      m(float): the parameter of fuzzy kmeans.
      centers(Expr or DistArray): the initialized centers of each cluster.

    Returns:
      The labels expression from the last iteration (argmax of the membership
      probabilities per point), or the initial all-zero labels when
      ``num_iter`` is 0.
    '''
    points = expr.force(points)
    num_points, num_dim = points.shape[0], points.shape[1]
    if centers is None:
        centers = expr.rand(k, num_dim)

    # The builtin ``int`` replaces ``np.int``, which was merely an alias of it
    # and no longer exists in recent NumPy releases.
    labels = expr.zeros((num_points, ), dtype=int)
    for _ in range(num_iter):
        centers = expr.as_array(centers)
        points_broadcast = expr.reshape(points, (num_points, 1, num_dim))
        centers_broadcast = expr.reshape(
            centers, (1, centers.shape[0], centers.shape[1]))
        distances = expr.sum(expr.square(points_broadcast - centers_broadcast),
                             axis=2)
        # This is used to avoid dividing zero
        distances = distances + 0.00000000001
        util.log_info('distances shape %s' % str(distances.shape))
        distances_broadcast = expr.reshape(
            distances, (distances.shape[0], 1, distances.shape[1]))
        distances_broadcast2 = expr.reshape(
            distances, (distances.shape[0], distances.shape[1], 1))
        prob = 1.0 / expr.sum(expr.power(
            distances_broadcast / distances_broadcast2, 2.0 / (m - 1)),
                              axis=2)
        prob.force()
        counts = expr.sum(prob, axis=0)
        counts = expr.reshape(counts, (counts.shape[0], 1))
        labels = expr.argmax(prob, axis=1)
        centers = expr.sum(
            expr.reshape(points, (num_points, 1, num_dim)) *
            expr.reshape(prob, (prob.shape[0], prob.shape[1], 1)),
            axis=0)

        # We assume that the size of centers are relative small that can be handled
        # on the master.
        counts = counts.glom()
        centers = centers.glom()

        # If any centroids don't have any points assigned to them.
        # Flattening with -1 follows the real number of centers, so passing
        # ``centers`` with a row count other than ``k`` no longer fails here.
        zcount_indices = (counts == 0).reshape(-1)

        if np.any(zcount_indices):
            # One or more centroids may not have any points assigned to them,
            # which results in their position being the zero-vector. We reseed
            # these centroids with new random values and set their counts to 1
            # in order to get rid of dividing by zero.
            counts[zcount_indices, :] = 1
            centers[zcount_indices, :] = np.random.rand(
                np.count_nonzero(zcount_indices), num_dim)

        centers = centers / counts

    return labels
def profile2(self):
    '''
    Time saving/loading and pickling/unpickling of a sparse array.

    Every operation is run once with and once without zip and the elapsed
    time is written to the log.
    '''
    self.create_path()
    t1 = expr.sparse_rand((10000, 10000)).evaluate()

    # (log message, operation) pairs, executed strictly in this order because
    # each load/unpickle reads what the preceding save/pickle wrote.
    steps = (
        ('Save a %s sparse array in %s without zip',
         lambda: expr.save(t1, "fiotest3", self.test_dir, False)),
        ('Load a %s sparse array in %s without zip',
         lambda: expr.load("fiotest3", self.test_dir, False).evaluate()),
        ('Save a %s sparse array in %s with zip',
         lambda: expr.save(t1, "fiotest3", self.test_dir, True)),
        ('Load a %s sparse array in %s with zip',
         lambda: expr.load("fiotest3", self.test_dir, True).evaluate()),
        ('Pickle a %s sparse array in %s without zip',
         lambda: expr.pickle(t1, "fiotest4", self.test_dir, False)),
        ('Unpickle a %s sparse array in %s without zip',
         lambda: expr.unpickle("fiotest4", self.test_dir, False).evaluate()),
        ('Pickle a %s sparse array in %s with zip',
         lambda: expr.pickle(t1, "fiotest4", self.test_dir, True)),
        ('Unpickle a %s sparse array in %s with zip',
         lambda: expr.unpickle("fiotest4", self.test_dir, True).evaluate()),
    )
    for message, operation in steps:
        elapsed, _ = util.timeit(operation)
        util.log_info(message, t1.shape, elapsed)
def test_find_change(self):
    '''Run finance.find_change on 100 random normal samples and log the result.'''
    samples = expr.randn(100)
    change_expr = finance.find_change(samples)
    util.log_info(change_expr.glom())
def test_put(self):
    '''Evaluate the put result of finance.black_scholes and log it.'''
    put_expr, _call_expr = finance.black_scholes(
        self.current, self.strike, maturity, rate, volatility)
    put_value = put_expr.glom()
    util.log_info(put_value)
def test_call(self):
    '''Evaluate the call result of finance.black_scholes and log it.'''
    _put_expr, call_expr = finance.black_scholes(
        self.current, self.strike, maturity, rate, volatility)
    call_value = call_expr.glom()
    util.log_info(call_value)
def solve(A, AT, desired_rank, is_symmetric=False):
    '''
    A simple implementation of the Lanczos algorithm
    (http://en.wikipedia.org/wiki/Lanczos_algorithm) for eigenvalue computation.

    Like the Mahout implementation, only the matrix*vector step is
    parallelized. First the Lanczos method turns the operator into
    tridiagonal form, then numpy.linalg.eig extracts the eigenvalues and
    eigenvectors of that small tridiagonal matrix on the local machine.

    The operator being decomposed is ``A`` itself when ``is_symmetric`` is
    true, otherwise ``dot(AT, A)`` (``A`` is applied first, then ``AT``).

    Args:
      A: distributed matrix; ``A.shape[1]`` is the length of the Lanczos
        vectors.
      AT: matrix applied after ``A`` when ``is_symmetric`` is false
        (presumably the transpose of ``A`` -- confirm against callers).
        Unused when ``is_symmetric`` is true.
      desired_rank(int): number of eigenpairs to return.
      is_symmetric(bool): when true, use ``A`` directly as the operator.

    Returns:
      tuple: ``(d, s)`` where ``d`` holds the eigenvalue estimates sorted by
      decreasing absolute value and the columns of ``s`` are the matching
      eigenvector estimates. At most ``desired_rank`` pairs are returned;
      fewer are returned when the iteration finds an exact invariant
      subspace before enough steps were taken.
    '''
    # Calculate two more eigenvalues, but we only keep the largest
    # desired_rank ones. Doing this to keep the result consistent with
    # scipy.sparse.linalg.svds.
    desired_rank += 2

    n = A.shape[1]
    v_next = np.ones(n) / np.sqrt(n)
    v_prev = np.zeros(n)
    # beta[0] stays 0 so the first step has no "previous vector" term.
    beta = np.zeros(desired_rank + 1)
    alpha = np.zeros(desired_rank)

    # Since desired_rank << size of matrix, we keep V in local memory for
    # efficiency reasons (it is updated on every iteration). If V cannot fit
    # in local memory it could be turned into a spartan distributed array.
    V = np.zeros((n, desired_rank))

    # Number of Lanczos steps actually completed.
    steps = desired_rank
    for i in range(desired_rank):
        util.log_info("Iter : %s", i)
        v_next_expr = expr.from_numpy(v_next.reshape(n, 1))

        if is_symmetric:
            w = expr.dot(A, v_next_expr).optimized().glom().reshape(n)
        else:
            w = expr.dot(A, v_next_expr)
            w = expr.dot(AT, w).optimized().glom().reshape(n)

        alpha[i] = np.dot(w, v_next)
        w = w - alpha[i] * v_next - beta[i] * v_prev

        # Orthogonalize against the previously stored Lanczos vectors:
        for t in range(i):
            tmpa = np.dot(w, V[:, t])
            if tmpa == 0.0:
                continue
            w -= tmpa * V[:, t]

        beta[i + 1] = np.linalg.norm(w, 2)
        V[:, i] = v_next

        if beta[i + 1] == 0.0:
            # Breakdown: the stored vectors span an exact invariant subspace.
            # Normalizing w would divide by zero and fill every later step
            # with NaNs, so stop and use the steps completed so far.
            steps = i + 1
            break

        v_prev = v_next
        v_next = w / beta[i + 1]

    alpha = alpha[:steps]
    V = V[:, :steps]

    # Create the tridiagonal matrix with size (steps X steps).
    tridiag = np.diag(alpha)
    for i in range(steps - 1):
        tridiag[i, i + 1] = beta[i + 1]
        tridiag[i + 1, i] = beta[i + 1]

    # Get eigenvectors and eigenvalues of this tridiagonal matrix.
    # Its eigenvalues approximate the eigenvalues of the operator, and the
    # operator's eigenvectors are obtained by multiplying V with the
    # eigenvectors of the tridiagonal matrix.
    d, v = np.linalg.eig(tridiag)

    # Sort eigenvalues and their corresponding eigenvectors by decreasing
    # absolute value.
    sorted_idx = np.argsort(np.absolute(d))[::-1]
    d = d[sorted_idx]
    v = v[:, sorted_idx]

    # Map the eigenvectors back to the original space.
    s = np.dot(V, v)
    return d[0:desired_rank - 2], s[:, 0:desired_rank - 2]
def test_local_offset():
    '''Log the result of extent.offset_from for extent (2,2)-(3,3) against (0,0)-(5,5).'''
    outer = extent.create((0, 0), (5, 5), None)
    inner = extent.create((2, 2), (3, 3), None)
    offset = extent.offset_from(outer, inner)
    util.log_info('%s', offset)