def _convert_df_to_output_type(df, input_type):
    """
    Given a cudf.DataFrame df, convert it to a new type appropriate for the
    graph algos in this module, based on input_type.
    """
    if input_type in [Graph, DiGraph]:
        return df
    elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]):
        return df.to_pandas()
    elif (cp is not None) and \
         (input_type in [cp_coo_matrix, cp_csr_matrix, cp_csc_matrix]):
        # A CuPy/SciPy input means the return value will be a 2-tuple of:
        #   distance: cupy.ndarray
        #   predecessor: cupy.ndarray
        sorted_df = df.sort_values("vertex")
        distances = cp.fromDlpack(sorted_df["distance"].to_dlpack())
        preds = cp.fromDlpack(sorted_df["predecessor"].to_dlpack())
        if "sp_counter" in df.columns:
            return (distances, preds,
                    cp.fromDlpack(sorted_df["sp_counter"].to_dlpack()))
        else:
            return (distances, preds)
    else:
        raise TypeError(f"input type {input_type} is not a supported type.")
def show_manhattan_plot(df, group_by, x_axis, y_axis):
    chroms = df[group_by].unique().to_array()
    manhattan_fig = figure()
    start_position = -0.5
    for chrom in chroms:
        query = '%s == %s' % (group_by, chrom)
        cdf = df.query(query)
        x_array = cupy.fromDlpack(cdf[x_axis].to_dlpack()) + start_position
        y_array = cupy.fromDlpack(cdf[y_axis].to_dlpack())
        manhattan_fig.circle(
            x_array.get(), y_array.get(), size=2,
            color='orange' if (start_position - 0.5) % 2 == 0 else 'gray',
            alpha=0.5)
        start_position += 1
    manhattan_handle = show(manhattan_fig, notebook_handle=True)
    push_notebook(handle=manhattan_handle)
    return manhattan_fig
def test_multiple_consumption_error(self):
    # Prevent segfault, see #3611
    array = cupy.empty(10)
    tensor = array.toDlpack()
    array2 = cupy.fromDlpack(tensor)  # noqa
    with pytest.raises(ValueError) as e:
        array3 = cupy.fromDlpack(tensor)  # noqa
    assert 'consumed multiple times' in str(e.value)
def cupy_adapter_sync(fun, in1, in2):
    with cupy_stream:
        tin1 = [cupy.fromDlpack(dltensor) for dltensor in in1]
        tin2 = [cupy.fromDlpack(dltensor) for dltensor in in2]
        tout1, tout2 = fun(tin1, tin2)
        out1, out2 = [tout.toDlpack() for tout in tout1], \
                     [tout.toDlpack() for tout in tout2]
    cupy_stream.synchronize()
    return out1, out2
def getConnectionMatrix(self) -> csr_matrix:
    distances = cupy.ravel(cupy.fromDlpack(self.D.to_dlpack()))
    indices = cupy.ravel(cupy.fromDlpack(self.I.to_dlpack()))
    n_samples = indices.shape[0]
    n_nonzero = n_samples * self.nneighbors
    rowptr = cupy.arange(0, n_nonzero + 1, self.nneighbors)
    knn_graph = cupyx.scipy.sparse.csr_matrix((distances, indices, rowptr),
                                              shape=(n_samples, n_samples))
    print(f"Completed KNN, sparse graph shape = {knn_graph.shape}")
    return knn_graph
def _train(self, train_dataloader, validation_dataloader, model, epochs):
    model.train()  # Enable training mode
    for _ in trange(epochs, desc="Epoch"):
        tr_loss = 0  # Tracking variables
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(train_dataloader):
            batch = tuple(t.to(self._device) for t in batch)  # Add batch to GPU
            b_input_ids, b_input_mask, b_labels = batch  # Unpack the inputs from the dataloader
            self._optimizer.zero_grad()  # Clear out the gradients
            loss = model(b_input_ids,
                         token_type_ids=None,
                         attention_mask=b_input_mask,
                         labels=b_labels)[0]  # Forward pass
            loss.sum().backward()
            self._optimizer.step()  # Update parameters
            tr_loss += loss.sum().item()  # Get a numeric value
            nb_tr_examples += b_input_ids.size(0)
            nb_tr_steps += 1
        print("Train loss: {}".format(tr_loss / nb_tr_steps))

        model.eval()  # Put model in evaluation mode to evaluate loss on the validation set
        eval_accuracy = 0
        nb_eval_steps = 0
        for batch in validation_dataloader:
            batch = tuple(t.to(self._device) for t in batch)
            b_input_ids, b_input_mask, b_labels = batch
            with torch.no_grad():  # Tell the model not to compute or store gradients, saving memory and speeding up validation
                logits = model(b_input_ids,
                               token_type_ids=None,
                               attention_mask=b_input_mask)[0]  # Forward pass, calculate logit predictions
            logits = cupy.fromDlpack(to_dlpack(logits))
            label_ids = cupy.fromDlpack(to_dlpack(b_labels))
            # logits = logits.detach().cpu().numpy()
            # label_ids = b_labels.to('cpu').numpy()
            temp_eval_accuracy = self._flatten_accuracy(logits, label_ids)
            eval_accuracy += temp_eval_accuracy
            nb_eval_steps += 1
        print("Validation Accuracy: {}".format(eval_accuracy / nb_eval_steps))
    return model
def evaluate_model(self, test_data, labels, max_seq_len=128, batch_size=32):
    """
    Evaluate trained model

    :param test_data: test data to evaluate model
    :type test_data: cudf.Series
    :param labels: labels for each element in test_data
    :type labels: cudf.Series
    :param max_seq_len: Limits the length of the sequence returned by the
        tokenizer. If the tokenized sentence is shorter than max_seq_len, the
        output will be padded with 0s. If the tokenized sentence is longer
        than max_seq_len, it will be truncated to max_seq_len.
    :type max_seq_len: int
    :param batch_size: batch size
    :type batch_size: int

    Examples
    --------
    >>> from cuml.preprocessing.model_selection import train_test_split
    >>> emails_train, emails_test, labels_train, labels_test = train_test_split(train_emails_df, 'label', train_size=0.8)
    >>> sc.evaluate_model(emails_test, labels_test)
    """
    self._model.eval()
    test_gdf = cudf.DataFrame()
    test_gdf["text"] = test_data
    test_gdf["label"] = labels

    test_dataset = Dataset(test_gdf)
    test_dataloader = DataLoader(test_dataset, batchsize=batch_size)

    eval_accuracy = 0
    nb_eval_steps = 0
    for df in test_dataloader.get_chunks():
        b_input_ids, b_input_mask = self._bert_uncased_tokenize(df["text"],
                                                                max_seq_len)
        b_labels = torch.tensor(df["label"].to_numpy())
        with torch.no_grad():
            logits = self._model(b_input_ids,
                                 token_type_ids=None,
                                 attention_mask=b_input_mask)[0]

        logits = logits.type(torch.DoubleTensor).to(self._device)
        logits = cupy.fromDlpack(to_dlpack(logits))
        label_ids = b_labels.type(torch.IntTensor).to(self._device)
        label_ids = cupy.fromDlpack(to_dlpack(label_ids))
        temp_eval_accuracy = self._flatten_accuracy(logits, label_ids)

        eval_accuracy += temp_eval_accuracy
        nb_eval_steps += 1

    accuracy = eval_accuracy / nb_eval_steps
    return float(accuracy)
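# Hedged sketch of the _flatten_accuracy helper used by _train and
# evaluate_model above. Its real implementation is not shown in this section,
# so this standalone version is an assumption: the fraction of argmax
# predictions that match the labels, computed on the GPU with CuPy.
import cupy


def _flatten_accuracy_sketch(logits, labels):
    preds = cupy.argmax(logits, axis=1).flatten()
    labels = labels.flatten()
    return float((preds == labels).sum() / len(labels))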
def __call__(self, loc, fg_score, anchor, img_size, scale=1.):
    """
    Args:
        - loc: (N, 4)
        - fg_score: (N,)
        - anchor: (9, 4)
        - img_size: (2,)
    """
    if self.parent_model.training:
        n_pre_nms = self.n_train_pre_nms
        n_post_nms = self.n_train_post_nms
    else:
        n_pre_nms = self.n_test_pre_nms
        n_post_nms = self.n_test_post_nms

    loc = cp.fromDlpack(to_dlpack(loc))
    fg_score = cp.fromDlpack(to_dlpack(fg_score))
    anchor = cp.asarray(anchor)
    roi = loc2bbox(anchor, loc)

    # clip to image boundaries
    roi[:, slice(0, 4, 2)] = cp.clip(roi[:, slice(0, 4, 2)], 0, img_size[1])
    roi[:, slice(1, 4, 2)] = cp.clip(roi[:, slice(1, 4, 2)], 0, img_size[0])

    # remove boxes smaller than the threshold
    min_size = self.min_size * scale
    hs = roi[:, 3] - roi[:, 1]
    ws = roi[:, 2] - roi[:, 0]
    keep = cp.where((hs > min_size) & (ws > min_size))[0]
    roi = roi[keep, :]
    fg_score = fg_score[keep]

    # sort by score
    order = cp.argsort(fg_score.ravel())[::-1]
    if n_pre_nms > 0:
        order = order[0:n_pre_nms]
    roi = roi[order, :]

    keep = non_maximum_suppression(cp.ascontiguousarray(cp.asarray(roi)),
                                   thresh=self.nms_thresh)
    if n_post_nms > 0:
        keep = keep[:n_post_nms]
    roi = roi[keep]
    return roi
def update_W(W, a_last, z, u, rho):
    size = dist.get_world_size()
    rank = dist.get_rank()

    # convert to PyTorch data and update W
    temp1 = z + u / rho
    temp1 = from_dlpack(ndar.toDlpack(temp1))
    a_last = from_dlpack(ndar.toDlpack(a_last))
    data1 = torch.mm(temp1, torch.t(a_last))
    data2 = torch.mm(a_last, torch.t(a_last))
    data = torch.cat((data1, data2), 0)
    # data = comm.reduce(data, op=MPI.SUM, root=0)
    dist.reduce(data, dst=0, op=dist.ReduceOp.SUM)
    if rank == 0:
        middle_pos = data1.shape[0]
        data1 = data[0:middle_pos]
        data2 = data[middle_pos:]
        inverse_data = torch.pinverse(data2)
        W = torch.mm(data1, inverse_data)
    else:
        W = from_dlpack(ndar.toDlpack(W))
        # W = None
    dist.broadcast(W, src=0)

    # convert back to CuPy data
    W = fromDlpack(to_dlpack(W))
    return W
def calculate(self, **kwargs):
    smiles_dataset = kwargs['smiles_dataset']
    fingerprint_dataset = kwargs['fingerprint_dataset']
    properties = kwargs['properties']
    estimator = kwargs['estimator']
    param_dict = kwargs['param_dict']

    embeddings = self.sample_many(smiles_dataset,
                                  zero_padded_vals=False,
                                  average_tokens=True)
    embeddings = cupy.asarray(embeddings, dtype=cupy.float32)

    fingerprints = cupy.fromDlpack(fingerprint_dataset.data.to_dlpack())
    fingerprints = cupy.asarray(fingerprints, order='C', dtype=cupy.float32)

    metric, fingerprint_errors, embedding_errors = self._calculate_metric(
        embeddings, fingerprints, properties, estimator, param_dict)
    logger.info(
        f'{type(metric)} {type(fingerprint_errors)} {type(embedding_errors)}')
    metric = cupy.nanmean(metric)
    fingerprint_errors = cupy.nanmean(fingerprint_errors)
    embedding_errors = cupy.nanmean(embedding_errors)

    return pd.Series({'name': self.name,
                      'value': metric,
                      'fingerprint_error': fingerprint_errors,
                      'embedding_error': embedding_errors})
def convert_to_cupy_array(input_data):
    """Convert Tensor data to a CuPy Array.

    This method converts input tensor data to a CuPy array.

    Parameters
    ----------
    input_data : torch.Tensor
        Input Tensor to be converted

    Returns
    -------
    cupy.ndarray
        The tensor data as a CuPy array

    Raises
    ------
    ImportError
        If Torch package not found

    """
    if not import_torch:
        raise ImportError(
            'Required version of Torch package not found, '
            + 'see documentation for details: https://cea-cosmic.'
            + 'github.io/ModOpt/#optional-packages',
        )

    if input_data.is_cuda:
        return cp.fromDlpack(torch_to_dlpack(input_data))

    return input_data.detach().numpy()
def copy_tensor(dst_tensor, src_tensor):
    """Copy the content from src_tensor to dst_tensor.

    Args:
        dst_tensor: the tensor to copy to.
        src_tensor: the tensor to copy from.

    Returns:
        None
    """
    copied = True
    if isinstance(dst_tensor, cupy.ndarray) \
            and isinstance(src_tensor, cupy.ndarray):
        cupy.copyto(dst_tensor, src_tensor)
    elif torch_available():
        if isinstance(dst_tensor, torch.Tensor) and isinstance(
                src_tensor, torch.Tensor):
            dst_tensor.copy_(src_tensor)
        elif isinstance(dst_tensor, torch.Tensor) and isinstance(
                src_tensor, cupy.ndarray):
            t = torch.utils.dlpack.from_dlpack(src_tensor.toDlpack())
            dst_tensor.copy_(t)
        elif isinstance(dst_tensor, cupy.ndarray) and isinstance(
                src_tensor, torch.Tensor):
            t = cupy.fromDlpack(torch.utils.dlpack.to_dlpack(src_tensor))
            cupy.copyto(dst_tensor, t)
        else:
            copied = False
    else:
        copied = False

    if not copied:
        raise ValueError(
            "Unsupported tensor type. Got: {} and {}. Supported "
            "GPU tensor types are: torch.Tensor, cupy.ndarray.".format(
                type(dst_tensor), type(src_tensor)))
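# Hedged usage sketch for copy_tensor above, assuming it and its
# torch_available() helper are importable from this module: copy a CuPy array
# into a pre-allocated PyTorch CUDA tensor via the torch <- cupy DLPack branch.
import cupy
import torch

if torch.cuda.is_available():
    src = cupy.arange(4, dtype=cupy.float32)
    dst = torch.empty(4, dtype=torch.float32, device='cuda')
    copy_tensor(dst, src)  # hits the torch.Tensor <- cupy.ndarray branch
    assert torch.allclose(dst,
                          torch.arange(4, dtype=torch.float32, device='cuda'))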
def show_qq_plot(df, x_axis, y_axis):
    x_values = cupy.fromDlpack(df[x_axis].to_dlpack())
    y_values = cupy.fromDlpack(df[y_axis].to_dlpack())
    x_max = cupy.max(x_values).tolist()
    y_max = cupy.max(y_values).tolist()
    qq_fig = figure(x_range=(0, x_max), y_range=(0, y_max))
    qq_fig.circle(-cupy.log10(x_values + 1e-10).get(),
                  -cupy.log10(y_values).get())
    qq_fig.line([0, x_max], [0, y_max])
    qq_handle = show(qq_fig, notebook_handle=True)
    push_notebook(handle=qq_handle)
    return qq_fig
def torch2cupy(tensor):
    """
    :param tensor: PyTorch CUDA tensor.
    :return: CuPy tensor.
    """
    dx = to_dlpack(tensor)
    return cupy.fromDlpack(dx)
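# Hedged usage sketch for torch2cupy above: demonstrates the zero-copy DLPack
# round-trip between a PyTorch CUDA tensor and a CuPy array. The reverse
# helper cupy2torch below is an assumption added for illustration; it is not
# part of the snippets in this section.
import cupy
import torch
from torch.utils.dlpack import from_dlpack, to_dlpack


def cupy2torch(array):
    # Hypothetical inverse of torch2cupy: CuPy array -> PyTorch CUDA tensor.
    return from_dlpack(array.toDlpack())


if torch.cuda.is_available():
    t = torch.arange(6, dtype=torch.float32, device='cuda')
    c = torch2cupy(t)  # shares the same device memory, no copy
    c *= 2             # in-place change through CuPy is visible to the tensor
    assert torch.equal(t, cupy2torch(c))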
def get_embedding(self, text, show_tokens=False):
    bert_tokens = self.tokenizer.tokenize(text)
    ids = self.tokenizer.convert_tokens_to_ids(
        ['[CLS]'] + bert_tokens[:(self.window_size - 2)] + ['[SEP]'])
    tokens_tensor = torch.tensor(ids).reshape(1, -1).to(self.DEVICE)
    self.model.eval()
    with torch.no_grad():
        all_encoder_layers, _ = self.model(tokens_tensor)
    embedding = all_encoder_layers[0]
    if available:
        xp_embedding = xp.fromDlpack(to_dlpack(embedding))
    else:
        xp_embedding = embedding.numpy()
    if xp_embedding.shape[0] < self.window_size:
        xp_embedding = xp.concatenate([
            xp_embedding,
            xp.zeros(((self.window_size - xp_embedding.shape[0]), 768))
        ], 0)
    if show_tokens:
        return (xp_embedding,
                ['[CLS]'] + bert_tokens[:(self.window_size - 2)] + ['[SEP]'])
    else:
        return xp_embedding
def forward(ctx, x, forw, adj, pylops, device):
    ctx.forw = forw
    ctx.adj = adj
    ctx.pylops = pylops
    ctx.device = device

    # prepare input
    if ctx.pylops:
        if ctx.device == 'cpu':
            # bring x to cpu and numpy
            x = x.cpu().detach().numpy()
        else:
            # pass x to cupy using DLPack
            x = cp.fromDlpack(to_dlpack(x))

    # apply forward operator
    y = ctx.forw(x)

    # prepare output
    if ctx.pylops:
        if ctx.device == 'cpu':
            # move y to torch and device
            y = torch.from_numpy(y)
        else:
            # move y to torch and device
            y = from_dlpack(y.toDlpack())
    return y
def afterOptimizerStep(self, retrievedPosIndexes, retrievedNegIndexes=None):
    reshapedRetrieval = self._getReshapedRetrieval(retrievedPosIndexes,
                                                   retrievedNegIndexes)
    self.CUPYmemmap[reshapedRetrieval] = (
        cupy.fromDlpack(to_dlpack(self.model_variable.weight.data)))
def correlate(ip, weight, padding, stride, kernel_size, op_size=None,
              flip=False, dim_switch=False):
    if flip:
        w_transform = torch.flip(weight, torch.arange(weight.dim()).tolist())
    if dim_switch:
        ip = torch.transpose(ip, 1, 0)
        # calculate padding
        tmp = ((op_size - 1) * stride + kernel_size - ip.size(3))
        padding = math.floor(max(tmp, 0) / 2)
    if flip or dim_switch:
        weight = torch.transpose(weight, 1, 0)

    o_channels = weight.size(0)
    ip_unfold = torch.nn.functional.unfold(ip, (kernel_size, kernel_size),
                                           padding=padding, stride=stride)
    w_unfold = weight.contiguous().view(o_channels, -1)

    # print("op_size :", op_size)
    # print("stride : ", stride)
    # print("padding :", padding)
    # print("kernel_size :", kernel_size)
    # print("o_channels :", o_channels)
    # print("input :", ip.size())
    # print("input_unfold :", ip_unfold.size())
    # print("weight :", weight.size())
    # print("weight_unfold :", w_unfold.size())
    # print()

    # cupy conversion
    ip_cupy = cp.fromDlpack(to_dlpack(ip_unfold)).astype('int8')
    w_cupy = cp.fromDlpack(to_dlpack(w_unfold)).astype('int8')

    # multiplication in cupy
    op = cp.matmul(cp.transpose(ip_cupy, (0, 2, 1)), cp.transpose(w_cupy))
    op = cp.transpose(op, (0, 2, 1))
    return op
def afterOptimizerStep(self, retrievedPosIndexes, retrievedNegIndexes=None):
    torch.cuda.synchronize()
    cupy.cuda.Device().synchronize()

    reshapedRetrieval = self._getReshapedRetrieval(retrievedPosIndexes,
                                                   retrievedNegIndexes)

    self.CUPYcorpus[reshapedRetrieval] = (
        cupy.fromDlpack(to_dlpack(self.model_variable.weight.data)))
def generate_z(self, memory, x, T, outputSize):
    batchSize = x.size(0)
    z_size = int(outputSize // 4) + 1
    z = torch.zeros((batchSize, z_size), dtype=torch.float32).cuda()

    x1 = to_dlpack(x)
    z1 = to_dlpack(z)
    memory1 = to_dlpack(memory)

    c_x = cp.fromDlpack(x1)  # .astype(cp.float32)
    c_z = cp.fromDlpack(z1)
    c_mem = cp.fromDlpack(memory1)  # .astype(cp.float32)

    kernel = self.cuda_kernel_UFL()
    kernel((int(z_size // 64) + 1, (batchSize // 16) + 1), (64, 16),
           (c_x, c_mem, T, c_z, int(z_size), int(batchSize), int(outputSize)))

    return z.sum(dim=-1)  # .double()
def cuda_correlate(ip, weight, padding, stride, kernel_size, corner_case=False,
                   op_channels=None, dilation=1):
    if corner_case:
        w_transform = torch.flip(weight, torch.arange(weight.dim()).tolist())

    o_channels = weight.size(0)
    ip_unfold = torch.nn.functional.unfold(ip, (kernel_size, kernel_size),
                                           padding=padding, stride=stride)
    w_unfold = weight.view(o_channels, -1)

    # setup output
    batch_size, channels, height, width = ip.size()
    kernel_h, kernel_w = weight.size()[2:]
    output_h = int((height + 2 * padding - (dilation * (kernel_h - 1) + 1)) / stride + 1)
    output_w = int((width + 2 * padding - (dilation * (kernel_w - 1) + 1)) / stride + 1)
    output = ip.new(batch_size, weight.size(0), output_h, output_w)
    op_unfold = output.view(ip.size(0), weight.size(0), output_h * output_w).byte()

    # cupy conversion
    ip_cupy = cp.fromDlpack(to_dlpack(ip_unfold)).astype('int8')
    w_cupy = cp.fromDlpack(to_dlpack(w_unfold)).astype('int8')
    op_cupy = cp.fromDlpack(to_dlpack(op_unfold)).astype('int8')

    # need to pretend like these are transposed, so swapping the values for m, n and k
    # should be: m = ip(1), n = ip(2), k = w(1)
    m = ip_unfold.size(2)
    n = ip_unfold.size(1)
    k = w_unfold.size(0)

    # set these up properly to make this work
    blockSize = 16
    batchNum = ip_unfold.size(0)
    dimBlock = (blockSize, blockSize, 1)
    dimGrid = (int((k + blockSize - 1) / blockSize),
               int((m + blockSize - 1) / blockSize), 1)
    # print("batchNum: ", batchNum, "dimBlock:", dimBlock, "dimGrid:", dimGrid)

    f = load_kernel('gpu_matrix_mult', matmul_kernel)
    f(block=dimBlock, grid=dimGrid,
      args=[ip_cupy.data.ptr, w_cupy.data.ptr, op_cupy.data.ptr, m, n, k],
      stream=Stream(ptr=torch.cuda.current_stream().cuda_stream))

    # multiplication in cupy
    op = cp.matmul(cp.transpose(ip_cupy, (0, 2, 1)), cp.transpose(w_cupy))
    op = cp.transpose(op, (0, 2, 1))
    return op
def afterOptimizerStep(self, retrievedPosIndexes, retrievedNegIndexes=None):
    torch.cuda.synchronize()
    cupy.cuda.Device().synchronize()

    reshapedRetrieval = self._getReshapedRetrieval(retrievedPosIndexes,
                                                   retrievedNegIndexes)

    for idx, optVar in enumerate(self.optVarList):
        self.CUPYcorpi[idx][reshapedRetrieval] = (
            cupy.fromDlpack(to_dlpack(
                self.given_optimizer.state_dict()['state'][
                    self.optimizerKey][optVar])))
def decompress(self, tensor_compressed, shape):
    tensor_compressed, = tensor_compressed

    cupy_tensor = cupy.fromDlpack(to_dlpack(tensor_compressed))
    sign = cupy_tensor > 127
    exps = cupy.bitwise_and(cupy_tensor, 0b01111111)
    floats = cupy.left_shift((exps + 18).astype(cupy.int32), 23).view(cupy.float32)
    tensor_decompressed = cupy.where(sign, -floats, floats)
    tensor_decompressed = cupy.multiply((exps >= 1).astype(cupy.float32),
                                        tensor_decompressed)
    return from_dlpack(tensor_decompressed.toDlpack()).view(shape)
def com_reduce(buffer_m: torch.tensor, rank, world_size, comm):
    tensor_size = torch.numel(buffer_m)
    chunk_size = (tensor_size + world_size - 1) // world_size
    last_chunk_size = tensor_size - chunk_size * (world_size - 1)
    my_chunk_size = last_chunk_size if rank == world_size - 1 else chunk_size

    flatten_buffer_m = buffer_m.flatten()
    flatten_buffer_m_cupy = cupy.fromDlpack(to_dlpack(flatten_buffer_m))

    # First round of communication
    recvbuf = cupy.zeros([world_size, my_chunk_size],
                         dtype=flatten_buffer_m_cupy.dtype)

    requests = []
    for idx in range(world_size):
        start = idx * chunk_size
        length = last_chunk_size if idx == world_size - 1 else chunk_size
        req_sign = comm.Igather(flatten_buffer_m_cupy[start:start + length],
                                recvbuf, root=idx)
        requests.append(req_sign)

    MPI.Request.Waitall(requests)

    # Second round of communication
    recvbuf_flatten = recvbuf.flatten()
    local_reduced_chunk = cupy.zeros(my_chunk_size,
                                     dtype=flatten_buffer_m_cupy.dtype)
    _avg_chunks(recvbuf_flatten, my_chunk_size, world_size, local_reduced_chunk)

    recvbuf_server = [
        cupy.zeros(chunk_size, dtype=flatten_buffer_m_cupy.dtype)
    ] * (world_size - 1)
    recvbuf_server.append(
        cupy.zeros(last_chunk_size, dtype=flatten_buffer_m_cupy.dtype))
    recvbuf_server[rank] = local_reduced_chunk

    server_requests = []
    for idx in range(world_size):
        if idx != rank:
            req_server_send = comm.Isend(local_reduced_chunk, idx)
            req_server_recv = comm.Irecv(recvbuf_server[idx], idx)
            server_requests.append(req_server_send)
            server_requests.append(req_server_recv)

    MPI.Request.Waitall(server_requests)

    recvbuf_server_flatten = cupy.concatenate(recvbuf_server)
    aggregated_m_tensor = from_dlpack(recvbuf_server_flatten.toDlpack())
    buffer_m.set_(aggregated_m_tensor.type(buffer_m.dtype).view_as(buffer_m))
def torch_to_xp(input: torch.Tensor) -> np.ndarray:
    # torch Tensor to numpy/cupy ndarray
    if not torch.is_tensor(input):
        raise RuntimeError(
            f'torch_to_xp expects torch.Tensor as input, but got {type(input)}')
    if IS_CUPY_AVAILABLE and input.is_cuda:
        return cupy.fromDlpack(to_dlpack(input))
    else:
        return input.numpy()
def afterOptimizerStep(self, retrievedPosIndexes, retrievedNegIndexes=None):
    reshapedRetrieval = self._getReshapedRetrieval(retrievedPosIndexes,
                                                   retrievedNegIndexes)
    for idx, optVar in enumerate(self.optVarList):
        self.CUPYmemmap[idx][reshapedRetrieval] = (
            cupy.fromDlpack(to_dlpack(
                self.given_optimizer.state_dict()['state'][
                    self.optimizerKey][optVar])))
def test_to_dlpack_mixed_dtypes():
    df = cudf.DataFrame({"a": [1, 2, 3, 4],
                         "b": [10.32, 0.4, -0.2, -1000.32]})

    cudf_host_array = df.to_numpy()
    dlt = df.to_dlpack()

    cupy_array = cupy.fromDlpack(dlt)
    cupy_host_array = cupy_array.get()

    assert_eq(cudf_host_array, cupy_host_array)
def test_to_dlpack_cupy_1d(data_1d):
    expectation = data_size_expectation_builder(data_1d, False)
    with expectation:
        gs = cudf.Series(data_1d, nan_as_null=False)
        cudf_host_array = gs.to_numpy(na_value=np.nan)
        dlt = gs.to_dlpack()

        cupy_array = cupy.fromDlpack(dlt)
        cupy_host_array = cupy_array.get()

        assert_eq(cudf_host_array, cupy_host_array)
def test_to_dlpack_cupy_1d(data_1d):
    expectation = data_size_expectation_builder(data_1d, False)
    with expectation:
        gs = cudf.Series(data_1d, nan_as_null=False)
        cudf_host_array = gs.to_array(fillna="pandas")
        dlt = gs._column.to_dlpack()

        cupy_array = cupy.fromDlpack(dlt)
        cupy_host_array = cupy_array.get()

        assert_eq(cudf_host_array, cupy_host_array)
def content(self, input, gan_out):
    input = cp.fromDlpack(to_dlpack(input))
    gan_out = cp.fromDlpack(to_dlpack(gan_out))
    output = []
    for x_it, gan_out_it in zip(input, gan_out):
        ch, w, h = x_it.shape
        gan_out_it = gan_out_it.reshape((ch, -1))
        x_it = x_it.reshape((ch, -1))
        x_it = cp.concatenate((x_it, cp.ones((1, x_it.shape[1]))), axis=0)
        gan_out_it = cp.asarray(gan_out_it)
        x_it_inv = cp.linalg.pinv(x_it)
        weight = cp.dot(gan_out_it, x_it_inv)
        if (abs(weight[:, 3]).mean() > self.max_bias
                or abs(weight[:3, :3]).mean() < self.min_weight
                ) and self.last_weight is not None:
            print(abs(weight[:, 3]).mean(), abs(weight[:3, :3]).mean())
            weight = self.last_weight.copy()
        else:
            self.last_weight = weight.copy()
        output.append(cp.dot(weight, x_it).reshape((ch, w, h)))
    return from_dlpack(cp.stack(output).toDlpack()).float()
def torch2xp(torch_tensor):
    if torch_tensor.is_cuda:
        return cupy.fromDlpack(torch.utils.dlpack.to_dlpack(torch_tensor))
    else:
        return torch_tensor.detach().numpy()