def generate_chunk(i_chunk, local_size, num_chunks, chunk_type, frac_match):
    cupy.random.seed(42)

    if chunk_type == "build":
        # Build dataframe
        #
        # "key" column is a unique sample within [0, local_size * num_chunks)
        #
        # "shuffle" column is a random selection of partitions (used for shuffle)
        #
        # "payload" column is a random permutation of the chunk_size
        start = local_size * i_chunk
        stop = start + local_size

        parts_array = cupy.arange(num_chunks, dtype="int64")
        df = cudf.DataFrame(
            {
                "key": cupy.arange(start, stop=stop, dtype="int64"),
                "payload": cupy.arange(local_size, dtype="int64"),
            }
        )
    else:
        # Other dataframe
        #
        # "key" column matches values from the build dataframe
        # for a fraction (`frac_match`) of the entries. The matching
        # entries are perfectly balanced across each partition of the
        # "base" dataframe.
        #
        # "payload" column is a random permutation of the chunk_size

        # Step 1. Choose values that DO match
        sub_local_size = local_size // num_chunks
        sub_local_size_use = max(int(sub_local_size * frac_match), 1)
        arrays = []
        for i in range(num_chunks):
            bgn = (local_size * i) + (sub_local_size * i_chunk)
            end = bgn + sub_local_size
            ar = cupy.arange(bgn, stop=end, dtype="int64")
            arrays.append(cupy.random.permutation(ar)[:sub_local_size_use])
        key_array_match = cupy.concatenate(tuple(arrays), axis=0)

        # Step 2. Add values that DON'T match
        missing_size = local_size - key_array_match.shape[0]
        start = local_size * num_chunks + local_size * i_chunk
        stop = start + missing_size
        key_array_no_match = cupy.arange(start, stop=stop, dtype="int64")

        # Step 3. Combine and create the final dataframe chunk
        key_array_combine = cupy.concatenate(
            (key_array_match, key_array_no_match), axis=0
        )
        df = cudf.DataFrame(
            {
                "key": cupy.random.permutation(key_array_combine),
                "payload": cupy.arange(local_size, dtype="int64"),
            }
        )
    return df
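# Usage sketch (not part of the original source): a minimal call assuming
# `cupy` and `cudf` are importable and `generate_chunk` is in scope. The
# sizes below are made up; they only illustrate that a "build" chunk and an
# "other" chunk of the same local size can be joined on "key".
def _example_generate_chunk():
    build = generate_chunk(0, 1_000, 4, "build", 0.5)
    other = generate_chunk(0, 1_000, 4, "other", 0.5)
    assert len(build) == len(other) == 1_000
    # some (but not all) of the "other" keys match this build partition
    matched = other.merge(build, on="key", how="inner")
    assert 0 < len(matched) <= len(other)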
def encode(self, x):
    # print(x.shape)
    # print(x.shape)
    a, b, c = x.shape
    x = x.reshape(a, 1, c).astype(xp.float32)
    # x = xp.hstack([x[:,:,i:b-440+i:221] for i in range(441)]) * hamming
    x = xp.concatenate([
        x[:, :, :-221].reshape(a, -1, 1, 442),
        x[:, :, 221:].reshape(a, -1, 1, 442)
    ], axis=2).reshape(a, -1, 442) * self.mado
    # print(x)
    x = xp.fft.fft(x, axis=-1)
    # xp.fft.fft(xp.arange(100).reshape(2,5,10), axis=-1)
    x = xp.concatenate(
        [x.real.reshape(a, 1, -1, 442), x.imag.reshape(a, 1, -1, 442)],
        axis=1)  # .reshape(a, 2, -1, 442)
    # xp.concatenate([s.real.reshape(2,5,1,10), s.imag.reshape(2,5,1,10)], axis=2)
    # print(x.shape)
    x = xp.transpose(x, axes=(0, 1, 3, 2))
    # print(x.dtype)
    return x
def forward_prop(x, local_time, sequence, isFirst, timestamp, satellite_name):
    s = cp.empty([local_time, distance_forward, channels_hidden, M, N])
    e = cp.empty([local_time, distance_forward])
    alpha = cp.empty([local_time, distance_forward])
    p = cp.empty([local_time, channels_p, M, N])

    # Hidden unit
    h = cp.empty([local_time + 1, channels_hidden, M, N])
    h[-1] = cp.zeros([channels_hidden, M, N])

    # LSTM forward propagation
    for t in np.arange(local_time):
        # Attention network
        for z in range(timestamp + t - (distance + learning_window),
                       timestamp + distance_forward + t - (distance + learning_window)):
            temp = cp.concatenate(
                (cp.asarray(satellite_images[sequence][z]), h[t - 1]), axis=0)
            s[t][z - (timestamp + t - (distance + learning_window))] = tanh(
                cp.asarray(
                    F.convolution_2d(
                        temp.reshape(1, channels_img + channels_hidden, M, N),
                        e_kernel, b=None, pad=pad_constant)[0].data) + bias_e)
            s_temp = s[t][z - (timestamp + t - (distance + learning_window))].reshape(
                M * N * channels_hidden)
            e[t][z - (timestamp + t - (distance + learning_window))] = cp.dot(
                v_connected_weights, s_temp
            ) + bias_v[z - (timestamp + t - (distance + learning_window))]

        xtemp = satellite_images[sequence][timestamp - distance:
                                           timestamp - distance + distance_forward, 0]
        alpha[t] = softmax(e[t])
        # Sum all x arrays up, weighted array
        p[t] = cp.tensordot(alpha[t], cp.asarray(xtemp), axes=1).reshape(1, M, N)

        temporary = cp.concatenate((x[t], p[t], h[t - 1]), axis=0)
        temporary = temporary.reshape(
            1, channels_img + channels_p + channels_hidden, M, N)
        h[t] = tanh(
            cp.asarray(
                F.convolution_2d(temporary, main_kernel, b=None, pad=2)[0].data)
            + bias_h)

    # 1 x 1 convolution
    output = cp.matmul(connected_weights,
                       h[local_time - 1].reshape(channels_hidden, M * N)
                       ).reshape(M, N) + bias_y[0]
    true_output = rect_linear(output)

    return true_output, output, cp.reshape(
        h[local_time - 1], (channels_hidden, M * N)), p, h, s, e, alpha, xtemp
def find_tcs(self, chunk_size=65536, overlap=1000, order=40, threshold=4,
             return_stds=False, whitening=False):
    all_spikes = [[], []]
    spike_amps = []
    stds = np.std(self.filtered_data(from_sample=0, to_sample=chunk_size), axis=1)
    for i in trange(round(self.sample_length / chunk_size)):
        chunk = self.filtered_data(from_sample=i * chunk_size,
                                   to_sample=(i + 1) * chunk_size + overlap)
        if whitening:
            U, S, Vt = np.linalg.svd(chunk, full_matrices=False)
            chunk = cp.dot(U, Vt)
        chunk_spikes = cusignal.peak_finding.peak_finding.argrelmin(
            chunk, order=order, axis=1)
        spike_vals = chunk[chunk_spikes[0].get(), chunk_spikes[1].get()]
        sig_spikes = np.where(
            spike_vals <= -threshold * stds[chunk_spikes[0].get()])[0]
        all_spikes[0].append(chunk_spikes[0][sig_spikes])
        all_spikes[1].append(chunk_spikes[1][sig_spikes] + i * chunk_size)
        spike_amps.append(spike_vals[sig_spikes])
    all_spikes = cp.array(
        [cp.concatenate(all_spikes[0]), cp.concatenate(all_spikes[1])])
    if return_stds:
        return all_spikes, spike_amps, stds
    else:
        return all_spikes, spike_amps
def gradient2DGPU(mat):
    '''
    Calculate the gradient of a 2D square matrix. Works on GPU.
    Copyright (c) Gabriel Peyre

    Parameters
    ----------
    mat : cupy.ndarray
        matrix whose gradient is calculated

    Returns
    -------
    [fx, fy] : list of cupy.ndarray
        gradient of `mat`
    '''
    x1 = mat[1:len(mat), :]
    x2 = cp.array([mat[-1, :]])
    x = cp.concatenate((x1, x2), axis=0)
    fx = x - mat

    y1 = mat[:, 1:len(mat)]
    y2 = cp.array([mat[:, -1]]).transpose()
    y = cp.concatenate((y1, y2), axis=1)
    fy = y - mat

    return [fx, fy]
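# Usage sketch (not part of the original source), assuming `cupy` is
# installed and `gradient2DGPU` is in scope: the forward differences
# replicate the last row/column, so the gradient vanishes on that border.
def _example_gradient2DGPU():
    import cupy as cp

    mat = cp.arange(16, dtype=cp.float64).reshape(4, 4)
    fx, fy = gradient2DGPU(mat)
    assert fx.shape == mat.shape and fy.shape == mat.shape
    # last row/column are replicated, so the difference there is zero
    assert bool(cp.all(fx[-1, :] == 0)) and bool(cp.all(fy[:, -1] == 0))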
def function_wrapper(x, b, axis=0, **kwargs):
    # add the padding to the array
    xsize = x.shape[axis]
    if 'pad' in kwargs and kwargs['pad']:
        npad = b.shape[axis] // 2
        padd = cp.take(x, cp.arange(npad), axis=axis) * 0
        if kwargs['pad'] == 'zeros':
            x = cp.concatenate((padd, x, padd), axis=axis)
        if kwargs['pad'] == 'constant':
            x = cp.concatenate((padd * 0 + cp.mean(x[:npad]), x,
                                padd + cp.mean(x[-npad:])), axis=axis)
        if kwargs['pad'] == 'flip':
            pad_in = cp.flip(cp.take(x, cp.arange(1, npad + 1), axis=axis),
                             axis=axis)
            pad_out = cp.flip(cp.take(x, cp.arange(xsize - npad - 1, xsize - 1),
                                      axis=axis), axis=axis)
            x = cp.concatenate((pad_in, x, pad_out), axis=axis)
    # run the convolution
    y = fcn_convolve(x, b, **kwargs)
    # remove padding from both arrays (necessary for x ?)
    if 'pad' in kwargs and kwargs['pad']:
        # remove the padding
        y = cp.take(y, cp.arange(npad, x.shape[axis] - npad), axis=axis)
        x = cp.take(x, cp.arange(npad, x.shape[axis] - npad), axis=axis)
    assert xsize == x.shape[axis]
    assert xsize == y.shape[axis]
    return y
def take_filter(N, filter):
    os = 4
    d = 0.5
    Ne = os * N
    t = cp.arange(0, Ne / 2 + 1) / Ne

    if filter == 'ramp':
        wfa = Ne * 0.5 * wint(12, t)  # .*(t/(2*d)<=1) % compute the weights
    elif filter == 'shepp-logan':
        wfa = Ne * 0.5 * wint(12, t) * cp.sinc(t / (2 * d)) * (t / d <= 2)
    elif filter == 'cosine':
        wfa = Ne * 0.5 * wint(12, t) * cp.cos(cp.pi * t / (2 * d)) * (t / d <= 1)
    elif filter == 'cosine2':
        wfa = Ne * 0.5 * wint(12, t) * (cp.cos(cp.pi * t / (2 * d)))**2 * (t / d <= 1)
    elif filter == 'hamming':
        wfa = Ne * 0.5 * wint(12, t) * (.54 + .46 * cp.cos(cp.pi * t / d)) * (t / d <= 1)
    elif filter == 'hann':
        wfa = Ne * 0.5 * wint(12, t) * (1 + cp.cos(cp.pi * t / d)) / 2.0 * (t / d <= 1)
    elif filter == 'parzen':
        wfa = Ne * 0.5 * wint(12, t) * pow(1 - t / d, 3) * (t / d <= 1)

    wfa = wfa * (wfa >= 0)
    wfamid = cp.array([2 * wfa[0]])
    tmp = wfa
    wfa = cp.concatenate((cp.flipud(tmp[1:]), wfamid))
    wfa = cp.concatenate((wfa, tmp[1:]))
    wfa = wfa[:-1].astype('float32')
    return wfa
def _build_laplacian(data, spacing, mask, beta, multichannel):
    l_x, l_y, l_z = data.shape[:3]
    edges = _make_graph_edges_3d(l_x, l_y, l_z)
    weights = _compute_weights_3d(data, spacing, beta=beta, eps=1.e-10,
                                  multichannel=multichannel)
    assert weights.dtype == data.dtype
    if mask is not None:
        # Remove edges of the graph connected to masked nodes, as well
        # as corresponding weights of the edges.
        mask0 = cp.concatenate([mask[..., :-1].ravel(), mask[:, :-1].ravel(),
                                mask[:-1].ravel()])
        mask1 = cp.concatenate([mask[..., 1:].ravel(), mask[:, 1:].ravel(),
                                mask[1:].ravel()])
        ind_mask = cp.logical_and(mask0, mask1)
        edges, weights = edges[:, ind_mask], weights[ind_mask]

        # Reassign edges labels to 0, 1, ... edges_number - 1
        _, inv_idx = cp.unique(edges, return_inverse=True)
        edges = inv_idx.reshape(edges.shape)

    # Build the sparse linear system
    pixel_nb = l_x * l_y * l_z
    i_indices = edges.ravel()
    j_indices = edges[::-1].ravel()
    data = cp.concatenate((weights, weights))
    lap = sparse.coo_matrix((data, (i_indices, j_indices)),
                            shape=(pixel_nb, pixel_nb))
    # need CSR instead of COO for indexing used later in _build_linear_system
    lap = lap.tocsr()
    lap.setdiag(-cp.ravel(lap.sum(axis=0)))
    return lap
def divergence2DGPU(mat):
    '''
    Calculate the divergence of a 2D square matrix. Works on GPU.
    Copyright (c) Gabriel Peyre

    Parameters
    ----------
    mat : cupy.ndarray
        matrix whose divergence is calculated

    Returns
    -------
    fx + fy : cupy.ndarray
        divergence of `mat`
    '''
    Px = mat[0]
    Py = mat[1]

    fx = Px - cp.concatenate(
        (cp.array([Px[0, :]]), Px[0:len(Px) - 1, :]), axis=0)
    fx[0, :] = Px[0, :]
    fx[-1, :] = -Px[-2, :]

    fy = Py - cp.concatenate(
        (cp.array([Py[:, 0]]).transpose(), Py[:, 0:len(Py) - 1]), axis=1)
    fy[:, 0] = Py[:, 0]
    fy[:, -1] = -Py[:, -2]

    return fx + fy
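# Consistency sketch (not part of the original source), assuming `cupy` and
# both helpers above are in scope: `divergence2DGPU` consumes the `[fx, fy]`
# pair produced by `gradient2DGPU`, and with these boundary conventions
# `-div` acts as the adjoint of `grad`, which is what TV-style solvers rely on.
def _example_divergence2DGPU():
    import cupy as cp

    cp.random.seed(0)
    u = cp.random.rand(8, 8)
    v = [cp.random.rand(8, 8), cp.random.rand(8, 8)]

    grad_u = gradient2DGPU(u)          # [fx, fy]
    div_v = divergence2DGPU(v)         # same shape as u

    # adjointness check: <grad u, v> == -<u, div v> up to rounding
    lhs = (grad_u[0] * v[0]).sum() + (grad_u[1] * v[1]).sum()
    rhs = -(u * div_v).sum()
    assert bool(cp.allclose(lhs, rhs))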
def process(self, inputs):
    df = inputs[self.INPUT_PORT_NAME]
    all_sample_ids = df['sample_id'].unique()
    total_samples = len(all_sample_ids)
    window = self.conf['window']
    negative = self.conf.get("negative", False)
    drawdown, all_dates = get_drawdown(df, total_samples,
                                       negative=negative, window=window)
    total_samples, num_months, assets = drawdown.shape
    months_id = all_dates.dt.year * 12 + (all_dates.dt.month - 1)
    months_id = months_id - months_id.min()
    mid = (cupy.arange(months_id.max() + 1) + (all_dates.dt.month - 1)[0])[window:]
    minyear = all_dates.dt.year.min()
    if len(mid) == 0:
        mid = cupy.array([0])
    months = mid % 12
    years = mid // 12 + minyear
    output = {}
    df_drawdown = cudf.DataFrame(
        drawdown.reshape(total_samples * num_months, -1))
    df_drawdown['year'] = cupy.concatenate(
        [years] * total_samples).astype(cupy.int16)
    df_drawdown['month'] = cupy.concatenate(
        [months] * total_samples).astype(cupy.int16)
    df_drawdown['sample_id'] = cupy.repeat(
        cupy.arange(total_samples) + all_sample_ids.min(), len(mid))
    output.update({self.OUTPUT_PORT_NAME: df_drawdown})
    return output
def test_array_split(type, test_size, train_size, shuffle):
    X = np.zeros((100, 10)) + np.arange(100).reshape(100, 1)
    y = np.arange(100).reshape(100, 1)

    if type == 'cupy':
        X = cp.asarray(X)
        y = cp.asarray(y)

    if type == 'numba':
        X = cuda.to_device(X)
        y = cuda.to_device(y)

    if type == 'rmm':
        X = rmm.to_device(X)
        y = rmm.to_device(y)

    X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                        train_size=train_size,
                                                        test_size=test_size,
                                                        shuffle=shuffle,
                                                        random_state=0)

    if type == 'cupy':
        assert isinstance(X_train, cp.ndarray)
        assert isinstance(X_test, cp.ndarray)
        assert isinstance(y_train, cp.ndarray)
        assert isinstance(y_test, cp.ndarray)

    if type in ['numba', 'rmm']:
        assert cuda.devicearray.is_cuda_ndarray(X_train)
        assert cuda.devicearray.is_cuda_ndarray(X_test)
        assert cuda.devicearray.is_cuda_ndarray(y_train)
        assert cuda.devicearray.is_cuda_ndarray(y_test)

    if train_size is not None:
        assert X_train.shape[0] == X.shape[0] * train_size
        assert y_train.shape[0] == y.shape[0] * train_size

    if test_size is not None:
        assert X_test.shape[0] == X.shape[0] * test_size
        assert y_test.shape[0] == y.shape[0] * test_size

    if shuffle is None:
        assert X_train == X[0:train_size]
        assert y_train == y[0:train_size]
        assert X_test == X[-1 * test_size:]
        assert y_test == y[-1 * test_size:]

    if tnc(X_train):
        X_train = PatchedNumbaDeviceArray(X_train)
        X_test = PatchedNumbaDeviceArray(X_test)
        y_train = PatchedNumbaDeviceArray(y_train)
        y_test = PatchedNumbaDeviceArray(y_test)

    X_rec = cp.sort(cp.concatenate((X_train, X_test)))
    y_rec = cp.sort(cp.concatenate((y_train, y_test)))

    assert X_rec == X
    assert y_rec == y
def _bss_decomp_mtifilt_cupy(reference_sources, estimated_source, j, flen, nsrc):
    """Decomposition of an estimated source image into four components
    representing respectively the true source image, spatial (or filtering)
    distortion, interference and artifacts, derived from the true source
    images using multichannel time-invariant filters.
    """
    # decomposition

    # true source image
    s_true = cp.concatenate(
        (reference_sources[j], cp.zeros([flen - 1], dtype=cp.float64)), 0)
    # spatial (or filtering) distortion
    e_spat = _project_cupy(cp.expand_dims(reference_sources[j, :], 0),
                           estimated_source, flen, 1) - s_true
    # interference
    e_interf = _project_cupy(reference_sources, estimated_source, flen,
                             nsrc) - s_true - e_spat
    # artifacts
    e_artif = -s_true - e_spat - e_interf
    e_artif += cp.concatenate(
        (estimated_source, cp.zeros([flen - 1], dtype=cp.float64)), 0)

    return (s_true, e_spat, e_interf, e_artif)
def train(self, x_train, y_train, x_test, y_test, iterations):
    self.minx = -min(x_train.min().item(), x_test.min().item())
    train_in = cp.tile(
        cp.concatenate((x_train,
                        cp.zeros(shape=(x_train.shape[0],
                                        self.hidden + self.outputs))), axis=1),
        (self.pop_size, 1, 1))
    test_in = cp.tile(
        cp.concatenate((x_test,
                        cp.zeros(shape=(x_test.shape[0],
                                        self.hidden + self.outputs))), axis=1),
        (self.pop_size, 1, 1))

    for i in range(iterations):
        start_time = time.time()
        y_hat = self._forward_prop(train_in, True)
        fitness, train_loss, train_acc = self.evaluate(y_hat, y_train,
                                                       self.problem_type)
        self._evolve(fitness)
        test_loss, test_acc = self.evaluate(self._forward_prop(test_in, False),
                                            y_test, self.problem_type)[1:]
        print('[GENERATION %i/%i]\n\tTIME: %.2f seconds | '
              'TRAIN Loss: %.4f Acc: %.4f | TEST Loss: %.4f Acc: %.4f\n'
              % (i + 1, iterations, time.time() - start_time,
                 train_loss, train_acc, test_loss, test_acc))
        if self.log_file:
            with open(self.log_file, 'a+') as file:
                file.write(' '.join([
                    str(s) for s in [i + 1, iterations,
                                     time.time() - start_time,
                                     train_loss, train_acc,
                                     test_loss, test_acc]
                ]) + '\n')
    return test_acc
def generate_negatives(neg_users, true_mat, item_range, sort=False, use_trick=False):
    """
    Generate negative samples for data augmentation
    """
    neg_u = []
    neg_i = []

    # If using the shortcut, generate negative items without checking whether
    # the associated user has interacted with them. Speeds up training
    # significantly with very low impact on accuracy.
    if use_trick:
        neg_items = cp.random.randint(0, high=item_range, size=neg_users.shape[0])
        return neg_users, neg_items

    # Otherwise, generate negative items, keep only the pairs allowed by
    # true_mat, and re-draw items for the remaining users until every user
    # has a valid negative.
    while len(neg_users) > 0:
        neg_items = cp.random.randint(0, high=item_range, size=neg_users.shape[0])
        neg_mask = true_mat[neg_users, neg_items]
        neg_u.append(neg_users[neg_mask])
        neg_i.append(neg_items[neg_mask])

        neg_users = neg_users[cp.logical_not(neg_mask)]

    neg_users = cp.concatenate(neg_u)
    neg_items = cp.concatenate(neg_i)

    if not sort:
        return neg_users, neg_items

    sorted_users = cp.sort(neg_users)
    sort_indices = cp.argsort(neg_users)

    return sorted_users, neg_items[sort_indices]
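# Illustrative call (not part of the original source), assuming `cupy` is
# available and `generate_negatives` is in scope. `true_mat` here is a
# hypothetical boolean lookup that is True exactly where a user/item pair is
# an acceptable negative, which matches how the mask is used above.
def _example_generate_negatives():
    import cupy as cp

    num_users, item_range = 4, 6
    true_mat = cp.ones((num_users, item_range), dtype=bool)
    true_mat[0, 0] = False  # pretend user 0 interacted with item 0

    neg_users = cp.array([0, 0, 1, 2, 3])
    users, items = generate_negatives(neg_users, true_mat, item_range)
    assert users.shape == items.shape
    # every returned pair is an allowed negative
    assert bool(true_mat[users, items].all())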
def test_concatenate_large_different_devices(self):
    arrs = []
    for i in range(10):
        with cuda.Device(i % 2):
            arrs.append(cupy.empty((2, 3, 4)))
    with pytest.raises(ValueError):
        cupy.concatenate(arrs)
def computeRT(item_path, xp=np):
    with open(item_path, 'r') as f:
        LINES = [line.strip("';\n") for line in f]

    param = {}
    for index, args in enumerate(LINES):
        name = args[:15].replace(" ", "")
        name = name[:].replace("=", "")
        val = args[15:]
        val = xp.asarray(val.strip("[]").split(","), dtype=xp.float64)
        param[name] = val

    cam_dir = param["cam_dir"]
    cam_pos = param["cam_pos"]
    cam_up = param["cam_up"]

    z = cam_dir / xp.linalg.norm(cam_dir)
    x = xp.cross(cam_up, z)
    x = x / xp.linalg.norm(x)
    y = xp.cross(z, x)

    x = xp.expand_dims(x, axis=1)
    y = xp.expand_dims(y, axis=1)
    z = xp.expand_dims(z, axis=1)
    R = xp.concatenate([x, y, z], axis=1)
    T = xp.expand_dims(cam_pos, axis=1)
    R_T = xp.concatenate([R, T], axis=1)
    return R_T
def __init__(self, dtype):
    self.c = np.hstack([
        np.random.rand(N, int(2 * M / 4), dtype=np.float32),
        np.zeros((N, int(2 * M / 4)))
    ])
    self.x = np.array([(m / M) * A * Pe for m in range(M)], dtype=np.float32)
    self.y = np.array([(n / N) * Pe for n in range(N)], dtype=np.float32)

    a = np.asarray(range(0, int(M / 2)), dtype=np.float32) * 2 * np.pi / (A * Pe)
    b = np.asarray(range(int(-M / 2 + 1), 0), dtype=np.float32) * 2 * np.pi / (A * Pe)
    self.km = np.concatenate((a, np.concatenate(
        (np.array([0]), b)))).astype(np.float32)

    c = np.asarray(range(0, int(N / 2)), dtype=np.float32) * 2 * np.pi / Pe
    d = np.asarray(range(int(-N / 2 + 1), 0), dtype=np.float32) * 2 * np.pi / Pe
    self.kn = np.concatenate((c, np.concatenate(
        (np.array([0]), d)))).astype(np.float32)

    self.km_km = np.tile(self.km, (N, 1)).astype(np.float32)
    self.kn_kn = np.tile(self.kn, (M, 1)).astype(np.float32).T
    # print(self.kn_kn.shape)

    self.c_hat = np.fft.fft2(self.c).astype(np.complex64)
    self.phi_x = np.zeros((N, M), dtype=np.complex)
    self.phi_y = np.zeros((N, M), dtype=np.complex)
    self.c_x = np.fft.ifft2(self.c_hat * self.km_km * 1.0j).astype(np.complex64)
    self.c_y = np.fft.ifft2(self.c_hat * self.kn_kn * 1.0j).astype(np.complex64)
    self.km2kn2 = self.km_km**2 + self.kn_kn**2
def concatenate_organism(organism_a, organism_b):
    return [
        (
            cp.concatenate((organism_a[i][0], organism_b[i][0]), axis=0),  # layer_weight
            cp.concatenate((organism_a[i][1], organism_b[i][1]), axis=0),  # layer_bias
        )
        for i in range(len(organism_a))
    ]
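# Usage sketch (not part of the original source), assuming `cupy` is
# available: each organism is a list of (layer_weight, layer_bias) tuples
# stacked per population member, and the two populations are concatenated
# along axis 0. The shapes below are made up for illustration.
def _example_concatenate_organism():
    import cupy as cp

    def make_organism(pop, n_in, n_out):
        # a single layer: per-member weights and biases
        return [(cp.random.rand(pop, n_in, n_out),
                 cp.random.rand(pop, 1, n_out))]

    a = make_organism(3, 4, 2)
    b = make_organism(5, 4, 2)
    combined = concatenate_organism(a, b)
    assert combined[0][0].shape == (8, 4, 2)  # 3 + 5 population members
    assert combined[0][1].shape == (8, 1, 2)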
def mass2_gpu(ts, query):
    """
    Compute the distance profile for the given query over the given time
    series. This requires cupy to be installed.

    Parameters
    ----------
    ts : array_like
        The array to create a rolling window on.
    query : array_like
        The query.

    Returns
    -------
    An array of distances.

    Raises
    ------
    ValueError
        If ts is not a list or np.array.
        If query is not a list or np.array.
        If ts or query is not one dimensional.
    """
    def moving_mean_std_gpu(a, w):
        s = cp.concatenate([cp.array([0]), cp.cumsum(a)])
        sSq = cp.concatenate([cp.array([0]), cp.cumsum(a**2)])
        segSum = s[w:] - s[:-w]
        segSumSq = sSq[w:] - sSq[:-w]

        movmean = segSum / w
        movstd = cp.sqrt(segSumSq / w - (segSum / w)**2)

        return (movmean, movstd)

    x = cp.asarray(ts)
    y = cp.asarray(query)

    n = x.size
    m = y.size

    meany = cp.mean(y)
    sigmay = cp.std(y)

    meanx, sigmax = moving_mean_std_gpu(x, m)
    meanx = cp.concatenate([cp.ones(n - meanx.size), meanx])
    sigmax = cp.concatenate([cp.zeros(n - sigmax.size), sigmax])

    y = cp.concatenate((cp.flip(y, axis=0), cp.zeros(n - m)))

    X = cp.fft.fft(x)
    Y = cp.fft.fft(y)
    Z = X * Y
    z = cp.fft.ifft(Z)

    dist = 2 * (m - (z[m - 1:n] - m * meanx[m - 1:n] * meany) /
                (sigmax[m - 1:n] * sigmay))
    dist = cp.sqrt(dist)

    return cp.asnumpy(dist)
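# Smoke test sketch (not part of the original source), assuming `numpy` and
# a working `mass2_gpu` are available. The profile has length n - m + 1 and
# dips toward zero wherever the (z-normalized) query matches a window.
def _example_mass2_gpu():
    import numpy as np

    ts = np.array([0., 1., 2., 3., 2., 1., 0., 1., 2., 3.])
    query = np.array([1., 2., 3.])

    dist = mass2_gpu(ts, query)
    assert dist.shape[0] == len(ts) - len(query) + 1
    # the query occurs exactly at offset 1, so the profile is ~0 there
    assert np.abs(dist[1]) < 1e-3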
def transform(self, X):
    """
    Transform X using one-hot encoding.

    Parameters
    ----------
    X : cudf.DataFrame or cupy.ndarray
        The data to encode.

    Returns
    -------
    X_out : sparse matrix if sparse=True else a 2-d array
        Transformed input.
    """
    self._check_is_fitted()
    X = self._check_input(X)

    cols, rows = list(), list()
    j = 0
    for feature in X.columns:
        encoder = self._encoders[feature]
        col_idx = encoder.transform(X[feature])
        col_idx = cp.asarray(col_idx.to_gpu_array(fillna="pandas"))
        idx_to_keep = col_idx > -1

        # increase indices to take previous features into account
        col_idx += j

        # Filter out rows with null values
        row_idx = cp.arange(len(X))[idx_to_keep]
        col_idx = col_idx[idx_to_keep]

        if self.drop_idx_ is not None:
            drop_idx = self.drop_idx_[feature] + j
            mask = cp.ones(col_idx.shape, dtype=cp.bool)
            mask[col_idx == drop_idx] = False
            col_idx = col_idx[mask]
            row_idx = row_idx[mask]
            # account for dropped category in indices
            col_idx[col_idx > drop_idx] -= 1
            # account for dropped category in current cats number
            j -= 1

        j += len(encoder.classes_)
        cols.append(col_idx)
        rows.append(row_idx)

    cols = cp.concatenate(cols)
    rows = cp.concatenate(rows)
    val = cp.ones(rows.shape[0], dtype=self.dtype)
    ohe = cp.sparse.coo_matrix((val, (rows, cols)),
                               shape=(len(X), j),
                               dtype=self.dtype)

    if not self.sparse:
        ohe = ohe.toarray()

    return ohe
def _process_feats(self, output_reshaped, mask):
    """Take in a reshaped YOLO output in height, width, 3, 85 format together
    with its corresponding YOLO mask and return the detected bounding boxes,
    the confidence, and the class probability in each cell/pixel.

    Keyword arguments:
    output_reshaped -- reshaped YOLO output as NumPy arrays with shape
    (3, height, width, 85)
    mask -- 2-dimensional tuple with mask specification for this output
    """

    # Two in-line functions required for calculating the bounding box
    # descriptors:
    def sigmoid(value):
        """Return the sigmoid of the input."""
        return 1.0 / (1.0 + cp.exp(-value))

    def exponential(value):
        """Return the exponential of the input."""
        return cp.exp(value)

    # Vectorized calculation of above two functions:
    sigmoid_v = np.vectorize(sigmoid)
    exponential_v = np.vectorize(exponential)

    output_reshaped = cp.asarray(output_reshaped)
    grid_h, grid_w, _, _ = output_reshaped.shape

    anchors = cp.asarray(np.array([self.anchors[i] for i in mask]))

    # Reshape to N, height, width, num_anchors, box_params:
    anchors_tensor = cp.reshape(anchors, (1, 1, anchors.shape[0], 2))
    box_xy = sigmoid(output_reshaped[..., :2])
    box_wh = exponential(output_reshaped[..., 2:4]) * anchors_tensor
    box_confidence = sigmoid(output_reshaped[..., 4])
    box_confidence = cp.expand_dims(box_confidence, axis=-1)
    box_class_probs = sigmoid(output_reshaped[..., 5:])

    col = cp.tile(cp.arange(0, grid_w), grid_w).reshape(-1, grid_w)
    row = cp.tile(cp.arange(0, grid_h).reshape(-1, 1), grid_h)

    col = col.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    row = row.reshape(grid_h, grid_w, 1, 1).repeat(3, axis=-2)
    grid = cp.concatenate((col, row), axis=-1)

    box_xy += grid
    # box_xy /= (grid_w, grid_h)
    box_xy[..., 0] = box_xy[..., 0] / grid_w
    box_xy[..., 1] = box_xy[..., 1] / grid_h
    # box_wh /= self.input_resolution_yolo
    box_wh[..., 0] = box_wh[..., 0] / self.input_resolution_yolo[0]
    box_wh[..., 1] = box_wh[..., 1] / self.input_resolution_yolo[1]
    box_xy -= (box_wh / 2.)

    boxes = cp.concatenate((box_xy, box_wh), axis=-1)

    # boxes: centroids, box_confidence: confidence level, box_class_probs:
    # class confidence
    return boxes, box_confidence, box_class_probs
def moving_mean_std_gpu(a, w):
    s = cp.concatenate([cp.array([0]), cp.cumsum(a)])
    sSq = cp.concatenate([cp.array([0]), cp.cumsum(a**2)])
    segSum = s[w:] - s[:-w]
    segSumSq = sSq[w:] - sSq[:-w]

    movmean = segSum / w
    movstd = cp.sqrt(segSumSq / w - (segSum / w)**2)

    return (movmean, movstd)
def convert(batch):
    data = []
    label1 = []
    label2 = []
    for x in batch:
        data.append(cp.array(x[:, 0:-2], dtype=cp.float32))
        label1.append(cp.array(x[:, -2], dtype=cp.int32))
        label2.append(cp.array(x[:, -1], dtype=cp.int32))
    return data, cp.concatenate(label1, axis=0), cp.concatenate(label2, axis=0)
def pack_beta(self):
    beta = cp.array([])
    for i, layer in enumerate(self.data):
        for neuron in layer:
            if i == 0:
                for w in neuron.W.T:
                    beta = cp.concatenate((beta, w), axis=0)
            else:
                beta = cp.concatenate((beta, neuron.w), axis=0)
    return beta
def load_data_slices(filename, slice_lists, input_name, target_name):
    stops = [s.stop for s in list_concat(slice_lists)]
    if not all(stops):
        raise Exception("Slices can't be open-ended")
    data = read_csv(filename, max(stops), input_name, target_name)
    return [(np.concatenate([data[0][s] for s in slices], axis=0),
             np.concatenate([data[1][s] for s in slices], axis=0))
            for slices in slice_lists]
def sampler(self, w, gamma, pi, xi, z, alpha, u, N, T, y, x, beta,
            sigma_v_sqr, sigma_alpha_sqr, eta, H, delta):
    NT = N * T

    # sample u from N(mu_u, V_u)
    my_mean = cp.asarray(np.dot(w, z))
    my_std = cp.asarray(np.exp(np.dot(w, gamma))**0.5)
    a, b = -my_mean / my_std, cp.inf * cp.ones([N, ])
    # eta_particles = truncnorm.rvs(a, b, loc=my_mean, scale=my_std, size=(H, N))
    eta_particles = TruncNormal.rvs(a, b, my_mean, my_std, (H, N))
    eta_particles = cp.concatenate(
        [eta_particles, cp.asarray(eta).reshape(-1, 1).T], axis=0)

    my_mean = cp.asarray(np.dot(pi, delta))
    my_std = cp.asarray(np.exp(np.dot(pi, xi))**0.5)
    a, b = -my_mean / my_std, cp.inf * cp.ones([NT, ])
    u_particles = TruncNormal.rvs(a, b, my_mean, my_std, (H, NT))
    u_particles = cp.concatenate(
        [u_particles, cp.asarray(u).reshape(-1, 1).T], axis=0)

    # alpha_particles = norm.rvs(0, sigma_alpha_sqr ** 0.5, size=(H, N))
    # alpha_particles = np.asarray(alpha_particles)
    # alpha_particles = np.concatenate([alpha_particles, alpha.reshape(-1, 1).T], axis=0)
    alpha_particles = cp.random.normal(0, sigma_alpha_sqr**0.5, size=(H, N))
    alpha_particles = cp.concatenate(
        [alpha_particles, cp.asarray(alpha).reshape(-1, 1).T], axis=0)

    inv_sqrt_2pi = 0.3989422804014327
    W = inv_sqrt_2pi * cp.exp(-cp.square(
        (cp.asarray(y - np.dot(x, beta))
         - cp.kron(alpha_particles + eta_particles, cp.ones([T, ]))
         - u_particles) / (sigma_v_sqr**0.5)) / 2) / (sigma_v_sqr**0.5)
    # x_ = ((cp.asarray(y - np.dot(x, beta)) - alpha_particles_) / (sigma_v_sqr**0.5))
    # del alpha_particles_
    # w = gpu_normal_pdf(x_)

    w_ = W.reshape([H + 1, N, T]).prod(axis=2)
    w_ = w_ / w_.sum(axis=0)
    index = self._vectorized(w_)

    new_alpha = alpha_particles[index, cp.arange(N)].get()
    new_eta = eta_particles[index, cp.arange(N)].get()
    new_u = u_particles[cp.kron(index, cp.ones([T, ])).astype(int),
                        cp.arange(N * T)].get()
    return new_eta, new_alpha, new_u
def evaluate(self):
    def sigmoid(x):
        return 1. / (1 + cupy.exp(-x)) if self.device >= 0 else \
            1. / (1 + numpy.exp(-x))

    iterator = self._iterators['main']
    eval_func = self.eval_func or self._targets['main']

    if self.eval_hook:
        self.eval_hook(self)

    if hasattr(iterator, 'reset'):
        iterator.reset()
        it = iterator
    else:
        it = copy.copy(iterator)

    y_total = []
    t_total = []
    for batch in it:
        in_arrays = self.converter(batch, self.device)
        with chainer.no_backprop_mode(), chainer.using_config('train', False):
            y = eval_func(*in_arrays[:-1])
        t = in_arrays[-1]
        # y_data = _get_1d_numpy_array(y)
        # t_data = _get_1d_numpy_array(t)
        y_data = y.data
        t_data = t
        y_total.append(y_data)
        t_total.append(t_data)

    # y_total = numpy.concatenate(y_total).ravel()
    # t_total = numpy.concatenate(t_total).ravel()
    y_total = cupy.concatenate(y_total) if self.device >= 0 \
        else numpy.concatenate(y_total)
    y_total = sigmoid(y_total)
    t_total = cupy.concatenate(t_total) if self.device >= 0 \
        else numpy.concatenate(t_total)

    # metrics_value = self.metrics_fun(y_total, t_total)
    if self.device >= 0:
        y_total = cupy.asnumpy(y_total)
        t_total = cupy.asnumpy(t_total)
    metrics = {key: metric_fun(y_total, t_total)
               for key, metric_fun in self.metrics_fun.items()}

    observation = {}
    with reporter.report_scope(observation):
        reporter.report(metrics, self._targets['main'])
    return observation
def _crossover(self, selected):
    point = cp.random.randint(1, self.total - 1).item()
    self.weights = cp.concatenate(
        (self.weights[selected[0, :], :point, :],
         self.weights[selected[1, :], point:, :]), axis=1)
    self.biases = self.biases[selected[1, :], :, :] if point <= self.inputs \
        else cp.concatenate(
            (self.biases[selected[0, :], :, :point - self.inputs],
             self.biases[selected[1, :], :, point - self.inputs:]), axis=2)
def test_concatenate_large_different_devices(self):
    arrs = []
    for i in range(10):
        with cuda.Device(i % 2):
            arrs.append(cupy.empty((2, 3, 4)))
    if cuda.runtime.deviceCanAccessPeer(0, 1) == 1:
        with pytest.warns(cupy._util.PerformanceWarning):
            cupy.concatenate(arrs)
    else:
        with pytest.raises(ValueError):
            cupy.concatenate(arrs)
def inplace_swap_row_csr(X, m, n):
    """
    Swaps two rows of a CSR matrix in-place.

    Parameters
    ----------
    X : scipy.sparse.csr_matrix, shape=(n_samples, n_features)
        Matrix whose two rows are to be swapped.

    m : int
        Index of the row of X to be swapped.

    n : int
        Index of the row of X to be swapped.
    """
    for t in [m, n]:
        if isinstance(t, np.ndarray):
            raise TypeError("m and n should be valid integers")

    if m < 0:
        m += X.shape[0]
    if n < 0:
        n += X.shape[0]

    # The following swapping makes life easier since m is assumed to be the
    # smaller integer below.
    if m > n:
        m, n = n, m

    indptr = X.indptr
    m_start = indptr[m]
    m_stop = indptr[m + 1]
    n_start = indptr[n]
    n_stop = indptr[n + 1]
    nz_m = m_stop - m_start
    nz_n = n_stop - n_start

    if nz_m != nz_n:
        # Modify indptr first
        X.indptr[m + 2:n] += nz_n - nz_m
        X.indptr[m + 1] = m_start + nz_n
        X.indptr[n] = n_stop - nz_m

    X.indices = np.concatenate([
        X.indices[:m_start], X.indices[n_start:n_stop],
        X.indices[m_stop:n_start], X.indices[m_start:m_stop],
        X.indices[n_stop:]
    ])
    X.data = np.concatenate([
        X.data[:m_start], X.data[n_start:n_stop],
        X.data[m_stop:n_start], X.data[m_start:m_stop],
        X.data[n_stop:]
    ])
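# Small check (not part of the original source), assuming numpy/scipy are
# available: swapping rows 0 and 2, which have different numbers of stored
# values, still yields the expected dense matrix.
def _example_inplace_swap_row_csr():
    import numpy as np
    from scipy.sparse import csr_matrix

    X = csr_matrix(np.array([[1, 0, 2],
                             [0, 3, 0],
                             [4, 5, 6]], dtype=np.float64))
    inplace_swap_row_csr(X, 0, 2)
    expected = np.array([[4, 5, 6],
                         [0, 3, 0],
                         [1, 0, 2]], dtype=np.float64)
    assert np.array_equal(X.toarray(), expected)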
def _prepend_const(narray, pad_amount, value, axis=-1):
    if pad_amount == 0:
        return narray
    padshape = tuple(x if i != axis else pad_amount
                     for i, x in enumerate(narray.shape))
    return cupy.concatenate((cupy.full(padshape, value, narray.dtype),
                             narray), axis=axis)
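# Usage sketch (not part of the original source), assuming `cupy` is
# available and a non-negative axis is passed (as the shape comparison above
# expects): two constant columns are prepended along axis 1.
def _example_prepend_const():
    import cupy

    a = cupy.arange(6).reshape(2, 3)
    padded = _prepend_const(a, pad_amount=2, value=0, axis=1)
    assert padded.shape == (2, 5)
    assert int(padded[:, :2].sum()) == 0  # the prepended columns hold the constant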
def test_concatenate_wrong_shape(self):
    a = cupy.empty((2, 3, 4))
    b = cupy.empty((3, 3, 4))
    c = cupy.empty((4, 4, 4))
    with self.assertRaises(ValueError):
        cupy.concatenate((a, b, c))
def test_concatenate_wrong_ndim(self):
    a = cupy.empty((2, 3))
    b = cupy.empty((2,))
    with self.assertRaises(ValueError):
        cupy.concatenate((a, b))
def train(self):
    # clear grads
    self.q_func.zerograds()

    # pull tuples from memory pool
    batch_tuples = self.replay.pull(Config.batch_size)
    if not len(batch_tuples):
        return

    # stack inputs
    cur_x = [self.env.getX(t.state) for t in batch_tuples]
    next_x = [self.env.getX(t.next_state) for t in batch_tuples]
    # merge inputs into one array
    if Config.gpu:
        cur_x = [cupy.expand_dims(t, 0) for t in cur_x]
        cur_x = cupy.concatenate(cur_x, 0)
        next_x = [cupy.expand_dims(t, 0) for t in next_x]
        next_x = cupy.concatenate(next_x, 0)
    else:
        cur_x = np.stack(cur_x)
        next_x = np.stack(next_x)

    # get cur outputs
    cur_output = self.QFunc(self.q_func, cur_x)
    # get next outputs, NOT target
    next_output = self.QFunc(self.q_func, next_x)

    # choose next action for each output
    next_action = [
        self.env.getBestAction(
            o.data,
            [t.next_state for t in batch_tuples]
        )
        for o in next_output  # for each head in Model
    ]

    # get next outputs, target
    next_output = self.QFunc(self.target_q_func, next_x)

    # clear err of tuples
    for t in batch_tuples:
        t.err = 0.
    # store err count
    err_count_list = [0.] * len(batch_tuples)

    # compute grad's weights
    weights = np.array([t.P for t in batch_tuples], np.float32)
    if Config.gpu:
        weights = cuda.to_gpu(weights)
    if self.replay.getPoolSize():
        weights *= self.replay.getPoolSize()
    weights = weights ** -Config.beta
    weights /= weights.max()
    if Config.gpu:
        weights = cupy.expand_dims(weights, 1)
    else:
        weights = np.expand_dims(weights, 1)

    # update beta
    Config.beta = min(1, Config.beta + Config.beta_add)

    # compute grad for each head
    for k in range(Config.K):
        if Config.gpu:
            cur_output[k].grad = cupy.zeros_like(cur_output[k].data)
        else:
            cur_output[k].grad = np.zeros_like(cur_output[k].data)

        # compute grad from each tuple
        for i in range(len(batch_tuples)):
            if batch_tuples[i].mask[k]:
                cur_action_value = \
                    cur_output[k].data[i][batch_tuples[i].action].tolist()
                reward = batch_tuples[i].reward
                next_action_value = \
                    next_output[k].data[i][next_action[k][i]].tolist()
                target_value = reward
                # if not empty position, not terminal state
                if batch_tuples[i].next_state.in_game:
                    target_value += Config.gamma * next_action_value
                loss = cur_action_value - target_value
                cur_output[k].grad[i][batch_tuples[i].action] = 2 * loss
                # count err
                if cur_action_value:
                    batch_tuples[i].err += abs(loss / cur_action_value)
                    err_count_list[i] += 1

        # multiply weights with grad and clip
        if Config.gpu:
            cur_output[k].grad = cupy.multiply(cur_output[k].grad, weights)
            cur_output[k].grad = cupy.clip(cur_output[k].grad, -1, 1)
        else:
            cur_output[k].grad = np.multiply(cur_output[k].grad, weights)
            cur_output[k].grad = np.clip(cur_output[k].grad, -1, 1)
        # backward
        cur_output[k].backward()

    # adjust grads of shared
    for param in self.q_func.shared.params():
        param.grad /= Config.K

    # update params
    self.optimizer.update()

    # avg err
    for i in range(len(batch_tuples)):
        if err_count_list[i] > 0:
            batch_tuples[i].err /= err_count_list[i]

    return np.mean([t.err for t in batch_tuples])