def cutStrings(seqs, length):
    '''Cut sequences into fixed-width context windows with index and label columns.

    Each sequence is padded on both sides with ``length`` entries of the
    constant 23 and handed to ``vaw`` with a window of ``2 * length + 1``
    entries. For every window the centre entry is split off as the label and
    the remaining ``2 * length`` entries are kept as context.

    NOTE(review): ``vaw`` is assumed to return a 2-D sliding-window view with
    one row per window (e.g. ``skimage.util.view_as_windows``) -- confirm
    against the import at the top of the file.

    Parameters
    ----------
    seqs : iterable of array_like
        One-dimensional numeric sequences. Sequences whose padded size does
        not exceed ``2 * length`` (i.e. empty sequences) are skipped.
    length : int
        Number of context entries kept on each side of the centre entry.

    Returns
    -------
    numpy.ndarray
        Array with ``2 * length + 3`` columns per row: the window index within
        its sequence, the same index counted from the end, the ``2 * length``
        context values, and the centre value (label). Has zero rows when no
        sequence was used.
    '''
    print('Preparing Data...')
    # Seed with an empty float block so shape and dtype promotion match the
    # result of growing an initially empty array, even if nothing is used.
    blocks = [np.zeros([0, length * 2 + 3])]
    count = 0
    bar = ProgressBar()
    for seq in bar(seqs):
        paddedSeq = np.pad(np.asarray(seq), (length, length),
                           'constant', constant_values=23.)
        if paddedSeq.size <= length * 2:
            continue
        cutSeq = vaw(paddedSeq, (length * 2 + 1, ))
        # Take the centre column before it is removed from the context.
        label = cutSeq[:, length]
        context = np.delete(cutSeq, length, axis=1)
        index = np.arange(cutSeq.shape[0], dtype=float)[:, None]
        blocks.append(np.concatenate(
            (index, index[::-1, :], context, label[:, None]), 1))
        count += 1
    print('Used {} proteins.'.format(count))
    # Concatenate once instead of re-copying the growing array per sequence.
    return np.concatenate(blocks, 0)
def _get_patches(mspec, w, step):
    '''Cut ``mspec`` into standardized, edge-replicated patches.

    Windows of shape ``(w, h)`` (``h`` being ``mspec.shape[1]``) are taken
    with ``vaw`` at the given ``step``. Each window is flattened and
    standardized to zero mean and unit standard deviation over its ``w * h``
    values. Copies of the first and last window are then added in front and
    at the back before the patches are reshaped to ``(n, w, h)``.

    NOTE(review): ``vaw`` is assumed to be a sliding-window view such as
    ``skimage.util.view_as_windows`` -- confirm against the file's imports.

    Parameters
    ----------
    mspec : numpy.ndarray
        Two-dimensional input; windows span its full second axis.
    w : int
        Window length along the first axis.
    step : int
        Step passed to ``vaw``.

    Returns
    -------
    data : numpy.ndarray
        Patches of shape ``(n, w, h)``.
    finite : numpy.ndarray
        Boolean array of length ``n``; ``False`` where a patch contains
        non-finite values (e.g. a window with zero standard deviation).
    '''
    h = mspec.shape[1]
    windows = vaw(mspec, (w, h), step=step)
    # reshape (rather than assigning to .shape) copies when a view is not
    # possible instead of raising for non-contiguous input.
    flat = windows.reshape(len(windows), w * h)
    flat = ((flat - np.mean(flat, axis=1, keepdims=True))
            / np.std(flat, axis=1, keepdims=True))
    # Replicate the first/last patch as leading/trailing fill.
    lfill = [flat[0, :].reshape(1, h * w)] * (w // (2 * step))
    rfill = [flat[-1, :].reshape(1, h * w)] * (
        w // (2 * step) - 1 + len(mspec) % 2)
    data = np.vstack(lfill + [flat] + rfill)
    finite = np.all(np.isfinite(data), axis=1)
    return data.reshape(len(data), w, h), finite
def avg_patch_vals(A, patch_size=(3, 3), pad_mode='edge'):
    '''Get the average value of each patch of A.

    Patches are extracted with ``vaw``; the grid of patches is padded along
    the leading (position) axes so that it has the same extent as ``A``, and
    each patch is then averaged over its window axes.

    NOTE(review): ``vaw`` is assumed to return an array whose first
    ``A.ndim`` axes index the patch position and whose remaining axes are the
    window itself (as ``skimage.util.view_as_windows`` does) -- confirm
    against the file's imports.

    Parameters
    ----------
    A : array_like
        Array to patch-ify.
    patch_size : tuple, optional
        Size of patches.
    pad_mode : str, optional
        Mode to pass to numpy.pad.

    Returns
    -------
    numpy.ndarray
        Per-patch averages, padded to the shape of ``A``.
    '''
    A = np.asanyarray(A)
    patches = vaw(A, patch_size)

    # Pad only the position axes, with explicit (before, after) pairs; a flat
    # list of ints would be read by numpy.pad as one (before, after) pair
    # applied to every axis, window axes included.
    pad_width = []
    for ii in range(A.ndim):
        missing = A.shape[ii] - patches.shape[ii]
        before = missing // 2
        # Any odd remainder (even patch sizes) goes to the trailing side.
        pad_width.append((before, missing - before))
    pad_width += [(0, 0)] * (patches.ndim - A.ndim)

    patches = np.pad(patches, pad_width, pad_mode)
    # Average over every window axis, whatever the dimensionality of A.
    return np.mean(patches, axis=tuple(range(A.ndim, patches.ndim)))