def test_concat_backward():
    # Shapes of the tensors to concatenate, grouped per test case
    tensor_shapes = [[(1, 4, 3), (1, 8, 3), (1, 5, 3)],
                     [(2, 3, 4), (1, 3, 4)],
                     [(6, 7, 8, 9), (6, 7, 8, 1), (6, 7, 8, 2)],
                     [(1, 2, 3), (1, 2, 4), (1, 2, 3), (1, 2, 4)]]
    # Dimension along which each group is concatenated
    cat_dims = [1, 0, 3, 2]

    for tensor_shapes_cur, d_cur in zip(tensor_shapes, cat_dims):
        # Build matching mytorch and torch tensors
        a = [Tensor.randn(*shape_i) for shape_i in tensor_shapes_cur]
        for i in range(len(a)):
            a[i].requires_grad = True
        a_torch = [get_same_torch_tensor(a_i) for a_i in a]

        # Forward: concatenate, then reduce to a scalar loss
        c = cat(a, d_cur)
        c_torch = torch.cat(a_torch, dim=d_cur)
        l = (c ** 2).sum()
        l_torch = (c_torch ** 2).sum()

        # Backward: the gradient of every input must match torch's
        l.backward()
        l_torch.backward()
        for a_i, a_torch_i in zip(a, a_torch):
            assert check_grad(a_i, a_torch_i, eps=eps)

    return True
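
# A correct cat backward must split the upstream gradient back along the
# concatenation dimension, one piece per input. A minimal NumPy sketch of that
# split (illustrative only, not the repo's implementation; shapes match the
# first test case above):
def _example_cat_grad_split():
    import numpy as np
    grad_output = np.ones((1, 17, 3))    # 4 + 8 + 5 = 17 along dim 1
    sizes = [4, 8, 5]                    # per-input sizes on the cat dim
    splits = np.split(grad_output, np.cumsum(sizes)[:-1], axis=1)
    # splits[i].shape == (1, sizes[i], 3): the gradient w.r.t. the ith input
    return splits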
def forward(self, input, hidden=None):
    '''
    NOTE: Please get a good grasp on util.PackedSequence before attempting this.

    Args:
        input (PackedSequence): input.data is a tensor of shape
            (total number of timesteps (sum) across all samples in the batch, input_size)
        hidden (Tensor, None): (batch_size, hidden_size)
    Returns:
        PackedSequence: (total number of timesteps (sum) across all samples
            in the batch, hidden_size)
        Tensor (batch_size, hidden_size): the hidden state generated at the last
            timestep of each sample, joined together. Samples are ordered in
            descending order by number of timesteps. This is a slight deviation
            from PyTorch.
    '''
    # Resolve the PackedSequence into its components
    data, sorted_indices, batch_sizes = input

    # Iterate over segments of "data" so that the same timestep across all
    # samples in the batch is passed to the unit simultaneously. Remember to
    # account for the effective batch size shrinking between iterations.
    output = []
    output_hidden = [None] * len(sorted_indices)
    hidden_shape = None
    start = 0
    for batch_size in batch_sizes:
        end = start + batch_size
        step_input = data[start:end]
        if hidden is not None:
            # Samples are sorted by length (descending), so the samples still
            # active at this timestep are a prefix of the previous hidden state.
            hidden = hidden[:step_input.shape[0]]
        hidden = self.unit.forward(step_input, hidden)
        output.append(hidden)
        start = end

        # Record the latest hidden state of every still-active sample; samples
        # that ended earlier keep the hidden state from their final timestep.
        if hidden_shape is None:
            hidden_shape = hidden.shape  # (batch_size, hidden_size) at timestep 0
        for j in range(hidden.shape[0]):
            output_hidden[j] = hidden[j]

    # Re-pack the per-timestep outputs, reusing the input's ordering metadata,
    # and stitch the final hidden states back together; cat followed by reshape
    # restores the (batch_size, hidden_size) layout.
    return (PackedSequence(tensor.cat(output), sorted_indices, batch_sizes),
            tensor.cat(output_hidden).reshape(*hidden_shape))
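
# Usage sketch for the forward above (illustrative; assumes `rnn` is a
# TimeIterator-style module wrapping an RNN unit, and that Tensor.randn and
# pack_sequence exist as in this repo):
def _example_time_iterator_forward(rnn):
    seqs = [Tensor.randn(4, 2), Tensor.randn(2, 2)]  # lengths 4 and 2, input_size 2
    ps = pack_sequence(seqs)                         # ps.batch_sizes == [2, 2, 1, 1]
    out_ps, last_hidden = rnn.forward(ps)
    # out_ps.data:  (6, hidden_size) -- one hidden state per packed timestep
    # last_hidden:  (2, hidden_size) -- final hidden per sample, longest first.
    # Between timesteps 1 and 2 the effective batch shrinks from 2 to 1, so the
    # shorter sample's final hidden is frozen at its last active timestep.
    return out_ps, last_hidden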
def unpack_sequence(ps):
    '''
    Given a PackedSequence, this unpacks it into the original list of tensors.

    NOTE: Attempt this only after you have completed pack_sequence and
    understand how it works.

    Args:
        ps (PackedSequence)
    Returns:
        list of Tensors
    '''
    # INSTRUCTIONS
    # This operation is just the reverse of pack_sequence.
    # Use ps.batch_sizes to determine the number of timesteps in each tensor of
    # the original list (the packed tensors are sorted in descending order by
    # number of timesteps).
    # Construct the individual tensors using tensor.cat.
    # Re-arrange the resulting list based on ps.sorted_indices.
    sequences = [None] * len(ps.sorted_indices)
    for k, orig_idx in enumerate(ps.sorted_indices):
        # Sorted sample k is active at timestep t exactly when batch_sizes[t] > k;
        # its row for timestep t sits at offset (sum of earlier batch sizes) + k.
        sequence = []
        offset = 0
        for batch_size in ps.batch_sizes:
            if k >= batch_size:
                break  # this sample has no more timesteps
            row = offset + k
            sequence.append(ps.data[row:row + 1])
            offset += batch_size
        # Place the rebuilt tensor back at its original position in the list
        sequences[orig_idx] = tensor.cat(sequence)
    return sequences
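
# Round-trip sketch (illustrative; assumes Tensor.randn and pack_sequence from
# this file): unpack_sequence undoes the length-sort via ps.sorted_indices, so
# each rebuilt tensor lands back at its original position in the list.
def _example_pack_unpack_roundtrip():
    seqs = [Tensor.randn(2, 3), Tensor.randn(5, 3), Tensor.randn(1, 3)]
    unpacked = unpack_sequence(pack_sequence(seqs))
    for original, rebuilt in zip(seqs, unpacked):
        assert original.shape == rebuilt.shape  # values match as well
    return unpacked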
def pack_sequence(sequence):
    '''
    Constructs a packed sequence from an input sequence of tensors.
    Unlike PyTorch, this assumes enforce_sorted=False by default, i.e. the
    tensors in the sequence need not be sorted by length (descending).

    Args:
        sequence (list of Tensor): the ith tensor in the list is of shape (Ti, K)
            where Ti is the number of timesteps in sample i and K is the number
            of features
    Returns:
        PackedSequence: the data attribute of the result is of shape
            (total number of timesteps (sum) across all samples in the batch,
            number of features)
    '''
    # INSTRUCTIONS
    # Find the sorted indices based on the number of timesteps in each sample.
    # Extract slices from each sample and order them properly to construct the
    # packed tensor. The __getitem__ you defined for the Tensor class will come
    # in handy.
    # Use tensor.cat to create a single tensor from the re-ordered segments.
    # Finally, construct the PackedSequence object.
    # REMEMBER: All operations here should be able to construct a valid
    # autograd graph.

    # Sort sample indices by number of timesteps, longest first
    sequence_lengths = [seq.shape[0] for seq in sequence]
    sorted_indices = np.argsort(np.asarray(sequence_lengths))[::-1]

    # Walk timestep by timestep: at each timestep, collect that step's slice
    # from every sample that is still active. Because samples are visited in
    # descending-length order, the active samples always form a prefix.
    packed_seq = []
    batch_sizes = []
    for t in range(sequence[sorted_indices[0]].shape[0]):
        batch_size = 0
        for j in sorted_indices:
            if t < sequence[j].shape[0]:
                # Slice (rather than index) to keep a leading timestep dimension
                packed_seq.append(sequence[j][t:t + 1])
                batch_size += 1
        batch_sizes.append(batch_size)

    # cat builds the packed data tensor inside the autograd graph
    data = tensor.cat(packed_seq)
    return PackedSequence(data, sorted_indices, np.asarray(batch_sizes))
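
# Layout sketch (illustrative; assumes Tensor.randn as used in the test above):
# the packed data is grouped by timestep, then by sample, so samples interleave.
def _example_packed_layout():
    a = Tensor.randn(3, 2)       # sample 0: timesteps a0, a1, a2
    b = Tensor.randn(1, 2)       # sample 1: timestep  b0
    ps = pack_sequence([a, b])
    # ps.sorted_indices == [0, 1]            (sample 0 is longest)
    # ps.batch_sizes    == [2, 1, 1]
    # ps.data rows      == [a0, b0, a1, a2]  (timestep-major ordering)
    return ps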