def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS]. Additional elements of sequence are start and stop control markers, stored in additional bits. : returns: Tuple consisting of: input [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS], output [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS], mask [BATCH_SIZE, 2*SEQ_LENGTH+2] TODO: every item in batch has now the same seq_length. """ # Set sequence length seq_length = np.random.randint(self.min_sequence_length, self.max_sequence_length + 1) # Generate batch of random bit sequences [BATCH_SIZE x SEQ_LENGTH X # DATA_BITS] bit_seq = np.random.binomial( 1, self.bias, (self.batch_size, seq_length, self.data_bits)) # Generate input: [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS] inputs = np.zeros([ self.batch_size, 2 * seq_length + 2, self.control_bits + self.data_bits ], dtype=np.float32) # Set start control marker. inputs[:, 0, 0] = 1 # Memorization bit. # Set bit sequence. inputs[:, 1:seq_length + 1, self.control_bits:self.control_bits + self.data_bits] = bit_seq # Set end control marker. inputs[:, seq_length + 1, 1] = 1 # Recall bit. # Generate target: [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS] (only data # bits!) targets = np.zeros( [self.batch_size, 2 * seq_length + 2, self.data_bits], dtype=np.float32) # Set bit sequence. targets[:, seq_length + 2:, :] = bit_seq # Generate target mask: [BATCH_SIZE, 2*SEQ_LENGTH+2] mask = torch.zeros([self.batch_size, 2 * seq_length + 2]).type(torch.ByteTensor) mask[:, seq_length + 2:] = 1 # PyTorch variables. ptinputs = torch.from_numpy(inputs).type(self.dtype) pttargets = torch.from_numpy(targets).type(self.dtype) # Return tuples. data_tuple = DataTuple(ptinputs, pttargets) aux_tuple = AlgSeqAuxTuple(mask, seq_length, 1) return data_tuple, aux_tuple
def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS]. Additional elements of sequence are start and stop control markers, stored in additional bits. : returns: Tuple consisting of: input [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS], output [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS], mask [BATCH_SIZE, 2*SEQ_LENGTH+2] TODO: every item in batch has now the same seq_length. """ # Define control channel bits. # ctrl_main = [0, 0, 0, 0] # not really used. #ctrl_aux = [0, 0, 0, 1] ctrl_aux = np.zeros(self.control_bits) if (self.control_bits == 4): ctrl_aux[3] = 1 # [0, 0, 0, 1] else: if self.randomize_control_lines: # Randomly pick one of the bits to be set. ctrl_bit = np.random.randint(3, self.control_bits) ctrl_aux[ctrl_bit] = 1 else: ctrl_aux[self.control_bits - 1] = 1 # Markers. marker_start_main = np.zeros(self.control_bits) marker_start_main[0] = 1 # [1, 0, 0, 0] marker_start_aux_serial = np.zeros(self.control_bits) marker_start_aux_serial[1] = 1 # [0, 1, 0, 0] marker_start_aux_reverse = np.zeros(self.control_bits) marker_start_aux_reverse[2] = 1 # [0, 0, 1, 0] # Set sequence length. seq_length = np.random.randint(self.min_sequence_length, self.max_sequence_length + 1) # Generate batch of random bit sequences [BATCH_SIZE x SEQ_LENGTH X # DATA_BITS] bit_seq = np.random.binomial( 1, self.bias, (self.batch_size, seq_length, self.data_bits)) # 1. Generate inputs. # Generate input: [BATCH_SIZE, 3*SEQ_LENGTH+3, CONTROL_BITS+DATA_BITS] inputs = np.zeros([ self.batch_size, 3 * seq_length + 3, self.control_bits + self.data_bits ], dtype=np.float32) # Set start main control marker. inputs[:, 0, 0:self.control_bits] = np.tile(marker_start_main, (self.batch_size, 1)) # Set bit sequence. inputs[:, 1:seq_length + 1, self.control_bits:self.control_bits + self.data_bits] = bit_seq # Set start aux serial recall control marker. inputs[:, seq_length + 1, 0:self.control_bits] = np.tile(marker_start_aux_serial, (self.batch_size, 1)) inputs[:, seq_length + 2:2 * seq_length + 2, 0:self.control_bits] = np.tile(ctrl_aux, (self.batch_size, seq_length, 1)) # Set start aux serial reverse control marker. inputs[:, 2 * seq_length + 2, 0:self.control_bits] = np.tile(marker_start_aux_reverse, (self.batch_size, 1)) inputs[:, 2 * seq_length + 3:3 * seq_length + 3, 0:self.control_bits] = np.tile(ctrl_aux, (self.batch_size, seq_length, 1)) # 2. Generate targets. # Generate target: [BATCH_SIZE, 3*SEQ_LENGTH+3, DATA_BITS] (only data # bits!) targets = np.zeros( [self.batch_size, 3 * seq_length + 3, self.data_bits], dtype=np.float32) # Set bit sequence for serial recall. targets[:, seq_length + 2:2 * seq_length + 2, :] = bit_seq # Set bit sequence for reverse recall. targets[:, 2 * seq_length + 3:, :] = np.fliplr(bit_seq) # 3. Generate mask. # Generate target mask: [BATCH_SIZE, 3*SEQ_LENGTH+3] mask = torch.zeros([self.batch_size, 3 * seq_length + 3]).type(torch.ByteTensor) mask[:, seq_length + 2:2 * seq_length + 2] = 1 mask[:, 2 * seq_length + 3:] = 1 # PyTorch variables. ptinputs = torch.from_numpy(inputs).type(self.dtype) pttargets = torch.from_numpy(targets).type(self.dtype) # Return tuples. data_tuple = DataTuple(ptinputs, pttargets) aux_tuple = AlgSeqAuxTuple(mask, seq_length, 1) return data_tuple, aux_tuple
def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS]. Additional elements of sequence are start and stop control markers, stored in additional bits. :return: Tuple consisting of: input [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS], :return: Output [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS], :return: Mask [BATCH_SIZE, 2*SEQ_LENGTH+2] TODO: every item in batch has now the same seq_length. """ # Set sequence length. seq_length = np.random.randint( self.min_sequence_length, self.max_sequence_length + 1) # Generate batch of random bit sequences [BATCH_SIZE x SEQ_LENGTH X # DATA_BITS] bit_seq = np.random.binomial( 1, self.bias, (self.batch_size, seq_length, self.data_bits)) # Generate input: [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS] inputs = np.zeros([self.batch_size, 2 * seq_length + 2, self.control_bits + self.data_bits], dtype=np.float32) # Set start control marker. inputs[:, 0, 0] = 1 # Memorization bit. # Set bit sequence. inputs[:, 1:seq_length + 1, self.control_bits:self.control_bits + self.data_bits] = bit_seq # Set end control marker. inputs[:, seq_length + 1, 1] = 1 # Recall bit. # Generate target: [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS] (only data # bits!) targets = np.zeros([self.batch_size, 2 * seq_length + 2, self.data_bits], dtype=np.float32) # Rotate sequence by shifting the bits to right: data_bits >> num_bits num_bits = -self.num_bits # Check if we are using relative or absolute rotation. if -1 < num_bits < 1: num_bits = num_bits * self.data_bits # Round bitshift to int. num_bits = np.round(num_bits) # Modulo bitshift with data_bits. num_bits = int(num_bits % self.data_bits) # Apply items shift bit_seq = np.concatenate( (bit_seq[:, :, num_bits:], bit_seq[:, :, :num_bits]), axis=2) targets[:, seq_length + 2:, :] = bit_seq # Generate target mask: [BATCH_SIZE, 2*SEQ_LENGTH+2] mask = torch.zeros([self.batch_size, 2 * seq_length + 2] ).type(torch.ByteTensor) mask[:, seq_length + 2:] = 1 # PyTorch variables. ptinputs = torch.from_numpy(inputs).type(self.dtype) pttargets = torch.from_numpy(targets).type(self.dtype) # Return tuples. data_tuple = DataTuple(ptinputs, pttargets) aux_tuple = AlgSeqAuxTuple(mask, seq_length, 1) return data_tuple, aux_tuple
def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, SEQ_LENGTH, CONTROL_BITS+DATA_BITS]. SEQ_LENGTH depends on number of sub-sequences and its lengths. :returns: Tuple consisting of: inputs, target and mask pattern of inputs: # x1 % y1 & d1 # x2 % y2 & d2 ... # xn % yn & dn $ d` pattern of target: d d F(y1) d d F(y2) ... d d F(yn) all(xi) F: inversion function mask: used to mask the data part of the target. xi, yi, and dn(d'): sub sequences x of random length, sub sequence y of random length and dummies. """ # define control channel markers pos = [0, 0, 0, 0] ctrl_data = [0, 0, 0, 0] ctrl_dummy = [0, 0, 1, 0] ctrl_inter = [0, 0, 0, 1] # assign markers markers = ctrl_data, ctrl_dummy, pos # number of sub_sequences nb_sub_seq_a = np.random.randint( self.num_subseq_min, self.num_subseq_max + 1) # might be different in future implementation nb_sub_seq_b = nb_sub_seq_a # set the sequence length of each marker seq_lengths_a = np.random.randint( low=self.min_sequence_length, high=self.max_sequence_length + 1, size=nb_sub_seq_a) seq_lengths_b = np.random.randint( low=self.min_sequence_length, high=self.max_sequence_length + 1, size=nb_sub_seq_b) # generate subsequences for x and y x = [ np.random.binomial( 1, self.bias, (self.batch_size, n, self.data_bits)) for n in seq_lengths_a] y = [ np.random.binomial( 1, self.bias, (self.batch_size, n, self.data_bits)) for n in seq_lengths_b] # create the target target = np.concatenate(y + x, axis=1) # add marker at the begging of x and dummies of same length, also a # marker at the begging of dummies is added xx = [self.augment(seq, markers, ctrl_start=[ 1, 0, 0, 0], add_marker_data=True) for seq in x] # add dummies to y of same length, also a marker at the begging of dummies is added # TODO: ctrl_start is not needed here, this is replaced by ctrl_xy yy = [self.augment(seq, markers, ctrl_start=[0, 1, 0, 0], add_marker_data=False) for seq in y] # this is a marker to separate dummies of x and y at the end of the # sequence inter_seq = self.add_ctrl( np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos) ctrl_xy = np.zeros_like(ctrl_data) ctrl_xy[1] = 1 # this is a marker between sub sequence x and y inter_xy = self.add_ctrl( np.zeros((self.batch_size, 1, self.data_bits)), ctrl_xy, pos) # data which contains all xs and all ys plus dummies of ys data_1 = [ arr for a, b in zip(xx, yy) for arr in a[: -1] + [inter_xy] + [np.fliplr(b[0])] + [b[1]]] # dummies of xs data_2 = [a[-1][:, 1:, :] for a in xx] # concatenate all parts of the inputs inputs = np.concatenate(data_1 + [inter_seq] + data_2, axis=1) # PyTorch variables inputs = torch.from_numpy(inputs).type(self.dtype) target = torch.from_numpy(target).type(self.dtype) # create the mask mask_all = inputs[:, :, 0:self.control_bits] == 1 mask = mask_all[..., 0] for i in range(self.control_bits): mask = mask_all[..., i] * mask # rest ctrl channel of dummies inputs[:, mask[0], 0:self.control_bits] = 0 # Create the target with the dummies target_with_dummies = torch.zeros_like( inputs[:, :, self.control_bits:]) target_with_dummies[:, mask[0], :] = target # Return tuples. data_tuple = DataTuple(inputs, target_with_dummies) # Returning maximum length of subsequence a - for now. aux_tuple = AlgSeqAuxTuple( mask, max(seq_lengths_a), nb_sub_seq_a + nb_sub_seq_b) return data_tuple, aux_tuple
def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS]. Additional elements of sequence are start and stop control markers, stored in additional bits. : returns: Tuple consisting of: input [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS], output [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS], mask [BATCH_SIZE, 2*SEQ_LENGTH+2] TODO: every item in batch has now the same seq_length. """ assert (self.max_sequence_length > self.seq_start) # define control channel markers pos = np.zeros(self.control_bits) # [0, 0, 0] ctrl_data = np.zeros(self.control_bits) # [0, 0, 0] ctrl_inter = np.zeros(self.control_bits) ctrl_inter[1] = 1 # [0, 1, 0] ctrl_y = np.zeros(self.control_bits) ctrl_y[2] = 1 # [0, 0, 1] ctrl_dummy = np.ones(self.control_bits) # [1, 1, 1] ctrl_start = np.zeros(self.control_bits) ctrl_start[0] = 1 # [1, 0, 0] # assign markers markers = ctrl_data, ctrl_dummy, pos # Set sequence length seq_length = np.random.randint(self.min_sequence_length, self.max_sequence_length + 1) # Generate batch of random bit sequences [BATCH_SIZE x SEQ_LENGTH X # DATA_BITS] bit_seq = np.random.binomial( 1, self.bias, (self.batch_size, seq_length, self.data_bits)) # Generate target by indexing through the array target_seq = np.array(bit_seq[:, self.seq_start::self.skip_length, :]) # generate subsequences for x and y x = [np.array(bit_seq)] # data of x and dummies xx = [ self.augment(seq, markers, ctrl_start=ctrl_start, add_marker_data=True, add_marker_dummy=False) for seq in x ] # data of x data_1 = [arr for a in xx for arr in a[:-1]] # this is a marker between sub sequence x and dummies #inter_seq = [add_ctrl(np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos)] # dummies output markers2 = ctrl_dummy, ctrl_dummy, pos yy = [ self.augment(np.zeros(target_seq.shape), markers2, ctrl_start=ctrl_inter, add_marker_data=True, add_marker_dummy=False) ] data_2 = [arr for a in yy for arr in a[:-1]] # add dummies to target seq_length_tdummies = seq_length + 2 dummies_target = np.zeros( [self.batch_size, seq_length_tdummies, self.data_bits], dtype=np.float32) targets = np.concatenate((dummies_target, target_seq), axis=1) inputs = np.concatenate(data_1 + data_2, axis=1) # PyTorch variables inputs = torch.from_numpy(inputs).type(self.dtype) targets = torch.from_numpy(targets).type(self.dtype) # TODO: batch might have different sequence lengths mask_all = inputs[..., 0:self.control_bits] == 1 mask = mask_all[..., 0] for i in range(self.control_bits): mask = mask_all[..., i] * mask # TODO: fix the batch indexing # rest channel values of data dummies #inputs[:, mask[0], 0:self.control_bits] = ctrl_dummy # TODO: fix the batch indexing # rest channel values of data dummies inputs[:, mask[0], 0:self.control_bits] = torch.tensor(ctrl_y).type(self.dtype) # Return tuples. data_tuple = DataTuple(inputs, targets) aux_tuple = AlgSeqAuxTuple(mask, seq_length, 1) return data_tuple, aux_tuple
def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, SEQ_LENGTH, CONTROL_BITS+DATA_BITS]. SEQ_LENGTH depends on number of sub-sequences and its lengths. :returns: Tuple consisting of: input, output and mask pattern of inputs: # x1 % y1 # x2 % y2 ... # xn % yn & d pattern of target: dummies ... ... ... ... all(xi) mask: used to mask the data part of the target. xi, yi, and d: sub sequences x of random length, sub sequence y of random length and dummies. """ # define control channel markers pos = [0, 0, 0, 0] ctrl_data = [0, 0, 0, 0] ctrl_dummy = [0, 0, 1, 0] ctrl_inter = [0, 0, 0, 1] # assign markers markers = ctrl_data, ctrl_dummy, pos # number of sub_sequences nb_sub_seq_a = np.random.randint(self.num_subseq_min, self.num_subseq_max + 1) # might be different in future implementation nb_sub_seq_b = nb_sub_seq_a # set the sequence length of each marker seq_lengths_a = np.random.randint(low=self.min_sequence_length, high=self.max_sequence_length + 1, size=nb_sub_seq_a) seq_lengths_b = np.random.randint(low=self.min_sequence_length, high=self.max_sequence_length + 1, size=nb_sub_seq_b) # generate subsequences for x and y x = [ np.random.binomial(1, self.bias, (self.batch_size, n, self.data_bits)) for n in seq_lengths_a ] y = [ np.random.binomial(1, self.bias, (self.batch_size, n, self.data_bits)) for n in seq_lengths_b ] # create the target target = np.concatenate(x, axis=1) # add marker at the begging of x and dummies of same length xx = [ self.augment(seq, markers, ctrl_start=[1, 0, 0, 0], add_marker_data=True, add_marker_dummy=False) for seq in x ] # add marker at the begging of y and dummies of same length, also a marker at the begging of dummies is added # TODO: as we don't need the dummies here (no y needs recalling), we # should add an arguements specifying if dummies are needed or not yy = [ self.augment(seq, markers, ctrl_start=[0, 1, 0, 0], add_marker_data=True) for seq in y ] # this is a marker to separate dummies of x and y at the end of the # sequence inter_seq = self.add_ctrl( np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos) # data which contains all xs and all ys data_1 = [arr for a, b in zip(xx, yy) for arr in a[:-1] + b[:-1]] # dummies of y and xs data_2 = [inter_seq] + [a[-1] for a in xx] # concatenate all parts of the inputs inputs = np.concatenate(data_1 + data_2, axis=1) # PyTorch variables inputs = torch.from_numpy(inputs).type(self.dtype) target = torch.from_numpy(target).type(self.dtype) # create the mask mask_all = inputs[:, :, 0:self.control_bits] == 1 mask = mask_all[..., 0] for i in range(self.control_bits): mask = mask_all[..., i] * mask # rest ctrl channel of dummies inputs[:, mask[0], 0:self.control_bits] = 0 # Create the target with the dummies target_with_dummies = torch.zeros_like(inputs[:, :, self.control_bits:]) target_with_dummies[:, mask[0], :] = target # Return data tuple. data_tuple = DataTuple(inputs, target_with_dummies) # Returning maximum length of sequence a - for now. aux_tuple = AlgSeqAuxTuple(mask, max(seq_lengths_a), nb_sub_seq_a + nb_sub_seq_b) return data_tuple, aux_tuple
def generate_batch(self): """ # TODO : Documentation will be added soon """ # define control channel markers pos = [0, 0] ctrl_data = [0, 0] ctrl_dummy = [0, 1] ctrl_inter = [0, 1] # assign markers markers = ctrl_data, ctrl_dummy, pos # number sub sequences num_sub_seq = np.random.randint(self.num_subseq_min, self.num_subseq_max + 1) # set the sequence length of each marker seq_length = np.random.randint(low=self.min_sequence_length, high=self.max_sequence_length + 1, size=num_sub_seq) # generate subsequences for x and y x = [ np.random.binomial(1, self.bias, (self.batch_size, n, self.data_bits)) for n in seq_length ] x_last = [a[:, None, -1, :] for a in x] # create the target seq_length_tdummies = sum(seq_length) + seq_length.shape[0] + 1 dummies_target = np.zeros( [self.batch_size, seq_length_tdummies, self.data_bits], dtype=np.float32) targets = np.concatenate([dummies_target] + x_last, axis=1) # data of x and dummies xx = [ self.augment(seq, markers, ctrl_start=[1, 0], add_marker_data=True, add_marker_dummy=False) for seq in x ] # data of x data_1 = [arr for a in xx for arr in a[:-1]] # this is a marker between sub sequence x and dummies inter_seq = self.add_ctrl( np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos) # dummies of x x_dummy_last = [a[:, None, -1, :] for b in xx for a in b[-1:]] # concatenate all parts of the inputs inputs = np.concatenate(data_1 + [inter_seq] + x_dummy_last, axis=1) # PyTorch variables inputs = torch.from_numpy(inputs).type(self.dtype) targets = torch.from_numpy(targets).type(self.dtype) # TODO: batch might have different sequence lengths mask_all = inputs[..., 0:self.control_bits] == 1 mask = mask_all[..., 0] for i in range(self.control_bits): mask = mask_all[..., i] * mask # TODO: fix the batch indexing # rest channel values of data dummies inputs[:, mask[0], 0:self.control_bits] = 0 # Return tuples. data_tuple = DataTuple(inputs, targets) # Returning maximum sequence length - for now. aux_tuple = AlgSeqAuxTuple(mask, max(seq_length), num_sub_seq) return data_tuple, aux_tuple
def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, SEQ_LENGTH, CONTROL_BITS+DATA_BITS]. :returns: Tuple consisting of: input, output and mask pattern of inputs: x1, x2, d pattern of target: d, d, e mask: used to mask the data part of the target where x1 and x2 are subsequences and d are dummies """ # define control channel markers # pos = [0, 0, 0] pos = np.zeros(self.control_bits) # [0, 0, 0] # ctrl_data = [0, 0, 0] ctrl_data = np.zeros(self.control_bits) # [0, 0, 0] # ctrl_dummy = [0, 0, 0 ] ctrl_dummy = np.zeros(self.control_bits) # ctrl_inter = [0, 1, 0] ctrl_inter = np.zeros(self.control_bits) ctrl_inter[1] = 1 # [0, 1, 0] # ctrl_y = [0, 0, 1] ctrl_y = np.zeros(self.control_bits) ctrl_y[2] = 1 # [0, 1, 0] # ctrl_start = [1, 0, 0] ctrl_start = np.zeros(self.control_bits) ctrl_start[0] = 1 # [1, 0, 0] # assign markers markers = ctrl_data, ctrl_dummy, pos # set the sequence length of each marker seq_length = np.random.randint(low=self.min_sequence_length, high=self.max_sequence_length + 1) # generate subsequences for x and y x = [ np.array( np.random.binomial( 1, self.bias, (self.batch_size, seq_length, self.data_bits))) ] # Generate the second sequence which is either a scrambled version of the first # or exactly identical with approximately 50% probability (technically the scrambling # allows them to be the same with a very low chance) # First generate a random binomial of the same size as x, this will be # used be used with an xor operation to scamble x to get y xor_scrambler = np.array(np.random.binomial(1, self.bias, x[0].shape)) # Create a mask that will set entire batches of the xor_scrambler to zero. The batches that are zero # will force the xor to return the original x for that batch scrambler_mask = np.array( np.random.binomial(1, self.bias, (self.batch_size, ))) xor_scrambler = np.array(xor_scrambler * scrambler_mask[:, np.newaxis, np.newaxis]) aux_seq = np.fliplr(np.logical_xor(x[0], xor_scrambler)) # if the xor scambler is all zeros then x and y will be the same so # target will be true actual_target = np.array(np.any(xor_scrambler, axis=(1, 2))) if self.predict_inverse: # if the xor scambler is all zeros then x and y will be the same so # target will be true actual_target = actual_target[:, np.newaxis, np.newaxis] else: actual_target = np.logical_not(actual_target[:, np.newaxis, np.newaxis]) # create the target seq_length_tdummies = 2 * seq_length + 1 dummies_target = np.zeros([self.batch_size, seq_length_tdummies, 1], dtype=np.float32) target = np.concatenate((dummies_target, actual_target), axis=1) # data of x and dummies xx = [ self.augment(seq, markers, ctrl_start=ctrl_start, add_marker_data=True, add_marker_dummy=False) for seq in x ] # data of x data_1 = [arr for a in xx for arr in a[:-1]] # this is a marker between sub sequence x and dummies inter_seq = [ self.add_ctrl(np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos) ] # Second Sequence for comparison markers2 = ctrl_y, ctrl_dummy, pos yy = [ self.augment(aux_seq, markers2, ctrl_start=ctrl_y, add_marker_data=False, add_marker_dummy=False) ] data_2 = [arr for a in yy for arr in a[:-1]] data_2[0][:, -1, 0:self.control_bits] = np.ones(len(ctrl_dummy)) #ctrl_data_select = [1,0] #aux_seq_wctrls=add_ctrl(aux_seq, ctrl_data_select, pos) # aux_seq_wctrls[:,-1,0:self.control_bits]=np.ones(len(ctrl_dummy)) #data_2 = [aux_seq_wctrls] recall_seq = [ self.add_ctrl(np.zeros((self.batch_size, 1, self.data_bits)), ctrl_dummy, pos) ] dummy_data = [ self.add_ctrl(np.zeros((self.batch_size, 1, 
self.data_bits)), np.ones(len(ctrl_dummy)), pos) ] # print(data_1[0].shape) # print(inter_seq[0].shape) # print(data_2[0].shape) # concatenate all parts of the inputs inputs = np.concatenate(data_1 + inter_seq + data_2, axis=1) # PyTorch variables inputs = torch.from_numpy(inputs).type(self.dtype) target = torch.from_numpy(target).type(self.dtype) # Mask. mask_all = inputs[..., 0:self.control_bits] == 1 mask = mask_all[..., 0] for i in range(self.control_bits): mask = mask_all[..., i] * mask # TODO: fix the batch indexing # rest channel values of data dummies inputs[:, mask[0], 0:self.control_bits] = torch.tensor(ctrl_y).type(self.dtype) # Return tuples. data_tuple = DataTuple(inputs, target) aux_tuple = AlgSeqAuxTuple(mask, seq_length, 1) return data_tuple, aux_tuple
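# The scrambler flips x exactly where it has a 1, so the two sequences are
# equal iff the (batch-masked) scrambler is all zero; a toy check with
# assumed sizes and a fixed 0.5 bias.
import numpy as np

x = np.random.binomial(1, 0.5, (4, 6, 8))
xor_scrambler = np.random.binomial(1, 0.5, x.shape)
scrambler_mask = np.random.binomial(1, 0.5, (4,))  # 0 zeroes a whole batch entry
xor_scrambler = xor_scrambler * scrambler_mask[:, np.newaxis, np.newaxis]

y = np.logical_xor(x, xor_scrambler)
differs = np.any(xor_scrambler, axis=(1, 2))       # True where y != x somewhere
assert (differs == np.any(y != x, axis=(1, 2))).all()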
def generate_batch(self): """ Generates a batch of size [BATCH_SIZE, SEQ_LENGTH, CONTROL_BITS+DATA_BITS]. SEQ_LENGTH depends on number of sub-sequences and its lengths. :returns: Tuple consisting of: input, output and mask pattern of inputs: x1, x2, ...xn d pattern of target: d, d, ...d xn mask: used to mask the data part of the target xi, d: sub sequences, dummies TODO: deal with batch_size > 1 """ # define control channel markers pos = [0, 0] ctrl_data = [0, 0] ctrl_dummy = [0, 1] ctrl_inter = [0, 1] # assign markers markers = ctrl_data, ctrl_dummy, pos # number sub sequences num_sub_seq = np.random.randint(self.num_subseq_min, self.num_subseq_max + 1) # set the sequence length of each marker seq_length = np.random.randint(low=self.min_sequence_length, high=self.max_sequence_length + 1, size=num_sub_seq) # generate subsequences for x and y x = [ np.random.binomial(1, self.bias, (self.batch_size, n, self.data_bits)) for n in seq_length ] # create the target seq_length_tdummies = sum(seq_length) + seq_length.shape[0] + 1 dummies_target = np.zeros( [self.batch_size, seq_length_tdummies, self.data_bits], dtype=np.float32) targets = np.concatenate((dummies_target, x[-1]), axis=1) # data of x and dummies xx = [ self.augment(seq, markers, ctrl_start=[1, 0], add_marker_data=True, add_marker_dummy=False) for seq in x ] # data of x data_1 = [arr for a in xx for arr in a[:-1]] # this is a marker between sub sequence x and dummies inter_seq = self.add_ctrl( np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos) # dummies of x data_2 = [xx[-1][-1]] # concatenate all parts of the inputs inputs = np.concatenate(data_1 + [inter_seq] + data_2, axis=1) # PyTorch variables inputs = torch.from_numpy(inputs).type(self.dtype) targets = torch.from_numpy(targets).type(self.dtype) # TODO: batch might have different sequence lengths mask_all = inputs[..., 0:self.control_bits] == 1 mask = mask_all[..., 0] for i in range(self.control_bits): mask = mask_all[..., i] * mask # TODO: fix the batch indexing # rest channel values of data dummies inputs[:, mask[0], 0:self.control_bits] = 0 # Return tuples. data_tuple = DataTuple(inputs, targets) # Returning maximum sequence length - for now. aux_tuple = AlgSeqAuxTuple(mask, max(seq_length), num_sub_seq) return data_tuple, aux_tuple