Beispiel #1
0
    def generate_batch(self):
        """
        Creates one batch of the store-then-recall task on random bit
        sequences.

        One sequence length is sampled per call and shared by the whole batch.
        Every sample spans 2*SEQ_LENGTH+2 time steps: a start marker, the
        SEQ_LENGTH data items, a recall marker and SEQ_LENGTH recall steps.

        :returns: (data_tuple, aux_tuple). data_tuple wraps
            inputs [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS] and
            targets [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS]; aux_tuple wraps
            mask [BATCH_SIZE, 2*SEQ_LENGTH+2], the sampled sequence length
            and the constant 1.

        TODO: all items of a batch currently share the same seq_length.

        """
        # Random draws: first the length, then the bits (order matters for
        # reproducibility under a fixed seed).
        length = np.random.randint(self.min_sequence_length,
                                   self.max_sequence_length + 1)
        payload = np.random.binomial(
            1, self.bias, (self.batch_size, length, self.data_bits))

        # Time-axis layout.
        total_steps = 2 * length + 2
        recall_marker_step = length + 1
        recall_start = length + 2

        # Feature-axis layout: control channels first, data channels after.
        data_lo = self.control_bits
        data_hi = self.control_bits + self.data_bits

        # Inputs: start marker, payload, recall marker; recall steps stay 0.
        inputs = np.zeros((self.batch_size, total_steps, data_hi),
                          dtype=np.float32)
        inputs[:, 0, 0] = 1  # Control channel 0: memorization bit.
        inputs[:, 1:recall_marker_step, data_lo:data_hi] = payload
        inputs[:, recall_marker_step, 1] = 1  # Control channel 1: recall bit.

        # Targets carry data bits only; the payload is expected after the
        # recall marker.
        targets = np.zeros((self.batch_size, total_steps, self.data_bits),
                           dtype=np.float32)
        targets[:, recall_start:, :] = payload

        # Mask is 1 exactly on the recall steps.
        mask = torch.zeros([self.batch_size,
                            total_steps]).type(torch.ByteTensor)
        mask[:, recall_start:] = 1

        # Wrap everything into the tuples expected by the caller.
        data_tuple = DataTuple(
            torch.from_numpy(inputs).type(self.dtype),
            torch.from_numpy(targets).type(self.dtype))
        aux_tuple = AlgSeqAuxTuple(mask, length, 1)

        return data_tuple, aux_tuple
    def generate_batch(self):
        """
        Creates one batch of a task that recalls a random bit sequence twice:
        first in the original order, then in reversed order.

        One sequence length is sampled per call and shared by the whole batch.
        Every sample spans 3*SEQ_LENGTH+3 time steps:
        store marker, SEQ_LENGTH data items, serial-recall marker, SEQ_LENGTH
        serial-recall steps, reverse-recall marker, SEQ_LENGTH reverse-recall
        steps.

        :returns: (data_tuple, aux_tuple). data_tuple wraps
            inputs [BATCH_SIZE, 3*SEQ_LENGTH+3, CONTROL_BITS+DATA_BITS] and
            targets [BATCH_SIZE, 3*SEQ_LENGTH+3, DATA_BITS]; aux_tuple wraps
            mask [BATCH_SIZE, 3*SEQ_LENGTH+3], the sampled sequence length
            and the constant 1.

        TODO: all items of a batch currently share the same seq_length.

        """
        n_ctrl = self.control_bits

        def one_hot(index):
            # Control vector with a single channel switched on.
            vec = np.zeros(n_ctrl)
            vec[index] = 1
            return vec

        # Control pattern held during both recall phases. With exactly four
        # control bits it is always channel 3; otherwise it is either a
        # randomly chosen channel >= 3 or the last channel.
        if n_ctrl == 4:
            recall_ctrl = one_hot(3)
        elif self.randomize_control_lines:
            recall_ctrl = one_hot(np.random.randint(3, n_ctrl))
        else:
            recall_ctrl = one_hot(n_ctrl - 1)

        # Phase markers occupy control channels 0, 1 and 2.
        store_marker = one_hot(0)
        serial_marker = one_hot(1)
        reverse_marker = one_hot(2)

        # Random draws: length first, then the bits.
        length = np.random.randint(self.min_sequence_length,
                                   self.max_sequence_length + 1)
        payload = np.random.binomial(
            1, self.bias, (self.batch_size, length, self.data_bits))

        # Time-axis layout.
        serial_marker_step = length + 1
        serial_start = length + 2
        reverse_marker_step = 2 * length + 2
        reverse_start = 2 * length + 3
        total_steps = 3 * length + 3

        # 1. Inputs. Control vectors are broadcast over the batch (and over
        # the time steps of each recall phase).
        inputs = np.zeros(
            (self.batch_size, total_steps, n_ctrl + self.data_bits),
            dtype=np.float32)
        inputs[:, 0, 0:n_ctrl] = store_marker
        inputs[:, 1:serial_marker_step,
               n_ctrl:n_ctrl + self.data_bits] = payload
        inputs[:, serial_marker_step, 0:n_ctrl] = serial_marker
        inputs[:, serial_start:reverse_marker_step, 0:n_ctrl] = recall_ctrl
        inputs[:, reverse_marker_step, 0:n_ctrl] = reverse_marker
        inputs[:, reverse_start:total_steps, 0:n_ctrl] = recall_ctrl

        # 2. Targets (data bits only): payload as-is, then reversed in time.
        targets = np.zeros((self.batch_size, total_steps, self.data_bits),
                           dtype=np.float32)
        targets[:, serial_start:reverse_marker_step, :] = payload
        targets[:, reverse_start:, :] = payload[:, ::-1, :]

        # 3. Mask: 1 on both recall phases, 0 elsewhere (markers included).
        mask = torch.zeros([self.batch_size,
                            total_steps]).type(torch.ByteTensor)
        mask[:, serial_start:reverse_marker_step] = 1
        mask[:, reverse_start:] = 1

        # Wrap everything into the tuples expected by the caller.
        data_tuple = DataTuple(
            torch.from_numpy(inputs).type(self.dtype),
            torch.from_numpy(targets).type(self.dtype))
        aux_tuple = AlgSeqAuxTuple(mask, length, 1)

        return data_tuple, aux_tuple
Beispiel #3
0
    def generate_batch(self):
        """
        Creates one batch of a recall task in which every recalled item is a
        circularly rotated copy (along the data-bit axis) of the stored item.

        One sequence length is sampled per call and shared by the whole batch.
        Every sample spans 2*SEQ_LENGTH+2 time steps: a start marker, the
        SEQ_LENGTH data items, a recall marker and SEQ_LENGTH recall steps.

        The rotation amount comes from ``self.num_bits``: a value strictly
        between -1 and 1 is treated as a fraction of DATA_BITS, anything else
        as an absolute number of bits. The result is rounded and taken modulo
        DATA_BITS.

        :return: (data_tuple, aux_tuple). data_tuple wraps
            inputs [BATCH_SIZE, 2*SEQ_LENGTH+2, CONTROL_BITS+DATA_BITS] and
            targets [BATCH_SIZE, 2*SEQ_LENGTH+2, DATA_BITS]; aux_tuple wraps
            mask [BATCH_SIZE, 2*SEQ_LENGTH+2], the sampled sequence length
            and the constant 1.

        TODO: all items of a batch currently share the same seq_length.

        """
        # Random draws: length first, then the bits.
        length = np.random.randint(
            self.min_sequence_length, self.max_sequence_length + 1)
        payload = np.random.binomial(
            1, self.bias, (self.batch_size, length, self.data_bits))

        # Time-axis layout.
        total_steps = 2 * length + 2
        recall_marker_step = length + 1
        recall_start = length + 2

        # Inputs hold the unrotated payload between the two markers.
        inputs = np.zeros(
            (self.batch_size, total_steps,
             self.control_bits + self.data_bits),
            dtype=np.float32)
        inputs[:, 0, 0] = 1  # Control channel 0: memorization bit.
        inputs[:, 1:recall_marker_step,
               self.control_bits:self.control_bits + self.data_bits] = payload
        inputs[:, recall_marker_step, 1] = 1  # Control channel 1: recall bit.

        # Resolve the rotation into an offset in [0, DATA_BITS): output bit j
        # is taken from input bit (j + offset) mod DATA_BITS.
        offset = -self.num_bits
        if abs(offset) < 1:
            # Relative rotation: fraction of the item width.
            offset = offset * self.data_bits
        offset = int(np.round(offset) % self.data_bits)

        # Rotate every item along the data-bit axis.
        rotated = np.roll(payload, -offset, axis=2)

        # Targets (data bits only) hold the rotated payload on recall steps.
        targets = np.zeros((self.batch_size, total_steps, self.data_bits),
                           dtype=np.float32)
        targets[:, recall_start:, :] = rotated

        # Mask is 1 exactly on the recall steps.
        mask = torch.zeros([self.batch_size,
                            total_steps]).type(torch.ByteTensor)
        mask[:, recall_start:] = 1

        # Wrap everything into the tuples expected by the caller.
        data_tuple = DataTuple(
            torch.from_numpy(inputs).type(self.dtype),
            torch.from_numpy(targets).type(self.dtype))
        aux_tuple = AlgSeqAuxTuple(mask, length, 1)

        return data_tuple, aux_tuple
Beispiel #4
0
    def generate_batch(self):
        """
        Creates one batch built from interleaved sub-sequences x_i and y_i of
        random lengths, where each y_i is recalled right after it is shown and
        all x_i are recalled together at the end.

        :returns: (data_tuple, aux_tuple) holding inputs, targets and mask.
                  pattern of inputs: # x1 % y1 & d1 # x2 % y2 & d2 ... # xn % yn & dn $ d`
                  pattern of target:    d   d   F(y1)  d  d    F(y2)  ... d   d   F(yn) all(xi)
                  F: inversion function
                  mask: marks the time steps whose target data is meaningful.
                  xi, yi, dn (d'): sub-sequence x of random length,
                  sub-sequence y of random length, and dummies.

        NOTE(review): ``self.augment`` and ``self.add_ctrl`` are defined
        outside this block. The code assumes ``augment`` returns a list whose
        last element is the dummy part for the given sequence - confirm
        against their definitions.

        """
        # Control channel patterns (this task hard-codes four control bits).
        pos = [0, 0, 0, 0]
        ctrl_data = [0, 0, 0, 0]
        ctrl_dummy = [0, 0, 1, 0]
        ctrl_inter = [0, 0, 0, 1]
        markers = ctrl_data, ctrl_dummy, pos

        # Number of sub-sequences; x and y currently share the same count.
        nb_sub_seq_a = np.random.randint(
            self.num_subseq_min, self.num_subseq_max + 1)
        nb_sub_seq_b = nb_sub_seq_a

        # Length of every x_i, then of every y_i.
        lo = self.min_sequence_length
        hi = self.max_sequence_length + 1
        seq_lengths_a = np.random.randint(low=lo, high=hi, size=nb_sub_seq_a)
        seq_lengths_b = np.random.randint(low=lo, high=hi, size=nb_sub_seq_b)

        # Random bits: all x sub-sequences first, then all y sub-sequences.
        x = []
        for n in seq_lengths_a:
            x.append(np.random.binomial(
                1, self.bias, (self.batch_size, n, self.data_bits)))
        y = []
        for n in seq_lengths_b:
            y.append(np.random.binomial(
                1, self.bias, (self.batch_size, n, self.data_bits)))

        # Raw target data: every y followed by every x, along the time axis.
        target = np.concatenate(y + x, axis=1)

        # Augment each x with a start marker and its dummies.
        xx = [
            self.augment(seq, markers, ctrl_start=[1, 0, 0, 0],
                         add_marker_data=True)
            for seq in x
        ]
        # Augment each y without a data marker.
        # TODO: ctrl_start is not needed here, it is replaced by ctrl_xy.
        yy = [
            self.augment(seq, markers, ctrl_start=[0, 1, 0, 0],
                         add_marker_data=False)
            for seq in y
        ]

        # Single-step separator placed before the trailing x dummies.
        blank_step = np.zeros((self.batch_size, 1, self.data_bits))
        inter_seq = self.add_ctrl(blank_step, ctrl_inter, pos)

        # Single-step separator placed between x_i and y_i.
        ctrl_xy = np.array([0, 1, 0, 0])
        inter_xy = self.add_ctrl(
            np.zeros((self.batch_size, 1, self.data_bits)), ctrl_xy, pos)

        # Interleave: x_i data, x/y separator, y_i flipped along the time
        # axis, then the dummies of y_i.
        data_1 = []
        for x_parts, y_parts in zip(xx, yy):
            data_1.extend(
                x_parts[:-1]
                + [inter_xy, np.fliplr(y_parts[0]), y_parts[1]])

        # Dummies of every x_i with their first time step dropped.
        data_2 = [x_parts[-1][:, 1:, :] for x_parts in xx]

        # Assemble the inputs along the time axis and move to PyTorch.
        inputs = np.concatenate(data_1 + [inter_seq] + data_2, axis=1)
        inputs = torch.from_numpy(inputs).type(self.dtype)
        target = torch.from_numpy(target).type(self.dtype)

        # Mask: time steps at which every control channel equals 1.
        ctrl_is_one = inputs[:, :, 0:self.control_bits] == 1
        mask = ctrl_is_one[..., 0]
        for bit in range(self.control_bits):
            mask = ctrl_is_one[..., bit] * mask

        # Reset the control channels on the masked steps. The mask of the
        # first sample is applied to the whole batch.
        inputs[:, mask[0], 0:self.control_bits] = 0

        # Scatter the raw targets onto the masked steps; all other steps
        # keep a zero target.
        target_with_dummies = torch.zeros_like(
            inputs[:, :, self.control_bits:])
        target_with_dummies[:, mask[0], :] = target

        # Wrap everything into the tuples expected by the caller. The
        # reported length is the longest x sub-sequence - for now.
        data_tuple = DataTuple(inputs, target_with_dummies)
        aux_tuple = AlgSeqAuxTuple(
            mask, max(seq_lengths_a), nb_sub_seq_a + nb_sub_seq_b)

        return data_tuple, aux_tuple
    def generate_batch(self):
        """
        Creates one batch of a recall task in which only every
        ``skip_length``-th item of a random bit sequence, starting at index
        ``seq_start``, has to be recalled.

        One sequence length is sampled per call and shared by the whole batch.
        The targets consist of SEQ_LENGTH+2 all-zero steps followed by the
        selected items.

        :returns: (data_tuple, aux_tuple). data_tuple wraps the inputs
            (control channels followed by data channels) and the targets
            (data channels only); aux_tuple wraps the mask, the sampled
            sequence length and the constant 1.

        NOTE(review): ``self.augment`` is defined outside this block. The
        code assumes it returns a list whose last element is the dummy part,
        which is discarded here - confirm against its definition.

        TODO: all items of a batch currently share the same seq_length.

        """
        assert (self.max_sequence_length > self.seq_start)

        n_ctrl = self.control_bits

        # Control channel patterns; the examples assume three control bits.
        pos = np.zeros(n_ctrl)  # [0, 0, 0]
        ctrl_data = np.zeros(n_ctrl)  # [0, 0, 0]
        ctrl_inter = np.zeros(n_ctrl)
        ctrl_inter[1] = 1  # [0, 1, 0]
        ctrl_y = np.zeros(n_ctrl)
        ctrl_y[2] = 1  # [0, 0, 1]
        ctrl_dummy = np.ones(n_ctrl)  # [1, 1, 1]
        ctrl_start = np.zeros(n_ctrl)
        ctrl_start[0] = 1  # [1, 0, 0]

        # Random draws: length first, then the bits.
        length = np.random.randint(self.min_sequence_length,
                                   self.max_sequence_length + 1)
        payload = np.random.binomial(
            1, self.bias, (self.batch_size, length, self.data_bits))

        # Items to recall: a strided selection along the time axis.
        recalled = np.array(payload[:, self.seq_start::self.skip_length, :])

        # Input part 1: the full sequence, augmented with a start marker.
        x_parts = self.augment(np.array(payload),
                               (ctrl_data, ctrl_dummy, pos),
                               ctrl_start=ctrl_start,
                               add_marker_data=True,
                               add_marker_dummy=False)

        # Input part 2: zero data shaped like the recalled items, carrying
        # the dummy control pattern and introduced by the inter marker.
        y_parts = self.augment(np.zeros(recalled.shape),
                               (ctrl_dummy, ctrl_dummy, pos),
                               ctrl_start=ctrl_inter,
                               add_marker_data=True,
                               add_marker_dummy=False)

        # Keep everything but the trailing element of each augmentation.
        input_pieces = list(x_parts[:-1]) + list(y_parts[:-1])
        inputs = np.concatenate(input_pieces, axis=1)

        # Targets: SEQ_LENGTH+2 zero steps, then the recalled items.
        leading_zeros = np.zeros(
            [self.batch_size, length + 2, self.data_bits],
            dtype=np.float32)
        targets = np.concatenate((leading_zeros, recalled), axis=1)

        # Move to PyTorch.
        inputs = torch.from_numpy(inputs).type(self.dtype)
        targets = torch.from_numpy(targets).type(self.dtype)

        # Mask: time steps at which every control channel equals 1.
        # TODO: batch might have different sequence lengths.
        ctrl_is_one = inputs[..., 0:n_ctrl] == 1
        mask = ctrl_is_one[..., 0]
        for bit in range(n_ctrl):
            mask = ctrl_is_one[..., bit] * mask

        # Overwrite the control channels on the masked steps with ctrl_y.
        # The mask of the first sample is applied to the whole batch.
        # TODO: fix the batch indexing.
        inputs[:, mask[0],
               0:n_ctrl] = torch.tensor(ctrl_y).type(self.dtype)

        # Wrap everything into the tuples expected by the caller.
        data_tuple = DataTuple(inputs, targets)
        aux_tuple = AlgSeqAuxTuple(mask, length, 1)

        return data_tuple, aux_tuple
Beispiel #6
0
    def generate_batch(self):
        """
        Creates one batch built from interleaved sub-sequences x_i and y_i of
        random lengths, where only the x_i have to be recalled, all together
        at the end.

        :returns: (data_tuple, aux_tuple) holding inputs, targets and mask.
                  pattern of inputs: # x1 % y1 # x2 % y2 ... # xn % yn & d
                  pattern of target: dummies ...   ...       ...   ...   all(xi)
                  mask: marks the time steps whose target data is meaningful.
                  xi, yi, d: sub-sequence x of random length, sub-sequence y
                  of random length, and dummies.

        NOTE(review): ``self.augment`` and ``self.add_ctrl`` are defined
        outside this block. The code assumes ``augment`` returns a list whose
        last element is the dummy part for the given sequence - confirm
        against their definitions.

        """
        # Control channel patterns (this task hard-codes four control bits).
        pos = [0, 0, 0, 0]
        ctrl_data = [0, 0, 0, 0]
        ctrl_dummy = [0, 0, 1, 0]
        ctrl_inter = [0, 0, 0, 1]
        markers = ctrl_data, ctrl_dummy, pos

        # Number of sub-sequences; x and y currently share the same count.
        nb_sub_seq_a = np.random.randint(self.num_subseq_min,
                                         self.num_subseq_max + 1)
        nb_sub_seq_b = nb_sub_seq_a

        # Length of every x_i, then of every y_i.
        lo = self.min_sequence_length
        hi = self.max_sequence_length + 1
        seq_lengths_a = np.random.randint(low=lo, high=hi, size=nb_sub_seq_a)
        seq_lengths_b = np.random.randint(low=lo, high=hi, size=nb_sub_seq_b)

        # Random bits: all x sub-sequences first, then all y sub-sequences.
        x = []
        for n in seq_lengths_a:
            x.append(np.random.binomial(
                1, self.bias, (self.batch_size, n, self.data_bits)))
        y = []
        for n in seq_lengths_b:
            y.append(np.random.binomial(
                1, self.bias, (self.batch_size, n, self.data_bits)))

        # Raw target data: every x, concatenated along the time axis.
        target = np.concatenate(x, axis=1)

        # Augment each x with a start marker; no marker on its dummies.
        xx = [
            self.augment(seq, markers, ctrl_start=[1, 0, 0, 0],
                         add_marker_data=True, add_marker_dummy=False)
            for seq in x
        ]

        # Augment each y with its own start marker.
        # TODO: the dummies of y are never used (no y needs recalling); an
        # argument specifying whether dummies are needed would avoid
        # building them.
        yy = [
            self.augment(seq, markers, ctrl_start=[0, 1, 0, 0],
                         add_marker_data=True)
            for seq in y
        ]

        # Single-step separator placed before the trailing x dummies.
        blank_step = np.zeros((self.batch_size, 1, self.data_bits))
        inter_seq = self.add_ctrl(blank_step, ctrl_inter, pos)

        # Interleave the data parts: x_1, y_1, x_2, y_2, ...
        data_1 = []
        for x_parts, y_parts in zip(xx, yy):
            data_1.extend(x_parts[:-1] + y_parts[:-1])

        # Separator followed by the dummies of every x_i.
        data_2 = [inter_seq]
        data_2.extend(x_parts[-1] for x_parts in xx)

        # Assemble the inputs along the time axis and move to PyTorch.
        inputs = np.concatenate(data_1 + data_2, axis=1)
        inputs = torch.from_numpy(inputs).type(self.dtype)
        target = torch.from_numpy(target).type(self.dtype)

        # Mask: time steps at which every control channel equals 1.
        ctrl_is_one = inputs[:, :, 0:self.control_bits] == 1
        mask = ctrl_is_one[..., 0]
        for bit in range(self.control_bits):
            mask = ctrl_is_one[..., bit] * mask

        # Reset the control channels on the masked steps. The mask of the
        # first sample is applied to the whole batch.
        inputs[:, mask[0], 0:self.control_bits] = 0

        # Scatter the raw targets onto the masked steps; all other steps
        # keep a zero target.
        target_with_dummies = torch.zeros_like(
            inputs[:, :, self.control_bits:])
        target_with_dummies[:, mask[0], :] = target

        # Wrap everything into the tuples expected by the caller. The
        # reported length is the longest x sub-sequence - for now.
        data_tuple = DataTuple(inputs, target_with_dummies)
        aux_tuple = AlgSeqAuxTuple(mask, max(seq_lengths_a),
                                   nb_sub_seq_a + nb_sub_seq_b)

        return data_tuple, aux_tuple
Beispiel #7
0
    def generate_batch(self):
        """
        Creates one batch made of several random-length bit sub-sequences,
        where the target is the last item of every sub-sequence.

        The targets consist of sum(lengths) + number_of_subsequences + 1
        all-zero steps followed by one step per sub-sequence holding that
        sub-sequence's final item.

        :returns: (data_tuple, aux_tuple). data_tuple wraps the inputs
            (control channels followed by data channels) and the targets
            (data channels only); aux_tuple wraps the mask, the longest
            sub-sequence length and the number of sub-sequences.

        NOTE(review): ``self.augment`` and ``self.add_ctrl`` are defined
        outside this block. The code assumes ``augment`` returns a list whose
        last element is the dummy part for the given sequence - confirm
        against their definitions.

        """
        # Control channel patterns (this task hard-codes two control bits).
        pos = [0, 0]
        ctrl_data = [0, 0]
        ctrl_dummy = [0, 1]
        ctrl_inter = [0, 1]
        markers = ctrl_data, ctrl_dummy, pos

        # Random draws: number of sub-sequences, their lengths, their bits.
        num_sub_seq = np.random.randint(self.num_subseq_min,
                                        self.num_subseq_max + 1)
        seq_length = np.random.randint(low=self.min_sequence_length,
                                       high=self.max_sequence_length + 1,
                                       size=num_sub_seq)
        x = []
        for n in seq_length:
            x.append(np.random.binomial(
                1, self.bias, (self.batch_size, n, self.data_bits)))

        # Final item of every sub-sequence, kept as a one-step sequence.
        x_last = [sub[:, None, -1, :] for sub in x]

        # Targets: zeros while the input is presented (all items, one marker
        # per sub-sequence and one separator), then the final items.
        n_leading = sum(seq_length) + len(seq_length) + 1
        leading_zeros = np.zeros(
            [self.batch_size, n_leading, self.data_bits],
            dtype=np.float32)
        targets = np.concatenate([leading_zeros] + x_last, axis=1)

        # Augment every sub-sequence with a start marker.
        xx = [
            self.augment(seq, markers, ctrl_start=[1, 0],
                         add_marker_data=True, add_marker_dummy=False)
            for seq in x
        ]

        # Data parts of all sub-sequences, in order.
        data_1 = []
        for parts in xx:
            data_1.extend(parts[:-1])

        # Single-step separator between the data and the dummies.
        inter_seq = self.add_ctrl(
            np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos)

        # One dummy step per sub-sequence: the last step of its dummy part.
        x_dummy_last = []
        for parts in xx:
            for dummy in parts[-1:]:
                x_dummy_last.append(dummy[:, None, -1, :])

        # Assemble the inputs along the time axis and move to PyTorch.
        inputs = np.concatenate(data_1 + [inter_seq] + x_dummy_last, axis=1)
        inputs = torch.from_numpy(inputs).type(self.dtype)
        targets = torch.from_numpy(targets).type(self.dtype)

        # Mask: time steps at which every control channel equals 1.
        # TODO: batch might have different sequence lengths.
        ctrl_is_one = inputs[..., 0:self.control_bits] == 1
        mask = ctrl_is_one[..., 0]
        for bit in range(self.control_bits):
            mask = ctrl_is_one[..., bit] * mask

        # Reset the control channels on the masked steps. The mask of the
        # first sample is applied to the whole batch.
        # TODO: fix the batch indexing.
        inputs[:, mask[0], 0:self.control_bits] = 0

        # Wrap everything into the tuples expected by the caller. The
        # reported length is the longest sub-sequence - for now.
        data_tuple = DataTuple(inputs, targets)
        aux_tuple = AlgSeqAuxTuple(mask, max(seq_length), num_sub_seq)

        return data_tuple, aux_tuple
    def generate_batch(self):
        """
        Generates a batch for a two-sequence comparison task.

        A random bit sequence x is followed by a second sequence y. For each
        sample, y is x XOR-ed with a random scrambling pattern and then
        flipped along the time axis. The scrambling pattern of a sample is
        zeroed out with probability (1 - bias), in which case the XOR leaves
        x unchanged before the flip.

        The single-bit target is placed after 2*SEQ_LENGTH+1 all-zero steps.
        If ``self.predict_inverse`` is set, the target is 1 when the
        scrambling pattern of the sample has at least one non-zero bit;
        otherwise the target is 1 when the pattern is all zeros.

        :returns: (data_tuple, aux_tuple). data_tuple wraps
            inputs (control channels followed by data channels) and
            target [BATCH_SIZE, 2*SEQ_LENGTH+2, 1]; aux_tuple wraps the mask,
            the sampled sequence length and the constant 1.
                  pattern of inputs: x1, x2, d
                  pattern of target: d, d, e
                  where x1 and x2 are the two sequences and d are dummies.

        NOTE(review): ``self.augment`` and ``self.add_ctrl`` are defined
        outside this block. The code assumes ``augment`` returns a list whose
        last element is the dummy part, which is discarded here - confirm
        against their definitions.

        """
        # Control channel patterns; the examples assume three control bits.
        pos = np.zeros(self.control_bits)  # [0, 0, 0]
        ctrl_data = np.zeros(self.control_bits)  # [0, 0, 0]
        ctrl_dummy = np.zeros(self.control_bits)  # [0, 0, 0]
        ctrl_inter = np.zeros(self.control_bits)
        ctrl_inter[1] = 1  # [0, 1, 0]
        ctrl_y = np.zeros(self.control_bits)
        ctrl_y[2] = 1  # [0, 0, 1]
        ctrl_start = np.zeros(self.control_bits)
        ctrl_start[0] = 1  # [1, 0, 0]

        # Markers used when augmenting the first sequence.
        markers = ctrl_data, ctrl_dummy, pos

        # One sequence length shared by the whole batch.
        seq_length = np.random.randint(low=self.min_sequence_length,
                                       high=self.max_sequence_length + 1)

        # First sequence.
        x = [
            np.array(
                np.random.binomial(
                    1, self.bias,
                    (self.batch_size, seq_length, self.data_bits)))
        ]

        # Random pattern of the same shape as x, XOR-ed with x to scramble it.
        xor_scrambler = np.array(np.random.binomial(1, self.bias, x[0].shape))

        # Per-sample switch: samples drawing 0 get an all-zero scrambling
        # pattern, so the XOR returns x unchanged for them. (A sample drawing
        # 1 can still end up with an all-zero pattern by chance.)
        scrambler_mask = np.array(
            np.random.binomial(1, self.bias, (self.batch_size, )))
        xor_scrambler = np.array(xor_scrambler *
                                 scrambler_mask[:, np.newaxis, np.newaxis])

        # Second sequence: scrambled x, flipped along the time axis.
        aux_seq = np.fliplr(np.logical_xor(x[0], xor_scrambler))

        # True for samples whose scrambling pattern has any non-zero bit,
        # i.e. samples where the XOR actually changed x.
        actual_target = np.array(np.any(xor_scrambler, axis=(1, 2)))

        if self.predict_inverse:
            # Target is 1 when the sample was scrambled.
            actual_target = actual_target[:, np.newaxis, np.newaxis]
        else:
            # Target is 1 when the sample was left unscrambled.
            actual_target = np.logical_not(actual_target[:, np.newaxis,
                                                         np.newaxis])

        # Target: zeros for 2*SEQ_LENGTH+1 steps, then the single answer bit.
        seq_length_tdummies = 2 * seq_length + 1
        dummies_target = np.zeros([self.batch_size, seq_length_tdummies, 1],
                                  dtype=np.float32)
        target = np.concatenate((dummies_target, actual_target), axis=1)

        # First sequence with its start marker; the dummy part is dropped.
        xx = [
            self.augment(seq,
                         markers,
                         ctrl_start=ctrl_start,
                         add_marker_data=True,
                         add_marker_dummy=False) for seq in x
        ]
        data_1 = [arr for a in xx for arr in a[:-1]]

        # Single-step separator between the two sequences.
        inter_seq = [
            self.add_ctrl(np.zeros((self.batch_size, 1, self.data_bits)),
                          ctrl_inter, pos)
        ]

        # Second sequence, tagged with ctrl_y; the dummy part is dropped.
        markers2 = ctrl_y, ctrl_dummy, pos
        yy = [
            self.augment(aux_seq,
                         markers2,
                         ctrl_start=ctrl_y,
                         add_marker_data=False,
                         add_marker_dummy=False)
        ]
        data_2 = [arr for a in yy for arr in a[:-1]]

        # Flag the final step of the second sequence by switching on every
        # control channel; the mask below picks up exactly such steps.
        data_2[0][:, -1, 0:self.control_bits] = np.ones(len(ctrl_dummy))

        # Concatenate all parts of the inputs along the time axis.
        inputs = np.concatenate(data_1 + inter_seq + data_2, axis=1)

        # PyTorch variables.
        inputs = torch.from_numpy(inputs).type(self.dtype)
        target = torch.from_numpy(target).type(self.dtype)

        # Mask: time steps at which every control channel equals 1.
        mask_all = inputs[..., 0:self.control_bits] == 1
        mask = mask_all[..., 0]
        for i in range(self.control_bits):
            mask = mask_all[..., i] * mask

        # Restore the ctrl_y pattern on the flagged steps. The mask of the
        # first sample is applied to the whole batch.
        # TODO: fix the batch indexing.
        inputs[:, mask[0],
               0:self.control_bits] = torch.tensor(ctrl_y).type(self.dtype)

        # Return tuples.
        data_tuple = DataTuple(inputs, target)
        aux_tuple = AlgSeqAuxTuple(mask, seq_length, 1)

        return data_tuple, aux_tuple
Beispiel #9
0
    def generate_batch(self):
        """
        Creates one batch made of several random-length bit sub-sequences,
        where only the last sub-sequence has to be recalled.

        The targets consist of sum(lengths) + number_of_subsequences + 1
        all-zero steps followed by the whole last sub-sequence.

        :returns: (data_tuple, aux_tuple) holding inputs, targets and mask.
                  pattern of inputs: x1, x2, ...xn d
                  pattern of target: d, d,   ...d xn
                  mask: marks the time steps whose target data is meaningful.
                  xi, d: sub-sequences, dummies

        NOTE(review): ``self.augment`` and ``self.add_ctrl`` are defined
        outside this block. The code assumes ``augment`` returns a list whose
        last element is the dummy part for the given sequence - confirm
        against their definitions.

        TODO: deal with batch_size > 1

        """
        # Control channel patterns (this task hard-codes two control bits).
        pos = [0, 0]
        ctrl_data = [0, 0]
        ctrl_dummy = [0, 1]
        ctrl_inter = [0, 1]
        markers = ctrl_data, ctrl_dummy, pos

        # Random draws: number of sub-sequences, their lengths, their bits.
        num_sub_seq = np.random.randint(self.num_subseq_min,
                                        self.num_subseq_max + 1)
        seq_length = np.random.randint(low=self.min_sequence_length,
                                       high=self.max_sequence_length + 1,
                                       size=num_sub_seq)
        x = []
        for n in seq_length:
            x.append(np.random.binomial(
                1, self.bias, (self.batch_size, n, self.data_bits)))

        # Targets: zeros while the input is presented (all items, one marker
        # per sub-sequence and one separator), then the last sub-sequence.
        n_leading = sum(seq_length) + len(seq_length) + 1
        leading_zeros = np.zeros(
            [self.batch_size, n_leading, self.data_bits],
            dtype=np.float32)
        targets = np.concatenate((leading_zeros, x[-1]), axis=1)

        # Augment every sub-sequence with a start marker.
        xx = [
            self.augment(seq, markers, ctrl_start=[1, 0],
                         add_marker_data=True, add_marker_dummy=False)
            for seq in x
        ]

        # Data parts of all sub-sequences, in order.
        data_1 = []
        for parts in xx:
            data_1.extend(parts[:-1])

        # Single-step separator between the data and the dummies.
        inter_seq = self.add_ctrl(
            np.zeros((self.batch_size, 1, self.data_bits)), ctrl_inter, pos)

        # Dummy part of the last sub-sequence only.
        last_dummies = xx[-1][-1]

        # Assemble the inputs along the time axis and move to PyTorch.
        inputs = np.concatenate(data_1 + [inter_seq] + [last_dummies], axis=1)
        inputs = torch.from_numpy(inputs).type(self.dtype)
        targets = torch.from_numpy(targets).type(self.dtype)

        # Mask: time steps at which every control channel equals 1.
        # TODO: batch might have different sequence lengths.
        ctrl_is_one = inputs[..., 0:self.control_bits] == 1
        mask = ctrl_is_one[..., 0]
        for bit in range(self.control_bits):
            mask = ctrl_is_one[..., bit] * mask

        # Reset the control channels on the masked steps. The mask of the
        # first sample is applied to the whole batch.
        # TODO: fix the batch indexing.
        inputs[:, mask[0], 0:self.control_bits] = 0

        # Wrap everything into the tuples expected by the caller. The
        # reported length is the longest sub-sequence - for now.
        data_tuple = DataTuple(inputs, targets)
        aux_tuple = AlgSeqAuxTuple(mask, max(seq_length), num_sub_seq)

        return data_tuple, aux_tuple