예제 #1
0
# Register the remaining options from one table so each flag, type and
# default sits on a single row. The list-valued options (--hidden_sizes,
# --filter_sizes) are taken as comma-separated strings.
for _flag, _type, _default in (
        ('--dropout', float, 0.5),
        ('--emb_dim', int, 64),
        ('--hidden_sizes', str, '64,128,128'),
        ('--l_2', float, 0.0),
        ('--filter_sizes', str, '2,3,4'),
        ('--num_filters', int, 128),
):
    parser.add_argument(_flag, type=_type, default=_default)

# Parse the command line into the namespace used below.
args = parser.parse_args()

# ##############################################################################
# Load data
################################################################################
from corpus import middle_load
from data_loader import DataLoader

# Load the corpus and copy its size metadata onto ``args``.
data = middle_load(args.data)
args.max_len = data["max_len"]
args.vocab_size = data['dict']['vocab_size']
args.label_size = data['dict']['label_size']

# The size options arrive as comma-separated strings; turn each into a list
# of ints.
args.hidden_sizes = [int(size) for size in args.hidden_sizes.split(",")]
args.filter_sizes = [int(size) for size in args.filter_sizes.split(",")]

# Batch loader over the 'train' split of the corpus.
training_data = DataLoader(
    data['train']['src'],
    data['train']['label'],
    args.max_len,
    args.label_size,
    batch_size=args.batch_size,
)

validation_data = DataLoader(data['valid']['src'],
                             data['valid']['label'],
                             args.max_len,
예제 #2
0
    def __next__(self):
        """Return the next ``(data, label)`` batch, both passed through padding.

        Raises:
            StopIteration: once ``stop_step`` batches have been served. The
                step counter is reset to 0 first, so a subsequent iteration
                starts again from the first batch.
        """
        step = self._step
        if step == self.stop_step:
            # Rewind before signalling exhaustion so the loader is reusable.
            self._step = 0
            raise StopIteration()

        # The last batch may be short: never slice past ``sents_size``.
        begin = step * self._batch_size
        end = begin + min(self._batch_size, self.sents_size - begin)

        self._step = step + 1

        batch_src = self._src_sents[begin:end]
        batch_label = self._label[begin:end]
        data = data_pad(batch_src, self._max_len)
        label = label_pad(batch_label, self.label_size)
        return data, label


if __name__ == '__main__':
    # Manual smoke test: load the corpus, pull one batch and print it.
    from corpus import middle_load

    corpus = middle_load('./data/corpus')
    print(corpus['dict']['label'])

    # Invert the mapping stored under ['dict']['train'] (presumably
    # token -> index; confirm against corpus.py) so batch entries can be
    # printed as tokens.
    index_to_token = {
        index: token for token, index in corpus['dict']['train'].items()
    }

    # NOTE(review): the positional 16, 6, 8 presumably are max_len,
    # label_size and batch_size -- confirm against DataLoader.__init__.
    loader = DataLoader(corpus['train']['src'], corpus['train']['label'],
                        16, 6, 8)

    batch, targets = next(loader)

    for row, target in zip(batch, targets):
        print([index_to_token[index] for index in row])
        print(target)