# ---- model hyper-parameters (continues an ArgumentParser built above this chunk) ----
parser.add_argument('--dropout', type=float, default=0.5)
parser.add_argument('--emb_dim', type=int, default=64)
# Comma-separated strings; converted to lists of ints after parsing (below).
parser.add_argument('--hidden_sizes', type=str, default='64,128,128')
parser.add_argument('--l_2', type=float, default=.0)  # presumably an L2 regularization weight — TODO confirm against the model code
parser.add_argument('--filter_sizes', type=str, default='2,3,4')  # presumably convolution filter widths (paired with --num_filters) — verify against model
parser.add_argument('--num_filters', type=int, default=128)

args = parser.parse_args()

################################################################################
# Load data
################################################################################
from corpus import middle_load
from data_loader import DataLoader

data = middle_load(args.data)

# Copy corpus-derived sizes onto the args namespace so downstream code can
# read every configuration value from one place.
args.max_len = data["max_len"]
args.vocab_size = data['dict']['vocab_size']
args.label_size = data['dict']['label_size']

# '64,128,128' -> [64, 128, 128]; '2,3,4' -> [2, 3, 4]
args.hidden_sizes = list(map(int, args.hidden_sizes.split(",")))
args.filter_sizes = list(map(int, args.filter_sizes.split(",")))

training_data = DataLoader(data['train']['src'],
                           data['train']['label'],
                           args.max_len,
                           args.label_size,
                           batch_size=args.batch_size)

# NOTE(review): this call is truncated here — its remaining arguments
# continue past the visible portion of the file.
validation_data = DataLoader(data['valid']['src'], data['valid']['label'], args.max_len,
def __next__(self):
    """Return the next (padded_data, padded_label) mini-batch.

    Once every batch has been served, the step counter is rewound so the
    loader can be iterated again, and StopIteration is raised.
    """
    if self._step == self.stop_step:
        # Exhausted: rewind for the next epoch, then signal the caller.
        self._step = 0
        raise StopIteration()

    offset = self._step * self._batch_size
    # The final batch may be short; never slice past the corpus end.
    batch_len = min(self._batch_size, self.sents_size - offset)
    self._step += 1

    batch_src = self._src_sents[offset:offset + batch_len]
    batch_lbl = self._label[offset:offset + batch_len]
    return data_pad(batch_src, self._max_len), label_pad(batch_lbl, self.label_size)


if __name__ == '__main__':
    # Quick manual smoke test: load a corpus, pull one batch, and print it
    # decoded back into tokens.
    from corpus import middle_load

    data = middle_load('./data/corpus')
    print(data['dict']['label'])

    # Invert the token->index vocabulary so batches can be decoded to words.
    i2w = dict((idx, word) for word, idx in data['dict']['train'].items())

    training_data = DataLoader(data['train']['src'],
                               data['train']['label'],
                               16, 6, 8)

    data, label = next(training_data)
    for d, l in zip(data, label):
        print([i2w[i] for i in d])
        print(l)