def get_train_dataloader(self):
    """
    Returns the training :class:`~torch.utils.data.DataLoader`.

    The sampler is whatever :obj:`self._get_train_sampler()` returns. When the
    ``datasets`` library is available and :obj:`self.train_dataset` is a
    :obj:`datasets.Dataset`, it is first passed through
    :obj:`self._remove_unused_columns` and the result is what the loader iterates.

    Subclass and override this method if you want to inject some custom behavior.

    Returns:
        A :class:`~torch.utils.data.DataLoader` configured from :obj:`self.args`
        (batch size, ``drop_last``, worker count, ``pin_memory``) and
        :obj:`self.data_collator`.

    Raises:
        ValueError: If :obj:`self.train_dataset` is :obj:`None`.
    """
    # Validate before touching the dataset: the debug print used to run first and
    # turned a missing dataset into an AttributeError instead of this ValueError.
    if self.train_dataset is None:
        raise ValueError("Trainer: training requires a train_dataset.")

    train_dataset = self.train_dataset

    # Debug trace of the dataset size. Datasets without ``__len__`` (e.g. iterable
    # style datasets) are reported as ``None`` rather than crashing here.
    dataset_size = len(train_dataset) if hasattr(train_dataset, "__len__") else None
    print("继承了类 %s %s" % (100, dataset_size))

    if is_datasets_available() and isinstance(train_dataset, datasets.Dataset):
        pruned_dataset = self._remove_unused_columns(train_dataset, description="training")
        # NOTE(review): some transformers releases appear to prune in place and
        # return None; keep the original object in that case -- confirm against
        # the installed version.
        if pruned_dataset is not None:
            train_dataset = pruned_dataset

    train_sampler = self._get_train_sampler()

    # TODO: add an id sequence here and run it through the same sampling
    # operation to obtain the correspondingly reordered id sequence.
    return DataLoader(
        # Use the (possibly pruned) local dataset; passing ``self.train_dataset``
        # here silently discarded the column removal done above.
        train_dataset,
        batch_size=self.args.train_batch_size,
        sampler=train_sampler,
        collate_fn=self.data_collator,
        drop_last=self.args.dataloader_drop_last,
        num_workers=self.args.dataloader_num_workers,
        pin_memory=self.args.dataloader_pin_memory,
    )
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """ A subclass of `Trainer` specific to Question-Answering tasks """ from transformers import Trainer, is_datasets_available, is_torch_tpu_available from transformers.trainer_utils import PredictionOutput if is_datasets_available(): import datasets if is_torch_tpu_available(): import torch_xla.core.xla_model as xm import torch_xla.debug.metrics as met class QuestionAnsweringTrainer(Trainer): def __init__(self, *args, eval_examples=None, post_process_function=None, **kwargs): super().__init__(*args, **kwargs) self.eval_examples = eval_examples