Beispiel #1
0
def create_pyreader(args,
                    file_name,
                    feed_list,
                    place,
                    model='lac',
                    reader=None,
                    return_reader=False,
                    mode='train'):
    """Build a ``fluid.io.PyReader`` fed from a LAC or ERNIE data source.

    Args:
        args: Configuration object. The 'lac' branch reads ``batch_size`` and,
            in train mode, ``traindata_shuffle_buffer``. The 'ernie' branch
            reads ``batch_size`` and, in train mode, ``epoch``; when it has to
            build the reader itself it also reads ``vocab_path``,
            ``label_map_config``, ``max_seq_len``, ``do_lower_case`` and
            ``random_seed``.
        file_name: Forwarded to ``reader.file_reader`` ('lac') or
            ``reader.data_generator`` ('ernie').
        feed_list: Forwarded to ``fluid.io.PyReader``.
        place: Forwarded as ``places=`` when attaching the generator.
        model: Either ``'lac'`` or ``'ernie'``.
        reader: Existing reader to reuse. When ``None``, a ``Dataset(args)``
            ('lac') or ``task_reader.SequenceLabelReader`` ('ernie') is
            created.
        return_reader: When true, return ``(pyreader, reader)`` instead of
            just the pyreader.
        mode: ``'train'`` turns shuffling on; any other value is forwarded to
            the reader as ``mode=`` ('lac') or ``phase=`` ('ernie').

    Returns:
        The configured PyReader, or ``(pyreader, reader)`` when
        ``return_reader`` is true.

    Raises:
        ValueError: If ``model`` is neither ``'lac'`` nor ``'ernie'``.
    """
    # Reject unknown models up front: otherwise a PyReader with no generator
    # attached would be returned and only fail later, when it is iterated.
    if model not in ('lac', 'ernie'):
        raise ValueError(
            "unsupported model {!r}: expected 'lac' or 'ernie'".format(model))

    pyreader = fluid.io.PyReader(feed_list=feed_list,
                                 capacity=300,
                                 use_double_buffer=True,
                                 iterable=True)
    is_train = mode == 'train'

    if model == 'lac':
        if reader is None:
            reader = Dataset(args)

        if is_train:
            # Training samples are shuffled; file_reader is called without an
            # explicit mode here, exactly as before.
            samples = paddle.reader.shuffle(
                reader.file_reader(file_name),
                buf_size=args.traindata_shuffle_buffer)
        else:
            samples = reader.file_reader(file_name, mode=mode)

        pyreader.decorate_sample_list_generator(
            paddle.batch(samples, batch_size=args.batch_size),
            places=place)
    else:
        # model == 'ernie'
        if reader is None:
            reader = task_reader.SequenceLabelReader(
                vocab_path=args.vocab_path,
                label_map_config=args.label_map_config,
                max_seq_len=args.max_seq_len,
                do_lower_case=args.do_lower_case,
                in_tokens=False,
                random_seed=args.random_seed)

        if is_train:
            batches = reader.data_generator(file_name,
                                            args.batch_size,
                                            args.epoch,
                                            shuffle=True,
                                            phase="train")
        else:
            # Non-training phases make a single, unshuffled pass.
            batches = reader.data_generator(file_name,
                                            args.batch_size,
                                            epoch=1,
                                            shuffle=False,
                                            phase=mode)

        pyreader.decorate_batch_generator(batches, places=place)

    if return_reader:
        return pyreader, reader
    return pyreader
Beispiel #2
0
def create_pyreader(args,
                    file_name,
                    feed_list,
                    place,
                    model='lac',
                    reader=None,
                    return_reader=False,
                    mode='train'):
    """Build a ``fluid.io.DataLoader`` fed from a LAC or ERNIE data source.

    Args:
        args: Configuration object. The 'lac' branch reads ``use_cuda``,
            ``batch_size`` and, in train mode, ``traindata_shuffle_buffer``.
            The 'ernie' branch reads ``batch_size`` and, in train mode,
            ``epoch``; when it has to build the reader itself it also reads
            ``vocab_path``, ``label_map_config``, ``max_seq_len``,
            ``do_lower_case`` and ``random_seed``.
        file_name: Forwarded to ``reader.file_reader`` ('lac') or
            ``reader.data_generator`` ('ernie').
        feed_list: Forwarded to ``fluid.io.DataLoader.from_generator``.
        place: Forwarded as ``places=`` when attaching the generator.
        model: Either ``'lac'`` or ``'ernie'``.
        reader: Existing reader to reuse. When ``None``, a ``Dataset(args)``
            ('lac') or ``SequenceLabelReader`` ('ernie') is created.
        return_reader: When true, return ``(pyreader, reader)`` instead of
            just the loader.
        mode: ``'train'`` turns shuffling on; any other value is forwarded to
            the reader as ``mode=`` ('lac') or ``phase=`` ('ernie').

    Returns:
        The configured data loader, or ``(pyreader, reader)`` when
        ``return_reader`` is true.

    Raises:
        ValueError: If ``model`` is neither ``'lac'`` nor ``'ernie'``.
    """
    # Previously an unknown model fell through both branches and crashed at
    # the return statement with UnboundLocalError; fail with a clear message.
    if model not in ('lac', 'ernie'):
        raise ValueError(
            "unsupported model {!r}: expected 'lac' or 'ernie'".format(model))

    # Both branches used an identical loader configuration.
    pyreader = fluid.io.DataLoader.from_generator(feed_list=feed_list,
                                                  capacity=50,
                                                  use_double_buffer=True,
                                                  iterable=True)
    is_train = mode == 'train'

    if model == 'lac':
        # Only this branch splits the batch across devices, so the device
        # count is looked up here rather than for every model.
        if args.use_cuda:
            device_count = len(fluid.cuda_places())
        else:
            device_count = len(fluid.cpu_places())

        if reader is None:
            reader = Dataset(args)

        if is_train:
            # Training samples are shuffled; file_reader is called without an
            # explicit mode here, exactly as before.
            samples = fluid.io.shuffle(
                reader.file_reader(file_name),
                buf_size=args.traindata_shuffle_buffer)
        else:
            samples = reader.file_reader(file_name, mode=mode)

        # Floor division keeps the per-device batch size an integer; true
        # division would hand a float to the batching helper on Python 3.
        per_device_batch = args.batch_size // device_count
        pyreader.set_sample_list_generator(
            fluid.io.batch(samples, batch_size=per_device_batch),
            places=place)
    else:
        # model == 'ernie'
        if reader is None:
            reader = SequenceLabelReader(
                vocab_path=args.vocab_path,
                label_map_config=args.label_map_config,
                max_seq_len=args.max_seq_len,
                do_lower_case=args.do_lower_case,
                random_seed=args.random_seed)

        if is_train:
            batches = reader.data_generator(file_name,
                                            args.batch_size,
                                            args.epoch,
                                            shuffle=True,
                                            phase="train")
        else:
            # Non-training phases make a single, unshuffled pass.
            batches = reader.data_generator(file_name,
                                            args.batch_size,
                                            epoch=1,
                                            shuffle=False,
                                            phase=mode)

        pyreader.set_batch_generator(batches, places=place)

    if return_reader:
        return pyreader, reader
    return pyreader