        # NOTE(review): chunk starts mid-statement — the string below closes an
        # argument list opened before the visible source (presumably a call that
        # names/fetches the scheduled learning rate); formatting reconstructed,
        # tokens unchanged.
        'learning_rate_0')
        # Log the scheduled learning rate to the training summary.
        propeller.summary.scalar('lr', scheduled_lr)
        # Expose raw logits as the prediction output of this ModelSpec.
        predictions = [logits, ]
        # lr_step_hook is defined upstream; presumably it advances the LR
        # schedule each step — TODO confirm against the enclosing function.
        train_hooks = [lr_step_hook]
        return propeller.ModelSpec(
            loss=loss,
            mode=mode,
            metrics=metrics,
            predictions=predictions,
            train_hooks=train_hooks)

if __name__ == '__main__':
    # CLI entry point for the DAN train/predict script.
    parser = propeller.ArgumentParser('DAN model with Paddle')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--from_pretrained', type=str, required=True)
    parser.add_argument('--warm_start_from', type=str)
    parser.add_argument('--epoch', type=int, default=3)
    parser.add_argument('--use_amp', action='store_true')
    args = parser.parse_args()
    # Switch Paddle to static-graph mode before any program construction.
    P.enable_static()
    # Fail fast when the pretrained checkpoint path does not exist.
    if not os.path.exists(args.from_pretrained):
        raise ValueError('--from_pretrained not found: %s' %
                         args.from_pretrained)
        # NOTE(review): chunk starts mid-call — these are the trailing keyword
        # arguments of an optimization(...) call opened before the visible
        # source; formatting reconstructed, tokens unchanged.
            weight_decay=self.config.weight_decay,
            scheduler="linear_warmup_decay", )
        # Log the scheduled learning rate to the training summary.
        propeller.summary.scalar('lr', scheduled_lr)

    def metrics(self, predictions, labels):
        """Build the eval metric dict: {'acc': Acc}.

        Both `predictions` and `labels` are reduced to class-id tensors via
        argmax over axis 1 before being fed to propeller.metrics.Acc.
        NOTE(review): labels being argmax'd suggests they arrive as one-hot or
        soft distributions rather than ids — confirm against the data pipeline.
        """
        predictions = L.argmax(predictions, axis=1)
        labels = L.argmax(labels, axis=1)
        #predictions = L.unsqueeze(predictions, axes=[1])
        acc = propeller.metrics.Acc(labels, predictions)
        #auc = propeller.metrics.Auc(labels, predictions)
        return {'acc': acc}

if __name__ == '__main__':
    # CLI entry point for distillation with a remote ERNIE teacher service.
    parser = propeller.ArgumentParser('distill model with ERNIE')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--teacher_vocab_file', type=str, required=True)
    parser.add_argument('--teacher_max_seqlen', type=int, default=128)
    parser.add_argument('--data_dir', type=str)
    parser.add_argument('--server_batch_size', type=int, default=64)
    parser.add_argument('--num_coroutine', type=int, default=1)
    parser.add_argument('--teacher_host', type=str, required=True)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)
    # token -> id map built from the teacher's vocab file: one token per line,
    # first tab-separated column, read as bytes then decoded as UTF-8.
    # NOTE(review): chunk ends mid-comprehension — the closing brace lies
    # beyond the visible source.
    teacher_vocab = {
        j.strip().split(b'\t')[0].decode('utf8'): i
        for i, j in enumerate(open(args.teacher_vocab_file, 'rb'))
        # NOTE(review): chunk starts mid-call — these are the trailing keyword
        # arguments of an optimization(...) call opened before the visible
        # source; formatting reconstructed, tokens unchanged.
            startup_prog=F.default_startup_program(),
            weight_decay=self.config.weight_decay,
            scheduler="linear_warmup_decay",)
        # Log the scheduled learning rate to the training summary.
        propeller.summary.scalar('lr', scheduled_lr)

    def metrics(self, predictions, labels):
        """Build the eval metric dict: {'acc': Acc}.

        Both `predictions` and `labels` are reduced to class-id tensors via
        argmax over axis 1 before being fed to propeller.metrics.Acc.
        NOTE(review): labels being argmax'd suggests they arrive as one-hot or
        soft distributions rather than ids — confirm against the data pipeline.
        """
        predictions = L.argmax(predictions, axis=1)
        labels = L.argmax(labels, axis=1)
        #predictions = L.unsqueeze(predictions, axes=[1])
        acc = propeller.metrics.Acc(labels, predictions)
        #auc = propeller.metrics.Auc(labels, predictions)
        return {'acc': acc}

if __name__ == '__main__':
    # CLI entry point for the local (non-server) distillation script.
    parser = propeller.ArgumentParser('Distill model with Paddle')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--unsupervise_data_dir', type=str, required=True)
    parser.add_argument('--data_dir', type=str)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)
    # token -> id map from the vocab file: first tab-separated column per line,
    # read as bytes then decoded as UTF-8.
    vocab = {j.strip().split(b'\t')[0].decode('utf8'): i
             for i, j in enumerate(open(args.vocab_file, 'rb'))}
    unk_id = vocab['[UNK]']
    # Two tokenizers over the same vocabulary: character-level and
    # whitespace-level.
    char_tokenizer = utils.data.CharTokenizer(vocab.keys())
    space_tokenizer = utils.data.SpaceTokenizer(vocab.keys())
    # NOTE(review): chunk ends mid-list — the FeatureColumns argument list
    # continues beyond the visible source.
    supervise_feature_column = propeller.data.FeatureColumns([
# Paddle dygraph/layers aliases and ERNIE model/tokenizer/optimizer imports.
import paddle.fluid.dygraph as FD
import paddle.fluid.layers as L
from propeller import log
import propeller.paddle as propeller

# Verbose logging for both the propeller logger and the root logger.
log.setLevel(logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)

#from model.bert import BertConfig, BertModelLayer
from ernie.modeling_ernie import ErnieModel, ErnieModelForSequenceClassification
from ernie.tokenizing_ernie import ErnieTokenizer, ErnieTinyTokenizer
from ernie.optimization import AdamW, LinearDecay

if __name__ == '__main__':
    # CLI entry point for the ERNIE sequence-classification script.
    parser = propeller.ArgumentParser('classify model with ERNIE')
    parser.add_argument(
        '--from_pretrained',
        type=str,
        required=True,
        help='pretrained model directory or tag')
    parser.add_argument(
        '--max_seqlen',
        type=int,
        default=128,
        help='max sentence length, should not greater than 512')
    parser.add_argument('--bsz', type=int, default=32, help='batchsize')
    parser.add_argument(
        '--data_dir',
        type=str,
        required=True,
        help='data directory includes train / develop data')
    # NOTE(review): chunk ends mid-call — this add_argument's arguments
    # continue beyond the visible source.
    parser.add_argument(
        # NOTE(review): chunk starts mid-function — this `return ds` ends a
        # nested helper whose definition lies before the visible source;
        # formatting reconstructed, tokens unchanged.
        return ds

    def after(*features):
        # Post-batch map: expand dims on every feature (see utils.data).
        return utils.data.expand_dims(*features)

    # Build the NER input pipeline: stdin generator -> BIO parsing ->
    # re-segmentation/label alignment -> id conversion -> padded batches.
    dataset = propeller.data.Dataset.from_generator_func(stdin_gen)
    dataset = read_bio_data(dataset)
    dataset = reseg_token_label(dataset)
    dataset = convert_to_ids(dataset)
    dataset = dataset.padded_batch(batch_size).map(after)
    dataset.name = name
    return dataset

if __name__ == '__main__':
    # CLI entry point for the ERNIE NER script.
    parser = propeller.ArgumentParser('NER model with ERNIE')
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--use_sentence_piece_vocab', action='store_true')
    parser.add_argument('--warm_start_from', type=str)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)
    # token -> id map from the vocab file: first tab-separated column per line
    # (text mode, UTF-8 — unlike the byte-mode readers in sibling scripts).
    vocab = {
        j.strip().split('\t')[0]: i
        for i, j in enumerate(open(args.vocab_file, 'r', encoding='utf8'))
    }
    # NOTE(review): chunk ends mid-call — the CharTokenizer arguments continue
    # beyond the visible source.
    tokenizer = utils.data.CharTokenizer(
        # NOTE(review): fragment of a method (uses self.run_config /
        # self.hparam) whose def lies before the visible source; formatting
        # reconstructed, tokens unchanged.
        # Build the training optimizer with linear warmup + decay; warmup runs
        # for warmup_proportion of max_steps.
        scheduled_lr, _ = optimization(
            loss=loss,
            warmup_steps=int(self.run_config.max_steps *
                             self.hparam['warmup_proportion']),
            num_train_steps=self.run_config.max_steps,
            learning_rate=self.hparam['learning_rate'],
            train_program=F.default_main_program(),
            startup_prog=F.default_startup_program(),
            weight_decay=self.hparam['weight_decay'],
            scheduler="linear_warmup_decay", )
        # Log the scheduled learning rate to the training summary.
        propeller.summary.scalar('lr', scheduled_lr)

if __name__ == '__main__':
    # CLI entry point for the ERNIE ranker script.
    parser = propeller.ArgumentParser('ranker model with ERNIE')
    parser.add_argument('--do_predict', action='store_true')
    parser.add_argument('--predict_model', type=str, default=None)
    parser.add_argument('--max_seqlen', type=int, default=128)
    parser.add_argument('--vocab_file', type=str, required=True)
    parser.add_argument('--data_dir', type=str, required=True)
    parser.add_argument('--warm_start_from', type=str)
    parser.add_argument('--sentence_piece_model', type=str, default=None)
    parser.add_argument('--word_dict', type=str, default=None)
    args = parser.parse_args()
    run_config = propeller.parse_runconfig(args)
    hparams = propeller.parse_hparam(args)
    # token -> id map from the vocab file: first tab-separated column per line,
    # read as bytes then decoded as UTF-8.
    # NOTE(review): chunk ends mid-comprehension — the closing brace lies
    # beyond the visible source.
    vocab = {
        j.strip().split(b'\t')[0].decode('utf8'): i
        for i, j in enumerate(open(args.vocab_file, 'rb'))