def _init_model(self, transformer_lm_encoder: dict, with_cuda: bool, output_nonlinearity, label_smoothing=None):
    # Build the encoder config, overriding vocabulary sizes with the loaded vocabularies if available.
    config = TransformerLMEncoderConfig(**transformer_lm_encoder)
    if hasattr(self, "word_vocab"):
        config.vocab_size = len(self.word_vocab)
    if hasattr(self, "token_type_vocab"):
        config.num_token_types = len(self.token_type_vocab)
    if hasattr(self, "node_type_vocab"):
        config.num_node_types = len(self.node_type_vocab)
    xl_net_lm_encoder = XLNetLMEncoder(config)

    # Plain cross-entropy unless label smoothing is requested.
    if label_smoothing is None:
        loss_fct = CrossEntropyLoss(ignore_index=-1)
    else:
        loss_fct = LabelSmoothingLoss(label_smoothing)

    if hasattr(self.dataset_train, 'num_sub_tokens_output'):
        num_sub_tokens_output = self.dataset_train.num_sub_tokens_output
    else:
        num_sub_tokens_output = 5

    self.model_manager = XLNetLMModelManager()
    self.model_lm = XLNetLanguageModel(xl_net_lm_encoder,
                                       output_nonlinearity=output_nonlinearity,
                                       loss_fct=loss_fct,
                                       output_sub_tokens_per_token=num_sub_tokens_output)
    self.with_cuda = with_cuda

def generate_transformer_lm_encoder_config(self, transformer_lm_encoder: dict) -> TransformerLMEncoderConfig:
    config = TransformerLMEncoderConfig(**transformer_lm_encoder)
    if self.use_pretrained_model:
        # Prefer the pretrained configuration, but carry over the settings that are allowed to differ.
        loaded_config = self.pretrained_transformer_encoder_config
        if not config == self.pretrained_transformer_encoder_config:
            print(f"Pretrained configuration differs from the given configuration. Pretrained: "
                  f"{self.pretrained_transformer_encoder_config}, Given: {config}. Trying to merge...")
            loaded_config.input_nonlinearity = config.input_nonlinearity
            loaded_config.transformer['encoder_layer']['dropout'] = config.transformer['encoder_layer']['dropout']
            loaded_config.transformer['encoder_layer']['activation'] \
                = config.transformer['encoder_layer']['activation']
        config = loaded_config

    transformer_config = dict(config.transformer)
    if hasattr(self, "word_vocab"):
        config.vocab_size = len(self.word_vocab)
    if hasattr(self, "token_type_vocab"):
        # Token types are not used when training on the AST alone.
        if hasattr(self, "use_only_ast") and self.use_only_ast:
            config.num_token_types = None
        else:
            config.num_token_types = len(self.token_type_vocab)
    if hasattr(self, "node_type_vocab"):
        config.num_node_types = len(self.node_type_vocab)
    if hasattr(self, "relative_distances"):
        encoder_layer_config = dict(transformer_config['encoder_layer'])
        encoder_layer_config['num_relative_distances'] = len(self.relative_distances)
        transformer_config['encoder_layer'] = encoder_layer_config
    if hasattr(self, "num_sub_tokens"):
        config.subtokens_per_token = self.num_sub_tokens
    if hasattr(self, 'num_languages'):
        config.num_languages = self.num_languages
    config.transformer = transformer_config
    return config

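# --- Hedged illustration (not part of the original source) ---------------------------------
# Sketch of the nested dict shape that generate_transformer_lm_encoder_config above appears to
# expect, inferred only from the keys the method reads. All concrete values are placeholders,
# and a real configuration likely needs further keys (e.g. the full encoder_layer settings).
example_transformer_lm_encoder = {
    'input_nonlinearity': 'tanh',
    'vocab_size': 113,                    # overridden from self.word_vocab when present
    'num_token_types': 13,                # overridden, or set to None when use_only_ast is active
    'num_node_types': 5,                  # overridden from self.node_type_vocab when present
    'subtokens_per_token': 5,             # overridden from self.num_sub_tokens when present
    'transformer': {
        'num_layers': 4,
        'encoder_layer': {
            'dropout': 0.1,
            'activation': 'gelu',
            'num_relative_distances': 4,  # overridden from len(self.relative_distances) when present
        },
    },
}
# config = self.generate_transformer_lm_encoder_config(example_transformer_lm_encoder)  # needs an experiment instance
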
def test_fails_no_distances(self):
    # A layer configured without any relative or token distances should be rejected.
    dataloader = self.setup_mini_dataset()
    config = CodeTransformerCoreConfig(
        encoder_layer=CodeTransformerLayerConfig(d_model=512,
                                                 nhead=8,
                                                 dim_feedforward=2048,
                                                 activation="gelu",
                                                 num_relative_distances=0,
                                                 use_token_distances=False,
                                                 use_content_content=True,
                                                 use_content_pos=True,
                                                 use_pos_content=True,
                                                 use_pos_pos=True),
        num_layers=4,
    )
    language_model_config = TransformerLMDecoderConfig(
        lm_encoder=TransformerLMEncoderConfig(
            config,
            vocab_size=len(self.word_vocab.vocabulary),
            num_node_types=len(self.node_type_vocab.vocabulary),
            num_token_types=len(self.token_type_vocab.vocabulary)),
        sos_id=-1,
    )
    with self.assertRaises(Exception):
        transformer_lm = TransformerLanguageModel(
            transformer_lm_encoder=language_model_config['lm_encoder'],
            output_nonlinearity=language_model_config['output_nonlinearity'],
            loss_fct=language_model_config['loss_fct'])
        batch: CTBatch = next(iter(dataloader))
        transformer_lm.forward_batch(batch)

def _init_transfer_learning(self, use_pretrained_model=False, model_type=None, run_id=None, snapshot_iteration=None,
                            cpu=False, freeze_encoder_layers=None):
    assert not use_pretrained_model or (run_id is not None and snapshot_iteration is not None
                                        and model_type is not None), \
        "model_type, run_id and snapshot_iteration have to be provided if use_pretrained_model is set"
    self.use_pretrained_model = use_pretrained_model

    if use_pretrained_model:
        print(f"Using transfer learning. Loading snapshot-{snapshot_iteration} from run {run_id} "
              f"in collection {model_type}")
        if model_type == 'ct_code_summarization':
            model_manager = CodeTransformerModelManager()
            pretrained_model = model_manager.load_model(run_id, snapshot_iteration, gpu=not cpu)
            self.pretrained_model = pretrained_model
        elif model_type == 'ct_lm':
            model_manager = CodeTransformerLMModelManager()
            pretrained_model = model_manager.load_model(run_id, snapshot_iteration, gpu=not cpu)
            self.pretrained_model = pretrained_model
        else:
            # Fall back to loading raw parameters plus the stored encoder configuration.
            model_manager = ModelManager(MODELS_SAVE_PATH, model_type)
            self.pretrained_model_params = model_manager.load_parameters(run_id, snapshot_iteration, gpu=not cpu)
            encoder_config = model_manager.load_config(run_id)['model']['transformer_lm_encoder']
            self.pretrained_transformer_encoder_config = TransformerLMEncoderConfig(**encoder_config)

        if freeze_encoder_layers is not None:
            self.freeze_encoder_layers = freeze_encoder_layers

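# --- Hedged illustration (not part of the original source) ---------------------------------
# The assertion in _init_transfer_learning implies that model_type, run_id and snapshot_iteration
# must all be supplied once use_pretrained_model is True. The values below ('ct_lm', 'SOME_RUN_ID',
# 10000, 4) are placeholders, not real run artifacts, so the calls are left commented out.
#
# self._init_transfer_learning()                          # no transfer learning, nothing is loaded
# self._init_transfer_learning(use_pretrained_model=True,
#                              model_type='ct_lm',        # routes to CodeTransformerLMModelManager
#                              run_id='SOME_RUN_ID',
#                              snapshot_iteration=10000,
#                              cpu=True,                  # load the snapshot onto the CPU
#                              freeze_encoder_layers=4)
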
def generate_lm_encoder_config(self):
    return TransformerLMEncoderConfig(transformer=self.generate_transformer_config(),
                                      vocab_size=113,
                                      num_node_types=5,
                                      num_token_types=13,
                                      subtokens_per_token=5,
                                      input_nonlinearity="tanh")

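# --- Hedged illustration (not part of the original source) ---------------------------------
# The config returned above can presumably be wrapped into an encoder module, mirroring how
# TransformerLanguageModel.__init__ and the init_model helper below build a TransformerLMEncoder
# from a TransformerLMEncoderConfig. Left commented out because generate_transformer_config()
# lives on the surrounding test class.
#
# lm_encoder_config = self.generate_lm_encoder_config()
# lm_encoder = TransformerLMEncoder(lm_encoder_config)
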
def init_model():
    # encoder_config, decoder_config and transformer_config are captured from the enclosing scope.
    encoder_config['transformer'] = transformer_config
    decoder_config['lm_encoder'] = XLNetLMEncoder(TransformerLMEncoderConfig(**encoder_config))
    model = XLNetTransformerDecoder(TransformerLMDecoderConfig(**decoder_config))
    num_params = sum([len(params.view(-1)) for params in model.parameters()])
    print(f"Model has {num_params} parameters")
    return model

def init_model():
    transformer_config['encoder_layer'] = CodeTransformerLayer(**layer_config)
    encoder_config['transformer'] = CodeTransformer(CodeTransformerCoreConfig(**transformer_config))
    decoder_config['lm_encoder'] = TransformerLMEncoder(TransformerLMEncoderConfig(**encoder_config))
    model = CodeTransformerDecoder(TransformerLMDecoderConfig(**decoder_config))
    num_params = sum([len(params.view(-1)) for params in model.parameters()])
    print(f"Model has {num_params} parameters")
    return model

def generate_language_model_default_config(self, transformer_config: CodeTransformerCoreConfig = None) \
        -> TransformerLMDecoderConfig:
    if transformer_config is None:
        transformer_config = TestCodeTransformer.generate_transformer_default_config()
    encoder_conf = TransformerLMEncoderConfig(transformer_config,
                                              vocab_size=113,
                                              num_node_types=5,
                                              num_token_types=13,
                                              subtokens_per_token=5,
                                              input_nonlinearity="tanh")
    return TransformerLMDecoderConfig(encoder_conf, sos_id=-1, output_nonlinearity=None)

def __init__(self, transformer_lm_encoder: Union[TransformerLMEncoder, TransformerLMEncoderConfig],
             output_nonlinearity=None, loss_fct=nn.CrossEntropyLoss(ignore_index=-1), **kwargs):
    super(TransformerLanguageModel, self).__init__()
    # Accept either a ready-made encoder module or a configuration to build one from.
    if not isinstance(transformer_lm_encoder, TransformerLMEncoder):
        self.transformer_lm_encoder = TransformerLMEncoder(TransformerLMEncoderConfig(**transformer_lm_encoder))
    else:
        self.transformer_lm_encoder = transformer_lm_encoder

    self.d_model = self.transformer_lm_encoder.d_model
    # Project each token embedding up to one embedding per output sub-token.
    self.token_linear_up = nn.Linear(self.d_model,
                                     self.transformer_lm_encoder.subtokens_per_token * self.d_model)
    self.output_nonlinearity = None
    if output_nonlinearity is not None:
        self.output_nonlinearity = _get_activation_fn(output_nonlinearity)
    self.loss_fct = loss_fct
    self._reset_parameters()

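# --- Hedged illustration (not part of the original source) ---------------------------------
# The isinstance check above suggests TransformerLanguageModel accepts either a ready-built
# TransformerLMEncoder or a mapping/config that can be unpacked into TransformerLMEncoderConfig
# (the tests index such configs dict-style). `encoder_config_dict` is a placeholder for such a
# mapping, so both variants are shown commented out.
#
# lm = TransformerLanguageModel(TransformerLMEncoder(TransformerLMEncoderConfig(**encoder_config_dict)),
#                               output_nonlinearity=None)
# lm = TransformerLanguageModel(encoder_config_dict,
#                               output_nonlinearity="tanh",
#                               loss_fct=nn.CrossEntropyLoss(ignore_index=-1))
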
def __init__(self, config: TransformerLMDecoderConfig):
    if not isinstance(config.lm_encoder, nn.Module):
        config.lm_encoder = XLNetLMEncoder(TransformerLMEncoderConfig(**config.lm_encoder))
    super(XLNetTransformerDecoder, self).__init__(config)

def test_mini_dataset(self):
    def evaluate_predictions(logits, labels, loss=None):
        correct = logits.argmax(-1) == labels
        all_correct = correct.prod(-1)
        correct_tokens = all_correct.float().mean().cpu().item()
        ret = dict(correct_tokens=correct_tokens)
        if loss is not None:
            ret['loss'] = loss.detach().cpu().item()
        return ret

    BATCH_SIZE = 13
    NUM_PREDICT = 5
    dataloader = self.setup_mini_dataset()
    config = CodeTransformerCoreConfig(
        encoder_layer=CodeTransformerLayerConfig(d_model=16,
                                                 nhead=8,
                                                 dim_feedforward=32,
                                                 activation="gelu",
                                                 num_relative_distances=4,
                                                 use_token_distances=True,
                                                 use_content_content=True,
                                                 use_content_pos=True,
                                                 use_pos_content=True,
                                                 use_pos_pos=True),
        num_layers=4,
    )
    language_model_config = TransformerLMDecoderConfig(
        lm_encoder=TransformerLMEncoderConfig(
            config,
            vocab_size=len(self.word_vocab.vocabulary),
            num_node_types=len(self.node_type_vocab.vocabulary),
            num_token_types=len(self.token_type_vocab.vocabulary)),
        sos_id=-1)
    transformer_lm = TransformerLanguageModel(
        transformer_lm_encoder=language_model_config['lm_encoder'],
        output_nonlinearity=language_model_config['output_nonlinearity'],
        loss_fct=language_model_config['loss_fct'])
    batch: CTBatch = next(iter(dataloader))

    cuda = torch.cuda.is_available() and RUN_TESTS_ON_GPU
    if cuda:
        transformer_lm = transformer_lm.cuda()
    opt = optim.Adam(transformer_lm.parameters(), lr=1e-4)
    tq = tqdm(range(500))

    if RUN_TESTS_ON_GPU:
        with self.assertRaises(RuntimeError):
            # CPU input on CUDA model should fail
            output = transformer_lm.forward_batch(batch)
        batch = batch_to_device(batch, "cuda")

    assert not (batch.labels == self.word_vocab['</s>']).any().item()

    for _ in tq:
        output = transformer_lm.forward_batch(batch)
        output.loss.backward()
        opt.step()
        opt.zero_grad()
        evaluation = evaluate_predictions(output.logits, batch.labels)
        acc = evaluation['correct_tokens']
        tq.set_postfix(loss=output.loss.cpu().item(), acc=acc)

    predicted_tokens = output.logits.argmax(-1)
    generated_text = batch_decode(self.word_vocab, predicted_tokens)
    generated_text2 = [" ".join(["_".join([self.word_vocab.reverse_lookup(subtoken.item()) for subtoken in token])
                                 for token in sample])
                       for sample in predicted_tokens]
    assert list(generated_text) == generated_text2
    assert acc > 0.98