def __init__(self, cfg: DictConfig, trainer: Trainer):
    # All of the lines below need to be set when the parent class calls self._build_tokenizer()
    self.encoder_tokenizer_library = cfg.encoder_tokenizer.get('library', 'yttm')
    self.decoder_tokenizer_library = cfg.decoder_tokenizer.get('library', 'yttm')
    self.special_tokens = {}
    self.src_language = cfg.get("src_language", None)
    self.tgt_language = cfg.get("tgt_language", None)
    self.multilingual = cfg.get("multilingual", False)
    self.multilingual_ids = []
    self.validate_input_ids = cfg.get("validate_input_ids", True)

    if self.multilingual:
        if isinstance(self.src_language, ListConfig) and isinstance(self.tgt_language, ListConfig):
            raise ValueError(
                "cfg.src_language and cfg.tgt_language cannot both be lists. We only support many-to-one or one-to-many multilingual models."
            )
        elif isinstance(self.src_language, ListConfig):
            pass
        elif isinstance(self.tgt_language, ListConfig):
            for lng in self.tgt_language:
                self.special_tokens["<" + lng + ">"] = "<" + lng + ">"
        else:
            raise ValueError(
                "Expect either cfg.src_language or cfg.tgt_language to be a list when multilingual=True."
            )

    super().__init__(cfg, trainer=trainer)
def __init__(self, cfg: DictConfig, trainer: Trainer):
    super().__init__(cfg, trainer)
    self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False)

    # TODO: Fix this once apex patches FusedScaledMaskedSoftmax.
    # This is a workaround for the fact that `masked_softmax_fusion` has issues
    # with certain input sizes that may be present while finetuning.
    t5_cfg = MegatronT5Model.restore_from(
        self.register_artifact('language_model.nemo_file', cfg.language_model.get('nemo_file', None)),
        trainer=trainer,
        return_config=True,
    )
    OmegaConf.set_struct(t5_cfg, True)
    with open_dict(t5_cfg):
        t5_cfg.masked_softmax_fusion = False
        t5_cfg.megatron_amp_O2 = self.megatron_amp_o2

    self.model = MegatronT5Model.restore_from(
        self.register_artifact('language_model.nemo_file', cfg.language_model.get('nemo_file', None)),
        trainer=trainer,
        override_config_path=t5_cfg,
    )
    # self.model = MegatronT5Model.restore_from(
    #     self.register_artifact('language_model.nemo_file', cfg.language_model.get('nemo_file', None)),
    #     trainer=trainer)

    self.tokenizer = self.model.tokenizer
    self.float_type = self.model.enc_dec_model.enc_dec_model.encoder.model.layers[0].dtype

    if not cfg.use_lm_finetune:
        self.model.freeze()

    hidden_size = self.model.cfg.hidden_size

    # register the file containing the labels into the artifacts to get stored in the '.nemo' file later
    self.word_embeddings = self.model.enc_dec_model.encoder_embedding.word_embeddings
    self.position_embeddings = self.model.enc_dec_model.encoder_embedding.position_embeddings
    # self.vocab = self.tokenizer.tokenizer.get_vocab()

    self.template = cfg.prompt_encoder.template
    self.prompt_encoder = PromptEncoder(
        template=cfg.prompt_encoder.template,
        hidden_size=hidden_size,
        lstm_dropout=cfg.prompt_encoder.dropout,
        num_layers=cfg.prompt_encoder.num_layers,
    )

    # load prompt encoder
    self.hidden_size = hidden_size
    self.tokenizer.add_special_tokens([cfg.pseudo_token])
    self.pseudo_token_id = self.tokenizer.special_token_to_id[cfg.pseudo_token]
    self.pad_token_id = self.tokenizer.pad_id if self.tokenizer.pad_id is not None else self.tokenizer.unk_id
    self.spell_length = sum(self.template)
    self._reduced_loss_buffer = []
    self.decoder_seq_length = cfg.get('decoder_seq_length', 10)
def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True):
    # FIXME: switch to self._cfg
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    # this prevents base constructor from initializing tokenizer
    self.tokenizer = None
    super().__init__(cfg, trainer=trainer, no_lm_init=no_lm_init)

    # used in NVIDIA NGC PyTorch containers
    self._enable_nvidia_optimizations()

    if self._cfg.get('use_cpu_initialization', False) is False:
        torch.cuda.set_device(trainer.local_rank)

    # buffer used during train_step for logging average loss over gradient accumulation steps
    self._reduced_loss_buffer = []

    initialize_model_parallel_for_nemo(
        world_size=trainer.world_size,
        global_rank=trainer.global_rank,
        local_rank=trainer.local_rank,
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
        pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1),
        pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0),
        micro_batch_size=cfg.get('micro_batch_size'),
        global_batch_size=cfg.get('global_batch_size'),
        seed=self.cfg.get('seed', 1234),
        apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30),
    )

    self.grad_clip_pl_default = False  # use pytorch default for gradient clipping. Default False

    if hasattr(self._cfg, "tokenizer") or (
        hasattr(self._cfg, "encoder_tokenizer") and hasattr(self._cfg, "decoder_tokenizer")
    ):
        # build tokenizer (defaults to nemo supported tokenizers)
        self._build_tokenizer()

        # manipulate vocabulary (e.g., pad vocabulary for better efficiency)
        self._build_vocab()
def __init__(self, cfg: DictConfig, trainer: Trainer):
    # FIXME: switch to self._cfg
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    super().__init__(cfg, trainer=trainer)

    # used in NVIDIA NGC PyTorch containers
    self._enable_nvidia_optimizations()

    if self._cfg.get('use_cpu_initialization', False) is False:
        torch.cuda.set_device(trainer.local_rank)

    # buffer used during train_step for logging average loss over gradient accumulation steps
    self._reduced_loss_buffer = []

    initialize_model_parallel_for_nemo(
        world_size=trainer.world_size,
        global_rank=trainer.global_rank,
        local_rank=trainer.local_rank,
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
        seed=self._cfg.get('seed', 1234),
    )
def __init__(self, cfg: DictConfig, trainer: Trainer):
    app_state = AppState()
    if not app_state._is_megatron_initialized:
        logging.info(
            f"Initializing megatron since it hasn't been initialized by the model. This is normal if you are using a NeMo model with Megatron dataloaders."
        )
        app_state.global_rank = trainer.global_rank
        app_state.world_size = trainer.world_size
        app_state.model_parallel_size = 1
        app_state.model_parallel_rank = trainer.global_rank
        initialize_model_parallel_for_nemo(
            world_size=trainer.world_size,
            global_rank=trainer.global_rank,
            local_rank=trainer.local_rank,
            tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
            seed=self.cfg.get('seed', 1234),
        )

    try:
        from nemo.collections.nlp.data.language_modeling.megatron.dataset_utils import compile_helper

        compile_helper()
        logging.info('Megatron dataset helper compiled successfully.')
        from nemo.collections.nlp.data.language_modeling.megatron import helpers
    except ImportError:
        raise ImportError(
            f'Could not compile megatron dataset C++ helper functions and therefore cannot import helpers python file.'
        )
def __init__(self, cfg: DictConfig, trainer: Trainer):
    super().__init__(cfg, trainer=trainer)

    if cfg.get('pipeline_model_parallel_size', 1) > 1:
        if cfg.get('pipeline_model_parallel_split_rank', 0) <= 0:
            raise ValueError(
                f"pipeline_model_parallel_split_rank must be > 0 when using pipeline_model_parallel_size > 1"
            )

    # Make sure trainer.accumulate_grad_batches is 1.
    self._validate_trainer()

    # TODO: Not sure how to use lists of modules with PTL.
    # This means we can only use pipeline parallelism without the interleaved schedule.
    self.enc_dec_model = build_model(
        model_provider_func=self.model_provider_func,
        wrap_with_ddp=False,
        model_type=ModelType.encoder_and_decoder,
    )[0]

    # We don't need to call it explicitly? Since it is a pytorch lightning hook function
    # self.setup_optimizer_param_groups()

    self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False)

    if self.megatron_amp_o2:
        # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type
        self.enc_dec_model.cuda(torch.cuda.current_device())

        # Model wrapper to convert both model and inputs to half precision
        self.enc_dec_model = Float16Module(module=self.enc_dec_model, precision=cfg.precision)

    if self.cfg.precision == 32:
        self.autocast_dtype = torch.float
    elif self.cfg.precision == 16:
        self.autocast_dtype = torch.half
    elif self.cfg.precision == 'bf16':
        self.autocast_dtype = torch.bfloat16
    else:
        raise ValueError('precision must be in [32, 16, "bf16"]')

    self.enc_dec_model.model_type = ModelType.encoder_and_decoder
def _setup_eval_dataloader_from_config(self, cfg: DictConfig, dataset):
    rank = parallel_state.get_data_parallel_rank()
    world_size = parallel_state.get_data_parallel_world_size()
    dataloaders = []
    for _dataset in dataset:
        sampler = torch.utils.data.distributed.DistributedSampler(
            _dataset, num_replicas=world_size, rank=rank, shuffle=False
        )
        dataloaders.append(
            torch.utils.data.DataLoader(
                dataset=_dataset,
                batch_size=1,
                sampler=sampler,
                num_workers=cfg.get("num_workers", 0),
                pin_memory=cfg.get("pin_memory", False),
                drop_last=cfg.get("drop_last", False),
                shuffle=False,
            )
        )
    return dataloaders
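# A minimal sketch (not part of the model above) of how a list of evaluation
# dataloaders like the one built by _setup_eval_dataloader_from_config is
# typically consumed: when PyTorch Lightning receives a list of val
# dataloaders, `validation_step` is called with a `dataloader_idx` that
# identifies the dataset. Names such as `MyEvalModule` and `compute_loss`
# are illustrative placeholders, not NeMo APIs.
import pytorch_lightning as pl


class MyEvalModule(pl.LightningModule):
    def validation_step(self, batch, batch_idx, dataloader_idx=0):
        # one metric per evaluation dataset; Lightning logs them separately
        loss = self.compute_loss(batch)  # hypothetical per-batch loss helper
        self.log(f"val_loss_dl{dataloader_idx}", loss, sync_dist=True)
        return loss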
def __init__(self, cfg: DictConfig, trainer: Trainer):
    super().__init__(cfg, trainer=trainer)

    # Make sure trainer.accumulate_grad_batches is 1.
    self._validate_trainer()

    # build tokenizer (defaults to nemo supported tokenizers)
    self._build_tokenizer()

    # manipulate vocabulary (e.g., pad vocabulary for better efficiency)
    self._build_vocab()

    # TODO: Not sure how to use lists of modules with PTL.
    # This means we can only use pipeline parallelism without the interleaved schedule.
    self.enc_dec_model = build_model(
        model_provider_func=self.model_provider_func,
        wrap_with_ddp=False,
        model_type=ModelType.encoder_and_decoder,
    )[0]

    self.setup_optimizer_param_groups()

    self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False)

    if self.megatron_amp_o2:
        # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type
        self.enc_dec_model.cuda(torch.cuda.current_device())

        # Model wrapper to convert both model and inputs to half precision
        self.enc_dec_model = Float16Module(module=self.enc_dec_model, precision=cfg.precision)

    if self.cfg.precision == 32:
        self.autocast_dtype = torch.float
    elif self.cfg.precision == 16:
        self.autocast_dtype = torch.half
    elif self.cfg.precision == 'bf16':
        self.autocast_dtype = torch.bfloat16
    else:
        raise ValueError('precision must be in [32, 16, "bf16"]')

    self.enc_dec_model.model_type = ModelType.encoder_and_decoder
def __init__(self, cfg: DictConfig, trainer: Trainer):
    app_state = AppState()
    if not app_state._is_megatron_initialized:
        logging.info(
            f"Initializing megatron since it hasn't been initialized by the model. This is normal if you are using a NeMo model with Megatron dataloaders."
        )
        app_state.global_rank = trainer.global_rank
        app_state.world_size = trainer.world_size
        app_state.model_parallel_size = 1
        app_state.model_parallel_rank = trainer.global_rank
        initialize_model_parallel_for_nemo(
            world_size=trainer.world_size,
            global_rank=trainer.global_rank,
            local_rank=trainer.local_rank,
            tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
            seed=self.cfg.get('seed', 1234),
        )
def __init__(self, cfg: DictConfig, trainer: Trainer):
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    # this prevents base constructor from initializing tokenizer
    self.tokenizer = None
    super().__init__(cfg, trainer=trainer, no_lm_init=True)

    self._validate_trainer()

    # TODO: Not sure how to use lists of modules with PTL.
    # This means we can only use pipeline parallelism without the interleaved schedule.
    self.model = build_model(model_provider_func=self.model_provider_func, wrap_with_ddp=False)[0]

    # We don't need to call it explicitly? Since it is a pytorch lightning hook function
    # self.setup_optimizer_param_groups()

    self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False)

    if self.megatron_amp_o2:
        # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type
        self.model.cuda(torch.cuda.current_device())

        # Model wrapper to convert both model and inputs to half precision
        self.model = Float16Module(module=self.model, precision=cfg.precision)

    if self.trainer.precision == 32:
        self.autocast_dtype = torch.float
    elif self.trainer.precision == 16:
        self.autocast_dtype = torch.half
    elif self.trainer.precision == 'bf16':
        self.autocast_dtype = torch.bfloat16
    else:
        raise ValueError('precision must be in [32, 16, "bf16"]')

    # configuration used for inference
    self._inference_config = None
def __init__(self, cfg: DictConfig, trainer: Trainer):
    # FIXME: switch to self._cfg
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    # this prevents base constructor from initializing tokenizer
    self.tokenizer = None
    super().__init__(cfg, trainer=trainer, no_lm_init=True)

    # used in NVIDIA NGC PyTorch containers
    self._enable_nvidia_optimizations()

    if self._cfg.get('use_cpu_initialization', False) is False:
        torch.cuda.set_device(trainer.local_rank)

    # buffer used during train_step for logging average loss over gradient accumulation steps
    self._reduced_loss_buffer = []

    if cfg.get('pipeline_model_parallel_size', 1) > 1:
        if cfg.get('pipeline_model_parallel_split_rank', 0) <= 0:
            raise ValueError(
                f"pipeline_model_parallel_split_rank must be > 0 when using pipeline_model_parallel_size > 1"
            )

    initialize_model_parallel_for_nemo(
        world_size=trainer.world_size,
        global_rank=trainer.global_rank,
        local_rank=trainer.local_rank,
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
        pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1),
        pipeline_model_parallel_split_rank=cfg.get('pipeline_model_parallel_split_rank', 0),
        micro_batch_size=cfg.get('micro_batch_size'),
        global_batch_size=cfg.get('global_batch_size'),
        seed=self.cfg.get('seed', 1234),
        apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30),
    )
def __init__(self, cfg: DictConfig, trainer: Trainer):
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    super().__init__(cfg, trainer)
    self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False)

    # TODO: Fix this once apex patches FusedScaledMaskedSoftmax.
    # This is a workaround for the fact that `masked_softmax_fusion` has issues
    # with certain input sizes that may be present while finetuning.
    t5_cfg = MegatronT5Model.restore_from(
        self.register_artifact('t5_base_model', cfg.restore_from_path),
        trainer=trainer,
        return_config=True,
    )
    OmegaConf.set_struct(t5_cfg, True)
    with open_dict(t5_cfg):
        t5_cfg.masked_softmax_fusion = False
        t5_cfg.megatron_amp_O2 = self.megatron_amp_o2

    self.model = MegatronT5Model.restore_from(
        self.register_artifact('t5_base_model', cfg.restore_from_path),
        trainer=trainer,
        override_config_path=t5_cfg,
    )

    self.setup_optimizer_param_groups()
def __init__(self, cfg: DictConfig, trainer: Trainer):
    super().__init__(cfg, trainer=trainer)
    self.cfg = cfg

    # used in NVIDIA NGC PyTorch containers
    self._enable_nvidia_optimizations()

    if self.cfg.get('use_cpu_initialization', False) is False:
        torch.cuda.set_device(trainer.local_rank)

    # buffer used during train_step for logging average loss over gradient accumulation steps
    self._reduced_loss_buffer = []
    self._reduced_lm_loss_buffer = []
    self._reduced_sop_loss_buffer = []

    initialize_model_parallel_for_nemo(
        world_size=trainer.world_size,
        global_rank=trainer.global_rank,
        local_rank=trainer.local_rank,
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
        seed=self.cfg.get('seed', 1234),
    )

    self.tokenizer = get_nmt_tokenizer(
        library=self.cfg.tokenizer.library,
        model_name=self.cfg.tokenizer.type,
        tokenizer_model=self.register_artifact("tokenizer_model", self.cfg.tokenizer.model),
        vocab_file=self.register_artifact("vocab_file", self.cfg.tokenizer.vocab_file),
        merges_file=self.register_artifact("merges_file", self.cfg.tokenizer.merge_file),
    )

    vocab_size = self.tokenizer.vocab_size

    padded_vocab_size = self._vocab_size_with_padding(
        orig_vocab_size=vocab_size,
        make_vocab_size_divisible_by=cfg.get('make_vocab_size_divisible_by', 128),
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
    )

    num_tokentypes = 2 if cfg.bert_binary_head else 0

    self.model = BertModel(
        vocab_size=padded_vocab_size,
        hidden_size=cfg.hidden_size,
        max_position_embeddings=cfg.max_position_embeddings,
        num_layers=cfg.num_layers,
        num_attention_heads=cfg.num_attention_heads,
        apply_query_key_layer_scaling=cfg.get('apply_query_key_layer_scaling', True),
        kv_channels=cfg.get('kv_channels', None),
        ffn_hidden_size=cfg.ffn_hidden_size,
        num_tokentypes=num_tokentypes,
        parallel_output=True,
        pre_process=cfg.get('pre_process', True),
        post_process=cfg.get('post_process', True),
        init_method_std=cfg.get('init_method_std', 0.02),
        fp16_lm_cross_entropy=cfg.get('fp16_lm_cross_entropy', False),
        use_cpu_initialization=cfg.get('use_cpu_initialization', False),
        hidden_dropout=cfg.get('hidden_dropout', 0.1),
        precision=cfg.get('precision', 16),
        fp32_residual_connection=cfg.get('fp32_residual_connection', False),
        activations_checkpoint_method=cfg.get('activations_checkpoint_method', None),
        activations_checkpoint_num_layers=cfg.get('activations_checkpoint_num_layers', 1),
        layernorm_epsilon=cfg.get('layernorm_epsilon', 1e-5),
        onnx_safe=cfg.get('onnx_safe', False),
        add_binary_head=cfg.bert_binary_head,
    )
def __init__(self, cfg: DictConfig, trainer: Trainer):
    super().__init__(cfg, trainer)
    self.cfg = cfg

    # Load pretrained GPT model and tokenizer
    self.model = MegatronGPTModel.restore_from(
        self.register_artifact('language_model_path', cfg.get('language_model_path', None)),
        trainer=trainer,
        save_restore_connector=NLPSaveRestoreConnector(),
    )

    # Freeze all GPT model weights for prompt-tuning/p-tuning
    if not cfg.lm_finetune:
        self.model.freeze()

    self.tokenizer = self.model.tokenizer
    self.float_type = self.model.model.language_model.encoder.layers[0].dtype
    self.hidden_size = self.model.cfg.hidden_size
    self.word_embeddings = self.model.model.language_model.embedding.word_embeddings
    self.existing_tasks = list(self.cfg.get('existing_tasks', []))
    self.new_tasks = list(self.cfg.get('new_tasks', []))

    # Load templates for assigning virtual prompt token positions
    self.load_task_templates(self.cfg.task_templates)

    # Prompt table stores all task embeddings, p-tuning virtual prompts get added to the table after training
    self.prompt_table = PromptTable(
        existing_tasks=self.existing_tasks,
        task_templates=self.task_templates,
        task_id_num_to_name=self.task_id_num_to_name,
        hidden_size=self.hidden_size,
    )

    # Prepare pseudo token ids for virtual/virtual prompt tokens
    self.pseudo_token_base = cfg.pseudo_token_base
    self.pseudo_tokens = [self.pseudo_token_base + str(i) for i in range(self.max_virtual_tokens)]
    self.tokenizer.add_special_tokens({'additional_special_tokens': self.pseudo_tokens})
    self.pseudo_token_ids = self.tokenizer.tokens_to_ids(self.pseudo_tokens)
    self.pseudo_token_ids_start = self.pseudo_token_ids[0]
    self.pad_token_id = self.tokenizer.pad_id if self.tokenizer.pad_id is not None else self.tokenizer.unk_id
    self.virtual_prompt_style = cfg.virtual_prompt_style.lower()

    # Prompt tuning stores virtual prompts in the prompt table and tunes their weight directly
    if self.virtual_prompt_style in ['prompt-tuning', 'inference']:
        self.virtual_prompt_source = 'prompt-table'

    # P-Tuning uses an LSTM Encoder to produce virtual token embeddings
    elif self.virtual_prompt_style == 'p-tuning':
        self.virtual_prompt_source = 'prompt-encoder'

    else:
        raise ValueError(
            f"\nvirtual prompt style '{cfg.virtual_prompt_style}' not recognized, please use one of 'prompt-tuning' or 'p-tuning'"
        )

    self._reduced_loss_buffer = []
    self._inference_config = None

    if self.trainer.precision == 32:
        self.autocast_dtype = torch.float
    elif self.trainer.precision == 16:
        self.autocast_dtype = torch.half
    elif self.trainer.precision == 'bf16':
        self.autocast_dtype = torch.bfloat16
    else:
        raise ValueError('precision must be in [32, 16, "bf16"]')
def __init__(self, cfg: DictConfig, trainer: Trainer):
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    # this prevents base constructor from initializing tokenizer
    self.tokenizer = None
    super().__init__(cfg, trainer=trainer, no_lm_init=True)

    self._validate_trainer()

    # used in NVIDIA NGC PyTorch containers
    self._enable_nvidia_optimizations()

    if self.cfg.get('use_cpu_initialization', False) is False:
        torch.cuda.set_device(trainer.local_rank)

    initialize_model_parallel_for_nemo(
        world_size=trainer.world_size,
        global_rank=trainer.global_rank,
        local_rank=trainer.local_rank,
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
        pipeline_model_parallel_size=cfg.get('pipeline_model_parallel_size', 1),
        micro_batch_size=cfg.get('micro_batch_size'),
        global_batch_size=cfg.get('global_batch_size'),
        seed=self.cfg.get('seed', 1234),
        apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30),
    )

    self.tokenizer = get_nmt_tokenizer(
        library=self.cfg.tokenizer.library,
        model_name=self.cfg.tokenizer.type,
        tokenizer_model=self.register_artifact("tokenizer.model", self.cfg.tokenizer.model),
        vocab_file=self.register_artifact("tokenizer.vocab_file", self.cfg.tokenizer.vocab_file),
        merges_file=self.register_artifact("tokenizer.merge_file", self.cfg.tokenizer.merge_file),
        delimiter=self.cfg.tokenizer.get('delimiter', None),
    )

    vocab_size = self.tokenizer.vocab_size

    self.padded_vocab_size = self._vocab_size_with_padding(
        orig_vocab_size=vocab_size,
        make_vocab_size_divisible_by=cfg.get('make_vocab_size_divisible_by', 128),
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
    )

    # TODO: Not sure how to use lists of modules with PTL.
    # This means we can only use pipeline parallelism without the interleaved schedule.
    self.model = build_model(model_provider_func=self.model_provider_func, wrap_with_ddp=False)[0]

    self.setup_optimizer_param_groups()

    self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False)

    if self.megatron_amp_o2:
        # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type
        self.model.cuda(torch.cuda.current_device())

        # Model wrapper to convert both model and inputs to half precision
        self.model = Float16Module(module=self.model, precision=cfg.precision)

    if self.trainer.precision == 32:
        self.autocast_dtype = torch.float
    elif self.trainer.precision == 16:
        self.autocast_dtype = torch.half
    elif self.trainer.precision == 'bf16':
        self.autocast_dtype = torch.bfloat16
    else:
        raise ValueError('precision must be in [32, 16, "bf16"]')

    # configuration used for inference
    self._inference_config = None
def __init__(self, cfg: DictConfig, trainer: Trainer):
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    super().__init__(cfg, trainer=trainer, no_lm_init=False)
    self.cfg = cfg

    # used in NVIDIA NGC PyTorch containers
    # buffer used during train_step for logging average loss over gradient accumulation steps
    self._reduced_lm_loss_buffer = []
    self._reduced_sop_loss_buffer = []

    num_tokentypes = 2 if cfg.bert_binary_head else 0

    self.model = BertModel(
        vocab_size=self.padded_vocab_size,
        hidden_size=cfg.hidden_size,
        max_position_embeddings=cfg.max_position_embeddings,
        num_layers=cfg.num_layers,
        num_attention_heads=cfg.num_attention_heads,
        apply_query_key_layer_scaling=cfg.get('apply_query_key_layer_scaling', True),
        kv_channels=cfg.get('kv_channels', None),
        ffn_hidden_size=cfg.ffn_hidden_size,
        num_tokentypes=num_tokentypes,
        parallel_output=True,
        pre_process=cfg.get('pre_process', True),
        post_process=cfg.get('post_process', True),
        init_method_std=cfg.get('init_method_std', 0.02),
        fp16_lm_cross_entropy=cfg.get('fp16_lm_cross_entropy', False),
        use_cpu_initialization=cfg.get('use_cpu_initialization', False),
        hidden_dropout=cfg.get('hidden_dropout', 0.1),
        precision=cfg.get('precision', 16),
        fp32_residual_connection=cfg.get('fp32_residual_connection', False),
        activations_checkpoint_method=cfg.get('activations_checkpoint_method', None),
        activations_checkpoint_num_layers=cfg.get('activations_checkpoint_num_layers', 1),
        layernorm_epsilon=cfg.get('layernorm_epsilon', 1e-5),
        masked_softmax_fusion=cfg.get('masked_softmax_fusion', True),
        bias_gelu_fusion=cfg.get('bias_gelu_fusion', True),
        onnx_safe=cfg.get('onnx_safe', False),
        add_binary_head=cfg.bert_binary_head,
        megatron_legacy=cfg.get('megatron_legacy', False),
    )

    # not using amp o2
    self.megatron_amp_o2 = False
def __init__(self, cfg: DictConfig, trainer: Trainer):
    if not HAVE_APEX:
        raise ImportError(
            "Apex was not found. Please see the NeMo README for installation instructions: https://github.com/NVIDIA/NeMo#megatron-gpt."
        )

    super().__init__(cfg, trainer=trainer)
    self.cfg = cfg

    # used in NVIDIA NGC PyTorch containers
    self._enable_nvidia_optimizations()

    if self.cfg.get('use_cpu_initialization', False) is False:
        torch.cuda.set_device(trainer.local_rank)

    # buffer used during train_step for logging average loss over gradient accumulation steps
    self._reduced_loss_buffer = []
    self._reduced_lm_loss_buffer = []
    self._reduced_sop_loss_buffer = []

    # not saved as part of nemo model graph but required during export to ONNX
    input_names = ['input_ids', 'attention_mask', 'token_type_ids']

    initialize_model_parallel_for_nemo(
        world_size=trainer.world_size,
        global_rank=trainer.global_rank,
        local_rank=trainer.local_rank,
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
        seed=self.cfg.get('seed', 1234),
    )

    self.tokenizer = get_nmt_tokenizer(
        library=self.cfg.tokenizer.library,
        model_name=self.cfg.tokenizer.type,
        tokenizer_model=self.register_artifact("tokenizer.model", self.cfg.tokenizer.model),
        vocab_file=self.register_artifact("tokenizer.vocab_file", self.cfg.tokenizer.vocab_file),
        merges_file=self.register_artifact("tokenizer.merge_file", self.cfg.tokenizer.merge_file),
    )

    vocab_size = self.tokenizer.vocab_size

    padded_vocab_size = self._vocab_size_with_padding(
        orig_vocab_size=vocab_size,
        make_vocab_size_divisible_by=cfg.get('make_vocab_size_divisible_by', 128),
        tensor_model_parallel_size=cfg.get('tensor_model_parallel_size', 1),
    )

    num_tokentypes = 2 if cfg.bert_binary_head else 0

    self.model = BertModel(
        vocab_size=padded_vocab_size,
        hidden_size=cfg.hidden_size,
        max_position_embeddings=cfg.max_position_embeddings,
        num_layers=cfg.num_layers,
        num_attention_heads=cfg.num_attention_heads,
        apply_query_key_layer_scaling=cfg.get('apply_query_key_layer_scaling', True),
        kv_channels=cfg.get('kv_channels', None),
        ffn_hidden_size=cfg.ffn_hidden_size,
        num_tokentypes=num_tokentypes,
        parallel_output=True,
        pre_process=cfg.get('pre_process', True),
        post_process=cfg.get('post_process', True),
        init_method_std=cfg.get('init_method_std', 0.02),
        fp16_lm_cross_entropy=cfg.get('fp16_lm_cross_entropy', False),
        use_cpu_initialization=cfg.get('use_cpu_initialization', False),
        hidden_dropout=cfg.get('hidden_dropout', 0.1),
        precision=cfg.get('precision', 16),
        fp32_residual_connection=cfg.get('fp32_residual_connection', False),
        activations_checkpoint_method=cfg.get('activations_checkpoint_method', None),
        activations_checkpoint_num_layers=cfg.get('activations_checkpoint_num_layers', 1),
        layernorm_epsilon=cfg.get('layernorm_epsilon', 1e-5),
        masked_softmax_fusion=cfg.get('masked_softmax_fusion', True),
        bias_gelu_fusion=cfg.get('bias_gelu_fusion', True),
        onnx_safe=cfg.get('onnx_safe', False),
        add_binary_head=cfg.bert_binary_head,
        megatron_legacy=cfg.get('megatron_legacy', False),
    )
def __init__(self, cfg: DictConfig, trainer: Trainer):
    super().__init__(cfg, trainer)
    self.cfg = cfg

    # Load pretrained GPT model and tokenizer
    if cfg.get('language_model_path', None):
        self.frozen_model = MegatronGPTModel.restore_from(
            cfg.get('language_model_path'),
            trainer=trainer,
            save_restore_connector=NLPSaveRestoreConnector(),
        )

    # Freeze all GPT model weights for prompt-tuning/p-tuning
    self.frozen_model.freeze()
    self.tokenizer = self.frozen_model.tokenizer
    self.float_type = self.frozen_model.model.language_model.encoder.layers[0].dtype
    self.hidden_size = self.frozen_model.cfg.hidden_size
    self.word_embeddings = self.frozen_model.model.language_model.embedding.word_embeddings
    self.existing_tasks = list(self.cfg.get('existing_tasks', []))
    self.new_tasks = list(self.cfg.get('new_tasks', []))

    # Load templates for assigning virtual prompt token positions
    self.load_task_templates(self.cfg.task_templates)

    # Prompt table stores all task embeddings, p-tuning virtual prompts get added to the table after training
    self.prompt_table = PromptTable(
        existing_tasks=self.existing_tasks,
        task_templates=self.task_templates,
        task_id_num_to_name=self.task_id_num_to_name,
        hidden_size=self.hidden_size,
    )
    self._prompt_table_key = VirtualPromptSource.PROMPT_TABLE.value
    self._prompt_encoder_key = VirtualPromptSource.PROMPT_ENCODER.value

    # Prepare pseudo token ids for virtual/virtual prompt tokens
    self.pseudo_tokens = get_pseudo_tokens(self.max_virtual_tokens)
    self.tokenizer.add_special_tokens({'additional_special_tokens': self.pseudo_tokens})
    self.pseudo_token_ids = self.tokenizer.tokens_to_ids(self.pseudo_tokens)
    self.pseudo_token_ids_start = self.pseudo_token_ids[0]
    self.pad_token_id = self.tokenizer.pad_id if self.tokenizer.pad_id is not None else self.tokenizer.unk_id
    self.virtual_prompt_style = VirtualPromptStyle(cfg.virtual_prompt_style)

    # Prompt tuning stores virtual prompts in the prompt table and tunes their weight directly
    if self.virtual_prompt_style in [VirtualPromptStyle.PROMPT_TUNING, VirtualPromptStyle.INFERENCE]:
        self.virtual_prompt_source = VirtualPromptSource.PROMPT_TABLE

    # P-Tuning uses an LSTM Encoder to produce virtual token embeddings
    elif self.virtual_prompt_style == VirtualPromptStyle.P_TUNING:
        self.virtual_prompt_source = VirtualPromptSource.PROMPT_ENCODER

    else:
        raise ValueError(
            f"\nvirtual prompt style '{cfg.virtual_prompt_style}' not recognized, please use one of 'prompt-tuning' or 'p-tuning'"
        )

    self._reduced_loss_buffer = []
    self._inference_config = None

    if self.trainer.precision == 32:
        self.autocast_dtype = torch.float
    elif self.trainer.precision == 16:
        self.autocast_dtype = torch.half
    elif self.trainer.precision == 'bf16':
        self.autocast_dtype = torch.bfloat16
    else:
        raise ValueError('precision must be in [32, 16, "bf16"]')

    # make sure to use the default pytorch lightning gradient clipping from the base model
    self.grad_clip_pl_default = True

    # no support of amp o2
    self.megatron_amp_o2 = False
def __init__(self, cfg: DictConfig, trainer: Trainer):
    super().__init__(cfg, trainer=trainer)

    # build tokenizer (defaults to nemo supported tokenizers)
    self._build_tokenizer()

    # manipulate vocabulary (e.g., pad vocabulary for better efficiency)
    self._build_vocab()

    # TODO: create get_encoder_decoder_model() here for different losses (e.g., nll, vae, mim)
    self.enc_dec_model = MegatronTokenLevelEncoderDecoderModule(
        encoder_arch=cfg.encoder_arch,
        decoder_arch=cfg.decoder_arch,
        vocab_size=self.padded_vocab_size,
        hidden_size=cfg.hidden_size,
        max_position_embeddings=cfg.max_position_embeddings,
        num_layers=cfg.num_layers,
        num_attention_heads=cfg.num_attention_heads,
        apply_query_key_layer_scaling=cfg.get('apply_query_key_layer_scaling', True),
        kv_channels=cfg.get('kv_channels', None),
        ffn_hidden_size=cfg.ffn_hidden_size,
        num_tokentypes=0,
        parallel_output=True,
        pre_process=cfg.get('pre_process', True),
        post_process=cfg.get('post_process', True),
        init_method_std=cfg.get('init_method_std', 0.02),
        fp16_cross_entropy=cfg.get('fp16_lm_cross_entropy', False),
        use_cpu_initialization=cfg.get('use_cpu_initialization', False),
        hidden_dropout=cfg.get('hidden_dropout', 0.1),
        attention_dropout=cfg.get('attention_dropout', 0.1),
        precision=cfg.get('precision', 16),
        fp32_residual_connection=cfg.get('fp32_residual_connection', False),
        activations_checkpoint_method=cfg.get('activations_checkpoint_method', None),
        activations_checkpoint_num_layers=cfg.get('activations_checkpoint_num_layers', 1),
        layernorm_epsilon=cfg.get('layernorm_epsilon', 1e-5),
        persist_layer_norm=cfg.get('persist_layer_norm', False),
        bias_gelu_fusion=cfg.get('bias_gelu_fusion', True),
        masked_softmax_fusion=cfg.get('masked_softmax_fusion', True),
        onnx_safe=cfg.get('onnx_safe', False),
        activation=cfg.get('activation', 'gelu'),
    )

    self.setup_optimizer_param_groups()

    self.megatron_amp_o2 = cfg.get('megatron_amp_O2', False)

    if self.megatron_amp_o2:
        # Pre-allocate the model on GPU to have master parameters allocated on the same device with matching data type
        self.enc_dec_model.cuda(torch.cuda.current_device())

        # Model wrapper to convert both model and inputs to half precision
        self.enc_dec_model = Float16Module(module=self.enc_dec_model, precision=cfg.precision)
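# The precision -> autocast dtype branching above is repeated verbatim in several
# of these constructors. A minimal sketch of how it could be factored into a shared
# helper; the name `precision_to_dtype` is illustrative and not an existing NeMo utility.
import torch


def precision_to_dtype(precision) -> torch.dtype:
    """Map a trainer/config precision value (32, 16, or 'bf16') to a torch dtype."""
    if precision == 32:
        return torch.float
    elif precision == 16:
        return torch.half
    elif precision == 'bf16':
        return torch.bfloat16
    raise ValueError('precision must be in [32, 16, "bf16"]')


# hypothetical usage inside one of the constructors above:
# self.autocast_dtype = precision_to_dtype(self.trainer.precision)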
class Component(Device):
    """customize phidl.Device

    Allow name to be set like Component('arc') or Component(name='arc')

    - get/write JSON metadata
    - get ports by type (optical, electrical ...)
    - set data_analysis and test_protocols

    Args:
        name: component_name

    Properties:
        info: includes
            full: full list of settings that create the function
            changed: changed settings
            default: includes the default signature of the component
            - derived properties
            - external metadata (test_protocol, docs, ...)
            - simulation_settings
            - function_name
            - name: for the component
            - name_long: for the component
    """

    def __init__(self, name: str = "Unnamed", *args, **kwargs) -> None:
        self.__ports__ = {}
        self.aliases = {}
        self.uid = str(uuid.uuid4())[:8]
        if "with_uuid" in kwargs or name == "Unnamed":
            name += "_" + self.uid

        super(Component, self).__init__(name=name, exclude_from_current=True)
        self.info = DictConfig(self.info)
        self.name = name  # overwrite PHIDL's incremental naming convention
        self.name_long = None

    @classmethod
    def __get_validators__(cls):
        yield cls.validate

    @classmethod
    def validate(cls, v):
        """pydantic assumes component is valid if:
        - name characters < MAX_NAME_LENGTH
        - is not empty (has references or polygons)
        """
        assert isinstance(v, Component)
        assert len(v.name) <= MAX_NAME_LENGTH, f"name `{v.name}` {len(v.name)} > {MAX_NAME_LENGTH} "
        # assert v.references or v.polygons, f"No references or polygons in {v.name}"
        return v

    @property
    def ports_layer(self) -> Dict[str, str]:
        """Returns a mapping from layer0_layer1_E0: portName"""
        return map_ports_layer_to_orientation(self.ports)

    def port_by_orientation_cw(self, key: str, **kwargs):
        """Returns port by indexing them clockwise"""
        m = map_ports_to_orientation_cw(self.ports, **kwargs)
        if key not in m:
            raise KeyError(f"{key} not in {list(m.keys())}")
        key2 = m[key]
        return self.ports[key2]

    def port_by_orientation_ccw(self, key: str, **kwargs):
        """Returns port by indexing them counter clockwise"""
        m = map_ports_to_orientation_ccw(self.ports, **kwargs)
        if key not in m:
            raise KeyError(f"{key} not in {list(m.keys())}")
        key2 = m[key]
        return self.ports[key2]

    def get_ports_xsize(self, **kwargs) -> float:
        """Returns the x distance from east to west ports

        Args:
            kwargs: orientation, port_type, layer
        """
        ports_cw = self.get_ports_list(clockwise=True, **kwargs)
        ports_ccw = self.get_ports_list(clockwise=False, **kwargs)
        return snap_to_grid(ports_ccw[0].x - ports_cw[0].x)

    def get_ports_ysize(self, **kwargs) -> float:
        """Returns the y distance from south to north ports"""
        ports_cw = self.get_ports_list(clockwise=True, **kwargs)
        ports_ccw = self.get_ports_list(clockwise=False, **kwargs)
        return snap_to_grid(ports_ccw[0].y - ports_cw[0].y)

    def plot_netlist(self, with_labels: bool = True, font_weight: str = "normal") -> nx.Graph:
        """plots a netlist graph with networkx
        https://networkx.github.io/documentation/stable/reference/generated/networkx.drawing.nx_pylab.draw_networkx.html

        Args:
            with_labels: label nodes
            font_weight: normal, bold
        """
        netlist = self.get_netlist()
        connections = netlist["connections"]
        placements = netlist["placements"]

        G = nx.Graph()
        G.add_edges_from(
            [(",".join(k.split(",")[:-1]), ",".join(v.split(",")[:-1])) for k, v in connections.items()]
        )
        pos = {k: (v["x"], v["y"]) for k, v in placements.items()}
        labels = {k: ",".join(k.split(",")[:1]) for k in placements.keys()}
        nx.draw(
            G,
            with_labels=with_labels,
            font_weight=font_weight,
            labels=labels,
            pos=pos,
        )
        return G

    def get_netlist_yaml(self) -> str:
        """Return YAML netlist."""
        return OmegaConf.to_yaml(self.get_netlist())
    def write_netlist(self, filepath: str, full_settings: bool = False) -> None:
        """Write netlist in YAML"""
        netlist = self.get_netlist(full_settings=full_settings)
        OmegaConf.save(netlist, filepath)

    def write_netlist_dot(self, filepath: Optional[str] = None) -> None:
        """Write netlist graph in DOT format."""
        from networkx.drawing.nx_agraph import write_dot

        filepath = filepath or f"{self.name}.dot"
        G = self.plot_netlist()
        write_dot(G, filepath)

    def get_netlist(self, full_settings: bool = False) -> Any:
        """Returns netlist dict(instances, placements, connections, ports)

        instances = {instances}
        placements = {instance_name,uid,x,y: dict(x=0, y=0, rotation=90), ...}
        connections = {instance_name_src_x_y,portName: instance_name_dst_x_y,portName}
        ports: {portName: instance_name,portName}

        Args:
            full_settings: exports all info, when false only settings_changed
        """
        from gdsfactory.get_netlist import get_netlist

        return get_netlist(component=self, full_settings=full_settings)

    def get_name_long(self) -> str:
        """returns the long name if it's been truncated to MAX_NAME_LENGTH"""
        if self.name_long:
            return self.name_long
        else:
            return self.name

    def get_parent_name(self) -> str:
        """Returns the parent name if it has a parent, else returns its own name.

        Returns the original parent name for hierarchical components;
        for non-hierarchical components it just returns the component name.
        """
        return self.info.get("parent_name", self.name)

    def assert_ports_on_grid(self, nm: int = 1) -> None:
        """Asserts that all ports are on grid."""
        for port in self.ports.values():
            port.assert_on_grid(nm=nm)

    def get_ports_dict(self, **kwargs) -> Dict[str, Port]:
        """Returns a dict of ports.

        Args:
            layer: port GDS layer
            prefix: for example "E" for east, "W" for west ...
        """
        return select_ports(self.ports, **kwargs)

    def get_ports_list(self, **kwargs) -> List[Port]:
        """Returns a list of ports.

        Args:
            layer: port GDS layer
            prefix: for example "E" for east, "W" for west ...
            orientation: angle in degrees for the port
        """
        return list(select_ports(self.ports, **kwargs).values())

    def get_ports_array(self) -> Dict[str, ndarray]:
        """returns ports as a dict of np arrays"""
        ports_array = {
            port_name: np.array(
                [
                    port.x,
                    port.y,
                    int(port.orientation),
                    port.width,
                    port.layer[0],
                    port.layer[1],
                ]
            )
            for port_name, port in self.ports.items()
        }
        return ports_array

    def ref(
        self,
        position: Coordinate = (0, 0),
        port_id: Optional[str] = None,
        rotation: int = 0,
        h_mirror: bool = False,
        v_mirror: bool = False,
    ) -> ComponentReference:
        """Returns Component reference."""
        _ref = ComponentReference(self)

        if port_id and port_id not in self.ports:
            raise ValueError(f"port {port_id} not in {self.ports.keys()}")

        if port_id:
            origin = self.ports[port_id].position
        else:
            origin = (0, 0)

        if h_mirror:
            _ref.reflect_h(port_id)

        if v_mirror:
            _ref.reflect_v(port_id)

        if rotation != 0:
            _ref.rotate(rotation, origin)
        _ref.move(origin, position)

        return _ref

    def ref_center(self, position=(0, 0)):
        """returns a reference of the component centered at (x=0, y=0)"""
        si = self.size_info
        yc = si.south + si.height / 2
        xc = si.west + si.width / 2
        center = (xc, yc)
        _ref = ComponentReference(self)
        _ref.move(center, position)
        return _ref

    def __repr__(self) -> str:
        return f"{self.name}: uid {self.uid}, ports {list(self.ports.keys())}, aliases {list(self.aliases.keys())}, {len(self.polygons)} polygons, {len(self.references)} references"

    @property
    def pprint(self) -> None:
        """Prints component info."""
        print(OmegaConf.to_yaml(self.info))

    @property
    def pprint_ports(self) -> None:
        """Prints component ports."""
        ports_list = self.get_ports_list()
        for port in ports_list:
            print(port)

    @property
    def info_child(self) -> DictConfig:
        """Returns info from child if any, otherwise returns its info"""
        info = self.info
        while info.get("child"):
            info = info.get("child")
        return info

    def add_port(
        self,
        name: Optional[Union[str, int, object]] = None,
        midpoint: Tuple[float, float] = (0.0, 0.0),
        width: float = 1.0,
        orientation: int = 45,
        port: Optional[Port] = None,
        layer: Tuple[int, int] = (1, 0),
        port_type: str = "optical",
        cross_section: Optional[CrossSection] = None,
    ) -> Port:
        """Can be called to copy an existing port like add_port(port=existing_port)
        or to create a new port add_port(myname, mymidpoint, mywidth, myorientation).
        Can also be called to copy an existing port with a new name:
        add_port(port=existing_port, name=new_name)
        """
        if port:
            if not isinstance(port, Port):
                raise ValueError(f"add_port() needs a Port, got {type(port)}")
            p = port.copy(new_uid=True)
            if name is not None:
                p.name = name
            p.parent = self

        elif isinstance(name, Port):
            p = name.copy(new_uid=True)
            p.parent = self
            name = p.name

        else:
            half_width = width / 2
            half_width_correct = snap_to_grid(half_width, nm=1)
            if not np.isclose(half_width, half_width_correct):
                warnings.warn(
                    f"port width = {width} will create off-grid points.\n"
                    f"You can fix it by changing width to {2*half_width_correct}\n"
                    f"port {name}, {midpoint} {orientation} deg",
                    stacklevel=3,
                )
            p = Port(
                name=name,
                midpoint=(snap_to_grid(midpoint[0]), snap_to_grid(midpoint[1])),
                width=snap_to_grid(width),
                orientation=orientation,
                parent=self,
                layer=layer,
                port_type=port_type,
                cross_section=cross_section,
            )
            if name is not None:
                p.name = name
        if p.name in self.ports:
            raise ValueError(f"add_port() Port name {p.name} exists in {self.name}")

        self.ports[p.name] = p
        return p

    def add_ports(self, ports: Union[List[Port], Dict[str, Port]], prefix: str = ""):
        """Adds a list or dict of ports, optionally prefixing their names."""
        ports = ports if isinstance(ports, list) else ports.values()
        for port in ports:
            name = f"{prefix}{port.name}" if prefix else port.name
            self.add_port(name=name, port=port)

    def snap_ports_to_grid(self, nm: int = 1) -> None:
        for port in self.ports.values():
            port.snap_to_grid(nm=nm)

    def remove_layers(
        self,
        layers: Union[List[Tuple[int, int]], Tuple[int, int]] = (),
        include_labels: bool = True,
        invert_selection: bool = False,
        recursive: bool = True,
    ) -> Device:
        """Remove a list of layers."""
        layers = [_parse_layer(layer) for layer in layers]
        all_D = list(self.get_dependencies(recursive))
        all_D += [self]
        for D in all_D:
            for polygonset in D.polygons:
                polygon_layers = zip(polygonset.layers, polygonset.datatypes)
                polygons_to_keep = [(pl in layers) for pl in polygon_layers]
                if not invert_selection:
                    polygons_to_keep = [(not p) for p in polygons_to_keep]
                polygonset.polygons = [
                    p for p, keep in zip(polygonset.polygons, polygons_to_keep) if keep
                ]
                polygonset.layers = [
                    p for p, keep in zip(polygonset.layers, polygons_to_keep) if keep
                ]
                polygonset.datatypes = [
                    p for p, keep in zip(polygonset.datatypes, polygons_to_keep) if keep
                ]

            if include_labels:
                new_labels = []
                for label in D.labels:
                    original_layer = (label.layer, label.texttype)
                    original_layer = _parse_layer(original_layer)
                    if invert_selection:
                        keep_layer = original_layer in layers
                    else:
                        keep_layer = original_layer not in layers
                    if keep_layer:
                        new_labels += [label]
                D.labels = new_labels
        return self

    def extract(
        self,
        layers: Union[List[Tuple[int, int]], Tuple[int, int]] = (),
    ) -> Device:
        """Extract polygons from a Component.

        adapted from phidl.geometry.
        """
        from gdsfactory.name import clean_value

        component = Component(f"{self.name}_{clean_value(layers)}")
        if type(layers) not in (list, tuple):
            raise ValueError("layers needs to be a list or tuple")
        poly_dict = self.get_polygons(by_spec=True)
        parsed_layer_list = [_parse_layer(layer) for layer in layers]
        for layer, polys in poly_dict.items():
            if _parse_layer(layer) in parsed_layer_list:
                component.add_polygon(polys, layer=layer)
        return component

    def copy(self) -> Device:
        return copy(self)

    def copy_child_info(self, component) -> None:
        """Copy info from another component.

        great for hierarchical components that propagate child cells info.
""" self.info.child = component.info @property def size_info(self) -> SizeInfo: """size info of the component""" # if self.__size_info__ == None: # self.__size_info__ = SizeInfo(self.bbox) return SizeInfo(self.bbox) # self.__size_info__ def add_ref(self, D: Device, alias: Optional[str] = None) -> "ComponentReference": """Takes a Component and adds it as a ComponentReference to the current Device.""" if not isinstance(D, Component) and not isinstance(D, Device): raise TypeError( f"Component.add_ref() type = {type(D)} needs to be a Component." ) ref = ComponentReference( D) # Create a ComponentReference (CellReference) self.add( ref) # Add ComponentReference (CellReference) to Device (Cell) if alias is not None: self.aliases[alias] = ref return ref def get_layers( self) -> Union[Set[Tuple[int, int]], Set[Tuple[int64, int64]]]: """returns a set of (layer, datatype) .. code :: import gdsfactory as gf gf.components.straight().get_layers() == {(1, 0), (111, 0)} """ layers = set() for element in itertools.chain(self.polygons, self.paths): for layer, datatype in zip(element.layers, element.datatypes): layers.add((layer, datatype)) for reference in self.references: for layer, datatype in reference.ref_cell.get_layers(): layers.add((layer, datatype)) for label in self.labels: layers.add((label.layer, 0)) return layers def _repr_html_(self): """Print component, show geometry in matplotlib and in klayout when using jupyter notebooks """ self.show(show_ports=False) self.plot() return self.__str__() def plot( self, clear_cache: bool = False, ) -> None: """Plot component in matplotlib""" from phidl import quickplot as plot from gdsfactory.cell import clear_cache as clear_cache_function plot(self) if clear_cache: clear_cache_function() def show( self, show_ports: bool = True, show_subports: bool = False, clear_cache: bool = False, ) -> None: """Show component in klayout if show_subports = True We add pins in a new component that contains a reference to the old component so we don't modify the original component Args: show_ports: shows component with port markers and labels show_subports: add ports markers and labels to component references clear_cache: after showing component clears cache (useful for jupyter) """ from gdsfactory.add_pins import add_pins_container, add_pins_to_references from gdsfactory.show import show if show_subports: component = add_pins_to_references(component=self) component.name = self.name + "_show_subports" elif show_ports: component = add_pins_container(component=self) component.name = self.name + "_show_ports" else: component = self show(component, clear_cache=clear_cache) def plotqt(self): from phidl.quickplotter import quickplot2 quickplot2(self) def write_gds( self, gdspath: Optional[PathType] = None, gdsdir: PathType = tmp, unit: float = 1e-6, precision: float = 1e-9, auto_rename: bool = False, timestamp: Optional[datetime.datetime] = _timestamp2019, ) -> Path: """Write component to GDS and returs gdspath Args: component: gf.Component. gdspath: GDS file path to write to. unit unit size for objects in library. precision: for the dimensions of the objects in the library (m). remove_previous_markers: clear previous ones to avoid duplicates. auto_rename: If True, fixes any duplicate cell names. timestamp: datetime object or boolean Sets the GDSII timestamp. Default = 2019-10-25 07:36:32.827300 If None, defaults to Now. 
        Returns:
            gdspath
        """
        gdsdir = pathlib.Path(gdsdir)
        gdspath = gdspath or gdsdir / (self.name + ".gds")
        gdspath = pathlib.Path(gdspath)
        gdsdir = gdspath.parent
        gdsdir.mkdir(exist_ok=True, parents=True)

        cells = self.get_dependencies()
        cell_names = [cell.name for cell in list(cells)]
        cell_names_unique = set(cell_names)

        if len(cell_names) != len(set(cell_names)):
            for cell_name in cell_names_unique:
                cell_names.remove(cell_name)
            cell_names_duplicated = "\n".join(set(cell_names))
            raise ValueError(f"Duplicated cell names in {self.name}:\n{cell_names_duplicated}")

        referenced_cells = list(self.get_dependencies(recursive=True))
        all_cells = [self] + referenced_cells

        lib = gdspy.GdsLibrary(unit=unit, precision=precision)
        lib.write_gds(gdspath, cells=all_cells, timestamp=timestamp)
        self.path = gdspath
        return gdspath

    def write_gds_with_metadata(self, *args, **kwargs) -> Path:
        """Write component in GDS and metadata (component settings) in YAML"""
        gdspath = self.write_gds(*args, **kwargs)
        metadata = gdspath.with_suffix(".yml")
        metadata.write_text(self.to_yaml)
        return gdspath

    @property
    def to_dict_config(self) -> DictConfig:
        """Returns a DictConfig representation of the component."""
        d = DictConfig({})
        ports = {port.name: port.settings for port in self.get_ports_list()}
        clean_dict(ports)
        d.ports = ports
        d.info = self.info
        d.version = 1
        d.cells = recurse_structures(self)
        return OmegaConf.create(d)

    @property
    def to_dict(self) -> str:
        return OmegaConf.to_container(self.to_dict_config)

    @property
    def to_yaml(self) -> str:
        return OmegaConf.to_yaml(self.to_dict)

    @property
    def to_dict_polygons(self) -> DictConfig:
        """Returns a dict representation of the flattened component."""
        d = DictConfig({})
        polygons = {}
        layer_to_polygons = self.get_polygons(by_spec=True)

        for layer, polygons_layer in layer_to_polygons.items():
            for polygon in polygons_layer:
                layer_name = f"{layer[0]}_{layer[1]}"
                polygons[layer_name] = [tuple(snap_to_grid(v)) for v in polygon]

        ports = {port.name: port.settings for port in self.get_ports_list()}
        clean_dict(ports)
        clean_dict(polygons)
        d.info = self.info
        d.polygons = polygons
        d.ports = ports
        return OmegaConf.create(d)

    def auto_rename_ports(self, **kwargs) -> None:
        auto_rename_ports(self, **kwargs)

    def auto_rename_ports_counter_clockwise(self, **kwargs) -> None:
        auto_rename_ports_counter_clockwise(self, **kwargs)

    def auto_rename_ports_layer_orientation(self, **kwargs) -> None:
        auto_rename_ports_layer_orientation(self, **kwargs)

    def auto_rename_ports_orientation(self, **kwargs) -> None:
        auto_rename_ports_orientation(self, **kwargs)

    def move(self, *args, **kwargs):
        raise MutabilityError(
            "Don't move Components. Create a reference and move the reference instead."
        )

    def rotate(self, angle: int = 90):
        """Returns a new component with a rotated reference to the original component

        Args:
            angle: in degrees
        """
        from gdsfactory.rotate import rotate

        return rotate(component=self, angle=angle)
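# A small usage sketch of the Component API defined above, assuming the
# gdsfactory package and its helpers (Port, ComponentReference, snap_to_grid, ...)
# are importable in the surrounding module. Names and values are illustrative only.
if __name__ == "__main__":
    c = Component("demo_straight")
    c.add_port(name="o1", midpoint=(0.0, 0.0), width=0.5, orientation=180, layer=(1, 0))
    c.add_port(name="o2", midpoint=(10.0, 0.0), width=0.5, orientation=0, layer=(1, 0))

    top = Component("demo_top")
    ref = top.add_ref(c, alias="straight_1")  # adds a reference, not a copy
    top.add_ports(ref.ports, prefix="s1_")    # expose the child ports on the parent

    print(top.get_ports_list())
    gdspath = top.write_gds()                 # writes <name>.gds and returns its path
    print(gdspath)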