def _get_seq_metadata(self, num_doc_classes, num_word_classes):
    """Build a CommonMetadata describing a single sequence-token feature.

    Args:
        num_doc_classes: number of document labels; when truthy, a doc-label
            FieldMeta with vocab entries "C_0".."C_{n-1}" is added as target.
        num_word_classes: accepted for signature parity; not used here.

    Returns:
        CommonMetadata with one TEXT_FIELD feature (seq tokens) and the
        assembled target/label bookkeeping.
    """
    target_metas = []
    if num_doc_classes:
        doc_vocab = Vocab(Counter())
        # Vocab is created empty and its itos is overwritten directly.
        doc_vocab.itos = ["C_{}".format(i) for i in range(num_doc_classes)]
        doc_meta = FieldMeta()
        doc_meta.vocab_size = num_doc_classes
        doc_meta.vocab = doc_vocab
        target_metas.append(doc_meta)

    seq_vocab = Vocab(Counter())
    seq_vocab.itos = W_VOCAB

    seq_meta = FieldMeta()
    seq_meta.unk_token_idx = UNK_IDX
    seq_meta.pad_token_idx = PAD_IDX
    seq_meta.vocab_size = W_VOCAB_SIZE
    seq_meta.vocab = seq_vocab
    seq_meta.vocab_export_name = "seq_tokens_vals"
    seq_meta.pretrained_embeds_weight = None
    seq_meta.dummy_model_input = SeqFeatureField.dummy_model_input

    meta = CommonMetadata()
    meta.features = {DatasetFieldName.TEXT_FIELD: seq_meta}
    # A single label is unwrapped from the list; multiple stay a list.
    meta.target = target_metas[0] if len(target_metas) == 1 else target_metas
    meta.label_names = [t.vocab.itos for t in target_metas]
    meta.feature_itos_map = {
        feat.vocab_export_name: feat.vocab.itos for feat in meta.features.values()
    }
    return meta
def _get_metadata(self, num_doc_classes, num_word_classes):
    """Build a CommonMetadata covering text, dict, char and dense features.

    Args:
        num_doc_classes: when truthy, adds a doc-label target FieldMeta with
            vocab entries "C_0".."C_{n-1}".
        num_word_classes: when truthy, adds a word-label target FieldMeta with
            vocab entries "W_0".."W_{n-1}" and pad_token_idx of 0.

    Returns:
        CommonMetadata with four features keyed by DatasetFieldName and the
        assembled target/label bookkeeping.
    """

    def _vocab_from(itos):
        # Vocab is created empty and its itos is overwritten directly.
        v = Vocab(Counter())
        v.itos = itos
        return v

    target_metas = []
    if num_doc_classes:
        doc_meta = FieldMeta()
        doc_meta.vocab_size = num_doc_classes
        doc_meta.vocab = _vocab_from(
            ["C_{}".format(i) for i in range(num_doc_classes)]
        )
        target_metas.append(doc_meta)
    if num_word_classes:
        word_meta = FieldMeta()
        word_meta.vocab_size = num_word_classes
        word_meta.vocab = _vocab_from(
            ["W_{}".format(i) for i in range(num_word_classes)]
        )
        word_meta.pad_token_idx = 0
        target_metas.append(word_meta)

    text_meta = FieldMeta()
    text_meta.unk_token_idx = UNK_IDX
    text_meta.pad_token_idx = PAD_IDX
    text_meta.vocab_size = W_VOCAB_SIZE
    text_meta.vocab = _vocab_from(W_VOCAB)
    text_meta.vocab_export_name = "tokens_vals"
    text_meta.pretrained_embeds_weight = None
    text_meta.dummy_model_input = TextFeatureField.dummy_model_input

    dict_meta = FieldMeta()
    dict_meta.vocab_size = DICT_VOCAB_SIZE
    dict_meta.vocab = _vocab_from(DICT_VOCAB)
    dict_meta.vocab_export_name = "dict_vals"
    dict_meta.pretrained_embeds_weight = None
    dict_meta.dummy_model_input = DictFeatureField.dummy_model_input

    char_meta = FieldMeta()
    char_meta.vocab_size = CHAR_VOCAB_SIZE
    char_meta.vocab = _vocab_from(CHAR_VOCAB)
    char_meta.vocab_export_name = "char_vals"
    char_meta.pretrained_embeds_weight = None
    char_meta.dummy_model_input = CharFeatureField.dummy_model_input

    dense_meta = FieldMeta()
    dense_meta.vocab_size = 0
    dense_meta.vocab = _vocab_from([])
    dense_meta.vocab_export_name = "dense_vals"
    dense_meta.pretrained_embeds_weight = None
    # ugh, dims are fixed — the dense dummy input is built by hand.
    dense_meta.dummy_model_input = torch.tensor(
        [[1.0] * DENSE_FEATURE_DIM, [1.0] * DENSE_FEATURE_DIM],
        dtype=torch.float,
        device="cpu",
    )

    meta = CommonMetadata()
    meta.features = {
        DatasetFieldName.TEXT_FIELD: text_meta,
        DatasetFieldName.DICT_FIELD: dict_meta,
        DatasetFieldName.CHAR_FIELD: char_meta,
        DatasetFieldName.DENSE_FIELD: dense_meta,
    }
    # A single label is unwrapped from the list; multiple stay a list.
    meta.target = target_metas[0] if len(target_metas) == 1 else target_metas
    meta.label_names = [t.vocab.itos for t in target_metas]
    meta.feature_itos_map = {
        feat.vocab_export_name: feat.vocab.itos for feat in meta.features.values()
    }
    return meta