Example #1
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        self.vocab = vocab
        V = len(vocab)
        D = n_dim
        self.hidden_dim = n_dim

        #video_encoder_layer = nn.TransformerEncoderLayer(d_model=300, nhead=6, dim_feedforward=1024, dropout=0.1, activation='gelu')
        #self.video_encoder = nn.TransformerEncoder(video_encoder_layer, num_layers=1)
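        # Bidirectional GRU over the frame stream: the input concatenates the
        # image feature with a 21-dim character vector (presumably one-hot
        # speaker indicators); 2 x 150 hidden units yield 300-dim outputs.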
        self.video_encoder = nn.GRU(image_dim + 21,
                                    150,
                                    bidirectional=True,
                                    batch_first=True)

        multimodal_encoder_layer = nn.TransformerEncoderLayer(
            d_model=n_dim,
            nhead=6,
            dim_feedforward=1024,
            dropout=0.5,
            activation='gelu')
        self.transformer = nn.TransformerEncoder(multimodal_encoder_layer,
                                                 num_layers=2)

        self.embedding = nn.Embedding(V, D)
        n_dim = args.n_dim
        image_dim = args.image_dim

        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.language_model = RobertaModel.from_pretrained('roberta-base',
                                                           return_dict=True)
        #for param in self.language_model.base_model.parameters():
        #    param.requires_grad = False

        # Update config to finetune token type embeddings
        #self.language_model.config.type_vocab_size = 3

        # Create a new token type embeddings layer with 3 possible segment IDs
        # instead of RoBERTa's default 1
        #self.language_model.embeddings.token_type_embeddings = nn.Embedding(3, self.language_model.config.hidden_size)

        # Initialize it
        #self.language_model.embeddings.token_type_embeddings.weight.data.normal_(mean=0.0, std=self.language_model.config.initializer_range)
        '''
        # Freeze the first 10 layers
        modules = [self.language_model.encoder.layer[:10]]
        for module in modules:
            for param in module.parameters():
                param.requires_grad = False
        '''

        #self.cmat = ContextMatching(n_dim * 3)
        #self.lstm_raw = RNNEncoder(300, 150, bidirectional=True, dropout_p=0, n_layers=1, rnn_type="lstm")
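        # NOTE: lstm_script is re-created below under `if self.script_on`,
        # making this unconditional instance redundant.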
        self.lstm_script = RNNEncoder(321,
                                      150,
                                      bidirectional=True,
                                      dropout_p=0,
                                      n_layers=1,
                                      rnn_type="lstm")
        self.script_on = "script" in args.stream_type
        self.vbb_on = "visual_bb" in args.stream_type
        self.vmeta_on = "visual_meta" in args.stream_type
        #self.conv_pool = Conv1d(n_dim*4+1, n_dim*2)

        self.character = nn.Parameter(torch.randn(22,
                                                  D,
                                                  device=args.device,
                                                  dtype=torch.float),
                                      requires_grad=True)
        self.norm1 = Norm(D)

        self.lang_proj = nn.Linear(768, 300)
        self.visual_proj = nn.Linear(2048, 300)

        #self.mh_video = nn.MultiheadAttention(300, 6)
        #self.context_gru = nn.GRU(300, 150, bidirectional=True, batch_first=True)
        self.cross1 = UtilityLayer(300)
        self.cross2 = UtilityLayer(300)
        self.cross3 = UtilityLayer(300)
        self.context_proj = nn.Linear(5 * 300, 300)

        self.char_classifier = nn.Linear(300, 21)
        self.mask_classifier = nn.Linear(300, self.tokenizer.vocab_size)
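        # Auxiliary heads: char_classifier predicts one of the 21 speakers and
        # mask_classifier reconstructs masked tokens over the RoBERTa
        # vocabulary (presumably self-supervised auxiliary losses).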

        self.output = nn.Linear(300, 1)

        self.answer_rnn = nn.LSTM(300, 300, 1, batch_first=True, dropout=0)

        speaker_name = [
            'None',  # index 0: unknown speaker 
            'Anna',
            'Chairman',
            'Deogi',
            'Dokyung',
            'Gitae',
            'Haeyoung1',
            'Haeyoung2',
            'Heeran',
            'Hun',
            'Jeongsuk',
            'Jinsang',
            'Jiya',
            'Kyungsu',
            'Sangseok',
            'Seohee',
            'Soontack',
            'Sukyung',
            'Sungjin',
            'Taejin',
            'Yijoon'
        ]
        self.speaker_to_index = {
            name: index
            for index, name in enumerate(speaker_name)
        }
        self.index_to_speaker = {
            v: k
            for k, v in self.speaker_to_index.items()
        }

        if self.script_on:
            self.lstm_script = RNNEncoder(321,
                                          150,
                                          bidirectional=True,
                                          dropout_p=0,
                                          n_layers=1,
                                          rnn_type="lstm")
            self.classifier_script = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                   nn.Softmax(dim=1))
            self.mhattn_script = CharMatching(4, D, D)

        if self.vmeta_on:
            self.lstm_vmeta = RNNEncoder(321,
                                         150,
                                         bidirectional=True,
                                         dropout_p=0,
                                         n_layers=1,
                                         rnn_type="lstm")
            self.classifier_vmeta = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                  nn.Softmax(dim=1))
            self.mhattn_vmeta = CharMatching(4, D, D)

        if self.vbb_on:
            self.lstm_vbb = RNNEncoder(image_dim + 21,
                                       150,
                                       bidirectional=True,
                                       dropout_p=0,
                                       n_layers=1,
                                       rnn_type="lstm")
            self.vbb_fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(image_dim, n_dim),
                nn.Tanh(),
            )
            self.classifier_vbb = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                nn.Softmax(dim=1))

            self.mhattn_vbb = CharMatching(4, D, D)
Example #2
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        self.vocab = vocab
        V = len(vocab)
        D = n_dim

        self.hidden_dim = n_dim

        self.embedding = nn.Embedding(V, D)
        n_dim = args.n_dim
        image_dim = args.image_dim

        bert_vocab_size = 30525
        self.bert = BertModel.from_pretrained('bert-base-cased')
        self.bert.resize_token_embeddings(bert_vocab_size)
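        # 30525 exceeds the stock bert-base-cased vocabulary (28996),
        # presumably to make room for tokens added to the tokenizer.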
        self.bert_dim = 768

        self.cmat = ContextMatching(n_dim * 3)
        self.lstm_raw = RNNEncoder(self.bert_dim,
                                   150,
                                   bidirectional=True,
                                   dropout_p=0,
                                   n_layers=1,
                                   rnn_type="lstm")
        self.lstm_script = RNNEncoder(321,
                                      150,
                                      bidirectional=True,
                                      dropout_p=0,
                                      n_layers=1,
                                      rnn_type="lstm")
        self.script_on = "script" in args.stream_type
        self.vbb_on = "visual_bb" in args.stream_type
        self.vmeta_on = "visual_meta" in args.stream_type
        self.conv_pool = Conv1d(n_dim * 4 + 1, n_dim * 2)

        self.util = UtilityLayer(hidden_dim=300,
                                 feedforward_dim=600,
                                 n_head=10,
                                 dropout=0.1)
        self.util2 = UtilityLayer(hidden_dim=300,
                                  feedforward_dim=600,
                                  n_head=10,
                                  dropout=0.1)
        self.summary_s = SummaryAttn(300, 8, 0.1)
        self.summary_m = SummaryAttn(300, 8, 0.1)
        self.summary_b = SummaryAttn(300, 8, 0.1)
        self.summary_q = SummaryAttn(300, 5, 0.1)

        self.character = nn.Parameter(torch.randn(22,
                                                  D,
                                                  device=args.device,
                                                  dtype=torch.float),
                                      requires_grad=True)
        self.norm1 = Norm(D)

        self.output = nn.Sequential(nn.Linear(4 * 300, 300), nn.PReLU())

        self.linear_addit = nn.Sequential(nn.Linear(1800, 300), nn.PReLU())

        if self.script_on:
            self.lstm_script = RNNEncoder(self.bert_dim + 21,
                                          150,
                                          bidirectional=True,
                                          dropout_p=0,
                                          n_layers=1,
                                          rnn_type="lstm")
            self.classifier_script = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                   nn.Softmax(dim=1))
            self.mhattn_script = CharMatching(4, D, D)

        if self.vmeta_on:
            self.lstm_vmeta = RNNEncoder(self.bert_dim + 21,
                                         150,
                                         bidirectional=True,
                                         dropout_p=0,
                                         n_layers=1,
                                         rnn_type="lstm")
            self.classifier_vmeta = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                  nn.Softmax(dim=1))
            self.mhattn_vmeta = CharMatching(4, D, D)

        if self.vbb_on:
            self.lstm_vbb = RNNEncoder(image_dim + 21,
                                       150,
                                       bidirectional=True,
                                       dropout_p=0,
                                       n_layers=1,
                                       rnn_type="lstm")
            self.vbb_fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(image_dim, n_dim),
                nn.Tanh(),
            )
            self.classifier_vbb = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                nn.Softmax(dim=1))

            self.mhattn_vbb = CharMatching(4, D, D)
Example #3
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        D = n_dim

        self.hidden_dim = n_dim
        n_dim = args.n_dim
        image_dim = args.image_dim

        #bert_vocab_size = 30540
        self.bert_dim = 768
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        #self.bert.resize_token_embeddings(bert_vocab_size)
        for param in self.bert.parameters():
            param.requires_grad = False
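        # BERT is frozen and used purely as a feature extractor; only the
        # modules defined below are trained.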

        self.cmat = ContextMatching(n_dim * 3)
        self.lstm_raw = RNNEncoder(self.bert_dim,
                                   150,
                                   bidirectional=True,
                                   dropout_p=0,
                                   n_layers=1,
                                   rnn_type="lstm")
        #self.lstm_script = RNNEncoder(321, 150, bidirectional=True, dropout_p=0, n_layers=1, rnn_type="lstm")

        self.util_video = UtilityLayer(hidden_dim=300,
                                       feedforward_dim=1024,
                                       n_head=6,
                                       dropout=0.1)
        self.util_subs = UtilityLayer(hidden_dim=300,
                                      feedforward_dim=1024,
                                      n_head=6,
                                      dropout=0.1)
        self.util_person = UtilityLayer(hidden_dim=300,
                                        feedforward_dim=1024,
                                        n_head=6,
                                        dropout=0.1)

        self.character = nn.Parameter(torch.randn(22,
                                                  D,
                                                  device=args.device,
                                                  dtype=torch.float),
                                      requires_grad=True)
        self.norm1 = Norm(D)

        self.image_feature_projection = nn.Sequential(nn.Linear(512, 300),
                                                      nn.ReLU())

        self.person_feature_projection = nn.Sequential(nn.Linear(512, 300),
                                                       nn.ReLU())

        self.bert_qa_projection = nn.Sequential(nn.Linear(768, 300), nn.ReLU())

        self.bert_emotion_projection = nn.Sequential(nn.Linear(768, 300),
                                                     nn.ReLU())

        self.bert_script_projection = nn.Sequential(nn.Linear(768, 300),
                                                    nn.ReLU())
Example #4
    model_args = checkpoint['args']
    with open(model_args.data, 'rb') as rf:
        data = pickle.load(rf)
    rvd_pairs = data[args.test]
    dic_embed = data['dic_embed']
    def_embed = data['def_embed']
    dic_word2ix = data['dic_word2ix']
    def_word2ix = data['def_word2ix']
    rvd_candidates = set(data['rvd_candidates'])
    rvd_pairs = [(w, s) for w, s in rvd_pairs if w in rvd_candidates]
    print('[1] load data OK:', model_args.data, len(rvd_pairs))

    # load model
    model_args.gpu = -1
    encoder_cls = {'gru': RNNEncoder, 'bow': BOWEncoder}[model_args.rnn]
    encoder = encoder_cls(def_word2ix, model_args)
    encoder.load_state_dict(checkpoint['state_dict'])
    print('[2] load model OK!')

    # load ground embed; calculate output embed
    sens = [s for _, s in rvd_pairs]
    grd_words = [w for w, _ in rvd_pairs]
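    # Encode every definition and row-normalize the outputs, presumably so
    # that candidates can be ranked by cosine similarity.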
    out_embs = encoder.estimate_from_idxsens(sens)
    out_embs = util.normalize_matrix_by_row(out_embs)
    print('[3] calculate embedding OK!')

    # output file
    outdir = '../output'
    util.mkdir(outdir)
Example #5
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        self.vocab = vocab
        V = len(vocab)
        D = n_dim

        self.hidden_dim = n_dim

        self.embedding = nn.Embedding(V, D)
        n_dim = args.n_dim
        image_dim = args.image_dim

        self.cmat = ContextMatching(n_dim * 3)
        self.lstm_raw = RNNEncoder(300,
                                   150,
                                   bidirectional=True,
                                   dropout_p=0,
                                   n_layers=1,
                                   rnn_type="lstm")
        self.lstm_script = RNNEncoder(321,
                                      150,
                                      bidirectional=True,
                                      dropout_p=0,
                                      n_layers=1,
                                      rnn_type="lstm")
        self.script_on = "script" in args.stream_type
        self.vbb_on = "visual_bb" in args.stream_type
        self.vmeta_on = "visual_meta" in args.stream_type
        self.conv_pool = Conv1d(n_dim * 4 + 1, n_dim * 2)

        self.util = UtilityLayer(hidden_dim=300,
                                 feedforward_dim=1024,
                                 n_head=10,
                                 dropout=0.1)
        self.util2 = UtilityLayer(hidden_dim=300,
                                  feedforward_dim=1024,
                                  n_head=10,
                                  dropout=0.1)
        self.util3 = UtilityLayer(hidden_dim=300,
                                  feedforward_dim=1024,
                                  n_head=10,
                                  dropout=0.1)
        self.summary_s = SummaryAttn(300, 3, 0.1)
        self.summary_m = SummaryAttn(300, 3, 0.1)
        self.summary_b = SummaryAttn(300, 3, 0.1)
        self.summary_f = SummaryAttn(300, 3, 0.1)
        self.summary_q = SummaryAttn(300, 3, 0.1)
        self.summary_addit = SummaryAttn(300, 3, 0.1)

        self.character = nn.Parameter(torch.randn(22,
                                                  D,
                                                  device=args.device,
                                                  dtype=torch.float),
                                      requires_grad=True)
        self.norm1 = Norm(D)

        self.image_projection = nn.Sequential(nn.Linear(512, 300), nn.PReLU())

        self.output = nn.Sequential(nn.Linear(4 * 300, 300), nn.PReLU())

        self.linear_addit = nn.Sequential(nn.Linear(1800 + 3, 300), nn.PReLU())
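        # 1800 is presumably 6 x 300 (six concatenated 300-dim summaries); the
        # extra 3 dims are likely additional scalar features.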

        self.mh_bb = nn.MultiheadAttention(embed_dim=300, num_heads=6)
        self.mh_script = nn.MultiheadAttention(embed_dim=300, num_heads=6)
        self.mh_meta = nn.MultiheadAttention(embed_dim=300, num_heads=6)
        self.mh_answers = nn.MultiheadAttention(embed_dim=300, num_heads=6)

        if self.script_on:
            self.lstm_script = RNNEncoder(321,
                                          150,
                                          bidirectional=True,
                                          dropout_p=0,
                                          n_layers=1,
                                          rnn_type="lstm")
            self.classifier_script = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                   nn.Softmax(dim=1))
            self.mhattn_script = CharMatching(4, D, D)

        if self.vmeta_on:
            self.lstm_vmeta = RNNEncoder(321,
                                         150,
                                         bidirectional=True,
                                         dropout_p=0,
                                         n_layers=1,
                                         rnn_type="lstm")
            self.classifier_vmeta = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                  nn.Softmax(dim=1))
            self.mhattn_vmeta = CharMatching(4, D, D)

        if self.vbb_on:
            self.lstm_vbb = RNNEncoder(image_dim + 21,
                                       150,
                                       bidirectional=True,
                                       dropout_p=0,
                                       n_layers=1,
                                       rnn_type="lstm")
            self.vbb_fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(image_dim, n_dim),
                nn.Tanh(),
            )
            self.classifier_vbb = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                nn.Softmax(dim=1))

            self.mhattn_vbb = CharMatching(4, D, D)
Example #6
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        self.vocab = vocab
        V = len(vocab)
        D = n_dim

        # set appropriate CLS, SEP tokens here (from BERT tokenizer)
        self.CLS = 101
        self.SEP = 102
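        # NOTE: 101/102 are BERT's [CLS]/[SEP] ids; the RoBERTa model used
        # below uses <s> = 0 and </s> = 2 instead.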

        self.hidden_dim = n_dim

        self.embedding = nn.Embedding(V, D)
        n_dim = args.n_dim
        image_dim = args.image_dim

        #bert_vocab_size = 30543
        self.bert_dim = 768
        #self.bert = BertModel.from_pretrained('bert-base-uncased')
        self.bert = RobertaModel.from_pretrained('roberta-base')
        #self.bert.resize_token_embeddings(bert_vocab_size)
        for param in self.bert.parameters():
            param.requires_grad = False

        self.cmat = ContextMatching(n_dim * 3)
        self.lstm_raw = RNNEncoder(self.bert_dim,
                                   150,
                                   bidirectional=True,
                                   dropout_p=0,
                                   n_layers=1,
                                   rnn_type="lstm")
        #self.lstm_script = RNNEncoder(321, 150, bidirectional=True, dropout_p=0, n_layers=1, rnn_type="lstm")
        self.script_on = "script" in args.stream_type
        self.vbb_on = "visual_bb" in args.stream_type
        self.vmeta_on = "visual_meta" in args.stream_type
        self.conv_pool = Conv1d(n_dim * 4 + 1, n_dim * 2)

        self.character = nn.Parameter(torch.randn(22,
                                                  D,
                                                  device=args.device,
                                                  dtype=torch.float),
                                      requires_grad=True)
        self.norm1 = Norm(D)

        self.visual_projection = nn.Sequential(nn.Linear(512, 300), nn.ReLU())

        self.person_projection = nn.Sequential(nn.Linear(512, 300), nn.ReLU())

        self.output = nn.Sequential(nn.Linear(768, 1), nn.PReLU())

        if self.script_on:
            #self.lstm_script = RNNEncoder(321, 150, bidirectional=True, dropout_p=0, n_layers=1, rnn_type="lstm")
            self.lstm_script = RNNEncoder(self.bert_dim + 21,
                                          150,
                                          bidirectional=True,
                                          dropout_p=0,
                                          n_layers=1,
                                          rnn_type="lstm")
            self.classifier_script = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                   nn.Softmax(dim=1))
            self.mhattn_script = CharMatching(4, D, D)

        if self.vmeta_on:
            #self.lstm_vmeta = RNNEncoder(321, 150, bidirectional=True, dropout_p=0, n_layers=1, rnn_type="lstm")
            self.lstm_vmeta = RNNEncoder(self.bert_dim + 21,
                                         150,
                                         bidirectional=True,
                                         dropout_p=0,
                                         n_layers=1,
                                         rnn_type="lstm")
            self.classifier_vmeta = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                  nn.Softmax(dim=1))
            self.mhattn_vmeta = CharMatching(4, D, D)

        if self.vbb_on:
            self.lstm_vbb = RNNEncoder(image_dim + 21,
                                       150,
                                       bidirectional=True,
                                       dropout_p=0,
                                       n_layers=1,
                                       rnn_type="lstm")
            self.vbb_fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(image_dim, n_dim),
                nn.Tanh(),
            )
            self.classifier_vbb = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                nn.Softmax(dim=1))

            self.mhattn_vbb = CharMatching(4, D, D)
Example #7
    def __init__(self,
                 args,
                 vocab,
                 n_dim,
                 image_dim,
                 layers,
                 dropout,
                 num_choice=5):
        super().__init__()
        self.vocab = vocab
        V = len(vocab)
        D = n_dim

        self.hidden_dim = n_dim

        #self.bert = BertModel.from_pretrained('bert-base-uncased')

        encoder_layer = nn.TransformerEncoderLayer(d_model=n_dim,
                                                   nhead=6,
                                                   dim_feedforward=1024,
                                                   dropout=0.5,
                                                   activation='gelu')
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=2)
        #self.transformer = nn.Transformer(d_model=n_dim, nhead=6)

        self.embedding = nn.Embedding(V, D)
        n_dim = args.n_dim
        image_dim = args.image_dim

        self.tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
        self.language_model = RobertaModel.from_pretrained('roberta-base',
                                                           return_dict=True)
        for param in self.language_model.base_model.parameters():
            param.requires_grad = False
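        # RoBERTa is frozen and serves as a fixed contextual feature extractor.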

        self.cmat = ContextMatching(n_dim * 3)
        self.lstm_raw = RNNEncoder(300,
                                   150,
                                   bidirectional=True,
                                   dropout_p=0,
                                   n_layers=1,
                                   rnn_type="lstm")
        self.lstm_script = RNNEncoder(321,
                                      150,
                                      bidirectional=True,
                                      dropout_p=0,
                                      n_layers=1,
                                      rnn_type="lstm")
        self.script_on = "script" in args.stream_type
        self.vbb_on = "visual_bb" in args.stream_type
        self.vmeta_on = "visual_meta" in args.stream_type
        self.conv_pool = Conv1d(n_dim * 4 + 1, n_dim * 2)

        self.character = nn.Parameter(torch.randn(22,
                                                  D,
                                                  device=args.device,
                                                  dtype=torch.float),
                                      requires_grad=True)
        self.norm1 = Norm(D)

        self.output = nn.Sequential(nn.Linear(5 * 300, 5), nn.Softmax(dim=1))

        if self.script_on:
            self.lstm_script = RNNEncoder(321,
                                          150,
                                          bidirectional=True,
                                          dropout_p=0,
                                          n_layers=1,
                                          rnn_type="lstm")
            self.classifier_script = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                   nn.Softmax(dim=1))
            self.mhattn_script = CharMatching(4, D, D)

        if self.vmeta_on:
            self.lstm_vmeta = RNNEncoder(321,
                                         150,
                                         bidirectional=True,
                                         dropout_p=0,
                                         n_layers=1,
                                         rnn_type="lstm")
            self.classifier_vmeta = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                  nn.Softmax(dim=1))
            self.mhattn_vmeta = CharMatching(4, D, D)

        if self.vbb_on:
            self.lstm_vbb = RNNEncoder(image_dim + 21,
                                       150,
                                       bidirectional=True,
                                       dropout_p=0,
                                       n_layers=1,
                                       rnn_type="lstm")
            self.vbb_fc = nn.Sequential(
                nn.Dropout(0.5),
                nn.Linear(image_dim, n_dim),
                nn.Tanh(),
            )
            self.classifier_vbb = nn.Sequential(nn.Linear(n_dim * 2, 1),
                                                nn.Softmax(dim=1))

            self.mhattn_vbb = CharMatching(4, D, D)
Example #8
                            map_location=lambda storage, loc: storage)

    # load data
    model_args = checkpoint['args']
    with open(model_args.data, 'rb') as rf:
        data = pickle.load(rf)
    dic_embed = data['dic_embed']
    def_embed = data['def_embed']
    dic_word2ix = data['dic_word2ix']
    def_word2ix = data['def_word2ix']
    rvd_candidates = set(data['rvd_candidates'])
    print('[1] load data OK:', model_args.data)

    # load model
    model_args.gpu = args.gpu
    encoder = RNNEncoder(def_word2ix, model_args)
    if args.gpu > -1:
        encoder = encoder.cuda()
    encoder.load_state_dict(checkpoint['state_dict'])
    print('[2] load model OK!')

    # load definition data
    with open(args.data, 'rb') as rf:
        word2strsen = pickle.load(rf)
    word2emb = dict()
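    # Normalize each definition string (split hyphens, collapse whitespace,
    # word-tokenize) before encoding it into an embedding.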
    for word, strsen in tqdm(word2strsen.items(), ncols=10):
        strsen = strsen.replace('-', ' ')
        strsen = re.sub(' +', ' ', strsen)
        strsen = ' '.join(word_tokenize(strsen))
        word2emb[word] = encoder.estimate_from_strsens(strsen)[0]
Example #9
                            map_location=lambda storage, loc: storage)

    # load data
    model_args = checkpoint['args']
    with open(model_args.data, 'rb') as rf:
        data = pickle.load(rf)
    dic_embed = data['dic_embed']
    def_embed = data['def_embed']
    dic_word2ix = data['dic_word2ix']
    def_word2ix = data['def_word2ix']
    train_pairs = data['train_pairs']
    print('[1] load data OK:', model_args.data)

    # load model
    model_args.gpu = args.gpu
    encoder = RNNEncoder(def_word2ix, model_args)
    if args.gpu > -1:
        torch.cuda.set_device(args.gpu)
        encoder = encoder.cuda()
    encoder.load_state_dict(checkpoint['state_dict'])
    print('[2] load model OK!')

    # get word2sens
    word2sens = defaultdict(list)
    for word, sen in train_pairs:
        word2sens[word].append(sen)
    word2sens = dict(word2sens)

    # get topk
    new_train_pairs = list()
    new_word2sens = defaultdict(list)
Example #10
        random.shuffle(train_pairs)
        train_pairs = train_pairs[:args.num_train]
        valid_pairs = valid_pairs[:args.num_train]
    num_train_pairs = len(train_pairs)
    num_train_vocab = len({word for word, _ in train_pairs})

    train_grd_embed = [dic_embed[w] for w, _ in train_pairs]
    valid_grd_embed = [dic_embed[w] for w, _ in valid_pairs]

    # init model and dataset
    if args.rnn == 'bow':
        encoder = BOWEncoder(def_word2ix, args)
        validset = dataset.get_bow_dataset(valid_pairs, dic_embed, def_embed, def_word2ix, args.batch_size)
        trainset = dataset.get_bow_dataset(train_pairs, dic_embed, def_embed, def_word2ix, args.batch_size)
    elif args.rnn == 'gru' or args.rnn == 'lstm':
        encoder = RNNEncoder(def_word2ix, args)
        trainset = dataset.get_padded_dataset(train_pairs, dic_embed, args.pad_size, def_word2ix, args.batch_size, is_train=True)
        validset = dataset.get_padded_dataset(valid_pairs, dic_embed, args.pad_size, def_word2ix, args.batch_size, is_train=False)
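    # Initialize the encoder's input embeddings from the (presumably
    # pretrained) definition-side embedding matrix.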
    encoder.init_def_embedding(def_embed)
    print('load data: {} train, {} valid'.format(len(train_pairs), len(valid_pairs)))

    # optimizer
    lr = args.lr
    params = list(filter(lambda t: t.requires_grad, encoder.parameters()))
    print('# params:', len(params))
    opts = {
        'Adam': lambda: optim.Adam(params, lr=lr),
        'Adadelta': lambda: optim.Adadelta(params, lr=1),
        'SGD': lambda: optim.SGD(params, lr=lr, momentum=0.9),
    }
    optimizer = opts[args.optim]()