Example #1
    def __init__(self, env, listener, tok):
        self.env = env
        self.feature_size = self.env.feature_size
        self.tok = tok
        self.tok.finalize()
        self.listener = listener

        # Model
        print("VOCAB_SIZE", self.tok.vocab_size())
        # Encoder over visual + angle features (optionally bidirectional);
        # the decoder generates instruction tokens from the encoder context.
        self.encoder = model.SpeakerEncoder(self.feature_size +
                                            args.angle_feat_size,
                                            args.rnn_dim,
                                            args.dropout,
                                            bidirectional=args.bidir).cuda()
        self.decoder = model.SpeakerDecoder(self.tok.vocab_size(), args.wemb,
                                            self.tok.word_to_index['<PAD>'],
                                            args.rnn_dim, args.dropout).cuda()
        self.encoder_optimizer = args.optimizer(self.encoder.parameters(),
                                                lr=args.lr)
        self.decoder_optimizer = args.optimizer(self.decoder.parameters(),
                                                lr=args.lr)

        # Evaluation
        self.softmax_loss = torch.nn.CrossEntropyLoss(
            ignore_index=self.tok.word_to_index['<PAD>'])

        # Will be used in beam search
        self.nonreduced_softmax_loss = torch.nn.CrossEntropyLoss(
            ignore_index=self.tok.word_to_index['<PAD>'],
            reduction='none')  # modern equivalent of the deprecated size_average=False, reduce=False
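
The non-reduced loss is what lets beam search score each candidate token stream separately. A minimal sketch of that use, assuming (batch, vocab_size, seq_len) logits and a <PAD> index of 0 (both illustrative, not taken from the example above):

import torch
import torch.nn as nn

PAD = 0  # assumed <PAD> index, for illustration only

# reduction='none' keeps one loss value per token instead of a single
# averaged scalar, so every hypothesis can be scored independently.
nonreduced_loss = nn.CrossEntropyLoss(ignore_index=PAD, reduction='none')

logits = torch.randn(4, 100, 20)           # (batch, vocab_size, seq_len)
targets = torch.randint(1, 100, (4, 20))   # (batch, seq_len)

per_token = nonreduced_loss(logits, targets)  # shape (4, 20)
per_hypothesis = per_token.sum(dim=1)         # one score per sequence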
Example #2
    def __init__(self, env, listener, tok):
        self.env = env
        self.feature_size = self.env.feature_size
        self.tok = tok
        self.tok.finalize()
        self.listener = listener

        # Model
        # Load the object vocabulary and embed it with pretrained GloVe vectors.
        self.glove_dim = 300
        with open(
                '/students/u5399302/MatterportData/data/obj_stats2/objectvocab_filtered_dyno4.txt',
                'r') as f_ov:
            self.obj_vocab = [k.strip() for k in f_ov]
        glove_matrix = utils.get_glove_matrix(self.obj_vocab, self.glove_dim)
        self.objencoder = ObjEncoder(glove_matrix.size(0),
                                     glove_matrix.size(1),
                                     glove_matrix).cuda()

        print("VOCAB_SIZE", self.tok.vocab_size())
        self.encoder = model.SpeakerEncoder(self.feature_size +
                                            args.angle_feat_size,
                                            args.rnn_dim,
                                            args.dropout,
                                            bidirectional=args.bidir).cuda()
        self.decoder = model.SpeakerDecoder(self.tok.vocab_size(), args.wemb,
                                            self.tok.word_to_index['<PAD>'],
                                            args.rnn_dim, args.dropout).cuda()
        self.encoder_optimizer = args.optimizer(self.encoder.parameters(),
                                                lr=args.lr)
        self.decoder_optimizer = args.optimizer(self.decoder.parameters(),
                                                lr=args.lr)

        # Evaluation
        self.softmax_loss = torch.nn.CrossEntropyLoss(
            ignore_index=self.tok.word_to_index['<PAD>'])

        # Will be used in beam search
        self.nonreduced_softmax_loss = torch.nn.CrossEntropyLoss(
            ignore_index=self.tok.word_to_index['<PAD>'],
            reduction='none')  # modern equivalent of the deprecated size_average=False, reduce=False
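
ObjEncoder and utils.get_glove_matrix come from this repository and are not shown here. A plausible minimal version, assuming ObjEncoder is simply an embedding table seeded with the pretrained GloVe matrix (the body below is a guess at the interface used above, not the original code):

import torch
import torch.nn as nn

class ObjEncoder(nn.Module):
    # Assumed implementation: a (num_words, glove_dim) embedding table
    # whose rows are initialized from the pretrained GloVe matrix.
    def __init__(self, num_words, glove_dim, glove_matrix):
        super().__init__()
        self.embedding = nn.Embedding(num_words, glove_dim)
        with torch.no_grad():
            self.embedding.weight.copy_(glove_matrix)

    def forward(self, obj_ids):
        # obj_ids: LongTensor of object-vocabulary indices
        return self.embedding(obj_ids)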