Example #1
def add_pretrain(self, vocabulary, vocabulary_inv):
    print('model_variation:', self.model_variation)
    if self.model_variation == 'pretrain':
        embedding_weights = load_word2vec(self.pretrain_type,
                                          vocabulary_inv,
                                          self.embedding_dim)
    elif self.model_variation == 'random':
        embedding_weights = None
    else:
        raise ValueError('Unknown model variation')
    self.embedding_weights = embedding_weights
    self.vocab_size = len(vocabulary)
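Every example on this page calls load_word2vec without showing its body. The sketch below is a minimal stand-in rather than the repos' implementation: it assumes vocabulary_inv is a list mapping row index to word, that gensim is available, and that pretrained vectors live in a local 'vectors.bin' (a placeholder path).

import numpy as np
from gensim.models import KeyedVectors

def load_word2vec(pretrain_type, vocabulary_inv, num_features=300):
    # 'vectors.bin' is a placeholder; pretrain_type is accepted only to
    # match the call sites in these examples
    kv = KeyedVectors.load_word2vec_format('vectors.bin', binary=True)
    weights = np.zeros((len(vocabulary_inv), num_features), dtype=np.float32)
    for i, word in enumerate(vocabulary_inv):
        if word in kv:
            weights[i] = kv[word][:num_features]
        else:
            # assumption: small uniform noise for out-of-vocabulary words
            weights[i] = np.random.uniform(-0.25, 0.25, num_features)
    return weights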
Example #2

def eval(fold_ix=1):
    test_data = data.Data(
        fold=fold_ix, is_test_data=True
    )  # the dataset merges self.real_U_file and self.fake_U_file
    test_data_loader = DataLoader(test_data,
                                  batch_size=50,
                                  shuffle=True,
                                  num_workers=8)

    X_trn, Y_trn, Y_trn_o, X_tst, Y_tst, Y_tst_o, vocabulary, vocabulary_inv = data_helpers.load_data(
        fold=fold_ix)

    X_trn = X_trn.astype(np.int32)
    X_tst = X_tst.astype(np.int32)
    Y_trn = Y_trn.astype(np.int32)
    Y_tst = Y_tst.astype(np.int32)

    args.num_classes = Y_trn.shape[1]

    embedding_weights = load_word2vec('glove',
                                      vocabulary_inv,
                                      num_features=300)

    capsule_net = CapsNet_Text(args, embedding_weights)
    path = 'save/model/fold' + str(fold_ix) + '/nlp-capsule50.pth'
    capsule_net.load_state_dict(torch.load(path))

    with torch.no_grad():
        for _, (input, labels) in enumerate(test_data_loader):
            # forward pass; preds has shape (None, vocab_size, sequence_len+1)
            _, preds = capsule_net(input, labels)
            # round the per-class scores to hard 0/1 predictions
            preds = preds.round().squeeze(2)

            report = metrics.classification_report(labels, preds)
            print('report on fold ' + str(fold_ix) + ': ')
            print(report)
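classification_report accepts the CPU tensors above because they convert to arrays, but an explicit conversion plus one aggregate number is safer for multi-label output. A short sketch; the micro-F1 line is an addition, not part of the original:

from sklearn.metrics import f1_score

# assumes labels and preds are CPU tensors of shape (batch, num_classes)
y_true = labels.cpu().numpy()
y_pred = preds.cpu().numpy()
print('micro-F1 on fold %d: %.4f'
      % (fold_ix, f1_score(y_true, y_pred, average='micro')))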
Example #3

    def add_pretrain(self, vocabulary, vocabulary_inv):
        print('model_variation:', self.model_variation)
        if self.model_variation == 'pretrain':
            embedding_weights = load_word2vec(self.pretrain_type,
                                              vocabulary_inv,
                                              self.embedding_dim)
        elif self.model_variation == 'random':
            embedding_weights = None
        else:
            raise ValueError('Unknown model variation')
        self.embedding_weights = embedding_weights
        self.vocab_size = len(vocabulary)

        self.embedding = nn.Embedding(num_embeddings=self.vocab_size,
                                      embedding_dim=self.embedding_dim)
        if self.model_variation == "pretrain":
            self.embedding.weight.data.copy_(
                torch.from_numpy(embedding_weights).type(torch.FloatTensor))
            self.embedding.weight.requires_grad = False
        elif self.model_variation != 'random':
            raise NotImplementedError('Unknown model_variation')
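The copy-then-freeze idiom above has a compact equivalent in the standard torch.nn API; a sketch reusing the example's names (embedding_weights is the numpy array produced by load_word2vec):

import torch
import torch.nn as nn

weights = torch.from_numpy(embedding_weights).float()
# from_pretrained copies the weights and, with freeze=True, disables gradients
embedding = nn.Embedding.from_pretrained(weights, freeze=True)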
Example #4
        # train_one_step(model_shared, model_PDN, PDN_Trloader, args, device, optimizer, current_lr, criterion_bce, "bce")
        # test_loss_minPDN, Accs = test_one_step_multilabel(model_shared, model_PDN, PDN_Teloader, args, device, test_loss_minPDN, criterion_bce)

        # if test_loss_min<test_loss_flag:
        #     final = Accs
        #     test_loss_flag = test_loss_min
    # return final, test_loss_flag


if __name__ == "__main__":
    print("==== Loading Data ...")
    # train_set, test_set, dict_word, dict_word_reverse, seq_length_words, vocab_size = load_data(args)
    Sent_Train, Sent_Test, PND_Train, PND_Test, dict_word, dict_word_reverse, vocab_size = load_data(
        args)
    embedding_weights = load_word2vec('glove', dict_word, args.embedding_dim)

    # fw_res = open("res.txt","w")

    device = torch.device("cuda")
    model_input = MTL_Input(args, device, vocab_size, embedding_weights)
    model_shared = SoGMTL(args, device)
    # model_shared = CNN_base(args, device, vocab_size, embedding_weights)
    model_Sent = Sent_NN(args, output_dim=6)
    model_PDN = PDN_NN(args, output_dim=5)
    model_parameters = []
    model_parameters_input = filter(lambda p: p.requires_grad,
                                    model_input.parameters())
    model_parameters_shared = filter(lambda p: p.requires_grad,
                                     model_shared.parameters())
    model_parameters_Sent = filter(lambda p: p.requires_grad,
                                   model_Sent.parameters())

    Epochs = list(range(1, num_epochs + 1))
    Loss_data = {'Epochs': Epochs, 'Loss': Loss}

    df = pd.DataFrame(Loss_data, columns=['Epochs', 'Loss'])
    df.to_csv(path)



Example #5

X_trn, Y_trn, X_tst, Y_tst, vocabulary, vocabulary_inv = datasets.get_cross_domain_doctor_doctor()
X_trn = X_trn.astype(np.int32)
X_tst = X_tst.astype(np.int32)
Y_trn = Y_trn.astype(np.int32)
Y_tst = Y_tst.astype(np.int32)

args.num_classes = Y_trn.shape[1]
embedding_weights = load_word2vec('glove', vocabulary_inv, num_features=300)

capsule_net = CapsNet_Text(args, embedding_weights)
current_lr = args.learning_rate
optimizer = Adam(capsule_net.parameters(), lr=current_lr)
# capsule_net = nn.DataParallel(capsule_net).cuda()


losses = []
for epoch in range(args.num_epochs):
    # torch.cuda.empty_cache()

    nr_trn_num = X_trn.shape[0]
    nr_batches = int(np.ceil(nr_trn_num / float(args.tr_batch_size)))

    if epoch > args.learning_rate_decay_start and args.learning_rate_decay_start >= 0:
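        # Plausible completion (assumption; the snippet stops at the guard
        # above). Exponential learning-rate decay: only
        # learning_rate_decay_start appears in the original, so decay_every
        # and decay_rate are hypothetical argument names.
        frac = (epoch - args.learning_rate_decay_start) // args.learning_rate_decay_every
        current_lr = args.learning_rate * (args.learning_rate_decay_rate ** frac)
        for group in optimizer.param_groups:
            group['lr'] = current_lr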
Example #6
args = parser.parse_args()
params = vars(args)
print(json.dumps(params, indent=2))

X_trn, Y_trn, Y_trn_o, X_tst, Y_tst, Y_tst_o, vocabulary, vocabulary_inv = data_helpers.load_data(
    args.dataset, max_length=args.sequence_length, vocab_size=args.vocab_size)
Y_trn = Y_trn.toarray()
Y_tst = Y_tst.toarray()

X_trn = X_trn.astype(np.int32)
X_tst = X_tst.astype(np.int32)
Y_trn = Y_trn.astype(np.int32)
Y_tst = Y_tst.astype(np.int32)

embedding_weights = load_word2vec('glove', vocabulary_inv, args.vec_size)

args.num_classes = Y_trn.shape[1]

capsule_net = CapsNet_Text(args, embedding_weights)
capsule_net = nn.DataParallel(capsule_net).cuda()


def transformLabels(labels):
    # collect and sort the label ids that actually occur in this batch
    label_index = list(set([l for row in labels for l in row]))
    label_index.sort()

    variable_num_classes = len(label_index)
    target = []
    for row in labels:
        tmp = np.zeros([variable_num_classes], dtype=np.float32)
        # assumed completion (the original stops after allocating tmp):
        # multi-hot encode this sample's labels
        for l in row:
            tmp[label_index.index(l)] = 1.0
        target.append(tmp)
    return np.array(target)