def train():
    dataset = trainDataset()
    model = MainModel(dataset.vocab_size()[0], dataset.vocab_size()[1])
    #model.load_state_dict(torch.load("model2.pth"))
    model = model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=1,
                                             pin_memory=True)

    for epoch in range(EPOCHS):
        print(f"EPOCH: {epoch + 1}/{EPOCHS}")
        losses = []

        for idx, data in tqdm(enumerate(dataloader)):
            outputs = model(data["source"].cuda(), data["target"].cuda(),
                            data["alignment"].cuda())
            loss = torch.nn.functional.binary_cross_entropy(
                outputs.view(-1), data["predictions"].cuda().view(-1).float())
            # print(loss.item())
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            losses.append(loss.item())
        print(f"Mean loss for epoch {epoch + 1}: {sum(losses) / len(losses)}")
        torch.save(model.state_dict(), "model_lstm.pth")
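A minimal sketch of the module-level setup this train() excerpt assumes; the import paths mirror the standalone test script further down this page, and EPOCHS is a placeholder value, not the project's actual constant.

# Hedged sketch: assumed surroundings for the train() excerpt above.
import torch
from tqdm import tqdm
from model import MainModel          # as in the test script below
from dataloader import trainDataset  # assumed counterpart of testDataset

EPOCHS = 10  # placeholder; the original constant is not shown in this excerpt

if __name__ == "__main__":
    train()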
Example #2
    def __init__(self, num_classes):
        #debug
        self.seed = 13
        self.seed_torch()

        self.debug_mode = False
        # info about data:
        self.dataset_dir = Path('./data/origin_data/')
        self.preprocessed_dir = Path('./data/preprocessed_data/')
        self.csv_dir = Path('./data/csv_files')
        self.csv_file = {
            'train_curated': self.csv_dir / 'train_curated.csv',
            'train_noisy': self.csv_dir / 'trn_noisy_best50s.csv',
            'sample_submission': self.csv_dir / 'sample_submission.csv'
        }
        self.dataset = {
            'train_curated': self.dataset_dir / 'train_curated',
            'train_noisy': self.dataset_dir / 'train_noisy',
            'test': self.dataset_dir / 'test'
        }

        self.mels = {
            'train_curated': self.preprocessed_dir / 'mels_train_curated.pkl',
            'train_noisy':
            self.preprocessed_dir / 'mels_trn_noisy_best50s.pkl',
            'test': self.preprocessed_dir /
            'mels_test.pkl',  # NOTE: this data doesn't work at 2nd stage
        }

        self.num_classes = num_classes

        self.model = MainModel('Simple', num_classes=self.num_classes).model

        # info about CPU:
        self.n_jobs = cpu_count() // 2 + 4  # half the logical CPUs, plus a few extra worker threads
        os.environ['MKL_NUM_THREADS'] = str(self.n_jobs)
        os.environ['OMP_NUM_THREADS'] = str(self.n_jobs)

        # preprocessing parameters:

        self.preprocessing_type = 'log_melspectrogram'

        self.sampling_rate = 44100
        self.duration = 6  # in seconds
        self.n_mels = 128  # mel coefficients
        self.hop_length = 347 * self.n_mels // 128 * self.duration  # to make time steps ~128
        self.fmin = 20  # minimum frequency
        self.fmax = self.sampling_rate // 2  # maximum frequency
        self.n_fft = self.n_mels * 100  # fft coeffs
        self.padmode = 'constant'  # padding mode
        self.samples = self.sampling_rate * self.duration  # elements in one audio file
        self.window_type = 'hann'

        # neural net info
        self.num_epochs = 500
        self.batch_size = 64
        self.test_batch_size = 256
        self.lr = 3e-3
        self.eta_min = 1e-5
        self.t_max = 20
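These preprocessing parameters describe a log-mel-spectrogram front end. Below is a minimal sketch of how such a config object could be consumed with librosa; the helper name audio_to_log_mel and the use of librosa are illustrative assumptions, not the project's actual preprocessing code.

# Hedged sketch: one audio file -> log-mel spectrogram, driven by the config above.
import librosa
import numpy as np

def audio_to_log_mel(path, conf):
    # load and pad/trim to the fixed length conf.samples
    y, _ = librosa.load(path, sr=conf.sampling_rate)
    if len(y) < conf.samples:
        y = np.pad(y, (0, conf.samples - len(y)), mode=conf.padmode)
    else:
        y = y[:conf.samples]
    # mel power spectrogram with the configured FFT / hop / mel settings
    mel = librosa.feature.melspectrogram(y=y,
                                         sr=conf.sampling_rate,
                                         n_fft=conf.n_fft,
                                         hop_length=conf.hop_length,
                                         n_mels=conf.n_mels,
                                         fmin=conf.fmin,
                                         fmax=conf.fmax,
                                         window=conf.window_type)
    # convert power to decibels, i.e. the 'log_melspectrogram' named above
    return librosa.power_to_db(mel).astype(np.float32)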
Example #3
def main():
    from model import MainModel
    from controller import MainController
    from view import MainView

    m = MainModel()
    v = MainView()
    c = MainController(m, v)

    gtk.main()
Example #4
def main():
    embeddings = {
        'word':
        np.array([[0., 0., 0.], [0.4, 0.5, 0.6], [-0.1, -0.2, -0.3],
                  [0.1, -0.4, 0.8]],
                 dtype='float32'),
        'anchor':
        np.array([[0., 0., 0., 0.], [0.1, 0.1, -0.3, 0.2],
                  [-0.3, 0.2, 0.2, -0.3], [-0.1, -0.6, 0.6, -0.3],
                  [-0.3, 0.2, 0.1, -0.3], [0.1, 0.0, -0.3, 0.2]],
                 dtype='float32')
    }

    params = {
        'embeddings': embeddings,
        'update_embs': True,
        'features': OrderedDict([('word', 0), ('anchor', 0)]),
        'features_dim': OrderedDict([('word', 3), ('anchor', 4)]),
        'use_bin_fea': False,
        'len_contexts': 5,
        'num_category': 10,
        'wed_window': 2,
        'cnn_filter_num': 3,
        'cnn_filter_wins': [2],
        'cnn_multilayer_nn': [4],
        'batch_size': 3,
        'dropout': 0.0,
        'regularizer': 0.0,
        'lr': 0.01,
        'norm_lim': 0.0,
        'optimizer': 'adadelta',
        'kGivens': dict()
    }

    words = [[0, 1, 2, 3, 0], [1, 3, 2, 1, 1], [2, 2, 3, 2, 2]]

    anchor = [[0, 2, 3, 4, 0], [1, 2, 3, 4, 5], [1, 2, 3, 4, 5]]

    anchor_position = [2, 2, 2]

    candidate = [[2, 0, 5, 0, 0], [3, 1, 2, 4, 0], [4, 3, 6, 7, 8]]

    key = [0, 1, 2]

    M = MainModel(params)

    print '\nTraining ...\n'
    for i in range(1000):
        cost = M.f_grad_shared(words, anchor, anchor_position, candidate, key)
        M.f_update_param(params['lr'])
        for fea in M.container['embeddings']:
            M.container['set_zero'][fea](M.container['zero_vecs'][fea])
        print '>>> Epoch', i, ': cost = ', cost

    print '\nTesting ...\n'
    print M.f_pred(words, anchor, anchor_position, candidate)
Example #5
def main(train_dir,
         val_dir,
         checkpoint_dir,
         batch_size,
         image_size=512,
         num_epochs=10,
         checkpoint_name=None,
         num_workers=1,
         pin_memory=True,
         log_dir="logs",
         model_name=None,
         train_csv=None,
         val_csv=None):

    # declare datasets
    train_ds = DataFolder(root_dir=train_dir,
                          transform=transform(image_size, is_training=True),
                          csv_path=train_csv)
    val_ds = DataFolder(root_dir=val_dir,
                        transform=transform(image_size, is_training=False),
                        csv_path=val_csv)
    train_loader = DataLoader(train_ds,
                              batch_size=batch_size,
                              num_workers=num_workers,
                              pin_memory=pin_memory,
                              shuffle=True)
    val_loader = DataLoader(val_ds,
                            batch_size=batch_size,
                            num_workers=num_workers,
                            pin_memory=pin_memory,
                            shuffle=True)

    #init model
    model = MainModel(128, model_name)

    # configure parameter
    loss_fn = nn.CrossEntropyLoss()
    model = model.to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-4)
    scaler = torch.cuda.amp.GradScaler()
    # checkpoint = {'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}
    # save_checkpoint(checkpoint, os.path.join(checkpoint_dir, "checkpoint_initialize.pth.tar"))
    # return

    if checkpoint_name:
        ckp_path = os.path.join(checkpoint_dir, checkpoint_name)
        load_checkpoint(torch.load(ckp_path), model, optimizer)

    check_accuracy(val_loader, model, device)

    #training
    for epoch in range(num_epochs):
        train_fn(train_loader,
                 model,
                 optimizer,
                 loss_fn,
                 scaler,
                 device,
                 epoch,
                 log_dir=log_dir)
        check_accuracy(val_loader, model, device)
        checkpoint = {
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict()
        }
        save_checkpoint(
            checkpoint,
            os.path.join(checkpoint_dir, f"checkpoint_{epoch}.pth.tar"))
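A minimal sketch of invoking this training entry point; the paths and hyperparameter values are placeholders, and the efficientnet-b3 model name is borrowed from the later test script's default rather than taken from this example.

# Hedged sketch: one possible invocation of main() above with placeholder values.
if __name__ == "__main__":
    main(train_dir="data/train",
         val_dir="data/val",
         checkpoint_dir="checkpoints",
         batch_size=32,
         image_size=512,
         num_epochs=10,
         model_name="efficientnet-b3")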
Example #6
def main(path_in='/scratch/wl1191/codes/data/Semcor_processed/',
         path_out='/scratch/wl1191/codes/out/',
         path_key='/scratch/wl1191/codes/data/Semcor/',
         path_scorer_event='/scratch/wl1191/codes/scorers/eventScorer.py',
         path_scorer_wsd='/scratch/wl1191/codes/scorers/scorer2',
         path_kGivens='/scratch/wl1191/codes/out0-14/params14.pkl',
         update_embs=True,
         use_bin_fea=False,
         wed_window=2,
         cnn_filter_num=300,
         cnn_filter_wins=(2, 3, 4, 5),
         cnn_multilayer_nn=(1200, ),
         batch_size=50,
         regularizer=0.0,
         lr=0.01,
         lr_decay=False,
         norm_lim=9.0,
         optimizer='adadelta',
         dropout=0.5,
         seed=3435,
         nepochs=20):

    # Prepare parameters
    embeddings, map_item2idx, features_dim, maxes, len_contexts = cPickle.load(
        open(path_in + 'data.pkl', 'r'))
    map_idx2cate = dict(
        (v, k) for k, v in map_item2idx['category'].iteritems())

    features = OrderedDict([('word', 0), ('anchor', 0)])
    if use_bin_fea:
        features['bin_fea'] = 1

    kGivens = dict()
    if path_kGivens is not None:
        kGivens = cPickle.load(open(path_kGivens, 'r'))

    params = {
        'update_embs': update_embs,
        'features': features,
        'features_dim': features_dim,
        'use_bin_fea': use_bin_fea,
        'len_contexts': len_contexts,
        'num_category': len(map_item2idx['category']),
        'wed_window': wed_window,
        'cnn_filter_num': cnn_filter_num,
        'cnn_filter_wins': list(cnn_filter_wins),
        'cnn_multilayer_nn': list(cnn_multilayer_nn),
        'batch_size': batch_size,
        'dropout': dropout,
        'regularizer': regularizer,
        'lr': lr,
        'norm_lim': norm_lim,
        'optimizer': optimizer,
        'kGivens': kGivens
    }

    print 'Saving model configuration ...'
    cPickle.dump(params, open(path_out + 'model_config.pkl', 'w'))

    params['embeddings'] = embeddings

    # Prepare datasets
    datasets_names = [
        'train', 'valid', 'sense02', 'sense03', 'sense07', 'eventValid',
        'eventTest'
    ]
    # datasets_names = ['train', 'valid']
    datasets = {}
    for dn in datasets_names:
        datasets[dn] = TextIterator(path_in + dn + '.dat', maxes, len_contexts,
                                    batch_size, dn != 'train')

    print 'Building model ...'
    np.random.seed(seed)
    random.seed(seed)
    model = MainModel(params)

    data_train = datasets['train']
    data_evaluate = OrderedDict([('valid', datasets['valid']),
                                 ('sense02', datasets['sense02']),
                                 ('sense03', datasets['sense03']),
                                 ('sense07', datasets['sense07']),
                                 ('eventValid', datasets['eventValid']),
                                 ('eventTest', datasets['eventTest'])])

    perfs = OrderedDict()
    best_perf = OrderedDict()
    best_f1 = -np.inf
    best_epoch = -1
    curr_lr = lr
    sys.stdout.flush()
    for epoch in xrange(nepochs):
        train(model, data_train, features, params['lr'], epoch)
        for eval_name in data_evaluate:
            inst_ids, types, predictions = predict(model,
                                                   data_evaluate[eval_name],
                                                   features, map_idx2cate)
            file_name = path_out + eval_name + '.pred' + str(epoch)
            write_out(file_name, inst_ids, types, predictions)
            perfs[eval_name] = score(file_name, path_key + eval_name + '.key',
                                     eval_name, path_scorer_event,
                                     path_scorer_wsd)

        print '\n', 'Saving parameters'
        model.save(path_out + 'params' + str(epoch) + '.pkl')

        print print_perf(perfs)
        if perfs['valid']['f1'] > best_f1:
            best_f1 = perfs['valid']['f1']
            best_epoch = epoch
            for data_eval in perfs:
                best_perf[data_eval] = perfs[data_eval]
            print 'NEW BEST: Epoch', epoch

        # learning rate decay if no improvement in 10 epochs
        if lr_decay and abs(best_epoch - epoch) >= 10:
            curr_lr *= 0.5
        if curr_lr < 1e-5:
            break
        sys.stdout.flush()

    print '\n', '=' * 80, '\n'
    print 'BEST RESULT: Epoch', best_epoch
    print_perf(best_perf, 'Best Performance')
Example #7
    def __init__(self, batch_size=32, lr=0.00015, p_horizontalflip=0.4, model_type='ResNet101', training_mode='only_new'):
        ## INFO ABOUT EXPERIMENT
        self.logsFileName = 'LOGS'
        self.logsFileName_finetuning = 'LOGS_finetuning'
        self.seed = 13

        seed_torch(self.seed)

        if os.path.exists('./Logs/' + self.logsFileName + '.csv'):
            if training_mode == 'only_new':
                self.df_logger = Logger(self.logsFileName + '.csv', 'df')
                self.experiment_name = 'exp{}'.format(len(self.df_logger.logsFile)) + '_end_epoch'
                self.df_logger.save()
            elif training_mode == 'finetuning':
                self.df_logger = Logger(self.logsFileName_finetuning + '.csv', 'df')
                self.experiment_name = 'exp{}'.format(len(self.df_logger.logsFile)) + '_end_epoch'
                self.df_logger.save()
        else:
            self.experiment_name = 'exp{}'.format(0) + '_end_epoch'
        self.exper_type = 'data_imgsize_300'
        self.img_size = 300
        # self.img_size_crop = 300

        ## MODEL PARAMETERS
        self.weights_dir = './Model_weights/'
        self.weights_dir_finetuning = './Model_weights_finetuning/'
        self.model_type = model_type

        self.model = MainModel(model_type=self.model_type).model

        self.pytorch_total_params = sum(p.numel() for p in self.model.parameters() if p.requires_grad)

        self.lr = lr
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr, weight_decay=1e-5)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, factor=0.5, patience=2, verbose=True)
        self.criterion = nn.MSELoss()#AdaptiveLossFunction(num_dims=1, float_dtype=np.float32, device='cuda:0')
        # self.num_classes = 5
        self.model_param_list = [self.model, self.optimizer, self.scheduler]

        ## EARLY STOPPING
        self.early_stopping_patience = 10
        self.early_stopping = EarlyStopping(self.early_stopping_patience)
        self.early_stopping_loss = 'pytorch' #kappa

        ## TRAINING & VALIDATION SETUP

        self.num_workers = 16
        self.n_epochs = 200
        self.batch_size = batch_size
        self.valid_type = 'holdout' #CV
        self.valid_size = 0.2
        self.n_folds = 5 ## for CV!



        ## TRANSFORMER AND DATASET
        self.p_horizontalflip = p_horizontalflip
        self.data_type = 'new'

        ## PRINT FREQUENCY
        self.print_frequency = 50
Example #8
def main(test_dir,
         checkpoint_path,
         batch_size,
         num_workers=1,
         pin_memory=True,
         test_csv=None,
         model_name='efficientnet-b3'):

    # declare datasets
    test_ds = DataFolder(root_dir=test_dir,
                         transform=transform(is_training=False),
                         is_test=True,
                         csv_path=test_csv)
    test_loader = DataLoader(test_ds,
                             batch_size=batch_size,
                             num_workers=num_workers,
                             pin_memory=pin_memory,
                             shuffle=True)

    #init model
    model = MainModel(test_ds.__num_class__(), model_name)
    model = model.to(device)

    # load checkpoint
    load_checkpoint(torch.load(checkpoint_path), model)

    model.eval()

    iterator = tqdm(test_loader)

    num_correct = 0
    num_samples = 0

    preds = []
    groundtruths = []
    print(test_ds.class_names)

    with torch.no_grad():
        for x, y, image_paths in iterator:

            #convert to device
            x = x.to(device=device)
            y = y.to(device=device)

            # inference
            scores = torch.sigmoid(model(x))

            # get prediction
            max_score = torch.argmax(scores, dim=1)

            # add to global comparing value
            preds += max_score.to("cpu").numpy().tolist()
            groundtruths += y.to("cpu").numpy().tolist()

            #calculate score
            predictions = max_score.float()
            num_correct += (predictions == y).sum()
            num_samples += predictions.shape[0]
            iterator.set_postfix(
                accuracy=f'{float(num_correct) / float(num_samples) * 100:.2f}'
            )
            # break
    print(
        classification_report(groundtruths,
                              preds,
                              zero_division=0,
                              target_names=test_ds.class_names))
Example #9
    args = parser.parse_args()
    model_dir = dirname(args.model_dir)
    vocab_file = model_dir + '/vocab.txt'
    test_name = basename(args.data_file)[-9:-4]

    vocabulary = load_vocab(vocab_file)

    index_to_char = {}
    for item in vocabulary:
        index_to_char[vocabulary[item]] = item

    _, x_data, y_data = parse_dataset(args.data_file, vocabulary)
    x_padded = tf.constant(pad_data(x_data, vocabulary))
    true_labels = tf.math.argmax(y_data, axis=1)

    #model = keras.models.load_model(model_dir)
    config = load_model_config(model_dir + '/config.txt')
    model = MainModel(**config)
    #model.compile(optimizer=keras.optimizers.Adam(), loss=keras.losses.BinaryCrossentropy(), metrics
    model.load_weights(model_dir + '/checkpoint.ckpt')

    predictions = model.predict(x_padded)
    category_predictions = tf.math.argmax(predictions, axis=1)

    with open(model_dir + "/" + test_name + "_pred.txt", 'w') as f:
        for i in range(len(x_data)):
            string = indices_to_text(x_data[i], index_to_char)
            true_label = 'TRUE' if true_labels[i] == 0 else 'FALSE'
            predicted_label = 'TRUE' if category_predictions[i] == 0 else 'FALSE'
            f.write(string + '\t' + true_label + '\t' + predicted_label + '\n')
Example #10
def run():
    if FLAGS.log_file_name:
        if not os.path.exists('log'):
            os.makedirs('log')
        sys.stdout = open(FLAGS.log_file_name, 'w')
    print_time()
    print('############### Loading Data ###############')
    print('\nload NRC_VAD... ')
    NRC = load_dict_json(FLAGS.NRC_filepath)
    print('NRC_VAD words: {}'.format(len(NRC)))
    print('load NRC_VAD done!\n')

    print('\nload ConceptNet dict... ')
    concept_dict = load_dict_json(FLAGS.conceptnet_dict_filepath)
    print('ConceptNet dict length: {}'.format(len(concept_dict)))
    print('load ConceptNet dict done!\n')

    print('\nload Dataset... ')
    X, y = load_dataset(FLAGS.dataset_path)
    print('All data length: {}'.format(len(X)))
    print('load dataset done!\n')

    print('############### Loading Embedding ###############')
    print('\nload origin word2vector... ')
    origin_word2vec = KeyedVectors.load_word2vec_format(FLAGS.w2v_ori_file,
                                                        binary=False)
    print('Origin word2vector size: {}'.format(
        (len(origin_word2vec.wv.vocab), origin_word2vec.wv.vector_size)))
    print('load origin word2vector done!\n')

    print('\nload conceptNet word2vector... ')
    conceptnet_word2vec = KeyedVectors.load_word2vec_format(
        FLAGS.w2v_concept_file, binary=False)
    print('ConceptNet word2vector size: {}'.format(
        (len(conceptnet_word2vec.wv.vocab),
         conceptnet_word2vec.wv.vector_size)))
    print('load conceptNet word2vector done!\n')

    # Training Code Block
    emo_list, cause_list = [], []

    for fold in range(FLAGS.folds):
        print('############# fold {} begin ###############'.format(fold))

        X_train, y_train, X_test, y_test = get_k_fold_data(
            FLAGS.folds, fold, X, y)

        print('############### Start Encoding ###############')
        train_emotion_labels = get_onehot_encoding(y_train[:, 0])
        test_emotion_labels = get_onehot_encoding(y_test[:, 0])
        train_cause_labels = get_onehot_encoding(y_train[:, 1])
        test_cause_labels = get_onehot_encoding(y_test[:, 1])

        train_original_text = get_sent_embedding_integrated_conceptnet(
            X_train[:, 0], FLAGS.max_doc_len, concept_dict, FLAGS.lam,
            origin_word2vec, conceptnet_word2vec, NRC, FLAGS.integrate_ek)
        test_original_text = get_sent_embedding_integrated_conceptnet(
            X_test[:, 0], FLAGS.max_doc_len, concept_dict, FLAGS.lam,
            origin_word2vec, conceptnet_word2vec, NRC, FLAGS.integrate_ek)
        train_event_text = get_sent_embedding_integrated_conceptnet(
            X_train[:, 1], FLAGS.max_event_len, concept_dict, FLAGS.lam,
            origin_word2vec, conceptnet_word2vec, NRC, FLAGS.integrate_ek)
        test_event_text = get_sent_embedding_integrated_conceptnet(
            X_test[:, 1], FLAGS.max_event_len, concept_dict, FLAGS.lam,
            origin_word2vec, conceptnet_word2vec, NRC, FLAGS.integrate_ek)

        with tf.name_scope("input_module"):
            original_text_input = layers.Input(
                batch_shape=(None, FLAGS.max_doc_len, FLAGS.embedding_dim))
            event_input = layers.Input(batch_shape=(None, FLAGS.max_event_len,
                                                    FLAGS.embedding_dim))
            emotion_labels_input = layers.Input(
                batch_shape=(None, FLAGS.n_emotion_class))
            # predictions = layers.Input(batch_shape=(None, 1))

        net = MainModel()
        output = net.model_build(FLAGS.ori_doc_module_type,
                                 FLAGS.candi_event_module_type,
                                 original_text_input, event_input)

        model = tf.keras.Model(inputs=[original_text_input, event_input],
                               outputs=output)

        optimizer = tf.keras.optimizers.RMSprop(FLAGS.learning_rate)
        model.compile(optimizer=optimizer,
                      loss={
                          'out_cause': 'binary_crossentropy',
                          'out_emotion': 'categorical_crossentropy'
                      },
                      loss_weights={
                          'out_cause': 0.5,
                          'out_emotion': 0.5
                      },
                      metrics=['accuracy'])
        # model.summary()

        current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        train_log_dir = "logs/loss" + current_time + 'train'
        # train_summary_writer = tf.summary.create_file_writer(train_log_dir)

        early_stopping = EarlyStopping(monitor='val_loss',
                                       patience=2,
                                       verbose=1,
                                       mode='auto')
        reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                      factor=0.1,
                                      patience=1,
                                      verbose=1,
                                      mode='auto',
                                      epsilon=0.0001,
                                      cooldown=0,
                                      min_lr=0)

        # tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir='fit_logs/', histogram_freq=1)

        model.fit(x=[train_original_text, train_event_text],
                  y=[train_emotion_labels, train_cause_labels],
                  epochs=FLAGS.epoch,
                  shuffle=True,
                  validation_data=([test_original_text, test_event_text],
                                   [test_emotion_labels, test_cause_labels]),
                  batch_size=FLAGS.batch_size,
                  callbacks=[early_stopping, reduce_lr])

        cur_time = time.localtime(time.time())
        # model.save(os.path.join('model/my_model',
        #                         'my_model_{}_{}_{}_{}_{}.h5'.format(cur_time.tm_mon, cur_time.tm_mday, cur_time.tm_hour,
        #                                                             cur_time.tm_min, cur_time.tm_sec)))

        # test
        prediction = model.predict([test_original_text, test_event_text])

        p, r, f1 = cal_prf(prediction, test_emotion_labels, 'emotion')
        emo_list.append([p, r, f1])
        print('emotion_prediction: test p {:.4f} r {:.4f} f1 {:.4f}'.format(
            p, r, f1))

        p, r, f1 = cal_prf(prediction, test_cause_labels, 'cause')
        cause_list.append([p, r, f1])
        print('cause_prediction: test p {:.4f} r {:.4f} f1 {:.4f}'.format(
            p, r, f1))

        print('############# fold {} end ###############'.format(fold))

    emo_list, cause_list = map(lambda x: np.array(x), [emo_list, cause_list])

    print('\nemotion_prediction: test f1 in {} fold: {}'.format(
        FLAGS.folds, emo_list[:, 2:]))
    p, r, f1 = emo_list.mean(axis=0)
    print('average : p {:.4f} r {:.4f} f1 {:.4f}\n'.format(p, r, f1))

    print('\ncause_prediction: test f1 in {} fold: {}'.format(
        FLAGS.folds, cause_list[:, 2:]))
    p, r, f1 = cause_list.mean(axis=0)
    print('average : p {:.4f} r {:.4f} f1 {:.4f}\n'.format(p, r, f1))
Example #11
import torch
from model import MainModel
from dataloader import testDataset
from tqdm import tqdm
import numpy as np
from sklearn.metrics import f1_score

dataset = testDataset()

model = MainModel(10310, 15439).cuda()
model.load_state_dict(torch.load("model_lstm.pth"))

total = 0
correct = 0

outs = []
preds = []

with torch.no_grad():
    for idx, data in tqdm(enumerate(dataset)):

        outputs = model(data["source"].unsqueeze(0).cuda(),
                        data["target"].unsqueeze(0).cuda(),
                        data["alignment"].unsqueeze(0).cuda()).view(-1).cpu()
        # print(outputs.numpy() > 0.5)
        ans = np.sum((outputs.numpy() > 0.5) == data["predictions"].numpy())
        outs.extend((outputs.numpy() > 0.5))
        preds.extend(data["predictions"].numpy())
        # print(ans, outputs.size()[0])
        total += outputs.size()[0]
        correct += ans
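The loop above accumulates outs, preds, correct and total but the excerpt never reports them, even though f1_score is imported; a small hedged completion that prints summary metrics could look like this (the summary lines are my addition, not part of the original script).

# Hedged addition: report the metrics gathered by the loop above.
print("Accuracy:", correct / total)
print("F1 score:", f1_score(np.array(preds, dtype=int), np.array(outs, dtype=int)))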
Example #12
    tf.random.set_seed(333)

    x_train = tf.constant(pad_data(x_train, vocabulary))
    x_val = tf.constant(pad_data(x_val, vocabulary))
    y_train = tf.constant(y_train)
    y_val = tf.constant(y_val)

    config = {
        'vocab_size': len(vocabulary),
        'embed_dim': args.embed_dim,
        'dropout': args.dropout,
        'rnn_type': args.rnn_type,
        'bidi': args.bidi
    }

    model = MainModel(**config)
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.BinaryCrossentropy(),
                  metrics=[keras.metrics.BinaryAccuracy()])

    checkpoint_path = model_path + '/checkpoint.ckpt'
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path,
        save_weights_only=True,
        monitor='val_acc',
        mode='max')

    log_dir = model_path + "/logs/fit/" + datetime.datetime.now().strftime(
        "%Y%m%d-%H%M%S")
    tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir,
                                                          histogram_freq=1)
Example #13
head = 2
candi_size = 583

train_data_num = ceil(cascade_num / 10 * 9)
syn_small_marker = torch.load("data/" + dataset +
                              "/marker.pkl").cuda(0)[train_data_num:, :]
syn_small_time = torch.load("data/" + dataset +
                            "/time.pkl").cuda(0)[train_data_num:, :]
syn_small_mask = torch.load("data/" + dataset +
                            "/mask.pkl").cuda(0)[train_data_num:, :]
if small:
    syn_small_adj_mat = torch.load("data/" + dataset + "/adj_mat.pkl")
syn_small_adj_list = torch.load("data/" + dataset + "/adj_list.pkl")

syn_small_main = MainModel(marker_num, 5, d_model, d_model, 2 * d_model,
                           d_model, d_model, d_model, head, candi_size,
                           max_time, 0.3, 0, 10, 0.99, 0.001, 0.1)
syn_small_main.load_state_dict(torch.load("models/main-" + dataset +
                                          "-512.pt"))

syn_small_main.cpu().eval()
if small:
    syn_small_evaluator = Small_Network_Evaluator(marker_num, neg_num,
                                                  test_size, syn_small_adj_mat)
else:
    syn_small_evaluator = Large_Network_Evaluator(marker_num, neg_num,
                                                  test_size,
                                                  syn_small_adj_list)

main = []
main_mean = []
Example #14
 def __init__(self, scriptpath, sys_argv):
     super(App, self).__init__(sys_argv)
     self.model = MainModel()
     self.main_view = MainView(self.model, scriptpath)
     self.main_view.show()  #Maximized()
     self.model.gridChanged.emit()
Example #15
        print(p, r, f1)
        return p, r, f1

    def sparse_seq_predict(self, model, test_markers, test_times, test_masks,
                           neigh_list, type_of_eval):
        # type_of_eval: 2 / 4 / 6 / 8 : "0" ;    1 / 3 / 5 / 7 / 9 : "1"
        sampled_indices = torch.multinomial(
            torch.ones(1,
                       test_markers.size()[0]),
            self.test_size)[0].unsqueeze(-1)
        sampled_marker = test_markers[sampled_indices].squeeze(1)
        sampled_time = test_times[sampled_indices].squeeze(1)
        sampled_mask = test_masks[sampled_indices].squeeze(1)

        res_accu, res_mle = model.generator.test_predict(
            sampled_marker, sampled_time, sampled_mask, neigh_list,
            model.marker_embeddings, type_of_eval)
        return res_accu, res_mle


if __name__ == "__main__":
    from model import MainModel
    m = MainModel(10, 5, 2, 10, 20, 10, 10, 10, 4, 5, 0.3, 0, 5, 0.99, 0.001,
                  0.1)
    a_list = [[], [1, 2, 3], [1, 2, 3], [4, 5, 6],
              [0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 2, 3],
              [1, 2, 3, 4, 5, 6, 7, 8], [1, 2, 3], [1, 2, 3], [1, 2, 3]]
    e = Large_Network_Evaluator(10, 4, 3, 6, a_list)
    p, r, f1 = e.sparse_network_reconstruct(m)
    print(p, r, f1)
Example #16
parser.add_argument('-neg_size', default=5, type=int)
parser.add_argument('-pos_size', default=[25, 30, 35], type=list)
parser.add_argument('-test_sample', default=100, type=int)

args = parser.parse_args()
log_dir = args.model + '-' + args.data + '-' + str(args.batch)
model_dir = args.model + '-' + args.data + '-' + str(args.batch)
logging.basicConfig(filename="log/" + log_dir + ".log", level=logging.DEBUG)

torch.manual_seed(args.seed)

data_dir = "data/" + args.data + "/"

if args.model == "main":
    network = MainModel(args.d_marker, args.neighbor, args.d_model, args.d_inner, args.d_model,
                        args.d_model, args.d_model, args.d_head, args.max_time, args.embed_ratio,
                        args.cuda, args.sample, args.discount, args.regular, dropout=args.dropout).cuda(args.cuda)
if args.model == "pr":
    network = PR_Model(args.d_marker, args.neighbor, args.d_model, args.d_inner, args.d_model,
                       args.d_model, args.d_model, args.d_head, args.max_time, args.embed_ratio,
                       args.cuda, args.sample, args.discount, args.regular, dropout=args.dropout).cuda(args.cuda)

if args.model == "rnn":
    network = RNN_Model(args.d_marker, args.neighbor, args.d_model, args.max_time, args.embed_ratio, args.cuda,
                        args.sample, args.discount, args.regular, dropout=args.dropout).cuda(args.cuda)

network.generator.sample_linear.cpu()

marker_data = torch.load(data_dir + "marker.pkl").cuda(args.cuda)
time_data = torch.load(data_dir + "time.pkl").cuda(args.cuda)
mask_data = torch.load(data_dir + "mask.pkl").cuda(args.cuda)
Example #17
 def __init__(self):
     super().__init__(sys.argv)
     self.main_model = MainModel()
     self.main_controller = MainController(self.main_model)
     self.main_view = MainView(self.main_model, self.main_controller)
     self.main_view.show()
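This constructor reads like a QApplication subclass that wires a model, controller and view together; a minimal sketch of how such an App class is typically launched is shown below. The PyQt5 import, the App/QApplication relationship and the exec_() call are assumptions about the surrounding code, and the view is replaced by a placeholder widget.

# Hedged sketch: typical entry point for a QApplication-style App class.
import sys
from PyQt5.QtWidgets import QApplication, QLabel

class App(QApplication):
    def __init__(self):
        super().__init__(sys.argv)
        # stand-in for the MainModel/MainController/MainView wiring above
        self.main_view = QLabel("MainView placeholder")
        self.main_view.show()

if __name__ == "__main__":
    app = App()
    sys.exit(app.exec_())  # start the Qt event loop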