Example #1
def train(args):
    assert args.num_classes
    common.make_dir(args.checkout_dir)
    nnet = DNN((args.left_context + args.right_context + 1) * args.feat_dim,
               hidden_layer, hidden_size, args.num_classes, dropout=dropout)
    print(nnet)
    nnet.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = th.optim.Adam(nnet.parameters(), lr=args.learning_rate)

    train_dataset = THCHS30(root=args.data_dir, data_type='train',
                            left_context=args.left_context,
                            right_context=args.right_context, model_type='dnn')
    train_loader  = data.DataLoader(dataset=train_dataset, batch_size=args.min_batch,
                                    shuffle=True, num_workers=6)

    test_dataset = THCHS30(root=args.data_dir, data_type='test',
                           left_context=args.left_context,
                           right_context=args.right_context, model_type='dnn')
    test_loader  = data.DataLoader(dataset=test_dataset, batch_size=args.min_batch,
                                    shuffle=True, num_workers=6)

    cross_validate(-1, nnet, test_loader, test_dataset.num_frames)    
    for epoch in range(args.num_epochs):
        common.train_one_epoch(nnet, criterion, optimizer, train_loader)
        cross_validate(epoch, nnet, test_loader, test_dataset.num_frames)    
        th.save(nnet, common.join_path(args.checkout_dir, 'dnn.{}.pkl'.format(epoch + 1)))
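For context, a minimal sketch of the kind of DNN module the constructor call above assumes (hypothetical; the real model definition is not part of this example): a plain feed-forward stack in PyTorch.

import torch.nn as nn

class DNN(nn.Module):
    # Hypothetical MLP matching the call
    # DNN(input_dim, num_hidden_layers, hidden_size, num_classes, dropout=...).
    def __init__(self, input_dim, num_layers, hidden_size, num_classes, dropout=0.0):
        super().__init__()
        layers, dim = [], input_dim
        for _ in range(num_layers):
            layers += [nn.Linear(dim, hidden_size), nn.ReLU(), nn.Dropout(dropout)]
            dim = hidden_size
        layers.append(nn.Linear(dim, num_classes))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)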
Example #2
def Generator(source, target, pixelda=False, is_resize=True,
              dataset='NW', sensor_num=0):
    if is_resize:
        return source2target_square.Feature(dataset=dataset,
                                            sensor_num=sensor_num)
    else:
        return DNN.Feature()
Example #3
    def __init__(self,
                 p,
                 lr,
                 game="CartPole-v1",
                 mean_bound=5,
                 reward_bound=495.0,
                 save_model=10):
        """
        Constructor of the agent class.
            - game="CartPole-v1" : Name of the game environment
            - mean_bound=5 : Number of last acquired rewards considered for mean reward
            - reward_bound=495.0 : Reward acquired for completing an episode properly
            - save_model=10 : Interval for saving the model

            - p : Percentile for selecting training data
            - lr : Learning rate for the CE model
        """

        # Environment variables
        self.game = game
        self.env = gym.make(self.game)
        self.num_states = self.env.observation_space.shape[0]
        self.num_actions = self.env.action_space.n

        # Agent variables
        self.p = p * 100
        self.mean_bound = mean_bound
        self.reward_bound = reward_bound

        # CE model variables
        self.lr = lr
        self.model = DNN(self.num_states, self.num_actions, self.lr)
        self.save_model = save_model

        # File paths
        directory = os.path.dirname(__file__)
        self.path_model = os.path.join(directory, "../models/dnn.h5")
        self.path_plot = os.path.join(directory, "../plots/dnn.png")

        # Load model, if it already exists
        try:
            self.model.load(self.path_model)
        except OSError:
            print("Model does not exist! Creating new model...")
Example #4
def inference(args, cnn_features):
    tf.reset_default_graph()
    with tf.Session() as sess:
        net = DNN(sess)
        net.build_model()
        net.inference(cnn_features=cnn_features,
                      label_file=args.label_file,
                      gen_from=args.gen_from,
                      out_path=args.output_folder,
                      bsize=args.bsize)
Example #5
def train_dnn():
    params = {
        "offline_model_dir": "../weights",

        # deep part score fn
        "fc_type": "fc",
        "fc_dim": 32,
        "fc_dropout": 0.,
    }
    params.update(params_common)

    X_train, X_valid = load_data("train"), load_data("vali")
    X_test = load_data("test")

    model = DNN("ranking", params, logger)
    model.fit(X_train, validation_data=X_valid)
    model.save_session()
    model.predict(X_test, 'pred.txt')
Example #6
def predict():
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    df = pd.read_csv(config.TEST_PATH, header=None)
    dataset = PicDataset(df.loc[:, 1:])
    preds = np.zeros((len(dataset), 256))

    for i in range(5):
        temp = np.zeros((len(dataset), 256))
        model = DNN()
        model.load_state_dict(torch.load(f'./models/model_{i}.bin'))
        model.to(device)
        model.eval()
        for j in range(len(dataset)):
            x, _ = dataset[j]
            x = x.to(device)
            y = model(x)
            temp[j, :] = y.detach().cpu().numpy()
        preds += temp

    preds /= 5
    df = pd.DataFrame(np.concatenate([np.arange(1, 921).reshape(-1, 1), preds], axis=1), columns=np.arange(257))
    df[0] = df[0].astype('int')
    df.to_csv('./predictions.csv', index=False, header=False)
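The per-sample loop above is correct but slow; the same predictions can be produced in batches (a sketch, assuming PicDataset yields (x, y) pairs exactly as indexed above; the batch size is arbitrary):

loader = torch.utils.data.DataLoader(dataset, batch_size=64, shuffle=False)
with torch.no_grad():
    rows = []
    for x, _ in loader:
        rows.append(model(x.to(device)).cpu().numpy())
temp = np.concatenate(rows, axis=0)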
Example #7
def run():
    df = pd.read_csv(config.TRAIN_PATH)
    kfold = KFold(n_splits=5, random_state=config.SEED, shuffle=True)
    fold_losses = []

    for i, (train_idx, val_idx) in enumerate(kfold.split(df)):
        print("-------------------------------------------------------")
        print(f"Training fold {i}")
        print("-------------------------------------------------------")
        train = df.iloc[train_idx]
        validation = df.iloc[val_idx]
        train_dataset = PicDataset(train)
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=config.BATCH_SIZE
        )

        val_dataset = PicDataset(validation)
        val_data_loader = torch.utils.data.DataLoader(
            val_dataset,
            batch_size=config.BATCH_SIZE
        )

        device = 'cuda:0' if torch.cuda.is_available() else "cpu"
        model = DNN()
        model.to(device)
        optimizer = torch.optim.Adam(model.parameters(), lr=config.LR)
        loss = 0

        for _ in range(config.EPOCHS):
            engine.train_fn(train_data_loader, model, optimizer, device)
            loss = engine.eval_fn(val_data_loader, model, device)
        print(f"Loss on fold {i} is {loss}")
        fold_losses.append(loss)
        torch.save(model.state_dict(), f'./models/model_{i}.bin')

    print(f"Average loss on cross validation is {sum(fold_losses) / 5}")
X_train, y_train, k = fetch_data()
print('Done')
# split the data into train and test randomly
X_train, X_test, y_train, y_test = train_test_split(X_train,
                                                    y_train,
                                                    test_size=0.2)

scaler = MinMaxScaler(feature_range=(1, 10))
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
y_train = scaler.fit_transform(y_train.reshape([y_train.shape[0], 1]))
y_test = scaler.transform(y_test.reshape([y_test.shape[0], 1]))

# scaling input features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# making target array a column vector
y_train = y_train.reshape((y_train.shape[0], 1))
y_test = y_test.reshape((y_test.shape[0], 1))

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
print('Starting')
print('Training the model')
# call our model
regr = DNN()
prediction = regr.fit(X_train, y_train, k, X_test)
error = np.mean(abs(1 - (y_test / (prediction + 1e-8))))
print('Test error is: ' + str(error * 100.00) + '%')
Example #9
    try:
        os.makedirs(args.output)
    except OSError:
        pass
    try:
        os.makedirs(args.log)
    except OSError:
        pass

    # Load the training and test data
    train_loader, test_loader = get_data(args)

    # Use CUDA if available
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    input_size = 28 * 28
    output_size = args.num_classes
    model = DNN(input_size=input_size, output_size=output_size).to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 weight_decay=args.weight_decay)

    # Compute loss and accuracy after each mini-batch
    model.train()
    for epoch in range(args.epochs):
        correct = 0
        total = 0
        for idx, (x, y) in enumerate(train_loader):
            x, y = x.to(device), y.to(device)
            y_pred = model(x)
            _, y_pred_t = torch.max(y_pred.data, 1)
            total += y.size(0)
Example #10
                        required=True,
                        help='Path to load the trained DML model')
    parser.add_argument('-pl',
                        '--positive_labels',
                        default='ESLMV',
                        help='Labels in CC_WEB_VIDEO dataset that are '
                        'considered positive. default=\'ESLMV\'')
    args = vars(parser.parse_args())

    print('loading data...')
    cc_dataset = pk.load(open('datasets/cc_web_video.pickle', 'rb'))
    cc_features = np.load(args['evaluation_set'])

    model = DNN(cc_features.shape[1],
                None,
                args['model_path'],
                load_model=True,
                trainable=False)
    cc_embeddings = model.embeddings(cc_features)
    print('Evaluation set file: ', args['evaluation_set'])
    print('Path to DML model: ', args['model_path'])
    print('Positive labels: ', args['positive_labels'])

    print('\nEvaluation Results')
    print('==================')
    similarities = calculate_similarities(cc_dataset['queries'], cc_embeddings)
    mAP, pr_curve = evaluate(cc_dataset['ground_truth'],
                             similarities,
                             positive_labels=args['positive_labels'],
                             all_videos=False)
    print('CC_WEB_VIDEO mAP: ', mAP)
Example #11
#! -*- coding: utf-8 -*-
import os
import torch
import time

import torch.nn.functional as F

from args import args
from model import DNN
from utils import cos_sim
from processing import BuildExamples

# Load the model
MODEL_PATH = os.path.join(args.get('data_path'), 'model/embedded_adam_0.001_19.model')
print(MODEL_PATH)
model = DNN(vocab_size=5265, embedding_size=200, hidden_size=512)
model.load_state_dict(torch.load(MODEL_PATH, map_location=torch.device('cpu')))

# Load the vocabulary
corpus = BuildExamples()
corpus.load_vocabulary(path=os.path.join(args.get('data_path'), 'vocabulary.txt'))

MAX_LEN = args.get('max_len', 20)


def get_hidden_state(sentence):
    line = sentence.strip().split()
    line = ['bos'] + line + ['eos']
    sentence2id = torch.LongTensor([corpus.words2id.get(item, 0) for item in line]).view(1, -1)

    with torch.no_grad():
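        # The source is truncated here. A plausible continuation (hypothetical;
        # the model's forward signature is not shown) would run the id sequence
        # through the model and return its hidden representation:
        hidden = model(sentence2id)
    return hidden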
Example #12
def optimize(X_train,
             y_train,
             X_val,
             y_val,
             n_hidden1=300,
             n_hidden2=100,
             dropout_rate=0.5,
             epochs=10,
             batch_size=64,
             learning_rate=1e-3):
    # Training
    # ==================================================

    # Generate batches
    def fetch_batch(batch_size, iteration, epoch):
        np.random.seed(epoch)
        shuffled_indices = np.random.permutation(X_train.shape[0])
        indices = shuffled_indices[batch_size * iteration:batch_size *
                                   (iteration + 1)]
        return X_train[indices, :], y_train[indices]

    dnn = DNN(height=HEIGHT,
              width=WIDTH,
              n_outputs=10,
              n_hidden1=n_hidden1,
              n_hidden2=n_hidden2,
              dropout_rate=dropout_rate,
              seed=RANDOM_STATE)

    n_train = X_train.shape[0]

    # Define Training procedure
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(dnn.loss)
    extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    # Output directory for models and summaries
    out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", get_time()))
    info("Writing to {}\n".format(out_dir))

    # Summaries for loss
    loss_summary = tf.summary.scalar("loss", dnn.loss)

    # Train Summaries
    train_summary_op = tf.summary.merge([loss_summary])
    train_summary_dir = os.path.join(out_dir, "summaries", "train")
    train_summary_writer = tf.summary.FileWriter(train_summary_dir,
                                                 tf.get_default_graph())

    # Val summaries
    val_summary_op = tf.summary.merge([loss_summary])
    val_summary_dir = os.path.join(out_dir, "summaries", "val")
    val_summary_writer = tf.summary.FileWriter(val_summary_dir,
                                               tf.get_default_graph())

    # Checkpoint directory. Tensorflow assumes this directory already exists so we need to create it
    checkpoint_dir = os.path.abspath(os.path.join(out_dir, "checkpoints"))
    checkpoint_prefix = os.path.join(checkpoint_dir, "model")
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    saver = tf.train.Saver(max_to_keep=10)

    session_conf = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_conf) as sess:
        # Initialize all variables
        sess.run(tf.global_variables_initializer())

        for epoch in range(epochs):
            # Training loop. For each batch...
            for iteration in range(n_train // batch_size):
                X_batch, y_batch = fetch_batch(batch_size, iteration, epoch)
                sess.run([train_op, extra_update_ops],
                         feed_dict={
                             dnn.training: True,
                             dnn.X: X_batch,
                             dnn.y: y_batch
                         })

            # Evaluates model on a training set
            summary_train, acc_train = sess.run(
                [train_summary_op, dnn.accuracy],
                feed_dict={
                    dnn.X: X_batch,
                    dnn.y: y_batch
                })
            train_summary_writer.add_summary(summary_train, global_step=epoch)
            # Evaluates model on a validation set
            summary_val, acc_val = sess.run([val_summary_op, dnn.accuracy],
                                            feed_dict={
                                                dnn.X: X_val,
                                                dnn.y: y_val
                                            })
            val_summary_writer.add_summary(summary_val, global_step=epoch)
            info(
                "Epoch: {0:3d}, Train accuracy: {1:.4f}, Val accuracy: {2:.4f}"
                .format(epoch, acc_train, acc_val))

            # Save model
            path = saver.save(sess, checkpoint_prefix, global_step=epoch)
            debug("Saved model checkpoint to {}".format(path))

        cmd_text = 'python evaluate.py --checkpoint %s ...' % checkpoint_prefix
        info("Training complete. For evaluation: {}".format(cmd_text))
Example #13
class Agent:
    """
    Class representing a learning agent acting in an environment.
    """
    def __init__(self,
                 p,
                 lr,
                 game="CartPole-v1",
                 mean_bound=5,
                 reward_bound=495.0,
                 save_model=10):
        """
        Constructor of the agent class.
            - game="CartPole-v1" : Name of the game environment
            - mean_bound=5 : Number of last acquired rewards considered for mean reward
            - reward_bound=495.0 : Reward acquired for completing an episode properly
            - save_model=10 : Interval for saving the model

            - p : Percentile for selecting training data
            - lr : Learning rate for the CE model
        """

        # Environment variables
        self.game = game
        self.env = gym.make(self.game)
        self.num_states = self.env.observation_space.shape[0]
        self.num_actions = self.env.action_space.n

        # Agent variables
        self.p = p * 100
        self.mean_bound = mean_bound
        self.reward_bound = reward_bound

        # CE model variables
        self.lr = lr
        self.model = DNN(self.num_states, self.num_actions, self.lr)
        self.save_model = save_model

        # File paths
        directory = os.path.dirname(__file__)
        self.path_model = os.path.join(directory, "../models/dnn.h5")
        self.path_plot = os.path.join(directory, "../plots/dnn.png")

        # Load model, if it already exists
        try:
            self.model.load(self.path_model)
        except OSError:
            print("Model does not exist! Creating new model...")

    def get_action(self, state):
        """
        Returns an action for a given state, based on the current policy.
            - state : Current state of the agent
        """

        state = state.reshape(1, -1)
        policy = self.model.predict(state)[0]
        action = np.random.choice(self.num_actions, p=policy)

        return action

    def sample(self, num_episodes):
        """
        Returns samples of state/action tuples for a given number of episodes.
            - num_episodes : Number of episodes to sample
        """

        episodes = [[] for _ in range(num_episodes)]
        rewards = [0.0 for _ in range(num_episodes)]

        for episode in range(num_episodes):
            state = self.env.reset()
            total_reward = 0.0

            while True:
                action = self.get_action(state)
                next_state, reward, done, _ = self.env.step(action)
                episodes[episode].append((state, action))
                state = next_state

                # Penalize agent if pole could not be balanced until end of episode.
                if done and reward < 499.0:
                    reward = -100.0

                total_reward += reward

                if done:
                    total_reward += 100.0
                    rewards[episode] = total_reward
                    break

        return rewards, episodes

    def get_training_data(self, episodes, rewards):
        """
        Returns training data for the CE model.
            - episodes : List of state/action tuples
            - rewards : List of gained rewards
        """

        x_train, y_train = [], []
        reward_bound = np.percentile(rewards, self.p)

        for episode, reward in zip(episodes, rewards):
            if reward >= reward_bound:
                states = [step[0] for step in episode]
                actions = [step[1] for step in episode]
                x_train.extend(states)
                y_train.extend(actions)

        x_train = np.asarray(x_train)
        y_train = to_categorical(y_train, num_classes=self.num_actions)

        return x_train, y_train, reward_bound

    def train(self, num_epochs, num_episodes, report_interval):
        """
        Trains the CE model for a given number of epochs and episodes. Outputting report information is controlled by a given time interval.
            - num_epochs : Number of epochs to train
            - num_episodes : Number of episodes to train
            - report_interval : Interval for outputting report information of training
        """

        total_rewards = []

        for epoch in range(1, num_epochs + 1):
            if epoch % self.save_model == 0:
                self.model.save(self.path_model)

            rewards, episodes = self.sample(num_episodes)
            x_train, y_train, reward_bound = self.get_training_data(
                episodes, rewards)

            mean_reward = np.mean(rewards)
            total_rewards.extend(rewards)
            mean_total_reward = np.mean(total_rewards[-self.mean_bound:])

            if epoch % report_interval == 0:
                print(f"Epoch: {epoch + 1}/{num_epochs}"
                      f"\tMean Reward: {mean_reward : .2f}"
                      f"\tReward Bound: {reward_bound : .2f}")

                self.plot_rewards(total_rewards)

            if mean_total_reward > self.reward_bound:
                self.model.save(self.path_model)

            self.model.fit(x_train, y_train)

        self.model.save(self.path_model)

    def play(self, num_episodes):
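        """
        Plays a given number of episodes with the current policy and renders the environment.
            - num_episodes : Number of episodes to play
        """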
        for episode in range(1, num_episodes + 1):
            state = self.env.reset()
            total_reward = 0.0

            while True:
                self.env.render()
                action = self.get_action(state)
                state, reward, done, _ = self.env.step(action)
                total_reward += reward

                if done:
                    print(f"Episode: {episode + 1}/{num_episodes}"
                          f"\tReward: {total_reward : .2f}")

                    break

    def plot_rewards(self, total_rewards):
        x = np.arange(len(total_rewards))
        y = total_rewards

        slope, intercept, _, _, _ = linregress(x, y)

        plt.plot(x, y, linewidth=0.8)
        plt.plot(x, slope * x + intercept, color="red", linestyle="-.")
        plt.xlabel("Episode")
        plt.ylabel("Reward")
        plt.title("CE-Learning")
        plt.savefig(self.path_plot)
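A hypothetical driver for the Agent class above (the argument values are illustrative; p is the percentile in [0, 1] and lr the learning rate, per the constructor docstring):

if __name__ == "__main__":
    agent = Agent(p=0.7, lr=1e-3)
    agent.train(num_epochs=100, num_episodes=50, report_interval=10)
    agent.play(num_episodes=3)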
Example #14
Reg = None  # Regularization: L1, L2, or None
alpha = 0.1

dropout = 0.5  # (0, 1.0]; 1.0 means no dropout
iteration = 2000
step = 50
fig, ax = plt.subplots(num_layers + 1, 2)
#fig.suptitle('Weights/Bias Distribution')
loss_all = np.zeros([int(iteration / step), 2])
fig.tight_layout()
fig2 = plt.figure()
fig2.suptitle('Learning curve')
learning_curve = fig2.add_subplot(111)
##############################

mymodel = DNN.model(num_layers, num_neurons, input_size, output_size, Reg,
                    alpha)

with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=10)
    writer = tf.summary.FileWriter(save_dir, sess.graph)
    sess.run(tf.global_variables_initializer())

    for i in range(iteration):
        #batch_xs, batch_ys = mnist.train.next_batch(200)
        now = i % 5
        batch_xs = mnist.train.images[now * 200:(now + 1) * 200, :]
        batch_ys = mnist.train.labels[now * 200:(now + 1) * 200, :]
        _, accu, loss, merge = sess.run([
            mymodel.optimize, mymodel.accuracy, mymodel.cross_entropy,
            mymodel.sum_train
        ],
Example #15
def main():
    # do thing before training
    args = get_args()
    save_dir = os.path.join(args.sd, args.tm, args.ft)
    print(args)
    input("*****Please check the params  also --> {} <--, Enter to continue*****".format(save_dir))
    os.system('mkdir -p {}'.format(save_dir))
    mode = args.mode
    batch_size = args.bs
    feature_type = args.ft
    num_epochs = args.ne

    # loading train data
    if mode == "train":
        train_data, train_label = load_data("train", train_protocol, mode=mode, feature_type=feature_type)

        # for i in range(len(train_data)):
        #     mean = np.mean(train_data[i], axis=0)
        #     std = np.std(train_data[i], axis=0)
        #     train_data[i] = (train_data[i] - mean) / std

        train_dataset = ASVDataSet(train_data, train_label, mode=mode)
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=2, shuffle=True)

        dev_data, dev_label = load_data("dev", dev_protocol, mode=mode, feature_type=feature_type)

        # for i in range(len(dev_data)):
        #     mean = np.mean(dev_data[i], axis=0)
        #     std = np.std(dev_data[i], axis=0)
        #     dev_data[i] = (dev_data[i] - mean) / std

        dev_dataset = ASVDataSet(dev_data, dev_label, mode=mode)
        dev_dataloader = DataLoader(dev_dataset, batch_size=batch_size, num_workers=2, shuffle=False)
    elif mode == "final":
        train_data, train_label = load_data(["train", "dev"], final_protocol,
                                            mode=mode, feature_type=feature_type)
        train_data = np.array(train_data)
        mean = np.mean(train_data, axis=0)
        std = np.std(train_data, axis=0)
        train_data = (train_data - mean) / std
        train_dataset = ASVDataSet(train_data, train_label, mode="train")
        train_dataloader = DataLoader(train_dataset, batch_size=batch_size, num_workers=2, shuffle=True)

    if "lcnn" in args.tm:
        model = LCNN(input_dim=77, num_classes=2)
    elif "dnn" in args.tm:
        model = DNN(990, 512, 2)  # mfcc imfcc cqt 429 cqcc 990
    elif "vgg" in args.tm:
        model = VGG(77, "VGG11")
    elif "cnn" in args.tm:
        model = CNN(77, 2, 0)

    if use_cuda():
        model = model.cuda()
    print(model)
    cross_entropy = nn.CrossEntropyLoss()
    optimizer = optim.ASGD(params=model.parameters(), lr=args.lr, weight_decay=1e-4)
    scheduler = ReduceLROnPlateau(optimizer, patience=0, verbose=True, factor=0.1, min_lr=1e-7)

    best_dev_accuracy = 0
    best_train_accuracy = 0
    for epoch in range(num_epochs):
        correct = 0
        total = 0
        train_loss = 0
        model.train()

        for tmp in tqdm(train_dataloader, desc="Epoch {}".format(epoch + 1)):
            data = Variable(tmp['data'])
            label = Variable(tmp['label']).view(-1)
            if use_cuda():
                data, label = data.cuda(), label.cuda()

            optimizer.zero_grad()
            predict = model(data)

            _, predict_label = torch.max(predict.data, 1)
            correct += (predict_label.cpu() == label.cpu().data).sum()
            total += label.size(0)

            loss = cross_entropy(predict, label.long())
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        train_accuracy = correct / total
        if mode == "final":
            if train_accuracy > best_train_accuracy:
                best_train_accuracy = train_accuracy
                save_checkpoint(
                    {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': train_accuracy},
                    save_path=os.path.join(save_dir, "best_eval.pkl")
                )
            save_checkpoint(
                {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': train_accuracy},
                save_path=os.path.join(save_dir, "final_eval.pkl")
            )
            print("Epoch [%d/%d], Loss: %.4fe-4,  Train Acc %.2f%%" % (
                epoch+1, num_epochs, 1e4 * train_loss / total, train_accuracy * 100))
            print(print_str.format("Best Acc: {}".format(best_train_accuracy)))

            scheduler.step(train_loss/total)

            if use_cuda():
                model.cuda()

        if mode == "train":
            dev_accuracy, dev_loss = get_test_accuracy(dev_dataloader, model, cross_entropy)

            save_checkpoint(
                {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': dev_accuracy},
                save_path=os.path.join(save_dir, 'final_dev.pkl')
            )

            if dev_accuracy > best_dev_accuracy:
                best_dev_accuracy = dev_accuracy
                save_checkpoint(
                    {'state_dict': model.cpu(), 'epoch': epoch + 1, 'acc': dev_accuracy},
                    save_path=os.path.join(save_dir, 'best_dev.pkl')
                )

            if use_cuda():
                model.cuda()

            print("Epoch [%d/%d], Train Loss: %.4fe-4, Train Acc %.2f%% Dev Loss: %.4fe-4 Dev Acc %.2f%% " % (
                epoch + 1, num_epochs, 1e4 * train_loss / total, train_accuracy * 100,  dev_loss, dev_accuracy * 100
            ))
            print(print_str.format("Best Acc: {}".format(best_dev_accuracy)))
            scheduler.step(dev_loss)
Example #16
    # Evaluation
    fm_pre = model(X_test)
    fm_pre = [1 if x > 0.5 else 0 for x in fm_pre]

    #**************** Statement 2 of Training *****************#
    # Get the latent vectors learned by the FM
    v = model.variables[2]  #[None, onehot_dim, k]

    X_train = tf.cast(tf.expand_dims(X_train, -1),
                      tf.float32)  #[None, onehot_dim, 1]
    X_train = tf.reshape(tf.multiply(X_train, v),
                         shape=(-1, v.shape[0] *
                                v.shape[1]))  #[None, onehot_dim*k]

    hidden_units = [256, 128, 64]
    model = DNN(hidden_units, 1, 'relu')
    optimizer = optimizers.SGD(0.0001)

    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    train_dataset = train_dataset.batch(32).prefetch(
        tf.data.experimental.AUTOTUNE)
    model.compile(optimizer=optimizer,
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    model.fit(train_dataset, epochs=50)

    # Evaluation
    X_test = tf.cast(tf.expand_dims(X_test, -1), tf.float32)
    X_test = tf.reshape(tf.multiply(X_test, v),
                        shape=(-1, v.shape[0] * v.shape[1]))
    fnn_pre = model(X_test)
Example #17
    tmp = set()  # vocabulary of all words seen in the training sentences
    for st in train_x1:
        for w in st:
            tmp.add(w)
    word_index = {w: i for i, w in enumerate(tmp)}
    #import pickle
    #with open("bow.pkl", "wb") as f:
    #    pickle.dump(word_index, f)

    x = torch.zeros(len(train_x1), len(word_index))
    for i in range(len(train_x1)):
        for w in train_x1[i]:
            x[i][word_index[w]] += 1
    print(x.size())

    print("\nConstructing model...", flush=True)
    model = DNN(x.size(1)).to(device)
    total_param = sum(p.numel() for p in model.parameters())
    trainable_param = sum(p.numel() for p in model.parameters()
                          if p.requires_grad)
    print("{} parameters with {} trainable".format(total_param,
                                                   trainable_param),
          flush=True)

    print("\nStart training...", flush=True)
    train_dataset1 = TwitterDataset(x, train_y1)
    train_loader1 = torch.utils.data.DataLoader(dataset=train_dataset1,
                                                batch_size=BATCH,
                                                shuffle=True,
                                                num_workers=4)
    train_model(train_loader1, model, device, LR)
Example #18
def train_plot(training_data):
    #plot the training data
    plt.plot(training_data['loss'], linewidth=2, label='Train')
    plt.plot(training_data['val_loss'], linewidth=2, label='Valid')
    plt.legend(loc='upper right')
    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('Epoch')
    plt.show()


if __name__ == '__main__':

    #import data
    df = pd.read_csv('Book1.csv', index_col = 0)
    x_train, x_valid, y_train, y_valid, X, y = preprocessing(df)
    input_dim = x_train.shape[1]

    #create an instance of the model class
    model = DNN(input_dim)
    model.summary()

    #set save path and train
    model.set_save_path('dnn_v1.h5')
    output = model.fit(x_train, y_train, x_valid, y_valid)
    train_plot(output)

    #load model and predict
    model.load_model('dnn_v1.h5')
    result = model.predict(X)
Example #19
    # create folder for generated data
    gen_data_path = os.path.join(out_path, gen_data_fdr)
    if not os.path.exists(gen_data_path):
        os.makedirs(gen_data_path)
    if not os.path.exists(scaler_dir):
        os.makedirs(scaler_dir)
    print('here')
    # create folder for model checkpoints
    checkpoint_path = os.path.join(out_path, checkpoint_fdr)
    if not os.path.exists(checkpoint_path):
        os.makedirs(checkpoint_path)


    model = DNN(input_size, hidden_size, out_size)
    model = torch.nn.DataParallel(model.to(device), device_ids=use_devices)
    #print(model)
    #save_dir = "FPGA"
    checkpoint = torch.load(
        './segan_data_out/20200422_0713/checkpoints/state-20.pkl')
    #from collections import OrderedDict
    state_dict = checkpoint['DNN']
    #for k, v in state_dict.items():
    #    name = k[7:] # remove `module.`
    #    v = v.cpu().numpy()
    #    np.savetxt(os.path.join(save_dir,name), v, newline="\n")
    #    print(name, v.shape)
    scaler_path_input = os.path.join(scaler_dir, "scaler_input.p")
    scaler_input = pickle.load(open(scaler_path_input, 'rb'))
    scaler_path_label = os.path.join(scaler_dir, "scaler_label.p")
Example #20
    print('loading data...')
    train_set = np.load(args['train_set'])
    triplets = np.load(args['triplets'])

    if args.get('evaluation_set'):
        args['injection'] = np.min([args['injection'], 10000])
        print('Evaluation set file: ', args['evaluation_set'])
        print('Evaluation triplet file: ', args['evaluation_triplets'])
        print('Injected triplets: ', args['injection'])
        print('loading data...')
        evaluation_set = np.load(args['evaluation_set'])
        eval_triplets = np.load(args['evaluation_triplets']) + len(train_set)
        np.random.shuffle(eval_triplets)
        train_set = np.concatenate([train_set, evaluation_set], axis=0)
        triplets = np.concatenate([triplets, eval_triplets[:args['injection']]], axis=0)

    try:
        layers = [int(l) for l in args['layers'].split(',') if l]
    except Exception:
        raise Exception('--layers argument is in the wrong format. Specify the number '
                        'of neurons in each layer separated by a comma \',\'')

    model = DNN(train_set.shape[1],
                args['model_path'],
                hidden_layer_sizes=layers,
                learning_rate=args['learning_rate'],
                weight_decay=args['weight_decay'],
                gamma=args['gamma'])

    train_dml_network(model, train_set, triplets, args['epochs'], args['batch_sz'])
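For reference, a sketch of the argument parser this snippet assumes; every flag mirrors a key read above, but the exact names, types, and defaults are assumptions.

parser = argparse.ArgumentParser()
parser.add_argument('--train_set', required=True)
parser.add_argument('--triplets', required=True)
parser.add_argument('--evaluation_set')
parser.add_argument('--evaluation_triplets')
parser.add_argument('--injection', type=int, default=10000)
parser.add_argument('--layers', default='500,500')
parser.add_argument('--model_path', required=True)
parser.add_argument('--learning_rate', type=float, default=1e-5)
parser.add_argument('--weight_decay', type=float, default=1e-5)
parser.add_argument('--gamma', type=float, default=1.0)
parser.add_argument('--epochs', type=int, default=10)
parser.add_argument('--batch_sz', type=int, default=256)
args = vars(parser.parse_args())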
Example #21
def DomainClassifier(source, target, is_resize=True, dataset='NW'):
    if is_resize:
        return source2target_square.DomainPredictor(dataset=dataset)
    else:
        return DNN.DomainPredictor()
Example #22
def train(args, config, io):
    train_loader, validation_loader = get_loader(args, config)
    device = torch.device("cuda" if args.cuda else "cpu")
    # print(len(train_loader), len(validation_loader))

    #Try to load models
    model = DNN(args).to(device)
    """if device == torch.device("cuda"):
        model = nn.DataParallel(model)"""
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))

    # for para in list(model.parameters())[:-5]:
    #     para.requires_grad=False
    # print(model)

    if args.use_sgd:
        # print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        # print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)
        """opt = optim.Adam([
        {'params': list(model.parameters())[:-1], 'lr':args.lr/50, 'weight_decay': 1e-4},
        {'params': list(model.parameters())[-1], 'lr':args.lr, 'weight_decay': 1e-4}
        ])
        """

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = nn.MSELoss()

    best_test_loss = 9999999.
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        train_dis = 0.0
        count = 0.0
        model.train()
        for data, label in train_loader:
            data, label = data.to(device), label.to(device)
            data = drop(jitter(data, device), device)
            # data = jitter(data, device, delta=0.05)
            batch_size = data.shape[0]
            logits = model(data)
            loss = criterion(logits, label)
            opt.zero_grad()
            loss.backward()
            opt.step()
            dis = distance(logits, label)
            count += batch_size
            train_loss += loss.item() * batch_size
            train_dis += dis.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f, distance: %.6f' % (
            epoch, train_loss * 1.0 / count, train_dis * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        test_dis = 0.0
        count = 0.0
        model.eval()
        with torch.no_grad():
            for data, label in validation_loader:
                data, label = data.to(device), label.to(device)
                batch_size = data.shape[0]
                logits = model(data)
                loss = criterion(logits, label)
                dis = distance(logits, label)
                count += batch_size
                test_loss += loss.item() * batch_size
                test_dis += dis.item() * batch_size
        outstr = 'Test %d, loss: %.6f, distance: %.6f' % (
            epoch, test_loss * 1.0 / count, test_dis * 1.0 / count)
        io.cprint(outstr)
        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
Example #23
def RMSE(y_true, y_pred):
    return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1))


X_data_name_1 = '../../.npz'
y_data_name_1 = '../../.npz'
X_data_name_2 = '../../.npz'
y_data_name_2 = '../../.npz'
X_train, y_train = load_from_npz(X_data_name_1), load_from_npz(y_data_name_1)
X_test, y_test = load_from_npz(X_data_name_2), load_from_npz(y_data_name_2)

X_train, X_test = normalize(X_train, X_test)

# %% DNN (only fully-connected layer)

dnn = DNN()

X_train = np.reshape(X_train,
                     (len(X_train), X_train.shape[1] * X_train.shape[2] * 3))
X_test = np.reshape(X_test,
                    (len(X_test), X_test.shape[1] * X_test.shape[2] * 3))

#optimization details
adam = Adam(lr=lrf, decay=lr_decay)
dnn.compile(loss='mean_squared_error', optimizer=adam, metrics=[RMSE])

for epoch in range(1, maxepoches):

    if epoch % 25 == 0 and epoch > 0:
        lrf /= 2
        adam = Adam(lr=lrf, decay=lr_decay)
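    # The source is truncated here; a plausible rest of the loop (hypothetical,
    # including the batch size) re-compiles with the halved learning rate and
    # trains for one epoch:
    dnn.compile(loss='mean_squared_error', optimizer=adam, metrics=[RMSE])
    dnn.fit(X_train, y_train, batch_size=128, epochs=1,
            validation_data=(X_test, y_test))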
Example #24
def main():
    print('> Starting execution...')

    # Training settings
    parser = argparse.ArgumentParser(description='PyTorch MNIST Example')
    group = parser.add_mutually_exclusive_group()
    group.add_argument('--fit',
                       action='store_true',
                       help='fit the tuned model on digits 0-4')
    group.add_argument('--transfer',
                       action='store_true',
                       help='train a pretrained model on digits 5-9')

    parser.add_argument('--batch-size',
                        type=int,
                        default=256,
                        metavar='N',
                        help='input batch size for training (default: 256)')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        metavar='E',
                        help='number of epochs to train (default: 50)')
    parser.add_argument('--lr',
                        type=float,
                        default=1e-3,
                        metavar='L',
                        help='learning rate (default: 1e-3)')
    parser.add_argument('--early-stopping',
                        type=int,
                        default=7,
                        metavar='E',
                        help='early stopping (default: 7 epochs)')
    parser.add_argument(
        '--size',
        type=int,
        default=100,
        metavar='S',
        help='size of the training data for transfer learning (default: 100)')

    parser.add_argument('--seed',
                        type=int,
                        default=23,
                        metavar='S',
                        help='random seed (default: 23)')

    args = parser.parse_args()

    use_cuda = torch.cuda.is_available()  # use cuda if available
    device = torch.device("cuda" if use_cuda else "cpu")
    torch.manual_seed(args.seed)  # random seed

    print('> Loading MNIST data')
    train_set = datasets.MNIST(MNIST_DATA_DIR,
                               train=True,
                               download=True,
                               transform=transforms.Compose([
                                   transforms.ToTensor(),
                                   transforms.Normalize((0.1307, ), (0.3081, ))
                               ]))

    test_set = datasets.MNIST(MNIST_DATA_DIR,
                              train=False,
                              download=True,
                              transform=transforms.Compose([
                                  transforms.ToTensor(),
                                  transforms.Normalize((0.1307, ), (0.3081, ))
                              ]))

    train_digits_04 = np.where(train_set.train_labels < 5)[0]
    train_digits_59 = np.where(train_set.train_labels > 4)[0]

    test_digits_04 = np.where(test_set.test_labels < 5)[0]
    test_digits_59 = np.where(test_set.test_labels > 4)[0]

    if args.fit:
        # Training the tuned model on digits 0-4
        print('> Training a new model on MNIST digits 0-4')

        X_train_04, y_train_04, X_valid_04, y_valid_04 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_04, test_digits_04)

        torch.manual_seed(args.seed)

        print('> Initializing the model')

        model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM, batch_norm=True)
        model.apply(init_he_normal)  # He initialization

        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model,
                                  device,
                                  X_train_04,
                                  y_train_04,
                                  criterion,
                                  optimizer,
                                  X_valid=X_valid_04,
                                  y_valid=y_valid_04,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_04_PATH}')
        torch.save(model.state_dict(), MODEL_04_PATH)
    elif args.transfer:
        # Transfer learning
        print(
            '> Training a model on MNIST digits 5-9 from a pretrained model for digits 0-4'
        )

        if os.path.isfile(MODEL_04_PATH):
            print('> Loading the pretrained model')

            model = DNN(INPUT_DIM, OUTPUT_DIM, HIDDEN_DIM,
                        batch_norm=True).to(device)
            model.load_state_dict(torch.load(MODEL_04_PATH))

            for param in model.parameters():
                param.requires_grad = False

            # Parameters of newly constructed modules have requires_grad=True by default
            model.fc4 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.fc5 = nn.Linear(HIDDEN_DIM, HIDDEN_DIM)
            model.out = nn.Linear(HIDDEN_DIM, OUTPUT_DIM)

            print('> Using saved model state')
        else:
            print(
                '> Model state file is not found, fit a model before the transfer learning'
            )
            print('> Stopping execution')
            return

        X_train_59, y_train_59, X_valid_59, y_valid_59 = data_to_numpy(
            train_set, test_set, INPUT_DIM, train_digits_59[:args.size],
            test_digits_59)

        # fixing the issues with labels
        y_train_59 = y_train_59 - 5
        y_valid_59 = y_valid_59 - 5

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=args.lr)

        print('> Training the model')
        model, _, _ = train_model(model,
                                  device,
                                  X_train_59,
                                  y_train_59,
                                  criterion,
                                  optimizer,
                                  X_valid=X_valid_59,
                                  y_valid=y_valid_59,
                                  batch_size=args.batch_size,
                                  n_epochs=args.epochs,
                                  early_stopping=args.early_stopping)

        print(f'> Saving the model state at {MODEL_59_PATH}')
        torch.save(model.state_dict(), MODEL_59_PATH)
    else:
        print('> Incorrect mode, try either `--fit` or `--transfer`')
        print('> Stopping execution')
Example #25
                                       model_path=args.word_model,
                                       dict_path=args.dict,
                                       seq_len=seq_len)
                test_data = DataLoader(test_words,
                                       batch_size=args.batch_size,
                                       shuffle=False)
                scores += model_manager.get_all_predictions(test_data)

        with open(args.predict, 'w') as file:
            file.write('id,label\n')
            for i, score in enumerate(scores):
                pred = 1 if score > threshold else 0
                file.write('{},{}\n'.format(i, pred))

    elif args.mode == 'bow':
        model = DNN()
        train_words = BOW(mode='train',
                          x_path=args.train_x,
                          y_path=args.train_y)
        valid_words = BOW(mode='valid',
                          x_path=args.train_x,
                          y_path=args.train_y)
        train_data = DataLoader(train_words,
                                batch_size=args.batch_size,
                                shuffle=True)
        valid_data = DataLoader(valid_words,
                                batch_size=args.batch_size,
                                shuffle=False)

        manager = Manager(model, args)
        manager.train(train_data, valid_data)
Example #26
File: main.py, Project: hck0821/ML2017FALL
    if args.valid_ratio != 0:
        valid_size = int(len(train_x) * args.valid_ratio)
        print('Split %d/%d validation data...' % (valid_size, len(train_x)))
        train_x, train_y = shuffle(train_x, train_y)
        valid_x, valid_y = train_x[-valid_size:], train_y[-valid_size:]
        train_x, train_y = train_x[:-valid_size], train_y[:-valid_size]
    num_users = max(train_x[:, 0])
    num_movies = max(train_x[:, 1])
    print('Select %s Model' % args.model)
    if args.model == 'MF':
        model = Matrix_Factorization(num_users,
                                     num_movies,
                                     args.vector_dim,
                                     verbose=1)
    if args.model == 'DNN':
        model = DNN(num_users, num_movies, args.vector_dim, verbose=1)

    adam = Adam(lr=1e-4)
    csvlogger = CSVLogger(logger)
    earlystopping = EarlyStopping(monitor='val_loss',
                                  patience=10,
                                  verbose=1,
                                  mode='min')
    checkpoint = ModelCheckpoint(params,
                                 monitor='val_loss',
                                 save_best_only=True,
                                 save_weights_only=True,
                                 verbose=0,
                                 mode='min')
    model.compile(loss='mse', optimizer=adam)
    print('Start Training...')
Example #27
        def organize_features(sample):

            y = [
                sample[Act_inx],
            ]
            features = list(sample[Act_inx + 1:])
            return (features, y)

        build_batch = (BuildBatch(BATCH_SIZE).by(0, 'vector',
                                                 float).by(1, 'number', float))

        if NET_ARCH == 'deep_net':
            model = deep_net(input_shape=(feature_dim, ))
            opti = Adam(lr=0.0001, beta_1=0.5)
        elif NET_ARCH == 'DNN':
            model = DNN(input_shape=(feature_dim, ))
            opti = sgd(lr=0.01, momentum=0.9, clipnorm=1.0)
        else:
            sys.exit("Network not defined correctly, check NET_ARCH. ")

        model.compile(optimizer=opti,
                      loss='mean_squared_error',
                      metrics=[Rsqured])

        def train_network_batch(sample):
            tloss = model.train_on_batch(sample[0], sample[1])
            return (tloss[0], tloss[1])

        def test_network_batch(sample):
            tloss = model.test_on_batch(sample[0], sample[1])
            return (tloss[0], )
Example #28
def train(args, config, io):
    train_loader, validation_loader, unlabelled_loader = get_loader(
        args, config)

    device = torch.device("cuda" if args.cuda else "cpu")

    #Try to load models
    model = DNN(args).to(device)
    ema_model = DNN(args).to(device)
    for param in ema_model.parameters():
        param.detach_()
    if device == torch.device("cuda"):
        model = nn.DataParallel(model)
        ema_model = nn.DataParallel(ema_model)
    if args.model_path != "":
        model.load_state_dict(torch.load(args.model_path))
        ema_model.load_state_dict(torch.load(args.model_path))

    if args.use_sgd:
        print("Use SGD")
        opt = optim.SGD(model.parameters(),
                        lr=args.lr * 100,
                        momentum=args.momentum,
                        weight_decay=1e-4)
    else:
        print("Use Adam")
        opt = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-4)

    scheduler = CosineAnnealingLR(opt, args.epochs, eta_min=args.lr)

    criterion = nn.MSELoss()
    consistency_criterion = nn.MSELoss()

    best_test_loss = 9999999.
    global_step = 0
    for epoch in range(args.epochs):
        startTime = time.time()

        ####################
        # Train
        ####################
        train_loss = 0.0
        count = 0.0
        model.train()
        ema_model.train()
        i = -1
        for (data, label), (u, _) in zip(cycle(train_loader),
                                         unlabelled_loader):
            i = i + 1
            if data.shape[0] != u.shape[0]:
                bt_size = np.minimum(data.shape[0], u.shape[0])
                data = data[0:bt_size]
                label = label[0:bt_size]
                u = u[0:bt_size]
            data, label, u = data.to(device), label.to(device), u.to(device)
            batch_size = data.shape[0]
            logits = model(data)
            class_loss = criterion(logits, label)

            u_student = jitter(u, device)
            u_teacher = jitter(u, device)
            logits_unlabeled = model(u_student)
            ema_logits_unlabeled = ema_model(u_teacher)
            ema_logits_unlabeled = Variable(ema_logits_unlabeled.detach().data,
                                            requires_grad=False)
            consistency_loss = consistency_criterion(logits_unlabeled,
                                                     ema_logits_unlabeled)
            if epoch < args.consistency_rampup_starts:
                consistency_weight = 0.0
            else:
                consistency_weight = get_current_consistency_weight(
                    args, args.final_consistency, epoch, i,
                    len(unlabelled_loader))

            consistency_loss = consistency_weight * consistency_loss
            loss = class_loss + consistency_loss

            opt.zero_grad()
            loss.backward()
            opt.step()

            global_step += 1
            # print(global_step)
            update_ema_variables(model, ema_model, args.ema_decay, global_step)

            count += batch_size
            train_loss += loss.item() * batch_size
        scheduler.step()
        outstr = 'Train %d, loss: %.6f' % (epoch, train_loss * 1.0 / count)
        io.cprint(outstr)

        ####################
        # Evaluation
        ####################
        test_loss = 0.0
        count = 0.0
        model.eval()
        ema_model.eval()
        for data, label in validation_loader:
            data, label = data.to(device), label.to(device)
            batch_size = data.shape[0]
            logits = ema_model(data)
            loss = criterion(logits, label)
            count += batch_size
            test_loss += loss.item() * batch_size
        outstr = 'Test %d, loss: %.6f' % (epoch, test_loss * 1.0 / count)
        io.cprint(outstr)
        if test_loss <= best_test_loss:
            best_test_loss = test_loss
            torch.save(ema_model.state_dict(),
                       'checkpoints/%s/models/model.t7' % args.exp_name)
            torch.save(ema_model, (config.root + config.model_path))
        io.cprint('Time: %.3f sec' % (time.time() - startTime))
Example #29
embedding = corpus.load_embedding(
    path=os.path.join(args.get('data_path'), 'embedding.json'))
embedding = torch.from_numpy(embedding).float()

if args['weighted']:
    weight = corpus.load_json(
        os.path.join(source_path, 'cross_entropy_loss_weight.json'))
    weight = torch.FloatTensor(list(weight.values())).to(device)
else:
    weight = None  # unweighted loss; CrossEntropyLoss accepts weight=None

data = sorted(corpus.examples.get('seq'), key=lambda x: len(x), reverse=True)

vocab_size = len(corpus.words2id)
logging.info('vocabulary size: {}'.format(vocab_size))
model = DNN(vocab_size=vocab_size,
            embedding_size=200,
            hidden_size=512,
            embedding=embedding)

model.to(device)

loss_function = nn.CrossEntropyLoss(weight=weight)

optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
# optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

model.train()

total_data = len(data)
batch_size = args['batch_size']
total_step = math.ceil(total_data / batch_size)
last_training_loss = 1000000000000
Example #30
def train():
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    # data_dict, [group2topic, mem2topic]
    data_dict, topic_dict = dh.load_data()

    train_data, train_label, dev_data, dev_label, test_data, test_label = dh.data_split(
        data_dict, topic_dict)
    train_dataset = dh.Dataset(train_data, train_label)
    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True)
    dev_dataset = dh.Dataset(dev_data, dev_label)
    dev_loader = DataLoader(dev_dataset, batch_size=128, shuffle=True)

    lambda1 = lambda epoch: (
        epoch / args.warm_up_step
    ) if epoch < args.warm_up_step else 0.5 * (math.cos(
        (epoch - args.warm_up_step) /
        (args.n_epoch * len(train_dataset) - args.warm_up_step) * math.pi) + 1)

    model = DNN(args).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.init_lr)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer,
        len(train_loader) * args.n_epoch)

    global_step = 0
    best_f1 = 0.
    loss_deq = collections.deque([], args.report_step)
    for epoch in range(args.n_epoch):
        for batch in tqdm(train_loader):
            optimizer.zero_grad()
            inputs = batch['input'].to(device)
            group_topic = batch['group_topic'].to(device)
            mem_topic = batch['mem_topic'].to(device)
            labels = batch['label'].to(device)
            output = model(inputs, mem_topic, group_topic, label=labels)
            loss = output[0]
            loss.backward()
            loss_deq.append(loss.item())
            optimizer.step()
            scheduler.step()
            global_step += 1

            if global_step % args.report_step == 0:
                logger.info('loss: {}, lr: {}, epoch: {}'.format(
                    np.average(loss_deq).item(),
                    optimizer.param_groups[0]['lr'],
                    global_step / len(train_dataset)))
            if global_step % args.eval_step == 0:
                model.eval()
                eval_result = evaluation(model,
                                         data_loader=dev_loader,
                                         device=device)
                logger.info(eval_result)
                if eval_result['f1'] > best_f1:
                    torch.save(model,
                               './model/{}/torch.pt'.format(args.task_name))
                    best_f1 = eval_result['f1']
                model.train()