Example #1
def test_one_dataset(params, file_name, test_q_data, test_qa_data, best_epoch):
    print "\n\nStart testing ......................\n Best epoch:", best_epoch
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    test_net = mx.mod.Module(symbol=g_model.sym_gen(),
                             data_names=['q_data', 'qa_data'],
                             label_names=['target'],
                             context=params.ctx)
    # allocate memory for the given input shapes
    test_net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
        mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
        label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    arg_params, aux_params = load_params(prefix=os.path.join('model', params.load, file_name),
                                         epoch=best_epoch)
    test_net.init_params(arg_params=arg_params, aux_params=aux_params,
                         allow_missing=False)
    test_loss, test_accuracy, test_auc = test(test_net, params, test_q_data, test_qa_data, label='Test')
    print "\ntest_auc\t", test_auc
    print "test_accuracy\t", test_accuracy
    print "test_loss\t", test_loss
Example #2
class Trainer:
    def __init__(self):
        self.model = MODEL()
        self.transform = transforms.Compose([
            transforms.Resize((100, 100), interpolation=3),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
        ])

        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=0.0001)
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def preprocess(self, img):
        return self.transform(img)

    def loss(
        self,
        output,
        label,
        epsilon=1e-6,
    ):
        log_probs = self.logsoftmax(output)
        targets = torch.zeros(log_probs.size()).scatter_(
            1,
            label.unsqueeze(0).unsqueeze(1).data, 1)
        targets = (1 - epsilon) * targets + epsilon / 2
        loss = (-targets * log_probs).mean(0).sum()
        return loss

    def train(self, x, label):
        self.model.train()

        x = self.preprocess(x).unsqueeze(0)
        x, label = Variable(x), Variable(torch.tensor(label))

        output = self.model(x)
        loss = self.loss(output, label)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        return loss.item()

    def evaluate(self, x):
        self.model.eval()
        x = self.preprocess(x).unsqueeze(0)
        x = Variable(x)
        output = self.model(x)

        return output.argmax().item()

    def package(self, path):
        state_dict = self.model.state_dict()
        # result =  json.dumps(state_dict)
        torch.save(state_dict, path)

    def load_package(self, path):
        checkpoint = torch.load(path)
        self.model.load_state_dict(checkpoint, strict=True)
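
A minimal usage sketch for the Trainer class above (hypothetical driver code, not part of the original example; it assumes MODEL is an image classifier over 100x100 RGB inputs and that its output width matches the two-class label smoothing in loss()):

import numpy as np
from PIL import Image

# Hypothetical driver: one training step, one prediction, save and reload weights
trainer = Trainer()
img = Image.fromarray(np.uint8(np.random.rand(100, 100, 3) * 255))  # dummy RGB image
step_loss = trainer.train(img, label=1)      # one optimization step, returns a float
prediction = trainer.evaluate(img)           # predicted class index
trainer.package('checkpoint.pt')             # save the state_dict
trainer.load_package('checkpoint.pt')        # restore it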
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1, help='the gpu that will be used (e.g. 0; -1 for CPU)')
    parser.add_argument('--max_iter', type=int, default=10, help='number of iterations')
    parser.add_argument('--decay_epoch', type=int, default=20, help='number of epochs between learning rate decays')
    parser.add_argument('--test', type=bool, default=False, help='enable testing')
    parser.add_argument('--train_test', type=bool, default=True, help='enable testing')
    parser.add_argument('--show', type=bool, default=True, help='print progress')
    parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std')
    parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate')
    parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay')
    parser.add_argument('--final_lr', type=float, default=1E-5,
                        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate')
    parser.add_argument('--max_grad_norm', type=float, default=3.0, help='maximum gradient norm')
    parser.add_argument('--hidden_dim', type=int, default=128, help='hidden layer dimension')
    parser.add_argument('--n_hidden', type=int, default=2, help='number of hidden layers')

    dataset = 'oj'

    if dataset == 'oj':
        parser.add_argument('--batch_size', type=int, default=5, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=50, help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=68, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/oj', help='data directory')
        parser.add_argument('--data_name', type=str, default='oj', help='data set name')
        parser.add_argument('--load', type=str, default='oj', help='model file to load')
        parser.add_argument('--save', type=str, default='oj', help='path to save model')

    elif dataset == 'assistments':
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=124, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/assistments', help='data directory')
        parser.add_argument('--data_name', type=str, default='assistments', help='data set name')
        parser.add_argument('--load', type=str, default='assistments', help='model file to load')
        parser.add_argument('--save', type=str, default='assistments', help='path to save model')

    elif dataset == 'STATICS':
        parser.add_argument('--batch_size', type=int, default=10, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=100, help='answer and question embedding dimensions')
        parser.add_argument('--n_question', type=int, default=1223,
                            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=800, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/STATICS', help='data directory')
        parser.add_argument('--data_name', type=str, default='STATICS', help='data set name')
        parser.add_argument('--load', type=str, default='STATICS', help='model file to load')
        parser.add_argument('--save', type=str, default='STATICS', help='path to save model')

    params = parser.parse_args()
    params.lr = params.init_lr

    print(params)

    dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',')
    # train_data_path = params.data_dir + "/" + "builder_train.csv"
    # valid_data_path = params.data_dir + "/" + "builder_test.csv"

    train_data_path = params.data_dir + "/" + params.data_name + "_train.csv"
    valid_data_path = params.data_dir + "/" + params.data_name + "_valid.csv"
    # test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    train_q_data, train_q_t_data, train_answer_data = dat.load_data(train_data_path)
    valid_q_data, valid_q_t_data, valid_answer_data = dat.load_data(valid_data_path)
    # test_q_data, test_q_t_data, test_answer_data = dat.load_data(test_data_path)

    model = MODEL(n_question=params.n_question,
                  hidden_dim=params.hidden_dim,
                  x_embed_dim=params.qa_embed_dim,
                  hidden_layers=params.n_hidden,
                  gpu=params.gpu)

    model.init_embeddings()
    model.init_params()
    # model = torch.load(params.data_dir + "/save/"+params.save)
    # optimizer = optim.SGD(params=model.parameters(), lr=params.lr, momentum=params.momentum)
    optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(model, idx, params, optimizer, train_q_data, train_q_t_data,
                                                      train_answer_data)
        print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (
            idx + 1, params.max_iter, train_loss, train_auc, train_accuracy))
        valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data, valid_q_t_data,
                                                     valid_answer_data)
        print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % (
            idx + 1, params.max_iter, valid_auc, valid_accuracy))
        # test_loss, test_accuracy, test_auc = test(model, params, optimizer, test_q_data, test_q_t_data,
        #                                           test_answer_data)
        # print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' % (
        #     idx + 1, params.max_iter, test_auc, test_accuracy))
        all_train_auc[idx + 1] = train_auc
        all_train_accuracy[idx + 1] = train_accuracy
        all_train_loss[idx + 1] = train_loss
        all_valid_loss[idx + 1] = valid_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_valid_auc[idx + 1] = valid_auc
        #
        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
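
The --decay_epoch, --lr_decay and --final_lr arguments suggest a step decay of the learning rate that the loop above does not show; a hedged sketch of how it could be wired into the epoch loop (hypothetical addition, not part of the original code):

# Hypothetical: decay the Adam learning rate every decay_epoch epochs,
# bounded below by final_lr (would sit at the end of the epoch loop).
if (idx + 1) % params.decay_epoch == 0:
    params.lr = max(params.lr * params.lr_decay, params.final_lr)
    for group in optimizer.param_groups:
        group['lr'] = params.lr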
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='the gpu will be used, e.g "0,1,2,3"')
    parser.add_argument('--max_iter',
                        type=int,
                        default=30,
                        help='number of iterations')
    parser.add_argument('--decay_epoch',
                        type=int,
                        default=20,
                        help='number of epochs between learning rate decays')
    parser.add_argument('--test',
                        type=bool,
                        default=False,
                        help='enable testing')
    parser.add_argument('--train_test',
                        type=bool,
                        default=True,
                        help='enable testing')
    parser.add_argument('--show',
                        type=bool,
                        default=True,
                        help='print progress')
    parser.add_argument('--init_std',
                        type=float,
                        default=0.1,
                        help='weight initialization std')
    parser.add_argument('--init_lr',
                        type=float,
                        default=0.01,
                        help='initial learning rate')
    parser.add_argument('--lr_decay',
                        type=float,
                        default=0.75,
                        help='learning rate decay')
    parser.add_argument(
        '--final_lr',
        type=float,
        default=1E-5,
        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help='momentum rate')
    parser.add_argument('--maxgradnorm',
                        type=float,
                        default=50.0,
                        help='maximum gradient norm')
    parser.add_argument('--final_fc_dim',
                        type=float,
                        default=50,
                        help='hidden state dim for final fc layer')

    dataset = 'assist2009_updated'

    if dataset == 'assist2009_updated':
        parser.add_argument('--q_embed_dim',
                            type=int,
                            default=50,
                            help='question embedding dimensions')
        parser.add_argument('--batch_size',
                            type=int,
                            default=32,
                            help='the batch size')
        parser.add_argument('--qa_embed_dim',
                            type=int,
                            default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument('--memory_size',
                            type=int,
                            default=20,
                            help='memory size')
        parser.add_argument(
            '--n_question',
            type=int,
            default=110,
            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen',
                            type=int,
                            default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir',
                            type=str,
                            default='./data/assist2009_updated',
                            help='data directory')
        parser.add_argument('--data_name',
                            type=str,
                            default='assist2009_updated',
                            help='data set name')
        parser.add_argument('--load',
                            type=str,
                            default='data/assist2009_updated',
                            help='model file to load')
        parser.add_argument('--save',
                            type=str,
                            default='data/assist2009_updated/model',
                            help='path to save model')

    elif dataset == 'STATICS':
        parser.add_argument('--batch_size',
                            type=int,
                            default=10,
                            help='the batch size')
        parser.add_argument('--q_embed_dim',
                            type=int,
                            default=50,
                            help='question embedding dimensions')
        parser.add_argument('--qa_embed_dim',
                            type=int,
                            default=100,
                            help='answer and question embedding dimensions')
        parser.add_argument('--memory_size',
                            type=int,
                            default=50,
                            help='memory size')
        parser.add_argument(
            '--n_question',
            type=int,
            default=1223,
            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen',
                            type=int,
                            default=6,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir',
                            type=str,
                            default='./data/STATICS',
                            help='data directory')
        parser.add_argument('--data_name',
                            type=str,
                            default='STATICS',
                            help='data set name')
        parser.add_argument('--load',
                            type=str,
                            default='STATICS',
                            help='model file to load')
        parser.add_argument('--save',
                            type=str,
                            default='STATICS',
                            help='path to save model')

    params = parser.parse_args()
    params.lr = params.init_lr
    params.memory_key_state_dim = params.q_embed_dim  # 50
    params.memory_value_state_dim = params.qa_embed_dim  # 200

    print(params)

    dat = DATA(n_question=params.n_question,
               seqlen=params.seqlen,
               separate_char=',')
    # train_data_path = params.data_dir + "/" + "test5.1.txt"
    train_data_path = params.data_dir + "/" + params.data_name + "_train2.csv"
    valid_data_path = params.data_dir + "/" + params.data_name + "_valid2.csv"
    test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    train_q_data, train_qa_data, _ = dat.load_data(train_data_path)
    valid_q_data, valid_qa_data, _ = dat.load_data(valid_data_path)
    test_q_data, test_qa_data, _ = dat.load_data(test_data_path)

    params.memory_key_state_dim = params.q_embed_dim  # memory key dim = question embedding dim
    params.memory_value_state_dim = params.qa_embed_dim  # memory value dim = answer embedding dim

    model = MODEL(n_question=params.n_question,
                  batch_size=params.batch_size,
                  q_embed_dim=params.q_embed_dim,
                  qa_embed_dim=params.qa_embed_dim,
                  memory_size=params.memory_size,
                  memory_key_state_dim=params.memory_key_state_dim,
                  memory_value_state_dim=params.memory_value_state_dim,
                  final_fc_dim=params.final_fc_dim)

    model.init_embeddings()
    model.init_params()
    # optimizer = optim.SGD(params=model.parameters(), lr=params.lr, momentum=params.momentum)
    optimizer = optim.Adam(params=model.parameters(),
                           lr=params.lr,
                           betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    train_shuffle_index = np.random.permutation(train_q_data.shape[0])
    valid_shuffle_index = np.random.permutation(valid_q_data.shape[0])
    train_q_data_shuffled = train_q_data[train_shuffle_index]
    train_qa_data_shuffled = train_qa_data[train_shuffle_index]
    valid_q_data_shuffled = valid_q_data[valid_shuffle_index]
    valid_qa_data_shuffled = valid_qa_data[valid_shuffle_index]

    train_loss_list, train_acc_list, train_auc_list, train_f1_list = [], [], [], []
    valid_loss_list, valid_acc_list, valid_auc_list, valid_f1_list = [], [], [], []

    for idx in range(params.max_iter):

        # train_loss, train_accuracy, train_auc = train(idx, model, params, optimizer, train_q_data, train_qa_data)
        train_loss, train_accuracy, train_auc, train_f1 = train(
            idx, model, params, optimizer, train_q_data_shuffled,
            train_qa_data_shuffled)
        train_loss_list.append(train_loss)
        train_acc_list.append(train_accuracy)
        train_auc_list.append(train_auc)
        train_f1_list.append(train_f1)
        print(
            'Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f, f1 : %.4f'
            % (idx + 1, params.max_iter, train_loss, train_auc, train_accuracy,
               train_f1))
        # valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data, valid_qa_data)
        valid_loss, valid_accuracy, valid_auc, valid_f1 = test(
            model, params, optimizer, valid_q_data_shuffled,
            valid_qa_data_shuffled)
        valid_loss_list.append(valid_loss)
        valid_acc_list.append(valid_accuracy)
        valid_auc_list.append(valid_auc)
        valid_f1_list.append(valid_f1)
        print(
            'Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f, f1 : %.4f'
            % (idx + 1, params.max_iter, valid_auc, valid_accuracy, valid_f1))

        all_train_auc[idx + 1] = train_auc
        all_train_accuracy[idx + 1] = train_accuracy
        all_train_loss[idx + 1] = train_loss
        all_valid_loss[idx + 1] = valid_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_valid_auc[idx + 1] = valid_auc
        #
        # output the epoch with the best validation auc
        generate_dir(params.save)
        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
            best_epoch = idx + 1
            best_valid_acc = valid_accuracy
            best_valid_loss = valid_loss
            test_loss, test_accuracy, test_auc, test_f1 = test(
                model, params, optimizer, test_q_data, test_qa_data)
            print(
                "test_auc: %.4f\ttest_accuracy: %.4f\ttest_loss: %.4f\ttest_f1: %.4f"
                % (test_auc, test_accuracy, test_loss, test_f1))
            # save_checkpoint(model, memory, params.save + "/Epoch%d-test_auc%.2f-val_auc%.2f-loss%.2f.pt"%(best_epoch, test_auc, valid_auc, test_loss))
            save_path = params.save + "/Epoch%d-test_auc%.2f-val_auc%.2f-loss%.2f.pt" % (
                best_epoch, test_auc, valid_auc, test_loss)
            torch.save(model, save_path)
            print(save_path + " save to " + params.save)

    print("best outcome: best epoch: %.4f" % (best_epoch))
    os.system(f"mv {save_path} {params.save}/best.pt")
    print("valid_auc: %.4f\tvalid_accuracy: %.4f\tvalid_loss: %.4f\t" %
          (best_valid_auc, best_valid_acc, best_valid_loss))
    print("test_auc: %.4f\ttest_accuracy: %.4f\ttest_loss: %.4f\t" %
          (test_auc, test_accuracy, test_loss))

    print(train_loss_list, train_acc_list, train_auc_list, train_f1_list,
          valid_loss_list, valid_acc_list, valid_auc_list, valid_f1_list)
    train_log = 'log.txt'
    with open(train_log, "w") as f:
        f.write("train_loss_list================")
        f.write(str(train_loss_list))
        f.write("train_acc_list================")
        f.write(str(train_acc_list))
        f.write("train_auc_list================")
        f.write(str(train_auc_list))
        f.write("train_f1_lis================")
        f.write(str(train_f1_list))
        f.write("valid_loss_list================")
        f.write(str(valid_loss_list))
        f.write("valid_acc_list================")
        f.write(str(valid_acc_list))
        f.write("================")
        f.write(str(valid_auc_list))
        f.write("================")
        f.write(str(valid_f1_list))
    np.save("train_loss_list.npy", np.array(train_loss_list))
    np.save("train_acc_list.npy", np.array(train_acc_list))
    np.save("train_auc_list.npy", np.array(train_auc_list))
    np.save("train_f1_list.npy", np.array(train_f1_list))
    np.save("valid_loss_list.npy", np.array(valid_loss_list))
    np.save("valid_acc_list.npy", np.array(valid_acc_list))
    np.save("valid_auc_list.npy", np.array(valid_auc_list))
    np.save("valid_f1_list.npy", np.array(valid_f1_list))
Example #5
def train(args):

    log_file = open('../data/logs/log.txt', 'a')
    log_file.write('\n' + '\n')
    log_file.write('-------------------------------------------\n')
    log_file.write(str(args) + '\n')
    log_file.write('-------------------------------------------\n')

    print('model initializing..')
    model = MODEL(args)
    # CUDA_VISIBLE_DEVICES=args.GPU_DEVICE

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        saver = tf.train.Saver(write_version=tf.train.SaverDef.V2,
                               max_to_keep=10)

        print('loading data..')

        load_file = args.file_directory + str(args.first_train_file)
        train_data = load_data(args, load_file)

        load_file = args.file_directory + str(args.first_test_file)
        test_data = load_data(args, load_file)

        for step in range(args.n_epochs):
            # training
            print('epoch:' + str(step))
            args.is_train = True
            if step != 0:
                for i in range(args.train_file_num):
                    start_list = list(
                        range(0, train_data.size, args.batch_size))
                    np.random.shuffle(start_list)
                    for start in start_list:
                        end = start + args.batch_size
                        model.train(
                            sess, get_feed_dict(model, train_data, start, end))

            args.is_train = False

            train_loss = 0
            train_time_loss = 0
            train_batchs = 0
            train_labels = []
            train_scores = []
            train_time_true = []
            train_time_pre = []
            for i in range(args.train_file_num):
                #load_file=args.file_directory+str(args.first_train_file+i)
                #train_data=load_data(args,load_file)
                start_list = list(
                    range(0, train_data.size, args.batch_size * 10))
                train_batchs = train_batchs + len(start_list)
                for start in start_list:
                    end = start + args.batch_size * 10
                    loss, labels, scores, time_loss, time_pre, time_true, time_label = model.test(
                        sess, get_feed_dict(model, train_data, start, end))
                    train_labels.extend(labels)
                    train_scores.extend(scores)
                    for k in range(len(time_label)):
                        if time_label[k] == 0:
                            continue
                        train_time_true.append(time_true[k])
                        train_time_pre.append(time_pre[k])
                    train_loss = train_loss + loss
                    train_time_loss = train_time_loss + time_loss
            train_auc, train_f1, train_pre, train_recall, train_time_f1 = model.eval(
                args, train_labels, train_scores, train_time_true,
                train_time_pre)
            train_loss = train_loss / train_batchs
            train_time_loss = train_time_loss / train_batchs

            time_stamp = datetime.datetime.now().strftime('%Y.%m.%d-%H:%M:%S')

            log_str = 'time:' + time_stamp + '  epoch %d \ntrain_loss:%.4f train_time_loss:%.4f train_time_f1:%.4f train_auc:%.4f train_f1_score:%.4f'\
                                       % (step, train_loss, train_time_loss, train_time_f1, train_auc, train_f1)
            print(log_str)
            log_file.write(log_str)
            log_file.write('\n')

    log_file.close()
Example #6
def run_ES(parameters, model: MODEL, utils, root):

    n_step = parameters['n_step']
    n_protocol = 2**n_step
    exact_data = np.zeros((n_protocol, 2),
                          dtype=np.float64)  # 15 digits precision

    b2_array = lambda n10: np.array(list(np.binary_repr(n10, width=n_step)),
                                    dtype=int)
    st = time.time()
    # ---> measuring estimated time <---
    model.update_protocol(b2_array(0))
    psi = model.compute_evolved_state()
    model.compute_fidelity(psi_evolve=psi)
    model.compute_energy(psi_evolve=psi)
    print("Est. run time : \t %.3f s" % (0.5 * n_protocol *
                                         (time.time() - st)))
    # ---> Starting real calculation <---

    st = time.time()
    for p in range(n_protocol):
        model.update_protocol(b2_array(p))
        psi = model.compute_evolved_state()
        exact_data[p] = (model.compute_fidelity(psi_evolve=psi),
                         model.compute_energy(psi_evolve=psi))

    outfile = utils.make_file_name(parameters, root=root)
    with open(outfile, 'wb') as f:
        pickle.dump(exact_data, f, protocol=4)
    print("Total run time : \t %.3f s" % (time.time() - st))
    print("\n Thank you and goodbye !")
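
run_ES enumerates all 2**n_step bang-bang protocols and pickles one (fidelity, energy) pair per protocol, so the optimum can be read back from the dumped file; a hedged sketch (assumes the pickle layout above and the same outfile path and n_step):

import pickle
import numpy as np

# Hypothetical post-processing of the file written by run_ES
with open(outfile, 'rb') as f:
    exact_data = pickle.load(f)                  # shape (2**n_step, 2): fidelity, energy
best_p = int(np.argmax(exact_data[:, 0]))        # protocol index with the highest fidelity
best_protocol = np.array(list(np.binary_repr(best_p, width=n_step)), dtype=int)
print("best fidelity:", exact_data[best_p, 0], "protocol:", best_protocol)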
Example #7
def SD_2SF(param, model: MODEL, init=False):

    if model.n_h_field > 2:
        assert False, 'This works only for bang-bang protocols'

    n_step = param['n_step']
    n_fid_eval = 0

    if init:
        # Random initialization
        tmp = np.ones(n_step, dtype=int)  # m = 0 state ...
        tmp[0:n_step // 2] = 0
        np.random.shuffle(tmp)

        model.update_protocol(tmp)
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())

    else:
        # So user can feed in data say from a specific protocol
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())

    x1_ar, x2_ar = np.triu_indices(n_step, 1)
    order = np.arange(0, x1_ar.shape[0], dtype=int)

    while True:  # careful with this. For binary actions, this is guaranteed to break

        np.random.shuffle(order)
        local_minima = True

        for pos in order:
            t1, t2 = (x1_ar[pos], x2_ar[pos])

            model.swap(t1, t2)
            new_fid = model.compute_fidelity()
            n_fid_eval += 1

            if new_fid > old_fid:  # accept descent
                #print("%.15f"%new_fid,'\t',n_fid_eval)
                old_fid = new_fid
                best_protocol = np.copy(model.protocol())
                local_minima = False  # will exit for loop before it ends ... local update accepted
                break
            else:
                model.swap(t1, t2)

        if local_minima:
            break

    return old_fid, best_protocol, n_fid_eval
Example #8
def run_SD(parameters, model: MODEL, utils, root, save=True):

    if parameters['verbose'] == 0:
        blockPrint()

    outfile = utils.make_file_name(parameters, root=root)
    n_exist_sample, all_result = utils.read_current_results(outfile)
    n_sample = parameters['n_sample']

    if n_exist_sample >= n_sample:
        print(
            "\n\n-----------> Samples already computed in file -- terminating ... <-----------"
        )
        return all_result

    print("\n\n-----------> Starting stochastic descent <-----------")

    n_iteration_left = n_sample - n_exist_sample  # data should be saved 10 times --> no more (otherwise things are way too slow !)
    n_mod = max([1, n_iteration_left // 10])

    for it in range(n_iteration_left):

        start_time = time.time()

        if parameters['task'] == 'SD':
            best_fid, best_protocol, n_fid_eval = SD(
                parameters, model,
                init=True)  # -- --> performing stochastic descent here <-- --
        elif parameters['task'] == 'SD2':
            best_fid, best_protocol, n_fid_eval = SD_2SF(
                parameters, model, init=True
            )  # -- --> performing 2 spin flip stochastic descent here <-- --
        else:
            assert False, 'Error in task specification'

        energy, delta_energy, Sent = model.compute_observables(
            protocol=best_protocol)

        result = [
            n_fid_eval, best_fid, energy, delta_energy, Sent, best_protocol
        ]

        print(
            "\n----------> RESULT FOR STOCHASTIC DESCENT NO %i <-------------"
            % (it + 1))
        print("Number of fidelity eval \t%i" % n_fid_eval)
        print("Best fidelity \t\t\t%.16f" % best_fid)
        print("Best hx_protocol\t\t", list(best_protocol))

        all_result.append(result)

        if save and it % n_mod == 0:
            with open(outfile, 'wb') as f:
                pickle.dump([parameters, all_result], f)
            print("Saved iteration --> %i to %s" %
                  (it + n_exist_sample, outfile))
        print("Iteration run time --> %.4f s" % (time.time() - start_time))

    print("\n Thank you and goodbye !")
    enablePrint()

    if save:
        with open(outfile, 'wb') as f:
            pickle.dump([parameters, all_result], f)
    return all_result
Example #9
def run_GRAPE(parameters, model: MODEL, utils, root, save=True):
    if parameters['verbose'] == 0:
        blockPrint()

    outfile = utils.make_file_name(parameters, root=root)
    n_exist_sample, optimal_index, all_result = utils.read_current_results(
        outfile)
    n_sample = parameters['n_sample']
    if optimal_index is not None:
        best_seen_fid = all_result[optimal_index][1]
    else:
        best_seen_fid = 0.0

    if n_exist_sample >= n_sample:
        print(
            "\n\n-----------> Samples already computed in file -- terminating ... <-----------"
        )

        print(
            "\n\n-------> Best encountered fidelity over all samples is %0.8f <-------"
            % (best_seen_fid))

        print("\n\n-------> Best encountered hx_protocol over all samples:")
        print(list(all_result[optimal_index][5]))
        print("<-------")

        return all_result

    print("\n\n-----------> Starting stochastic descent <-----------")

    n_iteration_left = n_sample - n_exist_sample  # data should be saved 10 times --> no more (otherwise things are way too slow !)
    n_mod = max([1, n_iteration_left // 10])

    for it in range(n_iteration_left):

        start_time = time.time()

        if parameters['task'] == 'GRAPE':
            best_fid, best_protocol, n_fid_eval = GRAPE(
                parameters, model,
                init=True)  # -- --> performing GRAPE here <-- --
        else:
            assert False, 'Error in task specification'

        energy, delta_energy, Sent = model.compute_observables(
            protocol=best_protocol, discrete=False)

        result = [
            n_fid_eval, best_fid, energy, delta_energy, Sent, best_protocol
        ]

        print("\n----------> RESULT FOR GRAPE NO %i <-------------" % (it + 1))
        print("Number of fidelity eval \t%i" % n_fid_eval)
        print("Best fidelity \t\t\t%.16f" % best_fid)
        print("Best hx_protocol\t\t", list(best_protocol))

        all_result.append(result)

        # check if a better fidelity has been seen in previous samples
        if best_fid > best_seen_fid:
            best_seen_fid = best_fid
            optimal_index = len(all_result) - 1

        if save and it % n_mod == 0:
            with open(outfile, 'wb') as f:
                pickle.dump([parameters, all_result], f)
            print("Saved iteration --> %i to %s" %
                  (it + n_exist_sample, outfile))
        print("Iteration run time --> %.4f s" % (time.time() - start_time))

    # print best seen fidelity and protocol over all times
    print(
        "\n\n-------> Best encountered fidelity over all samples is %0.8f <-------"
        % (best_seen_fid))

    print("\n\n-------> Best encountered hx_protocol over all samples:")
    print(list(all_result[optimal_index][5]))
    print("<-------")

    print("\n Thank you and goodbye !")
    enablePrint()

    if save:
        with open(outfile, 'wb') as f:
            pickle.dump([parameters, all_result], f)
    return all_result
Example #10
def GRAPE(param, model: MODEL, init=False):

    n_step = param['n_step']
    n_fid_eval = 0

    if init:
        # Random initialization
        model.update_protocol(
            np.random.uniform(-1.0, 1.0, size=param['n_step']))
        old_fid = model.compute_fidelity(protocol=model.protocol(),
                                         discrete=False)
        best_protocol = np.copy(model.protocol())

    else:
        # So user can feed in data say from a specific protocol
        raise NotImplementedError("yet to be implemented.")
        #old_fid = model.compute_fidelity(discrete=False)
        #best_protocol = np.copy(model.protocol())

    eta = 0.6  # initial learning rate

    n_fid_eval = 0
    fid_diff = 1.0
    while np.abs(fid_diff) > 1E-9 and n_fid_eval < param['n_quench']:  # guaranteed to break, but possibly only after a very long time

        # compute protocol gradient
        protocol_gradient = model.compute_protocol_gradient()

        # normalise gradient
        protocol_gradient /= np.max(np.abs(protocol_gradient))
        # GD update rule
        new_protocol = model.protocol() + eta * protocol_gradient

        # impose boundedness condition
        ind_max = np.where(new_protocol > param['hx_max'])
        new_protocol[ind_max] = param['hx_max']

        ind_min = np.where(new_protocol < param['hx_min'])
        new_protocol[ind_min] = param['hx_min']

        ###
        fid = model.compute_fidelity(protocol=new_protocol, discrete=False)

        # if we overshoot, decrease step size, otherwise update protocol
        fid_diff = fid - old_fid
        if fid_diff < 0:
            eta *= 0.5
            print('overshot minimum:', n_fid_eval, eta, np.abs(fid_diff))
        else:
            # update protocol
            model.update_protocol(new_protocol)

        old_fid = fid.copy()
        n_fid_eval += 1

        print(fid)
        print(n_fid_eval, eta, fid_diff)

    return old_fid, model.protocol(), n_fid_eval
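
The two np.where blocks that impose the boundedness condition above are equivalent to clipping the updated protocol to [hx_min, hx_max]; a compact equivalent form, assuming the same param keys:

# Hypothetical compact form of the boundedness condition used in GRAPE
new_protocol = np.clip(model.protocol() + eta * protocol_gradient,
                       param['hx_min'], param['hx_max'])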
Example #11
def train_one_dataset(params, file_name, train_q_data, train_qa_data, valid_q_data, valid_qa_data, valid_tf_data):
    ### ================================== model initialization ==================================
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    net = mx.mod.Module(symbol=g_model.sym_gen(),
                        data_names=['q_data', 'qa_data'],
                        label_names=['target'],
                        context=params.ctx)
    # allocate memory for the given input shapes
    net.bind(data_shapes=[mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
                          mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
             label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])

    # initialize parameters with the default DKVMN initializer
    init_dkvmn_param_file_name = params.save + '-dkvmn_initialization'
    arg_params, aux_params = load_params(prefix=os.path.join('model', params.load, init_dkvmn_param_file_name), epoch=30)
    net.init_params(arg_params=arg_params, aux_params=aux_params, allow_missing=False)
    '''
    # initial parameters with the default random initializer
    net.init_params(initializer=mx.init.Normal(sigma=params.init_std), force_init=True)    
    '''


    # decay learning rate in the lr_scheduler
    lr_scheduler = mx.lr_scheduler.FactorScheduler(step=20 * (train_q_data.shape[0] / params.batch_size),
                                                   factor=0.667, stop_factor_lr=1e-5)

    net.init_optimizer(optimizer='sgd', optimizer_params={'learning_rate': params.lr, 'momentum': params.momentum,
                                                          'lr_scheduler': lr_scheduler})

    '''
    for parameters in net.get_params()[0]:
        print(parameters, net.get_params()[0][parameters].asnumpy().shape)
        #print(parameters, net.get_params()[0][parameters])
    print("\n")
    '''

    ### ================================== start training ==================================
    all_train_loss = {}
    all_train_acc = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_acc = {}
    all_valid_auc = {}

    best_valid_acc = -1
    best_valid_loss = -1

    for idx in range(params.max_iter):
        train_loss, train_acc = run.train(net, params, train_q_data, train_qa_data, label='Train')
        pred_list, target_list = run.test(net, params, valid_q_data, valid_qa_data, valid_tf_data, label='Valid')

        all_pred = np.concatenate(pred_list, axis=0)
        all_target = np.concatenate(target_list, axis=0)
        valid_loss = run.binaryEntropy(all_target, all_pred)
        valid_acc = run.compute_accuracy(all_target, all_pred)

        '''
        print('epoch', idx + 1)
        print("valid_auc\t", valid_auc, "\ttrain_auc\t", train_auc)
        print("valid_acc\t", valid_acc, "\ttrain_acc\t", train_acc)
        print("valid_loss\t", valid_loss, "\ttrain_loss\t", train_loss)
        '''

        all_valid_loss[idx + 1] = valid_loss
        all_train_loss[idx + 1] = train_loss
        all_valid_acc[idx + 1] = valid_acc
        all_train_acc[idx + 1] = train_acc

        # keep the epoch with the best validation accuracy
        if valid_acc > best_valid_acc:
            best_valid_acc = valid_acc
            best_valid_loss = valid_loss

            if not os.path.isdir('model'):
                os.makedirs('model')
            if not os.path.isdir(os.path.join('model', params.save)):
                os.makedirs(os.path.join('model', params.save))
            net.save_checkpoint(prefix=os.path.join('model', params.save, file_name), epoch=30)

    if not os.path.isdir('result'):
        os.makedirs('result')
    if not os.path.isdir(os.path.join('result', params.save)):
        os.makedirs(os.path.join('result', params.save))
    f_save_log = open(os.path.join('result', params.save, file_name), 'w')
    f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n")
    f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n")
    f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n")
    f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n")
    f_save_log.write("valid_acc:\n" + str(all_valid_acc) + "\n\n")
    f_save_log.write("train_acc:\n" + str(all_train_acc) + "\n\n")
    f_save_log.close()

    return best_valid_acc, best_valid_loss
Example #12
def train_one_dataset(params, file_name, train_q_data, train_qa_data, valid_q_data, valid_qa_data):
    ### ================================== model initialization ==================================
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    net = mx.mod.Module(symbol=g_model.sym_gen(),
                        data_names=['q_data', 'qa_data'],
                        label_names=['target'],
                        context=params.ctx)
    # allocate memory for the given input shapes
    net.bind(data_shapes=[mx.io.DataDesc(name='q_data', shape=(params.seqlen, params.batch_size), layout='SN'),
                          mx.io.DataDesc(name='qa_data', shape=(params.seqlen, params.batch_size), layout='SN')],
             label_shapes=[mx.io.DataDesc(name='target', shape=(params.seqlen, params.batch_size), layout='SN')])
    # initialize parameters with the default random initializer
    net.init_params(initializer=mx.init.Normal(sigma=params.init_std))
    # decay learning rate in the lr_scheduler
    lr_scheduler = mx.lr_scheduler.FactorScheduler(step=20*(train_q_data.shape[0]/params.batch_size), factor=0.667, stop_factor_lr=1e-5)

    net.init_optimizer(optimizer='sgd', optimizer_params={'learning_rate': params.lr, 'momentum':params.momentum,'lr_scheduler': lr_scheduler})

    for parameters in net.get_params()[0]:
        print(parameters, net.get_params()[0][parameters].asnumpy().shape)
    print("\n")

    ### ================================== start training ==================================
    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(net, params, train_q_data, train_qa_data, label='Train')
        valid_loss, valid_accuracy, valid_auc = test(net, params, valid_q_data, valid_qa_data, label='Valid')

        print('epoch', idx + 1)
        print("valid_auc\t", valid_auc, "\ttrain_auc\t", train_auc)
        print("valid_accuracy\t", valid_accuracy, "\ttrain_accuracy\t", train_accuracy)
        print("valid_loss\t", valid_loss, "\ttrain_loss\t", train_loss)
        all_train_auc[idx + 1] = train_auc
        all_train_accuracy[idx + 1] = train_accuracy
        all_train_loss[idx + 1] = train_loss
        all_valid_auc[idx + 1] = valid_auc
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_valid_loss[idx + 1] = valid_loss
        net.save_checkpoint(prefix=os.path.join('model', params.save, file_name), epoch=idx+1)
        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            best_valid_auc = valid_auc
            best_epoch = idx+1

    f_save_log = open(os.path.join('result', params.save, file_name), 'w')
    f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n")
    f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n")
    f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n")
    f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n")
    f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n")
    f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n")
    f_save_log.close()
    return best_epoch
Example #13
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--gpu",
                        type=int,
                        default=0,
                        help="the gpu will be used")
    parser.add_argument("--max_iter",
                        type=int,
                        default=300,
                        help="number of iterations")
    parser.add_argument("--decay_epoch",
                        type=int,
                        default=20,
                        help="number of iterations")
    parser.add_argument("--test",
                        type=bool,
                        default=False,
                        help="enable tesing")
    parser.add_argument("--train_test",
                        type=bool,
                        default=True,
                        help="enable testing")
    parser.add_argument("-show",
                        type=bool,
                        default=True,
                        help="print progress")
    parser.add_argument("--init_std",
                        type=float,
                        default=0.1,
                        help="weight initial std")
    parser.add_argument("--init_lr",
                        type=float,
                        default=0.01,
                        help="initial learning rate")
    parser.add_argument("--lr_decay",
                        type=float,
                        default=0.75,
                        help="learning rate decay")
    parser.add_argument(
        "--final_lr",
        type=float,
        default=1e-5,
        help=
        "learning rate will not decrease after hitting the threshold final_lr")
    parser.add_argument("--momentum",
                        type=float,
                        default=0.9,
                        help="momentum rate")
    parser.add_argument("--maxgradnorm",
                        type=float,
                        default=50.0,
                        help="maximum gradient norm")
    parser.add_argument("--final_fc_dim",
                        type=float,
                        default=50,
                        help="hidden state dim for final fc layer")

    dataset = "assist2009_updated"

    if dataset == "assist2009_updated":
        parser.add_argument("--q_embed_dim",
                            type=int,
                            default=50,
                            help="question embedding dimensions")
        parser.add_argument("--batch_size",
                            type=int,
                            default=32,
                            help="the batch size")
        parser.add_argument("--qa_embed_dim",
                            type=int,
                            default=200,
                            help="answer and question embedding dimensions")
        parser.add_argument("--memory_size",
                            type=int,
                            default=20,
                            help="memory_size")
        parser.add_argument(
            "--n_question",
            type=int,
            default=110,
            help="the number of unique questions in the database")
        parser.add_argument("--seqlen",
                            type=int,
                            default=200,
                            help="the allowed maximum length of a seqence")
        parser.add_argument("--data_dir",
                            type=str,
                            default="./data/assist2009_updated")
        parser.add_argument("--data_name",
                            type=str,
                            default="assist2009_updated")
        parser.add_argument("--load",
                            type=str,
                            default="assist2009_updated",
                            help="model file to load")
        parser.add_argument("--save",
                            type=str,
                            default="assist2009_updated",
                            help="path to save model")

    params = parser.parse_args()
    params.lr = params.init_lr
    params.memory_key_state_dim = params.q_embed_dim
    params.memory_value_state_dim = params.qa_embed_dim

    print(params)

    dat = Data(params.n_question, params.seqlen, ",")
    train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv"
    valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv"
    test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    train_q_data, train_qa_data = dat.load_data(train_data_path)
    valid_q_data, valid_qa_data = dat.load_data(valid_data_path)
    test_q_data, test_qa_data = dat.load_data(test_data_path)

    model = MODEL(params.n_question, params.batch_size, params.q_embed_dim,
                  params.qa_embed_dim, params.memory_size,
                  params.memory_key_state_dim, params.memory_value_state_dim,
                  params.final_fc_dim)

    model.init_embedding()
    model.init_params()

    optimizer = optim.Adam(params=model.parameters(),
                           lr=params.lr,
                           betas=(0.9, 0.9))

    if params.gpu >= 0:
        print("device: " + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(idx, model, params,
                                                      optimizer, train_q_data,
                                                      train_qa_data)
        print(f"Epoch {idx + 1}/{params.max_iter}, loss: ")
        valid_loss, valid_accuracy, valid_auc = test(model, params,
                                                     valid_q_data,
                                                     valid_qa_data)

        all_train_auc[idx + 1] = train_auc
        all_train_accuracy[idx + 1] = train_accuracy
        all_train_loss[idx + 1] = train_loss
        all_valid_auc[idx + 1] = valid_auc
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_valid_loss[idx + 1] = valid_loss

        if valid_auc > best_valid_auc:
            best_valid_auc = valid_auc

    print(f"best_auc: ${best_valid_auc}")
Example #14
# Rescale data
data.rescale()

# Visualize rescaled data
data.show()

# %% [markdown]
# ## 3. Define the model architecture and the training, validation, and test processes.

from model import MODEL

# %% [markdown]
# ## 4. Create the model with hyperparameters and start the training process.

# Instantiate the model
network = \
    MODEL(input_size=12, output_size=4, hidden_size=32, num_layers=2, p=0.3)
print(network)

# Execute learning process
model_name = \
    network.learn(data=data, batch_size=16, sequence_length=16, lr=0.01, \
        num_epochs=100, validate_every=1, max_norm=4)

# %% [markdown]
# ## 5. Read best model from the file.

with open(model_name, 'rb') as f:

    # Load checkpoint
    checkpoint = torch.load(f)
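
The fragment above loads the checkpoint but never applies it; a hedged continuation, assuming learn() saved network.state_dict() directly:

# Hypothetical continuation: restore the best weights and switch to evaluation mode
network.load_state_dict(checkpoint)
network.eval()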
Example #15
def SD(param, model: MODEL, init_random=False):
    """ Single spin flip stochastic descent
    """

    n_step = param['n_step']
    n_fid_eval = 1
    n_visit = 1

    if init_random:
        # Random initialization
        model.update_protocol(
            np.random.randint(0, model.n_h_field, size=n_step))
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())
    else:
        # So user can feed in data say from a specific protocol
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())

    random_position = np.arange(n_step, dtype=int)
    fid_series = [old_fid]

    while True:

        np.random.shuffle(random_position)
        local_minima_reached = True  # assume a local minimum until a fidelity-improving flip is found

        for t in random_position:

            model.update_hx(t,
                            model.protocol_hx(t) ^ 1)  # assumes binary fields
            new_fid = model.compute_fidelity()
            n_fid_eval += 1
            fid_series.append(old_fid)

            if new_fid > old_fid:  # accept descent
                old_fid = new_fid
                n_visit += 1
                local_minima_reached = False
                break
            else:
                model.update_hx(t,
                                model.protocol_hx(t)
                                ^ 1)  # assumes binary fields

        if local_minima_reached:
            break

    return old_fid, np.copy(model.protocol()), n_fid_eval, n_visit, fid_series
Example #16
def SA(param, model: MODEL):

    Ti = param['Ti']
    n_quench = param['n_quench']
    if n_quench == 0:
        return
    n_step = param['n_step']

    # initial random protocol
    model.update_protocol(np.random.randint(0, model.n_h_field, size=n_step))
    old_fid = model.compute_fidelity()
    best_fid = old_fid
    best_protocol = np.copy(model.protocol())

    T = Ti
    step = 0
    while T > 1e-12:
        beta = 1. / T

        #  --- ---> single spin flip update <--- ---
        random_time = np.random.randint(0, n_step)
        current_hx = model.protocol_hx(random_time)
        model.update_hx(random_time, model.random_flip(random_time))
        #  --- --- --- --- --- --- --- --- --- ---

        new_fid = model.compute_fidelity()

        if new_fid > best_fid:
            best_fid = new_fid
            best_protocol = np.copy(
                model.protocol())  # makes an independent copy !

        d_fid = new_fid - old_fid

        if d_fid > 0.:  # accept move
            old_fid = new_fid
        elif np.exp(beta * d_fid) > np.random.uniform():  # accept move
            old_fid = new_fid
        else:  # reject move
            model.update_hx(random_time, current_hx)

        step += 1
        T = Ti * (1.0 - step / n_quench)

    return best_fid, best_protocol, n_quench
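
A minimal usage sketch for SA (hypothetical driver; assumes a MODEL instance named model exposing the methods used above, and illustrative values for the param keys):

# Hypothetical driver for the simulated-annealing routine above
param = {'Ti': 0.04, 'n_quench': 10000, 'n_step': 28}
best_fid, best_protocol, n_quench = SA(param, model)
print("SA best fidelity: %.6f after %d quench steps" % (best_fid, n_quench))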
Example #17
def SD_2SF(param, model: MODEL, init_random=False):
    """ 2SF + 1 SF stochastic descent: all possible 2 spin-flip and
    1 spin-flip moves are considered. Algorithm halts when all moves will decrease fidelity
    """

    if model.n_h_field > 2:
        assert False, 'This works only for bang-bang protocols'

    n_step = param['n_step']
    n_fid_eval = 0
    n_visit = 1

    if init_random:
        # Random initialization
        model.update_protocol(
            np.random.randint(0, model.n_h_field, size=n_step))
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())
    else:
        # so the user can start from a specific protocol already set on the model
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())

    x1_ar, x2_ar = np.triu_indices(n_step, 1)

    n_2F_step = x1_ar.shape[0]  # number of possible 2-flip updates
    order2F = np.arange(0, n_2F_step, dtype=int)  # ordering sequence, to be randomly shuffled

    n_1F_step = n_step
    order1F = np.arange(0, n_1F_step, dtype=int)  # ordering sequence, to be randomly shuffled

    order1F_vs_2F = 2 * np.ones(n_2F_step + n_1F_step, dtype=int)
    order1F_vs_2F[:n_1F_step] = 1

    ############################
    #########################
    while True:  # for binary actions this loop is guaranteed to terminate

        np.random.shuffle(order1F)
        np.random.shuffle(order2F)
        np.random.shuffle(order1F_vs_2F)
        idx_1F = 0
        idx_2F = 0

        local_minima_reached = True

        for update_type in order1F_vs_2F:

            if update_type == 1:
                # perform 1 SF update
                t = order1F[idx_1F]
                model.update_hx(t,
                                model.protocol_hx(t)
                                ^ 1)  # assumes binary fields
                new_fid = model.compute_fidelity()
                n_fid_eval += 1
                idx_1F += 1

                if new_fid > old_fid:  # accept descent
                    #print("%.15f"%new_fid,'\t',n_fid_eval)
                    n_visit += 1
                    old_fid = new_fid
                    local_minima_reached = False  # will exit for loop before it ends ... local update accepted
                    break
                else:
                    model.update_hx(t, model.protocol_hx(t) ^ 1)
            else:
                # perform 2 SF update
                o2F = order2F[idx_2F]
                t1, t2 = x1_ar[o2F], x2_ar[o2F]
                model.update_hx(t1,
                                model.protocol_hx(t1)
                                ^ 1)  # assumes binary fields
                model.update_hx(t2, model.protocol_hx(t2) ^ 1)
                new_fid = model.compute_fidelity()
                n_fid_eval += 1
                idx_2F += 1

                if new_fid > old_fid:  # accept descent
                    print("%.15f" % new_fid, '\t', n_fid_eval)
                    n_visit += 1
                    old_fid = new_fid
                    local_minima_reached = False  # will exit for loop before it ends ... local update accepted
                    break
                else:
                    model.update_hx(t1,
                                    model.protocol_hx(t1)
                                    ^ 1)  # assumes binary fields
                    model.update_hx(t2, model.protocol_hx(t2) ^ 1)

        if local_minima_reached:
            break

    return old_fid, np.copy(model.protocol()), n_fid_eval, n_visit
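A small sketch (with an assumed n_step) of how the 2-flip move set above is enumerated: np.triu_indices(n_step, 1) lists every unordered pair t1 < t2 exactly once, giving n_step * (n_step - 1) / 2 candidate 2-spin-flip moves.

import numpy as np

n_step = 4
x1_ar, x2_ar = np.triu_indices(n_step, 1)
print([(int(t1), int(t2)) for t1, t2 in zip(x1_ar, x2_ar)])
# [(0, 1), (0, 2), (0, 3), (1, 2), (1, 3), (2, 3)]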
Ejemplo n.º 18
0
def SD(param, model: MODEL, init=False):

    n_step = param['n_step']
    n_fid_eval = 0

    if init:
        # Random initialization
        model.update_protocol(
            np.random.randint(0, model.n_h_field, size=n_step))
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())
    else:
        # so the user can start from a specific protocol already set on the model
        old_fid = model.compute_fidelity()
        best_protocol = np.copy(model.protocol())

    while True:  # for binary actions this loop is guaranteed to terminate

        random_position = np.arange(n_step, dtype=int)
        np.random.shuffle(random_position)

        local_minima = True
        for t in random_position:
            model.update_hx(t, model.random_flip(t))
            new_fid = model.compute_fidelity()
            n_fid_eval += 1

            if new_fid > old_fid:  # accept descent
                old_fid = new_fid
                best_protocol = np.copy(model.protocol())
                local_minima = False  # will exit for loop before it ends ... local update accepted
                break
            else:
                model.update_hx(t, model.random_flip(t))

        if local_minima:
            break

    return old_fid, best_protocol, n_fid_eval
Ejemplo n.º 19
0
    return check_args(parser.parse_args())


def check_args(args):
    # creates saving directory if necessary
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # makes sure batch_size and epoch are positive
    if args.epoch < 1:
        raise Exception('Number of epochs must be larger than or equal to one')

    if args.batch_size < 1:
        raise Exception('Batch size must be larger than or equal to one')

    return args


if __name__ == '__main__':
    # parse arguments
    args = parse_args()

    # sets random seeds
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    # instantiates the model
    model = MODEL(args)

    # trains the model
    train(model)
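A minimal usage sketch for the check_args helper above; the attribute values below are assumptions chosen only to illustrate the checks.

import argparse

args = argparse.Namespace(save_dir='./checkpoints', epoch=10, batch_size=32)
args = check_args(args)  # creates ./checkpoints if needed, validates epoch and batch_size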
Ejemplo n.º 20
0
def Gibbs_Sampling(param, model: MODEL):
    # should also measure acceptance rate

    Ti = param['Ti']
    beta = 1. / Ti
    n_step = param['n_step']
    n_sample = param['n_sample']  # number of samples to draw (assumed to be supplied alongside the other settings)
    n_equilibrate = 10000
    n_auto_correlate = n_step * 10  # should ideally be set from the measured auto-correlation time

    # initial random protocol
    model.update_protocol(np.random.randint(0, model.n_h_field, size=n_step))
    old_fid = model.compute_fidelity()
    best_fid = old_fid

    for i in range(n_equilibrate):

        random_time = np.random.randint(0, n_step)
        current_hx = model.protocol_hx(random_time)
        model.update_hx(random_time, model.random_flip(random_time))

        new_fid = model.compute_fidelity()

        d_fid = new_fid - old_fid

        if d_fid > 0.:  # accept move
            old_fid = new_fid
        elif np.exp(beta * d_fid) > np.random.uniform():  # accept move
            old_fid = new_fid
        else:  # reject move
            model.update_hx(random_time, current_hx)

    samples = []
    fid_samples = []
    energy_samples = []

    for i in range(n_sample):

        for j in range(n_auto_correlate):
            random_time = np.random.randint(0, n_step)
            current_hx = model.protocol_hx(random_time)
            model.update_hx(random_time, model.random_flip(random_time))

            new_fid = model.compute_fidelity()

            d_fid = new_fid - old_fid

            if d_fid > 0.:  # accept move
                old_fid = new_fid
            elif np.exp(beta * d_fid) > np.random.uniform():  # accept move
                old_fid = new_fid
            else:  # reject move
                model.update_hx(random_time, current_hx)

        samples.append(np.copy(model.protocol()))
        fid_samples.append(model.compute_fidelity())
        energy_samples.append(model.compute_energy())

    return samples, fid_samples, energy_samples
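The sampler above uses the same Metropolis-style acceptance rule as the annealer: a move that increases the fidelity is always accepted, otherwise it is accepted with probability exp(beta * d_fid). A self-contained sketch of just that rule:

import numpy as np

def metropolis_accept(d_fid, beta, rng=np.random):
    # always accept uphill moves; accept downhill moves with probability exp(beta * d_fid)
    return d_fid > 0. or np.exp(beta * d_fid) > rng.uniform()

# at beta = 2, a move that lowers the fidelity by 0.1 is accepted about exp(-0.2), i.e. ~82% of the time
print(np.mean([metropolis_accept(-0.1, 2.0) for _ in range(10000)]))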
Ejemplo n.º 21
0
 def __init__(self):
     self.db = MODEL.ChangeInfoUser()
Ejemplo n.º 22
0
def run_SA(parameters, model: MODEL, utils, root, save=True):

    if parameters['verbose'] == 0:
        blockPrint()

    outfile = utils.make_file_name(parameters, root=root)
    n_exist_sample, all_result = utils.read_current_results(outfile)
    n_sample = parameters['n_sample']

    if parameters['Ti'] < 0.:
        parameters['Ti'] = compute_initial_Ti(parameters, model, n_sample=1000)
        print("Initial temperature Ti=%.3f" % parameters['Ti'])

    if n_exist_sample >= n_sample:
        print(
            "\n\n-----------> Samples already computed in file -- terminating ... <-----------"
        )
        return all_result

    print("\n\n-----------> Starting simulated annealing <-----------")

    n_iteration_left = n_sample - n_exist_sample  # results are saved at most ~10 times over the run (saving more often is far too slow)
    n_mod = max([1, n_iteration_left // 10])

    for it in range(n_iteration_left):

        start_time = time.time()
        best_fid, best_protocol, n_fid_eval = SA(
            parameters, model)  # -- --> performing annealing here <-- --

        if parameters['task'] == 'SASD':
            print(' -> Stochastic descent ... ')
            model.update_protocol(best_protocol)
            best_fid, best_protocol, n_fid_eval_SD = SD(parameters,
                                                        model,
                                                        init=False)
            n_fid_eval += n_fid_eval_SD

        energy = model.compute_energy(protocol=best_protocol)

        result = [n_fid_eval, best_fid, energy, best_protocol]

        print("\n----------> RESULT FOR ANNEALING NO %i <-------------" %
              (it + 1))
        print("Number of fidelity eval \t%i" % n_fid_eval)
        print("Best fidelity \t\t\t%.4f" % best_fid)
        print("Best hx_protocol\t\t", list(best_protocol))

        all_result.append(result)
        if save and it % n_mod == 0:
            with open(outfile, 'wb') as f:
                pickle.dump([parameters, all_result], f)
            print("Saved iteration --> %i to %s" %
                  (it + n_exist_sample, outfile))
        print("Iteration run time --> %.4f s" % (time.time() - start_time))

    print("\n Thank you and goodbye !")
    enablePrint()

    if save:
        with open(outfile, 'wb') as f:
            pickle.dump([parameters, all_result], f)
    return all_result
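For reading the results back, a small hypothetical helper (not part of the original code) that mirrors the pickle.dump([parameters, all_result], f) calls above:

import pickle

def load_results(outfile):
    # returns the parameter dict and the list of [n_fid_eval, best_fid, energy, best_protocol] entries
    with open(outfile, 'rb') as f:
        parameters, all_result = pickle.load(f)
    return parameters, all_result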
Ejemplo n.º 23
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--max_iter',
                        type=int,
                        default=300,
                        help='number of iterations')
    parser.add_argument('--decay_epoch',
                        type=int,
                        default=20,
                        help='number of iterations')
    parser.add_argument('--test',
                        type=bool,
                        default=False,
                        help='enable testing')
    parser.add_argument('--train_test',
                        type=bool,
                        default=True,
                        help='enable testing')
    parser.add_argument('--show',
                        type=bool,
                        default=True,
                        help='print progress')
    parser.add_argument('--init_std',
                        type=float,
                        default=0.1,
                        help='weight initialization std')
    parser.add_argument('--init_lr',
                        type=float,
                        default=0.001,
                        help='initial learning rate')
    parser.add_argument('--lr_decay',
                        type=float,
                        default=0.75,
                        help='learning rate decay')
    parser.add_argument(
        '--final_lr',
        type=float,
        default=1E-5,
        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help='momentum rate')
    parser.add_argument('--max_grad_norm',
                        type=float,
                        default=3.0,
                        help='maximum gradient norm')
    parser.add_argument('--hidden_dim',
                        type=int,
                        default=200,
                        help='hidden layer dimension')
    parser.add_argument('--n_hidden',
                        type=int,
                        default=2,
                        help='hidden numbers')
    parser.add_argument('--dataset', type=str, default='assist2017')

    if parser.parse_args().dataset == 'assist2009_updated':
        parser.add_argument('--batch_size',
                            type=int,
                            default=100,
                            help='the batch size')
        parser.add_argument('--qa_embed_dim',
                            type=int,
                            default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument(
            '--n_question',
            type=int,
            default=110,
            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen',
                            type=int,
                            default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir',
                            type=str,
                            default='./data/assist2009_updated',
                            help='data directory')
        parser.add_argument('--data_name',
                            type=str,
                            default='assist2009_updated',
                            help='data set name')
        parser.add_argument('--load',
                            type=str,
                            default='assist2009_updated',
                            help='model file to load')
        parser.add_argument('--save',
                            type=str,
                            default='assist2009_updated',
                            help='path to save model')

    elif parser.parse_args().dataset == 'assist2015':
        parser.add_argument('--batch_size',
                            type=int,
                            default=100,
                            help='the batch size')
        parser.add_argument('--qa_embed_dim',
                            type=int,
                            default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument(
            '--n_question',
            type=int,
            default=100,
            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen',
                            type=int,
                            default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir',
                            type=str,
                            default='./data/assist2015',
                            help='data directory')
        parser.add_argument('--data_name',
                            type=str,
                            default='assist2015',
                            help='data set name')
        parser.add_argument('--load',
                            type=str,
                            default='assist2015',
                            help='model file to load')
        parser.add_argument('--save',
                            type=str,
                            default='assist2015',
                            help='path to save model')

    elif parser.parse_args().dataset == 'assist2017':
        parser.add_argument('--batch_size',
                            type=int,
                            default=32,
                            help='the batch size')
        parser.add_argument('--qa_embed_dim',
                            type=int,
                            default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument(
            '--n_question',
            type=int,
            default=102,
            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen',
                            type=int,
                            default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir',
                            type=str,
                            default='../dataset/assist2017/train_valid_test',
                            help='data directory')
        parser.add_argument('--data_name',
                            type=str,
                            default='assist2017',
                            help='data set name')
        parser.add_argument('--load',
                            type=str,
                            default='assist2017',
                            help='model file to load')
        parser.add_argument('--save',
                            type=str,
                            default='assist2017',
                            help='path to save model')

    elif parser.parse_args().dataset == 'STATICS':
        parser.add_argument('--batch_size',
                            type=int,
                            default=100,
                            help='the batch size')
        parser.add_argument('--qa_embed_dim',
                            type=int,
                            default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument(
            '--n_question',
            type=int,
            default=1223,
            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen',
                            type=int,
                            default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir',
                            type=str,
                            default='./data/STATICS',
                            help='data directory')
        parser.add_argument('--data_name',
                            type=str,
                            default='STATICS',
                            help='data set name')
        parser.add_argument('--load',
                            type=str,
                            default='STATICS',
                            help='model file to load')
        parser.add_argument('--save',
                            type=str,
                            default='STATICS',
                            help='path to save model')

    elif parser.parse_args().dataset == 'synthetic':
        parser.add_argument('--batch_size',
                            type=int,
                            default=100,
                            help='the batch size')
        parser.add_argument('--qa_embed_dim',
                            type=int,
                            default=200,
                            help='answer and question embedding dimensions')
        parser.add_argument(
            '--n_question',
            type=int,
            default=50,
            help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen',
                            type=int,
                            default=200,
                            help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir',
                            type=str,
                            default='./data/synthetic',
                            help='data directory')
        parser.add_argument('--data_name',
                            type=str,
                            default='synthetic',
                            help='data set name')
        parser.add_argument('--load',
                            type=str,
                            default='synthetic',
                            help='model file to load')
        parser.add_argument('--save',
                            type=str,
                            default='synthetic',
                            help='path to save model')

    params = parser.parse_args()
    params.lr = params.init_lr

    print(params)

    dat = DATA(n_question=params.n_question,
               seqlen=params.seqlen,
               separate_char=',')
    if params.dataset != 'synthetic':
        train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv"
        valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv"
        test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    else:
        train_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_train1.csv"
        valid_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_valid1.csv"
        test_data_path = params.data_dir + "/" + "naive_c5_q50_s4000_v0_test.csv"

    train_q_data, train_q_t_data, train_answer_data = dat.load_data(
        train_data_path)
    valid_q_data, valid_q_t_data, valid_answer_data = dat.load_data(
        valid_data_path)
    test_q_data, test_q_t_data, test_answer_data = dat.load_data(
        test_data_path)

    model = MODEL(n_question=params.n_question,
                  hidden_dim=params.hidden_dim,
                  x_embed_dim=params.qa_embed_dim,
                  hidden_layers=params.n_hidden,
                  gpu=params.gpu)

    model.init_embeddings()
    model.init_params()
    optimizer = optim.Adam(params=model.parameters(),
                           lr=params.lr,
                           betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    # all_train_loss = {}
    # all_train_accuracy = {}
    # all_train_auc = {}
    # all_valid_loss = {}
    # all_valid_accuracy = {}
    # all_valid_auc = {}
    best_valid_auc = 0

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(model, params, optimizer,
                                                      train_q_data,
                                                      train_q_t_data,
                                                      train_answer_data)
        print(
            'Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' %
            (idx + 1, params.max_iter, train_loss, train_auc, train_accuracy))
        valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer,
                                                     valid_q_data,
                                                     valid_q_t_data,
                                                     valid_answer_data)
        print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' %
              (idx + 1, params.max_iter, valid_auc, valid_accuracy))
        test_loss, test_accuracy, test_auc = test(model, params, optimizer,
                                                  test_q_data, test_q_t_data,
                                                  test_answer_data)
        print('Epoch %d/%d, test auc : %3.5f, test accuracy : %3.5f' %
              (idx + 1, params.max_iter, test_auc, test_accuracy))

        # all_train_auc[idx + 1] = train_auc
        # all_train_accuracy[idx + 1] = train_accuracy
        # all_train_loss[idx + 1] = train_loss
        # all_valid_loss[idx + 1] = valid_loss
        # all_valid_accuracy[idx + 1] = valid_accuracy
        # all_valid_auc[idx + 1] = valid_auc

        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
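One note on the per-dataset argument pattern above: parser.parse_args() is called before the dataset-specific options are registered, so passing any of those options on the command line at that point raises an "unrecognized arguments" error. A sketch (not the original code) of a more forgiving variant uses parse_known_args to peek at --dataset first:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--dataset', type=str, default='assist2017')
known, _ = parser.parse_known_args()  # ignores options registered later
if known.dataset == 'assist2017':
    parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
params = parser.parse_args()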
Ejemplo n.º 24
0
print('Q Sentence Len    :', max_len[1])
print('Ans Sentence Len  :', max_len[2])
print('CNN Filter Size   :', CNN_FILTER_SIZE)
print('CNN Filter Size2   :', CNN_FILTER_SIZE2)
print('CNN Filter Num    :', CNN_FILTER_NUM)
print('DNN Output Size   :', DNN_WIDTH, '->', 1)
print('###############################################################', '\n')

with open('output_data/question/qa.train.json') as data_file:
    train_q = json.load(data_file)
with open('output_data/question/qa.val.json') as data_file:
    val_q = json.load(data_file)

start = time.time()

acm_net = MODEL.MODEL(BATCHSIZE, X_DIMENSION, DNN_WIDTH, CNN_FILTER_SIZE,
                      CNN_FILTER_SIZE2, CNN_FILTER_NUM, CNN_FILTER_NUM2,
                      LEARNING_RATE, DROPOUT, choice, max_plot_num, max_len,
                      parameterPath)
acm_net.initialize()
accuracy = utility.train(train_q, val_q, plotFilePath, acm_net, EPOCH,
                         LEARNING_RATE, BATCHSIZE, DROPOUT, choice, max_len)
accuracy_train = np.array(accuracy['train'])
accuracy_val = np.array(accuracy['val'])

print('val: ', accuracy_val, np.amax(accuracy_val))
print('train: ', accuracy_train, np.amax(accuracy_train))

eval_time = time.time() - start
print('use time: ', eval_time)
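A hedged follow-up sketch (not in the original script): plotting the accuracy curves collected above, assuming accuracy['train'] and accuracy['val'] hold one value per epoch.

import matplotlib.pyplot as plt

plt.plot(accuracy_train, label='train')
plt.plot(accuracy_val, label='val')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend(loc='best')
plt.show()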
Ejemplo n.º 25
0
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=0, help='the gpu to use, e.g. "0,1,2,3"')
    parser.add_argument('--max_iter', type=int, default=300, help='number of iterations')
    parser.add_argument('--decay_epoch', type=int, default=20, help='number of iterations')
    parser.add_argument('--test', type=bool, default=False, help='enable testing')
    parser.add_argument('--train_test', type=bool, default=True, help='enable testing')
    parser.add_argument('--show', type=bool, default=True, help='print progress')
    parser.add_argument('--init_std', type=float, default=0.1, help='weight initialization std')
    parser.add_argument('--init_lr', type=float, default=0.01, help='initial learning rate')
    parser.add_argument('--lr_decay', type=float, default=0.75, help='learning rate decay')
    parser.add_argument('--final_lr', type=float, default=1E-5,
                        help='learning rate will not decrease after hitting this threshold')
    parser.add_argument('--momentum', type=float, default=0.9, help='momentum rate')
    parser.add_argument('--maxgradnorm', type=float, default=50.0, help='maximum gradient norm')
    parser.add_argument('--final_fc_dim', type=int, default=50, help='hidden state dim for final fc layer')

    dataset = 'assist2009_updated'

    if dataset == 'assist2009_updated':
        parser.add_argument('--q_embed_dim', type=int, default=50, help='question embedding dimensions')
        parser.add_argument('--batch_size', type=int, default=32, help='the batch size')
        parser.add_argument('--qa_embed_dim', type=int, default=200, help='answer and question embedding dimensions')
        parser.add_argument('--memory_size', type=int, default=20, help='memory size')
        parser.add_argument('--n_question', type=int, default=110, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=200, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/assist2009_updated', help='data directory')
        parser.add_argument('--data_name', type=str, default='assist2009_updated', help='data set name')
        parser.add_argument('--load', type=str, default='assist2009_updated', help='model file to load')
        parser.add_argument('--save', type=str, default='assist2009_updated', help='path to save model')

    elif dataset == 'STATICS':
        parser.add_argument('--batch_size', type=int, default=10, help='the batch size')
        parser.add_argument('--q_embed_dim', type=int, default=50, help='question embedding dimensions')
        parser.add_argument('--qa_embed_dim', type=int, default=100, help='answer and question embedding dimensions')
        parser.add_argument('--memory_size', type=int, default=50, help='memory size')
        parser.add_argument('--n_question', type=int, default=1223, help='the number of unique questions in the dataset')
        parser.add_argument('--seqlen', type=int, default=6, help='the allowed maximum length of a sequence')
        parser.add_argument('--data_dir', type=str, default='./data/STATICS', help='data directory')
        parser.add_argument('--data_name', type=str, default='STATICS', help='data set name')
        parser.add_argument('--load', type=str, default='STATICS', help='model file to load')
        parser.add_argument('--save', type=str, default='STATICS', help='path to save model')



    params = parser.parse_args()
    params.lr = params.init_lr
    params.memory_key_state_dim = params.q_embed_dim
    params.memory_value_state_dim = params.qa_embed_dim

    print(params)

    dat = DATA(n_question=params.n_question, seqlen=params.seqlen, separate_char=',')
    # train_data_path = params.data_dir + "/" + "test5.1.txt"
    train_data_path = params.data_dir + "/" + params.data_name + "_train1.csv"
    valid_data_path = params.data_dir + "/" + params.data_name + "_valid1.csv"
    test_data_path = params.data_dir + "/" + params.data_name + "_test.csv"
    train_q_data, train_qa_data = dat.load_data(train_data_path)
    valid_q_data, valid_qa_data = dat.load_data(valid_data_path)
    test_q_data, test_qa_data = dat.load_data(test_data_path)


    model = MODEL(n_question=params.n_question,
                  batch_size=params.batch_size,
                  q_embed_dim=params.q_embed_dim,
                  qa_embed_dim=params.qa_embed_dim,
                  memory_size=params.memory_size,
                  memory_key_state_dim=params.memory_key_state_dim,
                  memory_value_state_dim=params.memory_value_state_dim,
                  final_fc_dim=params.final_fc_dim)


    model.init_embeddings()
    model.init_params()
    # optimizer = optim.SGD(params=model.parameters(), lr=params.lr, momentum=params.momentum)
    optimizer = optim.Adam(params=model.parameters(), lr=params.lr, betas=(0.9, 0.9))

    if params.gpu >= 0:
        print('device: ' + str(params.gpu))
        torch.cuda.set_device(params.gpu)
        model.cuda()

    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    # shuffle_index = np.random.permutation(train_q_data.shape[0])
    # q_data_shuffled = train_q_data[shuffle_index]
    # qa_data_shuffled = train_qa_data[shuffle_index]

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(idx, model, params, optimizer, train_q_data, train_qa_data)
        print('Epoch %d/%d, loss : %3.5f, auc : %3.5f, accuracy : %3.5f' % (idx + 1, params.max_iter, train_loss, train_auc, train_accuracy))
        valid_loss, valid_accuracy, valid_auc = test(model, params, optimizer, valid_q_data, valid_qa_data)
        print('Epoch %d/%d, valid auc : %3.5f, valid accuracy : %3.5f' % (idx + 1, params.max_iter, valid_auc, valid_accuracy))


        all_train_auc[idx + 1] = train_auc
        all_train_accuracy[idx + 1] = train_accuracy
        all_train_loss[idx + 1] = train_loss
        all_valid_loss[idx + 1] = valid_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_valid_auc[idx + 1] = valid_auc
        #
        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            print('%3.4f to %3.4f' % (best_valid_auc, valid_auc))
            best_valid_auc = valid_auc
Ejemplo n.º 26
0
def train_one_dataset(params, file_name, train_q_data, train_qa_data,
                      valid_q_data, valid_qa_data):
    ### ================================== model initialization ==================================
    g_model = MODEL(n_question=params.n_question,
                    seqlen=params.seqlen,
                    batch_size=params.batch_size,
                    q_embed_dim=params.q_embed_dim,
                    qa_embed_dim=params.qa_embed_dim,
                    memory_size=params.memory_size,
                    memory_key_state_dim=params.memory_key_state_dim,
                    memory_value_state_dim=params.memory_value_state_dim,
                    final_fc_dim=params.final_fc_dim)
    # create a module from the given Symbol
    net = mx.mod.Module(symbol=g_model.sym_gen(),
                        data_names=['q_data', 'qa_data'],
                        label_names=['target'],
                        context=params.ctx)
    # allocate memory for the given input shapes
    net.bind(data_shapes=[
        mx.io.DataDesc(name='q_data',
                       shape=(params.seqlen, params.batch_size),
                       layout='SN'),
        mx.io.DataDesc(name='qa_data',
                       shape=(params.seqlen, params.batch_size),
                       layout='SN')
    ],
             label_shapes=[
                 mx.io.DataDesc(name='target',
                                shape=(params.seqlen, params.batch_size),
                                layout='SN')
             ])
    # initialize parameters with the default random initializer
    net.init_params(initializer=mx.init.Normal(sigma=params.init_std))
    # decay learning rate in the lr_scheduler
    lr_scheduler = mx.lr_scheduler.FactorScheduler(
        step=20 * (train_q_data.shape[0] // params.batch_size),
        factor=0.667,
        stop_factor_lr=1e-5)

    net.init_optimizer(optimizer='sgd',
                       optimizer_params={
                           'learning_rate': params.lr,
                           'momentum': params.momentum,
                           'lr_scheduler': lr_scheduler
                       })

    for parameters in net.get_params()[0]:
        print(parameters, net.get_params()[0][parameters].asnumpy().shape)
    print("\n")

    ### ================================== start training ==================================
    all_train_loss = {}
    all_train_accuracy = {}
    all_train_auc = {}
    all_valid_loss = {}
    all_valid_accuracy = {}
    all_valid_auc = {}
    best_valid_auc = 0

    for idx in range(params.max_iter):
        train_loss, train_accuracy, train_auc = train(net,
                                                      params,
                                                      train_q_data,
                                                      train_qa_data,
                                                      label='Train')
        valid_loss, valid_accuracy, valid_auc = test(net,
                                                     params,
                                                     valid_q_data,
                                                     valid_qa_data,
                                                     label='Valid')

        print('epoch', idx + 1)
        print("valid_auc\t", valid_auc, "\ttrain_auc\t", train_auc)
        print("valid_accuracy\t", valid_accuracy, "\ttrain_accuracy\t",
              train_accuracy)
        print("valid_loss\t", valid_loss, "\ttrain_loss\t", train_loss)

        if not os.path.isdir('model'):
            os.makedirs('model')
        if not os.path.isdir(os.path.join('model', params.save)):
            os.makedirs(os.path.join('model', params.save))

        all_valid_auc[idx + 1] = valid_auc
        all_train_auc[idx + 1] = train_auc
        all_valid_loss[idx + 1] = valid_loss
        all_train_loss[idx + 1] = train_loss
        all_valid_accuracy[idx + 1] = valid_accuracy
        all_train_accuracy[idx + 1] = train_accuracy

        # output the epoch with the best validation auc
        if valid_auc > best_valid_auc:
            best_valid_auc = valid_auc
            best_epoch = idx + 1
            # the checkpoint epoch is fixed at 100; only the model from the
            # epoch with the best validation AUC is kept
            net.save_checkpoint(prefix=os.path.join('model', params.save,
                                                    file_name),
                                epoch=100)

    if not os.path.isdir('result'):
        os.makedirs('result')
    if not os.path.isdir(os.path.join('result', params.save)):
        os.makedirs(os.path.join('result', params.save))
    f_save_log = open(os.path.join('result', params.save, file_name), 'w')
    f_save_log.write("valid_auc:\n" + str(all_valid_auc) + "\n\n")
    f_save_log.write("train_auc:\n" + str(all_train_auc) + "\n\n")
    f_save_log.write("valid_loss:\n" + str(all_valid_loss) + "\n\n")
    f_save_log.write("train_loss:\n" + str(all_train_loss) + "\n\n")
    f_save_log.write("valid_accuracy:\n" + str(all_valid_accuracy) + "\n\n")
    f_save_log.write("train_accuracy:\n" + str(all_train_accuracy) + "\n\n")
    f_save_log.close()
    return best_epoch
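A quick numeric sketch (values assumed) of what the FactorScheduler above does to the learning rate: every `step` optimizer updates the rate is multiplied by `factor`, and the decay stops once it reaches `stop_factor_lr`.

lr, factor, stop_factor_lr = 0.05, 0.667, 1e-5
schedule = [lr]
for _ in range(5):
    lr = max(lr * factor, stop_factor_lr)
    schedule.append(lr)
print(schedule)  # 0.05, 0.0334, 0.0222, ...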
Ejemplo n.º 27
0
def main():
    # Utility object for reading, writing parameters, etc.
    utils = UTILS()

    # Reading parameters from para.dat file
    parameters = utils.read_parameter_file()

    # Command-line specified parameters override parameter file values
    utils.read_command_line_arg(parameters, sys.argv)

    # Printing parameters for user
    utils.print_parameters(parameters)

    # Defining Hamiltonian
    H = HAMILTONIAN(**parameters)

    # Defines the model and precomputes the evolution matrices for the given set of states
    model = MODEL(H, parameters)

    #n_step = parameters['n_step']
    #X,y=sample_m0(10000,n_step,model)
    #print(y[0:10])
    #plt.hist(y,bins=20)
    #plt.show()

    rob_vs_T = {}
    n_eval = {}
    fid = {}
    res = {}
    visit = {}
    T_list = np.arange(0.05, 10.001, 0.05)
    n_step_list = [40, 50, 60, 70, 80, 90, 100, 110]

    for T in T_list:
        for n_step in n_step_list:  #[40,50,60,70,80,90,100,110,120]:

            ##for T in np.arange(0.025,10.001,0.025):
            #    for n_step in [100,200,400] :
            parameters['T'] = T
            parameters['n_step'] = n_step
            parameters['dt'] = T / n_step

            file = utils.make_file_name(parameters, root='data/')
            res = parse_data(file)
            n_eval[(n_step, hash(T))] = res['n_fid']
            fid[(n_step, hash(T))] = res['F']
            visit[(n_step, hash(T))] = res['n_visit']
        '''    with open(file,'rb') as f:
                _, data = pickle.load(f)
                n_elem = len(data)
                n_eval[(n_step,hash(T))]=[]
                n_fid[(n_step,hash(T))]=[]
                for elem in data:
                    n_eval[(n_step,hash(T))].append(elem[0])
                    n_fid[(n_step,hash(T))].append(elem[1])'''

    #print(n_eval)
    #exit()
    n_eval_mean = {}
    fid_mean = {}
    visit_mean = {}
    #print(visit[(40,115292150460684704)])
    #exit()
    for n_step in n_step_list:
        n_eval_mean[n_step] = []
        fid_mean[n_step] = []
        visit_mean[n_step] = []
        for T in T_list:
            hT = hash(T)
            n_eval_mean[n_step].append(
                [T, np.mean(n_eval[(n_step, hT)]) / (n_step * n_step)])
            fid_mean[n_step].append([T, np.mean(fid[(n_step, hT)])])
            visit_mean[n_step].append(
                [T, np.mean(visit[(n_step, hT)]) / (n_step)])

    c_list = [
        '#d53e4f', '#f46d43', '#fdae61', '#fee08b', '#e6f598', '#abdda4',
        '#66c2a5', '#3288bd'
    ]
    for i, n_step in enumerate(n_step_list):
        x = np.array(n_eval_mean[n_step])
        plt.plot(x[:, 0], x[:, 1], c='black', zorder=0)
        plt.scatter(x[:, 0],
                    x[:, 1],
                    c=c_list[i],
                    marker='o',
                    s=5,
                    label='$N=%i$' % n_step,
                    zorder=1)

    plt.title('Number of fidelity evaluations vs. ramp time \n for 2 flip')
    plt.ylabel('$N_{eval}/N^2$')
    plt.xlabel('$T$')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()

    for i, n_step in enumerate(n_step_list):
        x = np.array(visit_mean[n_step])
        plt.plot(x[:, 0], x[:, 1], c='black', zorder=0)
        plt.scatter(x[:, 0],
                    x[:, 1],
                    c=c_list[i],
                    marker='o',
                    s=5,
                    label='$N=%i$' % n_step,
                    zorder=1)

    plt.title('Number of visited states vs. ramp time \n for 2 flip')
    plt.ylabel('$N_{visit}/N$')
    plt.xlabel('$T$')
    plt.legend(loc='best')
    plt.tight_layout()
    plt.show()





    '''