Example #1
File: main.py  Project: siddk/memory
def run_task(data_directory, task_id):
    """
    Parse data, build model, and run training and testing for a single task.

    :param data_directory: Path to train and test data.
    :param task_id: Task to evaluate
    """
    print("Train and test for task %d ..." % task_id)

    # Parse data
    train_files = glob.glob('%s/qa%d_*_train.txt' % (data_directory, task_id))
    test_files = glob.glob('%s/qa%d_*_test.txt' % (data_directory, task_id))

    dictionary = {"nil": 0}

    # Story shape: (SENTENCE_SIZE, STORY_SIZE, NUM_STORIES)
    # Questions shape: (14 (see parser.py), NUM_SAMPLES)
    # QStory shape: (SENTENCE_SIZE, NUM_SAMPLES)
    train_story, train_questions, train_qstory = parse_babi_task(train_files, dictionary, False)
    test_story, test_questions, test_qstory = parse_babi_task(test_files, dictionary, False)

    general_config = BabiConfig(train_story, train_questions, dictionary)

    memory, model, loss = build_model(general_config)

    if general_config.linear_start:
        train_linear_start(train_story, train_questions, train_qstory, memory, model, loss,
                           general_config)
    else:
        train(train_story, train_questions, train_qstory, memory, model, loss, general_config)

    test(test_story, test_questions, test_qstory, memory, model, loss, general_config)
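
run_task drives one bAbI task end to end. A minimal driver sketch, assuming the standard 20-task bAbI layout; the data path here is hypothetical and the project's real entry point may differ:

if __name__ == "__main__":
    data_dir = "data/tasks_1-20_v1-2/en"  # hypothetical path to the bAbI download
    for task_id in range(1, 21):          # bAbI defines tasks 1 through 20
        run_task(data_dir, task_id)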
Example #2
File: infer.py  Project: Oneflow-Inc/models
def main(args):

    start_t = time.time()
    model = build_model(args)
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    start_t = time.time()
    pretrain_models = flow.load(args.model_path)
    model.load_state_dict(pretrain_models)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    model.eval()
    model.to("cuda")

    start_t = time.time()
    image = load_image(args.image_path,
                       image_size=(args.image_size, args.image_size))
    image = flow.Tensor(image, device=flow.device("cuda"))
    predictions = model(image).softmax()
    predictions = predictions.numpy()
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))
    clsidx = np.argmax(predictions)
    print("predict prob: %f, class name: %s" %
          (np.max(predictions), clsidx_2_labels[clsidx]))
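
main() only reads model_path, image_path, and image_size from args (build_model may read further attributes). A hypothetical argparse wiring matching those attributes; the real infer.py in Oneflow-Inc/models defines its own flags:

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_path", type=str, required=True, help="directory saved with flow.save")
    parser.add_argument("--image_path", type=str, required=True, help="image to classify")
    parser.add_argument("--image_size", type=int, default=224, help="square input resolution")
    main(parser.parse_args())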
Example #3
def train(train_cfg):
    with open(train_cfg, "r") as f:
        train_cfg = json.load(f)

    feature_cfg_path = train_cfg["feature_cfg"]
    model_name = train_cfg["model"]
    model_cfg = train_cfg["model_cfg"]

    # Use the full security list; a narrower universe such as
    # jq.get_index_stocks('000300.XSHG') could be substituted here.
    stock_list = jq.get_all_securities().index.tolist()
    print(stock_list)

    with open(feature_cfg_path, "r") as f:
        feature_cfg = json.load(f)

    info = create_feature(feature_cfg, stock_list, if_dump=True, load_feature=False)
    feature = info["feature"]
    label = info["label"]
    label_index = 0

    # Keep only the rows that survive label clipping for the chosen label column.
    clip_index = info["label_clip_index"][:, label_index]
    label = label[:, label_index]
    feature = feature[clip_index]
    label = label[clip_index]

    model = build_model(model_name, model_cfg)
    model.fit(feature, label)
    model.dump()
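
train() expects train_cfg to be a path to a JSON file with three keys. A hypothetical example, shown as the dict json.load would return; the model name and model_cfg contents are placeholders for whatever build_model accepts:

train_cfg = {
    "feature_cfg": "./config/feature_create_cfg.json",  # path to the feature-creation config
    "model": "lightgbm",                                # hypothetical model name
    "model_cfg": {"num_leaves": 31},                    # hypothetical hyperparameters
}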
Example #4
    def train(self, sess, num_gpus):
        tf.set_random_seed(cfg.seed)
        set_log(cfg.job_dir)
        job_env = Prework(cfg)
        job_env.make_env()

        assert cfg.input_style == 0
        model, handlers = build_model(cfg, job_env, num_gpus)
        handle, train_iterator, valid_iterator, train_num, valid_num = handlers
        valid_loss_checker = checker(cfg)

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver(tf.global_variables(), max_to_keep=5, name='train_saver')
        best_saver = tf.train.Saver(tf.global_variables(), name="best_saver")
        checkpoint_path = os.path.join(job_env.job_dir, 'model.ckpt')
        train_summary_writer = tf.summary.FileWriter(job_env.train_event_dir)
        valid_summary_writer = tf.summary.FileWriter(job_env.valid_event_dir)

        train_handle = sess.run(train_iterator.string_handle())

        if cfg.resume:
            load_path = load_model(cfg, job_env, saver, sess)
            logging.info("Loading the existing model: %s" % load_path)

        epoch, train_cursor = 0, 0
        group_size = num_gpus * cfg.batch_size
        train_num_batches = train_num // group_size
        valid_num_batches = valid_num // group_size
        while True:
            try:
                start_time = time.time()
                loss, _, i_global, i_merge = sess.run(
                    [model.loss, model.train_op, model.global_step, model.merged],
                    feed_dict={handle: train_handle,
                               model.learning_rate: valid_loss_checker.learning_rate,
                               model.fw_dropout_keep: cfg.fw_dropout_keep,
                               model.recur_dropout_keep: cfg.recur_dropout_keep})
                iter_time = time.time() - start_time
                if i_global % cfg.train_log_freq == 0:
                    report_train(epoch, i_global, train_cursor, train_num_batches,
                                 loss, iter_time, i_merge, train_summary_writer)
                if i_global % cfg.valid_freq == 0:
                    report_valid(sess, i_global, handle, valid_iterator, valid_num_batches,
                                 model, best_saver, valid_loss_checker, job_env, valid_summary_writer)
                    if valid_loss_checker.should_stop():
                        break
                if i_global % cfg.save_freq == 0:
                    saver.save(sess, checkpoint_path, global_step=i_global)
                train_cursor += 1
                if train_cursor == train_num_batches:
                    train_cursor = 0
                    epoch += 1
            except tf.errors.OutOfRangeError:
                break
        sess.close()
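
This train() is a method on a trainer class and closes the session itself, so a caller only needs to construct one. A minimal invocation sketch, assuming TF1.x and a hypothetical Trainer class that defines the method:

import tensorflow as tf

trainer = Trainer()  # hypothetical class holding this train() method
sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
trainer.train(sess, num_gpus=1)  # runs until early stopping or the dataset is exhausted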
Example #5
def main(args):
    start_t = time.time()
    model = build_model(args)
    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    start_t = time.time()
    pretrain_models = flow.load(args.model_path)
    model.load_state_dict(pretrain_models)
    end_t = time.time()
    print("load params time : {}".format(end_t - start_t))

    model.eval()
    model.to("cuda")

    class ViTEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.model = model

        def build(self, image):
            with flow.no_grad():
                predictions = self.model(image)
            return predictions

    vit_eval_graph = ViTEvalGraph()

    start_t = time.time()
    image = load_image(args.image_path,
                       image_size=(args.image_size, args.image_size))
    image = flow.Tensor(image, device=flow.device("cuda"))
    predictions = vit_eval_graph(image).softmax()
    predictions = predictions.numpy()
    end_t = time.time()
    print("infer time : {}".format(end_t - start_t))
    clsidx = np.argmax(predictions)
    print("predict prob: %f, class name: %s" %
          (np.max(predictions), clsidx_2_labels[clsidx]))
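
Compared with Example #2, this variant wraps the same model in a flow.nn.Graph subclass, so the forward pass runs through OneFlow's compiled graph mode rather than eagerly; the image preprocessing, softmax, and argmax post-processing are unchanged. Wrapping only the model call keeps the graph small, which typically lets OneFlow optimize the compiled forward pass without changing the surrounding script.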
Example #6
def train(args, cfg):
    # Load data
    train_loader, test_loader = build_data.build_data_loader(args, cfg)
    print("------------load dataset success !---------------")

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    # net = VGG16_Finetuning.VGG16_Finetuning(cfg.DATASETS.NUM_CLASSES).to(device)
    net = build_model(args, cfg, device)
    print("-------------load model success !----------------")

    # Choose the optimizer
    if "SGD" == cfg.TRAIN_SET.LR_POLOCY:
        optimizer = optim.SGD(net.parameters(), lr=cfg.TRAIN_SET.BASE_LR, momentum=0.9)
    elif "Adam" == cfg.TRAIN_SET.LR_POLOCY:
        # Mirrors the commented-out original; reuses BASE_LR for the Adam path
        optimizer = optim.Adam(net.parameters(), lr=cfg.TRAIN_SET.BASE_LR)

    schedule = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=[50, 70, 100, 200], gamma=0.1)

    criterion = nn.CrossEntropyLoss().to(device)

    if not os.path.exists(cfg.WORK_SAVE.MODEL_PATH):
        os.mkdir(cfg.WORK_SAVE.MODEL_PATH)
    if not os.path.exists(cfg.WORK_SAVE.MODEL_PATH + "/" + cfg.MODEL.NAME):
        os.mkdir(cfg.WORK_SAVE.MODEL_PATH + "/" + cfg.MODEL.NAME)

    # Set up the SummaryWriter
    writer = SummaryWriter(cfg.WORK_SAVE.MODEL_PATH + "/" + cfg.MODEL.NAME + "/logs")
    writer.add_graph(net, torch.rand([1, 3, cfg.MODEL.INPUT_SIZE, cfg.MODEL.INPUT_SIZE]))

    start_epoch = 0
    if args.RESUME:
        checkpoint = torch.load(args.path_checkpoint)       # checkpoint path
        net.load_state_dict(checkpoint['net'])              # restore model weights
        optimizer.load_state_dict(checkpoint['optimizer'])  # restore optimizer state
        start_epoch = checkpoint['epoch']                   # resume from the saved epoch
        schedule.load_state_dict(checkpoint['schedule'])    # restore LR scheduler state
    else:
        if cfg.MODEL.PRE_TRAIN:
            net.load_state_dict(torch.load(cfg.MODEL.PRETRAIN_MODEL))

    for epoch in range(start_epoch + 1, args.epoch):
        running_loss = 0.0
        total = 0.0
        correct = 0.0
        for i, data in enumerate(tqdm(train_loader)):
            img, label = data

            # Move inputs to the target device
            img = img.to(device)
            label = label.to(device)

            # Forward pass, loss, and backpropagation
            optimizer.zero_grad()              # clear gradients
            outputs = net(img)                 # inference
            loss = criterion(outputs, label)   # compute the loss
            loss.backward()                    # backpropagate gradients
            optimizer.step()                   # update weights

            # Accuracy bookkeeping
            _, predicted = torch.max(outputs.data, 1)
            total += label.size(0)
            correct += torch.sum(predicted == label.data).cpu().numpy()

            running_loss += loss.item()

            if i % 100 == 99:
                loss_avg = running_loss / 100
                print(
                    "Time: {:.19s} Training:Epoch[{:0>3}/{:0>3}] Iteration[{:0>3}/{:0>3}] Loss:{:.4f} Acc:{}"
                    .format(str(datetime.now()), epoch, args.epoch, i + 1,
                            len(train_loader), loss_avg, correct / total))

                # Log the averaged loss and parameter histograms
                writer.add_scalar('loss', loss_avg, epoch * len(train_loader) + i)
                for name, param in net.named_parameters():
                    if 'bn' not in name:
                        writer.add_histogram(name, param, epoch)
                running_loss = 0.0

                # Feature-map and kernel visualization could be added here

        schedule.step()

        # Save the model
        if (epoch + 1) % 50 == 0:
            print("epoch: ", epoch)
            print('learning rate: ', optimizer.state_dict()['param_groups'][0]['lr'])

            checkpoint = {
                "net": net.state_dict(),
                "optimizer": optimizer.state_dict(),
                "epoch": epoch,
                "schedule": schedule.state_dict()
            }
            torch.save(checkpoint, cfg.WORK_SAVE.MODEL_PATH + "/" + cfg.MODEL.NAME + '/' + str(epoch) + ".pth")

            # Evaluate the saved checkpoint
            test(cfg.WORK_SAVE.MODEL_PATH + "/" + cfg.MODEL.NAME + '/' + str(epoch) + ".pth", cfg.DATASETS.VAL_TXT)
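
The cfg object is accessed with nested attribute paths, which matches a yacs-style CfgNode. A hypothetical skeleton covering exactly the keys this function reads (values are placeholders; the LR_POLOCY spelling is kept from the code, and the project's actual config system may differ):

from yacs.config import CfgNode as CN

cfg = CN()
cfg.TRAIN_SET = CN()
cfg.TRAIN_SET.LR_POLOCY = "SGD"       # "SGD" or "Adam"
cfg.TRAIN_SET.BASE_LR = 0.01
cfg.MODEL = CN()
cfg.MODEL.NAME = "vgg16"              # placeholder model name
cfg.MODEL.INPUT_SIZE = 224
cfg.MODEL.PRE_TRAIN = False
cfg.MODEL.PRETRAIN_MODEL = ""
cfg.WORK_SAVE = CN()
cfg.WORK_SAVE.MODEL_PATH = "./checkpoints"
cfg.DATASETS = CN()
cfg.DATASETS.NUM_CLASSES = 10         # placeholder
cfg.DATASETS.VAL_TXT = "./val.txt"    # placeholder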
Example #7
def main(args):
    # path setup
    training_results_path = os.path.join(args.results, args.tag)
    os.makedirs(training_results_path, exist_ok=True)

    # build dataloader
    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
        image_size=args.image_size,
    )

    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
        image_size=args.image_size,
    )

    # oneflow init
    start_t = time.time()
    model = build_model(args)
    if args.load_checkpoint != "":
        print("load_checkpoint >>>>>>>>> ", args.load_checkpoint)
        model.load_state_dict(flow.load(args.load_checkpoint))

    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    of_cross_entropy = flow.nn.CrossEntropyLoss()

    model.to("cuda")
    of_cross_entropy.to("cuda")

    of_sgd = flow.optim.SGD(
        model.parameters(), lr=args.learning_rate, momentum=args.mom
    )

    class ViTNetGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.model = model
            self.cross_entropy = of_cross_entropy
            self.add_optimizer(of_sgd)
            self.train_data_loader = train_data_loader

        def build(self):
            image, label = self.train_data_loader()
            image = image.to("cuda")
            label = label.to("cuda")
            logits = self.model(image)
            loss = self.cross_entropy(logits, label)
            loss.backward()
            return loss

    vit_graph = ViTNetGraph()

    class ViTEvalGraph(flow.nn.Graph):
        def __init__(self):
            super().__init__()
            self.model = model
            self.val_data_loader = val_data_loader

        def build(self):
            image, label = self.val_data_loader()
            image = image.to("cuda")
            with flow.no_grad():
                logits = self.model(image)
                predictions = logits.softmax()
            return predictions, label

    vit_eval_graph = ViTEvalGraph()

    of_losses = []
    of_accuracy = []
    all_samples = len(val_data_loader) * args.val_batch_size
    print_interval = 20

    for epoch in range(args.epochs):
        model.train()

        for b in range(len(train_data_loader)):
            # oneflow graph train
            start_t = time.time()
            loss = vit_graph()
            end_t = time.time()
            if b % print_interval == 0:
                l = loss.numpy()
                of_losses.append(l)
                print(
                    "epoch {} train iter {} oneflow loss {}, train time : {}".format(
                        epoch, b, l, end_t - start_t
                    )
                )

        print("epoch %d train done, start validation" % epoch)

        model.eval()
        correct_of = 0.0
        for b in range(len(val_data_loader)):
            start_t = time.time()
            predictions, label = vit_eval_graph()
            of_predictions = predictions.numpy()
            clsidxs = np.argmax(of_predictions, axis=1)

            label_nd = label.numpy()
            for i in range(args.val_batch_size):
                if clsidxs[i] == label_nd[i]:
                    correct_of += 1
            end_t = time.time()

        top1 = correct_of / all_samples
        of_accuracy.append(top1)
        print("epoch %d, oneflow top1 val acc: %f" % (epoch, top1))

        flow.save(
            model.state_dict(),
            os.path.join(
                args.save_checkpoint_path,
                "epoch_%d_val_acc_%f" % (epoch, correct_of / all_samples),
            ),
        )

    writer = open("graph/losses.txt", "w")
    for o in of_losses:
        writer.write("%f\n" % o)
    writer.close()

    writer = open("graph/accuracy.txt", "w")
    for o in of_accuracy:
        writer.write("%f\n" % o)
    writer.close()
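
The attributes read from args map directly to a CLI. A hypothetical argparse sketch covering them (defaults are placeholders; the real train.py defines its own flags, and build_model may read additional ones):

import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--ofrecord_path", type=str, required=True)
    parser.add_argument("--results", type=str, default="./results")
    parser.add_argument("--tag", type=str, default="default")
    parser.add_argument("--train_batch_size", type=int, default=32)
    parser.add_argument("--val_batch_size", type=int, default=32)
    parser.add_argument("--image_size", type=int, default=224)
    parser.add_argument("--learning_rate", type=float, default=0.01)
    parser.add_argument("--mom", type=float, default=0.9)
    parser.add_argument("--epochs", type=int, default=100)
    parser.add_argument("--load_checkpoint", type=str, default="")
    parser.add_argument("--save_checkpoint_path", type=str, default="./checkpoints")
    main(parser.parse_args())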
Example #8
def train(cfg, output_dir=""):
    logger = logging.getLogger("ModelZoo.trainer")

    # build model
    set_random_seed(cfg.RNG_SEED)
    model, loss_fn, metric_fn = build_model(cfg)
    logger.info("Build model:\n{}".format(str(model)))
    model = nn.DataParallel(model).cuda()

    # build optimizer
    optimizer = build_optimizer(cfg, model)

    # build lr scheduler
    scheduler = build_scheduler(cfg, optimizer)

    # build checkpointer
    checkpointer = Checkpointer(model,
                                optimizer=optimizer,
                                scheduler=scheduler,
                                save_dir=output_dir,
                                logger=logger)

    checkpoint_data = checkpointer.load(cfg.GLOBAL.TRAIN.WEIGHT,
                                        resume=cfg.AUTO_RESUME)
    ckpt_period = cfg.GLOBAL.TRAIN.CHECKPOINT_PERIOD

    # build data loader
    train_data_loader = build_data_loader(cfg,
                                          cfg.GLOBAL.DATASET,
                                          mode="train")
    val_period = cfg.GLOBAL.VAL.VAL_PERIOD
    # val_data_loader = build_data_loader(cfg, mode="val") if val_period > 0 else None

    # build tensorboard logger (optionally by comment)
    tensorboard_logger = TensorboardLogger(output_dir)

    # train
    max_epoch = cfg.GLOBAL.MAX_EPOCH
    start_epoch = checkpoint_data.get("epoch", 0)
    # best_metric_name = "best_{}".format(cfg.TRAIN.VAL_METRIC)
    # best_metric = checkpoint_data.get(best_metric_name, None)
    logger.info("Start training from epoch {}".format(start_epoch))
    for epoch in range(start_epoch, max_epoch):
        cur_epoch = epoch + 1
        scheduler.step()
        start_time = time.time()
        train_meters = train_model(
            model,
            loss_fn,
            metric_fn,
            data_loader=train_data_loader,
            optimizer=optimizer,
            curr_epoch=epoch,
            tensorboard_logger=tensorboard_logger,
            log_period=cfg.GLOBAL.TRAIN.LOG_PERIOD,
            output_dir=output_dir,
        )
        epoch_time = time.time() - start_time
        logger.info("Epoch[{}]-Train {}  total_time: {:.2f}s".format(
            cur_epoch, train_meters.summary_str, epoch_time))

        # checkpoint
        if cur_epoch % ckpt_period == 0 or cur_epoch == max_epoch:
            checkpoint_data["epoch"] = cur_epoch
            # checkpoint_data[best_metric_name] = best_metric
            checkpointer.save("model_{:03d}".format(cur_epoch),
                              **checkpoint_data)
        '''
        # validate
        if val_period < 1:
            continue
        if cur_epoch % val_period == 0 or cur_epoch == max_epoch:
            val_meters = validate_model(model,
                                        loss_fn,
                                        metric_fn,
                                        image_scales=cfg.MODEL.VAL.IMG_SCALES,
                                        inter_scales=cfg.MODEL.VAL.INTER_SCALES,
                                        isFlow=(cur_epoch > cfg.SCHEDULER.INIT_EPOCH),
                                        data_loader=val_data_loader,
                                        curr_epoch=epoch,
                                        tensorboard_logger=tensorboard_logger,
                                        log_period=cfg.TEST.LOG_PERIOD,
                                        output_dir=output_dir,
                                        )
            logger.info("Epoch[{}]-Val {}".format(cur_epoch, val_meters.summary_str))

            # best validation
            cur_metric = val_meters.meters[cfg.TRAIN.VAL_METRIC].global_avg
            if best_metric is None or cur_metric > best_metric:
                best_metric = cur_metric
                checkpoint_data["epoch"] = cur_epoch
                checkpoint_data[best_metric_name] = best_metric
                checkpointer.save("model_best", **checkpoint_data)
        '''

    logger.info("Train Finish!")
    # logger.info("Best val-{} = {}".format(cfg.TRAIN.VAL_METRIC, best_metric))

    return model
Example #9
File: train.py  Project: Oneflow-Inc/models
def main(args):

    train_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="train",
        dataset_size=9469,
        batch_size=args.train_batch_size,
        image_size=args.image_size,
    )

    val_data_loader = OFRecordDataLoader(
        ofrecord_root=args.ofrecord_path,
        mode="val",
        dataset_size=3925,
        batch_size=args.val_batch_size,
        image_size=args.image_size,
    )

    # oneflow init
    start_t = time.time()
    model = build_model(args)
    if args.load_checkpoint != "":
        print("load_checkpoint >>>>>>>>> ", args.load_checkpoint)
        checkpoint = flow.load(args.load_checkpoint)
        model.load_state_dict(checkpoint)

    end_t = time.time()
    print("init time : {}".format(end_t - start_t))

    of_cross_entropy = flow.nn.CrossEntropyLoss()

    model.to("cuda")
    of_cross_entropy.to("cuda")

    of_sgd = flow.optim.SGD(model.parameters(),
                            lr=args.learning_rate,
                            momentum=args.mom)

    of_losses = []
    all_samples = len(val_data_loader) * args.val_batch_size
    print_interval = 20

    for epoch in range(args.epochs):
        model.train()

        for b in range(len(train_data_loader)):
            image, label = train_data_loader()

            # oneflow train
            start_t = time.time()
            image = image.to("cuda")
            label = label.to("cuda")
            logits = model(image)
            loss = of_cross_entropy(logits, label)
            loss.backward()
            of_sgd.step()
            of_sgd.zero_grad()
            end_t = time.time()
            if b % print_interval == 0:
                l = loss.numpy()
                of_losses.append(l)
                print(
                    "epoch {} train iter {} oneflow loss {}, train time : {}".
                    format(epoch, b, l, end_t - start_t))

        print("epoch %d train done, start validation" % epoch)

        model.eval()
        correct_of = 0.0
        for b in range(len(val_data_loader)):
            image, label = val_data_loader()

            start_t = time.time()
            image = image.to("cuda")
            with flow.no_grad():
                logits = model(image)
                predictions = logits.softmax()
            of_predictions = predictions.numpy()
            clsidxs = np.argmax(of_predictions, axis=1)

            label_nd = label.numpy()
            for i in range(args.val_batch_size):
                if clsidxs[i] == label_nd[i]:
                    correct_of += 1
            end_t = time.time()

        print("epoch %d, oneflow top1 val acc: %f" %
              (epoch, correct_of / all_samples))

        flow.save(
            model.state_dict(),
            os.path.join(
                args.save_checkpoint_path,
                "epoch_%d_val_acc_%f" % (epoch, correct_of / all_samples),
            ),
        )

    writer = open("of_losses.txt", "w")
    for o in of_losses:
        writer.write("%f\n" % o)
    writer.close()
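
This is the eager-mode counterpart of Example #7: the training step (forward pass, loss, backward, of_sgd.step()) and the validation loop run op by op instead of through compiled nn.Graph modules, while the data loaders, loss, optimizer settings, and accuracy bookkeeping are otherwise the same.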