def train_async(args):
    """Train a metric-learning model, with periodic evaluation and saving.

    Builds separate train/test fluid programs, optionally restores a
    checkpoint or pretrained weights, then loops over the training reader:
    prints smoothed loss / recall@1 every ``args.display_iter_step``
    iterations, evaluates recall@1 on the full test reader every
    ``args.test_iter_step`` iterations, and saves the train program every
    ``args.save_iter_step`` iterations.

    Args:
        args: parsed command-line namespace. Attributes read here:
            model, checkpoint, pretrained_model, model_save_dir, use_gpu,
            train_batch_size, test_batch_size, total_iter_num,
            display_iter_step, test_iter_step, save_iter_step.
    """
    logging.debug('enter train')
    model_name = args.model
    checkpoint = args.checkpoint
    pretrained_model = args.pretrained_model
    model_save_dir = args.model_save_dir
    # exist_ok avoids the check-then-create race of the original
    # `if not exists: os.mkdir`, and also creates missing parents.
    os.makedirs(model_save_dir, exist_ok=True)

    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    tmp_prog = fluid.Program()

    train_loader, train_cost, global_lr, train_feas, train_label = build_program(
        is_train=True,
        main_prog=train_prog,
        startup_prog=startup_prog,
        args=args)
    test_loader, test_feas = build_program(is_train=False,
                                           main_prog=tmp_prog,
                                           startup_prog=startup_prog,
                                           args=args)
    # Freeze the eval graph (disables dropout/bn-update style ops).
    test_prog = tmp_prog.clone(for_test=True)

    train_fetch_list = [
        global_lr.name, train_cost.name, train_feas.name, train_label.name
    ]
    test_fetch_list = [test_feas.name]

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)
    # Single-trainer GPU runs feed every visible CUDA device; distributed
    # (or CPU) runs feed only this trainer's place.
    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if num_trainers <= 1 and args.use_gpu:
        places = fluid.framework.cuda_places()
    else:
        places = place

    exe.run(startup_prog)

    if checkpoint is not None:
        fluid.load(program=train_prog, model_path=checkpoint, executor=exe)

    if pretrained_model:
        load_params(exe, train_prog, pretrained_model)

    if args.use_gpu:
        devicenum = get_gpu_num()
    else:
        devicenum = int(os.environ.get('CPU_NUM', 1))
    assert (args.train_batch_size % devicenum) == 0, \
        'train_batch_size must be divisible by the device count'
    # BUG FIX: floor division keeps the per-device batch size an int;
    # `/` produced a float under Python 3.  The assert above guarantees
    # the division is exact either way.
    train_batch_size = args.train_batch_size // devicenum
    test_batch_size = args.test_batch_size

    train_loader.set_sample_generator(reader.train(args),
                                      batch_size=train_batch_size,
                                      drop_last=True,
                                      places=places)

    test_loader.set_sample_generator(reader.test(args),
                                     batch_size=test_batch_size,
                                     drop_last=False,
                                     places=place)

    train_exe = fluid.ParallelExecutor(main_program=train_prog,
                                       use_cuda=args.use_gpu,
                                       loss_name=train_cost.name)

    totalruntime = 0
    iter_no = 0
    # Running sums: [loss, recall@1, batch count] — reset every 1000 iters.
    train_info = [0, 0, 0]
    while iter_no <= args.total_iter_num:
        for train_batch in train_loader():
            t1 = time.time()
            lr, loss, feas, label = train_exe.run(feed=train_batch,
                                                  fetch_list=train_fetch_list)
            t2 = time.time()
            period = t2 - t1
            lr = np.mean(np.array(lr))
            train_info[0] += np.mean(np.array(loss))
            train_info[1] += recall_topk(feas, label, k=1)
            train_info[2] += 1
            if iter_no % args.display_iter_step == 0:
                avgruntime = totalruntime / args.display_iter_step
                avg_loss = train_info[0] / train_info[2]
                avg_recall = train_info[1] / train_info[2]
                print("[%s] trainbatch %d, lr %.6f, loss %.6f, "\
                    "recall %.4f, time %2.2f sec" % \
                    (fmt_time(), iter_no, lr, avg_loss, avg_recall, avgruntime))
                sys.stdout.flush()
                totalruntime = 0
            if iter_no % 1000 == 0:
                # Restart the smoothing window so stats track recent behavior.
                train_info = [0, 0, 0]

            totalruntime += period

            if iter_no % args.test_iter_step == 0 and iter_no != 0:
                # Collect features (f) and labels (l) over the whole test set,
                # then compute a single recall@1 on the stacked arrays.
                f, l = [], []
                for batch_id, test_batch in enumerate(test_loader()):
                    t1 = time.time()
                    [feas] = exe.run(test_prog,
                                     feed=test_batch,
                                     fetch_list=test_fetch_list)

                    label = np.asarray(test_batch[0]['label'])
                    label = np.squeeze(label)
                    f.append(feas)
                    l.append(label)

                    t2 = time.time()
                    period = t2 - t1
                    if batch_id % 20 == 0:
                        print("[%s] testbatch %d, time %2.2f sec" % \
                            (fmt_time(), batch_id, period))

                f = np.vstack(f)
                l = np.hstack(l)
                recall = recall_topk(f, l, k=1)
                print("[%s] test_img_num %d, trainbatch %d, test_recall %.5f" % \
                    (fmt_time(), len(f), iter_no, recall))
                sys.stdout.flush()

            if iter_no % args.save_iter_step == 0 and iter_no != 0:
                model_path = os.path.join(model_save_dir, model_name,
                                          str(iter_no))
                fluid.save(program=train_prog, model_path=model_path)

            iter_no += 1
# Beispiel #2
# 0
        utility_predict.save_preds(
            self.t_params, self.m_params, self.li_predictions,
            self.li_timestamps_chunked[:len(self.li_predictions)],
            self.li_true_values, self.era5_eobs.li_loc,
            self.upload_batch_number)
        self.upload_batch_number = self.upload_batch_number + 1
        self.li_timestamps_chunked = self.li_timestamps_chunked[
            len(self.li_predictions):]
        self.li_predictions = []
        self.li_true_values = []


if __name__ == "__main__":
    # Locate this script's directory and parse command-line arguments.
    s_dir = utility.get_script_directory(sys.argv[0])

    args_dict = utility.parse_arguments(s_dir)

    # Load training and model parameters in "test" mode.
    t_params, m_params = utility.load_params(args_dict, "test")

    test_tru_net = TestTruNet(t_params, m_params)
    mts = m_params['model_type_settings']
    # Prefer the dedicated test locations; fall back to the training
    # locations.  Single `.get` lookup with `is not None` replaces the
    # original triple-lookup `!= None` conditional expression.
    locations = mts.get('location_test')
    if locations is None:
        locations = mts.get('location')

    for loc in locations:
        test_tru_net.initialize_scheme_era5Eobs(location=[loc])
        test_tru_net.predict(min_prob_for_rain=mts.get('prob_thresh', 0.5))
        print(f"Completed Prediction for {loc}")
import engine
import fnlp_engine
import utility

if __name__ == '__main__':
    args = utility.load_params(jsonFile='config.json')

    # Select the engine implementation by dataset language.
    dataset = args['data']['dataset']
    if dataset == 'zh':
        runner = engine.Engine(args)
    elif dataset == 'en':
        runner = fnlp_engine.Engine(args)
    else:
        print('Invalid dataset!')
        exit()

    if dataset == 'zh' and args['predict']:
        # Interactive single-sentence prediction (Chinese engine only).
        runner.load_model()
        sentence = input('Input a sentence: ')
        runner.predict(sentence)
    elif args['train']:
        # Optionally resume from a saved model, then train and evaluate.
        if args['continue']:
            runner.load_model()
        runner.train()
        runner.test()
    else:
        # Evaluation only: load the saved model and run the test set.
        runner.load_model()
        runner.test()
# Beispiel #4
# 0
        self.mse_agg_val(mse)

        return True

    @tf.function
    def distributed_train_step(self, feature, target, mask, bounds, _init):
        """Dispatch `train_step` to every replica via the distribution
        strategy and return the per-replica result (gradients)."""
        return self.strategy.run(
            self.train_step, args=(feature, target, mask, bounds, _init))

    @tf.function
    def distributed_val_step(self, feature, target, mask, bounds):
        """Dispatch `val_step` to every replica via the distribution
        strategy and return its per-replica completion flag."""
        return self.strategy.run(
            self.val_step, args=(feature, target, mask, bounds))


if __name__ == "__main__":
    # Resolve this script's directory and parse CLI arguments.
    script_dir = utility.get_script_directory(sys.argv[0])
    parsed_args = utility.parse_arguments(script_dir)

    # Load training and model parameters.
    t_params, m_params = utility.load_params(parsed_args)

    # Build the model, initialize the ERA5/E-OBS scheme, and train.
    weather_model = WeatherModel(t_params, m_params)
    weather_model.initialize_scheme_era5Eobs()
    weather_model.train_model()