def create_network(name, *args, **kwargs):
    """Build and return the evaluation network registered under ``name``.

    Only ``'autodis'`` is recognised. For that name a ``ModelBuilder`` is
    created from freshly constructed ``ModelConfig`` and ``TrainConfig``
    objects, and the second element of ``get_train_eval_net()`` (the
    evaluation network) is returned.

    Args:
        name: Identifier of the network to create.
        *args: Accepted for interface compatibility; not used.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        The evaluation network produced by ``ModelBuilder``.

    Raises:
        NotImplementedError: If ``name`` is anything other than ``'autodis'``.
    """
    # Guard clause: reject unknown names before building anything.
    if name != 'autodis':
        raise NotImplementedError(f"{name} is not implemented in the repo")

    builder = ModelBuilder(ModelConfig(), TrainConfig())
    # The builder yields (train_net, eval_net); only the latter is exposed.
    _train_net, eval_net = builder.get_train_eval_net()
    return eval_net
def test_deepfm():
    """Train DeepFM on the Criteo H5 dataset and check loss and step time.

    The test configures an Ascend graph-mode context, trains for
    ``train_config.train_epochs`` epochs with time, loss and evaluation
    callbacks attached, and then asserts that the last recorded loss and
    per-step time are below fixed thresholds.

    Raises:
        AssertionError: If ``loss_callback.loss`` is not below 0.51 or
            ``time_callback.per_step_time`` is not below 40.0.
    """
    data_config = DataConfig()
    train_config = TrainConfig()

    # Default to device 0 when DEVICE_ID is not exported; int(None) would
    # otherwise fail with an unhelpful TypeError before the test even starts.
    device_id = int(os.getenv('DEVICE_ID', '0'))
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id)

    dataset_path = "/home/workspace/mindspore_dataset/criteo_data/criteo_h5/"
    print("dataset_path:", dataset_path)
    data_type = DataType(data_config.data_format)

    # rank_size / rank_id are explicitly None here, as in the original test.
    ds_train = create_dataset(dataset_path,
                              train_mode=True,
                              epochs=1,
                              batch_size=train_config.batch_size,
                              data_type=data_type,
                              rank_size=None,
                              rank_id=None)

    # NOTE(review): the config *classes* are passed here rather than instances;
    # confirm ModelBuilder only reads class-level attributes.
    model_builder = ModelBuilder(ModelConfig, TrainConfig)
    train_net, eval_net = model_builder.get_train_eval_net()
    auc_metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})

    loss_file_name = './loss.log'
    time_callback = TimeMonitor(data_size=ds_train.get_dataset_size())
    loss_callback = LossCallBack(loss_file_path=loss_file_name)
    callback_list = [time_callback, loss_callback]

    eval_file_name = './auc.log'
    ds_eval = create_dataset(dataset_path,
                             train_mode=False,
                             epochs=1,
                             batch_size=train_config.batch_size,
                             data_type=data_type)
    eval_callback = EvalCallBack(model, ds_eval, auc_metric, eval_file_path=eval_file_name)
    callback_list.append(eval_callback)

    print("train_config.train_epochs:", train_config.train_epochs)
    model.train(train_config.train_epochs, ds_train, callbacks=callback_list)

    # Upper bounds the run must stay under for the test to pass.
    expected_loss_value = 0.51
    print("loss_callback.loss:", loss_callback.loss)
    assert loss_callback.loss < expected_loss_value, (
        f"loss {loss_callback.loss} is not below {expected_loss_value}")

    expected_per_step_time = 40.0
    print("time_callback:", time_callback.per_step_time)
    assert time_callback.per_step_time < expected_per_step_time, (
        f"per-step time {time_callback.per_step_time} is not below {expected_per_step_time}")

    print("*******test case pass!********")
# `parser` is created earlier in the file; unrecognised arguments are ignored.
args_opt, _ = parser.parse_known_args()

# Default to device 0 when DEVICE_ID is not exported (typical for GPU/CPU
# runs); int(None) would otherwise raise TypeError at import time.
device_id = int(os.getenv('DEVICE_ID', '0'))
context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=device_id)


def add_write(file_path, print_str):
    """Append ``print_str`` followed by a newline to ``file_path``.

    Args:
        file_path: Path of the file to append to; it is opened in ``'a+'``
            mode with UTF-8 encoding.
        print_str: Text to write; a ``'\\n'`` is added after it.
    """
    with open(file_path, 'a+', encoding='utf-8') as file_out:
        file_out.write(print_str + '\n')


if __name__ == '__main__':
    data_config = DataConfig()
    model_config = ModelConfig()
    train_config = TrainConfig()

    ds_eval = create_dataset(args_opt.dataset_path,
                             train_mode=False,
                             epochs=1,
                             batch_size=train_config.batch_size,
                             data_type=DataType(data_config.data_format))

    # Pass the config instances built above rather than the bare classes, so
    # the objects this script constructs are the ones the builder uses.
    model_builder = ModelBuilder(model_config, train_config)
    train_net, eval_net = model_builder.get_train_eval_net()
    train_net.set_train()
    eval_net.set_train(False)

    auc_metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})
    param_dict = load_checkpoint(args_opt.checkpoint_path)
# Remaining command-line options. `parser` is created earlier in the file,
# outside this excerpt.
parser.add_argument('--loss_file_name', type=str, default="./loss.log",
                    help='Loss log file path. Default: "./loss.log"')
# --do_eval is taken as a string and converted to bool below.
parser.add_argument('--do_eval', type=str, default='True',
                    help='Do evaluation or not, only support "True" or "False". Default: "True"')
parser.add_argument('--device_target', type=str, default="Ascend", choices=("Ascend", "GPU", "CPU"),
                    help="device target, support Ascend, GPU and CPU.")
# Unrecognised arguments are collected into the discarded second value.
args_opt, _ = parser.parse_known_args()
# Only the exact string 'True' enables evaluation; any other value is False.
args_opt.do_eval = args_opt.do_eval == 'True'
# RANK_SIZE environment variable, defaulting to 1 when it is not set.
rank_size = int(os.environ.get("RANK_SIZE", 1))
# Seed with the constant 1.
set_seed(1)

if __name__ == '__main__':
    data_config = DataConfig()
    model_config = ModelConfig()
    train_config = TrainConfig()
    # This branch is taken only when RANK_SIZE is greater than 1.
    if rank_size > 1:
        if args_opt.device_target == "Ascend":
            # DEVICE_ID must be set here: os.getenv returns None when it is
            # missing and int(None) raises TypeError.
            device_id = int(os.getenv('DEVICE_ID'))
            context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, device_id=device_id)
            # Reset the auto-parallel context before applying the settings below.
            context.reset_auto_parallel_context()
            context.set_auto_parallel_context(parallel_mode=ParallelMode.DATA_PARALLEL,
                                              gradients_mean=True,
                                              all_reduce_fusion_config=[9, 11])
            init()
            # RANK_ID must also be set; a missing value raises TypeError as above.
            rank_id = int(os.environ.get('RANK_ID'))
        elif args_opt.device_target == "GPU":
            # On GPU, init() is called before the context is configured, and
            # no device_id is passed to set_context.
            init()
            context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
            context.reset_auto_parallel_context()