    num_workers=0,
)

# (single-hot, multi-hot) embedding table shapes
EMBEDDING_TABLE_SHAPES_TUPLE = (
    {
        CATEGORICAL_COLUMNS[0]: EMBEDDING_TABLE_SHAPES[CATEGORICAL_COLUMNS[0]],
        CATEGORICAL_COLUMNS[1]: EMBEDDING_TABLE_SHAPES[CATEGORICAL_COLUMNS[1]],
    },
    {CATEGORICAL_MH_COLUMNS[0]: EMBEDDING_TABLE_SHAPES[CATEGORICAL_MH_COLUMNS[0]]},
)

model = Model(
    embedding_table_shapes=EMBEDDING_TABLE_SHAPES_TUPLE,
    num_continuous=0,
    emb_dropout=0.0,
    layer_hidden_dims=[128, 128, 128],
    layer_dropout_rates=[0.0, 0.0, 0.0],
).cuda()

# Scale the learning rate by the number of Horovod workers
lr_scaler = hvd.size()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01 * lr_scaler)

# Broadcast initial parameters and optimizer state from rank 0, then wrap the optimizer
# so gradients are averaged across workers
hvd.broadcast_parameters(model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(optimizer, root_rank=0)
optimizer = hvd.DistributedOptimizer(optimizer, named_parameters=model.named_parameters())

for epoch in range(args.epochs):
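# Illustrative sketch (not part of the fragment above, which ends at the epoch loop):
# a self-contained toy example of the same Horovod pattern -- broadcast once from rank 0,
# wrap the optimizer, scale the learning rate by hvd.size(). The Linear model and random
# tensors are placeholders, not the NVTabular example's actual loop body.
import torch
import torch.nn as nn
import horovod.torch as hvd

hvd.init()
torch.cuda.set_device(hvd.local_rank())

toy_model = nn.Linear(8, 1).cuda()
toy_optimizer = torch.optim.Adam(toy_model.parameters(), lr=0.01 * hvd.size())

hvd.broadcast_parameters(toy_model.state_dict(), root_rank=0)
hvd.broadcast_optimizer_state(toy_optimizer, root_rank=0)
toy_optimizer = hvd.DistributedOptimizer(
    toy_optimizer, named_parameters=toy_model.named_parameters()
)

for _ in range(2):
    features = torch.randn(32, 8, device="cuda")
    target = torch.randn(32, 1, device="cuda")
    toy_optimizer.zero_grad()
    loss = nn.functional.mse_loss(toy_model(features), target)
    loss.backward()
    toy_optimizer.step()  # gradients are allreduced across workers before the update
# Launched with, e.g.: horovodrun -np <num_gpus> python <script>.py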
def test_empty_cols(tmpdir, engine, cat_names, mh_names, cont_names, label_name, num_rows):
    json_sample["num_rows"] = num_rows
    cols = datagen._get_cols_from_schema(json_sample)

    df_gen = datagen.DatasetGen(datagen.PowerLawDistro(0.1))
    dataset = df_gen.create_df(num_rows, cols)
    dataset = nvt.Dataset(dataset)

    features = []
    if cont_names:
        features.append(cont_names >> ops.FillMedian() >> ops.Normalize())
    if cat_names or mh_names:
        features.append(cat_names + mh_names >> ops.Categorify())

    # test out https://github.com/NVIDIA/NVTabular/issues/149 making sure we can iterate over
    # empty cats/conts
    graph = sum(features, nvt.WorkflowNode(label_name))
    processor = nvt.Workflow(graph)

    output_train = os.path.join(tmpdir, "train/")
    os.mkdir(output_train)

    df_out = processor.fit_transform(dataset).to_ddf().compute(scheduler="synchronous")

    if processor.output_node.output_schema.apply_inverse(ColumnSelector("lab_1")):
        # if we don't have conts/cats/labels we're done
        return

    data_itr = None

    with pytest.raises(ValueError) as exc_info:
        data_itr = torch_dataloader.TorchAsyncItr(
            nvt.Dataset(df_out),
            cats=cat_names + mh_names,
            conts=cont_names,
            labels=label_name,
            batch_size=2,
        )
    assert "Neither Categorical or Continuous columns were found by the dataloader. " in str(
        exc_info.value
    )

    if data_itr:
        for nvt_batch in data_itr:
            cats_conts, labels = nvt_batch
            if cat_names:
                assert set(cat_names).issubset(set(list(cats_conts.keys())))
            if cont_names:
                assert set(cont_names).issubset(set(list(cats_conts.keys())))

        if cat_names or cont_names or mh_names:
            emb_sizes = nvt.ops.get_embedding_sizes(processor)

            EMBEDDING_DROPOUT_RATE = 0.04
            DROPOUT_RATES = [0.001, 0.01]
            HIDDEN_DIMS = [1000, 500]
            LEARNING_RATE = 0.001

            model = Model(
                embedding_table_shapes=emb_sizes,
                num_continuous=len(cont_names),
                emb_dropout=EMBEDDING_DROPOUT_RATE,
                layer_hidden_dims=HIDDEN_DIMS,
                layer_dropout_rates=DROPOUT_RATES,
            ).cuda()
            optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

            def rmspe_func(y_pred, y):
                "Return y_pred and y to non-log space and compute RMSPE"
                y_pred, y = torch.exp(y_pred) - 1, torch.exp(y) - 1
                pct_var = (y_pred - y) / y
                return (pct_var**2).mean().pow(0.5)

            train_loss, y_pred, y = process_epoch(
                data_itr,
                model,
                train=True,
                optimizer=optimizer,
                amp=False,
            )
            train_rmspe = rmspe_func(y_pred, y)
            assert train_rmspe is not None
            assert len(y_pred) > 0
            assert len(y) > 0
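# The test above takes its engine, column lists, and num_rows from pytest parametrization
# defined elsewhere in the suite. A hypothetical parametrization is sketched below
# (shown detached here; in the test file the decorators sit directly above the function
# definition). The concrete column names and row counts are illustrative, not the suite's
# actual values; the point is that the empty-list cases exercise the empty cats/conts paths
# the test guards.
import pytest


@pytest.mark.parametrize("engine", ["parquet"])
@pytest.mark.parametrize("cat_names", [["cat_1", "cat_2"], []])
@pytest.mark.parametrize("mh_names", [["cat_mh_1"], []])
@pytest.mark.parametrize("cont_names", [["cont_1", "cont_2"], []])
@pytest.mark.parametrize("label_name", [["lab_1"], []])
@pytest.mark.parametrize("num_rows", [1000, 100])
def test_empty_cols(tmpdir, engine, cat_names, mh_names, cont_names, label_name, num_rows):
    ...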
def test_mh_model_support(tmpdir):
    df = cudf.DataFrame(
        {
            "Authors": [["User_A"], ["User_A", "User_E"], ["User_B", "User_C"], ["User_C"]],
            "Reviewers": [["User_A"], ["User_A", "User_E"], ["User_B", "User_C"], ["User_C"]],
            "Engaging User": ["User_B", "User_B", "User_A", "User_D"],
            "Null User": ["User_B", "User_B", "User_A", "User_D"],
            "Post": [1, 2, 3, 4],
            "Cont1": [0.3, 0.4, 0.5, 0.6],
            "Cont2": [0.3, 0.4, 0.5, 0.6],
            "Cat1": ["A", "B", "A", "C"],
        }
    )
    cat_names = ["Cat1", "Null User", "Authors", "Reviewers"]  # , "Engaging User"]
    cont_names = ["Cont1", "Cont2"]
    label_name = ["Post"]
    out_path = os.path.join(tmpdir, "train/")
    os.mkdir(out_path)

    cats = cat_names >> ops.Categorify()
    conts = cont_names >> ops.Normalize()

    processor = nvt.Workflow(cats + conts + label_name)
    df_out = processor.fit_transform(nvt.Dataset(df)).to_ddf().compute()
    data_itr = torch_dataloader.TorchAsyncItr(
        nvt.Dataset(df_out),
        cats=cat_names,
        conts=cont_names,
        labels=label_name,
        batch_size=2,
    )
    emb_sizes = nvt.ops.get_embedding_sizes(processor)

    EMBEDDING_DROPOUT_RATE = 0.04
    DROPOUT_RATES = [0.001, 0.01]
    HIDDEN_DIMS = [1000, 500]
    LEARNING_RATE = 0.001

    model = Model(
        embedding_table_shapes=emb_sizes,
        num_continuous=len(cont_names),
        emb_dropout=EMBEDDING_DROPOUT_RATE,
        layer_hidden_dims=HIDDEN_DIMS,
        layer_dropout_rates=DROPOUT_RATES,
    ).cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

    def rmspe_func(y_pred, y):
        "Return y_pred and y to non-log space and compute RMSPE"
        y_pred, y = torch.exp(y_pred) - 1, torch.exp(y) - 1
        pct_var = (y_pred - y) / y
        return (pct_var**2).mean().pow(0.5)

    train_loss, y_pred, y = process_epoch(
        data_itr,
        model,
        train=True,
        optimizer=optimizer,
        # transform=batch_transform,
        amp=False,
    )
    train_rmspe = rmspe_func(y_pred, y)
    assert train_rmspe is not None
    assert len(y_pred) > 0
    assert len(y) > 0
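# A validation pass could reuse the same helpers. The sketch below is illustrative only:
# valid_itr is assumed to be a second TorchAsyncItr over held-out data, built the same way
# as data_itr above (it is not part of the original test), and it relies on process_epoch
# accepting train=False as in the NVTabular training examples.
model.eval()
with torch.no_grad():
    valid_loss, y_pred, y = process_epoch(valid_itr, model, train=False)
valid_rmspe = rmspe_func(y_pred, y)
print(f"valid loss: {valid_loss:.4f}, valid RMSPE: {valid_rmspe:.4f}")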
def runner(rank, world_size):
    setup(rank, world_size)
    train_dataset = TorchAsyncItr(
        nvt.Dataset(TRAIN_PATHS),
        batch_size=BATCH_SIZE,
        cats=CATEGORICAL_COLUMNS + CATEGORICAL_MH_COLUMNS,
        conts=NUMERIC_COLUMNS,
        labels=["rating"],
        device=rank,
        global_size=world_size,
        global_rank=rank,
        shuffle=True,
        seed_fn=seed_fn,
    )
    train_loader = DLDataLoader(
        train_dataset, batch_size=None, collate_fn=collate_fn, pin_memory=False, num_workers=0
    )

    EMBEDDING_TABLE_SHAPES_TUPLE = (
        {
            CATEGORICAL_COLUMNS[0]: EMBEDDING_TABLE_SHAPES[CATEGORICAL_COLUMNS[0]],
            CATEGORICAL_COLUMNS[1]: EMBEDDING_TABLE_SHAPES[CATEGORICAL_COLUMNS[1]],
        },
        {CATEGORICAL_MH_COLUMNS[0]: EMBEDDING_TABLE_SHAPES[CATEGORICAL_MH_COLUMNS[0]]},
    )

    model = Model(
        embedding_table_shapes=EMBEDDING_TABLE_SHAPES_TUPLE,
        num_continuous=0,
        emb_dropout=0.0,
        layer_hidden_dims=[128, 128, 128],
        layer_dropout_rates=[0.0, 0.0, 0.0],
    ).cuda()
    model = nn.parallel.DistributedDataParallel(
        model, device_ids=[rank], find_unused_parameters=True
    )

    lr_scaler = world_size
    # optimizer = DistributedOptimizer(torch.optim.Adam, model.parameters(), lr=0.01 * lr_scaler)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01 * lr_scaler)

    total_rows = 0
    t_final = 0
    for epoch in range(args.epochs):
        start = time()
        with model.join():
            train_loss, y_pred, y = process_epoch(
                train_loader,
                model,
                train=True,
                optimizer=optimizer,
            )
        # hvd.join(gpu_to_use)
        # hvd.broadcast_parameters(model.state_dict(), root_rank=0)
        print(f"Epoch {epoch:02d}. Train loss: {train_loss:.4f}.")
        t_final += time() - start
        total_rows += train_dataset.num_rows_processed
    print(
        f"run_time: {t_final} - rows: {total_rows} - "
        f"epochs: {epoch} - dl_thru: {total_rows / t_final}"
    )
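# runner() assumes a setup() helper and a spawn-based entry point that are not shown above.
# A minimal sketch of that scaffolding, assuming a single-node run; the master address/port
# values are placeholders, and cleanup() would typically be called once training finishes.
import os

import torch
import torch.distributed as dist
import torch.multiprocessing as mp


def setup(rank, world_size):
    # One process per GPU; NCCL backend for GPU collectives
    os.environ["MASTER_ADDR"] = "localhost"
    os.environ["MASTER_PORT"] = "12355"
    dist.init_process_group("nccl", rank=rank, world_size=world_size)


def cleanup():
    dist.destroy_process_group()


if __name__ == "__main__":
    world_size = torch.cuda.device_count()
    # mp.spawn passes the process rank as the first argument to runner
    mp.spawn(runner, args=(world_size,), nprocs=world_size, join=True)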