"worker": conf.arch }) conf.arch_info["worker"] = conf.arch_info["worker"].split(":") # parse the fl_aggregate scheme. conf._fl_aggregate = conf.fl_aggregate conf.fl_aggregate = (param_parser.dict_parser(conf.fl_aggregate) if conf.fl_aggregate is not None else conf.fl_aggregate) [ setattr(conf, f"fl_aggregate_{k}", v) for k, v in conf.fl_aggregate.items() ] # define checkpoint for logging (for federated learning server). checkpoint.init_checkpoint(conf, rank=str(conf.graph.rank)) # configure logger. conf.logger = logging.Logger(conf.checkpoint_dir) # display the arguments' info. if conf.graph.rank == 0: logging.display_args(conf) # sync the processes. dist.barrier() if __name__ == "__main__": conf = get_args() main(conf)
# NOTE(review): this span opens on an `else:` whose matching `if` and enclosing
# function header are not visible here; the indentation below assumes a
# top-level function body — confirm against the full file.
    else:
        # build runnable script for a single machine.
        cmd = build_mpi_script(conf)
        # the command is stored as a one-element list keyed by ip2slot[0];
        # it is prefixed with "cd <work_dir> && " only when conf.work_dir is set.
        tasks[ip2slot[0]] = [
            (
                "cd {work_dir} && ".format(work_dir=conf.work_dir)
                if conf.work_dir is not None
                else ""
            )
            + cmd
        ]

    # run cmd.
    create_job_on_nodes(conf, tasks)


if __name__ == "__main__":
    # parse the arguments.
    conf = para.get_args()

    # get ip and the corresponding # of slots.
    ip2slots = read_hostfile(conf.hostfile)
    ip2slot = map_slot(ip2slots)

    # run the main script.
    # NOTE(review): a backend other than "nccl", "gloo" or "mpi" matches
    # neither branch, so nothing is launched and no error is reported —
    # confirm that this silent fall-through is intended.
    if conf.backend == "nccl" or conf.backend == "gloo":
        main_nccl_or_gloo(conf, ip2slot)
    elif conf.backend == "mpi":
        main_mpi(conf, ip2slot)
import os
import gc
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from data import *
from utils import *
from parameters import get_args
from models import kfold_ligthgbm


def main(args, **model_kwargs):
    """Run the pipeline stages that are switched on in ``args``.

    The ``explain``, ``debug`` and ``train`` flags are checked independently
    and in that order, so any combination of stages may run in one call.

    Args:
        args: Options object; its ``explain``, ``debug`` and ``train``
            attributes select the stages, and the object itself is passed
            unchanged to each stage.
        **model_kwargs: Extra keyword arguments; accepted but not used by
            this function.
    """
    if args.explain:
        # Describe the fields of each data file for the team (with translation).
        data_explain(args)

    if args.debug:
        # Debug run to check whether the data processing works.
        data_debug(args)

    if args.train:
        frame = read_data(args)
        print(frame.shape)
        # Disabled follow-up steps, kept for reference:
        # frame.to_csv('data.csv', index=False)
        # feat_importance = kfold_ligthgbm(frame, num_folds=4, stratified=True)
        # feat_importance.to_csv('feat_importance.csv', index=False)


# Script entry point: parse the options, then run the selected stages.
if __name__ == "__main__":
    args = get_args()
    main(args)