from multiprocessing import Process


def work_parallel(db, datarun_ids=None, aws_config=None, n_procs=4,
                  total_time=None):
    """Run the `work` loop in up to n_procs parallel processes on the given dataruns."""
    print('starting workers...')
    kwargs = dict(db=db, datarun_ids=datarun_ids, save_files=False,
                  choose_randomly=True, cloud_mode=False,
                  aws_config=aws_config, wait=False, total_time=total_time)

    if n_procs > 1:
        # spawn a set of worker processes to work on the dataruns
        procs = []
        for i in range(n_procs):
            p = Process(target=work, kwargs=kwargs)
            p.start()
            procs.append(p)

        # wait for them to finish
        for p in procs:
            p.join()
    else:
        # single process: just run the worker in this process
        work(**kwargs)
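# Usage sketch (illustrative only): one way work_parallel might be called
# directly from Python. The Database connection arguments and the datarun
# ids are assumptions for the example, not the library's exact API.
#
#     db = Database(dialect='sqlite', database='atm.db')   # hypothetical args
#     work_parallel(db, datarun_ids=[1, 2], n_procs=4, total_time=3600)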
parser.add_argument('--dataruns', help='Only train on dataruns with these ids',
                    nargs='+')
parser.add_argument('--time', help='Number of seconds to run worker', type=int)
parser.add_argument('--choose-randomly', action='store_true',
                    help='Choose dataruns to work on randomly (default = sequential order)')
parser.add_argument('--no-save', dest='save_files', default=True,
                    action='store_const', const=False,
                    help="don't save models and metrics at all")

# parse arguments and load configuration
args = parser.parse_args()

# default logging config is different if initialized from the command line
if args.log_config is None:
    args.log_config = os.path.join(PROJECT_ROOT,
                                   'config/templates/log-script.yaml')

sql_config, _, aws_config, log_config = load_config(**vars(args))
initialize_logging(log_config)

# let's go
work(db=Database(**vars(sql_config)),
     datarun_ids=args.dataruns,
     choose_randomly=args.choose_randomly,
     save_files=args.save_files,
     cloud_mode=args.cloud_mode,
     aws_config=aws_config,
     log_config=log_config,
     total_time=args.time,
     wait=False)
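# Note (assumed context): the parser extended above is created near the top of
# the script, which is not shown here. A minimal sketch of that setup follows;
# the description string and the --cloud-mode / --log-config flags (read later
# via args.cloud_mode and args.log_config) are assumptions, not the script's
# verbatim contents.
#
#     import argparse
#
#     parser = argparse.ArgumentParser(description='Run a worker process')
#     parser.add_argument('--cloud-mode', action='store_true',
#                         help='use AWS to download data and upload results')
#     parser.add_argument('--log-config', help='path to logging config file')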
def start(self):
    """Open a database connection and run a worker, saving models and metrics."""
    db = Database(**vars(self._sql_config))
    work(db, log_config=self._log_config, save_files=True)