def setup_train(args, machine_is_host=False):
    """Prepare the local (and, off-host, remote) export root for a run.

    The local export root is
    ``<experiment_root>/<experiment_group>/<experiment_name>``.

    Host mode (``machine_is_host=True``): an existing local export root is
    deleted when the group is ``'test'``; for any other group the process
    exits. Otherwise the export root is created and the config exported.

    Remote mode: a directory is created on the remote under ``REMOTE_ROOT``.
    If it already exists there, the remote ``status.txt`` is fetched; when
    it holds ``STATUS_RECOVERY`` the status is rewritten to ``running``, the
    remote export root is downloaded and ``args.resume_training`` is set to
    True. Any other status (or a failed download) aborts the run.

    Args:
        args: Namespace providing ``experiment_root``, ``experiment_group``
            and ``experiment_name``. May be mutated (``resume_training``).
        machine_is_host: When True, skip all remote bookkeeping.

    Returns:
        Tuple ``(local_export_root, remote_export_root, communicator)``;
        the last two are ``None`` in host mode.

    Raises:
        AssertionError: If ``args.experiment_name`` is None.
        SystemExit: With code 0 when the export root already exists and
            the run cannot be recovered.
    """
    # Function-scope import: `exit()` is an interactive helper injected by
    # the `site` module and is not guaranteed to exist; sys.exit always is.
    import sys

    # set_up_gpu(args)
    exp_root = args.experiment_root
    exp_group = args.experiment_group
    exp_name = args.experiment_name
    assert exp_name is not None, 'args.experiment_name must be set'
    local_export_root = os.path.join(exp_root, exp_group, exp_name)

    if machine_is_host:
        remote_export_root = None
        communicator = None
        if os.path.exists(local_export_root):
            if exp_group == 'test':
                # Runs in the 'test' group are disposable and get overwritten.
                print('Removing local test export root {}'.format(
                    local_export_root))
                shutil.rmtree(local_export_root)
            else:
                print('Local export root exists {}'.format(local_export_root))
                sys.exit(0)
        create_local_export_root(args, local_export_root)
        export_config(args, local_export_root)
        print('Export root', local_export_root)
        return local_export_root, remote_export_root, communicator

    remote_export_root = os.path.join(REMOTE_ROOT, local_export_root)
    communicator = Communicator(HOST, PORT, USERNAME, PASSWORD)
    created = communicator.create_dir(remote_dir_path=remote_export_root)

    if created:
        print('Created export_root={} in remote'.format(remote_export_root))
        create_local_export_root(args, local_export_root)
        export_config(args, local_export_root)
        print('Export root', local_export_root)
        return local_export_root, remote_export_root, communicator

    # The remote export root already exists: only continue if the remote
    # status file marks the run as recoverable.
    if os.path.exists(local_export_root):
        print('Local export root exists while checking status')
        sys.exit(0)
    os.makedirs(local_export_root)
    local_status = os.path.join(local_export_root, 'status.txt')
    remote_status = os.path.join(remote_export_root, 'status.txt')
    try:
        communicator.sftp.get(remote_status, local_status)
        # Context manager so the handle is closed even if reading fails.
        with open(local_status) as status_file:
            status = status_file.readline()
    except Exception:
        # Best effort: any download/read failure is treated as "not
        # recoverable" through a sentinel that never matches below.
        status = 'failed to download status'

    print('Checking status')
    # readline() keeps the trailing newline, so compare ignoring surrounding
    # whitespace. NOTE(review): STATUS_RECOVERY is defined elsewhere; this
    # assumes it is a plain string -- confirm.
    if status.strip() == STATUS_RECOVERY.strip():
        print('Status is recovery')
        with open(local_status, 'w') as f:
            f.write('running\n')
        print("Write 'running' on remote")
        communicator.sftp.put(local_status, remote_status)
        print('Downloading remote export root')
        communicator.download_dir(remote_export_root, local_export_root)
        args.resume_training = True
    else:
        print('Status is not recovery')
        # Remove the directory created above for the status check.
        shutil.rmtree(local_export_root)
        print('Remote export root {} exists. Exiting'.format(
            remote_export_root))
        sys.exit(0)

    return local_export_root, remote_export_root, communicator