Ejemplo n.º 1
0
def run_remote_worker(worker_id, worker_path, id):
    """Run worker on remote machine.

    Sets up logging under ``worker_path``, loads the worker config, applies
    the environment variables it carries, launches the worker through the
    GPU or NPU launcher and then calls ``kill_proc_tree`` on every pid the
    launcher returned.

    :param worker_id: worker identifier, also used in the log file name
    :param worker_path: directory used as log path and forwarded to the
        config loader and the launcher
    :param id: identifier forwarded unchanged to ``_load_config`` and the
        launcher (the name shadows the builtin but is part of the interface)
    :return: always 0
    :rtype: int
    """
    from zeus.common.utils import init_log
    init_log(level="info",
             log_file=".temp_{}.log".format(worker_id),
             log_path=worker_path)

    config = _load_config(worker_id, worker_path, id)
    env = config["env"]
    os.environ["LD_LIBRARY_PATH"] = env["LD_LIBRARY_PATH"]
    os.environ["PWD"] = env["PWD"]
    os.chdir(os.environ["PWD"])
    zeus.register_zeus(os.environ['BACKEND_TYPE'].lower())

    # Start from an empty list so the cleanup loop below is well-defined
    # even when the device is neither GPU nor NPU (previously this path
    # failed with UnboundLocalError).
    sub_pid_list = []
    if zeus.is_gpu_device():
        sub_pid_list = call_in_gpu(config, id, worker_id, worker_path)
    elif zeus.is_npu_device():
        # The NPU launcher additionally needs these variables from the config.
        os.environ["PYTHONPATH"] = env["PYTHONPATH"]
        os.environ["PATH"] = env["PATH"]
        os.environ["ASCEND_OPP_PATH"] = env["ASCEND_OPP_PATH"]
        sub_pid_list = call_in_npu(config, id, worker_id, worker_path)
    else:
        logging.warning(
            "DistributedWorker: device is neither GPU nor NPU, "
            "no worker was launched.")
    logging.info("DistributedWorker finished!")
    for sub_pid in sub_pid_list:
        kill_proc_tree(pid=sub_pid)
    logging.info("DistributedWorker subprocess cleaned!")
    return 0
Ejemplo n.º 2
0
def load_config(config_file):
    """Restore a pickled configuration snapshot into the current process.

    The snapshot's non-empty ``env`` entries are exported to ``os.environ``,
    zeus is registered for the backend named by ``BACKEND_TYPE``, and the
    class registry plus the general/dataset/model/trainer/evaluator
    configs are restored from the matching snapshot sections.

    :param config_file: path of the pickle file to read
    """
    import os
    import pickle

    # NOTE(review): pickle.load can execute arbitrary code; config_file
    # must come from a trusted source.
    with open(config_file, 'rb') as stream:
        snapshot = pickle.load(stream)

    for name, setting in snapshot["env"].items():
        if not setting:
            # Empty/None entries are not exported.
            continue
        os.environ[name] = setting

    from zeus import register_zeus
    backend_name = os.environ['BACKEND_TYPE'].lower()
    register_zeus(backend_name)

    # Imported only after the backend has been registered.
    from zeus.common.class_factory import ClassFactory
    from zeus.common.general import General
    from zeus.datasets.conf.dataset import DatasetConfig
    from zeus.networks.model_config import ModelConfig
    from zeus.trainer.conf import TrainerConfig
    from zeus.evaluator.conf import EvaluatorConfig

    ClassFactory.__registry__ = snapshot["class_factory"]
    restore_plan = (
        (General, "general"),
        (DatasetConfig, "dataset"),
        (ModelConfig, "model"),
        (TrainerConfig, "trainer"),
        (EvaluatorConfig, "evaluator"),
    )
    for conf_cls, section in restore_plan:
        conf_cls.from_dict(snapshot[section])
Ejemplo n.º 3
0
def set_backend(backend='pytorch', device_category='GPU'):
    """Set backend.

    Returns immediately when ``BACKEND_TYPE`` is already present in the
    environment. Otherwise writes ``DEVICE_CATEGORY`` and ``BACKEND_TYPE``
    to the environment, sets the data format, registers zeus for the
    backend, imports the vega algorithm packages and backs up the configs.

    :param backend: backend type, one of pytorch, tensorflow or mindspore,
        default pytorch
    :type backend: str
    :param device_category: value written to ``DEVICE_CATEGORY``; pass
        ``None`` to keep the category detected from the visible-devices
        variables, default GPU
    :type device_category: str
    :raises Exception: if ``backend`` is not a supported backend name
    """
    env = os.environ
    if "BACKEND_TYPE" in env:
        return

    # Mirror the underscore variable under the hyphenated name that the
    # detection below looks for.
    if 'NPU_VISIBLE_DEVICES' in env:
        env['NPU-VISIBLE-DEVICES'] = env['NPU_VISIBLE_DEVICES']

    # Detected category: CUDA is checked first, then NPU.
    if 'CUDA_VISIBLE_DEVICES' in env:
        env['DEVICE_CATEGORY'] = 'GPU'
    elif 'NPU-VISIBLE-DEVICES' in env:
        env['DEVICE_CATEGORY'] = 'NPU'
        # Keep copies of the rank settings under ORIGIN_* names.
        for rank_var in ('RANK_TABLE_FILE', 'RANK_SIZE'):
            env['ORIGIN_' + rank_var] = env[rank_var]

    # An explicit category overrides whatever was detected above.
    if device_category is not None:
        env['DEVICE_CATEGORY'] = device_category

    # Map the backend name onto the BACKEND_TYPE value.
    known_backends = (
        ('pytorch', 'PYTORCH'),
        ('tensorflow', 'TENSORFLOW'),
        ('mindspore', 'MINDSPORE'),
    )
    for matched, backend_type in known_backends:
        if backend == matched:
            env['BACKEND_TYPE'] = backend_type
            break
    else:
        raise Exception('backend must be pytorch, tensorflow or mindspore')
    if matched == 'tensorflow':
        import warnings
        warnings.filterwarnings("ignore", category=FutureWarning)

    set_data_format()
    register_zeus(backend)

    # vega: imported here for their import-time effects.
    import vega.core.search_algs.ps_differential
    import vega.algorithms

    from zeus.common.config_serializable import backup_configs
    backup_configs()
Ejemplo n.º 4
0
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
from xt.model.model_zeus import XTModelZeus

from zeus.common.util.common import import_config
from zeus.common.util.register import Registers
from zeus import set_backend, register_zeus
from zeus.trainer.trainer_api import Trainer
from zeus.common.class_factory import ClassFactory, ClassType
from zeus.trainer.modules.conf.loss import LossConfig
from zeus.trainer.modules.conf.optim import OptimConfig

# Runs at import time: select the TensorFlow backend with the GPU device
# category before the model class below is defined.
set_backend(backend='tensorflow', device_category='GPU')
# Explicitly register zeus for TensorFlow as well.
register_zeus('tensorflow')


@Registers.model
class DqnZeus(XTModelZeus):
    """Docstring for DqnMlp."""
    def __init__(self, model_info):
        """Record the state/action dimensions and run the base initializer.

        :param model_info: mapping that must contain ``state_dim`` and
            ``action_dim``; its optional ``model_config`` entry is handed
            to ``import_config`` together with this module's globals.
        """
        import_config(globals(), model_info.get('model_config'))

        self.state_dim = model_info['state_dim']
        self.action_dim = model_info['action_dim']

        # NOTE(review): the dimensions are assigned before the base
        # initializer runs — presumably it builds the network via
        # create_model; confirm against XTModelZeus.
        super().__init__(model_info)

    def create_model(self, model_info):
        """Create Deep-Q network."""