Example 1
    def __init__(self,
                 path,
                 items,
                 reduce_strategy={
                     'type': 'sum',
                     'key': 'image'
                 }):
        """Init a knowledge instance.
        Args:
            path(list<str>, str, optional): The storage path of the knowledge;
                       AFS/HDFS, the local file system, and memory are supported.
            items(list<str>): Names of the tensors to be saved.
            reduce_strategy(dict, optional): The policy for performing the reduce
                                   operation. If it is set to None,
                                   the reduce operation is not performed.
            reduce_strategy.type(str): Type of reduce operation.
            reduce_strategy.key(str): The key of the reduce operation.
                                      It is an element of items.
        """
        assert (isinstance(path, list) or isinstance(path, str)
                or (path is None)), "path type should be list or str or None"
        assert (isinstance(items, list)), "items should be a list"
        assert (isinstance(reduce_strategy,
                           dict)), "reduce_strategy should be a dict"
        self.path = path
        if isinstance(self.path, list):
            self.write_type = 'HDFS/AFS'
            assert (
                len(self.path) == 4 and isinstance(self.path[0], str)
                and isinstance(self.path[1], str)
                and isinstance(self.path[2], str)
                and isinstance(self.path[3], str)
            ), "path should contains four str, ['local hadoop home', 'fs.default.name', 'hadoop.job.ugi', 'FS path']"

            hadoop_home = self.path[0]
            configs = {
                "fs.default.name": self.path[1],
                "hadoop.job.ugi": self.path[2]
            }
            self.client = HDFSClient(hadoop_home, configs)
            assert (
                self.client.is_exist(self.path[3])
            ), "Please make sure your hadoop configuration is correct and the FS path exists"

            self.hdfs_local_path = "./teacher_knowledge"
            if not os.path.exists(self.hdfs_local_path):
                os.mkdir(self.hdfs_local_path)
        elif isinstance(self.path, str):
            self.write_type = "LocalFS"
            if not os.path.exists(path):
                raise ValueError("The local path [%s] does not exist." %
                                 (path))
        else:
            self.write_type = "MEM"
            self.knowledge_queue = Queue(64)

        self.items = items
        self.reduce_strategy = reduce_strategy
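
The constructor above selects a storage backend from the type of `path`. Below is a minimal usage sketch of the three call forms, assuming the enclosing class is the `Knowledge` class shown in Example 5; the HDFS address, credentials, and directories are placeholders, not taken from the snippet.

# Placeholders only; addresses, credentials, and directories are assumptions.
# 1) HDFS/AFS backend: a four-element list of strings.
k_hdfs = Knowledge(
    path=["/usr/local/hadoop-2.7.7",      # local hadoop home
          "hdfs://nameservice",           # fs.default.name
          "username,password",            # hadoop.job.ugi
          "/user/teacher_knowledge"],     # FS path (must already exist)
    items=["image", "logits"])

# 2) Local file system backend: an existing directory path.
k_local = Knowledge(path="./local_knowledge_dir", items=["image", "logits"])

# 3) In-memory backend: path=None stores knowledge in a bounded Queue.
k_mem = Knowledge(path=None, items=["image", "logits"],
                  reduce_strategy={'type': 'sum', 'key': 'image'})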
Example 2
 def __init__(self, config):
     """R
     """
     if 'fs_name' in config:
         hadoop_home = "$HADOOP_HOME"
         hdfs_configs = {
             "hadoop.job.ugi": config['fs_ugi'],
             "fs.default.name": config['fs_name']
         }
         self._hdfs_client = HDFSClient(hadoop_home, hdfs_configs)
     self._local_fs_client = LocalFSClient()
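
A short sketch of the `config` dict this constructor expects; only the `fs_name` and `fs_ugi` keys come from the snippet, the values are placeholders, and the enclosing class is the `FileHandler` shown in Example 4.

config = {
    "fs_name": "hdfs://nameservice",   # mapped to "fs.default.name"
    "fs_ugi": "username,password"      # mapped to "hadoop.job.ugi"
}
handler = FileHandler(config)   # omit both keys to fall back to the local FS client only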
Example 3
    def test(self):
        fs = HDFSClient("/usr/local/hadoop-2.7.7", None)
        dir_path = "./checkpointsaver_test"
        fs.delete(dir_path)

        s = CheckpointSaver(fs)

        fs.mkdirs("{}/exe.exe".format(dir_path))
        fs.mkdirs("{}/exe.1".format(dir_path))
        fs.mkdirs("{}/exe".format(dir_path))

        a = s.get_checkpoint_no(dir_path)
        self.assertEqual(len(a), 0)

        fs.mkdirs("{}/__paddle_checkpoint__.0".format(dir_path))
        fs.mkdirs("{}/__paddle_checkpoint__.exe".format(dir_path))

        a = s.get_checkpoint_no(dir_path)
        self.assertEqual(len(a), 1)

        s.clean_redundant_checkpoints(dir_path)
        s.clean_redundant_checkpoints(dir_path)

        fs.delete(dir_path)
Example 4
class FileHandler(object):
    """
    A smart file handler that automatically chooses local or AFS access based on the path.
    """

    def __init__(self, config):
        """R
        """
        if 'fs_name' in config:
            hadoop_home = "$HADOOP_HOME"
            hdfs_configs = {
                "hadoop.job.ugi": config['fs_ugi'],
                "fs.default.name": config['fs_name']
            }
            self._hdfs_client = HDFSClient(hadoop_home, hdfs_configs)
        self._local_fs_client = LocalFSClient()

    def is_exist(self, path):
        """R
        """
        if is_afs_path(path):
            return self._hdfs_client.is_exist(path)
        else:
            return self._local_fs_client.is_exist(path)

    def get_file_name(self, path):
        """R
        """
        sub_paths = path.split('/')
        return sub_paths[-1]

    def write(self, content, dest_path, mode='w'):
        """R
        """
        if is_afs_path(dest_path):
            file_name = self.get_file_name(dest_path)
            temp_local_file = "./tmp/" + file_name
            self._local_fs_client.remove(temp_local_file)
            org_content = ""
            if mode.find('a') >= 0:
                org_content = self._hdfs_client.cat(dest_path)
            content = org_content + content  # append mode: keep the existing content first
            self._local_fs_client.write(
                content, temp_local_file, mode
            )  # fleet hdfs_client only support upload, so write tmp file
            self._hdfs_client.delete(dest_path + ".tmp")
            self._hdfs_client.upload(dest_path + ".tmp", temp_local_file)
            self._hdfs_client.delete(dest_path + ".bak")
            self._hdfs_client.rename(dest_path, dest_path + '.bak')
            self._hdfs_client.rename(dest_path + ".tmp", dest_path)
        else:
            self._local_fs_client.write(content, dest_path, mode)

    def cat(self, path):
        """R
        """
        if is_afs_path(path):
            hdfs_cat = self._hdfs_client.cat(path)
            return hdfs_cat
        else:
            return self._local_fs_client.cat(path)

    def ls(self, path):
        """R
        """
        files = []
        if is_afs_path(path):
            files = self._hdfs_client.ls(path)
            files = [path + '/' + self.get_file_name(fi)
                     for fi in files]  # absolute path
        else:
            files = self._local_fs_client.ls(path)
            files = [path + '/' + fi for fi in files]  # absolute path
        return files

    def cp(self, org_path, dest_path):
        """R
        """
        org_is_afs = is_afs_path(org_path)
        dest_is_afs = is_afs_path(dest_path)
        if not org_is_afs and not dest_is_afs:
            return self._local_fs_client.cp(org_path, dest_path)
        if not org_is_afs and dest_is_afs:
            return self._hdfs_client.upload(dest_path, org_path)
        if org_is_afs and not dest_is_afs:
            return self._hdfs_client.download(org_path, dest_path)
        print("Not Suppor hdfs cp currently")
Example 5
class Knowledge(object):
    """
    The knowledge class describes how to extract and store the dark knowledge
    of the teacher model, and how the student model learns this dark knowledge.
    """
    def __init__(self,
                 path,
                 items,
                 reduce_strategy={
                     'type': 'sum',
                     'key': 'image'
                 }):
        """Init a knowledge instance.
        Args:
            path(list<str>, str, optional): The storage path of the knowledge;
                       AFS/HDFS, the local file system, and memory are supported.
            items(list<str>): Names of the tensors to be saved.
            reduce_strategy(dict, optional): The policy for performing the reduce
                                   operation. If it is set to None,
                                   the reduce operation is not performed.
            reduce_strategy.type(str): Type of reduce operation.
            reduce_strategy.key(str): The key of the reduce operation.
                                      It is an element of items.
        """
        assert (isinstance(path, list) or isinstance(path, str)
                or (path is None)), "path type should be list or str or None"
        assert (isinstance(items, list)), "items should be a list"
        assert (isinstance(reduce_strategy,
                           dict)), "reduce_strategy should be a dict"
        self.path = path
        if isinstance(self.path, list):
            self.write_type = 'HDFS/AFS'
            assert (
                len(self.path) == 4 and isinstance(self.path[0], str)
                and isinstance(self.path[1], str)
                and isinstance(self.path[2], str)
                and isinstance(self.path[3], str)
            ), "path should contains four str, ['local hadoop home', 'fs.default.name', 'hadoop.job.ugi', 'FS path']"

            hadoop_home = self.path[0]
            configs = {
                "fs.default.name": self.path[1],
                "hadoop.job.ugi": self.path[2]
            }
            self.client = HDFSClient(hadoop_home, configs)
            assert (
                self.client.is_exist(self.path[3])
            ), "Please make sure your hadoop configuration is correct and the FS path exists"

            self.hdfs_local_path = "./teacher_knowledge"
            if not os.path.exists(self.hdfs_local_path):
                os.mkdir(self.hdfs_local_path)
        elif isinstance(self.path, str):
            self.write_type = "LocalFS"
            if not os.path.exists(path):
                raise ValueError("The local path [%s] does not exist." %
                                 (path))
        else:
            self.write_type = "MEM"
            self.knowledge_queue = Queue(64)

        self.items = items
        self.reduce_strategy = reduce_strategy

    def _write(self, data):
        if self.write_type == 'HDFS/AFS':
            file_name = 'knowledge_' + str(self.file_cnt)
            file_path = os.path.join(self.hdfs_local_path, file_name)
            file_path += ".npy"
            np.save(file_path, data)
            self.file_cnt += 1
            self.client.upload(self.path[3], file_path)
            logger.info('{}.npy pushed to HDFS/AFS: {}'.format(
                file_name, self.path[3]))

        elif self.write_type == 'LocalFS':
            file_name = 'knowledge_' + str(self.file_cnt)
            file_path = os.path.join(self.path, file_name)
            np.save(file_path, data)
            logger.info('{}.npy saved'.format(file_name))
            self.file_cnt += 1

        else:
            self.knowledge_queue.put(data)
            logger.info('knowledge data pushed to Queue')

    def run(self, teacher_program, exe, place, scope, reader, inputs, outputs,
            call_back):
        """Start teacher model to do information.
        Args:
            teacher_program(Program): teacher program.
            scope(Scope): The scope used to execute the teacher,
                          which contains the initialized variables.
            reader(reader): The data reader used by the teacher.
            inputs(list<str>): The name of variables to feed the teacher program.
            outputs(list<str>): Need to write to the variable instance's names of
                                the Knowledge instance, which needs to correspond
                                to the Knowledge's items.
            call_back(func, optional): The callback function that handles the
                          outputs of the teacher, which is none by default,
                          that is, the output of the teacher is concat directly.
        Return:
            (bool): Whether the teacher task was successfully registered and started
        """
        assert (isinstance(
            teacher_program,
            fluid.Program)), "teacher_program should be a fluid.Program"
        assert (isinstance(inputs, list)), "inputs should be a list"
        assert (isinstance(outputs, list)), "outputs should be a list"
        assert (len(self.items) == len(outputs)
                ), "the length of outputs list should be equal with items list"
        assert (callable(call_back) or (call_back is None)
                ), "call_back should be a callable function or NoneType."

        for var in teacher_program.list_vars():
            var.stop_gradient = True

        compiled_teacher_program = fluid.compiler.CompiledProgram(
            teacher_program)
        self.file_cnt = 0
        if isinstance(reader, Variable) or (isinstance(reader, DataLoaderBase)
                                            and (not reader.iterable)):
            reader.start()
            try:
                while True:
                    logits = exe.run(compiled_teacher_program,
                                     scope=scope,
                                     fetch_list=outputs,
                                     feed=None)
                    knowledge = dict()
                    for index, array in enumerate(logits):
                        knowledge[self.items[index]] = array
                    self._write(knowledge)
            except EOFException:
                reader.reset()

        else:
            if not isinstance(reader, DataLoaderBase):
                feeder = fluid.DataFeeder(feed_list=inputs,
                                          place=place,
                                          program=teacher_program)
            for batch_id, data in enumerate(reader()):
                if not isinstance(reader, DataLoaderBase):
                    data = feeder.feed(data)
                logits = exe.run(compiled_teacher_program,
                                 scope=scope,
                                 fetch_list=outputs,
                                 feed=data)
                knowledge = dict()
                for index, array in enumerate(logits):
                    knowledge[self.items[index]] = array
                self._write(knowledge)
        return True

    def dist(self, student_program, losses):
        """Building the distillation network
        Args:
            student_program(Program): student program.
            losses(list<Variable>, optional): The losses to be added. If set to None,
                              no loss is added.
        Return:
            (Program): Program for distillation.
            (startup_program): Program for initializing distillation network.
            (reader): Data reader for distillation training.
            (Variable): Loss of distillation training
        """

    def loss(self, loss_func, *variables):
        """User-defined loss
        Args:
            loss_func(func): Function used to define loss.
            *variables(list<str>): Variable name list.
        Return:
            (Variable): Distillation loss.
        """
        pass

    def fsp_loss(self):
        """fsp loss
        """
        pass

    def l2_loss(self):
        """l2 loss
        """
        pass

    def softlabel_loss(self):
        """softlabel_loss
        """
        pass
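
A minimal sketch of driving `Knowledge.run` with the in-memory backend, assuming the PaddlePaddle 1.x static-graph (`fluid`) API that the class itself relies on; the tiny network and the random reader are stand-ins, not part of the original code.

import numpy as np
import paddle.fluid as fluid

teacher_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(teacher_program, startup_program):
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    logits = fluid.layers.fc(input=image, size=10)

place = fluid.CPUPlace()
exe = fluid.Executor(place)
exe.run(startup_program)

def random_reader():
    # Eight single-sample batches of random data stand in for a real reader.
    for _ in range(8):
        yield [(np.random.rand(784).astype('float32'),)]

knowledge = Knowledge(path=None, items=['logits'])  # in-memory queue backend
knowledge.run(teacher_program, exe, place, fluid.global_scope(), random_reader,
              inputs=['image'], outputs=[logits.name], call_back=None)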
Example 6
from args import print_arguments, parse_args
from utils import tdm_sampler_prepare, tdm_child_prepare, tdm_emb_prepare

from train_network import TdmTrainNet

logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger("fluid")
logger.setLevel(logging.INFO)

hadoop_home = os.getenv("HADOOP_HOME")
configs = {
    "fs.default.name": os.getenv("FS_NAME"),
    "hadoop.job.ugi": os.getenv("FS_UGI")
}
client = HDFSClient(hadoop_home, configs)


def get_dataset(inputs, args):
    """get dataset"""
    dataset = fluid.DatasetFactory().create_dataset()
    dataset.set_use_var(inputs)
    dataset.set_pipe_command("python ./dataset_generator.py")
    dataset.set_batch_size(args.batch_size)
    dataset.set_thread(int(args.cpu_num))
    file_list = [
        str(args.train_files_path) + "/%s" % x
        for x in os.listdir(args.train_files_path)
    ]

    # Make sure that each training node holds a different subset of the training files
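
The snippet is cut off here. Inside `get_dataset`, one way to honour the comment above is to shard the file list per trainer; the lines below are a sketch rather than the original continuation, and they assume the launcher sets the standard `PADDLE_TRAINER_ID` and `PADDLE_TRAINERS_NUM` environment variables.

    trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
    trainer_num = int(os.getenv("PADDLE_TRAINERS_NUM", "1"))
    dataset.set_filelist(file_list[trainer_id::trainer_num])  # disjoint subset per node
    return dataset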
Example 7
        "fc_3.w_0", "fc_3.b_0", "fc_4.w_0", "fc_4.b_0", "fc_5.w_0", "fc_5.b_0"
    ]

    if fleet.is_server():
        fleet.run_server()
    elif fleet.is_worker():
        with fluid.scope_guard(scope3):
            exe.run(update_model._startup_program)
        with fluid.scope_guard(scope2):
            exe.run(join_common_model._startup_program)

        configs = {
            "fs.default.name": config.fs_name,
            "hadoop.job.ugi": config.fs_ugi
        }
        hdfs_client = HDFSClient("$HADOOP_HOME", configs)

        save_first_base = config.save_first_base
        path = config.train_data_path
        online_pass_interval = fleet_util.get_online_pass_interval(
            config.days, config.hours, config.split_interval,
            config.split_per_pass, False)
        pass_per_day = len(online_pass_interval)
        last_day, last_pass, last_path, xbox_base_key = [
            -1, -1, "", 123
        ]  #fleet_util.get_last_save_model(config.output_path, config.fs_name, config.fs_ugi)
        reqi = last_day != -1

        if config.need_reqi_changeslot and config.reqi_dnn_plugin_day >= last_day and config.reqi_dnn_plugin_pass >= last_pass:
            pass
            # reqi_changeslot(config.hdfs_dnn_plugin_path, join_save_params, common_save_params, update_save_params, scope2, scope3)
Example 8
 def test_hdfs_checkpoint(self):
     fs = HDFSClient("/usr/local/hadoop-2.7.7", None)
     dir_path = "./checkpoint_test_hdfs"
     self._test_checkpoint(fs, os.path.abspath(dir_path))
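
Across these examples the `HDFSClient` is constructed in two ways: with an explicit hadoop home plus a configs dict (Examples 6 and 7), or with `configs=None` in the tests (Examples 3 and 8). A brief sketch with placeholder values:

configs = {
    "fs.default.name": "hdfs://nameservice",   # placeholder name node address
    "hadoop.job.ugi": "username,password",     # placeholder credentials
}
client = HDFSClient("/usr/local/hadoop-2.7.7", configs)      # fully configured client
test_client = HDFSClient("/usr/local/hadoop-2.7.7", None)    # rely on the node's hadoop defaults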