Code Example #1
    def _xlearning_submit(self, job_cmd, worker_num, data_names):
        # prepare the result HDFS data paths
        for name in data_names:
            path = os.path.join(self._hdfs_dir, f'{name}_pred')
            if hdfs.exists(path):
                hdfs.rm(path)
            hdfs.mkdir(path)

        inputs = [f'--input {self._hdfs_dir}/{i}#{i}' for i in data_names]

        cmd = ' '.join([
            f'{xlearning.XL_SUBMIT}',
            f'--app-type "tensorflow"',
            f'--app-name "prediction-{self._job_id}"',
            f'--cacheArchive {HDFS_CODE_CACHE}#libs,{PYTHON_ENV_CACHE}#python3',
            f'--launch-cmd "{job_cmd}"',
            f'--worker-memory {xlearning.WORKER_MEMORY}',
            f'--worker-num {worker_num}',
            # f'--worker-cores {XLearningConfig["worker_cores"]}',
            f'--ps-num {xlearning.PS_NUM}',
            f'--queue default',
            f'--user-path ./python3/bin',
            f'--board-enable false',
            f'--jars {xlearning.JARS}',
        ] + inputs)

        logger.info(cmd)
        run_cmd(cmd)
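
Every example on this page leans on a project-local hdfs helper module (hdfs.exists, hdfs.rm, hdfs.mkdir, hdfs.put) whose source is not included. As a point of reference, here is a minimal sketch of what such a wrapper could look like, assuming it shells out to the standard `hdfs dfs` CLI; the function names and signatures are taken from the call sites above, everything else is an assumption:

import subprocess


def _dfs(*args):
    # run an `hdfs dfs` subcommand and return the completed process
    return subprocess.run(['hdfs', 'dfs', *args],
                          capture_output=True, text=True)


def exists(path):
    # `hdfs dfs -test -e` exits with status 0 iff the path exists
    return _dfs('-test', '-e', path).returncode == 0


def rm(path):
    # recursive, forced delete, matching how the examples remove directories
    _dfs('-rm', '-r', '-f', path)


def mkdir(path):
    # -p creates missing parent directories
    _dfs('-mkdir', '-p', path)


def put(local_path, hdfs_path):
    # copy a local file up to HDFS
    _dfs('-put', local_path, hdfs_path)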
Code Example #2
    def readDays(self, start_date, end_date, prop, session=None, **kwargs):
        sqlList = self._feature.get_day_sql_list(start_date, end_date,
                                                 **kwargs)
        retDf = None
        for s, d in sqlList:

            kwargs[self._feature._data_date_col] = d
            output_file = self._feature.get_output_name(d, **kwargs)
            output_path = hadoop_conf.HDFS_FEATURE_ROOT + '/' + self._feature._name + '/' + output_file
            df = None
            if hdfs.exists(output_path):
                logger.info(
                    "feature {name} file {path} exists; reusing the cached file.".
                    format(name=self._feature._name, path=output_path))
                df = session.read.parquet(output_path)
            else:
                logger.info(
                    "feature {name} file {path} does not exist; fetching data from ClickHouse."
                    .format(name=self._feature._name, path=output_path))
                s = self.jdbc_sql(s)
                df = session.read.jdbc(self._url, s, properties=prop)
                df.write.parquet(path=output_path, mode='overwrite')
            if retDf is None:
                retDf = df
            else:
                retDf = retDf.union(df)
        return retDf
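
One detail the per-day loop depends on: Spark's DataFrame.union matches columns by position, not by name, so every day's DataFrame must come back with an identical column order. A self-contained illustration of the pitfall, and of unionByName as the order-insensitive alternative:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()
a = spark.createDataFrame([(1, 100)], ['id', 'score'])
b = spark.createDataFrame([(200, 2)], ['score', 'id'])  # columns swapped

# union() is positional, so b's score silently lands in the id column
print(a.union(b).collect())
# [Row(id=1, score=100), Row(id=200, score=2)]

# unionByName() matches columns by name instead
print(a.unionByName(b).collect())
# [Row(id=1, score=100), Row(id=2, score=200)]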
Code Example #3
def pack_libs(overwrite=True):
    path = os.path.join(os.getcwd(), LIB_NAME)
    zip_path = f'{LIB_NAME}.zip'
    zip_dir(path, zip_path)

    from conf.hadoop import HDFS_CODE_CACHE
    if hdfs.exists(HDFS_CODE_CACHE):
        if not overwrite:
            logger.info(f'{HDFS_CODE_CACHE} already exists; skipping upload of {zip_path}')
            return
        hdfs.rm(HDFS_CODE_CACHE)
    hdfs.mkdir(os.path.dirname(HDFS_CODE_CACHE))

    hdfs.put(zip_path, HDFS_CODE_CACHE)

    logger.info(f'successfully uploaded {zip_path} to {HDFS_CODE_CACHE}')
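
pack_libs delegates the archiving to a zip_dir helper that is not shown here. A plausible sketch using only the standard library; the name and signature come from the call above, the body is an assumption:

import os
import zipfile


def zip_dir(src_dir, zip_path):
    # recursively zip src_dir, storing entries relative to its parent
    # so the archive unpacks into a single top-level folder
    base = os.path.dirname(src_dir)
    with zipfile.ZipFile(zip_path, 'w', zipfile.ZIP_DEFLATED) as zf:
        for root, _dirs, files in os.walk(src_dir):
            for fname in files:
                abs_path = os.path.join(root, fname)
                zf.write(abs_path, os.path.relpath(abs_path, base))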
Code Example #4
    def _xlearning_submit(self, epoch, batch_size, worker_num, input_dim,
                          data_name):
        output_path = os.path.join(self._hdfs_dir, self.get_model_name())
        if hdfs.exists(output_path):
            hdfs.rm(output_path)

        entrance = self.get_worker_entrance()
        logger.info('[%s] start xlearning submit ...', self._job_id)

        worker_cmd = ' '.join([
            f'{WORKER_PYTHON} {entrance}',
            f'--job_id={self._job_id}',
            f'--hdfs_dir={self._hdfs_dir}',
            f'--data={data_name}',
            f'--model={self.get_model_name()}',
            f'--log_dir={_training_log_dir}',
            f'--training_epochs={epoch}',
            f'--input_dim={input_dim}',
            f'--learning_rate={self._learning_rate}',
            f'--batch_size={batch_size}',
            f'--l2={self._l2}',
        ])

        driver_cmd = ' '.join([
            f'{xlearning.XL_SUBMIT}',
            f'--app-type "tensorflow"',
            f'--app-name "CTR-{self._job_id}"',
            f'--launch-cmd "{worker_cmd}"',
            f'--input {self._hdfs_dir}/{data_name}#{data_name}',
            f'--output {self._hdfs_dir}/{self.get_model_name()}#{self.get_model_name()}',
            f'--board-logdir {_training_log_dir}',
            f'--cacheArchive {HDFS_CODE_CACHE}#libs,{PYTHON_ENV_CACHE}#python3',
            f'--worker-memory {xlearning.WORKER_MEMORY}',
            f'--worker-num {worker_num}',
            f'--worker-cores {xlearning.WORKER_CORES}',
            f'--ps-memory {xlearning.PS_MEMORY}',
            f'--ps-num {xlearning.PS_NUM}',
            f'--ps-cores {xlearning.PS_CORES}',
            f'--queue default',
            f'--user-path ./python3/bin',
            f'--jars {xlearning.JARS}',
            # '-Duser.timezone=UTC+0800',
        ])
        logger.info(driver_cmd)

        run_cmd(driver_cmd)
        logger.info('finished training process successfully.')
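
Both submit helpers hand the assembled command line to run_cmd, which is also not shown. A minimal sketch, assuming it simply runs the command through a shell and fails loudly on a non-zero exit so a broken submit aborts the job; only the name comes from the call sites, the body is an assumption:

import subprocess


def run_cmd(cmd):
    # execute through a shell (the commands above depend on quoting)
    # and raise CalledProcessError on a non-zero exit status
    subprocess.run(cmd, shell=True, check=True)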
Code Example #5
def init_hdfs_dir(hdfs_dir, clean_old=True):
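    # drop any stale output directory so the job starts from a clean path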
    if hdfs.exists(hdfs_dir) and clean_old:
        hdfs.rm(hdfs_dir)
Code Example #6
def clean_task_dir(runtime):
    if hdfs.exists(runtime.hdfs_dir):
        hdfs.rm(runtime.hdfs_dir)

    if os.path.exists(runtime.local_dir):
        shutil.rmtree(runtime.local_dir)
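
A hedged usage sketch for the two cleanup helpers above; the runtime object here is a stand-in with the two attributes clean_task_dir reads, and the paths are illustrative assumptions:

from types import SimpleNamespace

# stand-in for the project's runtime descriptor (real type not shown)
runtime = SimpleNamespace(
    hdfs_dir='/user/jobs/ctr/2020-01-01',   # assumed layout
    local_dir='/tmp/jobs/ctr/2020-01-01',
)

init_hdfs_dir(runtime.hdfs_dir)  # drop stale HDFS output, if present
clean_task_dir(runtime)          # remove both HDFS and local artifacts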
Code Example #7
    def readDaysWithSql(self,
                        start_date,
                        end_date,
                        sql_template,
                        output_template,
                        prop,
                        batch_cond=None,
                        use_jdbc=True,
                        session=None,
                        suffix="",
                        **kwargs):
        sqlList = []
        hdfs_files_list = []
        if batch_cond:
            for cond in batch_cond:
                kwargs.update(cond)
                sl = self._feature.get_day_sql_list(start_date, end_date,
                                                    sql_template, **kwargs)
                for sql, day in sl:
                    sqlList.append((sql, day, cond))
        else:
            sl = self._feature.get_day_sql_list(start_date, end_date,
                                                sql_template, **kwargs)
            for sql, day in sl:
                sqlList.append((sql, day, {}))

        ret_df = None
        for s, d, cond in sqlList:
            kwargs[self._feature._data_date_col] = d
            kwargs.update(cond)
            output_file = output_template.format(**kwargs) + suffix
            output_path = hadoop_conf.HDFS_FEATURE_ROOT + '/' + self._feature._name + '/' + output_file
            if hdfs.exists(output_path):
                logger.info(
                    "feature {name} file {path} exists; reusing the cached file.".
                    format(name=self._feature._name, path=output_path))
                hdfs_files_list.append(output_path)
                df = None
            else:
                logger.info(
                    "feature {name} file {path} does not exist; fetching data from ClickHouse."
                    .format(name=self._feature._name, path=output_path))
                if use_jdbc:
                    s = self.jdbc_sql(s)
                    df = session.read.jdbc(self._url, s, properties=prop)
                else:
                    df = session.sql(s)
                df.write.parquet(path=output_path, mode='overwrite')
            if ret_df is None:
                ret_df = df
            elif df is not None:
                ret_df = ret_df.union(df)

        if ret_df is None and len(hdfs_files_list) > 0:
            ret_df = session.read.parquet(*hdfs_files_list)
        elif len(hdfs_files_list) > 0:
            # merge cached day files into the freshly fetched rows;
            # union() returns a new DataFrame, so reassign the result
            cached_df = session.read.parquet(*hdfs_files_list)
            ret_df = ret_df.union(cached_df)
        return ret_df
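
The fallback branch at the end relies on spark.read.parquet accepting several paths in one call and returning the rows of all of them as a single DataFrame. A self-contained illustration:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master('local[1]').getOrCreate()
df = spark.createDataFrame([(1,)], ['id'])
df.write.parquet('/tmp/day1', mode='overwrite')
df.write.parquet('/tmp/day2', mode='overwrite')

# read.parquet takes any number of paths and concatenates their rows
merged = spark.read.parquet('/tmp/day1', '/tmp/day2')
print(merged.count())  # 2 -- one row from each day's file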