Example 1
    def __init__(self):
        self._engine = None

        self._steps = None
        self._lang = None
        self._log_level = None
        self._log_dir = None
        self._mode = None

        config.setting(config.sys("STEPS"), )("_SYS")
Example 2
class MyModel(ModelBase):

    cfg = config.setting(config.req("MODEL.learning_rate"),
                         config.req("MODEL.classes"),
                         config.req("MODEL.layers"),
                         config.opt("MODEL.batch_size", 8))

    def __init__(self, fmap):
        super(MyModel, self).__init__(fmap)

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        self.compute_loss = tf.keras.losses.SparseCategoricalCrossentropy()

        self.mean_loss = tf.keras.metrics.Mean()
        self.acc = tf.keras.metrics.SparseCategoricalAccuracy()

        self.metrics = {"mean_loss": self.mean_loss, "acc": self.acc}
        self.msg_frac = 10

    def build(self):

        concat_list = self.get_inputs(tp="nums")
        images = tf.concat(concat_list, axis=1)
        images = tf.reshape(images, (-1, 28, 28, 1))

        output = CNN(n_class=10)(images)

        arg_max = tf.argmax(output, axis=1)
        self.set_output(output, "softmax")
        self.set_output(arg_max, "argmax")

    @tf.function
    def train(self, feature, label):
        _label = label["label"]

        with tf.GradientTape() as tape:
            output, _ = self.model(feature)
            loss = self.compute_loss(_label, output)

        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))

        self.mean_loss(loss)
        self.acc(_label, output)

    @tf.function
    def evaluate(self, feature, label):
        _label = label["label"]

        output, _ = self.model(feature)
        loss = self.compute_loss(_label, output)
        self.mean_loss(loss)
        self.acc(_label, output)

    @tf.function
    def predict(self, feature):
        pred = self.model(feature)
        return pred
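
The train and evaluate methods above follow TensorFlow 2's custom training-loop pattern: a forward pass recorded by tf.GradientTape, gradients applied through the optimizer, and streaming metrics updated per step. For reference, here is a minimal self-contained sketch of the same pattern outside DLFlow's ModelBase; the toy model and random data are illustrative placeholders, not part of the framework.

import tensorflow as tf

# Stand-alone sketch of the GradientTape training step used by MyModel.train.
# The model and data are placeholders for illustration only.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(32, activation="relu"),
    tf.keras.layers.Dense(10, activation="softmax"),
])
optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
compute_loss = tf.keras.losses.SparseCategoricalCrossentropy()
mean_loss = tf.keras.metrics.Mean()
acc = tf.keras.metrics.SparseCategoricalAccuracy()

@tf.function
def train_step(features, labels):
    with tf.GradientTape() as tape:
        probs = model(features, training=True)   # forward pass
        loss = compute_loss(labels, probs)

    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

    mean_loss(loss)                              # streaming metrics
    acc(labels, probs)

train_step(tf.random.normal((8, 20)),
           tf.random.uniform((8,), maxval=10, dtype=tf.int64))
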
Example 3
class _Root(TaskNode):

    parent_tag = TaskNode.set_tag()
    output_tag = TaskNode.set_tag(UNIVERSAL_TAG, "_START", "_ROOT")

    cfg = config.setting(config.req("MODEL_TAG"), config.req("MODEL_DATE"),
                         config.req("HDFS_WORKSPACE"),
                         config.req("PRIMARY_KEYS"), config.req("FEATURE_TAG"),
                         config.req("FEATURE_DATE"),
                         config.opt("HDFS_FEATURE_DIR", _HDFS_FEATURE_DIR),
                         config.opt("HDFS_MODEL_DIR", _HDFS_MODEL_DIR),
                         config.opt("HDFS_FEMODEL_DIR", _HDFS_FEMODEL_DIR),
                         config.opt("LOCAL_WORKSPACE", "./dlflow_default"),
                         config.opt("LOCAL_MODEL_DIR", _LOCAL_MODEL_DIR),
                         config.opt("LOCAL_FEMODEL_DIR", _LOCAL_FEMODEL_DIR),
                         config.opt("LOCAL_TMP_DIR", _LOCAL_TMP_DIR),
                         config.opt("DROP_COLUMNS", []))

    def __init__(self):
        super(_Root, self).__init__()

    @TaskNode.timeit
    def run(self):
        def solve_local_path(raw_path):
            path = Path(raw_path).resolve()
            if not path.is_dir():
                path.mkdir(parents=True)

            return path.as_posix()

        config.LOCAL_WORKSPACE = solve_local_path(config.LOCAL_WORKSPACE)
        config.LOCAL_MODEL_DIR = solve_local_path(config.LOCAL_MODEL_DIR)
        config.LOCAL_FEMODEL_DIR = solve_local_path(config.LOCAL_FEMODEL_DIR)
        config.LOCAL_TMP_DIR = solve_local_path(config.LOCAL_TMP_DIR)

        def seq_conf_parser(v, sign=","):
            if isinstance(v, str):
                iter_v = v.split(sign)
            elif isinstance(v, list):
                iter_v = v
            else:
                iter_v = []

            return [i for i in map(lambda x: x.strip(), iter_v) if i]

        config.PRIMARY_KEYS = seq_conf_parser(config.PRIMARY_KEYS)
        config.LABELS = seq_conf_parser(config.LABELS)
        config.DROP_COLUMNS = seq_conf_parser(config.DROP_COLUMNS)

        if "SPARK" in config:
            app_name = ".".join(["DLFlow", config.uuid])
            spark_conf = config.SPARK.dense_dict if config.SPARK else {}
            spark_app = SparkBaseApp()
            spark_app.initialize_spark(app_name, spark_conf)

        else:
            hdfs = HDFS()
            hdfs.initialize_hdfs()
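
The nested seq_conf_parser helper normalizes sequence-style settings that may arrive either as comma-separated strings or as lists, dropping empty entries. A small usage sketch of the same function (the input values below are made up):

def seq_conf_parser(v, sign=","):
    # Same normalization as in _Root.run: accept str or list, strip blanks.
    if isinstance(v, str):
        iter_v = v.split(sign)
    elif isinstance(v, list):
        iter_v = v
    else:
        iter_v = []
    return [i for i in map(lambda x: x.strip(), iter_v) if i]

print(seq_conf_parser("uid, item_id, "))   # ['uid', 'item_id']
print(seq_conf_parser(["label"]))          # ['label']
print(seq_conf_parser(None))               # []
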
Example 4
class _Merge(TaskNode):

    parent_tag = TaskNode.set_tag("_START")
    output_tag = TaskNode.set_tag("RAW_FEATURE")

    cfg = config.setting(config.req("SPARK"), config.req("MERGE.config_file"),
                         config.req("MERGE.fit"), config.req("MERGE.seed_sql"))

    def __init__(self):
        super(_Merge, self).__init__()

    @TaskNode.timeit
    def run(self):
        fit = config.MERGE.fit

        seed_sql = config.MERGE.seed_sql

        feature_config_file = Path(config.MERGE.config_file).resolve()

        if fit:
            assert feature_config_file.is_file(), \
                i18n("Input feature configuration is not exists "
                     "when fit=true: {}") \
                .format(feature_config_file)

        _spark_conf = config.SPARK.dense_dict
        params = []
        for k, v in _spark_conf.items():
            if k.startswith("spark."):
                _param = '--conf "{}={}"'.format(k, v)
            else:
                _param = '--{} "{}"'.format(k, v)

            params.append(_param)

        spark_conf = "\n".join(params)

        _model_dir = Path(config.HDFS_FEMODEL_DIR)
        hdfs_merge_dir = _model_dir.joinpath("merge").as_posix()

        out_dir = Path(config.RAW_FEATURES).as_posix()

        spark_submit = SparkJARExec(spark_conf=spark_conf,
                                    seed_sql=seed_sql,
                                    feature_config=feature_config_file,
                                    feature_date=config.FEATURE_DATE,
                                    feature_model_hdfs=hdfs_merge_dir,
                                    feature_out_hdfs=out_dir,
                                    fit=fit)

        spark_submit.run()

        logging.info(i18n("Feature merge done."))
Example 5
class DemoInput(InputBase):

    cfg = config.setting(config.opt("DemoParam", "DemoDefaultValue"))

    def __init__(self, fmap):
        super(DemoInput, self).__init__(fmap)

    def tfr_inputs(self, files):
        ...

    def rdd_inputs(self, rdd):
        ...
Example 6
class DemoTask(TaskNode):

    parent_tag = TaskNode.set_tag("PARENT_TAG")
    output_tag = TaskNode.set_tag("OUTPUT_TAG")

    bind_tasks = "task name or list of tasks"

    cfg = config.setting(
        config.req("DemoParam")
    )

    def __init__(self):
        super(DemoTask, self).__init__()

    @TaskNode.timeit
    def run(self):
        logging.info("Running {}".format(self.__class__.__name__))
Example 7
class DemoModel(ModelBase):

    cfg = config.setting(config.opt("DemoParam", "DemoDefaultValue"))

    def __init__(self, fmap):
        super(DemoModel, self).__init__(fmap)

    def build(self):
        ...

    def train(self, feature, label):
        ...

    def evaluate(self, feature, label):
        ...

    def predict(self, feature):
        ...
Example 8
class DataPrepare(TaskNode):

    parent_tag = TaskNode.set_tag("_START")
    output_tag = TaskNode.set_tag("RAW_FEATURE")

    cfg = config.setting(
        config.req("SPARK"),
        config.req("IRIS_DATASET_FOR"),
        config.opt("DATA_DIR", "$LOCAL_WORKSPACE/data_dir"),
        config.opt("HDFS_FEATURE_DIR", _HDFS_FEATURE_DIR),
    )

    def __init__(self):
        super(DataPrepare, self).__init__()

    @TaskNode.timeit
    def run(self):

        spark_app = SparkBaseApp()
        spark = spark_app.spark
        hdfs = spark_app.hdfs

        hdfs_save_path = Path(config.HDFS_FEATURE_DIR)

        if hdfs.exists(hdfs_save_path / "_SUCCESS"):
            logging.info("Exists data found. {}".format(hdfs_save_path))
            logging.info("Data is ready! Skip prepare...")
            return

        train_data_path, test_data_path = get_data()
        if config.IRIS_DATASET_FOR == "train":
            local_data_path = train_data_path
        else:
            local_data_path = test_data_path

        pdf = pd.read_csv(local_data_path, header=None)
        pdf.columns = data_header
        pdf["idx"] = [i for i in range(1, len(pdf) + 1)]

        df = spark.createDataFrame(pdf)

        logging.info("Saving Dataset to {}".format(hdfs_save_path))
        df.repartition(1).write.save(hdfs_save_path.as_posix())
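
Before the Spark conversion, the task shapes the raw CSV with pandas: read without a header, attach column names, and add a 1-based idx column. A stand-alone sketch of that step; the file name and column names here are placeholders (in the task they come from get_data() and data_header):

import pandas as pd

# Placeholder path and header, standing in for get_data() and data_header.
data_header = ["sepal_length", "sepal_width",
               "petal_length", "petal_width", "species"]

pdf = pd.read_csv("iris_train.csv", header=None)  # headerless raw CSV
pdf.columns = data_header                          # attach column names
pdf["idx"] = range(1, len(pdf) + 1)                # 1-based row index

# The resulting frame is then handed to spark.createDataFrame(pdf).
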
Example 9
class _Predict(TaskNode):

    parent_tag = TaskNode.set_tag("_BUILD", "TRAINED_MODEL", "ENCODE_FEATURE")
    output_tag = TaskNode.set_tag("PREDICT_RESULT")

    bind_tasks = "_Build"

    cfg = config.setting(
        config.req("SPARK"),
        config.opt("HDFS_PREDICT_DIR", _HDFS_PREDICT_DIR),
    )

    def __init__(self):
        super(_Predict, self).__init__()

    @TaskNode.timeit
    def run(self):
        spark_app = SparkBaseApp()
        spark = spark_app.spark
        sc = spark_app.sc
        hdfs = spark_app.hdfs

        dirs = config._build_dirs
        tmp_fmap_dir = dirs["tmp_fmap_dir"]
        hdfs_ckpt_dir = dirs["hdfs_ckpt_dir"]
        hdfs_static_dir = dirs["hdfs_static_dir"]

        sc.addFile(hdfs.hdfs_whole_path(hdfs_static_dir.as_posix()),
                   recursive=True)
        sc.addFile(hdfs.hdfs_whole_path(hdfs_ckpt_dir.as_posix()),
                   recursive=True)

        fmap = Fmap.load(tmp_fmap_dir)

        bc_static_model_dir = sc.broadcast("static")
        bc_fmap = sc.broadcast(fmap)
        bc_config = sc.broadcast(config)

        def predict_map(rdd):
            from pyspark.files import SparkFiles

            config = bc_config.value
            fmap = bc_fmap.value
            static_dir = SparkFiles.get(bc_static_model_dir.value)
            ckpt_dir = SparkFiles.get("ckpt")

            from dlflow.mgr import Collector, model
            collect = Collector()
            collect(static_dir, "Static models")

            input_cls = model[config.MODEL.input_name]
            dataset = input_cls(fmap).rdd_inputs(rdd, config.MODEL.batch_size)

            model_cls = model[config.MODEL.model_name]
            model_ins = model_cls(fmap)
            model_ins.load_model(ckpt_dir)

            return model_ins.predict_act(dataset)

        local_model = model[config.MODEL.model_name](fmap)
        df_title = local_model.pkey_names
        df_title.extend(local_model.output_names)

        df = spark.read.parquet(config.HDFS_ENCODE_DIR)

        parallelism = spark.conf.get("spark.default.parallelism", None)
        if parallelism:
            num_partitions = int(parallelism)
        else:
            num_partitions = df.rdd.getNumPartitions()

        pred_df = df.repartition(num_partitions) \
                    .rdd \
                    .mapPartitions(predict_map) \
                    .toDF(df_title)

        hdfs_predict_dir = config.HDFS_PREDICT_DIR
        spark_app.save_compress(pred_df, hdfs_predict_dir)

        logging.info(
            i18n("Predicting result save to {}").format(hdfs_predict_dir))
        logging.info(i18n("Predicting Done."))
Example 10
class DNNRegression(ModelBase):

    cfg = config.setting(config.req("MODEL.learning_rate"),
                         config.req("MODEL.layers"),
                         config.opt("MODEL.batch_size", 128))

    def __init__(self, fmap):
        super(DNNRegression, self).__init__(fmap)

        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=config.MODEL.learning_rate)
        self.compute_loss = tf.keras.losses.MeanSquaredError()

        self.mean_loss = tf.keras.metrics.Mean()

        self.metrics = {
            "mean_loss": self.mean_loss,
        }

        self.msg_frac = 100

    def build(self):

        concat_list = self.get_inputs(tp="nums")
        for ctg_inp, depth in self.get_inputs(tp="ctgs", with_depth=True):
            _emb = _Embedding(depth, 4)(ctg_inp)
            concat_list.append(_emb)

        net = tf.concat(concat_list, axis=1)

        for size in config.MODEL.layers:
            net = tf.keras.layers.Dense(size, activation=tf.nn.relu)(net)

        output = tf.keras.layers.Dense(1)(net)
        sigmoid = tf.nn.sigmoid(output)

        self.set_output(output, "output")
        self.set_output(sigmoid, "sigmoid")

    @tf.function
    def train(self, feature, label):
        _label = label["label"]

        with tf.GradientTape() as tape:
            logits, _ = self.model(feature)
            loss = self.compute_loss(_label, logits)

        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))

        self.mean_loss(loss)

    @tf.function
    def evaluate(self, feature, label):
        _label = label["label"]

        logits, _ = self.model(feature)
        loss = self.compute_loss(_label, logits)
        self.mean_loss(loss)

    @tf.function
    def predict(self, feature):
        pred = self.model(feature)
        return pred
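
The _Embedding(depth, 4) layer used in build appears to be a project-local wrapper; the underlying idea of embedding integer categorical inputs and concatenating them with the numeric block can be sketched with plain Keras layers. Sizes and names below are illustrative, not taken from the framework:

import tensorflow as tf

# Illustrative sizes; in DNNRegression they come from the feature map/config.
num_numeric = 8      # width of the numeric feature block
ctg_depth = 100      # vocabulary size of one categorical feature
emb_dim = 4          # embedding width, as in _Embedding(depth, 4)

nums_inp = tf.keras.Input(shape=(num_numeric,), dtype=tf.float32)
ctg_inp = tf.keras.Input(shape=(1,), dtype=tf.int64)

# Embed the categorical id and flatten it so it can be concatenated
# with the numeric features, mirroring the concat_list construction.
emb = tf.keras.layers.Embedding(ctg_depth, emb_dim)(ctg_inp)
emb = tf.keras.layers.Flatten()(emb)

net = tf.concat([nums_inp, emb], axis=1)
for size in (64, 32):                    # stand-in for config.MODEL.layers
    net = tf.keras.layers.Dense(size, activation=tf.nn.relu)(net)
output = tf.keras.layers.Dense(1)(net)

model = tf.keras.Model(inputs=[nums_inp, ctg_inp], outputs=output)
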
Example 11
class _Encode(TaskNode):

    parent_tag = TaskNode.set_tag("RAW_FEATURE")
    output_tag = TaskNode.set_tag("ENCODE_FEATURE", "TFRECORD_FEATURE")

    cfg = config.setting(
        config.req("SPARK"),
        config.req("BUCKET", None),
        config.opt("HDFS_ENCODE_DIR", _HDFS_ENCODE_DIR),
    )

    def __init__(self):
        super(_Encode, self).__init__()

    @TaskNode.timeit
    def run(self):
        spark_app = SparkBaseApp()
        spark = spark_app.spark
        hdfs = spark_app.hdfs

        if "HDFS_TFRECORD_DIR" in config:
            hdfs_tfrecord_dir = Path(config.HDFS_TFRECORD_DIR)
            if hdfs.exists(hdfs_tfrecord_dir / "_SUCCESS"):
                logging.info(i18n("TFRecords already exist, skip encoding."))
                return

        elif "HDFS_ENCODE_DIR" in config:
            hdfs_encode_dir = Path(config.HDFS_ENCODE_DIR)
            if hdfs.exists(hdfs_encode_dir / "_SUCCESS"):
                logging.info(
                    i18n("Encode result already exists, encoding done."))
                return

        fmap_dir = "fmap_{}".format(config.uuid)
        tmp_fmap_dir = Path(config.LOCAL_TMP_DIR).joinpath(fmap_dir)

        local_fmap_dir = Path(config.LOCAL_FEMODEL_DIR).joinpath("fmap")
        local_norm_dir = Path(config.LOCAL_FEMODEL_DIR).joinpath("norm")

        hdfs_fmap_dir = Path(config.HDFS_FEMODEL_DIR).joinpath("fmap")
        hdfs_norm_dir = Path(config.HDFS_FEMODEL_DIR).joinpath("norm")

        if not hdfs.exists(config.HDFS_FEMODEL_DIR):
            hdfs.mkdirs(hdfs_fmap_dir.parent)

        spark_parser = Parser("spark")
        parser_cls = spark_parser.get_parser()
        normalizer_cls = spark_parser.get_normalizer()

        if "SPARK.spark.default.parallelism" in config:
            parallelism = int(config.SPARK.spark.default.parallelism)
        else:
            parallelism = _DEFAULT_PARALLELISM

        df = spark.read \
                  .parquet(config.HDFS_FEATURE_DIR) \
                  .repartition(parallelism)

        parser = parser_cls()

        if hdfs.exists(hdfs_fmap_dir.joinpath("fmap.meta")):
            logging.info(i18n("Using HDFS fmap: {}").format(hdfs_fmap_dir))
            hdfs.get(hdfs_fmap_dir, tmp_fmap_dir)

        else:
            logging.info(
                i18n("There is no fmap available, start to "
                     "generate fmap by parsing features."))

            primary_keys = config.PRIMARY_KEYS
            labels = config.LABELS
            drop_columns = config.DROP_COLUMNS
            buckets = None if config.BUCKET is None else config.BUCKET.dict

            parser.fit(df,
                       buckets=buckets,
                       drop_columns=drop_columns,
                       primary_keys=primary_keys,
                       labels=labels)
            parser.save(tmp_fmap_dir)

            logging.info(i18n("Put fmap to HDFS: {}").format(hdfs_fmap_dir))
            hdfs.delete(hdfs_fmap_dir)
            hdfs.put(tmp_fmap_dir, hdfs_fmap_dir)

            if local_fmap_dir.exists():
                logging.warning(
                    i18n("Local directory {} already exists, "
                         "it will be overwritten: {}").format(
                             "fmap", local_fmap_dir))
                shutil.rmtree(local_fmap_dir)
            shutil.copytree(tmp_fmap_dir, local_fmap_dir)

        parser.load(tmp_fmap_dir)
        encode_df = parser.transform(df)

        normalizer = normalizer_cls()

        if hdfs.exists(
                hdfs_norm_dir.joinpath("normalizers_metadata", "_SUCCESS")):
            normalizer.load(hdfs_norm_dir)

        else:
            hdfs.mkdirs(hdfs_norm_dir)

            try:
                bucket_conf = config.BUCKET.dict
            except AttributeError:
                bucket_conf = None
                if config.BUCKET is not None:
                    logging.error(i18n("Get wrong type bucket configuration."))

            normalizer.fit(encode_df, parser.fmap, bucket_conf=bucket_conf)
            normalizer.save(hdfs_norm_dir)

            if local_norm_dir.exists():
                logging.warning(
                    i18n("Local directory {} already exists, "
                         "it will be overwritten: {}").format(
                             "norm", local_norm_dir))
                shutil.rmtree(local_norm_dir)

            hdfs.get(hdfs_norm_dir, local_norm_dir)

        norm_df = normalizer.transform(encode_df)

        spark_app.save_compress(norm_df, config.HDFS_ENCODE_DIR, use_tfr=False)

        if "HDFS_TFRECORD_DIR" in config:
            spark_app.save_compress(norm_df,
                                    config.HDFS_TFRECORD_DIR,
                                    use_tfr=True)

        logging.info(i18n("Encoding done."))
Example 12
class NNDenseInput(InputBase):

    cfg = config.setting(
        config.opt("MODEL.epochs", 1),
        config.opt("MODEL.batch_size", 1),
        config.opt("MODEL.parallel", 4),
        config.opt("MODEL.shuffle_size", None)
    )

    def __init__(self, fmap):
        super(NNDenseInput, self).__init__(fmap)

    def tfr_inputs(self, files):
        """
        For train and evaluate.
        """

        feature_dict = OrderedDict()

        for fe in self.fmap.primary_keys.get_features():
            feature_dict[fe.name] = self._TF_FEATURE[fe.fetype]([1])

        for fe in self.fmap.labels.get_features():
            feature_dict[fe.name] = self._TF_FEATURE[fe.fetype]([1])

        buckets = self.fmap.get_buckets(drop=PRESET_BUCKETS)
        for bucket in buckets:
            nums_size = bucket.nums.fe_size
            ctgs_size = bucket.ctgs.fe_count

            if nums_size > 0:
                name = "_".join([bucket.name, "nums"])
                feature_dict[name] = self._float_feature([nums_size])

            if ctgs_size > 0:
                name = "_".join([bucket.name, "ctgs"])
                feature_dict[name] = self._int_feature([ctgs_size])

        def _parse_single_example(example):
            feature = tf.io.parse_single_example(example, feature_dict)

            return feature

        parallel = config.MODEL.parallel
        dataset = tf.data \
                    .TFRecordDataset(files, num_parallel_reads=parallel) \
                    .map(_parse_single_example, num_parallel_calls=parallel) \
                    .batch(config.MODEL.batch_size) \
                    .repeat(config.MODEL.epochs)

        if config.MODEL.shuffle_size:
            dataset = dataset.shuffle(config.MODEL.shuffle_size)

        return dataset

    def rdd_inputs(self, rdd, batch_size):
        """
        For spark predict.
        """

        primary_keys = []
        features = []

        out_dtype = []
        out_shape = []

        for fe in self.fmap.primary_keys.get_features():
            primary_keys.append(fe.name)
            out_dtype.append(self._TF_TYPE[fe.fetype])
            out_shape.append(tf.TensorShape([fe.size]))

        buckets = self.fmap.get_buckets(drop=PRESET_BUCKETS)
        for bucket in buckets:
            nums_size = bucket.nums.fe_size
            ctgs_size = bucket.ctgs.fe_count

            if nums_size > 0:
                name = "_".join([bucket.name, "nums"])
                features.append(name)
                out_dtype.append(tf.float32)
                out_shape.append(tf.TensorShape(nums_size))

            if ctgs_size > 0:
                name = "_".join([bucket.name, "ctgs"])
                features.append(name)
                out_dtype.append(tf.int64)
                out_shape.append(tf.TensorShape(ctgs_size))

        def rdd_generator():
            for row in rdd:
                row_data = []

                for k in primary_keys:
                    row_data.append([row[k]])

                for k in features:
                    row_data.append(list(row[k]))

                yield tuple(row_data)

        dataset = tf.data.Dataset \
                         .from_generator(generator=rdd_generator,
                                         output_shapes=tuple(out_shape),
                                         output_types=tuple(out_dtype)) \
                         .batch(batch_size)

        return dataset
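
tfr_inputs builds a feature specification from the fmap (the _TF_FEATURE, _float_feature and _int_feature helpers presumably wrap tf.io.FixedLenFeature) and parses serialized examples with tf.io.parse_single_example. A stand-alone sketch of that pipeline with a hard-coded feature spec in place of the fmap-driven one; names and sizes are illustrative:

import tensorflow as tf

# Hard-coded spec standing in for the fmap-driven feature_dict above.
feature_dict = {
    "uid": tf.io.FixedLenFeature([1], tf.int64),
    "label": tf.io.FixedLenFeature([1], tf.int64),
    "bucket0_nums": tf.io.FixedLenFeature([16], tf.float32),
    "bucket0_ctgs": tf.io.FixedLenFeature([4], tf.int64),
}

def _parse_single_example(example):
    return tf.io.parse_single_example(example, feature_dict)

files = ["part-00000.tfrecord"]          # placeholder TFRecord file list
dataset = tf.data \
            .TFRecordDataset(files, num_parallel_reads=4) \
            .map(_parse_single_example, num_parallel_calls=4) \
            .batch(32) \
            .repeat(1)
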
Example 13
class _Build(TaskNode):

    parent_tag = TaskNode.set_tag("ENCODE_FEATURE", "TFRECORD_FEATURE")
    output_tag = TaskNode.set_tag("_BUILD")

    cfg = config.setting(
        config.req("MODELS_DIR"),
        config.req("MODEL.model_name"),
        config.req("MODEL.input_name"),

        config.opt("HDFS_ENCODE_DIR", _HDFS_ENCODE_DIR),
        config.opt("HDFS_TFRECORD_DIR", _HDFS_TFRECORD_DIR),
    )

    def __init__(self):
        super(_Build, self).__init__()

    @TaskNode.timeit
    def run(self):
        hdfs = HDFS()

        local_static_dir = Path(config.MODELS_DIR).resolve()
        hdfs_static_dir = Path(config.HDFS_MODEL_DIR).joinpath("static")

        fmap_dir = "fmap_{}".format(config.uuid)
        tmp_fmap_dir = Path(config.LOCAL_TMP_DIR).joinpath(fmap_dir)
        local_fmap_dir = Path(config.LOCAL_FEMODEL_DIR).joinpath("fmap")
        hdfs_fmap_dir = Path(config.HDFS_FEMODEL_DIR).joinpath("fmap")

        ckpt_dir = "ckpt_{}".format(config.uuid)
        tmp_ckpt_dir = Path(config.LOCAL_TMP_DIR).joinpath(ckpt_dir)
        local_ckpt_link = Path(config.LOCAL_MODEL_DIR).joinpath("ckpt")
        local_ckpt_dir = Path(config.LOCAL_MODEL_DIR).joinpath(ckpt_dir)
        hdfs_ckpt_dir = Path(config.HDFS_MODEL_DIR).joinpath("ckpt")

        if hdfs.exists(hdfs_fmap_dir.joinpath("fmap.meta")):
            logging.info(
                i18n("Using HDFS fmap: {}").format(hdfs_fmap_dir))
            hdfs.get(hdfs_fmap_dir, tmp_fmap_dir)

        elif local_fmap_dir.joinpath("fmap.meta").exists():
            logging.info(
                i18n("Using local fmap: {}").format(local_fmap_dir))
            shutil.copytree(local_fmap_dir, tmp_fmap_dir)

        else:
            raise FmapNotExists(
                i18n("No available fmap found, pleas "
                     "process feature encoding first."))

        if hdfs.exists(hdfs_ckpt_dir):
            logging.info(
                i18n("Getting ckpt from HDFS: {}").format(hdfs_ckpt_dir))
            hdfs.get(hdfs_ckpt_dir, tmp_ckpt_dir)

        elif local_ckpt_link.resolve().exists():
            logging.info(
                i18n("Using local ckpt: {}").format(local_ckpt_link))
            shutil.copytree(local_ckpt_link.resolve(), tmp_ckpt_dir)

        else:
            logging.info(
                i18n("No available ckpt found, reinitializing..."))

        config._build_dirs = {
            "fmap_dir": fmap_dir,
            "ckpt_dir": ckpt_dir,

            "hdfs_fmap_dir": hdfs_fmap_dir,
            "hdfs_ckpt_dir": hdfs_ckpt_dir,
            "hdfs_static_dir": hdfs_static_dir,

            "tmp_fmap_dir": tmp_fmap_dir,
            "tmp_ckpt_dir": tmp_ckpt_dir,

            "local_fmap_dir": local_fmap_dir,
            "local_ckpt_dir": local_ckpt_dir,
            "local_static_dir": local_static_dir,

            "local_ckpt_link": local_ckpt_link
        }
Example 14
class DNNBinaryClassifier(ModelBase):

    cfg = config.setting(config.req("MODEL.layers"),
                         config.opt("MODEL.learning_rate", 0.001),
                         config.opt("MODEL.batch_size", 128))

    def __init__(self, fmap):
        super(DNNBinaryClassifier, self).__init__(fmap)

        self.optimizer = tf.keras.optimizers.Adam(
            learning_rate=config.MODEL.learning_rate)
        self.compute_loss = tf.keras.losses.BinaryCrossentropy(
            from_logits=True)

        self.mean_loss = tf.keras.metrics.Mean()
        self.acc = tf.keras.metrics.BinaryAccuracy()
        self.auc = tf.keras.metrics.AUC()

        self.metrics = {
            "mean_loss": self.mean_loss,
            "acc": self.acc,
            "auc": self.auc
        }

    def build(self):
        concat_list = self.get_inputs(tp="nums")
        for ctg_inp, depth in self.get_inputs(tp="ctgs", with_depth=True):
            _emb = _Embedding(depth, 6)(ctg_inp)
            concat_list.append(_emb)

        net = tf.concat(concat_list, axis=1)

        for size in config.MODEL.layers:
            net = tf.keras.layers.Dense(size, activation=tf.nn.relu)(net)

        logits = tf.keras.layers.Dense(1)(net)
        sigmoid = tf.nn.sigmoid(logits)

        self.set_output(logits, "logits")
        self.set_output(sigmoid, "sigmoid")

    @tf.function
    def train(self, feature, label):
        _label = label["label"]

        with tf.GradientTape() as tape:
            logits, sigmoid = self.model(feature)
            loss = self.compute_loss(_label, logits)

        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))

        self.mean_loss(loss)
        self.acc(_label, sigmoid)
        self.auc(_label, sigmoid)

    @tf.function
    def evaluate(self, feature, label):
        _label = label["label"]

        logits, sigmoid = self.model(feature)
        loss = self.compute_loss(_label, logits)
        self.mean_loss(loss)
        self.acc(_label, sigmoid)
        self.auc(_label, sigmoid)

    @tf.function
    def predict(self, feature):
        pred = self.model(feature)
        return pred
Example 15
class MyModel(ModelBase):

    cfg = config.setting(config.req("MODEL.learning_rate"),
                         config.req("MODEL.classes"),
                         config.req("MODEL.layers"),
                         config.opt("MODEL.batch_size", 8))

    def __init__(self, fmap):
        super(MyModel, self).__init__(fmap)

        self.optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
        self.compute_loss = tf.keras.losses.SparseCategoricalCrossentropy()

        self.mean_loss = tf.keras.metrics.Mean()
        self.acc = tf.keras.metrics.SparseCategoricalAccuracy()

        self.metrics = {"mean_loss": self.mean_loss, "acc": self.acc}
        self.msg_frac = 10

    def build(self):

        concat_list = self.get_inputs(tp="nums")
        for ctg_inp, depth in self.get_inputs(tp="ctgs", with_depth=True):
            _emb = _Embedding(depth, 6)(ctg_inp)
            concat_list.append(_emb)

        net = tf.concat(concat_list, axis=1)

        for size in config.MODEL.layers:
            net = tf.keras.layers.Dense(size, activation=tf.nn.relu)(net)

        output = tf.keras.layers.Dense(config.MODEL.classes,
                                       activation=tf.nn.softmax)(net)

        arg_max = tf.argmax(output, axis=1)

        self.set_output(output, "softmax")
        self.set_output(arg_max, "argmax")

    @tf.function
    def train(self, feature, label):
        _label = label["species"]

        with tf.GradientTape() as tape:
            output, _ = self.model(feature)
            loss = self.compute_loss(_label, output)

        grads = tape.gradient(loss, self.model.trainable_variables)
        self.optimizer.apply_gradients(
            zip(grads, self.model.trainable_variables))

        self.mean_loss(loss)
        self.acc(_label, output)

    @tf.function
    def evaluate(self, feature, label):
        _label = label["species"]

        output, _ = self.model(feature)
        loss = self.compute_loss(_label, output)
        self.mean_loss(loss)
        self.acc(_label, output)

    @tf.function
    def predict(self, feature):
        pred = self.model(feature)
        return pred