Example #1
 def setUp(self):
     super().setUp()
     settings.CLIENT_CONFIG.is_managed = False
     settings.CLIENT_CONFIG.is_offline = True
     os.environ[POLYAXON_KEYS_COLLECT_ARTIFACTS] = "false"
     os.environ[POLYAXON_KEYS_COLLECT_RESOURCES] = "false"
     with patch("polyaxon.tracking.run.Run._set_exit_handler") as exit_mock:
         self.run = Run(project="test.test", run_uuid="uid")
     assert exit_mock.call_count == 1
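Note: this setUp forces the client into offline, non-managed mode and patches the exit handler so constructing a Run makes no API calls. A minimal sketch of the same offline setup outside a test (assumes the polyaxon v1 client; the project name is illustrative):

from polyaxon import settings
from polyaxon.tracking import Run

# Offline and non-managed: the client never contacts a Polyaxon API server.
settings.CLIENT_CONFIG.is_managed = False
settings.CLIENT_CONFIG.is_offline = True

run = Run(project="org.demo")  # hypothetical project name
run.log_metrics(loss=0.42, step=1)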
Example #2
 def __init__(self, learn, run=None, monitor="auto", mode="auto"):
     super().__init__(learn, monitor=monitor, mode=mode)
     if monitor is None:
         # use default TrackerCallback monitor value
         super().__init__(learn, mode=mode)
     self.run = run
     if settings.CLIENT_CONFIG.is_managed:
         self.run = self.run or Run()
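Note: calling super().__init__ twice above works but is easy to misread. A hedged equivalent that resolves the monitor argument first and initializes the parent once:

def __init__(self, learn, run=None, monitor="auto", mode="auto"):
    if monitor is None:
        # fall back to TrackerCallback's default monitor value
        super().__init__(learn, mode=mode)
    else:
        super().__init__(learn, monitor=monitor, mode=mode)
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()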
Example #3
 def __init__(self,
              tensors,
              run=None,
              every_n_iter=None,
              every_n_secs=None):
     super(PolyaxonLoggingTensorHook,
           self).__init__(tensors=tensors,
                          every_n_iter=every_n_iter,
                          every_n_secs=every_n_secs)
     self.run = run
     if settings.CLIENT_CONFIG.is_managed:
         self.run = self.run or Run()
Example #4
 def __init__(self,
              tensors,
              run=None,
              every_num_iterations=None,
              every_n_secs=None):
     super().__init__(
         tensors=tensors,
         every_num_iterations=every_num_iterations,
         every_n_secs=every_n_secs,
     )
     self.run = run
     if settings.CLIENT_CONFIG.is_managed:
         self.run = self.run or Run()
Example #5
def _run(ctx, name, owner, project_name, description, tags, specification,
         log):
    docker = DockerOperator()
    if not docker.check():
        raise PolyaxonException("Docker is required to run this command.")

    # Create Build
    project = "{}.{}".format(owner, project_name)
    build_job = Run(project=project)

    specification = CompiledOperationSpecification.apply_operation_contexts(
        specification)
    content = specification.to_dict(dump=True)
    build_job.create(name=name,
                     description=description,
                     tags=tags,
                     content=content)
    image = _create_docker_build(build_job, specification, project)

    experiment = Run(project=project)
    experiment.create(name=name,
                      tags=tags,
                      description=description,
                      content=content)

    cmd_args = ["run", "--rm"]
    data_paths, bind_mounts = _get_data_bind_mounts(specification.data_refs)
    for key, value in _get_env_vars(
            project=project,
            experiment_id=experiment.experiment_id,
            params=specification.params,
            data_paths=data_paths,
    ):
        cmd_args += ["-e", "{key}={value}".format(key=key, value=value)]
    cmd_args += _get_config_volume()
    cmd_args += _get_data_volumes(bind_mounts)
    cmd_args += [image]

    # Add cmd.run
    _, args = specification.container.get_container_command_args()
    for arg in args:
        cmd_args += arg
    try:
        print(cmd_args)
        docker.execute(cmd_args, stream=True)
    except Exception as e:
        handle_cli_error(e, message="Could not start local run.")
        sys.exit(1)
Example #6
    def __init__(self, *args: Any, **kwargs: Any):
        try:
            from polyaxon.tracking import Run

            self.experiment = Run(*args, **kwargs)
        except ImportError:
            try:
                from polyaxon_client.tracking import Experiment

                self.experiment = Experiment(*args, **kwargs)
            except ImportError:
                raise RuntimeError(
                    "This contrib module requires polyaxon to be installed.\n"
                    "For Polyaxon v1.x please install it with command: \n pip install polyaxon\n"
                    "For Polyaxon v0.x please install it with command: \n pip install polyaxon-client"
                )
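Note: the try/except import chain above is how this contrib module stays compatible with both client generations. The same fallback as a standalone helper (a sketch; the function name is illustrative):

def make_tracker(*args, **kwargs):
    """Return a tracking object for whichever Polyaxon client is installed."""
    try:
        from polyaxon.tracking import Run  # Polyaxon v1.x
        return Run(*args, **kwargs)
    except ImportError:
        from polyaxon_client.tracking import Experiment  # Polyaxon v0.x
        return Experiment(*args, **kwargs)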
Example #7
 def __init__(self, summary_op=None, steps_per_log=1000, run=None):
     self._summary_op = summary_op
     self._steps_per_log = steps_per_log
     self.run = run
     if settings.CLIENT_CONFIG.is_managed:
         self.run = self.run or Run()
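Note: Examples #2, #3, #4, #7, #13, #21, and #25 all share one idiom: accept an optional run and resolve the ambient run lazily, but only inside a Polyaxon-managed job. A framework-free sketch of that idiom (the class name is illustrative):

from polyaxon import settings
from polyaxon.tracking import Run

class MetricsLogger:
    def __init__(self, run=None):
        self.run = run
        if settings.CLIENT_CONFIG.is_managed:
            # in-cluster: fall back to the run resolved from the environment
            self.run = self.run or Run()

    def log(self, **metrics):
        if self.run:
            self.run.log_metrics(**metrics)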
Example #8
        n_estimators=n_estimators,
        max_features=max_features,
        min_samples_leaf=min_samples_leaf,
    )
    return cross_val_score(classifier, X, y, cv=5)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_estimators', type=int, default=3)
    parser.add_argument('--max_features', type=int, default=3)
    parser.add_argument('--min_samples_leaf', type=int, default=80)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='random-forest')
    experiment.create(tags=['examples', 'scikit-learn'])
    experiment.log_inputs(n_estimators=args.n_estimators,
                          max_features=args.max_features,
                          min_samples_leaf=args.min_samples_leaf)

    (X, y) = load_data()

    # Polyaxon
    experiment.log_data_ref(content=X, name='dataset_X')
    experiment.log_data_ref(content=y, name='dataset_y')

    accuracies = model(X=X,
                       y=y,
                       n_estimators=args.n_estimators,
                       max_features=args.max_features,
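Note: the page truncates this snippet mid-call. Following the pattern Example #14 uses for the same scikit-learn setup, such a script typically ends by aggregating the fold scores and logging them; a hedged sketch, not the original continuation:

import numpy as np

accuracy_mean, accuracy_std = np.mean(accuracies), np.std(accuracies)
experiment.log_metrics(accuracy_mean=accuracy_mean, accuracy_std=accuracy_std)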
Example #9
        '--l1_ratio',
        type=float,
        default=1.0)
    parser.add_argument(
        '--max_iter',
        type=int,
        default=1000)
    parser.add_argument(
        '--tol',
        type=float,
        default=0.001
    )
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='sgd-classifier')
    experiment.create(tags=['examples', 'scikit-learn'])
    experiment.log_inputs(loss=args.loss,
                          penalty=args.penalty,
                          l1_ratio=args.l1_ratio,
                          max_iter=args.max_iter,
                          tol=args.tol)

    (X, y) = load_data()

    # Polyaxon
    experiment.log_data_ref(content=X, name='dataset_X')
    experiment.log_data_ref(content=y, name='dataset_y')

    accuracies = model(X=X,
                       y=y,
Example #10
 def __init__(self, *args, **kwargs):
     self.experiment = kwargs.get("run", Run(*args, **kwargs))
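Note: kwargs.get("run", Run(*args, **kwargs)) evaluates its default eagerly, so a Run is constructed even when a run is supplied, and the "run" key is forwarded to Run() along with the rest. A hedged variant that avoids both:

def __init__(self, *args, **kwargs):
    # pop "run" so it is not forwarded to Run(), and only build a Run when none was given
    run = kwargs.pop("run", None)
    self.experiment = run if run is not None else Run(*args, **kwargs)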
Example #11
class TestEventsSummaries(BaseTestCase):
    def setUp(self):
        super().setUp()
        settings.CLIENT_CONFIG.is_managed = False
        settings.CLIENT_CONFIG.is_offline = True
        os.environ[POLYAXON_KEYS_COLLECT_ARTIFACTS] = "false"
        os.environ[POLYAXON_KEYS_COLLECT_RESOURCES] = "false"
        with patch("polyaxon.tracking.run.Run._set_exit_handler") as exit_mock:
            self.run = Run(project="test.test", run_uuid="uid")
        assert exit_mock.call_count == 1

    def test_metrics_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="metric",
            last_check=None,
        )
        events = V1Events.read(
            name="metric_events",
            kind="metric",
            data=os.path.abspath(
                "tests/fixtures/polyboard/metric/metric_events.plx"),
        )
        assert events.name == "metric_events"
        assert summaries == [
            V1RunArtifact(
                name="metric_events",
                kind="metric",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/metric/metric_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {"metric_events": 0.3}

    def test_images_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="image",
            last_check=None,
        )
        events = V1Events.read(
            name="image_events",
            kind="image",
            data=os.path.abspath(
                "tests/fixtures/polyboard/image/image_events.plx"),
        )
        assert events.name == "image_events"
        assert summaries == [
            V1RunArtifact(
                name="image_events",
                kind="image",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/image/image_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_histograms_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="histogram",
            last_check=None,
        )
        events = V1Events.read(
            name="histogram_events",
            kind="histogram",
            data=os.path.abspath(
                "tests/fixtures/polyboard/histogram/histogram_events.plx"),
        )
        assert events.name == "histogram_events"
        assert summaries == [
            V1RunArtifact(
                name="histogram_events",
                kind="histogram",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/histogram/histogram_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_videos_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="video",
            last_check=None,
        )
        events = V1Events.read(
            name="video_events",
            kind="video",
            data=os.path.abspath(
                "tests/fixtures/polyboard/video/video_events.plx"),
        )
        assert events.name == "video_events"
        assert summaries == [
            V1RunArtifact(
                name="video_events",
                kind="video",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/video/video_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_audios_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="audio",
            last_check=None,
        )
        events = V1Events.read(
            name="audio_events",
            kind="audio",
            data=os.path.abspath(
                "tests/fixtures/polyboard/audio/audio_events.plx"),
        )
        assert events.name == "audio_events"
        assert summaries == [
            V1RunArtifact(
                name="audio_events",
                kind="audio",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/audio/audio_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_htmls_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="html",
            last_check=None,
        )
        events = V1Events.read(
            name="html_events",
            kind="html",
            data=os.path.abspath(
                "tests/fixtures/polyboard/html/html_events.plx"),
        )
        assert events.name == "html_events"
        assert summaries == [
            V1RunArtifact(
                name="html_events",
                kind="html",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/html/html_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_charts_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="chart",
            last_check=None,
        )
        events = V1Events.read(
            name="chart_events",
            kind="chart",
            data=os.path.abspath(
                "tests/fixtures/polyboard/chart/chart_events.plx"),
        )
        assert events.name == "chart_events"
        assert summaries == [
            V1RunArtifact(
                name="chart_events",
                kind="chart",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/chart/chart_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_curves_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="curve",
            last_check=None,
        )
        events = V1Events.read(
            name="curve_events",
            kind="curve",
            data=os.path.abspath(
                "tests/fixtures/polyboard/curve/curve_events.plx"),
        )
        assert events.name == "curve_events"
        assert summaries == [
            V1RunArtifact(
                name="curve_events",
                kind="curve",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/curve/curve_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_artifacts_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="artifact",
            last_check=None,
        )
        events = V1Events.read(
            name="artifact_events",
            kind="artifact",
            data=os.path.abspath(
                "tests/fixtures/polyboard/artifact/artifact_events.plx"),
        )
        assert events.name == "artifact_events"
        assert summaries == [
            V1RunArtifact(
                name="artifact_events",
                kind="artifact",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/artifact/artifact_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_models_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="model",
            last_check=None,
        )
        summaries = {s.name: s for s in summaries}
        events = V1Events.read(
            name="model_events",
            kind="model",
            data=os.path.abspath(
                "tests/fixtures/polyboard/model/model_events.plx"),
        )
        assert events.name == "model_events"
        assert summaries["model_events"] == V1RunArtifact(
            name="model_events",
            kind="model",
            connection=None,
            summary=events.get_summary(),
            path="tests/fixtures/polyboard/model/model_events.plx",
            is_input=False,
        )

        events_without_step = V1Events.read(
            name="model_events_without_step",
            kind="model",
            data=os.path.abspath(
                "tests/fixtures/polyboard/model/model_events_without_step.plx"
            ),
        )
        assert events_without_step.name == "model_events_without_step"
        assert summaries["model_events_without_step"] == V1RunArtifact(
            name="model_events_without_step",
            kind="model",
            connection=None,
            summary=events_without_step.get_summary(),
            path="tests/fixtures/polyboard/model/model_events_without_step.plx",
            is_input=False,
        )
        assert last_values == {}

    def test_dataframes_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="dataframe",
            last_check=None,
        )
        events = V1Events.read(
            name="dataframe_events",
            kind="dataframe",
            data=os.path.abspath(
                "tests/fixtures/polyboard/dataframe/dataframe_events.plx"),
        )
        assert events.name == "dataframe_events"
        assert summaries == [
            V1RunArtifact(
                name="dataframe_events",
                kind="dataframe",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/dataframe/dataframe_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}
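Note: the tests above differ only in the event kind. In a pytest-style suite the same check could be parametrized; a sketch, assuming a fixture named run that builds the offline Run from setUp, and ignoring the kind-specific last_values assertions:

import pytest

@pytest.mark.parametrize(
    "kind",
    ["metric", "image", "histogram", "video", "audio",
     "html", "chart", "curve", "artifact", "dataframe"],
)
def test_events_summaries(run, kind):
    summaries, _ = run._sync_events_summaries(
        events_path="tests/fixtures/polyboard",
        events_kind=kind,
        last_check=None,
    )
    assert all(s.kind == kind for s in summaries)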
Example #12
    )
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    return accuracy_score(pred, y_test)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--max_depth', type=int, default=3)
    parser.add_argument('--num_rounds', type=int, default=10)
    parser.add_argument('--min_child_weight', type=int, default=5)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='iris')
    experiment.create(tags=['examples', 'xgboost'])
    experiment.log_inputs(log_learning_rate=args.log_learning_rate,
                          max_depth=args.max_depth,
                          num_rounds=args.num_rounds,
                          min_child_weight=args.min_child_weight)

    iris = load_iris()
    X = iris.data
    Y = iris.target

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

    # Polyaxon
    experiment.log_data_ref(content=X_train, name='x_train')
    experiment.log_data_ref(content=y_train, name='y_train')
Example #13
 def __init__(self, learn, run=None, monitor="val_loss", mode="auto"):
     super(PolyaxonFastai, self).__init__(learn, monitor=monitor, mode=mode)
     self.run = run
     if settings.CLIENT_CONFIG.is_managed:
         self.run = self.run or Run()
Example #14
        '--l1_ratio',
        type=float,
        default=1.0)
    parser.add_argument(
        '--max_iter',
        type=int,
        default=1000)
    parser.add_argument(
        '--tol',
        type=float,
        default=0.001
    )
    args = parser.parse_args()

    # Polyaxon
    experiment = Run()

    (X, y) = load_data()

    # Polyaxon
    experiment.log_data_ref(content=X, name='dataset_X')
    experiment.log_data_ref(content=y, name='dataset_y')

    accuracies = model(X=X,
                       y=y,
                       loss=args.loss,
                       penalty=args.penalty,
                       l1_ratio=args.l1_ratio,
                       max_iter=args.max_iter,
                       tol=args.tol)
    accuracy_mean, accuracy_std = (np.mean(accuracies), np.std(accuracies))
Example #15
def main(unused_argv):

    # Horovod: initialize Horovod.
    hvd.init()

    # Polyaxon
    if hvd.rank() == 0:
        experiment = Run()

    # Keras automatically creates a cache directory in ~/.keras/datasets for
    # storing the downloaded MNIST data. This creates a race
    # condition among the workers that share the same filesystem. If the
    # directory already exists by the time this worker gets around to creating
    # it, ignore the resulting exception and continue.
    cache_dir = os.path.join(os.path.expanduser('~'), '.keras', 'datasets')
    if not os.path.exists(cache_dir):
        try:
            os.mkdir(cache_dir)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(cache_dir):
                pass
            else:
                raise

    # Download and load MNIST dataset.
    (train_data, train_labels), (eval_data, eval_labels) = \
        keras.datasets.mnist.load_data('MNIST-data-%d' % hvd.rank())

    # Polyaxon
    if hvd.rank() == 0:
        experiment.log_data_ref(content=train_data, name='x_train')
        experiment.log_data_ref(content=train_labels, name='y_train')
        experiment.log_data_ref(content=eval_data, name='x_test')
        experiment.log_data_ref(content=eval_labels, name='y_test')

    # The shape of downloaded data is (-1, 28, 28), hence we need to reshape it
    # into (-1, 784) to feed into our network. Also, need to normalize the
    # features between 0 and 1.
    train_data = np.reshape(train_data, (-1, 784)) / 255.0
    eval_data = np.reshape(eval_data, (-1, 784)) / 255.0

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    # Horovod: save checkpoints only on worker 0 to prevent other workers from
    # corrupting them.
    model_dir = './mnist_convnet_model' if hvd.rank() == 0 else None

    # Create the Estimator
    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn,
        model_dir=model_dir,
        config=tf.estimator.RunConfig(session_config=config))

    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=500)

    # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states from
    # rank 0 to all other processes. This is necessary to ensure consistent
    # initialization of all workers when training is started with random weights or
    # restored from a checkpoint.
    bcast_hook = hvd.BroadcastGlobalVariablesHook(0)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": train_data},
                                                        y=train_labels,
                                                        batch_size=100,
                                                        num_epochs=None,
                                                        shuffle=True)

    # Horovod: adjust number of steps based on number of GPUs.
    mnist_classifier.train(input_fn=train_input_fn,
                           steps=3000 // hvd.size(),
                           hooks=[logging_hook, bcast_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": eval_data},
                                                       y=eval_labels,
                                                       num_epochs=1,
                                                       shuffle=False)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

    # Polyaxon
    if hvd.rank() == 0:
        experiment.log_metrics(**eval_results)
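Note: only rank 0 creates a Run above, so every later logging call must repeat the rank check. A small hedged helper that makes the guard explicit (the horovod import matches this example's TensorFlow usage):

import horovod.tensorflow as hvd

def log_on_chief(experiment, **metrics):
    """Log metrics only from the Horovod chief (rank 0) to avoid duplicate runs."""
    if hvd.rank() == 0 and experiment is not None:
        experiment.log_metrics(**metrics)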
Example #16
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import math
import tensorflow as tf
import horovod.keras as hvd

# Polyaxon
from polyaxon.tracking import Run

# Horovod: initialize Horovod.
hvd.init()

# Polyaxon
if hvd.rank() == 0:
    experiment = Run()

# Horovod: pin GPU to be used to process local rank (one GPU per process)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

batch_size = 128
num_classes = 10
# Polyaxon
if hvd.rank() == 0:
    experiment.log_inputs(batch_size=128, num_classes=10)

# Horovod: adjust number of epochs based on number of GPUs.
epochs = int(math.ceil(12.0 / hvd.size()))
Example #17
def train(mnist):

    # Define input/output placeholders
    input_x = tf.placeholder(tf.float32, [None,INPUT_SIZE,INPUT_SIZE,1 ], name= "input_x")
    input_y = tf.placeholder(tf.float32, [None,OUTPUT_SIZE], name= "input_y")
    dropout_keep_prob = tf.placeholder(tf.float32,name = "dropout_keep_prob")
    l2_loss = tf.constant(0.0)
    print("1 step ok!")

    # Polyaxon
    experiment = Run()
    experiment.log_data_ref(content=input_x, name='input_x')
    experiment.log_data_ref(content=input_y, name='input_y')

    # Layer 1: convolution layer conv1
    '''
    input  : [-1,28,28,1]
    filter : [5,5,32]
    output : [-1,28,28,32]
    '''
    with tf.name_scope("conv1"):
        w = get_weights([FILTER1_SIZE,FILTER1_SIZE,1,FILTER1_NUM])
        b = get_biases([FILTER1_NUM])
        with tf.device("/fpga:0"):
            conv1_op = tf.nn.conv2d(
                input  = input_x,
                filter = w,
                strides = [1,1,1,1],
                padding = "SAME",
                name = 'conv1_op')
        #    print("***********************")
        re1 = tf.nn.bias_add(conv1_op,b)
        with tf.device("/fpga:0"):
            conv1 = tf.nn.relu(re1 ,name = "relu")
    print("2 step ok!")

    # Layer 2: pooling layer pooling2
    '''
    input  : [-1,28,28,32]
    output : [-1,14,14,32]
    '''
    with tf.name_scope("pooling2"):
        with tf.device("/fpga:0"):
            pooling2 = tf.nn.max_pool(
                value = conv1,
                ksize = [1,2,2,1],
                strides = [1,2,2,1],
                padding = "SAME",
                name = "pooling1")
    print("3 step ok!")

    # Layer 3: convolution layer conv3
    '''
    input  : [-1,14,14,32]
    filter : [5,5,64]
    output : [-1,14,14,64]
    '''
    with tf.name_scope("conv3"):
        w = get_weights([FILTER3_SIZE,FILTER3_SIZE,FILTER1_NUM,FILTER3_NUM])
        b = get_biases([FILTER3_NUM])
        with tf.device("/fpga:0"):
            conv3_op = tf.nn.conv2d(
            input = pooling2,
            filter = w ,
            strides = [1,1,1,1],
            padding = "SAME",
            name = "conv3_op")
        re3 = tf.nn.bias_add(conv3_op,b)
        with tf.device("/fpga:0"):
            conv3 = tf.nn.relu(re3 ,name = "relu")
    print("4 step ok!")

    # Layer 4: pooling layer pooling4
    '''
    input  : [-1,14,14,64]
    output : [-1,7,7,64]
    '''
    with tf.name_scope("pooling4"):
        with tf.device("/fpga:0"):
            pooling4 = tf.nn.max_pool(
                value = conv3,
                ksize = [1,2,2,1],
                strides = [1,2,2,1],
                padding = "SAME",
                name = "pooling4")
    # Flatten the pooled feature maps
    '''
    input  : [-1,7,7,64]
    output : [-1,3136]
    '''
    pooling4_flat = tf.reshape(pooling4,[-1,FLAT_SIZE])
    print("5 step ok!")

    # Layer 5: fully connected layer fc5
    '''
    input  : [-1,3136]
    output : [-1,512]
    '''
    with tf.name_scope("fc5"):
        w = get_weights([FLAT_SIZE,FC5_SIZE])
        b = get_biases([FC5_SIZE])
        xw_res = tf.nn.xw_plus_b(pooling4_flat,w,b,name=  "fc5")
        with tf.device("/fpga:0"):
            fc5 = tf.nn.relu(xw_res, name = "relu")
        fc5_drop = tf.nn.dropout( fc5,dropout_keep_prob)
        l2_loss += tf.nn.l2_loss(w) + tf.nn.l2_loss(b)
    print("6 step ok!")

    # Layer 6: fully connected layer (output)
    '''
    input  : [-1,512]
    output : [-1,10]
    '''
    with tf.name_scope("fc6"):
        w = get_weights([FC5_SIZE,OUTPUT_SIZE])
        b = get_biases([OUTPUT_SIZE])
        y_hat = tf.nn.xw_plus_b(fc5_drop,w,b,name = "y_hat")
        l2_loss += tf.nn.l2_loss(w) + tf.nn.l2_loss(b)
    print("7 step ok!")


    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = y_hat, labels = input_y)
    #print("****************")
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + L2NORM_RATE * l2_loss
    print("8 step ok!")

    correct_predictions = tf.equal(tf.argmax(y_hat,1),tf.argmax(input_y,1))
    accuracy = tf.reduce_mean( tf.cast(correct_predictions,tf.float32) )

    global_step = tf.Variable(0,trainable=False)
    train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss,global_step = global_step)
    print("9 step ok!")

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for i in range(TRAIN_STEP):
            xs_pre, ys = mnist.train.next_batch(BATCH_SIZE)
            xs = np.reshape(xs_pre,[-1,INPUT_SIZE,INPUT_SIZE,1])
            feed_dict = {
                input_x: xs,
                input_y: ys,
                dropout_keep_prob : 0.5
            }

            _, step, train_loss, train_acc = sess.run([train_op, global_step, loss, accuracy],feed_dict = feed_dict)

            if i%2 == 0:
                print("step:{} ,train loss:{:g}, train_acc:{:g}".format(step,train_loss,train_acc))
                experiment.log_metrics(loss=train_loss, accuracy=train_acc)

        test_x = np.reshape(mnist.test.images[0:100],[-1,INPUT_SIZE,INPUT_SIZE,1])
        test_y = mnist.test.labels[0:100]
        feed_test = {
            input_x : test_x,
            input_y : test_y,
            dropout_keep_prob : 1.0
        }
        test_loss, test_acc, data = sess.run([loss,accuracy,y_hat],feed_dict = feed_test)
      #  print(data)
      #  print(test_y[0])
        print("After {} training steps, in test dataset, loss is {:g}, acc is {:g}".format(TRAIN_STEP,test_loss,test_acc))
        experiment.log_metrics(loss=test_loss, accuracy=test_acc)
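Note: the training loop above logs loss and accuracy without a step, and the final test metrics reuse the same names. A hedged variant that passes the global step explicitly, as Example #19 does, so the series stay ordered:

def log_train_metrics(experiment, step, loss, acc):
    # step is a keyword of the v1 client's log_metrics (usage mirrors Example #19)
    experiment.log_metrics(step=int(step), loss=float(loss), accuracy=float(acc))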
Example #18
    parser.add_argument('--pool1_size', type=int, default=2)
    parser.add_argument('--conv2_size', type=int, default=5)
    parser.add_argument('--conv2_out', type=int, default=64)
    parser.add_argument('--conv2_activation', type=str, default='relu')
    parser.add_argument('--pool2_size', type=int, default=2)
    parser.add_argument('--dropout', type=float, default=0.2)
    parser.add_argument('--fc1_size', type=int, default=1024)
    parser.add_argument('--fc1_activation', type=str, default='sigmoid')
    parser.add_argument('--optimizer', type=str, default='adam')
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--epochs', type=int, default=1)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run()

    mnist = tf.keras.datasets.mnist

    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Polyaxon
    experiment.log_data_ref(content=x_train, name='x_train')
    experiment.log_data_ref(content=y_train, name='y_train')
    experiment.log_data_ref(content=x_test, name='x_test')
    experiment.log_data_ref(content=y_test, name='y_test')

    with tf.Session() as sess:
        model = create_model(conv1_size=args.conv1_size,
                             conv1_out=args.conv1_out,
                             conv1_activation=args.conv1_activation,
Example #19
def main(_):
    with tf.device(
            tf.train.replica_device_setter(
                worker_device="/job:%s/task:%d/%s" %
                (task["type"], task["index"], FLAGS.device),
                cluster=cluster)):
        worker_device = "/job:%s/task:%d/%s" % (task["type"], task["index"],
                                                FLAGS.device),
        logging.info("worker_device: %s", worker_device)

        ###
        ### Training
        ###

        #
        # read training data
        #

        # image - 784 (=28 x 28) elements of grey-scaled integer value [0, 1]
        # label - digit (0, 1, ..., 9)
        train_queue = tf.train.string_input_producer(
            [FLAGS.train_file], num_epochs=2
        )  # data is repeated and it raises OutOfRange when data is over
        train_reader = tf.TFRecordReader()
        _, train_serialized_exam = train_reader.read(train_queue)
        train_exam = tf.parse_single_example(train_serialized_exam,
                                             features={
                                                 'image_raw':
                                                 tf.FixedLenFeature([],
                                                                    tf.string),
                                                 'label':
                                                 tf.FixedLenFeature([],
                                                                    tf.int64)
                                             })
        train_image = tf.decode_raw(train_exam['image_raw'], tf.uint8)
        train_image.set_shape([784])
        train_image = tf.cast(train_image, tf.float32) * (1. / 255)
        train_label = tf.cast(train_exam['label'], tf.int32)
        train_batch_image, train_batch_label = tf.train.batch(
            [train_image, train_label], batch_size=batch_size)

        # Polyaxon
        experiment = Run()
        experiment.log_data_ref(content=train_image, name='train_image')
        experiment.log_data_ref(content=train_label, name='train_label')

        #
        # define training graph
        #

        # define input
        plchd_image = tf.placeholder(dtype=tf.float32, shape=(None, 784))
        plchd_label = tf.placeholder(dtype=tf.int32, shape=(None))

        # define network and inference
        # (simple 2 fully connected hidden layer : 784->128->64->10)
        with tf.name_scope('hidden1'):
            weights = tf.Variable(tf.truncated_normal([784, 128],
                                                      stddev=1.0 /
                                                      math.sqrt(float(784))),
                                  name='weights')
            biases = tf.Variable(tf.zeros([128]), name='biases')
            hidden1 = tf.nn.relu(tf.matmul(plchd_image, weights) + biases)
        with tf.name_scope('hidden2'):
            weights = tf.Variable(tf.truncated_normal([128, 64],
                                                      stddev=1.0 /
                                                      math.sqrt(float(128))),
                                  name='weights')
            biases = tf.Variable(tf.zeros([64]), name='biases')
            hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
        with tf.name_scope('softmax_linear'):
            weights = tf.Variable(tf.truncated_normal([64, 10],
                                                      stddev=1.0 /
                                                      math.sqrt(float(64))),
                                  name='weights')
            biases = tf.Variable(tf.zeros([10]), name='biases')
            logits = tf.matmul(hidden2, weights) + biases

        # define optimization
        global_step = tf.train.create_global_step()  # start without checkpoint
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.07)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=plchd_label,
                                                      logits=logits)
        train_op = optimizer.minimize(loss=loss, global_step=global_step)

        #
        # run session
        #

        with tf.train.MonitoredTrainingSession(master=server.target,
                                               checkpoint_dir=FLAGS.out_dir,
                                               is_chief=is_chief) as sess:

            # when data is over, OutOfRangeError occurs and ends with MonitoredSession

            local_step_value = 0
            run_metadata = tf.RunMetadata()
            array_image, array_label = sess.run(
                [train_batch_image, train_batch_label],
                run_metadata=run_metadata)
            while not sess.should_stop():
                feed_dict = {
                    plchd_image: array_image,
                    plchd_label: array_label
                }
                _, global_step_value, loss_value, array_image, array_label = sess.run(
                    [
                        train_op, global_step, loss, train_batch_image,
                        train_batch_label
                    ],
                    feed_dict=feed_dict)
                local_step_value += 1
                if local_step_value % 100 == 0:  # You can also use tf.train.LoggingTensorHook for output
                    logging.info("Local Step %d, Global Step %d (Loss: %.2f)",
                                 local_step_value, global_step_value,
                                 loss_value)
                    # Polyaxon
                    experiment.log_metrics(step=local_step_value,
                                           loss=loss_value)

        print('training finished')
Example #20
import argparse
import pickle
import os
import numpy as np
from polyaxon.tracking import Run
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

from my_project.data import load_data

# Polyaxon
experiment = Run()

def model(X, y, n_estimators, max_features, min_samples_leaf):
    classifier = RandomForestClassifier(n_estimators=n_estimators,
                                        max_features=max_features,
                                        min_samples_leaf=min_samples_leaf)
    return cross_val_score(classifier, X, y, cv=5), classifier


parser = argparse.ArgumentParser()
parser.add_argument('--n_estimators', type=int, default=3)
parser.add_argument('--max_features', type=int, default=3)
parser.add_argument('--min_samples_leaf', type=int, default=80)
args = parser.parse_args()

(X, y) = load_data()

# Polyaxon
# https://polyaxon.com/docs/experimentation/tracking/module/#log_data_ref
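Note: the snippet cuts off after the log_data_ref comment. Based on the identical setup in Examples #8 and #9, the presumed continuation is:

experiment.log_data_ref(content=X, name='dataset_X')
experiment.log_data_ref(content=y, name='dataset_y')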
Example #21
 def __init__(self, run=None, metrics=None):
     self.run = run
     if settings.CLIENT_CONFIG.is_managed:
         self.run = self.run or Run()
     self.metrics = metrics
Example #22
    metric = mx.metric.Accuracy()
    for _, batch in enumerate(data_iter):
        data = batch.data[0].as_in_context(context)
        label = batch.label[0].as_in_context(context)
        output = model(data.astype(args.dtype, copy=False))
        metric.update([label], [output])

    return metric.get()


# Initialize Horovod
hvd.init()

# Polyaxon
if hvd.rank() == 0:
    experiment = Run()

# Horovod: pin context to local rank
context = mx.cpu(hvd.local_rank()) if args.no_cuda else mx.gpu(hvd.local_rank())
num_workers = hvd.size()

# Load training and validation data
train_data, val_data = get_mnist_iterator(hvd.rank())

# Build model
model = conv_nets()
model.cast(args.dtype)
model.hybridize()

# Define hyper parameters
optimizer_params = {'momentum': args.momentum,
Example #23
    return model.evaluate(x_test, y_test)[1]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--conv1_size', type=int, default=32)
    parser.add_argument('--conv2_size', type=int, default=64)
    parser.add_argument('--dropout', type=float, default=0.8)
    parser.add_argument('--hidden1_size', type=int, default=500)
    parser.add_argument('--optimizer', type=str, default='adam')
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--epochs', type=int, default=1)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='mnist')
    experiment.create(tags=['keras'])
    experiment.log_inputs(conv1_size=args.conv1_size,
                          conv2_size=args.conv2_size,
                          dropout=args.dropout,
                          hidden1_size=args.hidden1_size,
                          optimizer=args.optimizer,
                          log_learning_rate=args.log_learning_rate,
                          epochs=args.epochs)

    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Polyaxon
    experiment.log_data_ref(content=x_train, name='x_train')
    experiment.log_data_ref(content=y_train, name='y_train')
    experiment.log_data_ref(content=x_test, name='x_test')
Example #24
    parser = argparse.ArgumentParser()
    parser.add_argument('--conv1_kernel', type=int, default=5)
    parser.add_argument('--conv1_filters', type=int, default=10)
    parser.add_argument('--conv1_activation', type=str, default='relu')
    parser.add_argument('--conv2_kernel', type=int, default=5)
    parser.add_argument('--conv2_filters', type=int, default=10)
    parser.add_argument('--conv2_activation', type=str, default='relu')
    parser.add_argument('--fc1_hidden', type=int, default=10)
    parser.add_argument('--fc1_activation', type=str, default='relu')
    parser.add_argument('--optimizer', type=str, default='adam')
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--epochs', type=int, default=1)
    args = parser.parse_args()

    experiment = Run(project='mnist')
    experiment.create(tags=['examples', 'mxnet'])
    experiment.log_inputs(conv1_kernel=args.conv1_kernel,
                          conv1_filters=args.conv1_filters,
                          conv1_activation=args.conv1_activation,
                          conv2_kernel=args.conv2_kernel,
                          conv2_filters=args.conv2_filters,
                          conv2_activation=args.conv2_activation,
                          fc1_hidden=args.fc1_hidden,
                          fc1_activation=args.fc1_activation,
                          optimizer=args.optimizer,
                          log_learning_rate=args.log_learning_rate,
                          epochs=args.epochs)

    logger.info('Downloading data ...')
    mnist = mx.test_utils.get_mnist()
Example #25
 def __init__(self, run, filepath, **kwargs):
     self.run = run
     if settings.CLIENT_CONFIG.is_managed:
         self.run = self.run or Run()
     super().__init__(filepath, **kwargs)
Example #26
    metavar='N',
    help='how many batches to wait before logging training status')
parser.add_argument('--fp16-allreduce',
                    action='store_true',
                    default=False,
                    help='use fp16 compression during allreduce')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Horovod: initialize library.
hvd.init()
torch.manual_seed(args.seed)

# Polyaxon
if hvd.rank() == 0:
    experiment = Run()

if args.cuda:
    # Horovod: pin GPU to local rank.
    torch.cuda.set_device(hvd.local_rank())
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_dataset = \
    datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))

# Horovod: use DistributedSampler to partition the training data.
Example #27
def main():
    args = parse_args()
    experiment = Run()
    params = load_values(args.param_file)
    if params:
        experiment.log_inputs(**params)
    metrics = load_values(args.metric_file)
    if metrics:
        experiment.log_metrics(**metrics)
    if args.tag:
        experiment.log_tags(args.tag)
    for dataset in load_datasets(args.data_file):
        experiment.log_data_ref(**dataset)
    if args.capture_png:
        imgs = discover_png(experiment.get_outputs_path())
        for img in imgs:
            if isinstance(img, str):
                experiment.log_image(img)
            elif isinstance(img, SerialImages):
                for idx, path in enumerate(img.paths):
                    experiment.log_image(path, name=img.name, step=idx)
            else:
                raise NotImplementedError('We should never get here.')
Example #28
import tensorflow as tf
from polyaxon.tracking import Run

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

experiment = Run(project='mnist', artifacts_path='/tmp/mnist/')
experiment.create(tags=['examples', 'tensorflow'])


def create_model():
    return tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])


model = create_model()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])


model.fit(x=x_train,
          y=y_train,
          epochs=5,
          validation_data=(x_test, y_test))
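Note: this script creates and tags a run but never logs results. A hedged continuation that would record the final evaluation, mirroring the log_metrics usage elsewhere on this page:

loss, accuracy = model.evaluate(x_test, y_test)
experiment.log_metrics(eval_loss=loss, eval_accuracy=accuracy)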
Example #29
    parser.add_argument('--pool1_size', type=int, default=2)
    parser.add_argument('--conv2_size', type=int, default=5)
    parser.add_argument('--conv2_out', type=int, default=64)
    parser.add_argument('--conv2_activation', type=str, default='relu')
    parser.add_argument('--pool2_size', type=int, default=2)
    parser.add_argument('--dropout', type=float, default=0.2)
    parser.add_argument('--fc1_size', type=int, default=1024)
    parser.add_argument('--fc1_activation', type=str, default='sigmoid')
    parser.add_argument('--optimizer', type=str, default='adam')
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--epochs', type=int, default=1)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='mnist', artifacts_path='/tmp/mnist')
    experiment.create(tags=['examples', 'tensorflow'])
    experiment.log_inputs(conv1_size=args.conv1_size,
                          conv1_out=args.conv1_out,
                          conv1_activation=args.conv1_activation,
                          pool1_size=args.pool1_size,
                          conv2_size=args.conv2_size,
                          conv2_out=args.conv2_out,
                          conv2_activation=args.conv2_activation,
                          pool2_size=args.pool2_size,
                          fc1_activation=args.fc1_activation,
                          fc1_size=args.fc1_size,
                          optimizer=args.optimizer,
                          log_learning_rate=args.log_learning_rate,
                          batch_size=args.batch_size,
                          dropout=args.dropout,
Example #30
    parser.add_argument('--skip_top',
                        type=int,
                        default=30,
                        help='Top occurring words to skip')
    parser.add_argument('--maxlen', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_nodes', type=int, default=8)
    parser.add_argument('--optimizer', type=str, default='adam')
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--dropout', type=float, default=0.8)
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument('--seed', type=int, default=234)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='bidirectional-lstm')
    experiment.create(tags=['examples', 'keras'])
    experiment.log_inputs(max_features=args.max_features,
                          skip_top=args.skip_top,
                          maxlen=args.maxlen,
                          batch_size=args.batch_size,
                          num_nodes=args.num_nodes,
                          optimizer=args.optimizer,
                          log_learning_rate=args.log_learning_rate,
                          dropout=args.dropout,
                          epochs=args.epochs,
                          seed=args.seed)

    logger.info('Loading data...')
    (x_train, y_train), (x_test,
                         y_test) = imdb.load_data(num_words=args.max_features,