def setUp(self):
    super().setUp()
    settings.CLIENT_CONFIG.is_managed = False
    settings.CLIENT_CONFIG.is_offline = True
    os.environ[POLYAXON_KEYS_COLLECT_ARTIFACTS] = "false"
    os.environ[POLYAXON_KEYS_COLLECT_RESOURCES] = "false"
    with patch("polyaxon.tracking.run.Run._set_exit_handler") as exit_mock:
        self.run = Run(project="test.test", run_uuid="uid")
    assert exit_mock.call_count == 1
def __init__(self, learn, run=None, monitor="auto", mode="auto"):
    super().__init__(learn, monitor=monitor, mode=mode)
    if monitor is None:
        # Use the default TrackerCallback monitor value
        super().__init__(learn, mode=mode)
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()
def __init__(self, tensors, run=None, every_n_iter=None, every_n_secs=None):
    super(PolyaxonLoggingTensorHook, self).__init__(
        tensors=tensors, every_n_iter=every_n_iter, every_n_secs=every_n_secs
    )
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()
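A minimal usage sketch (not from the source): the constructor above mirrors tf.train.LoggingTensorHook, so the hook can be passed to an Estimator the same way the plain logging hook is used in the Horovod example further down; the estimator, input function, and tensors_to_log dict are assumed to already exist.

# Sketch only. Assumed to exist: estimator (tf.estimator.Estimator),
# train_input_fn, and tensors_to_log (dict of tensor names to log).
logging_hook = PolyaxonLoggingTensorHook(tensors=tensors_to_log, every_n_iter=500)
estimator.train(input_fn=train_input_fn, hooks=[logging_hook])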
def __init__(self, tensors, run=None, every_num_iterations=None, every_n_secs=None):
    super().__init__(
        tensors=tensors,
        every_num_iterations=every_num_iterations,
        every_n_secs=every_n_secs,
    )
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()
def _run(ctx, name, owner, project_name, description, tags, specification, log):
    docker = DockerOperator()
    if not docker.check():
        raise PolyaxonException("Docker is required to run this command.")

    # Create Build
    project = "{}.{}".format(owner, project_name)
    build_job = Run(project=project)

    specification = CompiledOperationSpecification.apply_operation_contexts(specification)
    content = specification.to_dict(dump=True)
    build_job.create(name=name, description=description, tags=tags, content=content)
    image = _create_docker_build(build_job, specification, project)

    experiment = Run(project=project)
    experiment.create(name=name, tags=tags, description=description, content=content)

    cmd_args = ["run", "--rm"]
    data_paths, bind_mounts = _get_data_bind_mounts(specification.data_refs)
    for key, value in _get_env_vars(
        project=project,
        experiment_id=experiment.experiment_id,
        params=specification.params,
        data_paths=data_paths,
    ):
        cmd_args += ["-e", "{key}={value}".format(key=key, value=value)]
    cmd_args += _get_config_volume()
    cmd_args += _get_data_volumes(bind_mounts)
    cmd_args += [image]

    # Add cmd.run
    _, args = specification.container.get_container_command_args()
    for arg in args:
        cmd_args += arg

    try:
        print(cmd_args)
        docker.execute(cmd_args, stream=True)
    except Exception as e:
        handle_cli_error(e, message="Could not start local run.")
        sys.exit(1)
def __init__(self, *args: Any, **kwargs: Any):
    try:
        from polyaxon.tracking import Run

        self.experiment = Run(*args, **kwargs)
    except ImportError:
        try:
            from polyaxon_client.tracking import Experiment

            self.experiment = Experiment(*args, **kwargs)
        except ImportError:
            raise RuntimeError(
                "This contrib module requires polyaxon to be installed.\n"
                "For Polyaxon v1.x please install it with command: \n pip install polyaxon\n"
                "For Polyaxon v0.x please install it with command: \n pip install polyaxon-client"
            )
def __init__(self, summary_op=None, steps_per_log=1000, run=None):
    self._summary_op = summary_op
    self._steps_per_log = steps_per_log
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()
        n_estimators=n_estimators,
        max_features=max_features,
        min_samples_leaf=min_samples_leaf,
    )
    return cross_val_score(classifier, X, y, cv=5)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('--n_estimators', type=int, default=3)
    parser.add_argument('--max_features', type=int, default=3)
    parser.add_argument('--min_samples_leaf', type=int, default=80)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='random-forest')
    experiment.create(tags=['examples', 'scikit-learn'])
    experiment.log_inputs(n_estimators=args.n_estimators,
                          max_features=args.max_features,
                          min_samples_leaf=args.min_samples_leaf)

    (X, y) = load_data()

    # Polyaxon
    experiment.log_data_ref(content=X, name='dataset_X')
    experiment.log_data_ref(content=y, name='dataset_y')

    accuracies = model(X=X, y=y,
                       n_estimators=args.n_estimators,
                       max_features=args.max_features,
    '--l1_ratio', type=float, default=1.0)
parser.add_argument(
    '--max_iter', type=int, default=1000)
parser.add_argument(
    '--tol', type=float, default=0.001
)
args = parser.parse_args()

# Polyaxon
experiment = Run(project='sgd-classifier')
experiment.create(tags=['examples', 'scikit-learn'])
experiment.log_inputs(loss=args.loss,
                      penalty=args.penalty,
                      l1_ratio=args.l1_ratio,
                      max_iter=args.max_iter,
                      tol=args.tol)

(X, y) = load_data()

# Polyaxon
experiment.log_data_ref(content=X, name='dataset_X')
experiment.log_data_ref(content=y, name='dataset_y')

accuracies = model(X=X, y=y,
def __init__(self, *args, **kwargs):
    # Note: dict.get evaluates its default eagerly, so Run(*args, **kwargs)
    # is constructed even when a "run" kwarg is provided.
    self.experiment = kwargs.get("run", Run(*args, **kwargs))
class TestEventsSummaries(BaseTestCase):
    def setUp(self):
        super().setUp()
        settings.CLIENT_CONFIG.is_managed = False
        settings.CLIENT_CONFIG.is_offline = True
        os.environ[POLYAXON_KEYS_COLLECT_ARTIFACTS] = "false"
        os.environ[POLYAXON_KEYS_COLLECT_RESOURCES] = "false"
        with patch("polyaxon.tracking.run.Run._set_exit_handler") as exit_mock:
            self.run = Run(project="test.test", run_uuid="uid")
        assert exit_mock.call_count == 1

    def test_metrics_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="metric",
            last_check=None,
        )
        events = V1Events.read(
            name="metric_events",
            kind="metric",
            data=os.path.abspath("tests/fixtures/polyboard/metric/metric_events.plx"),
        )
        assert events.name == "metric_events"
        assert summaries == [
            V1RunArtifact(
                name="metric_events",
                kind="metric",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/metric/metric_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {"metric_events": 0.3}

    def test_images_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="image",
            last_check=None,
        )
        events = V1Events.read(
            name="image_events",
            kind="image",
            data=os.path.abspath("tests/fixtures/polyboard/image/image_events.plx"),
        )
        assert events.name == "image_events"
        assert summaries == [
            V1RunArtifact(
                name="image_events",
                kind="image",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/image/image_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_histograms_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="histogram",
            last_check=None,
        )
        events = V1Events.read(
            name="histogram_events",
            kind="histogram",
            data=os.path.abspath("tests/fixtures/polyboard/histogram/histogram_events.plx"),
        )
        assert events.name == "histogram_events"
        assert summaries == [
            V1RunArtifact(
                name="histogram_events",
                kind="histogram",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/histogram/histogram_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_videos_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="video",
            last_check=None,
        )
        events = V1Events.read(
            name="video_events",
            kind="video",
            data=os.path.abspath("tests/fixtures/polyboard/video/video_events.plx"),
        )
        assert events.name == "video_events"
        assert summaries == [
            V1RunArtifact(
                name="video_events",
                kind="video",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/video/video_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_audios_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="audio",
            last_check=None,
        )
        events = V1Events.read(
            name="audio_events",
            kind="audio",
            data=os.path.abspath("tests/fixtures/polyboard/audio/audio_events.plx"),
        )
        assert events.name == "audio_events"
        assert summaries == [
            V1RunArtifact(
                name="audio_events",
                kind="audio",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/audio/audio_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_htmls_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="html",
            last_check=None,
        )
        events = V1Events.read(
            name="html_events",
            kind="html",
            data=os.path.abspath("tests/fixtures/polyboard/html/html_events.plx"),
        )
        assert events.name == "html_events"
        assert summaries == [
            V1RunArtifact(
                name="html_events",
                kind="html",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/html/html_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_charts_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="chart",
            last_check=None,
        )
        events = V1Events.read(
            name="chart_events",
            kind="chart",
            data=os.path.abspath("tests/fixtures/polyboard/chart/chart_events.plx"),
        )
        assert events.name == "chart_events"
        assert summaries == [
            V1RunArtifact(
                name="chart_events",
                kind="chart",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/chart/chart_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_curves_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="curve",
            last_check=None,
        )
        events = V1Events.read(
            name="curve_events",
            kind="curve",
            data=os.path.abspath("tests/fixtures/polyboard/curve/curve_events.plx"),
        )
        assert events.name == "curve_events"
        assert summaries == [
            V1RunArtifact(
                name="curve_events",
                kind="curve",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/curve/curve_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_artifacts_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="artifact",
            last_check=None,
        )
        events = V1Events.read(
            name="artifact_events",
            kind="artifact",
            data=os.path.abspath("tests/fixtures/polyboard/artifact/artifact_events.plx"),
        )
        assert events.name == "artifact_events"
        assert summaries == [
            V1RunArtifact(
                name="artifact_events",
                kind="artifact",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/artifact/artifact_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}

    def test_models_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="model",
            last_check=None,
        )
        summaries = {s.name: s for s in summaries}
        events = V1Events.read(
            name="model_events",
            kind="model",
            data=os.path.abspath("tests/fixtures/polyboard/model/model_events.plx"),
        )
        assert events.name == "model_events"
        assert summaries["model_events"] == V1RunArtifact(
            name="model_events",
            kind="model",
            connection=None,
            summary=events.get_summary(),
            path="tests/fixtures/polyboard/model/model_events.plx",
            is_input=False,
        )
        events_without_step = V1Events.read(
            name="model_events_without_step",
            kind="model",
            data=os.path.abspath(
                "tests/fixtures/polyboard/model/model_events_without_step.plx"
            ),
        )
        assert events_without_step.name == "model_events_without_step"
        assert summaries["model_events_without_step"] == V1RunArtifact(
            name="model_events_without_step",
            kind="model",
            connection=None,
            summary=events_without_step.get_summary(),
            path="tests/fixtures/polyboard/model/model_events_without_step.plx",
            is_input=False,
        )
        assert last_values == {}

    def test_dataframes_summaries(self):
        summaries, last_values = self.run._sync_events_summaries(
            events_path="tests/fixtures/polyboard",
            events_kind="dataframe",
            last_check=None,
        )
        events = V1Events.read(
            name="dataframe_events",
            kind="dataframe",
            data=os.path.abspath("tests/fixtures/polyboard/dataframe/dataframe_events.plx"),
        )
        assert events.name == "dataframe_events"
        assert summaries == [
            V1RunArtifact(
                name="dataframe_events",
                kind="dataframe",
                connection=None,
                summary=events.get_summary(),
                path="tests/fixtures/polyboard/dataframe/dataframe_events.plx",
                is_input=False,
            )
        ]
        assert last_values == {}
    )
    model.fit(X_train, y_train)
    pred = model.predict(X_test)
    return accuracy_score(pred, y_test)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--max_depth', type=int, default=3)
    parser.add_argument('--num_rounds', type=int, default=10)
    parser.add_argument('--min_child_weight', type=int, default=5)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='iris')
    experiment.create(tags=['examples', 'xgboost'])
    experiment.log_inputs(log_learning_rate=args.log_learning_rate,
                          max_depth=args.max_depth,
                          num_rounds=args.num_rounds,
                          min_child_weight=args.min_child_weight)

    iris = load_iris()
    X = iris.data
    Y = iris.target
    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2)

    # Polyaxon
    experiment.log_data_ref(content=X_train, name='x_train')
    experiment.log_data_ref(content=y_train, name='y_train')
def __init__(self, learn, run=None, monitor="val_loss", mode="auto"):
    super(PolyaxonFastai, self).__init__(learn, monitor=monitor, mode=mode)
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()
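A hedged usage sketch (not from the source): it assumes fastai v1, where TrackerCallback wraps a Learner and callbacks are passed to fit_one_cycle; learn and run are assumed to already exist.

# Sketch only: attach the callback to an existing fastai v1 Learner.
callback = PolyaxonFastai(learn=learn, run=run, monitor="val_loss")
learn.fit_one_cycle(3, callbacks=[callback])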
    '--l1_ratio', type=float, default=1.0)
parser.add_argument(
    '--max_iter', type=int, default=1000)
parser.add_argument(
    '--tol', type=float, default=0.001
)
args = parser.parse_args()

# Polyaxon
experiment = Run()

(X, y) = load_data()

# Polyaxon
experiment.log_data_ref(content=X, name='dataset_X')
experiment.log_data_ref(content=y, name='dataset_y')

accuracies = model(X=X, y=y,
                   loss=args.loss,
                   penalty=args.penalty,
                   l1_ratio=args.l1_ratio,
                   max_iter=args.max_iter,
                   tol=args.tol)
accuracy_mean, accuracy_std = (np.mean(accuracies), np.std(accuracies))
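A hedged continuation sketch (not in the source snippet): the computed mean/std would typically be recorded with log_metrics, as the other examples in this collection do.

# Illustrative only: record the aggregated cross-validation results on the run.
experiment.log_metrics(accuracy_mean=accuracy_mean, accuracy_std=accuracy_std)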
def main(unused_argv):
    # Horovod: initialize Horovod.
    hvd.init()

    # Polyaxon
    if hvd.rank() == 0:
        experiment = Run()

    # Keras automatically creates a cache directory in ~/.keras/datasets for
    # storing the downloaded MNIST data. This creates a race
    # condition among the workers that share the same filesystem. If the
    # directory already exists by the time this worker gets around to creating
    # it, ignore the resulting exception and continue.
    cache_dir = os.path.join(os.path.expanduser('~'), '.keras', 'datasets')
    if not os.path.exists(cache_dir):
        try:
            os.mkdir(cache_dir)
        except OSError as e:
            if e.errno == errno.EEXIST and os.path.isdir(cache_dir):
                pass
            else:
                raise

    # Download and load MNIST dataset.
    (train_data, train_labels), (eval_data, eval_labels) = \
        keras.datasets.mnist.load_data('MNIST-data-%d' % hvd.rank())

    # Polyaxon
    if hvd.rank() == 0:
        experiment.log_data_ref(content=train_data, name='x_train')
        experiment.log_data_ref(content=train_labels, name='y_train')
        experiment.log_data_ref(content=eval_data, name='x_test')
        experiment.log_data_ref(content=eval_labels, name='y_test')

    # The shape of downloaded data is (-1, 28, 28), hence we need to reshape it
    # into (-1, 784) to feed into our network. Also, need to normalize the
    # features between 0 and 1.
    train_data = np.reshape(train_data, (-1, 784)) / 255.0
    eval_data = np.reshape(eval_data, (-1, 784)) / 255.0

    # Horovod: pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.visible_device_list = str(hvd.local_rank())

    # Horovod: save checkpoints only on worker 0 to prevent other workers from
    # corrupting them.
    model_dir = './mnist_convnet_model' if hvd.rank() == 0 else None

    # Create the Estimator
    mnist_classifier = tf.estimator.Estimator(
        model_fn=cnn_model_fn,
        model_dir=model_dir,
        config=tf.estimator.RunConfig(session_config=config))

    # Set up logging for predictions
    # Log the values in the "Softmax" tensor with label "probabilities"
    tensors_to_log = {"probabilities": "softmax_tensor"}
    logging_hook = tf.train.LoggingTensorHook(tensors=tensors_to_log,
                                              every_n_iter=500)

    # Horovod: BroadcastGlobalVariablesHook broadcasts initial variable states from
    # rank 0 to all other processes. This is necessary to ensure consistent
    # initialization of all workers when training is started with random weights or
    # restored from a checkpoint.
    bcast_hook = hvd.BroadcastGlobalVariablesHook(0)

    # Train the model
    train_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": train_data},
                                                        y=train_labels,
                                                        batch_size=100,
                                                        num_epochs=None,
                                                        shuffle=True)

    # Horovod: adjust number of steps based on number of GPUs.
    mnist_classifier.train(input_fn=train_input_fn,
                           steps=3000 // hvd.size(),
                           hooks=[logging_hook, bcast_hook])

    # Evaluate the model and print results
    eval_input_fn = tf.estimator.inputs.numpy_input_fn(x={"x": eval_data},
                                                       y=eval_labels,
                                                       num_epochs=1,
                                                       shuffle=False)
    eval_results = mnist_classifier.evaluate(input_fn=eval_input_fn)
    print(eval_results)

    # Polyaxon
    if hvd.rank() == 0:
        experiment.log_metrics(**eval_results)
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras import backend as K
import math
import tensorflow as tf
import horovod.keras as hvd

# Polyaxon
from polyaxon.tracking import Run

# Horovod: initialize Horovod.
hvd.init()

# Polyaxon
if hvd.rank() == 0:
    experiment = Run()

# Horovod: pin GPU to be used to process local rank (one GPU per process)
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
config.gpu_options.visible_device_list = str(hvd.local_rank())
K.set_session(tf.Session(config=config))

batch_size = 128
num_classes = 10

# Polyaxon
if hvd.rank() == 0:
    experiment.log_inputs(batch_size=128, num_classes=10)

# Horovod: adjust number of epochs based on number of GPUs.
epochs = int(math.ceil(12.0 / hvd.size()))
def train(mnist):
    # Define input/output placeholders
    input_x = tf.placeholder(tf.float32, [None, INPUT_SIZE, INPUT_SIZE, 1], name="input_x")
    input_y = tf.placeholder(tf.float32, [None, OUTPUT_SIZE], name="input_y")
    dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")
    l2_loss = tf.constant(0.0)
    print("1 step ok!")

    # Polyaxon
    experiment = Run()
    experiment.log_data_ref(content=input_x, name='input_x')
    experiment.log_data_ref(content=input_y, name='input_y')

    # Layer 1: convolutional layer conv1
    '''
    input  : [-1, 28, 28, 1]
    filter : [5, 5, 32]
    output : [-1, 28, 28, 32]
    '''
    with tf.name_scope("conv1"):
        w = get_weights([FILTER1_SIZE, FILTER1_SIZE, 1, FILTER1_NUM])
        b = get_biases([FILTER1_NUM])
        with tf.device("/fpga:0"):
            conv1_op = tf.nn.conv2d(
                input=input_x,
                filter=w,
                strides=[1, 1, 1, 1],
                padding="SAME",
                name='conv1_op')
        # print("***********************")
        re1 = tf.nn.bias_add(conv1_op, b)
        with tf.device("/fpga:0"):
            conv1 = tf.nn.relu(re1, name="relu")
        print("2 step ok!")

    # Layer 2: pooling layer pooling2
    '''
    input  : [-1, 28, 28, 32]
    output : [-1, 14, 14, 32]
    '''
    with tf.name_scope("pooling2"):
        with tf.device("/fpga:0"):
            pooling2 = tf.nn.max_pool(
                value=conv1,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding="SAME",
                name="pooling1")
        print("3 step ok!")

    # Layer 3: convolutional layer conv3
    '''
    input  : [-1, 14, 14, 32]
    filter : [5, 5, 64]
    output : [-1, 14, 14, 64]
    '''
    with tf.name_scope("conv3"):
        w = get_weights([FILTER3_SIZE, FILTER3_SIZE, FILTER1_NUM, FILTER3_NUM])
        b = get_biases([FILTER3_NUM])
        with tf.device("/fpga:0"):
            conv3_op = tf.nn.conv2d(
                input=pooling2,
                filter=w,
                strides=[1, 1, 1, 1],
                padding="SAME",
                name="conv3_op")
        re3 = tf.nn.bias_add(conv3_op, b)
        with tf.device("/fpga:0"):
            conv3 = tf.nn.relu(re3, name="relu")
        print("4 step ok!")

    # Layer 4: pooling layer pooling4
    '''
    input  : [-1, 14, 14, 64]
    output : [-1, 7, 7, 64]
    '''
    with tf.name_scope("pooling4"):
        with tf.device("/fpga:0"):
            pooling4 = tf.nn.max_pool(
                value=conv3,
                ksize=[1, 2, 2, 1],
                strides=[1, 2, 2, 1],
                padding="SAME",
                name="pooling4")

    # Flatten the pooling output
    '''
    input  : [-1, 7, 7, 64]
    output : [-1, 3136]
    '''
    pooling4_flat = tf.reshape(pooling4, [-1, FLAT_SIZE])
    print("5 step ok!")

    # Layer 5: fully connected layer fc5
    '''
    input  : [-1, 3136]
    output : [-1, 512]
    '''
    with tf.name_scope("fc5"):
        w = get_weights([FLAT_SIZE, FC5_SIZE])
        b = get_biases([FC5_SIZE])
        xw_res = tf.nn.xw_plus_b(pooling4_flat, w, b, name="fc5")
        with tf.device("/fpga:0"):
            fc5 = tf.nn.relu(xw_res, name="relu")
        fc5_drop = tf.nn.dropout(fc5, dropout_keep_prob)
        l2_loss += tf.nn.l2_loss(w) + tf.nn.l2_loss(b)
        print("6 step ok!")

    # Layer 6: fully connected layer (output)
    '''
    input  : [-1, 512]
    output : [-1, 10]
    '''
    with tf.name_scope("fc6"):
        w = get_weights([FC5_SIZE, OUTPUT_SIZE])
        b = get_biases([OUTPUT_SIZE])
        y_hat = tf.nn.xw_plus_b(fc5_drop, w, b, name="y_hat")
        l2_loss += tf.nn.l2_loss(w) + tf.nn.l2_loss(b)
        print("7 step ok!")

    cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_hat, labels=input_y)
    # print("****************")
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    loss = cross_entropy_mean + L2NORM_RATE * l2_loss
    print("8 step ok!")

    correct_predictions = tf.equal(tf.argmax(y_hat, 1), tf.argmax(input_y, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_predictions, tf.float32))

    global_step = tf.Variable(0, trainable=False)
    train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(loss, global_step=global_step)
    print("9 step ok!")

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(TRAIN_STEP):
            xs_pre, ys = mnist.train.next_batch(BATCH_SIZE)
            xs = np.reshape(xs_pre, [-1, INPUT_SIZE, INPUT_SIZE, 1])
            feed_dict = {
                input_x: xs,
                input_y: ys,
                dropout_keep_prob: 0.5
            }
            _, step, train_loss, train_acc = sess.run(
                [train_op, global_step, loss, accuracy], feed_dict=feed_dict)
            if i % 2 == 0:
                print("step:{} ,train loss:{:g}, train_acc:{:g}".format(step, train_loss, train_acc))
                experiment.log_metrics(loss=train_loss, accuracy=train_acc)

        test_x = np.reshape(mnist.test.images[0:100], [-1, INPUT_SIZE, INPUT_SIZE, 1])
        test_y = mnist.test.labels[0:100]
        feed_test = {
            input_x: test_x,
            input_y: test_y,
            dropout_keep_prob: 1.0
        }
        test_loss, test_acc, data = sess.run([loss, accuracy, y_hat], feed_dict=feed_test)
        # print(data)
        # print(test_y[0])
        print("After {} training steps, in test dataset, loss is {:g}, acc is {:g}".format(
            TRAIN_STEP, test_loss, test_acc))
        experiment.log_metrics(loss=test_loss, accuracy=test_acc)
parser.add_argument('--pool1_size', type=int, default=2)
parser.add_argument('--conv2_size', type=int, default=5)
parser.add_argument('--conv2_out', type=int, default=64)
parser.add_argument('--conv2_activation', type=str, default='relu')
parser.add_argument('--pool2_size', type=int, default=2)
parser.add_argument('--dropout', type=float, default=0.2)
parser.add_argument('--fc1_size', type=int, default=1024)
parser.add_argument('--fc1_activation', type=str, default='sigmoid')
parser.add_argument('--optimizer', type=str, default='adam')
parser.add_argument('--log_learning_rate', type=int, default=-3)
parser.add_argument('--batch_size', type=int, default=100)
parser.add_argument('--epochs', type=int, default=1)
args = parser.parse_args()

# Polyaxon
experiment = Run()

mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Polyaxon
experiment.log_data_ref(content=x_train, name='x_train')
experiment.log_data_ref(content=y_train, name='y_train')
experiment.log_data_ref(content=x_test, name='x_test')
experiment.log_data_ref(content=y_test, name='y_test')

with tf.Session() as sess:
    model = create_model(conv1_size=args.conv1_size,
                         conv1_out=args.conv1_out,
                         conv1_activation=args.conv1_activation,
def main(_):
    with tf.device(
        tf.train.replica_device_setter(
            worker_device="/job:%s/task:%d/%s" % (task["type"], task["index"], FLAGS.device),
            cluster=cluster)):
        worker_device = "/job:%s/task:%d/%s" % (task["type"], task["index"], FLAGS.device),
        logging.info("worker_device: %s", worker_device)

        ###
        ### Training
        ###

        #
        # read training data
        #

        # image - 784 (=28 x 28) elements of grey-scaled integer value [0, 1]
        # label - digit (0, 1, ..., 9)
        train_queue = tf.train.string_input_producer(
            [FLAGS.train_file],
            num_epochs=2)  # data is repeated and it raises OutOfRange when data is over
        train_reader = tf.TFRecordReader()
        _, train_serialized_exam = train_reader.read(train_queue)
        train_exam = tf.parse_single_example(
            train_serialized_exam,
            features={
                'image_raw': tf.FixedLenFeature([], tf.string),
                'label': tf.FixedLenFeature([], tf.int64)
            })
        train_image = tf.decode_raw(train_exam['image_raw'], tf.uint8)
        train_image.set_shape([784])
        train_image = tf.cast(train_image, tf.float32) * (1. / 255)
        train_label = tf.cast(train_exam['label'], tf.int32)
        train_batch_image, train_batch_label = tf.train.batch(
            [train_image, train_label], batch_size=batch_size)

        # Polyaxon
        experiment = Run()
        experiment.log_data_ref(content=train_image, name='train_image')
        experiment.log_data_ref(content=train_label, name='train_label')

        #
        # define training graph
        #

        # define input
        plchd_image = tf.placeholder(dtype=tf.float32, shape=(None, 784))
        plchd_label = tf.placeholder(dtype=tf.int32, shape=(None))

        # define network and inference
        # (simple 2 fully connected hidden layer : 784->128->64->10)
        with tf.name_scope('hidden1'):
            weights = tf.Variable(
                tf.truncated_normal([784, 128], stddev=1.0 / math.sqrt(float(784))),
                name='weights')
            biases = tf.Variable(tf.zeros([128]), name='biases')
            hidden1 = tf.nn.relu(tf.matmul(plchd_image, weights) + biases)
        with tf.name_scope('hidden2'):
            weights = tf.Variable(
                tf.truncated_normal([128, 64], stddev=1.0 / math.sqrt(float(128))),
                name='weights')
            biases = tf.Variable(tf.zeros([64]), name='biases')
            hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
        with tf.name_scope('softmax_linear'):
            weights = tf.Variable(
                tf.truncated_normal([64, 10], stddev=1.0 / math.sqrt(float(64))),
                name='weights')
            biases = tf.Variable(tf.zeros([10]), name='biases')
            logits = tf.matmul(hidden2, weights) + biases

        # define optimization
        global_step = tf.train.create_global_step()  # start without checkpoint
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.07)
        loss = tf.losses.sparse_softmax_cross_entropy(labels=plchd_label, logits=logits)
        train_op = optimizer.minimize(loss=loss, global_step=global_step)

        #
        # run session
        #

        with tf.train.MonitoredTrainingSession(master=server.target,
                                               checkpoint_dir=FLAGS.out_dir,
                                               is_chief=is_chief) as sess:
            # when data is over, OutOfRangeError occurs and ends with MonitoredSession
            local_step_value = 0
            run_metadata = tf.RunMetadata()
            array_image, array_label = sess.run(
                [train_batch_image, train_batch_label], run_metadata=run_metadata)
            while not sess.should_stop():
                feed_dict = {
                    plchd_image: array_image,
                    plchd_label: array_label
                }
                _, global_step_value, loss_value, array_image, array_label = sess.run(
                    [train_op, global_step, loss, train_batch_image, train_batch_label],
                    feed_dict=feed_dict)
                local_step_value += 1
                if local_step_value % 100 == 0:
                    # You can also use tf.train.LoggingTensorHook for output
                    logging.info("Local Step %d, Global Step %d (Loss: %.2f)",
                                 local_step_value, global_step_value, loss_value)

                    # Polyaxon
                    experiment.log_metrics(step=local_step_value, loss=loss_value)
print('training finished')
import argparse
import pickle
import os

import numpy as np

from polyaxon.tracking import Run
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

from my_project.data import load_data

# Polyaxon
experiment = Run()


def model(X, y, n_estimators, max_features, min_samples_leaf):
    classifier = RandomForestClassifier(n_estimators=n_estimators,
                                        max_features=max_features,
                                        min_samples_leaf=min_samples_leaf)
    return cross_val_score(classifier, X, y, cv=5), classifier


parser = argparse.ArgumentParser()
parser.add_argument('--n_estimators', type=int, default=3)
parser.add_argument('--max_features', type=int, default=3)
parser.add_argument('--min_samples_leaf', type=int, default=80)
args = parser.parse_args()

(X, y) = load_data()

# Polyaxon
# https://polyaxon.com/docs/experimentation/tracking/module/#log_data_ref
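A hedged sketch of how this snippet typically continues, modeled on the random-forest example earlier in this collection; the metric names below are illustrative, not from the source.

# Illustrative continuation, mirroring the other scikit-learn examples here.
experiment.log_data_ref(content=X, name='dataset_X')
experiment.log_data_ref(content=y, name='dataset_y')

experiment.log_inputs(n_estimators=args.n_estimators,
                      max_features=args.max_features,
                      min_samples_leaf=args.min_samples_leaf)

accuracies, classifier = model(X=X, y=y,
                               n_estimators=args.n_estimators,
                               max_features=args.max_features,
                               min_samples_leaf=args.min_samples_leaf)
experiment.log_metrics(accuracy_mean=np.mean(accuracies),
                       accuracy_std=np.std(accuracies))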
def __init__(self, run=None, metrics=None):
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()
    self.metrics = metrics
    metric = mx.metric.Accuracy()
    for _, batch in enumerate(data_iter):
        data = batch.data[0].as_in_context(context)
        label = batch.label[0].as_in_context(context)
        output = model(data.astype(args.dtype, copy=False))
        metric.update([label], [output])
    return metric.get()


# Initialize Horovod
hvd.init()

# Polyaxon
if hvd.rank() == 0:
    experiment = Run()

# Horovod: pin context to local rank
context = mx.cpu(hvd.local_rank()) if args.no_cuda else mx.gpu(hvd.local_rank())
num_workers = hvd.size()

# Load training and validation data
train_data, val_data = get_mnist_iterator(hvd.rank())

# Build model
model = conv_nets()
model.cast(args.dtype)
model.hybridize()

# Define hyper parameters
optimizer_params = {'momentum': args.momentum,
    return model.evaluate(x_test, y_test)[1]


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--conv1_size', type=int, default=32)
    parser.add_argument('--conv2_size', type=int, default=64)
    parser.add_argument('--dropout', type=float, default=0.8)
    parser.add_argument('--hidden1_size', type=int, default=500)
    parser.add_argument('--optimizer', type=str, default='adam')
    parser.add_argument('--log_learning_rate', type=int, default=-3)
    parser.add_argument('--epochs', type=int, default=1)
    args = parser.parse_args()

    # Polyaxon
    experiment = Run(project='mnist')
    experiment.create(tags=['keras'])
    experiment.log_inputs(conv1_size=args.conv1_size,
                          conv2_size=args.conv2_size,
                          dropout=args.dropout,
                          hidden1_size=args.hidden1_size,
                          optimizer=args.optimizer,
                          log_learning_rate=args.log_learning_rate,
                          epochs=args.epochs)

    (x_train, y_train), (x_test, y_test) = mnist.load_data()

    # Polyaxon
    experiment.log_data_ref(content=x_train, name='x_train')
    experiment.log_data_ref(content=y_train, name='y_train')
    experiment.log_data_ref(content=x_test, name='x_test')
parser = argparse.ArgumentParser()
parser.add_argument('--conv1_kernel', type=int, default=5)
parser.add_argument('--conv1_filters', type=int, default=10)
parser.add_argument('--conv1_activation', type=str, default='relu')
parser.add_argument('--conv2_kernel', type=int, default=5)
parser.add_argument('--conv2_filters', type=int, default=10)
parser.add_argument('--conv2_activation', type=str, default='relu')
parser.add_argument('--fc1_hidden', type=int, default=10)
parser.add_argument('--fc1_activation', type=str, default='relu')
parser.add_argument('--optimizer', type=str, default='adam')
parser.add_argument('--log_learning_rate', type=int, default=-3)
parser.add_argument('--batch_size', type=int, default=100)
parser.add_argument('--epochs', type=int, default=1)
args = parser.parse_args()

experiment = Run(project='mnist')
experiment.create(tags=['examples', 'mxnet'])
# Log the conv2 args under the conv2 keys (the original logged conv1 values here).
experiment.log_inputs(conv1_kernel=args.conv1_kernel,
                      conv1_filters=args.conv1_filters,
                      conv1_activation=args.conv1_activation,
                      conv2_kernel=args.conv2_kernel,
                      conv2_filters=args.conv2_filters,
                      conv2_activation=args.conv2_activation,
                      fc1_hidden=args.fc1_hidden,
                      fc1_activation=args.fc1_activation,
                      optimizer=args.optimizer,
                      log_learning_rate=args.log_learning_rate,
                      epochs=args.epochs)

logger.info('Downloading data ...')
mnist = mx.test_utils.get_mnist()
def __init__(self, run, filepath, **kwargs):
    self.run = run
    if settings.CLIENT_CONFIG.is_managed:
        self.run = self.run or Run()
    super().__init__(filepath, **kwargs)
                    metavar='N',
                    help='how many batches to wait before logging training status')
parser.add_argument('--fp16-allreduce', action='store_true', default=False,
                    help='use fp16 compression during allreduce')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

# Horovod: initialize library.
hvd.init()
torch.manual_seed(args.seed)

# Polyaxon
if hvd.rank() == 0:
    experiment = Run()

if args.cuda:
    # Horovod: pin GPU to local rank.
    torch.cuda.set_device(hvd.local_rank())
    torch.cuda.manual_seed(args.seed)

kwargs = {'num_workers': 1, 'pin_memory': True} if args.cuda else {}
train_dataset = \
    datasets.MNIST('data-%d' % hvd.rank(), train=True, download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       transforms.Normalize((0.1307,), (0.3081,))
                   ]))

# Horovod: use DistributedSampler to partition the training data.
def main():
    args = parse_args()
    experiment = Run()

    params = load_values(args.param_file)
    if params:
        experiment.log_inputs(**params)

    metrics = load_values(args.metric_file)
    if metrics:
        experiment.log_metrics(**metrics)

    if args.tag:
        experiment.log_tags(args.tag)

    for dataset in load_datasets(args.data_file):
        experiment.log_data_ref(**dataset)

    if args.capture_png:
        imgs = discover_png(experiment.get_outputs_path())
        for img in imgs:
            if isinstance(img, str):
                experiment.log_image(img)
            elif isinstance(img, SerialImages):
                for idx, path in enumerate(img.paths):
                    experiment.log_image(path, name=img.name, step=idx)
            else:
                raise NotImplementedError('We should never get here.')
import tensorflow as tf

from polyaxon.tracking import Run

mnist = tf.keras.datasets.mnist

(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_test = x_train / 255.0, x_test / 255.0

experiment = Run(project='mnist', artifacts_path='/tmp/mnist/')
experiment.create(tags=['examples', 'tensorflow'])


def create_model():
    return tf.keras.models.Sequential([
        tf.keras.layers.Flatten(input_shape=(28, 28)),
        tf.keras.layers.Dense(512, activation='relu'),
        tf.keras.layers.Dropout(0.2),
        tf.keras.layers.Dense(10, activation='softmax')
    ])


model = create_model()
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(x=x_train,
          y=y_train,
          epochs=5,
          validation_data=(x_test, y_test))
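A hedged follow-up sketch (not part of the source snippet): evaluate the model and record the final metrics on the run, using only calls that appear elsewhere in these examples.

# Illustrative only: log final test metrics to the tracked run.
test_loss, test_accuracy = model.evaluate(x_test, y_test)
experiment.log_metrics(loss=test_loss, accuracy=test_accuracy)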
parser.add_argument('--pool1_size', type=int, default=2)
parser.add_argument('--conv2_size', type=int, default=5)
parser.add_argument('--conv2_out', type=int, default=64)
parser.add_argument('--conv2_activation', type=str, default='relu')
parser.add_argument('--pool2_size', type=int, default=2)
parser.add_argument('--dropout', type=float, default=0.2)
parser.add_argument('--fc1_size', type=int, default=1024)
parser.add_argument('--fc1_activation', type=str, default='sigmoid')
parser.add_argument('--optimizer', type=str, default='adam')
parser.add_argument('--log_learning_rate', type=int, default=-3)
parser.add_argument('--batch_size', type=int, default=100)
parser.add_argument('--epochs', type=int, default=1)
args = parser.parse_args()

# Polyaxon
experiment = Run(project='mnist', artifacts_path='/tmp/mnist')
experiment.create(tags=['examples', 'tensorflow'])
experiment.log_inputs(conv1_size=args.conv1_size,
                      conv1_out=args.conv1_out,
                      conv1_activation=args.conv1_activation,
                      pool1_size=args.pool1_size,
                      conv2_size=args.conv2_size,
                      conv2_out=args.conv2_out,
                      conv2_activation=args.conv2_activation,
                      pool2_size=args.pool2_size,
                      fc1_activation=args.fc1_activation,
                      fc1_size=args.fc1_size,
                      optimizer=args.optimizer,
                      log_learning_rate=args.log_learning_rate,
                      batch_size=args.batch_size,
                      dropout=args.dropout,
parser.add_argument('--skip_top', type=int, default=30,
                    help='Top occurring words to skip')
parser.add_argument('--maxlen', type=int, default=100)
parser.add_argument('--batch_size', type=int, default=32)
parser.add_argument('--num_nodes', type=int, default=8)
parser.add_argument('--optimizer', type=str, default='adam')
parser.add_argument('--log_learning_rate', type=int, default=-3)
parser.add_argument('--dropout', type=float, default=0.8)
parser.add_argument('--epochs', type=int, default=1)
parser.add_argument('--seed', type=int, default=234)
args = parser.parse_args()

# Polyaxon
experiment = Run(project='bidirectional-lstm')
experiment.create(tags=['examples', 'keras'])
experiment.log_inputs(max_features=args.max_features,
                      skip_top=args.skip_top,
                      maxlen=args.maxlen,
                      batch_size=args.batch_size,
                      num_nodes=args.num_nodes,
                      optimizer=args.optimizer,
                      log_learning_rate=args.log_learning_rate,
                      dropout=args.dropout,
                      epochs=args.epochs,
                      seed=args.seed)

logger.info('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=args.max_features,