def main(run, data_path, output_path, log_path, layer_width, batch_size,
         epochs, learning_rate):
    info('Data')
    file = download(data_path)
    with np.load(file) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    print('Drawing samples...')
    draw_samples(run, log_path, 'training_set', x_train, y_train)
    draw_samples(run, log_path, 'test_set', x_test, y_test)
    print('Done!')

    train_set = x_train.reshape((len(x_train), -1)) / 255.

    info('Training')

    # function shape
    model = keras.Sequential([
        keras.layers.Reshape((28, 28, 1)),
        keras.layers.Conv2D(32, (3, 3), activation='relu'),
        keras.layers.Conv2D(64, (3, 3), activation='relu'),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(layer_width, activation='relu'),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(10, activation='softmax')
    ])

    # how to optimize the function
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path, filename + '.e{epoch:02d}-{accuracy:.2f}.hdf5'))

    model.fit(train_set,
              y_train,
              epochs=epochs,
              batch_size=batch_size,
              callbacks=[logaml, checkpoint])

    model.summary()

    # add time prefix folder
    file_output = os.path.join(output_path, 'latest.hdf5')
    print('Serializing h5 model to:\n{}'.format(file_output))
    model.save(file_output)

    info('Test')
    test_set = x_test.reshape((len(x_test), -1)) / 255.
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print('\nTest accuracy:', test_acc)
def main(
    run,
    data_path,
    output_path,
    log_path,
    layer_width,
    batch_size,
    epochs,
    learning_rate,
):
    info("Data")
    file = download(data_path)
    with np.load(file) as f:
        x_train, y_train = f["x_train"], f["y_train"]
        x_test, y_test = f["x_test"], f["y_test"]

    print("Drawing samples...")
    draw_samples(run, log_path, "training_set", x_train, y_train)
    draw_samples(run, log_path, "test_set", x_test, y_test)
    print("Done!")

    train_set = x_train.reshape((len(x_train), -1)) / 255.0

    info("Training")

    # function shape
    model = keras.Sequential([
        keras.layers.Dense(layer_width, activation="relu"),
        keras.layers.Dense(10, activation="softmax"),
    ])

    # how to optimize the function
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path, filename + ".e{epoch:02d}-{accuracy:.2f}.hdf5"))

    model.fit(
        train_set,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[logaml, checkpoint],
    )

    info("Test")
    test_set = x_test.reshape((len(x_test), -1)) / 255.0
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print("\nTest accuracy:", test_acc)
def main(run, data_path, output_path, log_path, layer_width, batch_size,
         epochs, learning_rate):
    info('Data')
    file = download(data_path)
    with np.load(file) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    print('Drawing samples...')
    draw_samples(run, log_path, 'training_set', x_train, y_train)
    draw_samples(run, log_path, 'test_set', x_test, y_test)
    print('Done!')

    train_set = x_train.reshape((len(x_train), -1)) / 255.

    info('Training')

    # function shape
    model = keras.Sequential([
        keras.layers.Dense(layer_width, activation='relu'),
        keras.layers.Dense(10, activation='softmax')
    ])

    # how to optimize the function
    model.compile(optimizer=Adam(learning_rate=learning_rate),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path, filename + '.e{epoch:02d}-{accuracy:.2f}.hdf5'))

    model.fit(train_set,
              y_train,
              epochs=epochs,
              batch_size=batch_size,
              callbacks=[logaml, checkpoint])

    info('Test')
    test_set = x_test.reshape((len(x_test), -1)) / 255.
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print('\nTest accuracy:', test_acc)
def main(run, source_path, target_path, epochs, batch, lr):
    info('Preprocess')
    print(f'Using TensorFlow v.{tf.__version__}')
    print(
        f'GPUs Available: {len(tf.config.experimental.list_physical_devices("GPU"))}'
    )

    if not os.path.exists(target_path):
        os.makedirs(target_path)

    # load tfrecord metadata
    prep_step = os.path.join(source_path, 'metadata.json')
    with open(prep_step) as f:
        prep = json.load(f)

    for i in prep:
        print('{} => {}'.format(i, prep[i]))

    labels = prep['categories']
    img_shape = (prep['image_size'], prep['image_size'], 3)
    record_sz = prep['records']

    records = os.path.join(source_path, prep['file'])
    print('Loading {}'.format(records))
    with open(records, 'r') as f:
        filenames = [
            os.path.join(source_path, s.strip()) for s in f.readlines()
        ]

    print('Splitting data:')
    train, test = split(filenames)
    print(' Train: {}'.format(len(train)))
    print(' Test: {}'.format(len(test)))

    print('Creating training dataset')
    train_ds = tf.data.TFRecordDataset(train)
    train_ds = train_ds.map(map_func=parse_record, num_parallel_calls=5)
    train_ds = train_ds.shuffle(buffer_size=10000)
    train_ds = train_ds.batch(batch)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat(epochs)

    # model
    info('Creating Model')
    base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
                                                   include_top=False,
                                                   weights='imagenet',
                                                   pooling='avg')
    base_model.trainable = True
    model = tf.keras.Sequential(
        [base_model,
         tf.keras.layers.Dense(len(labels), activation='softmax')])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(os.path.join(
        target_path,
        filename + '.e{epoch:02d}-{accuracy:.2f}-v{val_accuracy:.2f}.hdf5'),
                                 monitor='val_accuracy',
                                 save_best_only=True)

    # using both test and val in this case
    test_ds = tf.data.TFRecordDataset(test).map(parse_record).batch(batch)
    test_steps = math.ceil((len(test) * record_sz) / batch)

    steps_per_epoch = math.ceil((len(train) * record_sz) / batch)
    history = model.fit(train_ds,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[logaml, checkpoint],
                        validation_data=test_ds,
                        validation_steps=test_steps)

    info('Writing metadata')
    out_file = os.path.join(target_path, 'metadata.json')
    output = {
        'image_size': prep['image_size'],
        'categories': prep['categories'],
        'index': prep['index'],
        'generated': datetime.now().strftime('%m/%d/%y %H:%M:%S'),
        'run': str(run.id)
    }

    print('Writing out metadata to {}'.format(out_file))
    with open(str(out_file), 'w') as f:
        json.dump(output, f)

    print('Done!')
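# The script above calls a `split` helper that is defined elsewhere in the
# project. A minimal sketch of a deterministic two-way split over the list of
# TFRecord shard filenames is shown below; the 80/20 ratio, the fixed seed, and
# the name `split_sketch` are illustrative assumptions, not the project's code.
import random


def split_sketch(filenames, train_fraction=0.8, seed=42):
    files = list(filenames)
    random.Random(seed).shuffle(files)       # shuffle shards reproducibly
    cut = int(len(files) * train_fraction)   # boundary between train and test
    return files[:cut], files[cut:]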
def main(run, data_path, output_path, target_output, epochs, batch, lr):
    info('Preprocess')

    # load tfrecord metadata
    prep_step = os.path.join(output_path, 'prep.json')
    with open(prep_step) as f:
        prep = json.load(f)

    for i in prep:
        print('{} => {}'.format(i, prep[i]))

    labels = prep['categories']
    img_shape = (prep['image_size'], prep['image_size'], 3)
    record_sz = prep['records']

    records = os.path.join(data_path, prep['file'])
    print('Loading {}'.format(records))
    with open(records, 'r') as f:
        filenames = [os.path.join(data_path, s.strip()) for s in f.readlines()]

    print('Splitting data:')
    train, test, val = split(filenames)
    print(' Train: {}'.format(len(train)))
    print(' Test: {}'.format(len(test)))
    print(' Val: {}'.format(len(val)))

    print('Creating training dataset')
    train_ds = tf.data.TFRecordDataset(train)
    train_ds = train_ds.map(map_func=parse_record, num_parallel_calls=5)
    train_ds = train_ds.shuffle(buffer_size=10000)
    train_ds = train_ds.batch(batch)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat(epochs)

    # model
    info('Creating Model')
    # base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
    #                                                include_top=False,
    #                                                weights='imagenet',
    #                                                pooling='avg')
    base_model = tf.keras.applications.VGG19(input_shape=img_shape,
                                             include_top=False,
                                             weights='imagenet',
                                             pooling='avg')
    base_model.trainable = True
    model = tf.keras.Sequential([
        base_model,
        tf.keras.layers.Dense(len(labels), activation='softmax')
    ])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    model_path = os.path.join(target_output, 'model')
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    checkpoint = ModelCheckpoint(
        os.path.join(model_path, filename + '.e{epoch:02d}-{accuracy:.2f}.hdf5'))

    test_ds = tf.data.TFRecordDataset(test).map(parse_record).batch(batch)
    test_steps = math.ceil((len(test) * record_sz) / batch)

    steps_per_epoch = math.ceil((len(train) * record_sz) / batch)
    history = model.fit(train_ds,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[logaml, checkpoint],
                        validation_data=test_ds,
                        validation_steps=test_steps)

    print('Done!')
def main(
    run,
    data_path,
    output_path,
    log_path,
    layer_width,
    batch_size,
    epochs,
    learning_rate,
):
    info("Data")
    file = download(data_path)
    with np.load(file) as f:
        x_train, y_train = f["x_train"], f["y_train"]
        x_test, y_test = f["x_test"], f["y_test"]

    print("Drawing samples...")
    draw_samples(run, log_path, "training_set", x_train, y_train)
    draw_samples(run, log_path, "test_set", x_test, y_test)
    print("Done!")

    train_set = x_train.reshape((len(x_train), -1)) / 255.0

    info("Training")

    # function shape
    model = keras.Sequential([
        keras.layers.Reshape((28, 28, 1)),
        keras.layers.Conv2D(32, (3, 3), activation="relu"),
        keras.layers.Conv2D(64, (3, 3), activation="relu"),
        keras.layers.MaxPooling2D(pool_size=(2, 2)),
        keras.layers.Dropout(0.25),
        keras.layers.Flatten(),
        keras.layers.Dense(layer_width, activation="relu"),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(10, activation="softmax"),
    ])

    # how to optimize the function
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(
        os.path.join(output_path, filename + ".e{epoch:02d}-{accuracy:.2f}.hdf5"))

    model.fit(
        train_set,
        y_train,
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[logaml, checkpoint],
    )

    model.summary()

    # add time prefix folder
    file_output = os.path.join(output_path, "latest.hdf5")
    print("Serializing h5 model to:\n{}".format(file_output))
    model.save(file_output)

    info("Test")
    test_set = x_test.reshape((len(x_test), -1)) / 255.0
    test_loss, test_acc = model.evaluate(test_set, y_test)
    print("\nTest accuracy:", test_acc)
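# A minimal, hypothetical entry point for the MNIST trainers above, assuming
# the script is launched with hyperparameters as command-line arguments. The
# argument names and defaults are illustrative, not the project's actual CLI;
# Run.get_context() is the standard Azure ML SDK call and returns an offline
# run (whose id starts with "OfflineRun") when executed outside Azure ML.
if __name__ == "__main__":
    import argparse

    from azureml.core import Run

    parser = argparse.ArgumentParser(description="MNIST training (sketch)")
    parser.add_argument("--data-path", required=True)
    parser.add_argument("--output-path", default="outputs")
    parser.add_argument("--log-path", default="logs")
    parser.add_argument("--layer-width", type=int, default=128)
    parser.add_argument("--batch-size", type=int, default=64)
    parser.add_argument("--epochs", type=int, default=5)
    parser.add_argument("--learning-rate", type=float, default=0.001)
    args = parser.parse_args()

    run = Run.get_context()
    main(run, args.data_path, args.output_path, args.log_path,
         args.layer_width, args.batch_size, args.epochs, args.learning_rate)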
def main(run, source_path, target_path, epochs, batch, lr):
    info('Preprocess')

    if not os.path.exists(target_path):
        os.makedirs(target_path)

    # load tfrecord metadata
    prep_step = os.path.join(source_path, 'metadata.json')
    with open(prep_step) as f:
        prep = json.load(f)

    for i in prep:
        print('{} => {}'.format(i, prep[i]))

    if not run.id.startswith('OfflineRun'):
        run.log('total_records', prep['total_records'])

    labels = prep['categories']
    img_shape = (prep['image_size'], prep['image_size'], 3)
    record_sz = prep['records']

    records = os.path.join(source_path, prep['file'])
    print('Loading {}'.format(records))
    with open(records, 'r') as f:
        filenames = [
            os.path.join(source_path, s.strip()) for s in f.readlines()
        ]

    print('Splitting data:')
    train, test = split(filenames)
    print(' Train: {}'.format(len(train)))
    print(' Test: {}'.format(len(test)))

    print('Creating training dataset')
    train_ds = tf.data.TFRecordDataset(train)
    train_ds = train_ds.map(map_func=parse_record, num_parallel_calls=5)
    train_ds = train_ds.shuffle(buffer_size=10000)
    train_ds = train_ds.batch(batch)
    train_ds = train_ds.prefetch(buffer_size=5)
    train_ds = train_ds.repeat(epochs)

    # model
    info('Creating Model')
    # base_model = tf.keras.applications.MobileNetV2(input_shape=img_shape,
    #                                                include_top=False,
    #                                                weights='imagenet',
    #                                                pooling='avg')
    base_model = tf.keras.applications.VGG19(input_shape=img_shape,
                                             include_top=False,
                                             weights='imagenet',
                                             pooling='avg')
    # base_model = tf.keras.applications.ResNet50(input_shape=img_shape,
    #                                             include_top=False,
    #                                             weights='imagenet',
    #                                             pooling='avg')

    base_model.trainable = True

    # # Transfer learning to boost the model's accuracy
    # # Unfreeze the following layers
    # trainableLayers = ['global_average_pooling2d', 'block5_pool', 'block5_conv4']
    # # Layer names from:
    # # https://www.tensorflow.org/tutorials/generative/style_transfer
    # for layer in base_model.layers:
    #     print("Detected layer " + layer.name)
    #     if layer.name in trainableLayers:
    #         print(" - Trainable " + layer.name)
    #         layer.trainable = True
    #     else:
    #         layer.trainable = False

    model = tf.keras.Sequential(
        [base_model,
         tf.keras.layers.Dense(len(labels), activation='softmax')])

    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

    model.summary()

    # training
    info('Training')

    # callbacks
    logaml = AMLCallback(run)
    filename = datetime.now().strftime("%d.%b.%Y.%H.%M")
    checkpoint = ModelCheckpoint(os.path.join(
        target_path,
        filename + '.e{epoch:02d}-{accuracy:.2f}-v{val_accuracy:.2f}.hdf5'),
                                 monitor='val_accuracy',
                                 save_best_only=True)

    # using both test and val in this case
    test_ds = tf.data.TFRecordDataset(test).map(parse_record).batch(batch)
    test_steps = math.ceil((len(test) * record_sz) / batch)

    steps_per_epoch = math.ceil((len(train) * record_sz) / batch)
    history = model.fit(train_ds,
                        epochs=epochs,
                        steps_per_epoch=steps_per_epoch,
                        callbacks=[logaml, checkpoint],
                        validation_data=test_ds,
                        validation_steps=test_steps)

    info('Writing metadata')
    out_file = os.path.join(target_path, 'metadata.json')
    output = {
        'image_size': prep['image_size'],
        'categories': prep['categories'],
        'index': prep['index'],
        'generated': datetime.now().strftime('%m/%d/%y %H:%M:%S'),
    }

    # If we are online
    if not run.id.startswith('OfflineRun'):
        # Log history
        for i in history.history:
            # We want to log only the last run metric
            run.log(i, history.history[i][len(history.history[i]) - 1])

    print('Writing out metadata to {}'.format(out_file))
    with open(str(out_file), 'w') as f:
        json.dump(output, f)

    print('Done!')
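# The TFRecord pipelines above map a `parse_record` helper that is defined
# elsewhere in the project. A minimal sketch is shown below; the feature keys
# ('image' holding encoded JPEG bytes, 'label' holding an int64 class index),
# the resize to the metadata's image_size, and the name `parse_record_sketch`
# are assumptions, not the project's exact TFRecord schema.
import tensorflow as tf


def parse_record_sketch(example_proto, image_size=160):
    features = {
        'image': tf.io.FixedLenFeature([], tf.string),
        'label': tf.io.FixedLenFeature([], tf.int64),
    }
    parsed = tf.io.parse_single_example(example_proto, features)
    image = tf.io.decode_jpeg(parsed['image'], channels=3)
    image = tf.image.resize(image, (image_size, image_size))
    image = tf.cast(image, tf.float32) / 255.0  # scale pixels to [0, 1]
    return image, parsed['label']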