def load_tensorflow(self, model_file, serialization_mode, init_args):
    """Load a TensorFlow SavedModel and re-export it with ``builtin_models``.

    Args:
        model_file: Either a file inside the SavedModel directory or the
            directory itself; for a file, its parent directory is used.
        serialization_mode: Accepted for interface parity; not used here.
        init_args: Passed to ``self.parse_tensorflow_init_args`` to obtain
            the graph tags and the signature-def key.

    The re-exported model is written to ``self.out_model_path``.
    """
    import tensorflow as tf
    # numpy / pandas are not referenced below; the imports are retained so
    # the function's import-time behaviour stays exactly as before.
    import numpy as np  # noqa: F401
    import pandas as pd  # noqa: F401
    from builtin_models.tensorflow import save_model

    # Resolve the export directory from whatever path we were handed.
    if os.path.isfile(model_file):
        export_dir = os.path.dirname(os.path.abspath(model_file))
    else:
        export_dir = model_file
    print(f'IN_MODEL_PATH = {export_dir}')
    print(f'IN_MODLE_FILES = {os.listdir(export_dir)}')

    # A dedicated graph/session pair receives the loaded model.
    graph = tf.Graph()
    session = tf.Session(graph=graph)

    tags, signature_key = self.parse_tensorflow_init_args(init_args)
    print(
        f'tags = {tags}, def_key = {signature_key}, model_folder = {export_dir}'
    )

    meta_graph = tf.saved_model.load(session, tags, export_dir)
    print(f'graph_def = {meta_graph}')

    # The selected signature names the input and output tensors to export.
    signature = meta_graph.signature_def[signature_key]
    save_model(
        session,
        signature.inputs,
        signature.outputs,
        path=self.out_model_path,
    )
def load_keras(self, model_file, serialization_mode):
    """Load a Keras model from ``model_file`` and re-save it.

    Args:
        model_file: Path handed to ``keras.models.load_model``.
        serialization_mode: Accepted for interface parity; not used here.

    The model is written to ``self.out_model_path`` through
    ``builtin_models.keras.save_model``.
    """
    # ``keras`` itself is not referenced below; the import is retained so
    # the function's import-time behaviour is unchanged.
    import keras  # noqa: F401
    from keras.models import load_model
    from builtin_models.keras import save_model

    save_model(load_model(model_file), self.out_model_path)
def load_sklearn(self, model_file, serialization_mode):
    """Load a scikit-learn model and re-save it with ``builtin_models``.

    The file is first read with ``joblib.load``; if that fails for any
    ordinary reason, it is read again as a plain pickle.

    Args:
        model_file: Path to the serialized model.
        serialization_mode: Accepted for interface parity; not used here.

    Raises:
        Whatever ``open`` / ``pickle.load`` raise when the fallback also
        fails.

    The model is written to ``self.out_model_path``.
    """
    import pickle
    import joblib
    from builtin_models.sklearn import save_model

    # SECURITY NOTE: both joblib and pickle execute arbitrary code while
    # deserializing. Only feed this function files from a trusted source.
    try:
        model = joblib.load(model_file)
    except Exception:
        # ``except Exception`` (not a bare ``except``) so Ctrl-C and
        # SystemExit still propagate instead of triggering the fallback.
        with open(model_file, 'rb') as fp:
            # The previous code called the module itself (``pickle(fp)``),
            # which always raised TypeError.
            model = pickle.load(fp)
    save_model(model, self.out_model_path)
def run_pipeline(action, model_path):
    """Train ``MnistNet`` on MNIST and save the trained network.

    Args:
        action: Not used by this function.
        model_path: Destination handed to ``save_model``.

    MNIST is downloaded to ``./data`` if needed. Training runs for a fixed
    number of epochs with Adam and cross-entropy loss, on CUDA when
    available and on CPU otherwise.
    """
    # Hyper-parameters.
    input_size = 784   # 28 x 28 image flattened into one vector
    hidden_size = 500  # nodes in the hidden layer
    num_classes = 10   # output classes, digits 0..9
    num_epochs = 5     # full passes over the training set
    batch_size = 64    # samples per iteration
    lr = 1e-3          # optimizer step size

    train_data = dsets.MNIST(
        root='./data',
        train=True,
        transform=transforms.ToTensor(),
        download=True,
    )
    test_data = dsets.MNIST(
        root='./data',
        train=False,
        transform=transforms.ToTensor(),
    )
    train_gen = torch.utils.data.DataLoader(
        dataset=train_data, batch_size=batch_size, shuffle=True)
    # The test loader is built but not consumed in this function.
    test_gen = torch.utils.data.DataLoader(
        dataset=test_data, batch_size=batch_size, shuffle=False)

    net = MnistNet(input_size, hidden_size, num_classes)
    if torch.cuda.is_available():
        device = 'cuda'
    else:
        device = 'cpu'
    print(f'DEVICE={device}')
    print(f'os.environ={os.environ}')

    net = net.to(device)
    loss_function = nn.CrossEntropyLoss().to(device)
    optimizer = torch.optim.Adam(net.parameters(), lr=lr)

    steps_per_epoch = len(train_data) // batch_size
    for epoch_no in range(1, num_epochs + 1):
        for step_no, (images, labels) in enumerate(train_gen, start=1):
            # Flatten every image into a 784-element row.
            images = Variable(images.view(-1, 28 * 28)).to(device)
            labels = Variable(labels).to(device)

            optimizer.zero_grad()
            loss = loss_function(net(images), labels)
            loss.backward()
            optimizer.step()

            # Progress line every 100 steps.
            if step_no % 100 == 0:
                print('Epoch [%d/%d], Step [%d/%d]' %
                      (epoch_no, num_epochs, step_no, steps_per_epoch))

    save_model(net, model_path, conda_env=None)
    print("save_model Done")
def load_pytorch(model_file, serialization, out_model_path):
    """Re-save a cloudpickle-serialized PyTorch model.

    Args:
        model_file: Path to the serialized model.
        serialization: Only ``'cloudpickle'`` does anything; ``'savedmodel'``
            and every other value return without loading or saving.
        out_model_path: Destination handed to ``save_model``.
    """
    from builtin_models.pytorch import save_model

    if serialization != 'cloudpickle':
        # Remaining modes are intentionally no-ops at this point.
        return

    print(f'model loading(cloudpickle): {model_file} to {out_model_path}')
    import cloudpickle
    with open(model_file, 'rb') as stream:
        model = cloudpickle.load(stream)
    save_model(model, out_model_path)
    print(f'model loaded: {out_model_path}')
def entrance(model_path='pretrained',
             data_path='',
             save_path='saved_model',
             label_map_path='saved_path',
             model_type='densenet201',
             pretrained=True,
             memory_efficient=False,
             num_classes=2,
             epochs=100,
             batch_size=16,
             learning_rate=0.001,
             random_seed=231,
             patience=2):
    """Train a DenseNet classifier on an image folder and export it.

    The images under ``data_path`` are split roughly 90/10 into training
    and validation subsets using a random permutation. The model is trained
    with ``train``, saved to ``save_path`` with ``save_model``, and
    ``index_to_label.json`` is copied from ``data_path`` to
    ``label_map_path``.

    Args:
        model_path, model_type, pretrained, memory_efficient: Forwarded to
            ``MyDenseNet``.
        data_path: Root folder read by ``datasets.ImageFolder``.
        save_path: Output directory for training artefacts and the model.
        label_map_path: Output directory for the label map file.
        num_classes: Forwarded to ``MyDenseNet`` as ``classes``.
        epochs, batch_size, random_seed, patience: Forwarded to ``train``.
        learning_rate: Forwarded to ``train`` as ``lr``.
    """
    train_transforms, test_transforms = get_transform()

    # Two views of the same folder: one per transform pipeline.
    train_set = datasets.ImageFolder(data_path, transform=train_transforms)
    valid_set = datasets.ImageFolder(data_path, transform=test_transforms)

    # The last tenth of a random permutation becomes the validation split.
    indices = torch.randperm(len(train_set))
    valid_size = len(train_set) // 10
    split_at = len(indices) - valid_size
    train_set = torch.utils.data.Subset(train_set, indices[:split_at])
    valid_set = torch.utils.data.Subset(valid_set, indices[split_at:])

    model = MyDenseNet(
        model_type=model_type,
        model_path=model_path,
        pretrained=pretrained,
        memory_efficient=memory_efficient,
        classes=num_classes,
    )

    os.makedirs(save_path, exist_ok=True)
    train(
        model=model,
        train_set=train_set,
        valid_set=valid_set,
        save_path=save_path,
        epochs=epochs,
        batch_size=batch_size,
        lr=learning_rate,
        random_seed=random_seed,
        patience=patience,
    )

    # No extra source files are bundled with the saved model.
    save_model(model, save_path, dependencies=[])

    os.makedirs(label_map_path, exist_ok=True)
    label_map_name = 'index_to_label.json'
    copyfile(
        os.path.join(data_path, label_map_name),
        os.path.join(label_map_path, label_map_name),
    )
    logger.info('This experiment has been completed.')
def load_keras(model_file, serialization, out_model_path):
    """Load a Keras model from a local file and re-save it.

    Args:
        model_file: Path handed to ``load_model_from_local_file``.
        serialization: Accepted for interface parity; not used here.
        out_model_path: Directory the model is saved to. When it is empty
            or ``None`` the previous hard-coded location ``'./model'`` is
            used.
    """
    from builtin_models.keras import save_model, load_model_from_local_file

    model = load_model_from_local_file(model_file)
    # The output path argument used to be ignored in favour of a fixed
    # './model'; honour it, keeping the old location only as a fallback.
    save_model(model, out_model_path or './model')
def _save_state_dict(model, path):
    """Write ``model``'s state dict to ``path``, unwrapping DataParallel."""
    # train() wraps the model in DataParallel only when more than one CUDA
    # device is visible; in that case the real network is ``model.module``.
    net = model.module if torch.cuda.device_count() > 1 else model
    torch.save(net.state_dict(), path)


def train(model, train_set, valid_set, test_set, save_path, num_classes,
          epochs, batch_size, lr=0.001, wd=0.0001, momentum=0.9,
          random_seed=None, model_type='densenet201', memory_efficient=False,
          label_list=None, dependencies=None):
    """Train ``model`` with SGD, checkpoint it, and report the test error.

    Per epoch: step the LR scheduler, run ``train_epoch``, evaluate with
    ``test`` (on the validation loader when present, otherwise on the test
    loader), write ``model.pth`` under ``save_path``, export the model with
    ``builtin_models.pytorch.save_model`` and append a row to
    ``results.csv``. After the last epoch a fresh ``MyDenseNet`` is built,
    loaded from ``model.pth`` and evaluated on the test set.

    Args:
        model: Network to train; moved to CUDA (and wrapped in
            ``DataParallel`` for several devices) when CUDA is available.
        train_set, valid_set, test_set: Datasets; ``valid_set`` may be
            ``None``.
        save_path: Existing directory for ``results.csv`` and ``model.pth``.
        num_classes, model_type, memory_efficient: Used to rebuild the
            network for the final evaluation.
        epochs, batch_size: Training length and loader batch size.
        lr, wd, momentum: SGD learning rate, weight decay and momentum.
        random_seed: When not ``None``, seeds the torch RNGs.
        label_list: Not used by this function.
        dependencies: Paths forwarded to ``save_model``; ``None`` means no
            dependencies.
    """
    # A ``[]`` default would be one shared list handed to save_model on
    # every call; create a fresh one per call instead.
    if dependencies is None:
        dependencies = []

    if random_seed is not None:
        # Always seed the CPU generator too: DataLoader shuffling draws from
        # it, so seeding only CUDA left runs irreproducible on GPU machines.
        torch.manual_seed(random_seed)
        if torch.cuda.is_available():
            torch.cuda.manual_seed_all(random_seed)

    use_cuda = torch.cuda.is_available()
    loader_kwargs = dict(batch_size=batch_size, pin_memory=use_cuda,
                         num_workers=0)
    train_loader = torch.utils.data.DataLoader(
        train_set, shuffle=True, **loader_kwargs)
    test_loader = torch.utils.data.DataLoader(
        test_set, shuffle=False, **loader_kwargs)
    if valid_set is None:
        valid_loader = None
    else:
        valid_loader = torch.utils.data.DataLoader(
            valid_set, shuffle=False, **loader_kwargs)

    if torch.cuda.is_available():
        model = model.cuda()
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model).cuda()

    optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=momentum,
                                nesterov=True, weight_decay=wd)
    # LR is multiplied by 0.1 at 50% and 75% of the run.
    # NOTE(review): these milestones are floats; whether a non-integer
    # milestone ever fires depends on the installed PyTorch version - confirm.
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=[0.5 * epochs, 0.75 * epochs], gamma=0.1)

    results_path = os.path.join(save_path, 'results.csv')
    checkpoint_path = os.path.join(save_path, 'model.pth')

    with open(results_path, 'w') as f:
        f.write(
            'epoch,train_loss,train_error,valid_loss,valid_error,test_error\n')

    best_error = 1
    for epoch in range(epochs):
        # NOTE(review): PyTorch >= 1.1 expects scheduler.step() after the
        # optimizer steps; the order is left untouched to keep the current
        # LR schedule - confirm against the installed version.
        scheduler.step()
        _, train_loss, train_error = train_epoch(model=model,
                                                 loader=train_loader,
                                                 optimizer=optimizer,
                                                 epoch=epoch,
                                                 epochs=epochs)
        _, valid_loss, valid_error = test(
            model=model,
            loader=valid_loader if valid_loader else test_loader,
            is_test=not valid_loader)

        # With a validation set only the best model is checkpointed;
        # without one, every epoch overwrites the checkpoint.
        if valid_loader:
            if valid_error < best_error:
                best_error = valid_error
                print('New best error: {:.4f}'.format(best_error))
                _save_state_dict(model, checkpoint_path)
        else:
            _save_state_dict(model, checkpoint_path)

        # NOTE(review): the source layout was collapsed onto one line; this
        # export section is reconstructed as running once per epoch - confirm
        # the intended nesting.
        from builtin_models.pytorch import save_model
        curr_path = "."
        print(f'CURRENT PATH = {os.path.abspath(curr_path)}')
        print(f'CURRENT FOLDER = {os.listdir(curr_path)}')
        print(f'DEPENDENCIES = {dependencies}')
        for dep in dependencies:
            print(f'{dep} exists: {os.path.exists(dep)}')
        save_model(model, save_path, dependencies=dependencies)
        print(f'MODEL PATH(OUT) = {save_path}')
        print(f'MODEL FOLDER(OUT) = {os.listdir(save_path)}')

        # Log results (test_error column is left empty for per-epoch rows).
        with open(results_path, 'a') as f:
            f.write('{:3d},{:.6f},{:.6f},{:.5f},{:.5f},\n'.format(
                epoch + 1, train_loss, train_error, valid_loss, valid_error))

    # Final evaluation uses a freshly built network loaded from the
    # checkpoint rather than the in-memory training model.
    model = MyDenseNet(model_type=model_type,
                       pretrained=False,
                       memory_efficient=memory_efficient,
                       classes=num_classes)
    model.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
    if torch.cuda.is_available():
        model = model.cuda()
        if torch.cuda.device_count() > 1:
            model = torch.nn.DataParallel(model).cuda()

    _, _, test_error = test(model=model, loader=test_loader, is_test=True)
    with open(results_path, 'a') as f:
        f.write(',,,,,{:.5f}\n'.format(test_error))
    print('Final test error: {:.4f}'.format(test_error))
def load_pytorch(model_file, serialization, out_model_path, model_class_file,
                 init_args=None):
    """Load a PyTorch model in one of three formats and re-save it.

    Args:
        model_file: Path to the serialized model or state dict.
        serialization: ``'cloudpickle'``, ``'savedmodel'`` (``torch.load`` of
            a whole model) or ``'statedict'``.
        out_model_path: Destination handed to ``save_model``.
        model_class_file: Optional path to the script defining the model
            class. When given, it is recorded as a dependency and the
            scripts in its directory are loaded with ``load_scripts``.
        init_args: Passed to ``parse_init``, which yields the class name and
            the constructor keyword arguments used in ``'statedict'`` mode.

    Raises:
        NotImplementedError: Unknown ``serialization``, or the model class
            cannot be found in the loaded scripts.
        ModuleNotFoundError: Unpickling needs a module that the loaded
            scripts do not provide.
    """
    import torch
    from builtin_models.pytorch import save_model

    dependencies = []
    modules = {}
    if model_class_file:
        dependencies.append(model_class_file)
        basepath = os.path.dirname(model_class_file) or '.'
        modules = load_scripts(basepath)
    class_name, init_args = parse_init(init_args)

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'DEVICE={device}')

    def _load_with_module_retry(loader):
        """Call ``loader``, registering missing modules from ``modules``."""
        patched = set()
        while True:
            try:
                return loader()
            except ModuleNotFoundError as ex:
                missing = ex.name
                # Match on the last dotted component, e.g. 'pkg.net' -> 'net'.
                short_name = missing.rpartition('.')[-1] if missing else None
                # Re-raise when we cannot supply the module, or when we
                # already did and it still fails (avoids retrying forever).
                if short_name not in modules or missing in patched:
                    raise
                sys.modules[missing] = modules[short_name]
                patched.add(missing)

    # SECURITY NOTE: cloudpickle.load and torch.load unpickle arbitrary
    # objects and can execute code; only load files from a trusted source.
    if serialization == 'cloudpickle':
        print(f'model loading(cloudpickle): {model_file} to {out_model_path}')
        import cloudpickle

        def _read_cloudpickle():
            with open(model_file, 'rb') as fp:
                return cloudpickle.load(fp)

        model = _load_with_module_retry(_read_cloudpickle)
    elif serialization == 'savedmodel':
        print(f'model loading(savedmodel): {model_file} to {out_model_path}')
        # torch.load opens the path itself; no separate file handle needed.
        model = _load_with_module_retry(
            lambda: torch.load(model_file, map_location=device))
    elif serialization == 'statedict':
        print(f'model loading(statedict): {model_file} to {out_model_path}')
        model_class = None
        if class_name:
            for module in modules.values():
                # Default of None lets the search continue past modules that
                # do not define the class instead of raising AttributeError.
                model_class = getattr(module, class_name, None)
                if model_class:
                    break
        if not model_class:
            raise NotImplementedError(
                f'model class {class_name!r} not found in loaded scripts')
        print(f'init_args = {init_args}')
        model = model_class(**init_args) if init_args else model_class()
        print(f'MODEL1 = {model}')
        model.load_state_dict(torch.load(model_file, map_location=device))
        print(f'MODEL2 = {model}')
    else:
        raise NotImplementedError(
            f'unsupported serialization mode: {serialization!r}')

    print(f'model loaded: {out_model_path}')
    print(f'model={model}, dependencies={dependencies}')
    save_model(model, out_model_path, dependencies=dependencies)
    print(f'MODEL_FOLDER: {os.listdir(out_model_path)}')
def load_pytorch(self, model_file, serialization_mode, init_args):
    """Load a PyTorch model in one of three formats and re-save it.

    Args:
        model_file: Path to the serialized model or state dict.
        serialization_mode: ``'cloudpickle'``, ``'savedmodel'``
            (``torch.load`` of a whole model) or ``'statedict'``.
        init_args: Used in ``'statedict'`` mode only; passed to
            ``self.parse_pytorch_init_args`` to obtain the class name and
            the constructor keyword arguments.

    Reads ``self.modules`` (used here as a name -> module mapping),
    ``self.dependencies`` and ``self.out_model_path``.

    Raises:
        NotImplementedError: Unknown ``serialization_mode``, no class name
            could be determined, or the class is not found in
            ``self.modules``.
        ModuleNotFoundError: Unpickling needs a module that
            ``self.modules`` does not provide.
    """
    import torch
    from builtin_models.pytorch import save_model

    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    print(f'DEVICE={device}')

    def _load_with_module_retry(loader):
        """Call ``loader``, registering missing modules from self.modules."""
        patched = set()
        while True:
            try:
                return loader()
            except ModuleNotFoundError as ex:
                missing = ex.name
                # Match on the last dotted component, e.g. 'pkg.net' -> 'net'.
                short_name = missing.rpartition('.')[-1] if missing else None
                # The handler used to reference an undefined bare name
                # ``modules`` (NameError); the mapping lives on the instance.
                # Also stop if a module we already supplied still fails.
                if short_name not in self.modules or missing in patched:
                    raise
                sys.modules[missing] = self.modules[short_name]
                patched.add(missing)

    # SECURITY NOTE: cloudpickle.load and torch.load unpickle arbitrary
    # objects and can execute code; only load files from a trusted source.
    if serialization_mode == 'cloudpickle':
        print(f'CLOUDPICKLE: {model_file} to {self.out_model_path}')
        import cloudpickle

        def _read_cloudpickle():
            with open(model_file, 'rb') as fp:
                return cloudpickle.load(fp)

        model = _load_with_module_retry(_read_cloudpickle)
    elif serialization_mode == 'savedmodel':
        print(f'SAVEDMODEL: {model_file} to {self.out_model_path}')
        # torch.load opens the path itself; no separate file handle needed.
        model = _load_with_module_retry(
            lambda: torch.load(model_file, map_location=device))
    elif serialization_mode == 'statedict':
        print(f'STATEDICT: {model_file} to {self.out_model_path}')
        class_name, init_args = self.parse_pytorch_init_args(init_args)
        if not class_name:
            # With exactly one module, fall back to that module's key.
            if len(self.modules) == 1:
                class_name = next(iter(self.modules))
            else:
                raise NotImplementedError(
                    'model class name missing and cannot be inferred')
        print(f'CLASS_NAME={class_name}')
        model_class = None
        for module in self.modules.values():
            model_class = getattr(module, class_name, None)
            if model_class:
                break
        if not model_class:
            raise NotImplementedError(
                f'model class {class_name!r} not found in loaded modules')
        print(f'INIT = {init_args}')
        model = model_class(**init_args) if init_args else model_class()
        model.load_state_dict(torch.load(model_file, map_location=device))
    else:
        raise NotImplementedError(
            f'unsupported serialization mode: {serialization_mode!r}')

    save_model(model, self.out_model_path, dependencies=self.dependencies)