def trainEval(param):
    """Run one complete training pass for the parameter set ``param``.

    ``param`` is turned into a run configuration by the file-level ``conf``
    callable; that configuration drives model creation, data loading and
    training. Nothing is returned.

    NOTE(review): ``tuneLog=True`` presumably switches on tuning-oriented
    logging inside ``fun.runTrain`` -- confirm against funPytorch.
    """
    runConf = conf(param)

    # Create the model and its optimizer on the file-level ``device``.
    net, optimizer = fun.makeModel(runConf, device)

    # Only the first of the two values returned by processData is needed.
    loaders, _unused = fun.processData(runConf)

    # Train the model.
    fun.runTrain(runConf, net, optimizer, loaders, tuneLog=True)
def myRun(conf, k, lock):
    """Train configuration ``conf[k]``, retrying setup until it succeeds.

    Model creation and data loading run while holding ``lock``, so only one
    worker sharing that lock performs setup at a time. If setup raises
    ``RuntimeError`` the worker waits 120 seconds and tries again. Training
    itself runs without the lock; the lock is taken again only to print the
    completion message.

    Args:
        conf: Indexable collection of run configurations.
        k: Key/index of the configuration to run; also used in log messages.
        lock: Lock shared between workers. It must support the ``with``
            statement (``threading`` and ``multiprocessing`` locks do).

    Raises:
        Any exception other than ``RuntimeError`` raised during setup, and
        any exception raised by ``fun.runTrain``, propagates to the caller.
    """
    while True:
        # ``with`` guarantees the lock is released even when setup fails with
        # an exception that is not handled here; the previous manual
        # acquire()/release() pair left the lock held in that case, blocking
        # every other worker.
        with lock:
            try:
                print("======= CREATE MODEL {}".format(k))
                model, optim = fun.makeModel(conf[k], device)
                print("======= LOAD DATA {}".format(k))
                dataloaders, _ = fun.processData(conf[k])
                print("======= TRAIN MODEL {}".format(k))
                ready = True
            except RuntimeError:
                print("======= RETRY MODEL {}".format(k))
                ready = False

        if ready:
            break

        # Setup failed: back off (outside the lock) before the next attempt.
        time.sleep(120)

    fun.runTrain(conf[k], model, optim, dataloaders)

    # Take the lock again for the completion message, as the setup messages do.
    with lock:
        print("======= FINISH MODEL {}".format(k))
# Script: train a new model from scratch using configurationsPrimeTest.configRun1.
#
# Optional GPU selection (disabled):
#import os
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
#os.environ["CUDA_VISIBLE_DEVICES"]="0";

import configurationsPrimeTest as configurations
import funPytorch as fun

# Run configuration and target device for this script.
conf = configurations.configRun1
device = "cuda:0"

#model.main(configurations.configRun1)

# Step 1: create the model and its optimizer.
print("======= CREATE MODEL")
model, optim = fun.makeModel(conf, device)

# Step 2: load the data; processData returns five values here.
print("======= LOAD DATA")
X, y, train, valid, test = fun.processData(conf)

# Step 3: train.
print("======= TRAIN MODEL")
fun.runTrain(conf, model, optim, X, y, train, valid, test)

# Optional follow-up steps (disabled):
#print("======= TEST MODEL")
#fun.runTest(conf, model, X, y, test)

#print("======= SAVE MODEL")
#fun.saveModel(conf, model, optim)
# Script: load a previously stored model and keep training it, using
# configurations.configRun1Continue.
#
# Optional GPU selection (disabled):
#import os
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
#os.environ["CUDA_VISIBLE_DEVICES"]="1";

import configurations
import funPytorch as fun

# Configuration used for loading, data processing and training.
confContinue = configurations.configRun1Continue
#confSave = configurations.configRun1Save
device = "cuda:0"

#model.main(configurations.configRun1)

# Step 1: the model and optimizer come from loadModel rather than makeModel.
# (The banner text is kept as in the original script.)
print("======= CREATE MODEL")
model, optim = fun.loadModel(confContinue, device)

# Step 2: load the data; processData returns five values here.
print("======= LOAD DATA")
X, y, train, valid, test = fun.processData(confContinue)

# Step 3: train.
print("======= TRAIN MODEL")
fun.runTrain(confContinue, model, optim, X, y, train, valid, test)

# Optional follow-up steps (disabled):
#print("======= TEST MODEL")
#fun.runTest(confContinue, model, X, y, test)

#print("======= SAVE MODEL")
#fun.saveModel(confSave, model, optim)
# Script: train the configuration named on the command line and send a
# notification when training has finished.
#
# Usage: <script> configName
#   configName is the name of an attribute of the ``configurations`` module.
#
# Optional GPU selection (disabled):
#import os
#os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID";
#os.environ["CUDA_VISIBLE_DEVICES"]="0";

import sys
import socket

import configurations
import funPytorch as fun
import notifier

device = "cuda:0"

if len(sys.argv) != 2:
    print("Use {} configName".format(sys.argv[0]))
    # Signal the misuse to the calling shell instead of exiting with status 0.
    sys.exit(1)

configName = sys.argv[1]

# Fail with a readable message rather than a raw AttributeError traceback
# when the requested configuration does not exist.
if not hasattr(configurations, configName):
    sys.exit("Unknown configuration '{}' in module configurations".format(configName))

# Look the configuration up on the imported module directly instead of going
# through sys.modules.
conf = getattr(configurations, configName)

print("====================")
print("RUN USING {}".format(configName))
print("====================")

print("======= CREATE MODEL")
model, optim = fun.makeModel(conf, device)

# Only the first of the two values returned by processData is used.
print("======= LOAD DATA")
dataloaders, _ = fun.processData(conf)

print("======= TRAIN MODEL")
fun.runTrain(conf, model, optim, dataloaders, verbose=True)

# Report completion, including the host the run executed on.
notifier.sendMessage("Training of {} finished on {}".format(
    configName, socket.gethostname()))