def from_class_model(cls, path):
    """Rebuild a wrapper instance from a directory that holds a fully saved model.

    The data reader and the hyper-parameters are restored through their own
    ``from_saved`` constructors; the model itself is loaded with
    ``cls.load_model`` from the first entry under ``path`` matching ``*_save_*``.

    :param path: directory the model was saved to.
    :return: a new ``cls`` built from the restored hyper-parameters, data reader and model.
    """
    model_dir = tb.P(path)
    reader = DataReader.from_saved(model_dir)
    hparams = HyperParam.from_saved(model_dir)
    saved_model_path = model_dir.search('*_save_*')[0]
    model = cls.load_model(saved_model_path)  # static method.
    return cls(hparams, reader, model)
def test_path_module():
    """Exercise indexing, ``+``, ``append`` and ``prepend`` of ``tb.P`` on a backslash-separated path."""
    # ============================== File Specs ==============================
    p = tb.P(r"dir1\\dir2\\dir3\\file.ext")

    # Indexing returns individual path components.
    first, last, second = p[0], p[-1], p[1]
    assert first.string == "dir1"
    assert last.string == "file.ext"
    assert second.string == "dir2"

    # `+` with a text that carries its own extension.
    converted = p + "_converted.ext3"
    assert converted.string == "dir1\\dir2\\dir3\\file_converted.ext3"

    # `append` / `prepend` modify the file name and keep the extension.
    appended = p.append("_modified")
    assert appended.string == "dir1\\dir2\\dir3\\file_modified.ext"
    prepended = p.prepend("modified_")
    assert prepended.string == "dir1\\dir2\\dir3\\modified_file.ext"
def __init__(self, engine, db=None, sch=None, vws=False):
    """Wrap a database engine and prepare connection, session and metadata holders.

    :param engine: engine object; its ``url.database`` is kept as ``self.path``
        and ``connect()`` is called on it.
    :param db: stored as ``self.db``.
    :param sch: stored as ``self.sch``.
    :param vws: stored as ``self.vws``.
    """
    self.eng = engine
    self.path = tb.P(engine.url.database)
    self.con = engine.connect()
    session_factory = sessionmaker()
    self.ses = session_factory(bind=engine)  # ORM style

    self.db, self.sch, self.vws = db, sch, vws

    # Placeholders, assigned here so the attributes exist before `refresh()` runs.
    self.insp = None
    self.meta = MetaData()
    self.schema = self.tables = self.views = self.sch_tab = self.sch_vws = None

    self.refresh()
def from_class_weights(cls, path, hparam_class=None, data_class=None, device_name=None):
    """Re-create a model wrapper from a save directory, then load its weights.

    Hyper-parameters and the data reader are restored first (through the given
    classes' ``from_saved`` when provided, otherwise by reading the pickled
    whole objects), a fresh ``cls`` is built from them, and finally the weights
    are loaded from the first entry under ``path`` matching ``*_save_*``.

    :param path: directory the model was saved to.
    :param hparam_class: optional class whose ``from_saved(path)`` restores the hyper-parameters.
    :param data_class: optional class whose ``from_saved(path, hp=...)`` restores the data reader.
    :param device_name: if truthy, overrides ``device_name`` of the restored hyper-parameters.
    :return: the constructed instance of ``cls`` with weights and history attached.
    :raises IndexError: if nothing under ``path`` matches ``*_save_*``.
    """
    path = tb.P(path)

    if hparam_class is not None:
        hp_obj = hparam_class.from_saved(path)
    else:
        # Same file that `HyperParam.save` writes for the pickled whole object.
        # NOTE(security): unpickling executes arbitrary code; only load trusted directories.
        hp_obj = (path / HyperParam.subpath / "hparams.HyperParam.pkl").readit()
    if device_name:
        hp_obj.device_name = device_name

    if data_class is not None:
        d_obj = data_class.from_saved(path, hp=hp_obj)
    else:
        d_obj = (path / DataReader.subpath / "data_reader.DataReader.pkl").readit()
    # Make sure the reader points at the hyper-parameters restored above.
    d_obj.hp = hp_obj

    model_obj = cls(hp_obj, d_obj)
    model_obj.load_weights(path.search('*_save_*')[0])
    # History is optional: fall back to an empty list when the file is absent.
    model_obj.history = (path / "metadata/history.pkl").readit(notfound=tb.L())
    print(f"Class {model_obj.__class__} Loaded Successfully.")
    return model_obj
def from_saved(cls, path, *args, r=False, scope=None, **kwargs):
    """Restore hyper-parameters from the data-only pickle under ``path``.

    Only ``path`` is used; the remaining parameters are accepted but not
    forwarded to the parent implementation.

    :param path: model directory; the file is looked up at
        ``path / cls.subpath / "hparams.HyperParam.dat.pkl"``.
    :return: whatever the parent class's ``from_saved`` returns for that file.
    """
    saved_file = tb.P(path) / cls.subpath / "hparams.HyperParam.dat.pkl"
    parent = super(HyperParam, cls)
    return parent.from_saved(path=saved_file)
def from_saved_weights(cls, parent_dir, model_class):
    """Rebuild a collection of models from the sub-directories of ``parent_dir``.

    Every entry of ``parent_dir`` matching ``*__model__*`` is loaded with
    ``model_class.from_class_weights``.

    :param parent_dir: directory containing the individual model directories.
    :param model_class: class providing ``from_class_weights(path)``.
    :return: instance of ``cls`` whose ``models`` holds the loaded models.
    """
    # Search once so that `size` and `models` are derived from the same listing.
    model_dirs = tb.P(parent_dir).search('*__model__*')
    obj = cls(model_class=model_class, path=parent_dir, size=len(model_dirs))
    obj.models = model_dirs.apply(model_class.from_class_weights)
    return obj
def save_dir(self):
    """Return the directory for this object: ``root`` joined with ``name``."""
    root_dir = tb.P(self.root)
    return root_dir / self.name
def from_saved(cls, path, *args, **kwargs):
    """Build a new instance and fill it with the state saved under ``path``.

    :param path: model directory; state is read from
        ``path / cls.subpath / "data_reader.DataReader.dat.pkl"``.
    :param args: forwarded to the constructor.
    :param kwargs: forwarded to the constructor.
    :return: the populated instance.
    """
    state_file = tb.P(path) / cls.subpath / "data_reader.DataReader.dat.pkl"
    instance = cls(*args, **kwargs)
    instance.__setstate__(state_file.readit())
    return instance
class HyperParam(tb.Struct):
    """Use this class to organize model hyperparameters:

    * one place to control everything: a control panel.
    * When doing multiple experiments, one command in console reminds you of settings used in that run (hp.__dict__).
    * Ease of saving settings of experiments! and also replicating it later.
    """
    subpath = tb.P('metadata/hyper_params')  # location within model directory where this will be saved.

    def __init__(self, **kwargs):
        """Set the default hyper-parameters, then apply ``kwargs`` on top via ``update``."""
        super().__init__(
            # ==================== Environment ========================
            name='default_model_name_' + tb.randstr(),
            root=tb.P.tmp(folder="tmp_models"),
            pkg_name='tensorflow',
            device_name=Device.gpu0,
            # ===================== Data ==============================
            seed=234,
            shuffle=True,
            precision='float32',
            # ===================== Model =============================
            # depth = 3
            # ===================== Training ==========================
            test_split=0.2,  # test split
            learning_rate=0.0005,
            batch_size=32,
            epochs=30,
        )
        self._configured = False
        # NOTE(review): this resets the `device_name` passed to super().__init__ above;
        # unless `kwargs` supplies one, `config_device` will fail on `None.value` -- confirm intent.
        self.device_name = None
        self.save_type = ["data", "whole", "both"][-1]
        self.update(**kwargs)

    @property
    def save_dir(self):
        """Directory of this model: ``root`` joined with ``name``."""
        return tb.P(self.root) / self.name

    def save(self, path=None, itself=True, r=False, include_code=False, add_suffix=True):
        """Write the hyper-parameters as text and as pickle(s) under ``subpath``.

        :param path: model directory to save into; defaults to ``self.save_dir``.
            The files are placed in ``<path>/<subpath>/``, which is where
            ``from_saved(path)`` looks for them.
        :param itself: accepted for signature compatibility; not used.
        :param r: accepted for signature compatibility; not used.
        :param include_code: accepted for signature compatibility; not used.
        :param add_suffix: accepted for signature compatibility; not used.

        ``self.save_type`` selects what is pickled: ``"whole"`` (the object),
        ``"data"`` (data only, ``itself=False``) or ``"both"``.
        """
        base = (tb.P(path) if path is not None else self.save_dir).joinpath(self.subpath)
        # Human-readable dump; `create(parent_only=True)` is called on the file path first.
        base.joinpath('hparams.txt').create(parent_only=True).write_text(data=str(self))
        if self.save_type in {"whole", "both"}:
            super(HyperParam, self).save(path=base / "hparams.HyperParam.pkl", itself=True, add_suffix=False)
        if self.save_type in {"data", "both"}:
            super(HyperParam, self).save(path=base / "hparams.HyperParam.dat.pkl", itself=False, add_suffix=False)

    @classmethod
    def from_saved(cls, path, *args, r=False, scope=None, **kwargs):
        """Restore from ``path / subpath / "hparams.HyperParam.dat.pkl"``; only ``path`` is used."""
        return super(HyperParam, cls).from_saved(path=tb.P(path) / cls.subpath / "hparams.HyperParam.dat.pkl")

    def __repr__(self):
        return tb.Struct(self.__dict__).print(config=True, return_str=True)

    @property
    def pkg(self):
        """Import and return the package named by ``pkg_name``.

        :raises ValueError: if ``pkg_name`` is neither ``"tensorflow"`` nor ``"torch"``.
        """
        if self.pkg_name == "tensorflow":
            handle = __import__("tensorflow")
        elif self.pkg_name == "torch":
            handle = __import__("torch")
        else:
            raise ValueError("pkg_name must be either `tensorflow` or `torch`")
        return handle

    @property
    def device(self):
        """Resolve ``device_name`` to a device object of the selected package.

        :return: for tensorflow, an entry of ``list_physical_devices`` (or a
            ``MirroredStrategy`` in the ``KeyError`` fallback); for torch, a
            ``torch.device``.
        :raises NotImplementedError: if the package is neither tensorflow nor torch.
        :raises AssertionError: if a requested GPU is not available.
        """
        handle = self.pkg
        if handle.__name__ == 'tensorflow':
            # To disable gpu, here's one way (before importing tensorflow):
            #     if device == 'cpu': os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
            # handle.device(device) is used as a context: every tensor constructed and every
            # computation takes place therein.
            # For more manual control, use .cpu() and .gpu('0') .gpu('1') attributes.
            devices = handle.config.experimental.list_physical_devices('CPU')
            devices += handle.config.experimental.list_physical_devices('GPU')
            # Pair the names with the devices found, CPU entries first.
            device_dict = dict(zip(['cpu', 'gpu0', 'gpu1'], devices))

            if self.device_name is Device.auto:
                chosen_device = Device.gpu0 if len(devices) > 1 else Device.cpu
            else:
                chosen_device = self.device_name
            device_str = chosen_device.value

            if device_str not in device_dict:
                for _ in range(10):  # deliberately noisy so that the fallback is not missed.
                    print(f"This machine has no such a device to be chosen! ({device_str})")
                # Revert to cpu, keep going, instead of throwing an error.
                device_str = "cpu"

            try:
                return device_dict[device_str]
            except KeyError:  # 2gpus not a key in the dict.
                # NOTE(review): because of the cpu fallback above, this branch is only reached
                # when 'cpu' itself is missing from `device_dict` -- confirm this is intended.
                assert len(handle.config.experimental.get_visible_devices()) > 2
                return handle.distribute.MirroredStrategy()

        elif handle.__name__ == 'torch':
            device = self.device_name
            if device is Device.auto:
                return handle.device('cuda:0') if handle.cuda.is_available() else handle.device('cpu')
            elif device is Device.gpu0:
                assert handle.cuda.device_count() > 0, f"GPU {device} not available"
                return handle.device('cuda:0')
            elif device is Device.gpu1:
                assert handle.cuda.device_count() > 1, f"GPU {device} not available"
                return handle.device('cuda:1')
            elif device is Device.cpu:
                return handle.device('cpu')
            # How to run Torch model on 2 GPUs ?
            # Any other `device_name` falls through and the property evaluates to None.
        else:
            raise NotImplementedError(f"I don't know how to configure devices for this package {handle}")

    def config_device(self):
        """Restrict tensorflow to the selected device; does nothing for torch.

        For ``gpu0``/``gpu1`` memory growth is enabled and only that GPU is left
        visible; for any other device name all GPUs are hidden. ``ValueError``
        and ``RuntimeError`` from tensorflow are reported with ``print`` rather
        than raised.
        """
        handle = self.pkg
        device_str = self.device_name.value
        # NOTE(review): evaluated before the `try`, so an AssertionError raised inside
        # `self.device` is not handled by the `except AssertionError` below -- confirm.
        device = self.device
        if handle.__name__ == 'torch':
            return None
        try:
            # Now we want only one device to be seen:
            if device_str in ['gpu0', 'gpu1']:
                limit_memory = True
                if limit_memory:  # memory growth can only be limited for GPU devices.
                    handle.config.experimental.set_memory_growth(device, True)
                handle.config.experimental.set_visible_devices(device, 'GPU')  # will only see this device
                # logical_gpus = handle.config.experimental.list_logical_devices('GPU')
                # now, logical gpu is created only for visible device
                # print(len(devices), "Physical devices,", len(logical_gpus), "Logical GPU")
            else:  # for cpu devices, we want no gpu to be seen:
                handle.config.experimental.set_visible_devices([], 'GPU')
                # logical_gpus = handle.config.experimental.list_logical_devices('GPU')
                # now, logical gpu is created only for visible device
                # print(len(devices), "Physical devices,", len(logical_gpus), "Logical GPU")
        except AssertionError as e:
            print(e)
            print(f"Trying again with auto-device {Device.auto}")
            self.device_name = Device.auto
            self.config_device()
        except ValueError:
            print("Cannot set memory growth on non-GPU devices")
        except RuntimeError as e:
            print(e)
            print("Device already configured, skipping ... ")
def save(self, path=None, *args, **kwargs):
    """Pickle the reader under ``<model dir>/<subpath>`` according to ``hp.save_type``.

    :param path: model directory; when ``None`` the directory ``self.hp.save_dir`` is used.
    :param args: accepted but not used.
    :param kwargs: accepted but not used.

    ``"whole"`` pickles the object itself, ``"data"`` pickles the data only
    (``itself=False``), ``"both"`` writes the two files.
    """
    model_dir = self.hp.save_dir if path is None else tb.P(path)
    target_dir = model_dir.joinpath(self.subpath)

    # (save types that trigger the variant, file name, value of `itself`)
    variants = (
        ({"whole", "both"}, "data_reader.DataReader.pkl", True),
        ({"data", "both"}, "data_reader.DataReader.dat.pkl", False),
    )
    for save_types, file_name, whole_object in variants:
        if self.hp.save_type in save_types:
            super(DataReader, self).save(path=target_dir / file_name, itself=whole_object, add_suffix=False)
class DataReader(tb.Base):
    """This class holds the dataset for training and testing.

    However, it also holds meta data for preprocessing and postprocessing. The latter is essential
    at inference time, but the former need not be saved. As such, at save time, this class only
    remembers the attributes inside the `.specs` `Struct` (and `.scaler`, see `__getstate__`).
    Thus, whenever encountering such type of data, make sure to keep them inside that `Struct`.
    """
    subpath = tb.P("metadata/data_reader")  # location within the model directory used by save / from_saved.

    def __init__(self, hp: HyperParam = None, specs=None, split=None, *args, **kwargs):
        """
        :param hp: hyper-parameters object; read by ``save``, ``split_the_data``, ``sample_dataset``
            and ``get_random_input_output``.
        :param specs: ``Struct`` of attributes to be saved; a fresh empty one is created when falsy.
        :param split: container of the train/test split; populated by ``split_the_data``.
        :param args: forwarded to the parent constructor.
        :param kwargs: forwarded to the parent constructor.
        """
        super().__init__(*args, **kwargs)
        self.hp = hp
        self.split = split
        self.plotter = None
        # attributes to be saved.
        self.specs = specs if specs else tb.Struct()
        self.scaler = None

    def save(self, path=None, *args, **kwargs):
        """Pickle the reader under ``<model dir>/<subpath>`` according to ``hp.save_type``.

        :param path: model directory; defaults to ``self.hp.save_dir``.
        """
        base = (tb.P(path) if path is not None else self.hp.save_dir).joinpath(self.subpath)
        if self.hp.save_type in {"whole", "both"}:
            super(DataReader, self).save(path=base / "data_reader.DataReader.pkl", itself=True, add_suffix=False)
        if self.hp.save_type in {"data", "both"}:
            super(DataReader, self).save(path=base / "data_reader.DataReader.dat.pkl", itself=False, add_suffix=False)

    @classmethod
    def from_saved(cls, path, *args, **kwargs):
        """Construct ``cls(*args, **kwargs)`` and load the data-only pickle saved under ``path`` into it."""
        instance = cls(*args, **kwargs)
        data = (tb.P(path) / cls.subpath / "data_reader.DataReader.dat.pkl").readit()
        instance.__setstate__(data)
        return instance

    def __getstate__(self):
        """Only ``specs`` and ``scaler`` are persisted."""
        return dict(specs=self.specs, scaler=self.scaler)

    def __setstate__(self, state):
        """hp is missing, deliberate by design."""
        return self.__dict__.update(state)

    def __repr__(self):
        return f"DataReader Object with these keys: \n" + tb.Struct(self.__dict__).print(config=True, return_str=True)

    def split_the_data(self, *args, strings=None, **kwargs):
        """Split the given arrays into train and test parts and store them in ``self.split``.

        :param args: whatever to be sent to train_test_split (the arrays to split).
        :param kwargs: whatever to be sent to train_test_split.
        :param strings: one name per array in ``args``; results are stored as ``<name>_train`` and
            ``<name>_test``. Defaults to ``["x", "y"]``. ``x_train`` and ``y_train`` must exist
            afterwards because the input/output shapes are read from them.
        :return: None
        """
        # import sklearn.preprocessing as preprocessing
        from sklearn.model_selection import train_test_split
        result = train_test_split(*args, test_size=self.hp.test_split, shuffle=self.hp.shuffle,
                                  random_state=self.hp.seed, **kwargs)
        self.split = tb.Struct(train_loader=None, test_loader=None)
        if strings is None:
            strings = ["x", "y"]
        # train_test_split returns (a_train, a_test, b_train, b_test, ...): even index = train, odd = test.
        self.split.update({astring + '_train': result[ii * 2] for ii, astring in enumerate(strings)})
        self.split.update({astring + '_test': result[ii * 2 + 1] for ii, astring in enumerate(strings)})
        self.specs.ip_shape = self.split.x_train.shape[1:]  # useful info for instantiating models.
        self.specs.op_shape = self.split.y_train.shape[1:]  # useful info for instantiating models.
        print(f"================== Training Data Split ===========================")
        self.split.print()

    def sample_dataset(self, aslice=None, dataset="test"):
        """Return a tuple with a slice of every entry of ``self.split`` whose key contains ``_<dataset>``.

        :param aslice: slice to take; defaults to the first ``hp.batch_size`` items.
        :param dataset: which part of the split to sample, e.g. ``"test"`` or ``"train"``.
        """
        if aslice is None:
            aslice = slice(0, self.hp.batch_size)
        # returns a tuple containing a slice of data (x_test, y_test, names_test, index_test etc)
        keys = self.split.keys().filter(f"'_{dataset}' in x")
        return tuple([self.split[key][aslice] for key in keys])

    def get_random_input_output(self, ip_shape=None, op_shape=None):
        """Generate one random batch of inputs and outputs.

        :param ip_shape: per-sample input shape (any sequence); defaults to ``specs.ip_shape``.
        :param op_shape: per-sample output shape (any sequence); defaults to ``specs.op_shape``.
        :return: ``(ip, op)`` arrays of shape ``(hp.batch_size, *shape)`` with dtype ``hp.precision``
            (``"float32"`` if ``hp`` has no ``precision``).
        """
        if ip_shape is None:
            ip_shape = self.specs.ip_shape
        if op_shape is None:
            op_shape = self.specs.op_shape
        dtype = getattr(self.hp, "precision", "float32")
        batch = (self.hp.batch_size,)
        # tuple(...) so that shapes given as lists can be concatenated with the batch dimension.
        ip = np.random.randn(*(batch + tuple(ip_shape))).astype(dtype)
        op = np.random.randn(*(batch + tuple(op_shape))).astype(dtype)
        return ip, op

    def preprocess(self, *args, **kwargs):
        """Identity placeholder: returns the first positional argument unchanged."""
        _ = args, kwargs, self
        return args[0]  # acts like identity.

    def postprocess(self, *args, **kwargs):
        """Identity placeholder: returns the first positional argument unchanged."""
        _ = args, kwargs, self
        return args[0]  # acts like identity

    def standardize(self):
        """Fit a ``StandardScaler`` on ``split.x_train`` and apply it to ``x_train`` and ``x_test``."""
        assert self.split is not None, "Load up the data first."
        from sklearn.preprocessing import StandardScaler
        self.scaler = StandardScaler()
        self.split.x_train = self.scaler.fit_transform(self.split.x_train)
        self.split.x_test = self.scaler.transform(self.split.x_test)

    def image_viz(self, pred, gt=None, names=None, **kwargs):
        """Show predictions, optionally next to the ground truth. Assumes numpy inputs.

        The created ``tb.ImShow`` object is kept in ``self.plotter``.
        """
        if gt is None:
            labels = None
            self.plotter = tb.ImShow(pred, labels=labels, sup_titles=names, origin='lower', **kwargs)
        else:
            labels = ['Reconstruction', 'Ground Truth']
            self.plotter = tb.ImShow(pred, gt, labels=labels, sup_titles=names, origin='lower', **kwargs)

    def viz(self, *args, **kwargs):
        """Implement here how you would visualize a batch of input and output pair.
        Assume Numpy arguments rather than tensors."""
        _ = self, args, kwargs
        return None
def compute_num_of_lines_of_code_in_repo(path=None, extension=".py", r=True, **kwargs):
    """Sum the number of lines of all files with the given extension under ``path``.

    :param path: directory to scan; defaults to the current working directory,
        resolved when the function is called (not when the module is imported).
    :param extension: file extension to look for; files are matched with ``*<extension>``.
    :param r: passed to ``search`` as ``r``.
    :param kwargs: forwarded to ``search``.
    :return: total number of lines over all matched files.
    """
    root = tb.P(tb.P.cwd() if path is None else path)
    files = root.search(f"*{extension}", r=r, **kwargs)
    return files.read_text().splitlines().apply(len).np.sum()
def get_list_of_executables_defined_in_shell():
    """Print the ``*.exe`` files found in the directories listed in the ``PATH`` variable.

    The variable is read as ``PATH`` and split on ``os.pathsep``. On Windows
    this is equivalent to reading ``Path`` and splitting on ``;`` (environment
    keys are case-insensitive there), while on other platforms it no longer
    raises ``KeyError``.

    :return: whatever ``print()`` of the flattened result list returns.
    """
    raw = tb.os.environ.get("PATH", "")
    entries = raw.split(tb.os.pathsep) if raw else []
    return tb.L(entries).apply(lambda x: tb.P(x).search("*.exe")).flatten().print()
def build_parser():
    """Parse the command line and launch the requested file or command in a terminal.

    With ``--main`` (and a non-empty file) the file is run as a script through
    ``ipython -i``. Otherwise a small script is assembled -- a star-import of
    the file as a module, followed by the ``--cmd`` text and an optional call
    of ``--func`` -- and handed to ``tb.Terminal().run_script``.
    """
    parser = argparse.ArgumentParser(description="Generic Parser to launch a script in a separate window.")

    # POSITIONAL ARGUMENT (UNNAMED)
    # if dest is not specified, then, it has same path as keyword, e.g. "--dest"
    parser.add_argument(dest="file", help="Python file path.", default="this")
    parser.add_argument("--cmd", "-c", dest="cmd", help="Python command.", default="")

    # FLAGS: all of them default to False.
    # Default is running as module, unless indicated by --main flag, which runs the script as main.
    flags = (
        (("--main",), "Flag tells to run the file as main."),
        (("--here", "-H"), "Flag for running in this window."),
        (("-s", "--solitary"), "Specify a non-interactive session."),
        (("-p", "--python"), "Use python over IPython."),
        (("-e",), "Explore the file (what are its contents)."),
    )
    for names, help_text in flags:
        parser.add_argument(*names, help=help_text, action="store_true")

    # OPTIONAL KEYWORD
    parser.add_argument("--func", "-F", dest="func", help="function to be run after import", default="")
    parser.add_argument("--terminal", "-t", dest="terminal", default="",  # can choose `wt`
                        help="Flag to specify which terminal to be used. Default CMD.")
    parser.add_argument("--shell", "-S", dest="shell", default="",
                        help="Flag to specify which terminal to be used. Default CMD.")

    args = parser.parse_args()
    print("Crocodile.run: args of the firing command: ")
    tb.Struct(args.__dict__).print(dtype=False)

    file_given = args.file != ""
    in_new_window = not args.here

    # --main: run the file itself, don't import it.
    if args.main is True and file_given:
        tb.Terminal().run_async("ipython", "-i", f"{args.file}", terminal=args.terminal, new_window=in_new_window)
        return

    # Otherwise: run as a module (i.e. import it).
    if file_given:
        path = tb.P(args.file)
        if path.suffix == ".py":
            # a regular path was passed (a\b) ==> converting to: a.b format.
            if path.is_absolute():
                path = path.rel2cwd()
            path = str((path - path.suffix)).replace(tb.os.sep, ".")
        else:
            # It must be that user passed a.b format
            assert path.exists() is False, "I could not determine whether this is a.b or a/b format."
        script = f"\nfrom {path} import *\n" + args.cmd + "\n"
    else:
        script = args.cmd

    if args.func != "":
        script += f"tb.E.run_globally({args.func}, globals())"

    tb.Terminal().run_script(script=script, terminal=args.terminal, new_window=in_new_window,
                             interactive=not args.solitary, ipython=not args.python)