예제 #1
0
 def from_class_model(cls, path):
     """Rebuild a wrapper instance from a directory that holds a fully saved model.

     The data reader and hyper-parameters are restored through their own
     `from_saved` constructors, the serialized model is loaded through
     `cls.load_model`, and the three are handed to `cls(...)`.

     :param path: directory of the saved run (anything accepted by `tb.P`).
     :return: a new `cls` instance built from (hyper-params, data reader, model).
     :raises IndexError: presumably when nothing under `path` matches `*_save_*`
         (the first search hit is indexed unconditionally).
     """
     root = tb.P(path)
     reader = DataReader.from_saved(root)
     hparams = HyperParam.from_saved(root)
     # The first `*_save_*` match is taken to be the serialized model.
     saved_model = root.search('*_save_*')[0]
     return cls(hparams, reader, cls.load_model(saved_model))  # load_model is a static method.
예제 #2
0
def test_path_module():
    """Exercise indexing, `+`, `append` and `prepend` on a `tb.P` path."""
    # ===================================== File Specs ================================================================

    p = tb.P(r"dir1\\dir2\\dir3\\file.ext")

    # Integer indexing is expected to yield a single path component.
    for index, expected in ((0, "dir1"), (-1, "file.ext"), (1, "dir2")):
        assert p[index].string == expected

    # `+` is expected to splice text onto the end of the path string.
    converted = p + "_converted.ext3"
    assert converted.string == "dir1\\dir2\\dir3\\file_converted.ext3"

    # `append` / `prepend` are expected to edit the file stem only.
    modified = p.append("_modified")
    assert modified.string == "dir1\\dir2\\dir3\\file_modified.ext"
    prefixed = p.prepend("modified_")
    assert prefixed.string == "dir1\\dir2\\dir3\\modified_file.ext"
예제 #3
0
    def __init__(self, engine, db=None, sch=None, vws=False):
        """Wrap a database engine: open a connection, create a session and reset the cached metadata.

        :param engine: engine object; it must expose `.url.database` and `.connect()`
            (presumably a SQLAlchemy engine - confirm against callers).
        :param db: stored as `self.db`; not interpreted here.
        :param sch: stored as `self.sch`; not interpreted here.
        :param vws: stored as `self.vws`; not interpreted here.
        """
        self.eng = engine
        self.path = tb.P(self.eng.url.database)
        self.con = self.eng.connect()
        session_factory = sessionmaker()
        self.ses = session_factory(bind=self.eng)  # ORM style
        self.db, self.sch, self.vws = db, sch, vws

        # Cached introspection state starts empty; `refresh()` runs last
        # (presumably it fills these slots - confirm in its definition).
        self.insp = None
        self.meta = MetaData()
        self.schema = self.tables = self.views = None
        self.sch_tab = self.sch_vws = None
        self.refresh()
예제 #4
0
    def from_class_weights(cls, path, hparam_class=None, data_class=None, device_name=None):
        """Re-create a model from saved weights plus its saved hyper-parameters and data reader.

        :param path: directory of the saved run (anything accepted by `tb.P`).
        :param hparam_class: when given, hyper-parameters are rebuilt with `hparam_class.from_saved(path)`;
            otherwise the whole-object pickle written by `HyperParam.save` is read back.
        :param data_class: when given, the data reader is rebuilt with `data_class.from_saved(path, hp=...)`;
            otherwise the whole-object pickle of the data reader is read back.
        :param device_name: if truthy, overrides `device_name` on the restored hyper-parameters.
        :return: a `cls` instance with weights loaded and `history` restored.
        :raises IndexError: presumably when nothing under `path` matches `*_save_*`.
        """
        path = tb.P(path)

        if hparam_class is not None:
            hp_obj = hparam_class.from_saved(path)
        else:
            # The whole-object pickle lives INSIDE the hyper-params folder, under the name `HyperParam.save` uses.
            # The previous `path / subpath + ".HyperParam.pkl"` glued the suffix onto the folder name instead
            # (`/` binds tighter than `+`), pointing at a file that `save` never writes.
            hp_obj = (path / HyperParam.subpath / "hparams.HyperParam.pkl").readit()
        if device_name:
            hp_obj.device_name = device_name

        if data_class is not None:
            d_obj = data_class.from_saved(path, hp=hp_obj)
        else:
            d_obj = (path / DataReader.subpath / "data_reader.DataReader.pkl").readit()
        # The saved reader state does not carry the hyper-parameters, so re-attach them in both branches.
        d_obj.hp = hp_obj

        model_obj = cls(hp_obj, d_obj)
        model_obj.load_weights(path.search('*_save_*')[0])
        # A missing history file is tolerated: an empty `tb.L` is used instead.
        model_obj.history = (path / "metadata/history.pkl").readit(notfound=tb.L())

        print(f"Class {model_obj.__class__} Loaded Successfully.")
        return model_obj
예제 #5
0
 def from_saved(cls, path, *args, r=False, scope=None, **kwargs):
     """Load hyper-parameters from the data-only pickle stored under `path`.

     :param path: directory of the saved run; the file is looked up at
         `path / cls.subpath / "hparams.HyperParam.dat.pkl"`.
     :param args, r, scope, kwargs: accepted but not used here.
     :return: whatever the parent class's `from_saved` returns for that file.
     """
     data_file = tb.P(path) / cls.subpath / "hparams.HyperParam.dat.pkl"
     return super(HyperParam, cls).from_saved(path=data_file)
예제 #6
0
 def from_saved_weights(cls, parent_dir, model_class):
     """Rebuild a collection of models from the `*__model__*` entries found in `parent_dir`.

     :param parent_dir: directory containing one entry per saved model.
     :param model_class: class whose `from_class_weights` restores a single model from one entry.
     :return: a `cls` instance whose `models` holds the restored models.
     """
     # Search once: the same result provides both the size and the entries to load, so the two
     # cannot disagree if the directory changes between scans (and the scan is not repeated).
     model_dirs = tb.P(parent_dir).search('*__model__*')
     obj = cls(model_class=model_class, path=parent_dir, size=len(model_dirs))
     obj.models = model_dirs.apply(model_class.from_class_weights)
     return obj
예제 #7
0
 def save_dir(self):
     """Return the directory of this run: `self.root` joined with `self.name`."""
     root_dir = tb.P(self.root)
     return root_dir / self.name
예제 #8
0
 def from_saved(cls, path, *args, **kwargs):
     """Create a fresh instance and restore its saved state from `path`.

     :param path: directory of the saved run; the state file is
         `path / cls.subpath / "data_reader.DataReader.dat.pkl"`.
     :param args, kwargs: forwarded unchanged to the constructor.
     :return: the new instance after `__setstate__` has been applied.
     """
     instance = cls(*args, **kwargs)
     state_file = tb.P(path) / cls.subpath / "data_reader.DataReader.dat.pkl"
     state = state_file.readit()
     instance.__setstate__(state)
     return instance
예제 #9
0
class HyperParam(tb.Struct):
    """Use this class to organize model hyperparameters:
    * one place to control everything: a control panel.
    * When doing multiple experiments, one command in console reminds you of settings used in that run (hp.__dict__).
    * Ease of saving settings of experiments! and also replicating it later.
    """
    subpath = tb.P('metadata/hyper_params')  # location within model directory where this will be saved.

    def __init__(self, **kwargs):
        """Set the default hyper-parameters, then overlay whatever the caller passes in `kwargs`."""
        super().__init__(
            # ==================== Environment ========================
            name='default_model_name_' + tb.randstr(),
            root=tb.P.tmp(folder="tmp_models"),
            pkg_name='tensorflow',
            device_name=Device.gpu0,
            # ===================== Data ==============================
            seed=234,
            shuffle=True,
            precision='float32',
            # ===================== Model =============================
            # depth = 3
            # ===================== Training ==========================
            test_split=0.2,  # test split
            learning_rate=0.0005,
            batch_size=32,
            epochs=30,
        )
        self._configured = False
        # NOTE(review): this resets the `device_name=Device.gpu0` default given above to None, so a caller
        # has to supply `device_name` (via kwargs or later) before `.config_device()` can read `.value`.
        # Left unchanged on purpose - confirm whether the override is intended.
        self.device_name = None
        self.save_type = ["data", "whole", "both"][-1]  # i.e. "both"; the list documents the valid options.
        self.update(**kwargs)

    @property
    def save_dir(self):
        """Directory of this run: `root` joined with `name`."""
        return tb.P(self.root) / self.name

    def save(self, path=None, itself=True, r=False, include_code=False, add_suffix=True):
        """Write the hyper-parameters under `self.save_dir / self.subpath`.

        A human-readable `hparams.txt` is always written. Depending on `self.save_type`, the whole object
        (`hparams.HyperParam.pkl`) and/or its data only (`hparams.HyperParam.dat.pkl`) are pickled as well.
        All parameters are accepted for signature compatibility but are not used by this implementation.
        """
        self.save_dir.joinpath(self.subpath / 'hparams.txt').create(parent_only=True).write_text(data=str(self))
        if self.save_type in {"whole", "both"}:
            super(HyperParam, self).save(path=self.save_dir.joinpath(self.subpath / "hparams.HyperParam.pkl"),
                                         itself=True, add_suffix=False)
        if self.save_type in {"data", "both"}:
            super(HyperParam, self).save(path=self.save_dir.joinpath(self.subpath) / "hparams.HyperParam.dat.pkl",
                                         itself=False, add_suffix=False)

    @classmethod
    def from_saved(cls, path, *args, r=False, scope=None, **kwargs):
        """Load from the data-only pickle under `path / cls.subpath`; the extra arguments are not used."""
        return super(HyperParam, cls).from_saved(path=tb.P(path) / cls.subpath / "hparams.HyperParam.dat.pkl")

    def __repr__(self):
        return tb.Struct(self.__dict__).print(config=True, return_str=True)

    @property
    def pkg(self):
        """Import and return the package named by `self.pkg_name` (`tensorflow` or `torch`).

        :raises ValueError: for any other `pkg_name`.
        """
        if self.pkg_name not in ("tensorflow", "torch"):
            raise ValueError("pkg_name must be either `tensorflow` or `torch`")
        return __import__(self.pkg_name)

    @property
    def device(self):
        """Resolve `self.device_name` into a device object of the selected package.

        :return: for tensorflow, a physical device (or a `MirroredStrategy` in the fallback branch);
            for torch, a `torch.device`.
        :raises AssertionError: when the requested torch GPU is not available.
        :raises NotImplementedError: when the package is neither tensorflow nor torch.
        """
        handle = self.pkg
        if handle.__name__ == 'tensorflow':
            # To disable gpu, here's one way: before importing tensorflow do this:
            #     if device == 'cpu':
            #         os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
            # handle.device(device) is used as a context: every tensor constructed and every computation
            # takes place therein. For more manual control, use .cpu() and .gpu('0') .gpu('1') attributes.
            devices = handle.config.experimental.list_physical_devices('CPU')
            devices += handle.config.experimental.list_physical_devices('GPU')
            # zip() stops at the shorter sequence, so only names of devices that actually exist become keys.
            device_dict = dict(zip(['cpu', 'gpu0', 'gpu1'], devices))

            if self.device_name is Device.auto:
                chosen_device = Device.gpu0 if len(devices) > 1 else Device.cpu
            else:
                chosen_device = self.device_name

            device_str = chosen_device.value
            if device_str not in device_dict:
                # Deliberately noisy (printed ten times) so the silent fallback is hard to miss.
                for _ in range(10):
                    print(f"This machine has no such a device to be chosen! ({device_str})")
                # Revert to cpu, keep going, instead of throwing an error.
                device_str = "cpu"

            try:
                device = device_dict[device_str]
                return device
            except KeyError:  # 2gpus not a key in the dict.
                # NOTE(review): unknown names were already reverted to "cpu" above, so this branch is only
                # reached when no device was listed at all - confirm whether multi-GPU is still meant to work.
                assert len(handle.config.experimental.get_visible_devices()) > 2
                mirrored_strategy = handle.distribute.MirroredStrategy()
                return mirrored_strategy

        elif handle.__name__ == 'torch':
            device = self.device_name
            if device is Device.auto:
                return handle.device('cuda:0') if handle.cuda.is_available() else handle.device('cpu')
            elif device is Device.gpu0:
                assert handle.cuda.device_count() > 0, f"GPU {device} not available"
                return handle.device('cuda:0')
            elif device is Device.gpu1:
                assert handle.cuda.device_count() > 1, f"GPU {device} not available"
                return handle.device('cuda:1')
            elif device is Device.cpu:
                return handle.device('cpu')
            # How to run Torch model on 2 GPUs ?
            # NOTE(review): any other device_name (including None) falls through and returns None.
        else:
            raise NotImplementedError(f"I don't know how to configure devices for this package {handle}")

    def _retry_with_auto_device(self, error):
        """Report `error`, switch to `Device.auto` and run `config_device` again."""
        print(error)
        print(f"Trying again with auto-device {Device.auto}")
        self.device_name = Device.auto
        self.config_device()

    def config_device(self):
        """Make only the selected device visible to tensorflow; nothing is configured for torch.

        If resolving the device fails an assertion (e.g. the requested GPU is missing), the configuration
        is retried once with `Device.auto`. `ValueError` / `RuntimeError` raised while applying the
        tensorflow settings are reported and swallowed.
        """
        handle = self.pkg
        device_str = self.device_name.value
        try:
            # Resolved in its own guarded step: the availability assertions are raised by `self.device`,
            # which previously ran outside any `try`, so the auto-device fallback could never trigger.
            device = self.device
        except AssertionError as e:
            if self.device_name is Device.auto:
                raise  # already on auto: nothing left to fall back to, and retrying would recurse forever.
            self._retry_with_auto_device(e)
            return None
        if handle.__name__ == 'torch':
            return None
        try:
            # Now we want only one device to be seen:
            if device_str in ['gpu0', 'gpu1']:
                # memory growth can only be limited for GPU devices.
                handle.config.experimental.set_memory_growth(device, True)
                handle.config.experimental.set_visible_devices(device, 'GPU')  # will only see this device
                # logical_gpus = handle.config.experimental.list_logical_devices('GPU')
                # now, logical gpu is created only for visible device
                # print(len(devices), "Physical devices,", len(logical_gpus), "Logical GPU")
            else:  # for cpu devices, we want no gpu to be seen:
                handle.config.experimental.set_visible_devices([], 'GPU')  # hide every GPU

        except AssertionError as e:
            self._retry_with_auto_device(e)

        except ValueError:
            print("Cannot set memory growth on non-GPU devices")

        except RuntimeError as e:
            print(e)
            print("Device already configured, skipping ... ")
예제 #10
0
 def save(self, path=None, *args, **kwargs):
     """Pickle the data reader under `<root>/<subpath>`, as selected by `self.hp.save_type`.

     :param path: root directory to save under; when None, `self.hp.save_dir` is used.
     :param args, kwargs: accepted but not used here.

     `"whole"` writes the object itself, `"data"` writes its data only, `"both"` writes both files.
     """
     root = self.hp.save_dir if path is None else tb.P(path)
     base = root.joinpath(self.subpath)
     # (save_type that enables it, file name, value for `itself`), written in this order.
     variants = (
         ("whole", "data_reader.DataReader.pkl", True),
         ("data", "data_reader.DataReader.dat.pkl", False),
     )
     for mode, filename, whole_object in variants:
         if self.hp.save_type in {mode, "both"}:
             super(DataReader, self).save(path=base / filename, itself=whole_object, add_suffix=False)
예제 #11
0
class DataReader(tb.Base):
    """This class holds the dataset for training and testing. However, it also holds metadata for preprocessing
    and postprocessing. The latter is essential at inference time, but the former need not be saved. As such,
    at save time, this class only remembers what `__getstate__` returns: the `.specs` `Struct` and the `scaler`.
    Thus, whenever encountering such type of data, make sure to keep it inside that `Struct`.

    NOTE(review): earlier notes mention a fallback `getattr` that exposes `.specs` entries directly on the
    instance; no such method is defined in this class body - confirm whether the base class provides it.
    """
    # Kept as the first statement after the docstring so that the text above really is `DataReader.__doc__`.
    subpath = tb.P("metadata/data_reader")

    def __init__(self, hp: HyperParam = None, specs=None, split=None, *args, **kwargs):
        """
        :param hp: hyper-parameters object (read for `save_dir`, `save_type`, `batch_size`, `seed`, ...).
        :param specs: `Struct` of attributes to persist; a fresh empty one is created when falsy.
        :param split: container of the train/test arrays, or None until `split_the_data` is called.
        :param args, kwargs: forwarded to the base-class constructor.
        """
        super().__init__(*args, **kwargs)
        self.hp = hp
        self.split = split
        self.plotter = None
        # attributes to be saved.
        self.specs = specs if specs else tb.Struct()
        self.scaler = None

    def save(self, path=None, *args, **kwargs):
        """Pickle the reader under `<path or hp.save_dir>/<subpath>`, as selected by `hp.save_type`."""
        base = (tb.P(path) if path is not None else self.hp.save_dir).joinpath(self.subpath)
        if self.hp.save_type in {"whole", "both"}:
            super(DataReader, self).save(path=base / "data_reader.DataReader.pkl", itself=True, add_suffix=False)
        if self.hp.save_type in {"data", "both"}:
            super(DataReader, self).save(path=base / "data_reader.DataReader.dat.pkl", itself=False, add_suffix=False)

    @classmethod
    def from_saved(cls, path, *args, **kwargs):
        """Build a new instance from `args`/`kwargs` and restore its saved state from `path`."""
        instance = cls(*args, **kwargs)
        data = (tb.P(path) / cls.subpath / "data_reader.DataReader.dat.pkl").readit()
        instance.__setstate__(data)
        return instance

    def __getstate__(self):
        """Only `specs` and `scaler` are persisted; the data itself and `hp` are left out."""
        return dict(specs=self.specs, scaler=self.scaler)

    def __setstate__(self, state):
        """hp is missing, deliberate by design."""
        return self.__dict__.update(state)

    def __repr__(self):
        return "DataReader Object with these keys: \n" + tb.Struct(self.__dict__).print(config=True, return_str=True)

    def split_the_data(self, *args, strings=None, **kwargs):
        """Split the given arrays into train/test parts and store them on `self.split`.

        :param args: whatever to be sent to train_test_split (one array per entry of `strings`).
        :param kwargs: whatever to be sent to train_test_split
        :param strings: names of the arrays, in the same order as `args`; defaults to ["x", "y"].
            Each name yields `<name>_train` and `<name>_test` entries in `self.split`.
        :return: None. `specs.ip_shape` / `specs.op_shape` are set from `x_train` / `y_train`.
        """
        # import sklearn.preprocessing as preprocessing
        from sklearn.model_selection import train_test_split
        result = train_test_split(*args, test_size=self.hp.test_split, shuffle=self.hp.shuffle,
                                  random_state=self.hp.seed, **kwargs)
        self.split = tb.Struct(train_loader=None, test_loader=None)
        if strings is None:
            strings = ["x", "y"]
        # train_test_split returns (a_train, a_test, b_train, b_test, ...): even slots train, odd slots test.
        self.split.update({astring + '_train': result[ii * 2] for ii, astring in enumerate(strings)})
        self.split.update({astring + '_test': result[ii * 2 + 1] for ii, astring in enumerate(strings)})
        self.specs.ip_shape = self.split.x_train.shape[1:]  # useful info for instantiating models.
        self.specs.op_shape = self.split.y_train.shape[1:]  # useful info for instantiating models.
        print("================== Training Data Split ===========================")
        self.split.print()

    def sample_dataset(self, aslice=None, dataset="test"):
        """Return a tuple with the same slice taken from every `self.split` entry whose key contains `_<dataset>`.

        :param aslice: slice to take; defaults to the first `hp.batch_size` items.
        :param dataset: "test" or "train" (matched as a substring of the key).
        """
        if aslice is None:
            aslice = slice(0, self.hp.batch_size)
        # returns a tuple containing a slice of data (x_test, y_test, names_test, index_test etc)
        keys = self.split.keys().filter(f"'_{dataset}' in x")
        return tuple([self.split[key][aslice] for key in keys])

    def get_random_input_output(self, ip_shape=None, op_shape=None):
        """Generate a random (input, output) batch of `hp.batch_size` samples.

        :param ip_shape: per-sample input shape; defaults to `specs.ip_shape`.
        :param op_shape: per-sample output shape; defaults to `specs.op_shape`.
        :return: two numpy arrays cast to `hp.precision` ("float32" when `hp` has no such attribute).
        """
        if ip_shape is None:
            ip_shape = self.specs.ip_shape
        if op_shape is None:
            op_shape = self.specs.op_shape
        dtype = getattr(self.hp, "precision", "float32")
        batch = (self.hp.batch_size,)
        # tuple(): shapes supplied as lists (or other sequences) must not break the tuple concatenation.
        ip = np.random.randn(*(batch + tuple(ip_shape))).astype(dtype)
        op = np.random.randn(*(batch + tuple(op_shape))).astype(dtype)
        return ip, op

    def preprocess(self, *args, **kwargs):
        """Identity placeholder: returns the first positional argument. Override in subclasses."""
        _ = args, kwargs, self
        return args[0]  # acts like identity.

    def postprocess(self, *args, **kwargs):
        """Identity placeholder: returns the first positional argument. Override in subclasses."""
        _ = args, kwargs, self
        return args[0]  # acts like identity

    def standardize(self):
        """Fit a `StandardScaler` on `x_train` and apply it to both `x_train` and `x_test` in place."""
        assert self.split is not None, "Load up the data first."
        from sklearn.preprocessing import StandardScaler
        self.scaler = StandardScaler()
        self.split.x_train = self.scaler.fit_transform(self.split.x_train)
        self.split.x_test = self.scaler.transform(self.split.x_test)

    def image_viz(self, pred, gt=None, names=None, **kwargs):
        """
        Assumes numpy inputs. Shows `pred` alone, or `pred` next to `gt` when a ground truth is given;
        the created plotter is kept in `self.plotter`.
        """
        if gt is None:
            labels = None
            self.plotter = tb.ImShow(pred, labels=labels, sup_titles=names, origin='lower', **kwargs)
        else:
            labels = ['Reconstruction', 'Ground Truth']
            self.plotter = tb.ImShow(pred, gt, labels=labels, sup_titles=names, origin='lower', **kwargs)

    def viz(self, *args, **kwargs):
        """Implement here how you would visualize a batch of input and output pair.
        Assume Numpy arguments rather than tensors."""
        _ = self, args, kwargs
        return None
예제 #12
0
def compute_num_of_lines_of_code_in_repo(
        path=None, extension=".py", r=True, **kwargs):
    """Sum the line counts of every file matching `*<extension>` under `path`.

    :param path: directory to scan. Defaults to the current working directory, resolved at CALL time
        (a `tb.P.cwd()` default in the signature would be frozen once, when the module is imported).
    :param extension: file suffix to match, e.g. ".py".
    :param r: passed to `search` (presumably enables a recursive search - confirm in `tb.P.search`).
    :param kwargs: forwarded to `search`.
    :return: the result of `.np.sum()` over the per-file line counts.
    """
    if path is None:
        path = tb.P.cwd()
    files = tb.P(path).search(f"*{extension}", r=r, **kwargs)
    # The chain relies on the search result forwarding `read_text` / `splitlines` to each element.
    return files.read_text().splitlines().apply(len).np.sum()
예제 #13
0
def get_list_of_executables_defined_in_shell():
    """Print (and return the result of printing) every `*.exe` found in the folders of the `Path` variable.

    The `Path` environment variable is split on ";" and each entry is searched for `*.exe` files.
    """
    path_entries = tb.L(tb.os.environ["Path"].split(";"))
    hits_per_folder = path_entries.apply(lambda folder: tb.P(folder).search("*.exe"))
    return hits_per_folder.flatten().print()
예제 #14
0
def build_parser():
    """Parse the command line and launch the requested file/command in a terminal.

    Despite its name, this function both builds the parser AND acts on the parsed arguments:

    * with `--main` and a non-empty file: the file is run as a script through `ipython -i`;
    * otherwise a small script is generated (`from <module> import *`, then `--cmd`, then an optional
      `--func` call) and handed to `tb.Terminal().run_script`.

    :return: None.
    :raises AssertionError: when the file argument is not a `.py` path yet exists on disk, so it cannot be
        interpreted as a dotted module name.
    """
    parser = argparse.ArgumentParser(
        description="Generic Parser to launch a script in a separate window.")

    # POSITIONAL ARGUMENT (UNNAMED)
    # NOTE(review): without `nargs="?"` argparse treats this positional as required, so `default` is never used.
    parser.add_argument(dest="file", help="Python file path.", default="this")
    # if dest is not specified, then, it has same path as keyword, e.g. "--dest"

    # parser.add_argument("--file", "-f", dest="file", help="Python file path.", default="")
    parser.add_argument("--cmd", "-c", dest="cmd", help="Python command.", default="")

    # FLAGS (all default to False):
    parser.add_argument("--main", help="Flag tells to run the file as main.", action="store_true")
    # default is running as module, unless indicated by --main flag, which runs the script as main
    parser.add_argument("--here", "-H", help="Flag for running in this window.", action="store_true")
    parser.add_argument("-s", "--solitary", help="Specify a non-interactive session.", action="store_true")
    parser.add_argument("-p", "--python", help="Use python over IPython.", action="store_true")
    parser.add_argument("-e", help="Explore the file (what are its contents).", action="store_true")

    # OPTIONAL KEYWORD
    parser.add_argument("--func", "-F", dest="func", help="function to be run after import", default="")
    parser.add_argument("--terminal", "-t", dest="terminal",
                        help="Flag to specify which terminal to be used. Default CMD.",
                        default="")  # can choose `wt`
    # NOTE(review): the help text below duplicates --terminal's, and `args.shell` is not read in this function.
    parser.add_argument("--shell", "-S", dest="shell",
                        help="Flag to specify which terminal to be used. Default CMD.",
                        default="")

    args = parser.parse_args()
    print("Crocodile.run: args of the firing command: ")
    tb.Struct(args.__dict__).print(dtype=False)

    # if args.cmd == "" and args.file == "": raise ValueError(f"Pass either a command (using -c) or .py file path (-f)")
    # ==================================================================================

    if args.main and args.file != "":  # run the file itself, don't import it.
        tb.Terminal().run_async("ipython",
                                "-i",
                                f"{args.file}",
                                terminal=args.terminal,
                                new_window=not args.here)
        return None

    # Otherwise: run as a module (i.e. import it)
    if args.file != "":  # non empty file path:
        path = tb.P(args.file)
        if path.suffix == ".py":  # ==> a regular path was passed (a\b) ==> converting to: a.b format.
            if path.is_absolute():
                path = path.rel2cwd()
            path = str((path - path.suffix)).replace(tb.os.sep, ".")
        else:  # It must be that user passed a.b format
            assert path.exists() is False, "I could not determine whether this is a.b or a/b format."
            # Alternative kept for reference:
            #     import importlib
            #     module = importlib.import_module('{path}')
            #     globals().update(module.__dict__)

        script = f"\nfrom {path} import *\n\n"
        script += args.cmd
        script += "\n"
    else:
        script = args.cmd

    if args.func != "":
        # When no file was given, `script` is just `args.cmd` without a trailing newline; appending the call
        # directly would glue it onto the last command line and yield invalid code. Separate them explicitly.
        if script and not script.endswith("\n"):
            script += "\n"
        script += f"tb.E.run_globally({args.func}, globals())"
    tb.Terminal().run_script(script=script,
                             terminal=args.terminal,
                             new_window=not args.here,
                             interactive=not args.solitary,
                             ipython=not args.python)