예제 #1
0
 def fit(self, viz=False, **kwargs):
     """Train ``self.model`` on the training split and record the resulting history.

     The default arguments handed to ``self.model.fit`` are taken from ``self.data.split``
     (train arrays and the test arrays as validation data) and from ``self.hp``
     (batch size, epochs, shuffle). Anything passed through ``kwargs`` overrides them.

     :param viz: when True, ``self.plot_loss()`` is called after training.
     :param kwargs: extra or overriding keyword arguments for ``self.model.fit``.
     :return: ``self``.
     """
     split = self.data.split
     hparams = self.hp
     fit_args = tb.Struct(
         x=split.x_train,
         y=split.y_train,
         validation_data=(split.x_test, split.y_test),
         batch_size=hparams.batch_size,
         epochs=hparams.epochs,
         verbose=1,
         shuffle=hparams.shuffle,
         callbacks=[],
     )
     fit_args.update(kwargs)  # caller-supplied values win over the defaults
     outcome = self.model.fit(**fit_args.dict)
     # Store a deep copy: the source history object can change afterwards.
     snapshot = tb.copy.deepcopy(outcome.history)
     self.history.append(tb.Struct(snapshot))
     if viz:
         self.plot_loss()
     return self
예제 #2
0
    def compile(self, loss=None, optimizer=None, metrics=None, compile_model=True, **kwargs):
        """ Updates compiler attributes. This acts like a setter.

        .. note:: * In the case of PyTorch, this method is equivalent to setting the attributes
                    of `compiler` directly.
                  * In the case of TF this is not so, as TF requires an actual further
                    compilation step before the changes take effect.

        Remember:

        * Must be run prior to fit method.
        * Can be run only after defining model attribute.

        :param loss: loss object; a mean-squared-error loss of the active package is used when None.
        :param optimizer: optimizer object; Adam with ``self.hp.learning_rate`` is used when None.
        :param metrics: metrics collection; an empty ``tb.List`` is used when None.
        :param compile_model: when True and the package is tensorflow, ``self.model.compile`` is called.
        :param kwargs: extra entries stored on the compiler struct (and so forwarded to ``model.compile``).
        """
        backend = self.hp.pkg
        backend_name = self.hp.pkg_name
        if backend_name == 'tensorflow':
            loss = backend.keras.losses.MeanSquaredError() if loss is None else loss
            optimizer = backend.keras.optimizers.Adam(self.hp.learning_rate) if optimizer is None else optimizer
            metrics = tb.List() if metrics is None else metrics  # [pkg.keras.metrics.MeanSquaredError()]
        elif backend_name == 'torch':
            loss = backend.nn.MSELoss() if loss is None else loss
            if optimizer is None:
                optimizer = backend.optim.Adam(self.model.parameters(), lr=self.hp.learning_rate)
            metrics = tb.List() if metrics is None else metrics  # [tmp.MeanSquareError()]

        # A fresh compiler object replaces whatever was there before.
        self.compiler = tb.Struct(loss=loss, optimizer=optimizer, metrics=tb.L(metrics), **kwargs)

        # Only the TF framework needs the specs pushed into the model itself.
        if not (self.hp.pkg.__name__ == "tensorflow" and compile_model):
            return
        self.model.compile(**self.compiler.__dict__)
예제 #3
0
 def __init__(self, hp: HyperParam = None, specs=None, split=None, *args, **kwargs):
     """Store the hyper-parameters, data split and specs on the instance.

     :param hp: hyper-parameter object kept as ``self.hp``.
     :param specs: specs container; a new empty ``tb.Struct`` is used when this is falsy.
     :param split: data split kept as ``self.split``.
     :param args: forwarded to the parent initializer.
     :param kwargs: forwarded to the parent initializer.
     """
     super().__init__(*args, **kwargs)
     self.hp = hp
     self.split = split
     self.plotter = None
     # attributes to be saved.
     self.specs = specs or tb.Struct()
     self.scaler = None
예제 #4
0
 def deduce(self, obj, viz=True, **kwargs):
     """Run the preprocess -> infer -> postprocess chain on ``obj``.

     Assumes that the contents of the object are in the form of a batch.

     :param obj: raw input handed to ``self.preprocess``.
     :param viz: when True, ``self.viz`` is called on the post-processed output.
     :param kwargs: forwarded to ``self.preprocess``, ``self.postprocess`` and ``self.viz``.
     :return: ``tb.Struct`` with keys ``input``, ``preprocessed``, ``prediction`` and ``postprocessed``.
     """
     stages = {"input": obj}
     stages["preprocessed"] = self.preprocess(obj, **kwargs)
     stages["prediction"] = self.infer(stages["preprocessed"])
     stages["postprocessed"] = self.postprocess(stages["prediction"], **kwargs)
     outcome = tb.Struct(**stages)
     if viz:
         self.viz(stages["postprocessed"], **kwargs)
     return outcome
예제 #5
0
 def describe_table(self, table, sch=None, dtype=True):
     """Print an overview of ``table``: summary, a two-row sample and, optionally, column details.

     :param table: table name; used as the key into ``self.meta.tables``.
     :param sch: schema forwarded to ``self.read_table`` and ``self.get_columns``.
     :param dtype: when True, the detailed column listing from ``self.insp.get_columns`` is printed too.
     """
     print(table.center(100, "="))
     self.refresh()
     # NOTE(review): `sch` is not used for this lookup nor for `self.insp.get_columns` below.
     table_obj = self.meta.tables[table]
     n_rows = self.ses.query(table_obj).count()
     n_cols = len(table_obj.exported_columns)
     # size_mb is computed as rows * columns * 10 / 1e6.
     summary = tb.Struct(name=table, count=n_rows, size_mb=n_rows * n_cols * 10 / 1e6)
     summary.print(dtype=False, config=True)

     sample_rows = self.read_table(table=table, sch=sch, size=2)
     column_names = self.get_columns(table, sch=sch)
     sample_df = pd.DataFrame.from_records(sample_rows, columns=column_names)
     print("SAMPLE:\n", sample_df)

     if dtype:
         print("\n")
         column_details = tb.pd.DataFrame(self.insp.get_columns(table))
         print("DETAILED COLUMNS:\n", column_details)
     print("\n" * 3)
예제 #6
0
 def split_the_data(self, *args, strings=None, **kwargs):
     """Split the given arrays into train and test parts and store them on ``self.split``.

     ``test_size``, ``shuffle`` and ``random_state`` are taken from ``self.hp``
     (``test_split``, ``shuffle`` and ``seed`` respectively).

     :param args: whatever is to be sent to ``train_test_split`` (the arrays to split).
     :param kwargs: whatever else is to be sent to ``train_test_split``.
     :param strings: one name per array in ``args``; defaults to ``["x", "y"]``. Each name ``s``
         produces the entries ``s_train`` and ``s_test`` on ``self.split``.
     :return: None. ``self.split`` is replaced and ``self.specs.ip_shape`` / ``op_shape`` are set.
     """
     from sklearn.model_selection import train_test_split

     parts = train_test_split(
         *args,
         test_size=self.hp.test_split,
         shuffle=self.hp.shuffle,
         random_state=self.hp.seed,
         **kwargs,
     )
     self.split = tb.Struct(train_loader=None, test_loader=None)
     names = ["x", "y"] if strings is None else strings
     # The result is indexed as [a_train, a_test, b_train, b_test, ...]: even slots are train, odd are test.
     for suffix, offset in (("_train", 0), ("_test", 1)):
         self.split.update({name + suffix: parts[pos * 2 + offset] for pos, name in enumerate(names)})
     # Shapes without the leading axis: useful info for instantiating models.
     self.specs.ip_shape = self.split.x_train.shape[1:]
     self.specs.op_shape = self.split.y_train.shape[1:]
     print("================== Training Data Split ===========================")
     self.split.print()
예제 #7
0
    def evaluate(self, x_test=None, y_test=None, names_test=None, idx=None, viz=True, sample=5, **kwargs):
        """Run inference on a slice of the test data and compare it against the ground truth.

        :param x_test: inputs; defaults to ``self.data.split.x_test``.
        :param y_test: targets; defaults to ``self.data.split.y_test``.
        :param names_test: per-instance names; defaults to ``self.data.split.names_test`` when that
            attribute exists, otherwise to ``range(len(x_test))``.
        :param idx: selects what to evaluate. ``None`` picks a random contiguous window of at most
            ``sample`` items; an integer picks that single item (kept as a batch of one); anything else
            is used directly to index ``x_test``, ``y_test`` and ``names_test``.
        :param viz: when True, ``self.viz`` is called with the post-processed prediction and ground truth.
        :param sample: length of the random window used when ``idx`` is None.
        :param kwargs: forwarded to ``self.postprocess`` and ``self.viz``.
        :return: ``tb.Struct`` with keys ``pp_prediction``, ``prediction``, ``input``, ``pp_gt``, ``gt``,
            ``names`` and ``loss_df``.
        """
        # Checked up front: every path below dereferences `self.data`.
        assert self.data is not None, 'Data attribute is not defined'

        # ================= Data Procurement ===================================
        x_test = x_test if x_test is not None else self.data.split.x_test
        y_test = y_test if y_test is not None else self.data.split.y_test
        if names_test is None:
            if hasattr(self.data.split, "names_test"):
                names_test = self.data.split.names_test
            else:
                names_test = range(len(x_test))

        if idx is None:
            # Clamp at zero so data with `sample` or fewer items yields everything instead of making
            # np.random.choice raise; the +1 lets the window reach the very last item.
            last_start = max(len(x_test) - sample, 0)
            start = np.random.choice(last_start + 1)
            window = slice(start, start + sample)
            x_test, y_test, names_test = x_test[window], y_test[window], names_test[window]
        elif isinstance(idx, (int, np.integer)):
            # numpy integers are treated like ints so the batch axis is kept for them too.
            assert idx < len(x_test), f"Index passed {idx} exceeds length of x_test {len(x_test)}"
            x_test, y_test, names_test = x_test[idx: idx + 1], y_test[idx: idx + 1], names_test[idx: idx + 1]
        else:
            if isinstance(names_test, range) and not isinstance(idx, slice):
                # A range only supports int/slice indexing; make it an array for list/array indices.
                names_test = np.asarray(names_test)
            x_test, y_test, names_test = x_test[idx], y_test[idx], names_test[idx]
        # ==========================================================================

        prediction = self.infer(x_test)
        loss_dict = self.get_metrics_evaluations(prediction, y_test)
        if loss_dict is not None:
            loss_dict['names'] = names_test
        pred = self.postprocess(prediction, per_instance_kwargs=dict(name=names_test), legend="Prediction", **kwargs)
        gt = self.postprocess(y_test, per_instance_kwargs=dict(name=names_test), legend="Ground Truth", **kwargs)
        results = tb.Struct(pp_prediction=pred, prediction=prediction, input=x_test, pp_gt=gt, gt=y_test,
                            names=names_test, loss_df=loss_dict, )
        if viz:
            if loss_dict is not None:
                loss_name = loss_dict.columns.to_list()[0]  # first loss column
                loss_label = loss_dict[loss_name].apply(lambda x: f"{loss_name} = {x}").to_list()
                names = [f"{aname}. Case: {anindex}" for aname, anindex in zip(loss_label, names_test)]
            else:
                # No metrics were evaluated: label the plots with the case names only.
                names = [f"Case: {anindex}" for anindex in names_test]
            self.viz(pred, gt, names=names, **kwargs)
        return results
예제 #8
0
 def __repr__(self):
     """Return the string produced by printing this object's attributes as a ``tb.Struct``."""
     attributes = tb.Struct(self.__dict__)
     return attributes.print(config=True, return_str=True)
예제 #9
0
 def __repr__(self):
     """Return a header line followed by the printed form of this object's attributes."""
     listing = tb.Struct(self.__dict__).print(config=True, return_str=True)
     return "DataReader Object with these keys: \n" + listing
예제 #10
0
 def __init__(self, *args, **kwargs):
     """Create a random Gaussian dataset and split it into train and test parts.

     ``self.dataset`` holds ``x`` with shape (1000, 10) and ``y`` with shape (1000, 1); both are
     passed to ``self.split_the_data``.

     :param args: forwarded to the parent initializer.
     :param kwargs: forwarded to the parent initializer.
     """
     super().__init__(*args, **kwargs)
     # The shape must go through `size=`: positionally, normal(1000, 10) means loc=1000, scale=10
     # and returns a single scalar, which cannot be split nor has a `.shape[1:]`.
     self.dataset = tb.Struct(x=np.random.normal(size=(1000, 10)),
                              y=np.random.normal(size=(1000, 1)))
     self.split_the_data(self.dataset.x, self.dataset.y)
예제 #11
0
def build_parser():
    """Parse the command line and launch the requested file or command in a terminal.

    Despite its name, this function does more than build the parser: it parses ``sys.argv``,
    prints the parsed arguments, and then either

    * runs the file itself with ``ipython -i`` (when ``--main`` is given), or
    * builds a small script that star-imports the file as a module, appends the ``--cmd`` text and
      an optional ``--func`` call, and hands it to ``tb.Terminal().run_script``.

    The file may be given as a ``.py`` path (converted to dotted module form) or directly in
    ``a.b`` module form.

    :return: None.
    """
    parser = argparse.ArgumentParser(
        description="Generic Parser to launch a script in a separate window.")

    # POSITIONAL ARGUMENT (UNNAMED)
    # NOTE(review): without `nargs="?"` argparse treats this positional as required, so the
    # default below is never used and the `args.file != ""` checks further down always pass.
    parser.add_argument(dest="file", help="Python file path.", default="this")
    # if dest is not specified, then it has the same name as the keyword, e.g. "--dest"

    parser.add_argument("--cmd",
                        "-c",
                        dest="cmd",
                        help="Python command.",
                        default="")

    # FLAGS (all default to False):
    parser.add_argument("--main",
                        help="Flag tells to run the file as main.",
                        action="store_true")
    # default is running as module, unless indicated by --main flag, which runs the script as main
    parser.add_argument("--here",
                        "-H",
                        help="Flag for running in this window.",
                        action="store_true")
    parser.add_argument("-s",
                        "--solitary",
                        help="Specify a non-interactive session.",
                        action="store_true")
    parser.add_argument("-p",
                        "--python",
                        help="Use python over IPython.",
                        action="store_true")
    # NOTE(review): `-e` is parsed but not used anywhere in this function.
    parser.add_argument("-e",
                        help="Explore the file (what are its contents).",
                        action="store_true")

    # OPTIONAL KEYWORD
    parser.add_argument("--func",
                        "-F",
                        dest="func",
                        help="function to be run after import",
                        default="")
    parser.add_argument(
        "--terminal",
        "-t",
        dest="terminal",
        help="Flag to specify which terminal to be used. Default CMD.",
        default="")  # can choose `wt`
    # NOTE(review): `--shell` is parsed but not used anywhere in this function.
    parser.add_argument(
        "--shell",
        "-S",
        dest="shell",
        help="Flag to specify which shell to be used.",
        default="")

    args = parser.parse_args()
    print("Crocodile.run: args of the firing command: ")
    tb.Struct(args.__dict__).print(dtype=False)

    # ==================================================================================

    if args.main and args.file != "":  # run the file itself, don't import it.
        tb.Terminal().run_async("ipython",
                                "-i",
                                f"{args.file}",
                                terminal=args.terminal,
                                new_window=not args.here)
        return

    # Otherwise: run as a module (i.e. import it).
    if args.file != "":  # non empty file path:
        path = tb.P(args.file)
        if path.suffix == ".py":  # ==> a regular path was passed (a\b) ==> converting to: a.b format.
            if path.is_absolute():
                path = path.rel2cwd()
            path = str((path - path.suffix)).replace(tb.os.sep, ".")
        else:  # It must be that user passed a.b format
            assert path.exists(
            ) is False, "I could not determine whether this is a.b or a/b format."

        script = f"\nfrom {path} import *\n\n"
        script += args.cmd
        script += "\n"
    else:
        script = args.cmd

    if args.func != "":
        # Keep the call on its own line: a bare `--cmd` has no trailing newline, and gluing the
        # call onto it would produce invalid Python.
        if script and not script.endswith("\n"):
            script += "\n"
        script += f"tb.E.run_globally({args.func}, globals())"
    tb.Terminal().run_script(script=script,
                             terminal=args.terminal,
                             new_window=not args.here,
                             interactive=not args.solitary,
                             ipython=not args.python)