def fit(self, viz=False, **kwargs):
    default_settings = tb.Struct(x=self.data.split.x_train, y=self.data.split.y_train,
                                 validation_data=(self.data.split.x_test, self.data.split.y_test),
                                 batch_size=self.hp.batch_size, epochs=self.hp.epochs,
                                 verbose=1, shuffle=self.hp.shuffle, callbacks=[])
    default_settings.update(kwargs)
    hist = self.model.fit(**default_settings.dict)
    # It is paramount to deep-copy the history, because the source dict can change on subsequent fits.
    self.history.append(tb.Struct(tb.copy.deepcopy(hist.history)))
    if viz:
        self.plot_loss()
    return self
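# --- A minimal standalone sketch (assuming TensorFlow is installed) of the pattern above:
# fit a toy Keras model, then deep-copy `hist.history` before storing it, since Keras can
# mutate or replace that dict on subsequent fits. Data and architecture here are illustrative.
import copy
import numpy as np
import tensorflow as tf

x, y = np.random.rand(64, 4).astype("float32"), np.random.rand(64, 1).astype("float32")
model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
model.compile(loss="mse", optimizer="adam")
history_log = []
hist = model.fit(x=x, y=y, batch_size=16, epochs=2, verbose=1, shuffle=True)
history_log.append(copy.deepcopy(hist.history))  # snapshot, detached from Keras internals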
def compile(self, loss=None, optimizer=None, metrics=None, compile_model=True, **kwargs):
    """Updates compiler attributes. This acts like a setter.

    .. note::
        * For PyTorch, this method is as good as setting the attributes of `compiler` directly.
        * For TF, this is not the case: TF requires an actual recompilation before changes take effect.

    Remember:
        * Must be run prior to the `fit` method.
        * Can only be run after the `model` attribute is defined.
    """
    pkg = self.hp.pkg
    if self.hp.pkg_name == 'tensorflow':
        if loss is None: loss = pkg.keras.losses.MeanSquaredError()
        if optimizer is None: optimizer = pkg.keras.optimizers.Adam(self.hp.learning_rate)
        if metrics is None: metrics = tb.List()  # [pkg.keras.metrics.MeanSquaredError()]
    elif self.hp.pkg_name == 'torch':
        if loss is None: loss = pkg.nn.MSELoss()
        if optimizer is None: optimizer = pkg.optim.Adam(self.model.parameters(), lr=self.hp.learning_rate)
        if metrics is None: metrics = tb.List()  # [tmp.MeanSquareError()]
    # Create a new compiler object.
    self.compiler = tb.Struct(loss=loss, optimizer=optimizer, metrics=tb.L(metrics), **kwargs)
    # For the TF framework, the specs must additionally be passed to `model.compile`.
    if self.hp.pkg.__name__ == "tensorflow" and compile_model:
        self.model.compile(**self.compiler.__dict__)
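# --- Standalone sketch of the PyTorch side of this contrast (assuming torch is installed):
# in Torch, "compiling" amounts to storing the pieces; a training loop reads them directly,
# so swapping the optimizer later takes effect with no recompilation step.
import torch

model = torch.nn.Linear(4, 1)
compiler = dict(loss=torch.nn.MSELoss(),
                optimizer=torch.optim.Adam(model.parameters(), lr=1e-3),
                metrics=[])
compiler["optimizer"] = torch.optim.SGD(model.parameters(), lr=1e-2)  # effective immediately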
def __init__(self, hp: HyperParam = None, specs=None, split=None, *args, **kwargs):
    super().__init__(*args, **kwargs)
    self.hp = hp
    self.split = split
    self.plotter = None
    # Attributes to be saved:
    self.specs = specs if specs else tb.Struct()
    self.scaler = None
def deduce(self, obj, viz=True, **kwargs):
    """Assumes that the contents of the object are in the form of a batch."""
    preprocessed = self.preprocess(obj, **kwargs)
    prediction = self.infer(preprocessed)
    postprocessed = self.postprocess(prediction, **kwargs)
    result = tb.Struct(input=obj, preprocessed=preprocessed, prediction=prediction, postprocessed=postprocessed)
    if viz:
        self.viz(postprocessed, **kwargs)
    return result
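# --- A self-contained sketch of the preprocess -> infer -> postprocess pipeline that
# `deduce` wraps; the three stage functions below are illustrative stand-ins, not the
# library's own implementations.
import numpy as np

def preprocess(x): return (x - x.mean()) / (x.std() + 1e-8)  # stand-in scaler
def infer(x): return x @ np.ones((x.shape[1], 1))            # stand-in model forward pass
def postprocess(p): return p.ravel()                         # stand-in inverse transform

batch = np.random.rand(8, 4)
stages = dict(input=batch)
stages["preprocessed"] = preprocess(stages["input"])
stages["prediction"] = infer(stages["preprocessed"])
stages["postprocessed"] = postprocess(stages["prediction"])  # every intermediate kept for inspection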
def describe_table(self, table, sch=None, dtype=True):
    print(table.center(100, "="))
    self.refresh()
    tbl = self.meta.tables[table]
    count = self.ses.query(tbl).count()
    # Rough size estimate: ~10 bytes per cell.
    res = tb.Struct(name=table, count=count, size_mb=count * len(tbl.exported_columns) * 10 / 1e6)
    res.print(dtype=False, config=True)
    dat = self.read_table(table=table, sch=sch, size=2)
    cols = self.get_columns(table, sch=sch)
    df = pd.DataFrame.from_records(dat, columns=cols)
    print("SAMPLE:\n", df)
    if dtype:
        print("\nDETAILED COLUMNS:\n", pd.DataFrame(self.insp.get_columns(table)))
        # print("DETAILED COLUMNS:\n", list(self.meta.tables[self._get_table_identifier(table, sch)].columns))
    print("\n" * 3)
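# --- Hedged sketch of the same row-count + sample pattern with plain SQLAlchemy and pandas,
# using an in-memory SQLite engine; the table and column names are illustrative only.
import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine("sqlite:///:memory:")
with engine.begin() as con:
    con.execute(sa.text("CREATE TABLE t (a INTEGER, b TEXT)"))
    con.execute(sa.text("INSERT INTO t VALUES (1, 'x'), (2, 'y')"))
count = pd.read_sql("SELECT COUNT(*) AS n FROM t", engine)["n"].iloc[0]
sample = pd.read_sql("SELECT * FROM t LIMIT 2", engine)  # a small sample, like `read_table(size=2)`
print(count, "\n", sample)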
def split_the_data(self, *args, strings=None, **kwargs):
    """Splits the data into train and test sets.

    :param args: arrays to be passed to `train_test_split`.
    :param strings: names for the arrays, e.g. ["x", "y"]; used to key the resulting split.
    :param kwargs: forwarded to `train_test_split`.
    :return: None. Populates `self.split` and `self.specs`.
    """
    from sklearn.model_selection import train_test_split
    result = train_test_split(*args, test_size=self.hp.test_split, shuffle=self.hp.shuffle,
                              random_state=self.hp.seed, **kwargs)
    self.split = tb.Struct(train_loader=None, test_loader=None)
    if strings is None:
        strings = ["x", "y"]
    # `train_test_split` returns the arrays interleaved: (x_train, x_test, y_train, y_test, ...),
    # hence the step of two below (see the sketch after this function).
    self.split.update({astring + '_train': result[ii * 2] for ii, astring in enumerate(strings)})
    self.split.update({astring + '_test': result[ii * 2 + 1] for ii, astring in enumerate(strings)})
    self.specs.ip_shape = self.split.x_train.shape[1:]  # useful info for instantiating models.
    self.specs.op_shape = self.split.y_train.shape[1:]  # useful info for instantiating models.
    print("================== Training Data Split ===========================")
    self.split.print()
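# --- Standalone sketch of the interleaved return order that the step-of-two indexing
# above relies on: with multiple input arrays, sklearn's train_test_split returns
# (x_train, x_test, y_train, y_test, ...).
import numpy as np
from sklearn.model_selection import train_test_split

x, y = np.arange(20).reshape(10, 2), np.arange(10)
result = train_test_split(x, y, test_size=0.2, shuffle=True, random_state=0)
split = {}
for ii, name in enumerate(["x", "y"]):
    split[name + "_train"] = result[ii * 2]
    split[name + "_test"] = result[ii * 2 + 1]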
def evaluate(self, x_test=None, y_test=None, names_test=None, idx=None, viz=True, sample=5, **kwargs):
    # ================= Data Procurement ===================================
    assert self.data is not None, 'Data attribute is not defined'
    x_test = x_test if x_test is not None else self.data.split.x_test
    y_test = y_test if y_test is not None else self.data.split.y_test
    this = self.data.split.names_test if hasattr(self.data.split, "names_test") else range(len(x_test))
    names_test = names_test if names_test is not None else this
    if idx is None:
        def get_rand(x, y):
            # Sample a random contiguous window of `sample` items.
            idx_ = np.random.choice(len(x) - sample)
            return (x[idx_:idx_ + sample], y[idx_:idx_ + sample],
                    names_test[idx_:idx_ + sample], np.arange(idx_, idx_ + sample))
        x_test, y_test, names_test, idx = get_rand(x_test, y_test)  # already processed samples
    elif type(idx) is int:
        assert idx < len(x_test), f"Index passed {idx} exceeds length of x_test {len(x_test)}"
        x_test, y_test, names_test = x_test[idx:idx + 1], y_test[idx:idx + 1], names_test[idx:idx + 1]
    else:
        x_test, y_test, names_test = x_test[idx], y_test[idx], names_test[idx]
    # ==========================================================================
    prediction = self.infer(x_test)
    loss_df = self.get_metrics_evaluations(prediction, y_test)
    if loss_df is not None:
        loss_df['names'] = names_test
    pred = self.postprocess(prediction, per_instance_kwargs=dict(name=names_test), legend="Prediction", **kwargs)
    gt = self.postprocess(y_test, per_instance_kwargs=dict(name=names_test), legend="Ground Truth", **kwargs)
    results = tb.Struct(pp_prediction=pred, prediction=prediction, input=x_test,
                        pp_gt=gt, gt=y_test, names=names_test, loss_df=loss_df)
    if viz:
        loss_name = results.loss_df.columns.to_list()[0]  # name of the first loss column
        loss_label = results.loss_df[loss_name].apply(lambda x: f"{loss_name} = {x}").to_list()
        names = [f"{aname}. Case: {anindex}" for aname, anindex in zip(loss_label, names_test)]
        self.viz(pred, gt, names=names, **kwargs)
    return results
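# --- Small sketch of the contiguous random-window sampling used by `get_rand` when no
# index is passed: pick a random start that leaves room for the window, then slice.
import numpy as np

sample = 5
x = np.arange(100)
start = np.random.choice(len(x) - sample)  # any start in [0, len(x) - sample)
window = x[start:start + sample]
indices = np.arange(start, start + sample)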
def __repr__(self):
    return tb.Struct(self.__dict__).print(config=True, return_str=True)
def __repr__(self): return f"DataReader Object with these keys: \n" + tb.Struct(self.__dict__).print(config=True, return_str=True)
def __init__(self, *args, **kwargs):
    super().__init__(*args, **kwargs)
    # `size=` is required to get arrays; without it, np.random.normal(1000, 10) returns a single
    # scalar drawn from N(1000, 10) and the subsequent split would fail.
    self.dataset = tb.Struct(x=np.random.normal(size=(1000, 10)), y=np.random.normal(size=(1000, 1)))
    self.split_the_data(self.dataset.x, self.dataset.y)
def build_parser():
    parser = argparse.ArgumentParser(description="Generic parser to launch a script in a separate window.")

    # POSITIONAL ARGUMENT (UNNAMED):
    # if `dest` is not specified, it takes the same name as the keyword, e.g. "--dest".
    parser.add_argument(dest="file", help="Python file path.", default="this")
    # parser.add_argument("--file", "-f", dest="file", help="Python file path.", default="")
    parser.add_argument("--cmd", "-c", dest="cmd", help="Python command.", default="")

    # FLAGS (default False):
    # The default is to run as a module, unless the --main flag indicates running the script as __main__.
    parser.add_argument("--main", help="Flag to run the file as __main__.", action="store_true")
    parser.add_argument("--here", "-H", help="Flag for running in this window.", action="store_true")
    parser.add_argument("-s", "--solitary", help="Specify a non-interactive session.", action="store_true")
    parser.add_argument("-p", "--python", help="Use python over IPython.", action="store_true")
    parser.add_argument("-e", help="Explore the file (what are its contents).", action="store_true")

    # OPTIONAL KEYWORDS:
    parser.add_argument("--func", "-F", dest="func", help="Function to be run after import.", default="")
    parser.add_argument("--terminal", "-t", dest="terminal",
                        help="Flag to specify which terminal to use. Default: CMD.", default="")  # can choose `wt`
    parser.add_argument("--shell", "-S", dest="shell",
                        help="Flag to specify which shell to use. Default: CMD.", default="")

    args = parser.parse_args()
    print("Crocodile.run: args of the firing command:")
    tb.Struct(args.__dict__).print(dtype=False)

    # if args.cmd == "" and args.file == "": raise ValueError("Pass either a command (using -c) or a .py file path (-f).")
    # ==================================================================================
    if args.main is True and args.file != "":  # run the file itself; don't import it.
        tb.Terminal().run_async("ipython", "-i", f"{args.file}", terminal=args.terminal, new_window=not args.here)
    else:  # run as a module (i.e. import it).
        if args.file != "":  # non-empty file path:
            path = tb.P(args.file)
            if path.suffix == ".py":  # a regular path was passed (a/b) ==> convert to a.b format.
                if path.is_absolute():
                    path = path.rel2cwd()
                path = str(path - path.suffix).replace(tb.os.sep, ".")
            else:  # it must be that the user already passed the a.b format.
                assert path.exists() is False, "Could not determine whether this is a.b or a/b format."
            # script = f"""
            # import importlib
            # module = importlib.import_module('{path}')
            # globals().update(module.__dict__)
            # """
            script = fr"""
from {path} import *
"""
            script += args.cmd
            script += "\n"
        else:
            script = args.cmd
        if args.func != "":
            script += f"tb.E.run_globally({args.func}, globals())"
        tb.Terminal().run_script(script=script, terminal=args.terminal, new_window=not args.here,
                                 interactive=not args.solitary, ipython=not args.python)
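# --- Illustrative invocations of this launcher; the module path `crocodile.run` is an
# assumption based on the print statement above, and the file names are examples only:
#   python -m crocodile.run my_script.py --main         # run the file itself as __main__
#   python -m crocodile.run my_script.py -F main        # import it, then call main()
#   python -m crocodile.run -c "print('hi')" --here -s  # run a command in this window, non-interactively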