def get_insights_adaccount(self,
                           account_id: str = 'act_1377918625828738',
                           time_increment: int = 1,
                           date_preset: str = None,
                           time_range: dict = None):
    res = self.request_facebook(edge=f'{account_id}/insights',
                                params={'limit': '200'},
                                level=['campaign'],
                                time_increment=time_increment,
                                fields=[
                                    'impressions', 'spend', 'campaign_name',
                                    'clicks', 'conversions', 'ctr',
                                    'conversion_values'
                                ],
                                breakdown=['age'],
                                date_preset=date_preset,
                                time_range=time_range)
    res = utils.flatten_dict(res)
    # Raw strings for the regexes below avoid invalid-escape warnings.
    res = [
        utils.dict_key_val_pair_eliminate(
            dct,
            pair_id_re=r'(conversions_|conversion_values_)\d',
            key_id_re='action_type',
            val_id_re='value') for dct in res
    ]
    df = pd.DataFrame(res)

    # Pivot (with sum aggregation) the custom-conversion columns into one
    # column per conversion name.
    index_cols = ['campaign_name', 'date_start', 'date_stop']
    other_static_fields = ['ctr', 'clicks', 'spend', 'impressions']
    if any('offsite_conversion.fb_pixel_custom' in col for col in list(df)):
        df = pd.wide_to_long(df,
                             stubnames=['offsite_conversion.fb_pixel_custom'],
                             i=index_cols,
                             j='conversion_name',
                             sep='.',
                             suffix=r'(?!\.)[a-zA-Z\d_]*$').reset_index()
        df['conversion_name'] = df['conversion_name'].str.extract(
            r'(.*)(?=_\d)')
        df = pd.pivot_table(df,
                            values='offsite_conversion.fb_pixel_custom',
                            columns=['conversion_name'],
                            index=index_cols + other_static_fields,
                            aggfunc=np.sum)
        df = pd.DataFrame(df.to_records())
        if 'level_0' in list(df):
            df = df.drop('level_0', axis=1)
    else:
        # Strip the trailing "_<digit>" suffixes the flattening step added.
        df.columns = [
            re.search(r'(^.*?)(?=_\d|$)', col)[0] for col in df.columns
        ]
    return df
def all_relevant_params(self):
    res = []
    for exp_report in self.exp_reports:
        flat_params = flatten_dict(exp_report["raw_params"])
        relevant_params = self.relevant_params(flat_params)
        for param in relevant_params:
            if param not in res:
                res.append(param)
    return res
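# Every snippet in this section leans on a flatten_dict helper whose
# implementation is not shown. Below is a minimal sketch of the assumed
# behavior, collapsing nested keys into single flat keys (the separator is
# an assumption), not the actual project implementation:
def flatten_dict(d, parent_key="", sep="_"):
    """Flatten a nested dict into one level, joining key paths with `sep`."""
    items = {}
    for key, value in d.items():
        new_key = parent_key + sep + key if parent_key else key
        if isinstance(value, dict):
            items.update(flatten_dict(value, new_key, sep=sep))
        else:
            items[new_key] = value
    return items

# Example: flatten_dict({"a": {"b": 1}, "c": 2}) == {"a_b": 1, "c": 2}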
def create_experiment_params(self):
    Print.point("Generating Experiments")
    for key in param_grid.keys():
        if key not in self.params:
            self.params[key] = param_grid[key]
    exp_params_list = self.recurse_flatten(self.params)
    for params in exp_params_list:
        pipeline_items = params["preprocessor"].split(";")
        pipeline_items.append(params["classifier"])
        self.pipeline_items = list(set(self.pipeline_items + pipeline_items))
        for key, val in conditional_param_grid.items():
            if key in pipeline_items:
                if isinstance(val, dict):
                    for val_key, val_val in val.items():
                        if key in self.params:
                            if val_key in self.params[key]:
                                params[key][val_key] = self.params[key][val_key]
                            else:
                                params[key][val_key] = val_val
                        else:
                            params[key] = val
                else:
                    params[key] = self.params[key] if key in self.params else val
            else:
                if key in params:
                    del params[key]
    exp_params_list = self.recurse_flatten(exp_params_list)

    # The following loop removes duplicate configurations while preserving order.
    out = []
    for v in exp_params_list:
        if v not in out:
            out.append(v)
    exp_params_list = out
    # set_of_jsons = {json.dumps(d, sort_keys=True) for d in exp_params_list}
    # exp_params_list = [json.loads(t) for t in set_of_jsons]

    Print.start("")
    print(pd.DataFrame([flatten_dict(e) for e in exp_params_list]))
    print("\n\n")
    self.exp_params_list = exp_params_list
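# create_experiment_params reads module-level param_grid and
# conditional_param_grid dicts that are not shown here. A purely hypothetical
# illustration of shapes consistent with the lookups above (";"-joined
# preprocessor steps; dict-valued conditional entries merged per sub-key,
# other values replaced wholesale):
param_grid = {
    "preprocessor": "standard_scaler;pca",
    "classifier": "svm",
}
conditional_param_grid = {
    # Applied only when the key appears among the pipeline items.
    "pca": {"n_components": 10},
    "svm": {"C": 1.0},
}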
def main(cfg: DictConfig) -> None:
    print(cfg.pretty())
    neptune_logger = CustomNeptuneLogger(
        params=flatten_dict(OmegaConf.to_container(cfg, resolve=True)),
        **cfg.logging.neptune_logger)
    tb_logger = loggers.TensorBoardLogger(**cfg.logging.tb_logger)
    lr_logger = LearningRateLogger()  # TODO: change to CyclicLR per epoch
    my_callback = MyCallback(cfg)

    model = get_model(cfg)
    if cfg.model.ckpt_path is not None:
        ckpt_pth = glob.glob(utils.to_absolute_path(cfg.model.ckpt_path))
        model = load_pytorch_model(ckpt_pth[0], model)

    seed_everything(2020)

    # TODO: change to enable logging losses
    lit_model = O2UNetSystem(hparams=cfg, model=model)

    checkpoint_callback_conf = OmegaConf.to_container(
        cfg.callbacks.model_checkpoint, resolve=True)
    checkpoint_callback = ModelCheckpoint(**checkpoint_callback_conf)
    early_stop_callback_conf = OmegaConf.to_container(
        cfg.callbacks.early_stop, resolve=True)
    early_stop_callback = EarlyStopping(**early_stop_callback_conf)

    trainer = Trainer(
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=early_stop_callback,
        logger=[tb_logger, neptune_logger],
        # logger=[tb_logger],
        callbacks=[lr_logger, my_callback],
        **cfg.trainer)

    # TODO: change to train with all data
    datasets = get_datasets(OmegaConf.to_container(cfg, resolve=True))
    train_dataset = datasets["train"]
    valid_dataset = datasets["valid"]

    trainer.fit(
        lit_model,
        train_dataloader=DataLoader(train_dataset,
                                    **cfg["training"]["dataloader"]["train"]),
        val_dataloaders=DataLoader(valid_dataset,
                                   **cfg["training"]["dataloader"]["valid"]))
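# Both main() entry points assume a Hydra/OmegaConf config tree. A
# hypothetical minimal layout covering the keys accessed above (all values
# are placeholders, not the project's real config):
from omegaconf import OmegaConf

cfg_sketch = OmegaConf.create({
    "logging": {
        "neptune_logger": {"project_name": "user/project"},
        "tb_logger": {"save_dir": "tb_logs"},
    },
    "model": {"ckpt_path": None},
    "callbacks": {
        "model_checkpoint": {"monitor": "val_loss"},
        "early_stop": {"monitor": "val_loss", "patience": 5},
    },
    "trainer": {"max_epochs": 10},
    "training": {
        "dataloader": {
            "train": {"batch_size": 32, "shuffle": True},
            "valid": {"batch_size": 32, "shuffle": False},
        }
    },
})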
def param_performance(self, param):
    res = {}
    for exp_report in self.exp_reports:
        flat_params = flatten_dict(exp_report["raw_params"])
        if param in flat_params:
            param_val = flat_params[param]
            if param_val not in res:
                res[param_val] = []
            res[param_val].append(exp_report["accuracy"])
    for key, val in res.items():
        res[key] = np.mean(val)
    return res
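# A hypothetical demonstration of the grouping param_performance performs,
# rewritten as a free function (the exp_reports contents are made up for
# illustration; flatten_dict is the sketch above):
import numpy as np

def mean_accuracy_by_param(exp_reports, param):
    grouped = {}
    for report in exp_reports:
        flat = flatten_dict(report["raw_params"])
        if param in flat:
            grouped.setdefault(flat[param], []).append(report["accuracy"])
    return {value: np.mean(accs) for value, accs in grouped.items()}

reports = [
    {"raw_params": {"classifier": {"name": "svm"}}, "accuracy": 0.90},
    {"raw_params": {"classifier": {"name": "svm"}}, "accuracy": 0.80},
    {"raw_params": {"classifier": {"name": "rf"}}, "accuracy": 0.70},
]
# mean_accuracy_by_param(reports, "classifier_name")
# -> {"svm": 0.85, "rf": 0.70}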
def change_to_right_form(self, job):
    # Deep copy so repeated calls don't mutate the shared template (the
    # original used .copy(), which shares nested dicts; needs `import copy`).
    norm_job = copy.deepcopy(self.standard_sample)
    flatten_job = flatten_dict(job)
    for key, value in self.map_schema.items():
        real_value = flatten_job.get(key)
        if real_value is None:
            continue
        # Walk down to the parent dict of the target attribute.
        attribute = norm_job
        for attribute_level in value[:-1]:
            attribute = attribute.get(attribute_level)
        if isinstance(real_value, str):
            # Strip HTML tags from string values.
            attribute[value[-1]] = re.sub(r'<[^<>]*>', '', real_value)
        elif isinstance(attribute[value[-1]], dict) and isinstance(real_value, list):
            attribute[value[-1]] = real_value[0]
        else:
            attribute[value[-1]] = real_value
    return norm_job
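# change_to_right_form assumes self.map_schema maps flattened source keys to
# key paths into self.standard_sample. A hypothetical pair of values
# consistent with the traversal above (the real schema is not shown):
standard_sample = {"title": "", "salary": {"min": None, "max": None}}
map_schema = {
    "job_title": ["title"],           # flat source key -> path into template
    "salary_min": ["salary", "min"],
    "salary_max": ["salary", "max"],
}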
def main(cfg: DictConfig) -> None:
    print(cfg.pretty())
    neptune_logger = CustomNeptuneLogger(
        params=flatten_dict(OmegaConf.to_container(cfg, resolve=True)),
        **cfg.logging.neptune_logger)
    tb_logger = loggers.TensorBoardLogger(**cfg.logging.tb_logger)
    lr_logger = LearningRateLogger()
    my_callback = MyCallback(cfg)

    model = get_model(cfg)
    if cfg.model.ckpt_path is not None:
        ckpt_pth = glob.glob(utils.to_absolute_path(cfg.model.ckpt_path))
        model = load_pytorch_model(ckpt_pth[0], model)
    if cfg.trainer.distributed_backend == 'ddp':
        model = nn.SyncBatchNorm.convert_sync_batchnorm(model)

    seed_everything(2020)

    lit_model = PLRegressionImageClassificationSystem(hparams=cfg, model=model)

    checkpoint_callback_conf = OmegaConf.to_container(
        cfg.callbacks.model_checkpoint, resolve=True)
    checkpoint_callback = ModelCheckpoint(**checkpoint_callback_conf)
    early_stop_callback_conf = OmegaConf.to_container(
        cfg.callbacks.early_stop, resolve=True)
    early_stop_callback = EarlyStopping(**early_stop_callback_conf)

    trainer = Trainer(
        checkpoint_callback=checkpoint_callback,
        early_stop_callback=early_stop_callback,
        logger=[tb_logger, neptune_logger],
        # logger=[tb_logger],
        callbacks=[lr_logger, my_callback],
        **cfg.trainer)

    trainer.fit(lit_model)
def generate_detail(self):
    fn = self.filename("exp_set_detail", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])
    relevant_keys = list(set(self.exp_set.relevant_keys))

    res = "# Experiment Set Detail\n"
    res += "{}\n\n".format(datestamp_str(self.exp_set.init_time))
    res += "* **Runtime:** {}s\n".format(np.round(self.exp_set.run_time, 1))
    res += "* **Multiprocessing:** {}\n".format(self.exp_set.multiprocessing)
    res += "\n\n"
    if self.exp_set.description:
        res += "#### Description\n"
        res += self.exp_set.description + "\n"
    if self.exp_set.hypothesis:
        res += "#### Hypothesis\n"
        res += self.exp_set.hypothesis + "\n"
    res += "\n\n"
    res += "## Performance by configuration\n\n"

    for i, exp_report in enumerate(self.exp_reports):
        flat_params = flatten_dict(exp_report["raw_params"])
        res += "---\n\n"
        res += "### Entry {} accuracy: {}\n".format(
            i + 1, np.round(exp_report["accuracy"], DECIMALS))
        res += "* **Kappa:** {}\n".format(
            np.round(exp_report["kappa"], DECIMALS))
        res += "* **Average Experiment Time:** {}s\n".format(
            np.round(exp_report["time"]["exp"], 2))
        res += "* **Dataset type:** {}\n".format(exp_report["dataset_type"])
        res += "* **Dataset avg length:** {}\n".format(
            np.round(np.mean(exp_report["dataset_lengths"]), DECIMALS))
        # res += "* **Feature Vector Length:** {}\n".format(exp_report["feature_vector_length"])
        res += "* **CV Splits:** {}\n".format(exp_report["cv_splits"])
        res += "\n"
        res += "{}\n".format(np.round(exp_report["accuracies"], DECIMALS))

        res += "### Config\n"
        res += "**Relevant Parameters**\n\n"
        relevant_params = {
            key: flat_params[key]
            for key in relevant_keys if key in flat_params
        }
        params_df = pd.DataFrame([relevant_params])
        res += tabulate(params_df, tablefmt="pipe", headers="keys",
                        showindex=False) + "\n"
        res += "**All Parameters**\n\n"
        params_df = pd.DataFrame([flat_params])
        res += tabulate(params_df.round(DECIMALS), tablefmt="pipe",
                        headers="keys", showindex=False) + "\n"

        res += "### Details\n"
        res += "**Confusion Matrix**\n\n"
        c_matrix = exp_report["confusion_matrix"]
        class_names = exp_report["dataset_type"].labels
        c_matrix_df = pd.DataFrame(
            c_matrix,
            columns=["Pred: {}".format(l) for l in class_names],
            index=["__True: {}__".format(l) for l in class_names])
        res += tabulate(c_matrix_df, tablefmt="pipe", headers="keys",
                        showindex=True) + "\n"
        res += "<!---\nConfusion Matrix in LaTeX\n"
        res += tabulate(c_matrix_df, tablefmt="latex", headers="keys",
                        showindex=False) + "\n"
        res += "--->\n"
        # Embeds the raw confusion matrix as Python literals inside a hidden
        # HTML comment so it can be copy-pasted back into code.
        res += "<!---\nConfusion Matrix Raw\n"
        res += "c_matrix = np.array({})\n".format(format_array(c_matrix))
        res += "class_names = {}\n".format(format_array(class_names))
        res += "--->\n"

        # res += "**Report**\n\n"
        # report = exp_report["report"]
        # report_df = pd.DataFrame.from_dict(report)
        # report_key = list(report.keys())[0]
        # index = ["__{}__".format(key) for key in report[report_key].keys()]
        # res += tabulate(report_df.round(DECIMALS), tablefmt="pipe", headers="keys", showindex=index) + "\n"

        res += "**Time**\n\n"
        time_df = pd.DataFrame([exp_report["time"]])
        res += tabulate(time_df.round(DECIMALS), tablefmt="pipe",
                        headers="keys", showindex=False) + "\n"

    with open(fp, 'w+') as file:
        file.write(res)
def generate_overview(self):
    fn = self.filename("exp_set_overview", "md")
    Print.data(fn)
    fp = "/".join([self.path, fn])
    relevant_keys = list(set(self.exp_set.relevant_keys))
    Print.data(relevant_keys)
    exp_summary = np.empty(
        shape=[len(self.exp_reports), 3 + len(relevant_keys)], dtype="U25")

    res = "# Experiment Set Overview\n"
    res += "## Performance by relevant params\n\n"
    param_performances = {
        param: self.param_performance(param)
        for param in self.all_relevant_params()
    }
    for param_name, param_vals in param_performances.items():
        res += "### {}\n\n".format(param_name)
        param_vals_list = sorted(param_vals.items(),
                                 key=lambda x: x[1],
                                 reverse=True)
        res += "\n".join([
            "* **{}:** {}".format(e[0], np.round(e[1], DECIMALS))
            for e in param_vals_list
        ])
        res += "\n\n"
    res += "\n\n"

    res += "## Performance Overview\n\n"
    for i, exp_report in enumerate(self.exp_reports):
        flat_params = flatten_dict(exp_report["raw_params"])
        relevant_params = np.empty(shape=[len(relevant_keys)], dtype="U25")
        for j, key in enumerate(relevant_keys):
            if key in flat_params:
                relevant_params[j] = flat_params[key]
            else:
                relevant_params[j] = "-"
        acc_string = "{}%".format(np.round(100 * exp_report["accuracy"], 1))
        kappa_string = "{}".format(np.round(exp_report["kappa"], 3))
        time_string = "{}s".format(np.round(exp_report["time"]["exp"], 2))
        exp_summary[i, :3] = [acc_string, kappa_string, time_string]
        exp_summary[i, 3:] = relevant_params

    df_perf1 = pd.DataFrame(exp_summary,
                            columns=["Accuracy", "Kappa", "Avg Time"] +
                            relevant_keys,
                            copy=True)
    df_perf1.sort_values(by=["Accuracy"], axis=0, ascending=False,
                         inplace=True)
    res += tabulate(df_perf1, tablefmt="pipe", headers="keys",
                    showindex=False) + "\n"
    res += "<!---\nResults in LaTeX\n"
    res += tabulate(df_perf1, tablefmt="latex", headers="keys",
                    showindex=False) + "\n"
    res += "--->\n"

    with open(fp, 'w+') as file:
        file.write(res)