def to_matrix(self, entries_order: list = None, add_ones=False, dtype=None,
              mtype=np.matrix, should_zscore: bool = True):
    if entries_order is None:
        entries_order = sorted(self._gnx)

    sorted_features = map(at(1), sorted(self.items(), key=at(0)))
    # Consider caching the matrix creation (if it takes a long time)
    sorted_features = [feature for feature in sorted_features
                       if feature.is_relevant() and feature.is_loaded]

    if sorted_features:
        # Stack the per-feature matrices column-wise into one matrix.
        mx = np.hstack([feature.to_matrix(entries_order, mtype=mtype,
                                          should_zscore=should_zscore)
                        for feature in sorted_features])
        if add_ones:
            mx = np.hstack([mx, np.ones((mx.shape[0], 1))])
        if dtype is not None:
            # astype returns a copy; the original discarded the result.
            mx = mx.astype(dtype)
    else:
        mx = np.matrix([])

    return mtype(mx)
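# Illustration (not part of the module): a minimal, self-contained sketch of what
# to_matrix assembles -- z-score each feature block column-wise, hstack the blocks,
# and optionally append a bias column of ones. The helper name `zscore_cols` and the
# random feature blocks are hypothetical, used only to show the shapes involved.
import numpy as np

def zscore_cols(a):
    # Column-wise z-score; guard zero std to avoid division by zero.
    std = a.std(axis=0)
    std[std == 0] = 1.0
    return (a - a.mean(axis=0)) / std

feat_a = np.random.rand(5, 2)   # e.g., a 2-column feature for 5 nodes
feat_b = np.random.rand(5, 3)   # e.g., a 3-column feature for 5 nodes
mx = np.hstack([zscore_cols(feat_a), zscore_cols(feat_b)])
mx = np.hstack([mx, np.ones((mx.shape[0], 1))])  # add_ones=True
print(mx.shape)  # (5, 6): one row per node, one column per feature dim plus the bias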
def shape(self):
    sorted_features = map(at(1), sorted(self.items(), key=at(0)))
    sorted_features = [feature for feature in sorted_features
                       if feature.is_relevant() and feature.is_loaded]
    res = []
    for feature in sorted_features:
        # Append a (name, num_columns) tuple. The original passed two
        # arguments to list.append, which raises a TypeError.
        res.append((feature.print_name(), feature.shape[1]))
    return res
def _test(self, name, model_args, config):
    model, arguments = model_args["model"], model_args["arguments"]
    model.eval()

    loss_test = []
    acc_test = []
    hidden = model.init_hidden()
    for gnx_path in self.get_gnx_paths():
        adj_r_t, adj, feat_x, topo_x, labels, idx_train, idx_val, idx_test = \
            self.loader.load(data_type="asymmetric", feature_path=gnx_path)

        output = model(feat_x, topo_x, adj, hidden)
        loss_test.append(functional.nll_loss(output[idx_test], labels[idx_test]))
        acc_test.append(accuracy(output[idx_test], labels[idx_test]))

        # loss_train = functional.nll_loss(output[idx_train], labels[idx_train])
        # acc_train = accuracy(output[idx_train], labels[idx_train])
        # loss_train.backward()
        # optimizer.step()

    # output = model(*arguments)
    # loss_test = functional.nll_loss(output[idx_test], labels[idx_test])
    # acc_test = accuracy(output[idx_test], labels[idx_test])

    # Report only the metrics of the last graph.
    loss_test = loss_test[-1]
    acc_test = acc_test[-1]

    self._logger.info(name + " " +
                      "Test set results: " +
                      "loss= {:.4f} ".format(loss_test.data[0]) +
                      "accuracy= {:.4f}".format(acc_test.data[0]))
    self._data_logger.info(name, loss_test.data[0], acc_test.data[0],
                           *list(map(at(1), config)))
    return {"loss": loss_test.data[0], "acc": acc_test.data[0]}
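# Illustration (not part of the module): a typical accuracy helper matching the call
# above, for (N, C) log-probability outputs and integer labels. This is a hedged
# sketch of what such a helper usually looks like, not necessarily the project's own
# `accuracy` implementation.
import torch

def accuracy(output, labels):
    preds = output.max(1)[1].type_as(labels)    # arg-max class per row
    correct = preds.eq(labels).double().sum()   # number of correct predictions
    return correct / len(labels)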
def rel_filename(self):
    if not self._rel_filename:
        self._rel_filename = '{}_{}_{}_{}.{}.{}'.format(
            self['task_id'],
            *at('system', 'operation', 'name', 'extension')(self),
            self.get('result_extension', 'xlsx'),
        )
    return self._rel_filename
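# Illustration (not part of the class): the same filename pattern built from a plain
# dict, assuming `at` is operator.itemgetter as used throughout; the field values
# below are hypothetical.
from operator import itemgetter as at

record = {"task_id": 17, "system": "erp", "operation": "export",
          "name": "daily", "extension": "csv"}
rel_filename = '{}_{}_{}_{}.{}.{}'.format(
    record['task_id'],
    *at('system', 'operation', 'name', 'extension')(record),
    record.get('result_extension', 'xlsx'),
)
print(rel_filename)  # 17_erp_export_daily.csv.xlsx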
def handle_instruction_label(lst):
    events = list(map(at("start", "end", "action", "resource"), lst))
    ret = collections.defaultdict(list)
    for start, end, action, resource in events:
        # Convert the ISO date strings to day offsets from 2020-01-01.
        start = (datetime.strptime(start, "%Y-%m-%dT00:00:00") - datetime(2020, 1, 1)).days
        end = (datetime.strptime(end, "%Y-%m-%dT00:00:00") - datetime(2020, 1, 1)).days
        for i in range(start, end):
            ret[i].append((resource, action))

    # Add "IMMEDIATE" if no time duration is specified
    for day_events in ret.values():
        has_time_duration_map = collections.defaultdict(bool)
        for res, action in day_events:
            has_time_duration_map[res] |= AssignedTypes.parse(action) == AssignedTypes.TimeLength
        for res, has_time_duration in has_time_duration_map.items():
            if not has_time_duration:
                day_events.append((res, IMMEDIATE))
        day_events.sort()
    return dict(ret)
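# Illustration (not part of the module): a minimal, self-contained sketch of the
# per-day expansion performed above. The event values are hypothetical, and the
# AssignedTypes/IMMEDIATE post-processing is omitted here.
import collections
from datetime import datetime
from operator import itemgetter as at

events = [{"start": "2020-01-02T00:00:00", "end": "2020-01-05T00:00:00",
           "action": "allocate", "resource": "crane"}]
ret = collections.defaultdict(list)
for start, end, action, resource in map(at("start", "end", "action", "resource"), events):
    start = (datetime.strptime(start, "%Y-%m-%dT00:00:00") - datetime(2020, 1, 1)).days
    end = (datetime.strptime(end, "%Y-%m-%dT00:00:00") - datetime(2020, 1, 1)).days
    for i in range(start, end):
        ret[i].append((resource, action))
print(dict(ret))
# {1: [('crane', 'allocate')], 2: [('crane', 'allocate')], 3: [('crane', 'allocate')]}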
def main(product_params, args):
    train_p = 50
    num_samples = 3
    config = {
        "hidden_layers": [70, 35],
        "dropout": KIPF_BASE["dropout"],
        "learning_rate": KIPF_BASE["lr"],
        "weight_decay": KIPF_BASE["weight_decay"],
        "epochs": args.epochs,
        "train_p": 0,
        "feat_type": "neighbors",
        "dataset": "firms",
        "seed": 12345678,
    }

    products_path = os.path.join(PROJ_DIR, "logs", config["dataset"],
                                 time.strftime("%Y_%m_%d_%H_%M_%S"))
    if not os.path.exists(products_path):
        os.makedirs(products_path)

    logger = multi_logger([
        PrintLogger("IdansLogger", level=logging.INFO),
        FileLogger("results_%s" % config["dataset"], path=products_path, level=logging.INFO),
        FileLogger("results_%s_all" % config["dataset"], path=products_path, level=logging.DEBUG),
    ], name=None)
    # data_logger = CSVLogger("results_%s" % config["dataset"], path=products_path)
    # all_args = set(config).union(map(at(0), product_params))
    # data_logger.info("name", "loss", "accuracy", *sorted(all_args))
    runner = ModelRunner(DATA_PATH, args.cuda, logger, None)  # data_logger)

    train_p /= 100.
    config["test_p"] = 1 - train_p
    config["train_p"] = train_p

    # for train_p in [5]:  # + list(range(5, 100, 10)):
    for pars in product(*map(at(1), product_params)):
        current_params = list(zip(map(at(0), product_params), pars))
        # cur_seed = 214899513  # random.randint(1, 1000000000)
        cur_seed = random.randint(1, 1000000000)
        current_params.append(("seed", cur_seed))
        config.update(current_params)
        if "seed" in config:
            np.random.seed(config["seed"])
            torch.manual_seed(config["seed"])
            if args.cuda is not None:
                torch.cuda.manual_seed(config["seed"])

        config_args = sorted(config.items(), key=at(0))
        logger.info("Arguments: (train %1.2f) " +
                    ", ".join("%s: %s" % (name, val) for name, val in current_params),
                    train_p)

        res = []
        for _ in range(num_samples):
            res.append(runner.run(config_args))
        # res = [runner.run(config) for _ in range(num_samples)]
        with open(os.path.join(products_path, "quant_res.pkl"), "ab") as out_file:
            pickle.dump({"params": current_params, "res": res}, out_file)
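# Illustration (not part of the script): how the parameter grid above is expanded by
# product(*map(at(1), product_params)) and zipped back to named pairs. The value of
# `product_params` here is a hypothetical example.
from itertools import product
from operator import itemgetter as at

product_params = [("dropout", [0.3, 0.5]), ("learning_rate", [0.01, 0.001])]
for pars in product(*map(at(1), product_params)):
    current_params = list(zip(map(at(0), product_params), pars))
    print(current_params)
# [('dropout', 0.3), ('learning_rate', 0.01)]
# [('dropout', 0.3), ('learning_rate', 0.001)]
# ... one line per combination, 4 in total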