Example #1
    def to_matrix(self,
                  entries_order: list = None,
                  add_ones=False,
                  dtype=None,
                  mtype=np.matrix,
                  should_zscore: bool = True):
        if entries_order is None:
            entries_order = sorted(self._gnx)

        sorted_features = map(at(1), sorted(self.items(), key=at(0)))
        # Consider caching the matrix creation (if it takes a long time)
        sorted_features = [
            feature for feature in sorted_features
            if feature.is_relevant() and feature.is_loaded
        ]

        if sorted_features:
            mx = np.hstack([
                feature.to_matrix(entries_order,
                                  mtype=mtype,
                                  should_zscore=should_zscore)
                for feature in sorted_features
            ])
            if add_ones:
                mx = np.hstack([mx, np.ones((mx.shape[0], 1))])
            if dtype is not None:
                mx = mx.astype(dtype)  # astype returns a new array; it must be reassigned
        else:
            mx = np.matrix([])

        return mtype(mx)
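The `at` helper is never defined in these examples; from its usage (positional indices here, string keys in Example #4) it is presumably an alias for `operator.itemgetter`. A minimal sketch of that assumption:

from operator import itemgetter as at

items = [("b", 2), ("a", 1)]
# at(0) keys the sort by name; at(1) extracts each value
print(list(map(at(1), sorted(items, key=at(0)))))  # [1, 2]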
Example #2
 def shape(self):
     sorted_features = map(at(1), sorted(self.items(), key=at(0)))
     sorted_features = [
         feature for feature in sorted_features
         if feature.is_relevant() and feature.is_loaded
     ]
     res = []
     for feature in sorted_features:
         res.append((feature.print_name(), feature.shape[1]))
     return res
Example #3
    def _test(self, name, model_args, config):
        model, arguments = model_args["model"], model_args["arguments"]
        model.eval()

        loss_test = []
        acc_test = []
        hidden = model.init_hidden()
        for gnx_path in self.get_gnx_paths():
            adj_r_t, adj, feat_x, topo_x, labels, idx_train, idx_val, idx_test = self.loader.load(
                data_type="asymmetric", feature_path=gnx_path)

            output = model(feat_x, topo_x, adj, hidden)
            loss_test.append(
                functional.nll_loss(output[idx_test], labels[idx_test]))
            acc_test.append(accuracy(output[idx_test], labels[idx_test]))
            # loss_train = functional.nll_loss(output[idx_train], labels[idx_train])
            # acc_train = accuracy(output[idx_train], labels[idx_train])
            # loss_train.backward()
            # optimizer.step()
        # output = model(*arguments)
        # loss_test = functional.nll_loss(output[idx_test], labels[idx_test])
        # acc_test = accuracy(output[idx_test], labels[idx_test])
        # Keep only the metrics from the last graph snapshot
        loss_test = loss_test[-1]
        acc_test = acc_test[-1]
        self._logger.info(name + " " + "Test set results: " +
                          "loss= {:.4f} ".format(loss_test.item()) +
                          "accuracy= {:.4f}".format(acc_test.item()))
        self._data_logger.info(name, loss_test.item(), acc_test.item(),
                               *list(map(at(1), config)))
        return {"loss": loss_test.item(), "acc": acc_test.item()}
Example #4
 def rel_filename(self):
     if not self._rel_filename:
         self._rel_filename = '{}_{}_{}_{}.{}.{}'.format(
             self['task_id'],
             *at('system', 'operation', 'name', 'extension')(self),
             self.get('result_extension', 'xlsx'),
         )
     return self._rel_filename
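Because `at(...)` with several keys returns a tuple, the `*` spreads four fields straight into the six format placeholders. A small illustration with a hypothetical record:

record = {'task_id': 7, 'system': 'sap', 'operation': 'export',
          'name': 'orders', 'extension': 'csv'}
'{}_{}_{}_{}.{}.{}'.format(record['task_id'],
                           *at('system', 'operation', 'name', 'extension')(record),
                           'xlsx')
# -> '7_sap_export_orders.csv.xlsx'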
Example #5
def handle_instruction_label(lst):
    events = list(map(at("start", "end", "action", "resource"), lst))
    ret = collections.defaultdict(list)
    for start, end, action, resource in events:
        start = (datetime.strptime(start, "%Y-%m-%dT00:00:00") -
                 datetime(2020, 1, 1)).days
        end = (datetime.strptime(end, "%Y-%m-%dT00:00:00") -
               datetime(2020, 1, 1)).days
        for i in range(start, end):
            ret[i].append((resource, action))
    # Add "LIMMEDIATE" if no time duration specified
    for lst in ret.values():
        has_time_duration_map = collections.defaultdict(bool)
        for res, action in lst:
            has_time_duration_map[res] |= AssignedTypes.parse(
                action) == AssignedTypes.TimeLength
        for res, has_time_duration in has_time_duration_map.items():
            if not has_time_duration:
                lst.append((res, IMMEDIATE))
            lst.sort()

    return dict(ret)
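The `strptime` format pins the time component to midnight, and each date becomes an integer day offset from 2020-01-01, so an event expands into one `(resource, action)` entry per covered day. A small check of the offset arithmetic with a hypothetical event:

from datetime import datetime

start = "2020-01-02T00:00:00"
offset = (datetime.strptime(start, "%Y-%m-%dT00:00:00") - datetime(2020, 1, 1)).days
print(offset)  # 1, so with end "2020-01-04T00:00:00" the event covers days 1 and 2
               # (range(start, end) makes the end day exclusive)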
Example #6
def main(product_params, args):
    train_p = 50
    num_samples = 3

    config = {
        "hidden_layers": [70, 35],
        "dropout": KIPF_BASE["dropout"],
        "learning_rate": KIPF_BASE["lr"],
        "weight_decay": KIPF_BASE["weight_decay"],
        "epochs": args.epochs,
        "train_p": 0,
        "feat_type": "neighbors",
        "dataset": "firms",
        "seed": 12345678
    }

    products_path = os.path.join(PROJ_DIR, "logs", config["dataset"],
                                 time.strftime("%Y_%m_%d_%H_%M_%S"))
    os.makedirs(products_path, exist_ok=True)

    logger = multi_logger([
        PrintLogger("IdansLogger", level=logging.INFO),
        FileLogger("results_%s" % config["dataset"],
                   path=products_path,
                   level=logging.INFO),
        FileLogger("results_%s_all" % config["dataset"],
                   path=products_path,
                   level=logging.DEBUG),
    ], name=None)

    # data_logger = CSVLogger("results_%s" % config["dataset"], path=products_path)
    # all_args = set(config).union(map(at(0), product_params))
    # data_logger.info("name", "loss", "accuracy", *sorted(all_args))

    runner = ModelRunner(DATA_PATH, args.cuda, logger, None)  # data_logger)

    train_p /= 100.
    config["test_p"] = 1 - train_p
    config["train_p"] = train_p

    # for train_p in [5]:  # + list(range(5, 100, 10)):
    for pars in product(*map(at(1), product_params)):
        current_params = list(zip(map(at(0), product_params), pars))
        # cur_seed = 214899513 # random.randint(1, 1000000000)
        cur_seed = random.randint(1, 1000000000)
        current_params.append(("seed", cur_seed))
        config.update(current_params)

        if "seed" in config:
            np.random.seed(config["seed"])
            torch.manual_seed(config["seed"])
            if args.cuda is not None:
                torch.cuda.manual_seed(config["seed"])

        config_args = sorted(config.items(), key=at(0))
        logger.info(
            "Arguments: (train %1.2f) " +
            ", ".join("%s: %s" % (name, val) for name, val in current_params),
            train_p)
        res = []
        for _ in range(num_samples):
            res.append(runner.run(config_args))

        # res = [runner.run(config) for _ in range(num_samples)]
        with open(os.path.join(products_path, "quant_res.pkl"), "ab") as out:
            pickle.dump({"params": current_params, "res": res}, out)
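Because the results file is opened in "ab" (append) mode, each parameter combination adds another pickle record to the same file; reading it back means calling pickle.load repeatedly until EOF:

results = []
with open(os.path.join(products_path, "quant_res.pkl"), "rb") as f:
    while True:
        try:
            results.append(pickle.load(f))
        except EOFError:
            break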