        # one `delete <alias>` statement per alias in the generated snippet,
        # then a blank line to close the block before returning it
        replacement += f'  {indent}delete {alias}\n'
    replacement += '\n'
    return replacement


with open(os.path.join(root, 'gen', 'itemfields.ts'), 'w') as f:
    print('declare const Zotero: any\n', file=f)
    print("const jurism = Zotero.BetterBibTeX.client() === 'jurism'", file=f)
    print('const zotero = !jurism\n', file=f)
    print('export const valid = {', file=f)
    print('  type: {', file=f)
    for itemType, client in sorted(ValidTypes.items(), key=lambda x: x[0]):
        print(f'    {itemType}: {client},', file=f)
    print('  },', file=f)
    print('  field: {', file=f)
    for itemType, fields in sorted(ValidFields.items(), key=lambda x: x[0]):
        print(f'    {itemType}: {{', file=f)
        for field, client in sorted(fields.items(), key=lambda x: x[0]):
            print(f'      {field}: {client},', file=f)
        print('    },', file=f)
    print('  },', file=f)
    print('}\n', file=f)

    print('function unalias(item) {', file=f)
    print('  delete item.inPublications', file=f)
    unalias = '  let v\n\n'
    for client in ['both', 'zotero', 'jurism']:
        if client != 'both': unalias += f'  if ({client}) {{\n'

        for field, aliases in Alias.items():
                if len(aliases[client]) > 0:
                    # Per-field body omitted in this fragment: it appends the lines
                    # that copy the value from the aliases into item.<field> and
                    # then delete the alias properties.
                    pass
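
# Illustration only (sample values below are assumptions, not taken from the
# generator): the code above expects three module-level dicts. `ValidTypes` maps
# an item type to a TypeScript expression emitted verbatim; `ValidFields` maps an
# item type to a {field: expression} dict; `Alias` maps a canonical field name to
# per-client lists of aliases.
#
#   ValidTypes  = {'journalArticle': 'true', 'gazette': 'jurism'}
#   ValidFields = {'journalArticle': {'title': 'true', 'volume': 'true'}}
#   Alias       = {'publicationTitle': {'both': [], 'zotero': ['journalAbbreviation'], 'jurism': []}}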

import datetime
import shutil
import sys
from pathlib import Path

import numpy as np
import pandas as pd
import yaml
from gensim.models import FastText
from munch import DefaultMunch
from scipy.special import expit
from sklearn.metrics import f1_score

# Project-local helpers used below (HP, DST, CDST, FT_DST, Tee, EndOfExperiment,
# get_new_dir, normal_choice, uniform_choice, default_conf, metrics) are assumed
# to be importable from elsewhere in this repository.


class Experiment(object):
    def __init__(self, conf_path=None, experiments_dir=None, hp=None):
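        """Load the configuration (from `conf_path` or the defaults), create the
        experiment directory and initialise the empty params/metrics summary."""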

        self.conf_path = conf_path
        self.conf = None
        self.log = None
        self.trainer = None
        self.tee = None
        self.current_run = 0

        self.fast_text_model = None

        if self.conf_path:
            self.set_conf(self.conf_path)
        else:
            self.conf = DefaultMunch(None).fromDict(default_conf)

        if experiments_dir:
            self.conf.experiments_dir = experiments_dir
        assert self.conf.experiments_dir is not None
        self.conf.experiments_dir = Path(self.conf.experiments_dir).resolve()

        if not self.conf.experiments_dir.exists():
            print("Creating %s" % str(self.conf.experiments_dir))
            self.conf.experiments_dir.mkdir(parents=True)

        if not self.conf.exp_id:
            self.conf.exp_id = str(datetime.datetime.now())[:10]

        self.dir = get_new_dir(self.conf.experiments_dir, self.conf.exp_id)

        self.summary = {
            "params": {p: [] for p in self.conf.randomizable_params},
            "other": {},
            "metrics": {m: [] for m in metrics},
        }

    def set_conf(self, path):
        with open(path, "r") as f:
            conf = yaml.safe_load(f)
        self.conf = DefaultMunch(None).fromDict(conf)

    def randomize(self, conf=None, verbose=0):
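        """Draw one value for every randomizable parameter (range/list/fixed,
        sampled normally, uniformly or deterministically; embedding_dim is fixed
        at 300 for FT_DST) and record it both on the trainer's hyperparameters
        and in the run summary."""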
        conf = conf or self.conf
        params = conf.randomizable_params

        for p_name, p in params.items():
            if self.conf.trainer_type == "FT_DST" and p_name == "embedding_dim":
                self.summary["params"][p_name].append(300)
                continue
            if p.type == "range":
                values = np.arange(p.min, p.max, p.step)
            elif p.type == "list":
                values = np.array(p.vals)
            elif p.type == "fixed":
                value = np.array(p.value)
            else:
                raise ValueError("Unknown type {} for {}".format(p.type, p_name))
            if p.type != "fixed":
                if p.distribution == "normal":
                    value = normal_choice(values)
                elif p.distribution == "uniform":
                    value = uniform_choice(values)
                else:
                    # p.distribution == "deterministic"
                    value = values[self.current_run % len(values)]

            setattr(self.trainer.hp, p_name, value.tolist())
            self.summary["params"][p_name].append(value)
            if verbose > 0:
                print("{:20}: {:10}".format(p_name, value))
        if verbose > 0:
            print("{:20}: {:10}".format("trainer.hp.dir", self.trainer.hp.dir))

    def dump_conf(self, path):
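        """Serialise the configuration to YAML at `path`, temporarily replacing
        Path values with strings and restoring them afterwards."""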
        stringified = []
        for attr, val in self.conf.items():
            if isinstance(val, Path):
                self.conf[attr] = str(val)
                stringified.append(attr)
        with open(path, "w") as f:
            yaml.safe_dump(self.conf, f, default_flow_style=False)
        for attr in stringified:
            self.conf[attr] = Path(self.conf[attr])

    def summarize(self):
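        """Append a readable summary to summary.txt and write metrics.csv,
        params.csv and conf.yaml into the experiment directory."""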
        metrics = pd.DataFrame(self.summary["metrics"])
        params = pd.DataFrame(self.summary["params"])
        other = "\n".join("{:20}: {}".format(k, v)
                          for k, v in self.summary["other"].items())
        summary = "{}\n\n{}".format(
            other,
            pd.concat([metrics, params], axis=1).to_string())
        with open(self.dir / "summary.txt", "a") as f:
            f.write(summary)
        metrics.to_csv(self.dir / "metrics.csv")
        params.to_csv(self.dir / "params.csv")
        self.dump_conf(self.dir / "conf.yaml")

    def setup(self, log=True):
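        """Instantiate the trainer (DST, CDST or FT_DST) with hyperparameters
        taken from the configuration; when `log` is set, tee stdout to a log
        file inside the trainer's directory."""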
        hp = HP(base_dir=self.dir)
        if self.conf.trainer_type == "DST":
            for attr, val in self.conf.hyperparameter.items():
                if val is not None:
                    setattr(hp, attr, val)
            self.trainer = DST(hp=hp)
        elif self.conf.trainer_type == "CDST":
            for attr, val in self.conf.hyperparameter.items():
                if val is not None:
                    setattr(hp, attr, val)
            self.trainer = CDST(hp=hp)
        elif self.conf.trainer_type == "FT_DST":
            for attr, val in self.conf.hyperparameter.items():
                if attr != "embedding_dim":
                    if val is not None:
                        setattr(hp, attr, val)
            if not self.fast_text_model:
                print("Setting fast_text_model...", end="")
                self.fast_text_model = FastText.load_fasttext_format(
                    hp.fast_text_model_file)
                print("Ok.")
            self.trainer = FT_DST(fast_text_model=self.fast_text_model, hp=hp)
        else:
            raise ValueError("Unknown Trainer")

        self.log = log
        if log:
            self.tee = Tee(str(self.trainer.hp.dir / "log.txt"))
            sys.stdout = self.tee

    def reset(self, verbose=0):
        self.setup()
        # pass verbose by keyword: the first positional parameter of randomize is `conf`
        self.randomize(verbose=verbose)

    def delete(self):
        shutil.rmtree(self.dir, ignore_errors=True)

    def update_metrics(self, metrics):
        if metrics is None:
            metrics = None, None, None, None
        acc, mic, mac, wei = metrics
        self.summary["metrics"]["micro_f1"].append(mic)
        self.summary["metrics"]["macro_f1"].append(mac)
        self.summary["metrics"]["weighted_f1"].append(wei)
        self.summary["metrics"]["accuracy"].append(acc)

    def get_samples(self, samples, sample_size, is_val=False):
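        """Collect `samples` batches of `sample_size` items, run inference and
        return the sigmoid-transformed predictions with the true labels."""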
        preds, ys = None, None
        for _ in range(samples):
            x, y = self.trainer.get_input_pair(is_val, sample_size)
            pred = self.trainer.infer(x)
            if preds is None:
                preds, ys = pred, y
            else:
                preds = np.concatenate((preds, pred), axis=0)
                ys = np.concatenate((ys, y), axis=0)
        return expit(preds), ys

    def eval(self, thresholds, samples, sample_size, is_val=False):
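        """Sample predictions and compute per-class, micro, macro and weighted
        F1 scores for every decision threshold."""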
        preds, ys = self.get_samples(samples, sample_size, is_val)
        averages = [None, "micro", "macro", "weighted"]

        metrics = {str(av): [] for av in averages}

        for av in averages:
            for threshold in thresholds:
                metrics[str(av)].append(
                    f1_score(ys, preds > threshold, average=av))
        return metrics

    def run(self, n_runs=None, randomize=True, log=True, verbose=0):
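        """Train for up to `n_runs` runs: set up the trainer, optionally
        randomize hyperparameters, then train, evaluate and summarize each run,
        resetting between runs. On EndOfExperiment the user chooses whether to
        delete or keep the experiment directory."""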
        n_runs = n_runs or self.conf.n_runs
        if n_runs is None:
            n_runs = np.iinfo(int).max
        print("\n= = > Run", self.current_run)

        self.setup(log)

        if randomize:
            self.randomize(verbose=verbose)

        while self.current_run < n_runs:
            if self.current_run > 0:
                print("\n= = > Run", self.current_run)
            try:
                metrics = self.trainer.train()
                self.trainer.dump_logits()
                self.update_metrics(metrics)
                self.summarize()
            except EndOfExperiment:
                print("\nStopping experiment. Delete?")
                answer = input("y/n")
                if "y" in answer:
                    self.delete()
                    return
                else:
                    self.summary["other"][
                        "interrupting"] = "Keyboard interrupted"
                    self.summarize()
                break

            self.current_run += 1
            if self.current_run < n_runs:
                self.reset(verbose)
            # End of run
        # End of all runs
        self.summary["other"]["interrupting"] = "Done: all runs performed."
        self.summarize()
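

# Minimal usage sketch (assumptions: "conf.yaml" and "experiments/" are
# hypothetical paths; the YAML provides trainer_type, hyperparameter,
# randomizable_params, n_runs, ...).
if __name__ == "__main__":
    exp = Experiment(conf_path="conf.yaml", experiments_dir="experiments")
    # Trains up to n_runs models, re-sampling the randomizable hyperparameters
    # between runs, and writes summary.txt / metrics.csv / params.csv / conf.yaml
    # into the experiment directory.
    exp.run(randomize=True, verbose=1)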