def _restore_model(self, path: str) -> None:
        '''Rebuild this model from the files stored under ``path``.

        Reads ``vocab_map.json``, opens a new TensorFlow session, imports the
        graph from ``tensorflow_graph.meta`` and restores the ``model``
        checkpoint into that session. Each tensor/op attribute is then
        rebound to the first entry of the graph collection with the same
        name, a summary writer is created, and the setup is checked.
        '''
        vocab_file = fmanip.join(path, 'vocab_map.json')
        with open(vocab_file, 'r') as handle:
            self.vocab_map = json.load(handle)

        self.session = tf.Session(
            config=tf.ConfigProto(allow_soft_placement=True))
        meta_file = fmanip.join(path, 'tensorflow_graph.meta')
        saver = tf.train.import_meta_graph(meta_file)
        saver.restore(self.session, fmanip.join(path, 'model'))

        # Attribute name == collection name; the order matches the original
        # sequence of assignments.
        restored_names = (
            'x_input',
            'y_input',
            'x_lengths',
            'y_hot',
            'input_keep',
            'output_keep',
            'predictor',
            'loss',
            'optimizer',
            'summary',
            'output',
            'output_prob',
            'init',
        )
        for name in restored_names:
            setattr(self, name, tf.get_collection(name)[0])

        self.logger = tf.summary.FileWriter(
            self._get_log_dir(), graph=tf.get_default_graph())

        self._assert_all_setup()
Ejemplo n.º 2
0
    def _save_model(self, path: str) -> None:
        '''Write both vocabulary maps and a TensorFlow checkpoint to ``path``.

        Creates ``vocab_map.json`` and ``vocab_map_backwards.json`` and then
        saves the current session under the checkpoint prefix ``model``.
        '''
        vocab_file = fmanip.join(path, 'vocab_map.json')
        with open(vocab_file, 'w') as stream:
            json.dump(self.vocab_map, stream)

        # The backwards map is converted with .tolist() and stored as JSON
        # (an earlier variant wrote 'vocab_map_backwards.npy' via np.save).
        backwards_file = fmanip.join(path, 'vocab_map_backwards.json')
        with open(backwards_file, 'w') as stream:
            json.dump(self.vocab_map_backwards.tolist(), stream)

        tf.train.Saver().save(self.session, fmanip.join(path, 'model'))
Ejemplo n.º 3
0
def save_small(prefix, ann_cls, src, dest, percentage):
    # type: (str, Type[TAnn], str, str, float) -> None
    '''Write a truncated copy of the dataset in ``src`` into ``dest``.

    The train/dev/test splits are loaded with ``load_raw_data`` and the
    leading ``int(len(split) * percentage)`` items of each are saved as
    ``train.json``, ``dev.json`` and ``test.json``. Nothing is written when
    ``dest/train.json`` already exists.
    '''
    train, dev, test = load_raw_data(src, prefix, ann_cls)

    fmanip.ensure_folder_exists(dest)
    targets = [
        fmanip.join(dest, file_name)
        for file_name in ('train.json', 'dev.json', 'test.json')
    ]

    # An existing train.json is taken to mean the small set was already made.
    if os.path.isfile(targets[0]):
        return

    for target, items in zip(targets, (train, dev, test)):
        keep = int(len(items) * percentage)
        save_as_json(target, items[:keep])
Ejemplo n.º 4
0
    def restore_from_saved(cls: Any,
                           run_num: Optional[int] = None,
                           path: Optional[str] = None) -> Any:
        '''Builds an instance of ``cls`` from a previously saved model folder.

        ``path`` must name an existing directory. Its ``params.json`` is
        loaded and passed to ``cls`` as keyword arguments, together with
        ``restore_from=path``.

        NOTE: ``run_num`` is currently only printed. The logic that resolved
        a run directory from it and formatted ``path`` with that directory
        (e.g. path="{}/epoch10", run_num=4 -> "runs/run4/epoch10") is
        disabled; it is kept in the comment below for reference.'''
        # The signature really should be (Type[TSelf], str) -> TSelf, but it
        # is unclear whether mypy fully supports that at the moment.
        print(run_num, path)

        # Disabled run-directory resolution:
        #
        #     if run_num is None:
        #         run_num = cls._get_next_run_num() - 1
        #     run_dir = cls.base_log_dir.format(run_num)
        #     if not os.path.exists(run_dir):
        #         run_dirs = glob.glob(run_dir + "-*")
        #         if len(run_dirs) < 1:
        #             print("Error: No run with that number.")
        #             return
        #         elif len(run_dirs) > 1:
        #             print("Multiple runs with that number.")
        #         run_dir = run_dirs[0]
        #
        #     path = run_dir if path is None else path.format(run_dir)

        assert os.path.isdir(path)
        saved_params = fmanip.load_json(fmanip.join(path, 'params.json'))
        return cls(restore_from=path, **saved_params)
Ejemplo n.º 5
0
def load_attack_data(
        prefix: str = 'wikipedia-attack-data-v6',
        small: bool = False) -> Tuple[AttackData, AttackData, AttackData]:
    '''Load the attack dataset from ``data/<prefix>``.

    When ``small`` is true, ``-small`` is appended to ``prefix`` to form the
    folder name. The result is whatever ``load_raw_data`` returns for the
    ``'attack'`` files parsed with ``AttackAnnotation``.
    '''
    folder_name = prefix + '-small' if small else prefix
    data_dir = fmanip.join('data', folder_name)
    return load_raw_data(data_dir, 'attack', AttackAnnotation)
Ejemplo n.º 6
0
    def _restore_model(self, path: str) -> None:
        '''Load the vocabulary maps and checkpoint stored under ``path``.

        ``vocab_map.json`` is always read. The backwards map is read from
        ``vocab_map_backwards.json``; if that raises ``IOError`` it is loaded
        from ``vocab_map_backwards.npy`` instead and the JSON copy is written
        out. Finally a session on the default graph is created and the
        ``model`` checkpoint is restored into it.
        '''
        with open(fmanip.join(path, 'vocab_map.json'), 'r') as vocab_stream:
            self.vocab_map = json.load(vocab_stream)

        try:
            with open(fmanip.join(path, 'vocab_map_backwards.json'),
                      'r') as backwards_stream:
                backwards_items = json.load(backwards_stream)
                self.vocab_map_backwards = np.array(backwards_items)
        except IOError:
            # Fall back to the .npy file, then write the JSON copy so the
            # next restore can take the JSON branch.
            self.vocab_map_backwards = np.load(
                fmanip.join(path, 'vocab_map_backwards.npy'))
            with open(fmanip.join(path, 'vocab_map_backwards.json'),
                      'w') as backwards_stream:
                json.dump(self.vocab_map_backwards.tolist(), backwards_stream)

        self.session = tf.Session(graph=tf.get_default_graph())
        # The graph is not imported from 'tensorflow_graph.meta' here; the
        # Saver is built from the current default graph.
        tf.train.Saver().restore(self.session, fmanip.join(path, 'model'))
Ejemplo n.º 7
0
def load_aggression_data(
    prefix: str = 'wikipedia-aggression-data-v5',
    small: bool = False
) -> Tuple[AggressionData, AggressionData, AggressionData]:
    '''Load the aggression dataset from ``data/<prefix>``.

    When ``small`` is true, ``-small`` is appended to ``prefix`` to form the
    folder name. The result is whatever ``load_raw_data`` returns for the
    ``'aggression'`` files parsed with ``AggressionAnnotation``.
    '''
    folder_name = prefix + '-small' if small else prefix
    data_dir = fmanip.join('data', folder_name)
    return load_raw_data(data_dir, 'aggression', AggressionAnnotation)
Ejemplo n.º 8
0
def load_toxicity_data(
        prefix: str = 'wikipedia-toxicity-data-v2',
        small: bool = False
) -> Tuple[ToxicityData, ToxicityData, ToxicityData]:
    '''Load the toxicity dataset from ``data/<prefix>``.

    When ``small`` is true, ``-small`` is appended to ``prefix`` to form the
    folder name. The result is whatever ``load_raw_data`` returns for the
    ``'toxicity'`` files parsed with ``ToxicityAnnotation``.
    '''
    folder_name = prefix + '-small' if small else prefix
    data_dir = fmanip.join('data', folder_name)
    return load_raw_data(data_dir, 'toxicity', ToxicityAnnotation)
Ejemplo n.º 9
0
def load_raw_data(folder_path: str, prefix: str) -> List[TwitterAnnotation]:
    '''Load every annotation row from ``<folder_path>/<prefix>.csv``.

    The first row is treated as a header and skipped; each remaining row is
    converted with ``TwitterAnnotation.from_row``. The file is decoded as
    UTF-8 and undecodable bytes are dropped (``errors='ignore'``).

    Returns the list of parsed annotations, which is empty when the file has
    no data rows (including a completely empty file).
    '''
    path = fmanip.join(folder_path, prefix + ".csv")
    # newline='' is what the csv module asks for: it leaves newline handling
    # to the reader so line breaks inside quoted fields survive unchanged.
    with open(path, 'r', encoding='utf-8', errors='ignore',
              newline='') as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        # Discard the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        return [TwitterAnnotation.from_row(row) for row in reader]
Ejemplo n.º 10
0
def load_raw_data(folder_path, prefix, ann_cls):
    # type: (str, str, Type[TAnn]) -> List[TAnn]
    '''Load every annotation row from ``<folder_path>/<prefix>.csv``.

    The first row is treated as a header and skipped; each remaining row is
    converted with ``ann_cls.from_row``.

    Returns the list of parsed annotations, which is empty when the file has
    no data rows (including a completely empty file).
    '''
    csv_path = fmanip.join(folder_path, prefix + ".csv")
    # newline='' is what the csv module asks for: it leaves newline handling
    # to the reader so line breaks inside quoted fields survive unchanged.
    with open(csv_path, 'r', newline='') as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        return [ann_cls.from_row(row) for row in reader]
Ejemplo n.º 11
0
 def _save_model(self, path: str) -> None:
     '''Dump the trained classifier to ``classifier.pkl`` inside ``path``.'''
     # Pickling is the approach scikit-learn recommends for saving and
     # loading trained classifiers; see
     # http://scikit-learn.org/stable/modules/model_persistence.html
     #
     # The pickled output is relatively fragile: it could break across
     # operating systems, Python versions, or versions of basically any
     # library in use.
     pickle_file = fmanip.join(path, 'classifier.pkl')
     joblib.dump(self.classifier, pickle_file)
Ejemplo n.º 12
0
 def save(self, path: Optional[str] = None) -> None:
     '''Saves the parameters and the model into a folder.

     ``path`` is handed to ``format_log_dir``: it can be a string to be
     formatted with the default path (or a completely different path that
     won't be formatted), or None to use the default log dir.
     (E.g. path="{}/epoch100" -> "runs/run10/epoch100")

     The folder is created if needed, the parameters are written to
     ``params.json`` inside it, and ``_save_model`` stores the model.'''
     target_dir = self.format_log_dir(path)
     fmanip.ensure_folder_exists(target_dir)

     params_file = fmanip.join(target_dir, 'params.json')
     fmanip.write_nice_json(self._get_all_parameters(), params_file)

     self._save_model(target_dir)
    def _restore_model(self, path: str) -> None:
        '''Rebuild this model from the files stored under ``path``.

        Reads ``vocab_map.json``, opens a session on the default graph,
        imports the graph from ``tensorflow_graph.meta`` and restores the
        ``model`` checkpoint. Each tensor/op attribute is then rebound to
        the first entry of the graph collection with the same name, and a
        summary writer is created.
        '''
        vocab_file = fmanip.join(path, 'vocab_map.json')
        with open(vocab_file, 'r') as handle:
            self.vocab_map = json.load(handle)

        self.session = tf.Session(graph=tf.get_default_graph())
        meta_file = fmanip.join(path, 'tensorflow_graph.meta')
        saver = tf.train.import_meta_graph(meta_file)
        saver.restore(self.session, fmanip.join(path, 'model'))

        # Attribute name == collection name; the order matches the original
        # sequence of assignments.
        restored_names = (
            'x_input',
            'dropout',
            'y_input',
            'predictor',
            'loss',
            'optimizer',
            'summary',
            'output',
            'output_prob',
            'init',
        )
        for name in restored_names:
            setattr(self, name, tf.get_collection(name)[0])

        self.logger = tf.summary.FileWriter(
            self._get_log_dir(), graph=tf.get_default_graph())
Ejemplo n.º 14
0
    def _save_model(self, path: str) -> None:
        '''Write the vocabulary map, checkpoint and meta graph into ``path``.

        Creates ``vocab_map.json``, registers the model's tensors/ops in
        graph collections named after their attributes, saves the session
        under the checkpoint prefix ``model`` and exports the meta graph to
        ``tensorflow_graph.meta``.
        '''
        vocab_file = fmanip.join(path, 'vocab_map.json')
        with open(vocab_file, 'w') as handle:
            json.dump(self.vocab_map, handle)

        # The Saver is created before the collections are populated.
        saver = tf.train.Saver()

        # Collection name == attribute name. 'output' is deliberately not
        # registered here (it was commented out in the original list).
        exported_names = (
            'x_input',
            'y_input',
            'x_lengths',
            'input_keep',
            'output_keep',
            'predictor',
            'loss',
            'optimizer',
            'summary',
            'output_prob',
            'init',
        )
        for name in exported_names:
            tf.add_to_collection(name, getattr(self, name))

        saver.save(self.session, fmanip.join(path, 'model'))
        meta_file = fmanip.join(path, 'tensorflow_graph.meta')
        tf.train.export_meta_graph(filename=meta_file)
Ejemplo n.º 15
0
def load_raw_data(folder_path, prefix, ann_cls):
    # type: (str, str, Type[TAnn]) -> Tuple[List[Comment[TAnn]], List[Comment[TAnn]], List[Comment[TAnn]]]
    '''Return the (train, dev, test) comment lists for a dataset folder.

    If ``train.json`` already exists in ``folder_path``, the three splits are
    loaded from ``train.json``/``dev.json``/``test.json``. Otherwise they are
    built from ``<prefix>_annotated_comments.tsv`` and
    ``<prefix>_annotations.tsv``, written to those JSON files, and returned.
    '''
    train_path = fmanip.join(folder_path, 'train.json')
    dev_path = fmanip.join(folder_path, 'dev.json')
    test_path = fmanip.join(folder_path, 'test.json')

    # Cached splits: only train.json is checked for existence.
    if os.path.isfile(train_path):
        cached_train = load_from_json(train_path, ann_cls)
        cached_dev = load_from_json(dev_path, ann_cls)
        cached_test = load_from_json(test_path, ann_cls)
        return (cached_train, cached_dev, cached_test)

    comments_file = os.path.join(folder_path,
                                 prefix + '_annotated_comments.tsv')
    annotations_file = os.path.join(folder_path, prefix + '_annotations.tsv')

    comments = pd.read_csv(comments_file, sep='\t', index_col=0)
    annotations = pd.read_csv(annotations_file, sep='\t')

    def _strip_tokens(text):
        # Replace the placeholder tokens with plain spaces.
        return text.replace("NEWLINE_TOKEN", " ").replace("TAB_TOKEN", " ")

    comments['comment'] = comments['comment'].apply(_strip_tokens)
    by_rev_id = annotations.groupby('rev_id')

    def extract(split: str) -> List[Comment]:
        # Build one Comment per row of the requested split, attaching the
        # annotations that share the row's index as their 'rev_id'.
        selected = comments[comments['split'] == split]
        collected = []
        for row in selected.itertuples():
            anns = [
                ann_cls.parse_row(ann)
                for ann in by_rev_id.get_group(row.Index).itertuples()
            ]
            rev_id = int(row.Index)
            collected.append(
                Comment(rev_id, row.comment, bool(row.logged_in), row.ns,
                        row.sample, anns, ann_cls.average(rev_id, anns)))
        return collected

    train = extract('train')
    dev = extract('dev')
    test = extract('test')

    for target, items in ((train_path, train), (dev_path, dev),
                          (test_path, test)):
        save_as_json(target, items)

    return train, dev, test
    def _save_model(self, path: str) -> None:
        '''Saves the model into ``path``, which must be an existing folder;
        this method may create arbitrary files/folders inside it.

        Writes ``vocab_map.json``, registers the model's tensors/ops in
        graph collections named after their attributes, saves the session
        under the checkpoint prefix ``model`` and exports the meta graph to
        ``tensorflow_graph.meta``.'''
        vocab_file = fmanip.join(path, 'vocab_map.json')
        with open(vocab_file, 'w') as handle:
            json.dump(self.vocab_map, handle)

        # The Saver is created before the collections are populated.
        saver = tf.train.Saver()

        # Collection name == attribute name; the order matches the original
        # sequence of add_to_collection calls.
        exported_names = (
            'x_input',
            'dropout',
            'y_input',
            'y_hot',
            'predictor',
            'loss',
            'optimizer',
            'summary',
            'output',
            'output_prob',
            'init',
        )
        for name in exported_names:
            tf.add_to_collection(name, getattr(self, name))

        saver.save(self.session, fmanip.join(path, 'model'))
        meta_file = fmanip.join(path, 'tensorflow_graph.meta')
        tf.train.export_meta_graph(filename=meta_file)
Ejemplo n.º 17
0
def restore_model(saver: Any,
                  pathname: str,
                  filename: str = "model.ckpt") -> Any:
    '''Restore a checkpoint into a new session and return that session.

    A ``tf.Session`` is created on the current default graph and
    ``saver.restore`` is called with the checkpoint at
    ``<pathname>/<filename>``.

    The return annotation used to be ``None`` even though the session is
    returned; it is ``Any`` now, matching how ``saver`` is annotated.
    '''
    session = tf.Session(graph=tf.get_default_graph())
    saver.restore(session, fmanip.join(pathname, filename))
    return session
Ejemplo n.º 18
0
 def _restore_model(self, path: str) -> None:
     '''Load the classifier from ``classifier.pkl`` inside ``path``.'''
     # NOTE(review): joblib.load unpickles the file, which can execute
     # arbitrary code -- only restore from folders you trust.
     pickle_file = fmanip.join(path, 'classifier.pkl')
     self.classifier = joblib.load(pickle_file)
Ejemplo n.º 19
0
def save_model(saver: Any,
               session: Any,
               pathname: str,
               filename: str = "model.ckpt") -> None:
    '''Save ``session`` with ``saver`` to ``<pathname>/<filename>``.'''
    checkpoint_path = fmanip.join(pathname, filename)
    saver.save(session, checkpoint_path)