コード例 #1
0
    def _restore_model(self, path: str) -> None:
        '''Rebuilds the session and the graph handles from a saved folder.

        Reads 'vocab_map.json', imports 'tensorflow_graph.meta' and
        restores the 'model' checkpoint found inside `path`. Each handle
        attribute is then bound to the first entry of the graph collection
        that carries the same name as the attribute.'''
        vocab_path = fmanip.join(path, 'vocab_map.json')
        with open(vocab_path, 'r') as vocab_file:
            self.vocab_map = json.load(vocab_file)

        session_config = tf.ConfigProto(allow_soft_placement=True)
        self.session = tf.Session(config=session_config)
        meta_path = fmanip.join(path, 'tensorflow_graph.meta')
        restorer = tf.train.import_meta_graph(meta_path)
        restorer.restore(self.session, fmanip.join(path, 'model'))

        # Attribute name == collection name; order matches the lookups
        # this method has always performed.
        handle_names = (
            'x_input', 'y_input', 'x_lengths', 'y_hot', 'input_keep',
            'output_keep', 'predictor', 'loss', 'optimizer', 'summary',
            'output', 'output_prob', 'init',
        )
        for handle_name in handle_names:
            setattr(self, handle_name, tf.get_collection(handle_name)[0])

        self.logger = tf.summary.FileWriter(
            self._get_log_dir(), graph=tf.get_default_graph())

        self._assert_all_setup()
コード例 #2
0
    def _save_model(self, path: str) -> None:
        '''Writes both vocabulary maps and a model checkpoint into `path`.

        Produces 'vocab_map.json', 'vocab_map_backwards.json' and a
        checkpoint named 'model' inside the given folder.'''
        vocab_path = fmanip.join(path, 'vocab_map.json')
        with open(vocab_path, 'w') as stream:
            json.dump(self.vocab_map, stream)

        # The reverse map is serialised as a plain JSON list (through
        # `tolist()`) instead of a NumPy '.npy' file.
        backwards_path = fmanip.join(path, 'vocab_map_backwards.json')
        with open(backwards_path, 'w') as stream:
            json.dump(self.vocab_map_backwards.tolist(), stream)

        checkpoint_writer = tf.train.Saver()
        checkpoint_writer.save(self.session, fmanip.join(path, 'model'))
コード例 #3
0
def save_small(prefix, ann_cls, src, dest, percentage):
    # type: (str, Type[TAnn], str, str, float) -> None
    '''Writes a truncated copy of the dataset in `src` into `dest`.

    Each split keeps its first `int(len(split) * percentage)` items.
    Nothing is written when `dest` already holds a 'train.json' file.'''
    train, dev, test = load_raw_data(src, prefix, ann_cls)

    fmanip.ensure_folder_exists(dest)
    targets = [fmanip.join(dest, file_name)
               for file_name in ('train.json', 'dev.json', 'test.json')]

    # Only the train file is probed to decide whether output exists.
    if os.path.isfile(targets[0]):
        return

    for target, examples in zip(targets, (train, dev, test)):
        keep = int(len(examples) * percentage)
        save_as_json(target, examples[:keep])
コード例 #4
0
ファイル: model.py プロジェクト: Michael0x2a/nlp-capstone
    def restore_from_saved(cls: Any,
                           run_num: Optional[int] = None,
                           path: Optional[str] = None) -> Any:
        # Signature really should be
        # (Type[TSelf], str) -> TSelf
        # ...but idk if mypy supports this fully atm
        '''Restores a model and its parameters from the folder `path`.

        `path` must name an existing directory that contains a
        'params.json' file. The parameters loaded from that file are
        passed to `cls` as keyword arguments together with
        `restore_from=path`, and the resulting instance is returned.

        `run_num` is still accepted so existing callers keep working, but
        it is currently ignored: resolving a run directory from a run
        number is disabled (see the commented-out block below).

        Raises ValueError when `path` is None and NotADirectoryError when
        `path` is not an existing directory.'''
        # Disabled run-number lookup, kept for reference:
        #
        # if run_num is None:
        #     run_num = cls._get_next_run_num() - 1
        # run_dir = cls.base_log_dir.format(run_num)
        # if not os.path.exists(run_dir):
        #     run_dirs = glob.glob(run_dir + "-*")
        #     if len(run_dirs) < 1:
        #         print("Error: No run with that number.")
        #         return
        #     elif len(run_dirs) > 1:
        #         print("Multiple runs with that number.")
        #     run_dir = run_dirs[0]
        #
        # path = run_dir if path is None else path.format(run_dir)

        # Explicit checks instead of `assert`: asserts vanish under -O, and
        # the default `path=None` used to die inside os.path.isdir with an
        # unhelpful TypeError.
        if path is None:
            raise ValueError(
                'restore_from_saved requires an explicit path; '
                'lookup by run number is currently disabled')
        if not os.path.isdir(path):
            raise NotADirectoryError(
                'Cannot restore model, not a directory: {!r}'.format(path))

        params = fmanip.load_json(fmanip.join(path, 'params.json'))
        return cls(restore_from=path, **params)
コード例 #5
0
def load_attack_data(
        prefix: str = 'wikipedia-attack-data-v6',
        small: bool = False) -> Tuple[AttackData, AttackData, AttackData]:
    '''Loads the attack dataset from the folder 'data/<prefix>'.

    When `small` is true, '-small' is appended to `prefix` before the
    folder path is built.'''
    folder_name = (prefix + '-small') if small else prefix
    data_dir = fmanip.join('data', folder_name)
    return load_raw_data(data_dir, 'attack', AttackAnnotation)
コード例 #6
0
    def _restore_model(self, path: str) -> None:
        '''Loads both vocabulary maps and the checkpoint stored in `path`.

        The reverse map is read from 'vocab_map_backwards.json'. If that
        file cannot be opened, it is loaded from 'vocab_map_backwards.npy'
        instead and immediately re-written as JSON. Variables are then
        restored from the 'model' checkpoint into a session on the
        current default graph.'''
        with open(fmanip.join(path, 'vocab_map.json'), 'r') as stream:
            self.vocab_map = json.load(stream)

        backwards_json = fmanip.join(path, 'vocab_map_backwards.json')
        try:
            with open(backwards_json, 'r') as stream:
                self.vocab_map_backwards = np.array(json.load(stream))
        except OSError:  # IOError is the same class on Python 3
            # No readable JSON copy: use the '.npy' file, then write the
            # JSON version next to it.
            self.vocab_map_backwards = np.load(
                fmanip.join(path, 'vocab_map_backwards.npy'))
            with open(backwards_json, 'w') as stream:
                json.dump(self.vocab_map_backwards.tolist(), stream)

        self.session = tf.Session(graph=tf.get_default_graph())
        # The meta graph is not imported here; a Saver over the current
        # graph restores the variables.
        restorer = tf.train.Saver()
        restorer.restore(self.session, fmanip.join(path, 'model'))
コード例 #7
0
def load_aggression_data(
    prefix: str = 'wikipedia-aggression-data-v5',
    small: bool = False
) -> Tuple[AggressionData, AggressionData, AggressionData]:
    '''Loads the aggression dataset from the folder 'data/<prefix>'.

    When `small` is true, '-small' is appended to `prefix` before the
    folder path is built.'''
    folder_name = (prefix + '-small') if small else prefix
    data_dir = fmanip.join('data', folder_name)
    return load_raw_data(data_dir, 'aggression', AggressionAnnotation)
コード例 #8
0
def load_toxicity_data(
        prefix: str = 'wikipedia-toxicity-data-v2',
        small: bool = False
) -> Tuple[ToxicityData, ToxicityData, ToxicityData]:
    '''Loads the toxicity dataset from the folder 'data/<prefix>'.

    When `small` is true, '-small' is appended to `prefix` before the
    folder path is built.'''
    folder_name = (prefix + '-small') if small else prefix
    data_dir = fmanip.join('data', folder_name)
    return load_raw_data(data_dir, 'toxicity', ToxicityAnnotation)
コード例 #9
0
ファイル: parsing.py プロジェクト: Michael0x2a/nlp-capstone
def load_raw_data(folder_path: str, prefix: str) -> List[TwitterAnnotation]:
    '''Parses '<folder_path>/<prefix>.csv' into a list of annotations.

    The first CSV row is treated as a header and skipped; every other
    row is converted with `TwitterAnnotation.from_row`. Bytes that are
    not valid UTF-8 are dropped while decoding (`errors='ignore'`). A
    completely empty file yields an empty list.'''
    path = fmanip.join(folder_path, prefix + ".csv")
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires, so quoted fields that contain line breaks
    # are parsed correctly.
    with open(path, 'r', encoding='utf-8', errors='ignore',
              newline='') as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        # Discard the header; the default keeps an empty file from
        # raising StopIteration.
        next(reader, None)
        return [TwitterAnnotation.from_row(row) for row in reader]
コード例 #10
0
def load_raw_data(folder_path, prefix, ann_cls):
    # type: (str, str, Type[TAnn]) -> List[TAnn]
    '''Parses '<folder_path>/<prefix>.csv' into a list of annotations.

    The first CSV row is treated as a header and skipped; every other
    row is converted with `ann_cls.from_row`. A completely empty file
    yields an empty list.'''
    csv_path = fmanip.join(folder_path, prefix + ".csv")
    # newline='' hands line-ending handling to the csv module, as its
    # documentation requires, so quoted fields that contain line breaks
    # are parsed correctly.
    with open(csv_path, 'r', newline='') as stream:
        reader = csv.reader(stream, delimiter=',', quotechar='"')
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        return [ann_cls.from_row(row) for row in reader]
コード例 #11
0
 def _save_model(self, path: str) -> None:
     '''Dumps the classifier to 'classifier.pkl' inside `path`.'''
     # Pickling (through joblib) is the approach scikit-learn recommends
     # for persisting trained classifiers, see
     # http://scikit-learn.org/stable/modules/model_persistence.html
     #
     # Be aware that the pickled file is fairly fragile: a different
     # operating system, Python version, or version of nearly any
     # library in use may make it unreadable.
     target = fmanip.join(path, 'classifier.pkl')
     joblib.dump(self.classifier, target)
コード例 #12
0
ファイル: model.py プロジェクト: Michael0x2a/nlp-capstone
 def save(self, path: Optional[str] = None) -> None:
     '''Stores the parameters and the model in a folder.

     `path` is resolved through `format_log_dir`: None selects the
     default log dir, while a string is formatted with the default path
     (a string without a placeholder therefore names an unrelated
     folder). E.g. path="{}/epoch100" -> "runs/run10/epoch100".

     The folder is created if needed, the parameters are written to
     'params.json', and the model itself is written by `_save_model`.'''
     target_dir = self.format_log_dir(path)
     fmanip.ensure_folder_exists(target_dir)
     params_file = fmanip.join(target_dir, 'params.json')
     fmanip.write_nice_json(self._get_all_parameters(), params_file)
     self._save_model(target_dir)
コード例 #13
0
    def _restore_model(self, path: str) -> None:
        '''Rebuilds the session and the graph handles from a saved folder.

        Reads 'vocab_map.json', imports 'tensorflow_graph.meta' and
        restores the 'model' checkpoint found inside `path`. Each handle
        attribute is then bound to the first entry of the graph
        collection with the matching name.'''
        with open(fmanip.join(path, 'vocab_map.json'), 'r') as vocab_file:
            self.vocab_map = json.load(vocab_file)

        self.session = tf.Session(graph=tf.get_default_graph())
        meta_path = fmanip.join(path, 'tensorflow_graph.meta')
        restorer = tf.train.import_meta_graph(meta_path)
        restorer.restore(self.session, fmanip.join(path, 'model'))

        def first_in(collection_name):
            # Only the first element of a collection is ever used.
            return tf.get_collection(collection_name)[0]

        self.x_input = first_in('x_input')
        self.dropout = first_in('dropout')
        self.y_input = first_in('y_input')
        self.predictor = first_in('predictor')
        self.loss = first_in('loss')
        self.optimizer = first_in('optimizer')
        self.summary = first_in('summary')
        self.output = first_in('output')
        self.output_prob = first_in('output_prob')
        self.init = first_in('init')

        self.logger = tf.summary.FileWriter(
            self._get_log_dir(), graph=tf.get_default_graph())
コード例 #14
0
    def _save_model(self, path: str) -> None:
        '''Writes the vocabulary, a checkpoint and the meta graph to `path`.

        Produces 'vocab_map.json', a checkpoint named 'model' and
        'tensorflow_graph.meta'. Before saving, each listed handle is
        added to a graph collection named after its attribute.'''
        vocab_path = fmanip.join(path, 'vocab_map.json')
        with open(vocab_path, 'w') as vocab_file:
            json.dump(self.vocab_map, vocab_file)
        checkpoint_writer = tf.train.Saver()

        # NOTE(review): 'output' is deliberately not registered here (and
        # neither is 'y_hot'); a restore routine that looks those
        # collections up would find them empty - confirm this is intended.
        handle_names = (
            'x_input', 'y_input', 'x_lengths', 'input_keep', 'output_keep',
            'predictor', 'loss', 'optimizer', 'summary', 'output_prob',
            'init',
        )
        for handle_name in handle_names:
            tf.add_to_collection(handle_name, getattr(self, handle_name))

        checkpoint_writer.save(self.session, fmanip.join(path, 'model'))
        meta_path = fmanip.join(path, 'tensorflow_graph.meta')
        tf.train.export_meta_graph(filename=meta_path)
コード例 #15
0
def load_raw_data(folder_path, prefix, ann_cls):
    # type: (str, str, Type[TAnn]) -> Tuple[List[Comment[TAnn]], List[Comment[TAnn]], List[Comment[TAnn]]]
    '''Returns the (train, dev, test) comment lists found in `folder_path`.

    If 'train.json' already exists in the folder, all three splits are
    loaded from their JSON files. Otherwise the splits are built from
    '<prefix>_annotated_comments.tsv' and '<prefix>_annotations.tsv',
    written out as JSON for subsequent calls, and returned.'''
    train_path = fmanip.join(folder_path, 'train.json')
    dev_path = fmanip.join(folder_path, 'dev.json')
    test_path = fmanip.join(folder_path, 'test.json')

    # Only the train file is probed to decide whether a cache exists.
    if os.path.isfile(train_path):
        cached_train = load_from_json(train_path, ann_cls)
        cached_dev = load_from_json(dev_path, ann_cls)
        cached_test = load_from_json(test_path, ann_cls)
        return (cached_train, cached_dev, cached_test)

    comments = pd.read_csv(
        os.path.join(folder_path, prefix + '_annotated_comments.tsv'),
        sep='\t', index_col=0)
    annotations = pd.read_csv(
        os.path.join(folder_path, prefix + '_annotations.tsv'), sep='\t')

    def strip_tokens(text):
        # Both placeholder tokens are replaced by a single space.
        return text.replace("NEWLINE_TOKEN", " ").replace("TAB_TOKEN", " ")

    comments['comment'] = comments['comment'].apply(strip_tokens)
    by_rev_id = annotations.groupby('rev_id')

    def extract(split: str) -> List[Comment]:
        # Builds one Comment per row of the requested split, attaching
        # the annotation rows grouped under that row's index.
        selected = comments[comments['split'] == split]
        built = []
        for row in selected.itertuples():
            parsed = [ann_cls.parse_row(ann)
                      for ann in by_rev_id.get_group(row.Index).itertuples()]
            rev_id = int(row.Index)
            built.append(
                Comment(rev_id, row.comment, bool(row.logged_in), row.ns,
                        row.sample, parsed, ann_cls.average(rev_id, parsed)))
        return built

    train = extract('train')
    dev = extract('dev')
    test = extract('test')

    for target, examples in ((train_path, train),
                             (dev_path, dev),
                             (test_path, test)):
        save_as_json(target, examples)

    return train, dev, test
コード例 #16
0
    def _save_model(self, path: str) -> None:
        '''Persists the model inside `path`, which must be an existing
        folder; the method is free to create whatever files or folders it
        needs there.

        Writes 'vocab_map.json', a checkpoint named 'model' and
        'tensorflow_graph.meta'. Before saving, each handle is added to a
        graph collection named after its attribute.'''
        vocab_path = fmanip.join(path, 'vocab_map.json')
        with open(vocab_path, 'w') as vocab_file:
            json.dump(self.vocab_map, vocab_file)
        checkpoint_writer = tf.train.Saver()

        # Attribute name == collection name.
        handle_names = (
            'x_input', 'dropout', 'y_input', 'y_hot', 'predictor', 'loss',
            'optimizer', 'summary', 'output', 'output_prob', 'init',
        )
        for handle_name in handle_names:
            tf.add_to_collection(handle_name, getattr(self, handle_name))

        checkpoint_writer.save(self.session, fmanip.join(path, 'model'))
        meta_path = fmanip.join(path, 'tensorflow_graph.meta')
        tf.train.export_meta_graph(filename=meta_path)
コード例 #17
0
def restore_model(saver: Any,
                  pathname: str,
                  filename: str = "model.ckpt") -> Any:
    '''Restores a checkpoint into a new session and returns that session.

    A session is created on the current default graph, and `saver`
    restores the checkpoint '<pathname>/<filename>' into it.

    The return annotation used to be `None` even though the session is
    returned; it is `Any` now so type checkers accept callers that use
    the result.'''
    session = tf.Session(graph=tf.get_default_graph())
    saver.restore(session, fmanip.join(pathname, filename))
    return session
コード例 #18
0
 def _restore_model(self, path: str) -> None:
     '''Loads the classifier from 'classifier.pkl' inside `path`.'''
     # NOTE(review): joblib.load unpickles the file, so it should only be
     # pointed at files from a trusted source.
     source = fmanip.join(path, 'classifier.pkl')
     self.classifier = joblib.load(source)
コード例 #19
0
def save_model(saver: Any,
               session: Any,
               pathname: str,
               filename: str = "model.ckpt") -> None:
    '''Saves `session` with `saver` to the checkpoint
    '<pathname>/<filename>'.'''
    checkpoint_path = fmanip.join(pathname, filename)
    saver.save(session, checkpoint_path)