Example #1
 def __init__(self, opt):
     self.metrics = {}
     self.metrics['cnt'] = 0
     self.metrics_list = ['mean_rank', 'loss', 'correct', 'f1', 'ppl']
     if nltkbleu is not None:
         # only compute bleu if we can
         self.metrics_list.append('bleu')
     if rouge is not None:
         # only compute rouge if we can
         self.metrics_list.append('rouge-1')
         self.metrics_list.append('rouge-2')
         self.metrics_list.append('rouge-L')
     for k in self.metrics_list:
         self.metrics[k] = 0.0
         self.metrics[k + '_cnt'] = 0
     self.eval_pr = [1, 5, 10, 100]
     for k in self.eval_pr:
         self.metrics['hits@' + str(k)] = 0
     self.metrics['hits@_cnt'] = 0
     self.flags = {
         'has_text_cands': False,
         'print_prediction_metrics': False
     }
     if opt.get('numthreads', 1) > 1:
         self.metrics = SharedTable(self.metrics)
         self.flags = SharedTable(self.flags)
    def test_torch(self):
        try:
            import torch
        except ImportError:
            # pass by default if no torch available
            return

        st = SharedTable({'a': torch.FloatTensor([1]), 'b': torch.LongTensor(2)})
        assert st['a'][0] == 1.0
        assert len(st) == 2
        assert 'b' in st
        del st['b']
        assert 'b' not in st
        assert len(st) == 1

        if torch.cuda.is_available():
            st = SharedTable(
                {'a': torch.cuda.FloatTensor([1]), 'b': torch.cuda.LongTensor(2)}
            )
            assert st['a'][0] == 1.0
            assert len(st) == 2
            assert 'b' in st
            del st['b']
            assert 'b' not in st
            assert len(st) == 1
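
The recurring pattern in these examples is that metric counters start out as a plain dict and are promoted to a SharedTable only when hogwild training is requested via numthreads > 1. A minimal standalone sketch of that promotion (assuming SharedTable is importable from parlai.core.thread_utils, as in older ParlAI releases):

 # Minimal sketch; the import path is an assumption based on older ParlAI releases.
 from parlai.core.thread_utils import SharedTable

 def build_metrics(opt):
     # plain dict of counters; each value keeps its initial type
     metrics = {'cnt': 0, 'correct': 0, 'loss': 0.0}
     if opt.get('numthreads', 1) > 1:
         # promote to shared memory so hogwild workers update the same counters
         metrics = SharedTable(metrics)
     return metrics

 metrics = build_metrics({'numthreads': 2})
 with metrics.get_lock():  # += on a SharedTable is not atomic across processes
     metrics['cnt'] += 1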
Example #3
 def __init__(self, opt):
     self.metrics = {}
     self.metrics['cnt'] = 0
     self.metrics_list = ['mean_rank', 'loss', 'correct', 'f1', 'ppl']
     for k in self.metrics_list:
         self.metrics[k] = 0.0
         self.metrics[k + '_cnt'] = 0
     self.eval_pr = [1, 5, 10, 100]
     for k in self.eval_pr:
         self.metrics['hits@' + str(k)] = 0
     self.metrics['hits@_cnt'] = 0
     self.flags = {'has_text_cands': False, 'print_prediction_metrics': False}
     if opt.get('numthreads', 1) > 1:
         self.metrics = SharedTable(self.metrics)
         self.flags = SharedTable(self.flags)
    def test_get_set_del(self):
        st = SharedTable()
        try:
            st['key']
            assert False, 'did not fail on nonexistent key'
        except KeyError:
            pass

        st['key'] = 1
        assert st['key'] == 1

        st['key'] += 1
        assert st['key'] == 2

        try:
            st['key'] = 2.1
            assert False, 'cannot change type of value for set keys'
        except TypeError:
            pass

        del st['key']
        assert 'key' not in st, 'key should have been removed from table'

        st['key'] = 'hello'
        assert st['key'] == 'hello'

        st['key'] += ' world'
        assert st['key'] == 'hello world'

        st['ctr'] = 0
        keyset1 = set(iter(st))
        keyset2 = set(st.keys())
        assert keyset1 == keyset2, 'iterating should return keys'
Example #5
 def __init__(self, opt, agents, shared=None):
     super().__init__(opt)
     if shared:
         # Create agents based on shared data.
         self.task, self.agent, self.dict = create_agents_from_shared(
             shared['agents'])
         self.metrics = shared['metrics']
     else:
         if len(agents) != 3:
             raise RuntimeError('There must be exactly three agents.')
         if opt.get('batchsize', 1) > 1:
             raise RuntimeError('This world only works with bs=1. Try '
                                'using multiple threads instead, nt>1.')
         self.task, self.agent, self.dict = agents
         if not hasattr(self.agent, 'next_word_probability'):
             raise RuntimeError('Agent must implement function '
                                '`next_word_probability`.')
         self.metrics = {
             'exs': 0,
             'loss': 0.0,
             'num_tokens': 0,
             'num_unk': 0
         }
         if opt.get('numthreads', 1) > 1:
             self.metrics = SharedTable(self.metrics)
     self.agents = [self.task, self.agent, self.dict]
     self.acts = [None, None]
    def test_get_set_del(self):
        st = SharedTable({'key': 0})
        try:
            st['none']
            self.fail('did not fail on nonexistent key')
        except KeyError:
            pass

        st['key'] = 1
        assert st['key'] == 1

        st['key'] += 1
        assert st['key'] == 2

        try:
            st['key'] = 2.1
            self.fail('cannot change type of value for set keys')
        except TypeError:
            pass

        del st['key']
        assert 'key' not in st, 'key should have been removed from table'

        try:
            st['key'] = True
            self.fail('cannot change removed key')
        except KeyError:
            pass
 def test_iter_keys(self):
     st = SharedTable({'key': 0, 'ctr': 0.0, 'val': False, 'other': 1})
     assert len(st) == 4
     del st['key']
     assert len(st) == 3, 'length should decrease after deleting key'
     keyset1 = set(iter(st))
     keyset2 = set(st.keys())
     assert keyset1 == keyset2, 'iterating should return keys'
     assert len(keyset1) == 3, 'removed keys should not appear when iterating'
Example #8
 def share(self):
     """Share model parameters."""
     shared = super().share()
     shared['model'] = self.model
     if self.opt.get('numthreads', 1) > 1 and isinstance(self.metrics, dict):
         torch.set_num_threads(1)
         # move metrics and model to shared memory
         self.metrics = SharedTable(self.metrics)
         self.model.share_memory()
     shared['metrics'] = self.metrics
     return shared
Example #9
 def __init__(self, opt):
     self.metrics = {}
     self.metrics['cnt'] = 0
     self.metrics['correct'] = 0
     self.metrics['f1'] = 0.0
     self.eval_pr = [1, 5, 10, 50, 100]
     for k in self.eval_pr:
         self.metrics['hits@' + str(k)] = 0
     if opt.get('numthreads', 1) > 1:
         self.metrics = SharedTable(self.metrics)
     self.datatype = opt.get('datatype', 'train')
Example #10
 def test_init_from_dict(self):
     d = {
         'a': 0,
         'b': 1,
         'c': 1.0,
         'd': True,
         1: False,
         2: 2.0
     }
     st = SharedTable(d)
     for k, v in d.items():
         assert st[k] == v
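
As the test above shows, a SharedTable can be built directly from a dict and read back key by key. The other tests in this listing also expect a TypeError when a stored value's type changes, so an entry's type is effectively fixed once set. A small hedged illustration of that behaviour (same assumed import path as the sketch after Example #1):

 # Assumes: from parlai.core.thread_utils import SharedTable
 st = SharedTable({'count': 0, 'rate': 0.0, 'done': False})
 st['count'] += 5       # int stays int
 st['rate'] = 0.5       # float stays float
 try:
     st['count'] = 1.5  # switching an existing key from int to float
 except TypeError:
     print('existing keys cannot change type')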
Example #11
 def __init__(self, opt):
     self.metrics = {}
     self.metrics['cnt'] = 0
     self.metrics_list = set()
     optional_metrics_list = []
     metrics_arg = opt.get('metrics', 'default')
     if metrics_arg == 'default':
         optional_metrics_list = DEFAULT_METRICS
     elif metrics_arg == 'all':
         optional_metrics_list = ALL_METRICS
     else:
         optional_metrics_list = set(metrics_arg.split(','))
         optional_metrics_list.add('correct')
     for each_m in optional_metrics_list:
         if each_m.startswith('rouge'):
             if rouge is not None:
                 # only compute rouge if rouge is available
                 self.metrics_list.add('rouge')
         elif each_m == 'bleu' and nltkbleu is None:
             # only compute bleu if bleu is available
             pass
         else:
             self.metrics_list.add(each_m)
     metrics_list = (self.metrics_list if 'rouge' not in self.metrics_list
                     else self.metrics_list | ROUGE_METRICS)
     for k in metrics_list:
         self.metrics[k] = 0.0
         self.metrics[k + '_cnt'] = 0
     self.eval_pr = [1, 5, 10, 100]
     for k in self.eval_pr:
         self.metrics['hits@' + str(k)] = 0
     self.metrics['hits@_cnt'] = 0
     self.flags = {
         'has_text_cands': False,
         'print_prediction_metrics': False
     }
     if opt.get('numthreads', 1) > 1:
         self.metrics = SharedTable(self.metrics)
         self.flags = SharedTable(self.flags)
Example #12
 def share(self):
     """Share internal states between parent and child instances."""
     shared = super().share()
     shared['model'] = self.model
     if self.opt.get('numthreads', 1) > 1:
         # we're doing hogwild so share the model too
         if type(self.metrics) == dict:
             # move metrics and model to shared memory
             self.metrics = SharedTable(self.metrics)
             self.model.share_memory()
         shared['states'] = {  # don't share optimizer states
             'optimizer_type': self.opt['optimizer'],
         }
     shared['metrics'] = self.metrics  # do after numthreads check
     return shared
Example #13
 def share(self):
     """Share model parameters."""
     shared = super().share()
     shared['model'] = self.model
     if self.opt.get('numthreads', 1) > 1 and isinstance(self.metrics, dict):
         torch.set_num_threads(1)
         # move metrics and model to shared memory
         self.metrics = SharedTable(self.metrics)
         self.model.share_memory()
     shared['metrics'] = self.metrics
     shared['fixed_candidates'] = self.fixed_candidates
     shared['fixed_candidate_vecs'] = self.fixed_candidate_vecs
     shared['vocab_candidates'] = self.vocab_candidates
     shared['vocab_candidate_vecs'] = self.vocab_candidate_vecs
     shared['optimizer'] = self.optimizer
     return shared
 def share(self):
     """Share internal states between parent and child instances."""
     shared = super().share()
     shared['criterion'] = self.criterion
     if self.opt.get('numthreads', 1) > 1:
         # we're doing hogwild so share the model too
         if isinstance(self.metrics, dict):
             # move metrics and model to shared memory
             self.metrics = SharedTable(self.metrics)
             self.model.share_memory()
         shared['states'] = {  # don't share optimizer states
             'optimizer_type': self.opt['optimizer']
         }
     shared['metrics'] = self.metrics  # do after numthreads check
     if self.beam_dot_log is True:
         shared['beam_dot_dir'] = self.beam_dot_dir
     return shared
    def test_concurrent_access(self):
        st = SharedTable({'cnt': 0})

        def inc():
            for _ in range(50):
                with st.get_lock():
                    st['cnt'] += 1
                time.sleep(random.randint(1, 5) / 10000)

        threads = []
        for _ in range(5):  # numthreads
            threads.append(Process(target=inc))
        for t in threads:
            t.start()
        for t in threads:
            t.join()
        assert st['cnt'] == 250
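
The concurrent-access test above is why get_lock() wraps every increment: st['cnt'] += 1 is a read-modify-write, so two processes incrementing without the lock could read the same old value and lose an update. A standalone sketch of the same pattern (worker and iteration counts are illustrative; it assumes the parlai.core.thread_utils import path and a fork-based start method, matching how the test captures the table in a closure):

 from multiprocessing import Process
 from parlai.core.thread_utils import SharedTable  # assumed import path

 st = SharedTable({'cnt': 0})

 def inc():
     for _ in range(50):
         with st.get_lock():  # serialize the read-modify-write on 'cnt'
             st['cnt'] += 1

 if __name__ == '__main__':
     procs = [Process(target=inc) for _ in range(4)]
     for p in procs:
         p.start()
     for p in procs:
         p.join()
     assert st['cnt'] == 4 * 50  # no lost updates while the lock is held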
Example #16
    def __init__(self, opt):
        self.metrics = {}
        self.metrics['cnt'] = 0
        self.metrics['correct'] = 0
        self.metrics['f1'] = 0.0
        self.custom_metrics = ['mean_rank', 'loss']
        for k in self.custom_metrics:
            self.metrics[k] = 0.0
            self.metrics[k + '_cnt'] = 0
        self.eval_pr = [1, 5, 10, 100]
        for k in self.eval_pr:
            self.metrics['hits@' + str(k)] = 0
        if opt.get('numthreads', 1) > 1:
            self.metrics = SharedTable(self.metrics)

        self.custom_keys = []
        self.datatype = opt.get('datatype', 'train')
Example #17
 def share(self):
     """Share internal states between parent and child instances."""
     shared = super().share()
     shared['opt'] = self.opt
     shared['dict'] = self.dict
     shared['NULL_IDX'] = self.NULL_IDX
     shared['END_IDX'] = self.END_IDX
     shared['model'] = self.model
     if self.opt.get('numthreads', 1) > 1:
         if type(self.metrics) == dict:
             # move metrics and model to shared memory
             self.metrics = SharedTable(self.metrics)
             self.model.share_memory()
         shared['states'] = {  # only need to pass optimizer states
             'optimizer': self.optimizer.state_dict(),
         }
     shared['metrics'] = self.metrics
     return shared
Example #18
 def share(self):
     """Share internal states between parent and child instances."""
     shared = super().share()
     shared['opt'] = self.opt
     shared['answers'] = self.answers
     shared['dict'] = self.dict
     shared['START_IDX'] = self.START_IDX
     shared['END_IDX'] = self.END_IDX
     shared['NULL_IDX'] = self.NULL_IDX
     if self.opt.get('numthreads', 1) > 1:
         if type(self.metrics) == dict:
             self.metrics = SharedTable(self.metrics)
             self.model.share_memory()
         shared['metrics'] = self.metrics
         shared['model'] = self.model
         shared['states'] = { # only need to pass optimizer states
             'optimizer': self.optimizer.state_dict(),
             'optimizer_type': self.opt['optimizer'],
         }
     return shared
Example #19
 def share(self):
     """Share internal states between parent and child instances."""
     shared = super().share()
     shared['opt'] = self.opt
     shared['answers'] = self.answers
     shared['dict'] = self.dict
     shared['START_IDX'] = self.START_IDX
     shared['END_IDX'] = self.END_IDX
     shared['NULL_IDX'] = self.NULL_IDX
     if self.opt.get('numthreads', 1) > 1:
         # we're doing hogwild so share the model too
         if type(self.metrics) == dict:
             # move metrics and model to shared memory
             self.metrics = SharedTable(self.metrics)
             self.model.share_memory()
         shared['model'] = self.model
         shared['metrics'] = self.metrics
         shared['states'] = {  # don't share optimizer states
             'optimizer_type': self.opt['optimizer'],
         }
     return shared
 def test_init_from_dict(self):
     d = {'a': 0, 'b': 1, 'c': 1.0, 'd': 'hello', 1: 'world', 2: 2.0}
     st = SharedTable(d)
     for k, v in d.items():
         assert st[k] == v