Beispiel #1
0
    def observe(self, observation, increment_turn: bool = True):
        """
        Pass a deep copy of ``observation`` on to the wrapped model agent.

        The call into the model agent is wrapped in ``self.semaphore`` whenever one
        is set, because composed models may call act() from inside an observe().

        :param observation:
            message to observe; it is deep-copied before being handed on.
        :param increment_turn:
            when True, ``self.turn_idx`` is advanced by one after observing.
        """
        logging.info(
            f'{self.__class__.__name__}: In observe() before semaphore, self.turn_idx is {self.turn_idx} and observation is {observation}'
        )
        new_ob = copy.deepcopy(observation)
        if not self.semaphore:
            # No semaphore configured: call straight through.
            self.model_agent.observe(new_ob)
        else:
            with self.semaphore:
                self.model_agent.observe(new_ob)

        if 'text' in new_ob:
            logging.info(
                f'{self.__class__.__name__}: In observe() AFTER semaphore, self.turn_idx: {self.turn_idx}, observation["text"]: {new_ob["text"]}'
            )
        else:
            logging.warning(
                f'{self.__class__.__name__}: In observe() AFTER semaphore, self.turn_idx: {self.turn_idx}, and observation is missing a "text" field: {new_ob}'
            )

        if not increment_turn:
            return
        self.turn_idx += 1
Beispiel #2
0
 def __init__(self, opt, shared=None):
     """
     Force the search-decision mutators ahead of any user-supplied ones.

     The '+'-joined result overwrites ``opt['mutators']`` in place before the
     parent constructor is invoked.
     """
     forced = ['do_generate_search_query_mutator', 'skip_retrieval_mutator']
     user_mutators = opt.get('mutators')
     if user_mutators:
         # Keep whatever the caller asked for, after the forced ones.
         forced.append(user_mutators)
     mutators = '+'.join(forced)
     logging.warning(f'overriding mutators to {mutators}')
     opt['mutators'] = mutators
     super().__init__(opt, shared)
     self.id = 'NQSearchDecisionTeacher'
Beispiel #3
0
    def __init__(self, opt: Opt):
        """
        Initialize IVFPQ FAISS Indexer.

        The IVFPQ Indexer is a great way to reduce memory footprint of dense embeddings.

        The index is built either from the factory string in
        ``opt['compressed_indexer_factory']`` (when non-empty) or, otherwise, as an
        ``IndexIVFPQ`` whose quantizer is an ``IndexHNSWFlat``. Both use the inner
        product metric.

        :param opt:
            options; the keys read here are ``retriever_embedding_size``,
            ``no_cuda``, ``compressed_indexer_gpu_train``, ``hnsw_ef_search``,
            ``compressed_indexer_factory``, ``compressed_indexer_nprobe`` and, on
            some branches, ``hnsw_indexer_store_n`` and ``hnsw_ef_construction``.
        """
        super().__init__(opt)
        self.dim = opt['retriever_embedding_size']
        # GPU training needs all three: CUDA not disabled, a CUDA device present,
        # and the dedicated option switched on.
        self.use_gpu_train = (not opt['no_cuda'] and torch.cuda.is_available()
                              and opt['compressed_indexer_gpu_train'])
        self.hnsw_ef_search = opt['hnsw_ef_search']

        # NOTE(review): self.faiss is not set in this method; presumably the
        # parent constructor provides the faiss module -- confirm.
        self.index_factory = opt['compressed_indexer_factory']
        if self.index_factory:
            # A non-empty factory string takes precedence over the hand-built
            # index in the else branch.
            logging.warning(
                f'Creating Index from Index Factory: {self.index_factory}')
            # Treated as an IVF index when the factory string contains 'IVF'.
            self.is_ivf_index = 'IVF' in self.index_factory
            self.index = self.faiss.index_factory(
                self.dim, self.index_factory, self.faiss.METRIC_INNER_PRODUCT)
        else:
            # Default: IVFPQ index with an HNSW flat index as its quantizer.
            self.is_ivf_index = True
            quantizer = self.faiss.IndexHNSWFlat(
                self.dim, opt['hnsw_indexer_store_n'],
                self.faiss.METRIC_INNER_PRODUCT)
            quantizer.hnsw.efConstruction = opt['hnsw_ef_construction']
            quantizer.hnsw.efSearch = opt['hnsw_ef_search']
            # NOTE(review): going by the FAISS IndexIVFPQ constructor these
            # literals look like nlist=4096, M=128 sub-quantizers and 8 bits
            # per code -- confirm against the FAISS docs.
            ivf_index = self.faiss.IndexIVFPQ(quantizer, self.dim, 4096, 128,
                                              8,
                                              self.faiss.METRIC_INNER_PRODUCT)
            ivf_index.nprobe = opt['compressed_indexer_nprobe']
            self.index = ivf_index

        if self.is_ivf_index:
            # Reach into the IVF layer and its quantizer to force the inner
            # product metric and turn on verbose output on both.
            self.index_ivf = self.faiss.extract_index_ivf(self.index)
            self.index_ivf.metric_type = self.faiss.METRIC_INNER_PRODUCT
            self.nlist = self.index_ivf.nlist
            self.index_ivf.verbose = True
            self.downcast_quantizer = self.faiss.downcast_index(
                self.index_ivf.quantizer)
            self.downcast_quantizer.verbose = True
            self.downcast_quantizer.metric_type = self.faiss.METRIC_INNER_PRODUCT
            # Only applied when the quantizer has an `hnsw` attribute: (re)apply
            # the ef settings from opt, e.g. for factory-built indexes.
            if hasattr(self.downcast_quantizer, 'hnsw'):
                self.downcast_quantizer.hnsw.efSearch = opt['hnsw_ef_search']
                self.downcast_quantizer.hnsw.efConstruction = opt[
                    'hnsw_ef_construction']
                self.downcast_quantizer.hnsw.metric_type = (
                    self.faiss.METRIC_INNER_PRODUCT)

            # NOTE(review): called only for IVF indexes and after
            # self.index_ivf is assigned; presumably it depends on that
            # attribute -- confirm against setup_gpu_train().
            self.setup_gpu_train()
            # nprobe is also set on the top-level index object.
            self.index.nprobe = opt['compressed_indexer_nprobe']

        self.nprobe = opt['compressed_indexer_nprobe']
        self.span = 5  # arbitrarily chosen, from prior evidence
        # Private random generator with a fixed seed (42).
        self.random = random.Random(42)
Beispiel #4
0
 def setup_gpu_train(self):
     """
     Attach a GPU clustering index to ``self.index`` when GPU training is on.

     Does nothing when ``self.use_gpu_train`` is falsy. If building the GPU
     index raises NameError, a warning is logged and no clustering index is
     attached.
     """
     if not self.use_gpu_train:
         return

     logging.warning('Will train index on GPU')
     try:
         # Look up the GPU helper before building the flat index, so the
         # evaluation order matches a single nested call expression.
         to_all_gpus = self.faiss.index_cpu_to_all_gpus
         flat_ip_index = self.faiss.IndexFlatIP(self.index_ivf.d)
         self.index.clustering_index = to_all_gpus(flat_ip_index)
     except NameError:
         logging.warning(
             'GPU training not supported; switching to CPU.')
Beispiel #5
0
def get_dialogue_task_mutators(opt: Opt) -> str:
    """
    Build the '+'-joined mutator string used for the dialogue tasks.

    The three fixed mutators always come first; a non-empty ``opt['mutators']``
    is appended after them. ``opt`` itself is not modified.

    :param opt:
        options; only the ``mutators`` key is read.

    :return:
        the combined mutator string.
    """
    parts = [
        'flatten',
        'skip_retrieval_mutator',
        'bst_tasks_maybe_generate_search_query_mutator',
    ]
    user_mutators = opt.get('mutators')
    if user_mutators:
        parts.append(user_mutators)
    mutators = '+'.join(parts)
    logging.warning(f'overriding mutators to {mutators}')
    return mutators
Beispiel #6
0
 def __init__(self, opt, shared=None):
     """
     Force the WoW search-decision mutators ahead of any user-supplied ones.

     ``opt['mutators']`` is overwritten with the combined string and
     ``opt['add_missing_turns']`` is set to ``'all'`` before the parent
     constructor is invoked.
     """
     forced = [
         'flatten',
         'wow_maybe_generate_search_query_mutator',
         'skip_retrieval_mutator',
     ]
     user_mutators = opt.get('mutators')
     if user_mutators:
         forced.append(user_mutators)
     mutators = '+'.join(forced)
     logging.warning(f'overriding mutators to {mutators}')
     opt['mutators'] = mutators
     opt['add_missing_turns'] = 'all'
     super().__init__(opt, shared)
     self.id = 'WowSearchDecisionTeacher'
Beispiel #7
0
 def __init__(self, opt, shared=None):
     """
     Force the WoI search-decision mutators ahead of any user-supplied ones.

     The '+'-joined result overwrites ``opt['mutators']`` in place before the
     parent constructor is invoked.
     """
     base = (
         'flatten',
         'woi_dropout_retrieved_docs',
         'woi_maybe_generate_search_query_mutator',
         'skip_retrieval_mutator',
     )
     extra = opt.get('mutators')
     # A falsy value (missing, None, '') means there is nothing to append.
     mutators = '+'.join((base + (extra,)) if extra else base)
     logging.warning(f'overriding mutators to {mutators}')
     opt['mutators'] = mutators
     super().__init__(opt, shared)
     self.id = 'WoiSearchDecisionTeacher'
Beispiel #8
0
 def __init__(self, opt, shared=None):
     """
     Force the MS MARCO dialogue mutators ahead of any user-supplied ones.

     The '+'-joined result overwrites ``opt['mutators']`` in place before the
     parent constructor is invoked.
     """
     forced = [
         'ms_marco_filter_has_answer',
         'ms_marco_create_fid_docs',
         'ms_marco_find_selected_sentence_for_response',
         'woi_pop_documents_mutator',
         'skip_retrieval_mutator',
     ]
     user_mutators = opt.get('mutators')
     if user_mutators:
         forced.append(user_mutators)
     mutators = '+'.join(forced)
     logging.warning(f'overriding mutators to {mutators}')
     opt['mutators'] = mutators
     super().__init__(opt, shared)
     self.id = "MsMarcoDialogueTeacher"
Beispiel #9
0
 def __init__(self, opt, shared=None):
     """
     Force the WoI dialogue mutators ahead of any user-supplied ones.

     The '+'-joined result overwrites ``opt['mutators']`` in place before the
     parent constructor is invoked.

     :param opt:
         options; ``opt['mutators']`` is read and then replaced.
     :param shared:
         passed through unchanged to the parent constructor.
     """
     mutators = '+'.join([
         'flatten',
         'woi_pop_documents_mutator',
         'woi_filter_no_passage_used',
         'woi_add_checked_sentence_to_input',
         'skip_retrieval_mutator',
     ])
     # User mutators are appended exactly once, and only when set. Also
     # splitting them into the list above would repeat every user mutator,
     # leave an empty entry (trailing '+') when none are given, and raise
     # AttributeError when the key is present with a None value.
     if opt.get('mutators'):
         mutators = '+'.join([mutators, opt['mutators']])
     logging.warning(f'overriding mutators to {mutators}')
     opt['mutators'] = mutators
     super().__init__(opt, shared)
     self.id = "WoiDialogueTeacher"