def observe(self, observation, increment_turn: bool = True):
    """
    Forward an observation to the wrapped model agent.

    The semaphore also guards observe() because, for composed models, an
    act() may be triggered from within an observe(); a deep copy is handed
    to the model agent so the caller's observation is never mutated.
    """
    cls_name = self.__class__.__name__
    logging.info(
        f'{cls_name}: In observe() before semaphore, self.turn_idx is '
        f'{self.turn_idx} and observation is {observation}'
    )
    obs_copy = copy.deepcopy(observation)
    if self.semaphore:
        with self.semaphore:
            self.model_agent.observe(obs_copy)
    else:
        self.model_agent.observe(obs_copy)
    # Flag observations that arrive without a text field; they are usually
    # a sign of an upstream formatting problem.
    if 'text' in obs_copy:
        logging.info(
            f'{cls_name}: In observe() AFTER semaphore, self.turn_idx: '
            f'{self.turn_idx}, observation["text"]: {obs_copy["text"]}'
        )
    else:
        logging.warning(
            f'{cls_name}: In observe() AFTER semaphore, self.turn_idx: '
            f'{self.turn_idx}, and observation is missing a "text" field: '
            f'{obs_copy}'
        )
    if increment_turn:
        self.turn_idx += 1
def __init__(self, opt, shared=None):
    """
    NQ search-decision teacher.

    Injects the search-query-generation and retrieval-skipping mutators
    (appending any user-supplied mutators after them) before the parent
    teacher is constructed.
    """
    combined = '+'.join(
        ('do_generate_search_query_mutator', 'skip_retrieval_mutator')
    )
    extra = opt.get('mutators')
    if extra:
        combined = '+'.join((combined, extra))
        logging.warning(f'overriding mutators to {combined}')
    opt['mutators'] = combined
    super().__init__(opt, shared)
    self.id = 'NQSearchDecisionTeacher'
def __init__(self, opt: Opt):
    """
    Initialize IVFPQ FAISS Indexer. The IVFPQ Indexer is a great way to reduce
    memory footprint of dense embeddings.

    :param opt: options dict; reads retriever/compressed-indexer/HNSW keys
        (see below). Assumes ``self.faiss`` is provided by the superclass
        ``__init__`` — TODO confirm against base class.
    """
    super().__init__(opt)
    # Dimensionality of the dense embeddings being indexed.
    self.dim = opt['retriever_embedding_size']
    # GPU training only when CUDA is usable AND explicitly requested.
    self.use_gpu_train = (not opt['no_cuda'] and torch.cuda.is_available()
                          and opt['compressed_indexer_gpu_train'])
    self.hnsw_ef_search = opt['hnsw_ef_search']
    self.index_factory = opt['compressed_indexer_factory']
    if self.index_factory:
        # Path 1: build the index from a FAISS index-factory string.
        logging.warning(
            f'Creating Index from Index Factory: {self.index_factory}')
        # Factory strings containing 'IVF' need the IVF-specific tuning below.
        self.is_ivf_index = 'IVF' in self.index_factory
        self.index = self.faiss.index_factory(
            self.dim, self.index_factory, self.faiss.METRIC_INNER_PRODUCT)
    else:
        # Path 2: hand-built IVFPQ with an HNSW coarse quantizer.
        self.is_ivf_index = True
        quantizer = self.faiss.IndexHNSWFlat(
            self.dim, opt['hnsw_indexer_store_n'],
            self.faiss.METRIC_INNER_PRODUCT)
        quantizer.hnsw.efConstruction = opt['hnsw_ef_construction']
        quantizer.hnsw.efSearch = opt['hnsw_ef_search']
        # Per the faiss IndexIVFPQ signature: 4096 coarse clusters (nlist),
        # 128 PQ sub-quantizers (M), 8 bits per sub-quantizer code.
        ivf_index = self.faiss.IndexIVFPQ(quantizer, self.dim, 4096, 128, 8,
                                          self.faiss.METRIC_INNER_PRODUCT)
        ivf_index.nprobe = opt['compressed_indexer_nprobe']
        self.index = ivf_index
    if self.is_ivf_index:
        # Tune the underlying IVF index directly (factory-built indexes may
        # wrap it, e.g. with an OPQ pre-transform).
        self.index_ivf = self.faiss.extract_index_ivf(self.index)
        self.index_ivf.metric_type = self.faiss.METRIC_INNER_PRODUCT
        self.nlist = self.index_ivf.nlist
        self.index_ivf.verbose = True
        # Downcast the coarse quantizer to its concrete type so that
        # HNSW-specific knobs can be set when applicable.
        self.downcast_quantizer = self.faiss.downcast_index(
            self.index_ivf.quantizer)
        self.downcast_quantizer.verbose = True
        self.downcast_quantizer.metric_type = self.faiss.METRIC_INNER_PRODUCT
        if hasattr(self.downcast_quantizer, 'hnsw'):
            self.downcast_quantizer.hnsw.efSearch = opt['hnsw_ef_search']
            self.downcast_quantizer.hnsw.efConstruction = opt[
                'hnsw_ef_construction']
            self.downcast_quantizer.hnsw.metric_type = (
                self.faiss.METRIC_INNER_PRODUCT)
        # Optionally move k-means training for the coarse clusters to GPU.
        self.setup_gpu_train()
    self.index.nprobe = opt['compressed_indexer_nprobe']
    self.nprobe = opt['compressed_indexer_nprobe']
    self.span = 5  # arbitrarily chosen, from prior evidence
    # Fixed seed for reproducible sampling.
    self.random = random.Random(42)
def setup_gpu_train(self):
    """
    Setup training on the gpu.

    When GPU training is enabled, clusters the IVF training data on all
    available GPUs; if the installed faiss build lacks GPU support, falls
    back to CPU training with a warning instead of crashing.
    """
    if self.use_gpu_train:
        logging.warning('Will train index on GPU')
        try:
            clustering_index = self.faiss.index_cpu_to_all_gpus(
                self.faiss.IndexFlatIP(self.index_ivf.d))
            self.index.clustering_index = clustering_index
        except (AttributeError, NameError):
            # CPU-only faiss builds don't expose index_cpu_to_all_gpus, and
            # the failed attribute lookup raises AttributeError — the
            # original `except NameError` never caught it, so the intended
            # CPU fallback crashed instead. NameError is kept for safety.
            logging.warning(
                'GPU training not supported; switching to CPU.')
def get_dialogue_task_mutators(opt: Opt) -> str:
    """
    Set the mutators appropriately for the dialogue tasks.

    Always flattens episodes, skips retrieval, and maybe generates a search
    query; any mutators already present in ``opt`` are appended after the
    defaults.

    :param opt: options dict whose optional 'mutators' entry is honored.
    :return: the combined '+'-joined mutator string.
    """
    parts = [
        'flatten',
        'skip_retrieval_mutator',
        'bst_tasks_maybe_generate_search_query_mutator',
    ]
    user_supplied = opt.get('mutators')
    if user_supplied:
        parts.append(user_supplied)
        logging.warning(f"overriding mutators to {'+'.join(parts)}")
    return '+'.join(parts)
def __init__(self, opt, shared=None):
    """
    WoW search-decision teacher.

    Flattens episodes, maybe generates a search query, and skips retrieval;
    user-supplied mutators run after the defaults. Also forces all missing
    turns to be added before the parent teacher is built.
    """
    required = [
        'flatten',
        'wow_maybe_generate_search_query_mutator',
        'skip_retrieval_mutator',
    ]
    user_mutators = opt.get('mutators')
    if user_mutators:
        mutators = '+'.join(required + [user_mutators])
        logging.warning(f'overriding mutators to {mutators}')
    else:
        mutators = '+'.join(required)
    opt['mutators'] = mutators
    opt['add_missing_turns'] = 'all'
    super().__init__(opt, shared)
    self.id = 'WowSearchDecisionTeacher'
def __init__(self, opt, shared=None):
    """
    WoI search-decision teacher.

    Flattens episodes, drops out retrieved docs, maybe generates a search
    query, and skips retrieval; user-supplied mutators are appended last.
    """
    pieces = [
        'flatten',
        'woi_dropout_retrieved_docs',
        'woi_maybe_generate_search_query_mutator',
        'skip_retrieval_mutator',
    ]
    if opt.get('mutators'):
        pieces.append(opt['mutators'])
        logging.warning(f"overriding mutators to {'+'.join(pieces)}")
    opt['mutators'] = '+'.join(pieces)
    super().__init__(opt, shared)
    self.id = 'WoiSearchDecisionTeacher'
def __init__(self, opt, shared=None):
    """
    MS MARCO dialogue teacher.

    Filters to examples with answers, builds FiD docs, finds the selected
    sentence for the response, pops documents, and skips retrieval;
    user-supplied mutators run after these defaults.
    """
    defaults = (
        'ms_marco_filter_has_answer',
        'ms_marco_create_fid_docs',
        'ms_marco_find_selected_sentence_for_response',
        'woi_pop_documents_mutator',
        'skip_retrieval_mutator',
    )
    mutator_str = '+'.join(defaults)
    extra = opt.get('mutators')
    if extra:
        mutator_str = f'{mutator_str}+{extra}'
        logging.warning(f'overriding mutators to {mutator_str}')
    opt['mutators'] = mutator_str
    super().__init__(opt, shared)
    self.id = "MsMarcoDialogueTeacher"
def __init__(self, opt, shared=None):
    """
    WoI dialogue teacher.

    Flattens episodes, pops documents, filters turns with no passage used,
    adds the checked sentence to the input, and skips retrieval; any
    user-supplied mutators are appended once, after the defaults.
    """
    # BUG FIX: the original also concatenated
    # `opt.get('mutators', '').split('+')` into this list, which (a)
    # duplicated user mutators — they were re-appended in the branch
    # below — and (b) produced a trailing '+' / empty mutator segment
    # when none were supplied, since ''.split('+') == ['']. The sibling
    # teachers in this file omit that concatenation; match them.
    mutators = '+'.join([
        'flatten',
        'woi_pop_documents_mutator',
        'woi_filter_no_passage_used',
        'woi_add_checked_sentence_to_input',
        'skip_retrieval_mutator',
    ])
    if opt.get('mutators'):
        mutators = '+'.join([mutators, opt['mutators']])
        logging.warning(f'overriding mutators to {mutators}')
    opt['mutators'] = mutators
    super().__init__(opt, shared)
    self.id = "WoiDialogueTeacher"