def merge_subsentences_initializer(
    to_write_file,
    to_write_storage,
    to_read_file,
    to_read_storage,
    sentidx2offset_file,
):
    """Multiprocessing initializer that prepares globals for merge_subsentences.

    Both data files are opened as ``np.memmap`` arrays in ``"r+"`` mode and the
    sentence-index trie is loaded with ``utils.load_single_item_trie``. The
    three resulting objects are published as module-level globals.

    Args:
        to_write_file: memmap file bound to ``filt_emb_data_global``
            (the file to write)
        to_write_storage: dtype handed to ``np.memmap`` for ``to_write_file``
        to_read_file: memmap file bound to ``full_pred_data_global``
            (the file to read)
        to_read_storage: dtype handed to ``np.memmap`` for ``to_read_file``
        sentidx2offset_file: trie file mapping sentence index to offset in
            the mmap data

    Returns:
        None
    """
    global filt_emb_data_global, full_pred_data_global
    global sentidx2offset_marisa_global

    # Destination memmap first, then the source memmap (same order as before,
    # so a failure leaves the same globals populated).
    filt_emb_data_global = np.memmap(
        to_write_file,
        mode="r+",
        dtype=to_write_storage,
    )
    full_pred_data_global = np.memmap(
        to_read_file,
        mode="r+",
        dtype=to_read_storage,
    )

    # Sentence index -> offset lookup.
    sentidx2offset_marisa_global = utils.load_single_item_trie(sentidx2offset_file)
def write_data_labels_initializer(
    merged_entity_emb_file,
    merged_storage_type,
    sental2embid_file,
    result_alias_offset,
    train_in_candidates,
    max_cands,
    dump_embs,
    trie_candidate_map_folder,
    trie_qid2eid_file,
):
    """Multiprocessing initializer that prepares globals for write_data_labels.

    Opens the merged embedding file as an ``np.memmap`` in ``"r+"`` mode, loads
    the lookup tries, and copies the scalar settings into module-level globals.

    Args:
        merged_entity_emb_file: flattened embedding input file, opened as a
            memmap and bound to ``filt_emb_data_global``
        merged_storage_type: dtype handed to ``np.memmap``
        sental2embid_file: trie file for the (sentence, alias) -> embedding id
            mapping
        result_alias_offset: alias offset of this batch of results
        train_in_candidates: train-in-candidates flag
        max_cands: maximum number of candidates
        dump_embs: dump-embeddings flag
        trie_candidate_map_folder: folder passed as ``load_dir`` to
            ``AliasCandRecordTrie``
        trie_qid2eid_file: trie file for the qid -> eid mapping

    Returns:
        None
    """
    global filt_emb_data_global, sental2embid_global
    global alias_cand_trie_global, qid2eid_global
    global result_alias_offset_global, train_in_candidates_global
    global max_cands_global, dump_embs_global

    # --- file-backed resources (loaded in the original order) ---
    filt_emb_data_global = np.memmap(
        merged_entity_emb_file,
        mode="r+",
        dtype=merged_storage_type,
    )
    sental2embid_global = utils.load_single_item_trie(sental2embid_file)
    alias_cand_trie_global = AliasCandRecordTrie(load_dir=trie_candidate_map_folder)
    qid2eid_global = utils.load_single_item_trie(trie_qid2eid_file)

    # --- plain configuration values ---
    result_alias_offset_global = result_alias_offset
    train_in_candidates_global = train_in_candidates
    max_cands_global = max_cands
    dump_embs_global = dump_embs
def merge_subsentences_initializer(
    to_write_file,
    to_write_storage,
    to_read_file,
    to_read_storage,
    sent_start_map_file,
):
    """Populate the module-level globals used by merge_subsentences.

    Presumably installed as a multiprocessing pool initializer (inferred from
    the name; confirm against the caller). Both files are opened as
    ``np.memmap`` arrays in ``'r+'`` mode and the sentence start map is loaded
    with ``utils.load_single_item_trie``.

    Args:
        to_write_file: memmap file bound to ``filt_emb_data_global``
        to_write_storage: dtype handed to ``np.memmap`` for ``to_write_file``
        to_read_file: memmap file bound to ``full_pred_data_global``
        to_read_storage: dtype handed to ``np.memmap`` for ``to_read_file``
        sent_start_map_file: trie file bound to
            ``sent_start_map_marisa_global``

    Returns:
        None
    """
    global filt_emb_data_global, full_pred_data_global
    global sent_start_map_marisa_global

    # Keep the open order: destination memmap, source memmap, then the trie.
    filt_emb_data_global = np.memmap(
        to_write_file,
        mode='r+',
        dtype=to_write_storage,
    )
    full_pred_data_global = np.memmap(
        to_read_file,
        mode='r+',
        dtype=to_read_storage,
    )
    sent_start_map_marisa_global = utils.load_single_item_trie(sent_start_map_file)
def write_data_labels_initializer(
    merged_entity_emb_file,
    merged_storage_type,
    sent_idx_map_file,
    train_in_candidates,
    dump_embs,
    data_config,
):
    """Populate the module-level globals used by write_data_labels.

    Presumably installed as a multiprocessing pool initializer (inferred from
    the name; confirm against the caller). Opens the merged embedding file as
    an ``np.memmap`` in ``"r+"`` mode, loads the sentence index trie, stores
    the two flags, and builds an ``EntitySymbols`` object from ``data_config``.

    Args:
        merged_entity_emb_file: memmap file bound to ``filt_emb_data_global``
        merged_storage_type: dtype handed to ``np.memmap``
        sent_idx_map_file: trie file bound to ``sent_idx_map_global``
        train_in_candidates: value stored in ``train_in_candidates_global``
        dump_embs: value stored in ``dump_embs_global``
        data_config: config object providing ``entity_dir``,
            ``entity_map_dir`` and ``alias_cand_map``

    Returns:
        None
    """
    global filt_emb_data_global, sent_idx_map_global
    global train_in_candidates_global, dump_embs_global
    global entity_dump_global

    filt_emb_data_global = np.memmap(
        merged_entity_emb_file,
        mode="r+",
        dtype=merged_storage_type,
    )
    sent_idx_map_global = utils.load_single_item_trie(sent_idx_map_file)

    train_in_candidates_global = train_in_candidates
    dump_embs_global = dump_embs

    # Entity symbols are loaded from <entity_dir>/<entity_map_dir>.
    entity_symbols_dir = os.path.join(
        data_config.entity_dir, data_config.entity_map_dir
    )
    entity_dump_global = EntitySymbols(
        load_dir=entity_symbols_dir,
        alias_cand_map_file=data_config.alias_cand_map,
    )