def create_fields(self):
    """Create the Elasticsearch index and install its field mappings.

    Index settings come from ``setting.json`` and field mappings from
    ``fields.json`` under ``DATA_DIR``; both are PUT to the cluster.

    Raises:
        Exception: if either the settings PUT or the mapping PUT does not
            return HTTP 200.
    """
    headers = {"Content-Type": "application/json"}
    settings_body = json.dumps(read_json(DATA_DIR + 'setting.json'))
    mapping_body = json.dumps(read_json(DATA_DIR + 'fields.json'))

    # First create the index itself with its settings...
    response = requests.put(self.base_url, data=settings_body, headers=headers)
    if response.status_code != 200:
        raise Exception('setting es error, {}'.format(response.text))

    # ...then attach the field mappings to it.
    response = requests.put(self.base_url + '/_mapping', data=mapping_body,
                            headers=headers)
    if response.status_code != 200:
        raise Exception('create index error, {}'.format(response.text))

    self.logger.info('create index success')
def load_model(self):
    """Build a CopyRNN model, optionally restoring it from a checkpoint.

    When ``self.args.train_from`` is unset, a freshly initialized model is
    returned. Otherwise the saved run's config (the ``.json`` file next to
    the checkpoint) replaces ``self.args`` and ``self.vocab2id`` before the
    checkpoint weights are loaded into the model.

    Returns:
        CopyRNN: the model, with checkpoint weights loaded when resuming.
    """
    if not self.args.train_from:
        # Fresh model: there is no checkpoint path, so nothing to restore.
        # (Falling through to torch.load here would raise a NameError on
        # the unbound model_path.)
        return CopyRNN(self.args, self.vocab2id)

    model_path = self.args.train_from
    config_path = os.path.join(os.path.dirname(model_path),
                               self.get_basename(model_path) + '.json')
    old_config = read_json(config_path)
    old_config['train_from'] = model_path
    # Checkpoint files encode the step as the trailing "_<step>.<ext>" part.
    old_config['step'] = int(model_path.rsplit('_', 1)[-1].split('.')[0])
    self.args = Munch(old_config)
    self.vocab2id = load_vocab(self.args.vocab_path, self.args.vocab_size)
    model = CopyRNN(self.args, self.vocab2id)

    # Map tensors onto the CPU when no GPU is available.
    if torch.cuda.is_available():
        checkpoint = torch.load(model_path)
    else:
        checkpoint = torch.load(model_path, map_location=torch.device('cpu'))

    # Strip the 'module.' prefix that DataParallel training leaves on keys,
    # so the state dict loads into a non-parallel model.
    state_dict = OrderedDict()
    for k, v in checkpoint.items():
        if k.startswith('module.'):
            k = k[7:]
        state_dict[k] = v
    model.load_state_dict(state_dict)
    return model
def load_config(self):
    """Merge CLI defaults with the saved model config into a Munch.

    Values from the model's ``.json`` config override the parser defaults;
    ``batch_size`` always comes from this instance regardless of either.
    """
    defaults = vars(parse_args(parser=self.parser))
    saved = read_json(os.path.splitext(self.model_path)[0] + '.json')
    merged = {**defaults, **saved, 'batch_size': self.batch_size}
    return Munch(merged)
def load_config(self, custom_config):
    """Load the model's saved config, optionally overlaid with custom values.

    Args:
        custom_config: mapping whose entries override the saved config;
            a falsy value leaves the saved config untouched.

    Returns:
        Munch: the resulting configuration.
    """
    config_path = os.path.splitext(self.model_path)[0] + '.json'
    saved = read_json(config_path)
    merged = {**saved, **custom_config} if custom_config else saved
    return Munch(merged)
def __load_data(self, input_data):
    """Normalize *input_data* into a list of records.

    Args:
        input_data: either a path to a ``.json``/``.jsonl`` file, or an
            already-loaded list (deep-copied so the caller's list is never
            mutated).

    Raises:
        ValueError: for a string path with an unsupported extension.
        TypeError: for any other input type.
    """
    if isinstance(input_data, list):
        return copy.deepcopy(input_data)
    if isinstance(input_data, str):
        if input_data.endswith('.json'):
            return read_json(input_data)
        if input_data.endswith('.jsonl'):
            return read_jsonline(input_data)
        raise ValueError('input file type is not supported, only support .json and .jsonl')
    raise TypeError('input data type error. only accept str (path) and list.')
def __init__(self, index_filename):
    """Load a prebuilt HNSW index and its id-mapping sidecar file.

    Args:
        index_filename: path to the serialized hnswlib index; the mapping
            of internal ids is read from ``<index_filename>.map``.
    """
    # NOTE(review): relies on ``self.dim`` already existing (presumably a
    # class attribute) — confirm, since nothing sets it here.
    index = hnswlib.Index(space='cosine', dim=self.dim)
    index.load_index(index_filename)
    self.indexer = index
    self.idx2paper_id = read_json(index_filename + '.map')