def create_fields(self):
        """Push the index settings and then the field mapping over HTTP PUT.

        The body of ``setting.json`` is sent to ``self.base_url`` and the body
        of ``fields.json`` to ``self.base_url + '/_mapping'``; both files are
        read from ``DATA_DIR`` before the first request goes out.
        (Presumably the target is an Elasticsearch index, judging by the
        ``_mapping`` endpoint and the error text -- confirm against the caller.)

        Raises:
            Exception: if either response has a status code other than 200.
                The message carries the response text.
        """
        mapping_endpoint = self.base_url + '/_mapping'
        json_headers = {"Content-Type": "application/json"}
        settings_payload = json.dumps(read_json(DATA_DIR + 'setting.json'))
        mapping_payload = json.dumps(read_json(DATA_DIR + 'fields.json'))

        # (target url, request body, error message template), in call order.
        steps = (
            (self.base_url, settings_payload, 'setting es error, {}'),
            (mapping_endpoint, mapping_payload, 'create index error, {}'),
        )
        for target, payload, failure_template in steps:
            response = requests.put(target, data=payload, headers=json_headers)
            # Anything but a plain 200 aborts the remaining steps.
            if response.status_code != 200:
                raise Exception(failure_template.format(response.text))

        self.logger.info('create index success')
Exemple #2
0
    def load_model(self):
        """Return a ``CopyRNN``, optionally restored from ``self.args.train_from``.

        When ``self.args.train_from`` is falsy a fresh model is built from the
        current ``self.args`` and ``self.vocab2id``.

        Otherwise the method resumes from that checkpoint path:

        * the JSON config stored in the checkpoint's directory under
          ``self.get_basename(path) + '.json'`` is read,
        * ``train_from`` and ``step`` are written into it (``step`` is the
          integer found between the last ``_`` of the path and the next ``.``),
        * ``self.args`` and ``self.vocab2id`` are REPLACED from that config,
        * the checkpoint weights are loaded into the new model.

        Raises:
            ValueError: if the step portion of the path is not an integer.
        """
        resume_path = self.args.train_from
        if not resume_path:
            return CopyRNN(self.args, self.vocab2id)

        config_file = os.path.join(os.path.dirname(resume_path),
                                   self.get_basename(resume_path) + '.json')
        saved_config = read_json(config_file)
        saved_config['train_from'] = resume_path
        step_token = resume_path.rsplit('_', 1)[-1].split('.')[0]
        saved_config['step'] = int(step_token)

        # From here on the instance runs with the checkpoint's own settings.
        self.args = Munch(saved_config)
        self.vocab2id = load_vocab(self.args.vocab_path, self.args.vocab_size)
        model = CopyRNN(self.args, self.vocab2id)

        # Without CUDA the stored tensors have to be remapped onto the CPU.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        load_kwargs = {}
        if not torch.cuda.is_available():
            load_kwargs['map_location'] = torch.device('cpu')
        raw_state = torch.load(resume_path, **load_kwargs)

        # Strip the leading 'module.' from parameter names so a model trained
        # in parallel loads without key errors.
        prefix = 'module.'
        cleaned_state = OrderedDict(
            (name[len(prefix):] if name.startswith(prefix) else name, value)
            for name, value in raw_state.items()
        )
        model.load_state_dict(cleaned_state)
        return model
Exemple #3
0
 def load_config(self):
     """Build the run configuration as a ``Munch``.

     Layers, lowest to highest priority:

     1. defaults produced by ``parse_args(parser=self.parser)``,
     2. the JSON file that shares ``self.model_path``'s name but with a
        ``.json`` extension,
     3. ``batch_size``, always forced to ``self.batch_size``.
     """
     cli_defaults = vars(parse_args(parser=self.parser))
     model_stem, _ext = os.path.splitext(self.model_path)
     saved_settings = read_json(model_stem + '.json')
     # Later entries win, so the saved settings override the defaults and
     # the instance's batch size overrides both.
     merged = {
         **cli_defaults,
         **saved_settings,
         'batch_size': self.batch_size,
     }
     return Munch(merged)
 def load_config(self, custom_config):
     """Return the model's saved configuration as a ``Munch``.

     The configuration is read from the JSON file that shares
     ``self.model_path``'s name but with a ``.json`` extension.

     Args:
         custom_config: mapping of overrides laid on top of the saved
             values; when falsy (``None``, ``{}``) the saved values are
             used unchanged.
     """
     json_path = os.path.splitext(self.model_path)[0] + '.json'
     saved_settings = read_json(json_path)
     # Overrides, when given, take precedence over what was saved.
     effective = (
         {**saved_settings, **custom_config} if custom_config
         else saved_settings
     )
     return Munch(effective)
Exemple #5
0
 def __load_data(self, input_data):
     """Resolve ``input_data`` into an in-memory data source.

     Args:
         input_data: either a list, which is deep-copied so the caller's
             object is never shared, or a ``str`` path ending in ``.json``
             (read with ``read_json``) or ``.jsonl`` (read with
             ``read_jsonline``).

     Raises:
         ValueError: for a string path with any other suffix.
         TypeError: when ``input_data`` is neither ``str`` nor ``list``.
     """
     if isinstance(input_data, list):
         return copy.deepcopy(input_data)

     if not isinstance(input_data, str):
         raise TypeError('input data type error. only accept str (path) and  list.')

     # From here on input_data is a path; pick the reader by suffix.
     if input_data.endswith('.json'):
         return read_json(input_data)
     if input_data.endswith('.jsonl'):
         return read_jsonline(input_data)
     raise ValueError('input file type is not supported, only support .json and .jsonl')
 def __init__(self, index_filename):
     """Load a saved hnswlib index together with its companion map file.

     Args:
         index_filename: path passed to ``load_index``; the map is read
             with ``read_json`` from the same path with ``.map`` appended.

     ``self.dim`` is read but never assigned here, so it has to exist
     already (e.g. as a class attribute).
     """
     self.indexer = hnswlib.Index(dim=self.dim, space='cosine')
     self.indexer.load_index(index_filename)
     # Presumably maps index labels back to paper ids -- confirm with the
     # code that writes the '.map' file.
     map_filename = index_filename + '.map'
     self.idx2paper_id = read_json(map_filename)