def _get(self, spec): if isinstance(spec, dict): # assume that if spec is a dictionary, then must be loadable spec = Spec.dict2spec(spec) if spec not in self.data: raise KeyError("Spec not found: {}".format(spec)) return self.data.get(spec)
def get(self, name):
    """Fetch *name* from the store, deserializing spec-shaped results.

    Raw results that look like serialized specs (dicts carrying a
    ``'type'`` key) are converted back into ``Spec`` instances; anything
    else is returned unchanged.
    """
    raw = self._get_raw(name)
    looks_like_spec = isinstance(raw, dict) and 'type' in raw
    return Spec.dict2spec(raw) if looks_like_spec else raw
def iterkeys(self, raw=False):
    """Iterate over the specs stored in the backing collection.

    :param raw: when True, yield ``(_id, spec_dict)`` pairs exactly as
        stored; otherwise yield deserialized ``Spec`` instances.
    """
    # Only the 'spec' field is projected; the cursor is allowed to time out.
    cursor = self.coll.find(no_cursor_timeout=False, projection=['spec'])
    for doc in cursor:
        spec_dict = doc['spec']
        if raw:
            yield doc['_id'], spec_dict
        else:
            yield Spec.dict2spec(spec_dict)
def __init__(self, *args, **kwargs):
    """Initialize the file-backed store, creating or validating conf.yaml.

    On first use the store directory and a ``conf.yaml`` recording the
    serializer and ``use_class_name`` flag are created.  On subsequent
    instantiations the stored configuration must match the arguments.

    :raises RuntimeError: if ``use_class_name`` or the serializer
        disagree with what this store was originally created with.
    """
    super(FileDataStore, self).__init__(*args, **kwargs)
    if not os.path.exists(self.path):
        os.makedirs(self.path)
    conf_file = os.path.join(self.path, 'conf.yaml')
    if os.path.exists(conf_file):
        with open(conf_file) as f:
            # conf.yaml is written below by yaml.dump of plain dicts, so
            # safe_load suffices (yaml.load without a Loader is unsafe
            # and deprecated).
            conf = yaml.safe_load(f)
        if 'serializer' not in conf:
            warnings.warn(
                "Old conf.yaml format. Please update it to the new format")
            # Old format: the whole file is the serializer spec.
            conf_serializer = Spec.dict2spec(conf)
            conf_use_class_name = False
        else:
            conf_serializer = Spec.dict2spec(conf['serializer'])
            conf_use_class_name = conf.get('use_class_name', False)
    
        if conf_use_class_name != self.use_class_name:
            raise RuntimeError(
                'This store was initialized with use_class_name = {} and now was instanced with {}'
                .format(conf_use_class_name, self.use_class_name))
    
        if self.serializer is not None and self.serializer != conf_serializer:
            # NOTE: the original code did `"...{}" + "...{}".format(a, b)`,
            # which formatted only the second string and left the first
            # placeholder literal; format the whole message at once.  It
            # also read conf['serializer'], which does not exist in the
            # old format; use the already-parsed conf_serializer instead.
            raise RuntimeError(
                "This store was initialized with this serializer:\n{}\n\n"
                "But was now instanced with this one:\n{}".format(
                    json.dumps(conf_serializer.to_dict(), indent=2),
                    json.dumps(self.serializer.to_dict(), indent=2)))
    
        self.serializer = conf_serializer
        self.use_class_name = conf_use_class_name
    else:
        if self.serializer is None:
            self.serializer = PickleSerializer()
        with open(conf_file, 'w') as f:
            yaml.dump(
                {
                    'serializer': self.serializer.to_dict(),
                    'use_class_name': self.use_class_name
                }, f)
def refactor(self, refactor_operation, out_data_store, permissive=False):
    """Re-key every stored document through *refactor_operation*.

    Each raw spec dict is passed to ``refactor_operation``; the result is
    parsed into a ``Spec`` and the original value is written into
    *out_data_store* under the new spec.

    :param permissive: when True, failures on individual documents are
        reported as warnings instead of aborting the whole run.
    """
    # TODO: rewrite iterkeys, it's horrible!
    for doc_id, doc in self.iterkeys(raw=True):
        try:
            refactored_doc = refactor_operation.bind(doc=doc).execute()
            spec = Spec.dict2spec(refactored_doc)
            out_data_store[spec] = self[doc_id]
        # Was Py2-only `except Exception, e:`; `as` works on Py2.6+ and Py3.
        except Exception as e:
            if permissive:
                # Stringify defensively: args may hold non-string payloads.
                warnings.warn(' '.join(map(str, e.args)))
            else:
                # Bare raise preserves the original traceback
                # (`raise e` rebuilds it from here under Python 2).
                raise
def find_similar(self, spec):
    """Return ``(spec, similarity)`` pairs sorted by decreasing similarity.

    *spec* may be a ``Spec`` instance or an already-serialized dict.
    Stored specs that fail to deserialize are returned as raw dicts so
    the caller still sees the match.
    """
    spec_dict = spec.to_dict() if isinstance(spec, Spec) else spec
    res = []
    for _id, other_spec_dict in self.iterkeys(raw=True):
        similarity = matching_fields(spec_dict, other_spec_dict)
        if similarity > 0:
            try:
                res.append((Spec.dict2spec(other_spec_dict), similarity))
            # A bare `except:` also swallowed KeyboardInterrupt/SystemExit;
            # narrow it to Exception.  TODO: improve how exceptions are raised.
            except Exception:
                res.append((other_spec_dict, similarity))
    res.sort(key=lambda x: -x[1])
    return res
def test_get_by_id(self):
    """Every indexed spec must be retrievable by Spec, raw dict, and id."""
    for ds in self.data_stores:
        for doc_id, doc in ds.iterkeys(raw=True):
            # Not gonna perform this test on these kind of specs, I might
            # even remove them in the future
            if isinstance(doc['type'], basestring) and '@' in doc['type']:
                continue
            spec = Spec.dict2spec(doc)
            expected = ds[spec]
            assert ds[doc_id] == expected
            assert ds[doc] == expected
            assert ds[ds.get_id(spec)] == expected
        for spec in self.not_indexed_specs:
            self.assertRaises(KeyError, ds.get_id, spec)
            ds[spec] = 1
            assert ds[ds.get_id(spec)] == 1
def _dict2spec(self, d):
    """Deserialize *d* into a ``Spec``.

    Works on a shallow copy so the caller's dict is left untouched.
    """
    shallow = d.copy()
    return Spec.dict2spec(shallow)
def _parse_doc(self, doc):
    """Split a stored document into its ``(spec, values)`` pair.

    When GridFS is in use, ``doc['values']`` holds a GridFS id and the
    actual payload is fetched and read from there.
    """
    payload = doc['values']
    if self.use_gridfs:
        payload = self.gridfs.get(payload).read()
    return Spec.dict2spec(doc['spec']), payload
def test_to_dict(self):
    """Each data store must round-trip through its dict representation."""
    for store in self.data_stores:
        round_tripped = Spec.dict2spec(store.to_dict())
        assert store == round_tripped
def test_empty_load(self):
    """Loading only a type name must fall back to the spec's defaults."""
    loaded = Spec.dict2spec({'type': 'SpecWithDefault'})
    assert loaded == SpecWithDefault()
def test_serialize(self):
    """to_dict must omit defaults unless include_all, and round-trip either way."""
    s = SpecA(0, verbose=True)
    default_dict = s.to_dict()
    full_dict = s.to_dict(include_all=True)
    assert 'verbose' not in default_dict
    assert 'verbose' in full_dict
    assert Spec.dict2spec(default_dict) == s
    assert Spec.dict2spec(full_dict) == s