def _databaseOperation_runCommand(self, target, **kwargs):
    self.__raise_if_unsupported('runCommand', target, Database)
    # Ensure the first key is the command name.
    ordered_command = SON([(kwargs.pop('command_name'), 1)])
    ordered_command.update(kwargs['command'])
    kwargs['command'] = ordered_command
    return target.command(**kwargs)
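# Why the reordering above matters: MongoDB treats the first key of the
# command document as the command name. A minimal, self-contained sketch
# of the effect (values illustrative):
from bson.son import SON

kwargs = {"command_name": "ping", "command": {"comment": "hi", "ping": 1}}
ordered = SON([(kwargs.pop("command_name"), 1)])
ordered.update(kwargs["command"])
# ordered == SON([("ping", 1), ("comment", "hi")]) -- "ping" comes first,
# regardless of the key order in the original dict.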
def specific_config_gen(IC, args):
    IC.base_dir = args['base_dir']
    IC.annotate_dir = args['annotate_dir']
    IC.groundtruth_dir = args['groundtruth_dir']
    IC.correspondence = tb.tabarray(SVfile=args['frame_correspondence'])
    IC.size = args['size']
    IC.prefix = prefix = args.get('image_extension', '.jpg')
    IC.current_frame_path = None

    # Load the per-clip annotation tables and tag each row with its clip number.
    csvs = sorted(x for x in os.listdir(IC.annotate_dir) if x.endswith('.csv'))
    Xs = [tb.tabarray(SVfile=os.path.join(IC.annotate_dir, csv)) for csv in csvs]
    cns = [csv.split('.')[0] for csv in csvs]
    cns = [[cn] * len(X) for (cn, X) in zip(cns, Xs)]
    Xs = [X.addcols(cn, names=['clip_num']) for (cn, X) in zip(cns, Xs)]

    # Join each annotation table against its ground-truth table, if one exists.
    csvs = sorted(x for x in os.listdir(IC.groundtruth_dir) if x.endswith('.csv'))
    Gs = []
    fields = ['clip_num', 'Frame'] + xfields + yfields
    for ind, csv in enumerate(csvs):
        try:
            g = tb.tabarray(SVfile=os.path.join(IC.groundtruth_dir, csv))
        except Exception:
            # No usable ground truth for this clip; mark all rows as unscored.
            x = Xs[ind].addcols([-1] * len(Xs[ind]), names=['Correctness'])
        else:
            g = g.addcols([csv.split('.')[0]] * len(g), names=['clip_num'])
            g = g[fields + ['Confidence']]
            g.renamecol('Confidence', 'Correctness')
            x = Xs[ind].join(g, keycols=fields)
        Gs.append(x)

    X = tb.tab_rowstack(Gs)
    X.sort(order=['clip_num', 'Frame'])

    # Restrict to frames present in the correspondence table, then join.
    Y = IC.correspondence
    F = tb.fast.recarrayisin(Y[['clip_num', 'Frame']], X[['clip_num', 'Frame']])
    Y = Y[F]
    X = X.join(Y, keycols=['clip_num', 'Frame'])

    params = []
    for t in X:
        print(t)
        cn = t['clip_num']
        fr = t['Frame']
        box = get_darpa_box(t)
        bb = box.pop('box')
        xc, yc = bb.center
        center = correct_center((xc, yc), IC.size, (1920, 1080))
        bb_new = bbox.BoundingBox(center=center, width=IC.size[0],
                                  height=IC.size[1])
        p = SON([('size', IC.size),
                 ('bounding_box', SON([('xfields', list(bb_new.xs)),
                                       ('yfields', list(bb_new.ys))])),
                 ('original_bounding_box', SON([('xfields', list(bb.xs)),
                                                ('yfields', list(bb.ys))])),
                 ('clip_num', cn),
                 ('Frame', int(t['Original'])),
                 ('base_dir', IC.base_dir),
                 ('correctness', int(t['Correctness']))])
        p.update(box)
        p['GuessObjectType'] = p['ObjectType']
        p['ObjectType'] = p['ObjectType'] if t['Correctness'] == 1 else ''
        params.append(SON([('image', p)]))
    return params
def command(self, command, value=1, callback=None,
            check=True, allowable_errors=[], **kwargs):
    """Issue a MongoDB command.

    Send command `command` to the database and return the response. If
    `command` is an instance of :class:`basestring` then the command
    {`command`: `value`} will be sent. Otherwise, `command` must be an
    instance of :class:`dict` and will be sent as is.

    Any additional keyword arguments will be added to the final command
    document before it is sent.

    For example, a command like ``{buildinfo: 1}`` can be sent using:

    >>> db.command("buildinfo")

    For a command where the value matters, like
    ``{collstats: collection_name}`` we can do:

    >>> db.command("collstats", collection_name)

    For commands that take additional arguments we can use kwargs. So
    ``{filemd5: object_id, root: file_root}`` becomes:

    >>> db.command("filemd5", object_id, root=file_root)

    :Parameters:
      - `command`: document representing the command to be issued, or the
        name of the command (for simple commands only).

        .. note:: the order of keys in the `command` document is
           significant (the "verb" must come first), so commands which
           require multiple keys (e.g. `findandmodify`) should use an
           instance of :class:`~bson.son.SON` or a string and kwargs
           instead of a Python `dict`.

      - `value` (optional): value to use for the command verb when
        `command` is passed as a string
      - `**kwargs` (optional): additional keyword arguments will be added
        to the command document before it is sent

    .. mongodoc:: commands
    """
    if isinstance(command, basestring):
        command = SON([(command, value)])
    command.update(kwargs)

    # Imported here to avoid a circular import with mongotor.cursor.
    from mongotor.cursor import Cursor
    cursor = Cursor('$cmd', command, is_command=True)
    cursor.find(limit=-1, callback=callback)
def __set_fail_point(self, client, command_args):
    if not client_context.test_commands_enabled:
        self.skipTest('Test commands must be enabled')
    cmd_on = SON([('configureFailPoint', 'failCommand')])
    cmd_on.update(command_args)
    client.admin.command(cmd_on)
    self.addCleanup(client.admin.command, 'configureFailPoint',
                    cmd_on['configureFailPoint'], mode='off')
def near(col, params):
    if params is None or params.count(':') != 2:
        raise TypeError(
            "$near requires three arguments. Use like "
            "/%s/$near=-73.10:42.18:0.5/ to return all records within "
            "a 0.5-mile radius of %s" % (col, col))
    params = params.split(":")
    params[0] = float(params[0])          # longitude
    params[1] = float(params[1])          # latitude
    params[2] = float(params[2]) / 69.0   # miles -> degrees (approx.)
    near_dict = {"$near": [params[0], params[1]]}
    dist_dict = {"$maxDistance": params[2]}
    q = SON(near_dict)
    q.update(dist_dict)
    return q
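# A minimal usage sketch (assumes a legacy-coordinate 2d index on a "loc"
# field; the client and collection names are illustrative, not from the
# source): all records within half a mile of (-73.10, 42.18).
from pymongo import MongoClient

places = MongoClient().test.places
q = near("loc", "-73.10:42.18:0.5")
docs = places.find({"loc": q})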
def create_user(authdb, user, pwd=None, roles=None, **kwargs):
    """Create a user.

    Avoids PyMongo's add_user helper, which isn't yet compatible with
    MongoDB 4.
    """
    cmd = SON([('createUser', user)])
    # X509 doesn't use a password.
    if pwd:
        cmd['pwd'] = pwd
    cmd['roles'] = roles or ['root']
    cmd.update(**kwargs)
    return authdb.command(cmd)
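# A minimal usage sketch (assumes a running mongod and a connection with
# user-administration privileges; the user name, password, and role are
# illustrative):
from pymongo import MongoClient

admin_db = MongoClient("mongodb://localhost:27017").admin
create_user(admin_db, "alice", pwd="secret", roles=["readWrite"])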
def command(self, command, value=1, read_preference=None, callback=None,
            check=True, allowable_errors=[], connection=None, **kwargs):
    """Issue a MongoDB command.

    Send command `command` to the database and return the response. If
    `command` is an instance of :class:`basestring` then the command
    {`command`: `value`} will be sent. Otherwise, `command` must be an
    instance of :class:`dict` and will be sent as is.

    Any additional keyword arguments will be added to the final command
    document before it is sent.

    For example, a command like ``{buildinfo: 1}`` can be sent using:

    >>> db.command("buildinfo")

    For a command where the value matters, like
    ``{collstats: collection_name}`` we can do:

    >>> db.command("collstats", collection_name)

    For commands that take additional arguments we can use kwargs. So
    ``{filemd5: object_id, root: file_root}`` becomes:

    >>> db.command("filemd5", object_id, root=file_root)

    :Parameters:
      - `command`: document representing the command to be issued, or the
        name of the command (for simple commands only).

        .. note:: the order of keys in the `command` document is
           significant (the "verb" must come first), so commands which
           require multiple keys (e.g. `findandmodify`) should use an
           instance of :class:`~bson.son.SON` or a string and kwargs
           instead of a Python `dict`.

      - `value` (optional): value to use for the command verb when
        `command` is passed as a string
      - `**kwargs` (optional): additional keyword arguments will be added
        to the command document before it is sent
    """
    if isinstance(command, basestring):
        command = SON([(command, value)])
    command.update(kwargs)
    if read_preference is None:
        read_preference = self._read_preference
    Client(self, '$cmd').find_one(command, is_command=True,
                                  connection=connection,
                                  read_preference=read_preference,
                                  callback=callback)
class FailPoint:
    def __init__(self, client, command_args):
        self.client = client
        self.cmd_on = SON([("configureFailPoint", "failCommand")])
        self.cmd_on.update(command_args)

    async def __aenter__(self):
        await self.client.admin.command(self.cmd_on)

    async def __aexit__(self, exc_type, exc, tb):
        await self.client.admin.command("configureFailPoint",
                                        self.cmd_on["configureFailPoint"],
                                        mode="off")
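# A minimal usage sketch (assumes an asyncio-compatible MongoDB client,
# e.g. Motor's AsyncIOMotorClient; the failCommand arguments shown are
# illustrative):
async def run_with_fail_point(client):
    args = {"mode": {"times": 1},
            "data": {"failCommands": ["insert"], "closeConnection": True}}
    async with FailPoint(client, args):
        # The next insert on this client should see a network error; the
        # fail point is switched off again on exit.
        ...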
def generate_splits(task_config, hash, colname):
    base_query = SON([('__hash__', hash)])
    ntrain = task_config['ntrain']
    ntest = task_config['ntest']
    ntrain_pos = task_config.get('ntrain_pos')
    ntest_pos = task_config.get('ntest_pos')
    N = task_config.get('N', 10)
    query = task_config['query']
    base_query.update(reach_in('config', task_config.get('universe', SON([]))))
    cquery = reach_in('config', query)
    print('q', cquery)
    print('u', base_query)
    return traintest.generate_split2(DB_NAME, colname, cquery, N, ntrain, ntest,
                                     ntrain_pos=ntrain_pos, ntest_pos=ntest_pos,
                                     universe=base_query, use_negate=True)
class FlatFileKVEngine(object):

    def __init__(self, file_path, conn_config):
        """Key-value storage engine backed by a single flat file."""
        self.file_path = file_path
        self.__conn_config = conn_config
        self.__cache = SON()
        self.modified_count = 0
        if os.path.isfile(self.file_path):
            for doc in _read_pretty(self.file_path):
                self.__cache[doc["_id"]] = BSON.encode(doc)

    @classmethod
    def touch(cls, file_path):
        if not os.path.isfile(file_path):
            with open(file_path, "w"):
                pass

    @property
    def document_count(self):
        return len(self.__cache)

    def flush(self):
        _write_pretty(self.file_path, self.__cache)
        self.modified_count = 0

    def read(self):
        return self.__cache

    def write(self, documents):
        if not isinstance(documents, SON):
            raise TypeError("Expecting 'SON' type, got {!r}"
                            "".format(type(documents).__name__))
        self.modified_count += len(documents)
        self.__cache.update(documents)
        if self.modified_count > self.__conn_config.cache_modified:
            self.flush()

    def delete(self, doc_id):
        self.modified_count += 1
        del self.__cache[doc_id]
        if self.modified_count > self.__conn_config.cache_modified:
            self.flush()
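# A minimal usage sketch. ConnConfig is an illustrative stand-in for the
# real conn_config object (only its `cache_modified` attribute is used
# above), and the id -> encoded-document payload shape for write() is
# inferred from how __init__ populates the cache.
from collections import namedtuple
from bson import BSON
from bson.son import SON

ConnConfig = namedtuple("ConnConfig", ["cache_modified"])

FlatFileKVEngine.touch("data.json")
engine = FlatFileKVEngine("data.json", ConnConfig(cache_modified=100))
engine.write(SON([("doc-1", BSON.encode({"_id": "doc-1", "x": 1}))]))
engine.flush()   # persist the in-memory cache via _write_pretty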
def train_test_loop(outfile, extract_creates, task_config, feature_config_path, hash):
    feature_config = get_config(feature_config_path)
    base_query = SON([('__config_hash__', hash)])
    image_params = SON([('image', feature_config['image'])])
    models_params = feature_config['models']
    ntrain = task_config['ntrain']
    ntest = task_config['ntest']
    ntrain_pos = task_config.get('ntrain_pos')
    N = task_config.get('N', 10)
    query = task_config['query']
    base_query.update(reach_in('config', task_config.get('universe', SON([]))))
    print('\n')
    print('BASE', base_query)
    print('\n')
    conn = pm.Connection(document_class=SON)
    db = conn['v1']
    fs = gridfs.GridFS(db, collection='model_performance')
    cquery = reach_in('config', query)
    for m in models_params:
        base_query_copy = base_query.copy()
        base_query_copy.update(reach_in('config.model', m))
        splitdata, results = train_test(cquery, 'v1', 'features', ntrain, ntest,
                                        ntrain_pos=ntrain_pos, N=N,
                                        universe=base_query_copy)
        splitpickle = cPickle.dumps(splitdata)
        data = SON([('feature_config_path', feature_config_path),
                    ('model', m),
                    ('task', son_escape(task_config)),
                    ('image__aggregate__', son_escape(feature_config['image']))])
        filename = get_filename(data)
        data.update(results)
        data['filename'] = filename
        fs.put(splitpickle, **data)
    createCertificateDict(outfile, {'task_config': task_config,
                                    'feature_config': feature_config,
                                    'feature_config_path': feature_config_path})
def put_in_performance(split_results, image_config_gen, m, model_hash,
                       image_hash, perf_coll, task, ext_hash):
    model_results = SON([])
    for stat in STATS:
        if stat in split_results[0] and split_results[0][stat] is not None:
            model_results[stat] = sp.array(
                [split_result[stat] for split_result in split_results]).mean()
    out_record = SON([('model', m['config']['model']),
                      ('model_hash', model_hash),
                      ('model_filename', m['filename']),
                      ('images', son_escape(image_config_gen['images'])),
                      ('image_hash', image_hash),
                      ('task', son_escape(task)),
                      ('__hash__', ext_hash)])
    out_record.update(model_results)
    perf_coll.insert(out_record)
def put_in_split_result(res, image_config_gen, m, task, ext_hash, split_id,
                        splitres_fs):
    out_record = SON([('model', m['config']['model']),
                      ('images', son_escape(image_config_gen['images'])),
                      ('task', son_escape(task)),
                      ('split_id', split_id)])
    split_result = SON([])
    for stat in STATS:
        if stat in res and res[stat] is not None:
            split_result[stat] = res[stat]
    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record['__hash__'] = ext_hash
    out_record.update(split_result)
    print('dump out split result...')
    out_data = cPickle.dumps(SON([('split_result', res)]))
    splitres_fs.put(out_data, **out_record)
def get_last_error(self, db, **options):
    command = SON([("getlasterror", 1)])
    db = "%s.$cmd" % db.split('.', 1)[0]
    command.update(options)
    query = Query(collection=db, query=command)
    reply = yield self.send_QUERY(query)
    assert len(reply.documents) == 1
    document = reply.documents[0].decode()
    err = document.get("err", None)
    code = document.get("code", None)
    if err is not None:
        if code == 11000:
            raise DuplicateKeyError(err, code=code)
        else:
            raise OperationFailure(err, code=code)
    defer.returnValue(document)
def get_last_error(self, db, **options):
    command = SON([("getlasterror", 1)])
    db = "%s.$cmd" % db.split('.', 1)[0]
    command.update(options)

    def on_reply(reply):
        assert len(reply.documents) == 1
        document = reply.documents[0].decode()
        err = document.get("err", None)
        code = document.get("code", None)
        if err is not None:
            if code == 11000:
                raise DuplicateKeyError(err, code=code)
            else:
                raise OperationFailure(err, code=code)
        return document

    query = Query(collection=db, query=command)
    return self.send_QUERY(query).addCallback(on_reply)
def get_op_gen(op, oplist):
    if op.get("outcertpaths") is None:
        func = op["func"]
        params = op.get("params")
        inroots = func.inroots
        outroots = func.outroots
        if func.action_name == "inject":
            args = op["params"]
            out_args = SON([(outroot, params) for outroot in outroots])
        else:
            params = op.get("params", SON([]))
            # Find, for each input collection, the op that produces it.
            parents = []
            for ir in inroots:
                try:
                    parent = [op0 for op0 in oplist
                              if ir in op0["func"].outroots][0]
                except IndexError:
                    raise IndexError("No parent op found for input "
                                     "collection " + repr(ir))
                else:
                    parents.append(parent)
            # Recurse so every parent has out_args/outcertpaths set.
            for parent in parents:
                get_op_gen(parent, oplist)
            in_args = [parent["out_args"] for parent in parents]
            op["incertpaths"] = [
                get_cert_path(func.dbname, inroot, get_config_string(in_arg))
                for (inroot, in_arg) in zip(inroots, in_args)]
            out_args = dict_union(in_args)
            out_args.update(params)
        op["out_args"] = out_args
        op["outcertpaths"] = [
            get_cert_path(func.dbname, outroot, get_config_string(out_args))
            for outroot in func.outroots]
def oid_date_range_filter(dt_from=None, dt_upto=None, field_name='_id'):
    """Construct a range query useful for querying an ObjectId field by date.

    :Parameters:
      - dt_from (datetime or tuple): starting datetime; if a tuple, a
        datetime is constructed from it
      - dt_upto (datetime or tuple): end datetime; if a tuple, a datetime
        is constructed from it
      - field_name (str): optional, defaults to '_id'; the field to query.
        If None, returns only the range document, else the full query.

    :Returns:
      - a range query. Because ObjectId.from_datetime generates the
        smallest ObjectId for a given timestamp, $gte includes documents
        created at dt_from, while $lte effectively excludes those created
        at dt_upto.
    """
    def dt(dt_or_tuple):
        if isinstance(dt_or_tuple, datetime):
            return dt_or_tuple
        elif isinstance(dt_or_tuple, tuple):
            return datetime(*dt_or_tuple)
        else:
            raise TypeError('dt must be a datetime or tuple')

    q = SON()
    if dt_from is not None:
        q.update(SON([('$gte', ObjectId.from_datetime(dt(dt_from)))]))
    if dt_upto is not None:
        q.update(SON([('$lte', ObjectId.from_datetime(dt(dt_upto)))]))
    return q if field_name is None else SON([(field_name, q)])
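# A minimal usage sketch (the database and collection names are
# illustrative): select documents whose _id was generated during
# January 2024.
from pymongo import MongoClient

db = MongoClient().test
q = oid_date_range_filter(dt_from=(2024, 1, 1), dt_upto=(2024, 2, 1))
# q == SON([('_id', SON([('$gte', ObjectId(...)), ('$lte', ObjectId(...))]))])
docs = db.events.find(q)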
async def set_fail_point(self, client, command_args):
    cmd = SON([('configureFailPoint', 'failCommand')])
    cmd.update(command_args)
    await client.admin.command(cmd)
def mr(coll,                      # a pymongo collection instance
       fun_map,                   # js function used for map
       fun_reduce=None,           # js function used for reduce; defaults to one counting values
       query={},                  # a pymongo query dictionary to query coll, defaults to {}
       out={"replace": 'mr_tmp'},  # output spec {'replace'|'merge'|'reduce'|'inline': collection_name, 'db': db_name}
       fun_finalize=None,         # js function to run on finalize
       scope={},                  # vars available during map-reduce
       sort=None,                 # e.g. sort={"_id": 1} to sort before map
       jsMode=False,              # don't convert to BSON between map & reduce if True
       verbose=1                  # 1 includes timing info on output; 2, 3 more details
       ):
    """Simplified generic Map Reduce.

    `See MongoDB Map Reduce
    <http://docs.mongodb.org/manual/reference/command/mapReduce/>`_

    :Parameters:
      - coll (object): a pymongo collection instance
      - fun_map: js function used for map
      - fun_reduce: js function used for reduce; defaults to a function
        that increments value count
      - query: a pymongo query dictionary to query the collection,
        defaults to {}
      - out: a dictionary for output specification
        {replace|merge|reduce: collection_name, db: db_name}; can also
        specify {'inline': 1} for in-memory operation (with some
        limitations); defaults to {"replace": 'mr_tmp'}
      - scope: vars available during map-reduce-finalize
      - sort: dictionary to sort before map, e.g. sort={"_id": 1}
      - jsMode: don't convert to BSON between map & reduce if True;
        should be False if we expect more than 500K distinct results
      - 'db' (optional): database name; if no db is specified the output
        collection will be in the same db as the input coll

    :Returns:
      tuple (results collection, or results list if out={"inline": 1},
      MR response statistics)

    :Example:
      see :func:`group_counts` function
    """
    def mr_cmd():
        """Return the actual output command from the out parameter."""
        return [i for i in ['replace', 'merge', 'reduce', 'inline']
                if i in list(out.keys())][0]

    command = mr_cmd()
    out_db = out.get('db', None)
    # NOTE: nonAtomic is not allowed on replace.
    out = SON([(command, out[command]),
               ('nonAtomic', out.get('nonAtomic', False))])
    if out_db:
        out.update(SON([('db', out_db)]))
    fun_map = Code(fun_map, {})
    if fun_reduce is None:
        fun_reduce = parse_js_default('MapReduce.js', 'GroupCountsReduce')
    fun_reduce = Code(fun_reduce, {})
    if sort:
        sort = SON(sort)
    if verbose > 1:
        frmt = ("Map Reduce {}\n"
                "collection= {coll!s}\n"
                "query= {query!s}\n"
                "sort= {sort!s}\n"
                "scope= {scope!s}\n"
                "out= {out!s}\n"
                "jsMode= {jsMode!s}\n"
                "map= {fun_map!s}\n"
                "reduce= {fun_reduce!s}\n"
                "finalize= {fun_finalize!s}\n")
        print(frmt.format('Starting...', **locals()))
    r = coll.map_reduce(fun_map, fun_reduce, out=out, query=query,
                        finalize=fun_finalize, scope=scope, sort=sort,
                        full_response=True, jsMode=jsMode)
    if verbose > 0:
        frmt = ("Map Reduce {}\n"
                "ok= {ok}\n"
                "millisecs= {timeMillis:,d}\n"
                "counts= {counts!s}\n"
                "out= {!s}\n")
        print(frmt.format('End', out, **r))
    if command == 'inline':
        # results is a list if inline, else a collection.
        results = r['results']
        del r['results']
    else:
        # If no db was specified the output collection lives in the
        # input collection's db.
        db_out = out.get('db', coll.database.name)
        results = coll.database.client[db_out][out[command]]
    return results, r
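# A minimal usage sketch (assumes a `pageviews` collection with a `user`
# field and that the package's default GroupCountsReduce asset is
# available; all names are illustrative): count page views per user.
from pymongo import MongoClient

db = MongoClient().test
results, stats = mr(
    db.pageviews,
    fun_map="function() { emit(this.user, 1); }",
    out={"replace": "views_per_user"},
)
print(stats["counts"])   # map-reduce response statistics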
def set_fail_point(self, command_args):
    cmd = SON([("configureFailPoint", "failCommand")])
    cmd.update(command_args)
    self.client.admin.command(cmd)
def greedy_optimization(outfile, task, image_certificate_file, initial_model,
                        convolve_func, rep_limit, modifier_args, modifier):
    conn = pm.Connection(document_class=bson.SON)
    db = conn['v1']
    opt_fs = gridfs.GridFS(db, 'optimized_performance')
    image_coll = db['raw_images.files']
    image_fs = gridfs.GridFS(db, 'raw_images')

    image_certdict = cPickle.load(open(image_certificate_file))
    print('using image certificate', image_certificate_file)
    image_hash = image_certdict['run_hash']
    image_args = image_certdict['out_args']

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.setup_pyfft()

    filterbanks = []
    perfs = []
    model_configs = []
    center_config = initial_model
    i = 0
    # Test rep_limit is None first so we never compare an int to None.
    while rep_limit is None or i < rep_limit:
        i += 1
        print('Round', i)
        next_configs = [m for m in get_consistent_deltas(center_config, modifier)
                        if m not in model_configs]
        if next_configs:
            next_results = [get_performance(task, image_hash, image_fs, m,
                                            convolve_func)
                            for m in next_configs]
            next_perfs = [x[0] for x in next_results]
            next_filterbanks = [x[1] for x in next_results]
            next_perf_ac_max = np.array([x['test_accuracy']
                                         for x in next_perfs]).max()
            perf_ac_max = max([x['test_accuracy'] for x in perfs]) if perfs else 0
            if next_perf_ac_max > perf_ac_max:
                next_perf_ac_argmax = np.array([x['test_accuracy']
                                                for x in next_perfs]).argmax()
                center_config = next_configs[next_perf_ac_argmax]
                print('\n\n')
                print('new best performance is', next_perf_ac_max,
                      'from model', center_config)
                print('\n\n')
                perfs.extend(next_perfs)
                model_configs.extend(next_configs)
                filterbanks.extend(next_filterbanks)
            else:
                print('Breaking because no further optimization could be done.',
                      'Best existing performance was', perf_ac_max,
                      'while best next performance was', next_perf_ac_max)
                break
        else:
            print('Breaking because no next configs')
            break

    perfargmax = np.array([p['test_accuracy'] for p in perfs]).argmax()
    best_model = model_configs[perfargmax]
    best_performance = perfs[perfargmax]

    out_record = SON([('initial_model', initial_model),
                      ('task', son_escape(task)),
                      ('images', son_escape(image_args)),
                      ('images_hash', image_hash),
                      ('modifier_args', son_escape(modifier_args)),
                      ('modifier', modifier.__class__.__module__ + '.' +
                                   modifier.__class__.__name__)])
    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record.update(SON([('performances', perfs)]))
    out_record.update(SON([('best_model', best_model)]))
    out_record.update(SON([('best_performance', best_performance)]))
    out_record.update(SON([('num_steps', len(model_configs))]))
    out_record.update(SON([('models', model_configs)]))

    outdata = cPickle.dumps(filterbanks)
    opt_fs.put(outdata, **out_record)

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.cleanup_pyfft()

    createCertificateDict(outfile, {'image_file': image_certificate_file})
def find_cmd(self, select=None, project=None, sort=None, take=None,
             skip=None, tailable=False, reduce_by=None):
    cmd = SON([('find', self._mongo_collection.name)])
    if select:
        cmd['filter'] = select
    if project:
        cmd['projection'] = project
    if sort:
        cmd['sort'] = sort
    if skip:
        cmd['skip'] = skip
    if take:
        cmd['limit'] = take
    if tailable:
        cmd['tailable'] = tailable
    cmd['singleBatch'] = True
    cmd['batchSize'] = 1000
    docs = self.db_command(cmd)['cursor']['firstBatch']
    if hasattr(docs, '__iter__'):
        return [d if not reduce_by else reduce_by(**d) for d in docs]
    else:
        return docs if not reduce_by else reduce_by(**docs)
def distinct_cmd(self, key, query=None):
    cmd = SON([('distinct', self._mongo_collection.name)])
    cmd['key'] = key
    if query:
        cmd['query'] = query
    return self.db_command(cmd)['values']
def count_cmd(self, select=None, take=None, skip=None):
    # The count command (not aggregate) accepts query/limit/skip and
    # returns the total in 'n'.
    cmd = SON([('count', self._mongo_collection.name)])
    if select:
        cmd['query'] = select
    if take:
        cmd['limit'] = take
    if skip:
        cmd['skip'] = skip
    return self.db_command(cmd)['n']
class ObjectField(Field):
    """Represents an object field (sub-document) in the mongo database."""

    # TODO until V0.3.0: support ordering properties.
    # TODO until V0.4.0: support non boolean additional_properties.
    additional_properties = True

    def __init__(self, properties=None, additional_properties=None,
                 name=None, nullable=None, required=None, get_default=None,
                 **kwargs):
        """Defines an object of fields in MongoDB.

        :Parameters:
          - `properties` (optional): A mapping of a key to a field.
          - `additional_properties` (optional): If ``False``, raise
            :class:`TypeError` if this field contains any properties that
            are not in the object's `properties`.
          - `**kwargs` (optional): See the documentation about
            :class:`~mongomodals.field.Field` for the full details.
        """
        super(ObjectField, self).__init__(name=name, nullable=nullable,
                                          required=required,
                                          get_default=get_default, **kwargs)
        if additional_properties is not None:
            self.additional_properties = additional_properties
        self.properties = SON()
        if properties is not None:
            self.properties.update(properties)

    def __repr__(self):
        repr_fields = u', '.join(
            ("%s=%s" if prop.required else "[%s=%s]") % (key, prop)
            for key, prop in iteritems(self.properties))
        if self.additional_properties:
            repr_fields += ', ...' if repr_fields else '...'
        return "%s<%s>" % (super(ObjectField, self).__repr__(), repr_fields)

    def get_field(self, key):
        """Get the property in the `key` position of this field."""
        return self.properties.get(key, Field.ANONYMOUS)

    def resolve(self, value):
        """Resolve the BSON `value` by setting the default BSON value and
        resolving all of the fields that should be in `value`.

        :Returns:
          The resolved `value`.
        """
        value = super(ObjectField, self).resolve(value)
        if value is not None:
            for key, prop in iteritems(self.properties):
                name = self.get_field_name(key)
                if name in value:
                    value[name] = prop.resolve(value[name])
                elif prop.required and prop.get_default:
                    value[name] = prop.resolve(prop.get_default())
        return value

    def validate(self, value):
        """Raise :class:`TypeError` if `value` is not an instance of
        :class:`dict` or if any validation of its fields fails.
        """
        if super(ObjectField, self).validate(value):
            return True
        # Check the type of the BSON value.
        if not isinstance(value, dict):
            raise TypeError("value %r must be an instance of dict" % value)
        extra_names = set(value)
        # Validate each child property in the object.
        for key, prop in iteritems(self.properties):
            name = self.get_field_name(key)
            extra_names.difference_update((name,))
            if name in value:
                prop.validate(value[name])
            elif prop.required:
                raise TypeError("required property '%s' is missing for %r"
                                % (name, self))
        # If self limits the fields it may contain, make sure the object
        # does not contain any unexpected fields.
        if not self.additional_properties and extra_names:
            raise TypeError("properties %s are not expected for %r"
                            % (', '.join(repr(n) for n in extra_names), self))
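# A minimal usage sketch (uses the base Field class from the same module;
# its behavior here is assumed from the signatures above, not confirmed
# by the source):
address = ObjectField(
    properties={'street': Field(required=True), 'zip': Field()},
    additional_properties=False)
address.validate({'street': '1 Main St'})            # passes
address.validate({'street': '1 Main St', 'x': 1})    # raises TypeError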
def evaluate(outfile, feature_certificate, cpath, task, ext_hash):
    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]

    perf_fs = gridfs.GridFS(db, 'performance')
    perf_coll = db['performance.files']
    remove_existing(perf_coll, perf_fs, ext_hash)

    feature_certdict = cPickle.load(open(feature_certificate))
    feature_hash = feature_certdict['feature_hash']
    image_hash = feature_certdict['image_hash']
    model_hash = feature_certdict['model_hash']
    image_config_gen = feature_certdict['args']['images']
    model_col = db['models.files']
    feature_fs = gridfs.GridFS(db, 'features')
    feature_col = db['features.files']

    stats = ['test_accuracy', 'ap', 'auc', 'mean_ap', 'mean_auc',
             'train_accuracy']

    if isinstance(task, list):
        task_list = task
    else:
        task_list = [task]

    model_configs = get_most_recent_files(model_col, {'__hash__': model_hash})
    for m in model_configs:
        print('Evaluating model', m)
        for task in task_list:
            task['universe'] = task.get('universe', SON([]))
            task['universe']['model'] = m['config']['model']
            print('task', task)
            classifier_kwargs = task.get('classifier_kwargs', {})
            split_results = []
            splits = generate_splits(task, feature_hash, 'features')
            for (ind, split) in enumerate(splits):
                print('split', ind)
                train_data = split['train_data']
                test_data = split['test_data']
                train_filenames = [t['filename'] for t in train_data]
                test_filenames = [t['filename'] for t in test_data]
                # Train and test sets must be disjoint.
                assert set(train_filenames).intersection(test_filenames) == set([])
                print('train feature extraction ...')
                train_features = sp.row_stack(
                    [load_features(f['filename'], feature_fs, m, task)
                     for f in train_data])
                print('test feature extraction ...')
                test_features = sp.row_stack(
                    [load_features(f['filename'], feature_fs, m, task)
                     for f in test_data])
                train_labels = split['train_labels']
                test_labels = split['test_labels']
                print('classifier ...')
                res = svm.classify(train_features, train_labels,
                                   test_features, test_labels,
                                   classifier_kwargs)
                print('Split test accuracy', res['test_accuracy'])
                split_results.append(res)

            model_results = SON([])
            for stat in STATS:
                if stat in split_results[0] and split_results[0][stat] is not None:
                    model_results[stat] = sp.array(
                        [split_result[stat]
                         for split_result in split_results]).mean()

            out_record = SON([('model', m['config']['model']),
                              ('model_hash', model_hash),
                              ('model_filename', m['filename']),
                              ('images', son_escape(image_config_gen)),
                              ('image_hash', image_hash),
                              ('task', son_escape(task))])
            filename = get_filename(out_record)
            out_record['filename'] = filename
            out_record['config_path'] = cpath
            out_record['__hash__'] = ext_hash
            out_record.update(model_results)
            print('dump out ...')
            out_data = cPickle.dumps(SON([('split_results', split_results),
                                          ('splits', splits)]))
            perf_fs.put(out_data, **out_record)

    createCertificateDict(outfile, {'feature_file': feature_certificate})