def _databaseOperation_runCommand(self, target, **kwargs):
     """Run a database command on ``target`` with the command name first.

     ``kwargs`` must contain ``command_name`` (removed before forwarding) and
     ``command`` (a mapping merged in after the command name). All remaining
     keyword arguments are passed through to ``target.command``.
     """
     self.__raise_if_unsupported('runCommand', target, Database)
     verb = kwargs.pop('command_name')
     # Rebuild the document so the command name is guaranteed to be key #1.
     command_doc = SON([(verb, 1)])
     command_doc.update(kwargs['command'])
     kwargs['command'] = command_doc
     return target.command(**kwargs)
Exemple #2
0
def specific_config_gen(IC,args):
    """Build the list of per-row image parameter documents for ``IC``.

    Copies settings from ``args`` onto ``IC``, loads every ``.csv`` table in
    the annotation directory, pairs each ground-truth ``.csv`` (by sorted
    position) with an annotation table to obtain a ``Correctness`` column,
    joins the result with the frame-correspondence table and emits one
    ``SON([('image', ...)])`` document per joined row.

    :Parameters:
      - `IC`: object that receives the attributes assigned below
        (``base_dir``, ``annotate_dir``, ``groundtruth_dir``,
        ``correspondence``, ``size``, ``prefix``, ``current_frame_path``).
      - `args`: mapping with keys ``'base_dir'``, ``'annotate_dir'``,
        ``'groundtruth_dir'``, ``'frame_correspondence'``, ``'size'`` and
        optionally ``'image_extension'`` (defaults to ``'.jpg'``).

    :Returns:
      A list of ``SON`` documents, each with a single ``'image'`` key.
    """
    IC.base_dir = args['base_dir']
    IC.annotate_dir = args['annotate_dir']
    IC.groundtruth_dir = args['groundtruth_dir']
    IC.correspondence = tb.tabarray(SVfile = args['frame_correspondence'])
    IC.size = args['size']
    # The local ``prefix`` is not read again in this function.
    IC.prefix = prefix = args.get('image_extension','.jpg')
    IC.current_frame_path = None
    # Load every annotation csv, in sorted file-name order.
    csvs = [x for x in os.listdir(IC.annotate_dir) if x.endswith('.csv')]
    csvs.sort()
    Xs = [tb.tabarray(SVfile = os.path.join(IC.annotate_dir,csv)) for csv in csvs]
    # Clip number = file name up to the first '.', repeated once per row.
    cns = [csv.split('.')[0] for csv in csvs]
    cns = [[cn]*len(X) for (cn,X) in zip(cns,Xs)]
    Xs = [X.addcols(cn,names=['clip_num']) for (cn,X) in zip(cns,Xs)]

    # Ground-truth csvs are matched to annotation tables by list position.
    # NOTE(review): this assumes both directories list the same clips in the
    # same sorted order -- confirm.
    csvs = [x for x in os.listdir(IC.groundtruth_dir) if x.endswith('.csv')]
    csvs.sort()
    Gs = []
    # ``xfields`` / ``yfields`` are names defined outside this function.
    fields = ['clip_num','Frame'] + xfields + yfields
    for ind,csv in enumerate(csvs):
        try:
            g = tb.tabarray(SVfile = os.path.join(IC.groundtruth_dir,csv))
        except:
            # Ground truth could not be loaded: mark every row with -1.
            # NOTE(review): the bare ``except`` also swallows unrelated
            # errors (including KeyboardInterrupt) -- consider narrowing.
            x = Xs[ind].addcols([-1]*len(Xs[ind]),names=['Correctness'])
        else:
            g = g.addcols([csv.split('.')[0]]*len(g),names = ['clip_num'])
            g = g[fields + ['Confidence']]
            # The ground-truth 'Confidence' column becomes 'Correctness'.
            g.renamecol('Confidence','Correctness')
            x = Xs[ind].join(g,keycols=fields)
        Gs.append(x)
    X = tb.tab_rowstack(Gs)
    X.sort(order=['clip_num','Frame'])
    
    # Keep only correspondence rows whose (clip_num, Frame) occurs in X,
    # then join them onto X.
    Y = IC.correspondence
    F = tb.fast.recarrayisin(Y[['clip_num','Frame']],X[['clip_num','Frame']])
    Y = Y[F]
    X = X.join(Y,keycols=['clip_num','Frame'])

    params = []
    for t in X:
        print(t)  
        cn = t['clip_num']
        # ``fr`` is not used below.
        fr = t['Frame']
        box = get_darpa_box(t)
        bb = box.pop('box')
        xc,yc = bb.center
        # (1920,1080) is presumably the full frame size -- TODO confirm.
        center = correct_center((xc,yc),IC.size,(1920,1080))
        bb_new = bbox.BoundingBox(center = center,width = IC.size[0], height = IC.size[1])
        p = SON([('size',IC.size),
                     ('bounding_box',SON([('xfields',list(bb_new.xs)),('yfields',list(bb_new.ys))])),
                     ('original_bounding_box',SON([('xfields',list(bb.xs)),('yfields',list(bb.ys))])),
                     ('clip_num',cn),
                     ('Frame',int(t['Original'])),
                     ('base_dir',IC.base_dir),
                     ('correctness',int(t['Correctness']))])
        # Remaining entries of ``box`` (after 'box' was popped) are merged in.
        p.update(box)
        # Keep the annotated type as the guess; the confirmed type is only
        # retained when the row's Correctness equals 1.
        p['GuessObjectType'] = p['ObjectType']
        p['ObjectType'] = p['ObjectType'] if t['Correctness'] == 1 else ''
        params.append(SON([('image',p)]))
    return params
Exemple #3
0
def create_user(authdb, user, pwd=None, roles=None, **kwargs):
    """Issue a ``createUser`` command against ``authdb`` and return its result.

    ``pwd`` is only included when truthy (X509 users have no password),
    ``roles`` falls back to ``['root']``, and extra keyword arguments are
    merged into the command document last.
    """
    fields = [('createUser', user)]
    if pwd:
        fields.append(('pwd', pwd))
    fields.append(('roles', roles or ['root']))
    command = SON(fields)
    command.update(**kwargs)
    return authdb.command(command)
Exemple #4
0
def create_user(authdb, user, pwd=None, roles=None, **kwargs):
    """Create ``user`` by sending a ``createUser`` command to ``authdb``.

    The password is omitted when ``pwd`` is falsy (X509 authentication does
    not use one); ``roles`` defaults to ``["root"]``; any additional keyword
    arguments are appended to the command document.
    """
    effective_roles = roles or ["root"]
    document = SON([("createUser", user)])
    if pwd:
        document["pwd"] = pwd
    document["roles"] = effective_roles
    document.update(**kwargs)
    result = authdb.command(document)
    return result
Exemple #5
0
    def command(self,
                command,
                value=1,
                callback=None,
                check=True,
                allowable_errors=[],
                **kwargs):
        """Send a MongoDB command through a ``$cmd`` cursor.

        When `command` is a string, the document ``{command: value}`` is
        built; otherwise `command` is used as the command document as-is
        (and is updated in place with `kwargs`). The query is issued with
        ``limit=-1`` and the outcome is handed to `callback`; this method
        itself returns nothing.

        Examples:

        >>> db.command("buildinfo")
        >>> db.command("collstats", collection_name)
        >>> db.command("filemd5", object_id, root=file_root)

        :Parameters:
          - `command`: the command document, or the command name for simple
            commands.

            .. note:: key order in the command document matters (the
               "verb" must be first), so multi-key commands should be
               passed as a :class:`~bson.son.SON` or as a string plus
               kwargs rather than a plain `dict`.

          - `value` (optional): value paired with the verb when `command`
            is a string.
          - `callback` (optional): forwarded to the cursor's ``find``.
          - `check`, `allowable_errors` (optional): accepted but not used
            by this implementation.
          - `**kwargs` (optional): extra keys added to the command
            document before it is sent.

        .. mongodoc:: commands
        """
        if isinstance(command, basestring):
            document = SON([(command, value)])
        else:
            document = command

        document.update(kwargs)

        from mongotor.cursor import Cursor
        Cursor('$cmd', document, is_command=True).find(limit=-1,
                                                       callback=callback)
    def __set_fail_point(self, client, command_args):
        """Enable the ``failCommand`` fail point and schedule its removal.

        Skips the test when test commands are not enabled. Otherwise sends
        ``configureFailPoint`` merged with ``command_args`` to the admin
        database and registers a cleanup that switches the fail point off.
        """
        if not client_context.test_commands_enabled:
            self.skipTest('Test commands must be enabled')

        fail_point = SON([('configureFailPoint', 'failCommand')])
        fail_point.update(command_args)
        client.admin.command(fail_point)

        # Turn the same fail point off again once the test finishes.
        cleanup_options = {'mode': 'off'}
        self.addCleanup(client.admin.command,
                        'configureFailPoint',
                        fail_point['configureFailPoint'],
                        **cleanup_options)
def near(col, params):
    """Translate a ``a:b:radius`` string into a ``$near`` query document.

    ``params`` must contain exactly two ``:`` separators. The first two
    parts become the ``$near`` coordinate pair and the third part, divided
    by 69.0, becomes ``$maxDistance``.

    Raises ``TypeError`` when ``params`` is ``None`` or does not have three
    parts, and ``ValueError`` when a part is not a valid float.
    """
    if params is None or params.count(':') != 2:
        raise TypeError("$near requires three arguments.  Use like /%s/$near=-73.10:42.18:0.5/ to return all records within a 0.5-mile radius of %s" % (col, col))
    first_text, second_text, radius_text = params.split(":")
    first = float(first_text)
    second = float(second_text)
    max_distance = float(radius_text) / 69.0
    query = SON({"$near": [first, second]})
    query.update({"$maxDistance": max_distance})
    return query
Exemple #8
0
def create_user(authdb, user, pwd=None, roles=None, **kwargs):
    """Create a user by running ``createUser`` on ``authdb``.

    Sends the command directly instead of going through PyMongo's
    ``add_user`` helper, which isn't yet compatible with MongoDB 4.
    ``pwd`` is left out when falsy (X509 doesn't use a password) and
    ``roles`` defaults to ``['root']``.
    """
    pairs = [('createUser', user)]
    if pwd:
        pairs.append(('pwd', pwd))
    pairs.append(('roles', roles or ['root']))
    create_cmd = SON(pairs)
    create_cmd.update(**kwargs)
    return authdb.command(create_cmd)
Exemple #9
0
    def command(self, command, value=1, read_preference=None,
        callback=None, check=True, allowable_errors=[], connection=None, **kwargs):
        """Send a MongoDB command via ``find_one`` on the ``$cmd`` collection.

        When `command` is a string, the document ``{command: value}`` is
        built; otherwise `command` is used as the command document as-is
        (and is updated in place with `kwargs`). The outcome is delivered
        to `callback`; this method itself returns nothing.

        Examples:

        >>> db.command("buildinfo")
        >>> db.command("collstats", collection_name)
        >>> db.command("filemd5", object_id, root=file_root)

        :Parameters:
          - `command`: the command document, or the command name for simple
            commands.

            .. note:: key order in the command document matters (the
               "verb" must be first), so multi-key commands should be
               passed as a :class:`~bson.son.SON` or as a string plus
               kwargs rather than a plain `dict`.

          - `value` (optional): value paired with the verb when `command`
            is a string.
          - `read_preference` (optional): falls back to
            ``self._read_preference`` when ``None``.
          - `callback`, `connection` (optional): forwarded to ``find_one``.
          - `check`, `allowable_errors` (optional): accepted but not used
            by this implementation.
          - `**kwargs` (optional): extra keys added to the command
            document before it is sent.

        """
        if isinstance(command, basestring):
            document = SON([(command, value)])
        else:
            document = command
        document.update(kwargs)

        effective_preference = (self._read_preference
                                if read_preference is None
                                else read_preference)

        cmd_client = Client(self, '$cmd')
        cmd_client.find_one(document, is_command=True,
            connection=connection, read_preference=effective_preference,
            callback=callback)
Exemple #10
0
class FailPoint:
    """Async context manager that enables ``failCommand`` and disables it on exit.

    ``cmd_on`` holds the ``configureFailPoint`` document (merged with the
    caller's ``command_args``) that is sent to the admin database on entry.
    """

    def __init__(self, client, command_args):
        self.client = client
        enable_doc = SON([("configureFailPoint", "failCommand")])
        enable_doc.update(command_args)
        self.cmd_on = enable_doc

    async def __aenter__(self):
        admin = self.client.admin
        await admin.command(self.cmd_on)

    async def __aexit__(self, exc_type, exc, tb):
        # Switch off whichever fail point the "on" document named.
        fail_point_name = self.cmd_on["configureFailPoint"]
        admin = self.client.admin
        await admin.command("configureFailPoint", fail_point_name, mode="off")
Exemple #11
0
class FailPoint:
    """Async context manager wrapping the ``failCommand`` fail point.

    On entry the stored ``cmd_on`` document is sent to the admin database;
    on exit the same fail point is configured with ``mode='off'``.
    """

    def __init__(self, client, command_args):
        document = SON([('configureFailPoint', 'failCommand')])
        document.update(command_args)
        self.client = client
        self.cmd_on = document

    async def __aenter__(self):
        run_command = self.client.admin.command
        await run_command(self.cmd_on)

    async def __aexit__(self, exc_type, exc, tb):
        run_command = self.client.admin.command
        target = self.cmd_on['configureFailPoint']
        await run_command('configureFailPoint', target, mode='off')
Exemple #12
0
def near(col, params):
    """Build a ``$near`` / ``$maxDistance`` query from ``"a:b:radius"``.

    The string is split on ``:`` into exactly three float values. The first
    two form the ``$near`` pair; the third is divided by 69.0 to give
    ``$maxDistance``.

    Raises ``TypeError`` if ``params`` is ``None`` or does not contain
    exactly two colons; ``ValueError`` if a part cannot be parsed as float.
    """
    has_three_parts = params is not None and params.count(':') == 2
    if not has_three_parts:
        raise TypeError(
            "$near requires three arguments.  Use like /%s/$near=-73.10:42.18:0.5/ to return all records within a 0.5-mile radius of %s"
            % (col, col))
    parts = params.split(":")
    point = [float(parts[0]), float(parts[1])]
    radius = float(parts[2]) / 69.0
    query = SON({"$near": point})
    query.update({"$maxDistance": radius})
    return query
Exemple #13
0
def generate_splits(task_config,hash,colname):
    """Generate train/test splits for ``task_config`` over collection ``colname``.

    The universe query starts as ``{'__hash__': hash}`` and is extended with
    the task's optional ``'universe'`` entry (passed through ``reach_in``
    with the ``'config'`` prefix). The task's ``'query'`` is converted the
    same way. Both queries are printed and then handed to
    ``traintest.generate_split2``, whose result is returned.
    """
    universe_query = SON([('__hash__', hash)])
    n_train = task_config['ntrain']
    n_test = task_config['ntest']
    positive_counts = {
        'ntrain_pos': task_config.get('ntrain_pos'),
        'ntest_pos': task_config.get('ntest_pos'),
    }
    n_splits = task_config.get('N', 10)
    task_query = task_config['query']
    universe_query.update(reach_in('config', task_config.get('universe', SON([]))))
    config_query = reach_in('config', task_query)

    print('q',config_query)
    print('u',universe_query)

    return traintest.generate_split2(DB_NAME, colname, config_query, n_splits,
                                     n_train, n_test,
                                     universe=universe_query,
                                     use_negate=True,
                                     **positive_counts)
Exemple #14
0
class FlatFileKVEngine(object):
    """Key/value store kept in memory and persisted to a single flat file.

    Documents are cached in a ``SON`` keyed by ``_id``. Changes are counted
    in ``modified_count`` and written out with :meth:`flush` once the count
    exceeds ``conn_config.cache_modified``.
    """

    def __init__(self, file_path, conn_config):
        """Load the cache from ``file_path`` when that file already exists.

        :Parameters:
          - `file_path`: path of the backing file.
          - `conn_config`: object whose ``cache_modified`` attribute is the
            number of pending modifications tolerated before a flush.
        """
        self.file_path = file_path

        self.__conn_config = conn_config
        self.__cache = SON()
        self.modified_count = 0

        if os.path.isfile(self.file_path):
            for doc in _read_pretty(self.file_path):
                self.__cache[doc["_id"]] = BSON.encode(doc)

    @classmethod
    def touch(cls, file_path):
        """Create an empty file at ``file_path`` unless a file is already there."""
        if not os.path.isfile(file_path):
            with open(file_path, "w"):
                pass

    @property
    def document_count(self):
        """Number of documents currently held in the cache."""
        return len(self.__cache)

    def flush(self):
        """Write the whole cache to ``file_path`` and reset the counter."""
        _write_pretty(self.file_path, self.__cache)
        self.modified_count = 0

    def read(self):
        """Return the live cache mapping (not a copy)."""
        return self.__cache

    def write(self, documents):
        """Merge ``documents`` into the cache, flushing past the threshold.

        Raises ``TypeError`` when ``documents`` is not a ``SON``.
        """
        if not isinstance(documents, SON):
            raise TypeError("Expecting 'SON' type, got {!r}"
                            "".format(type(documents).__name__))
        self.modified_count += len(documents)
        self.__cache.update(documents)

        if self.modified_count > self.__conn_config.cache_modified:
            self.flush()

    def delete(self, doc_id):
        """Remove ``doc_id`` from the cache, flushing past the threshold.

        Raises ``KeyError`` when ``doc_id`` is not cached; in that case the
        modification counter is left untouched.
        """
        # Delete first: a failed delete must not count as a modification.
        del self.__cache[doc_id]
        self.modified_count += 1

        if self.modified_count > self.__conn_config.cache_modified:
            self.flush()
Exemple #15
0
def train_test_loop(outfile,extract_creates,task_config,feature_config_path,hash):
    """Train and test every model of a feature config, storing results in GridFS.

    For each entry of the feature config's ``'models'`` list, runs
    ``train_test`` on the ``'features'`` collection of database ``'v1'``
    restricted to that model, pickles the returned split data and stores it
    in the ``model_performance`` GridFS together with a metadata record.
    Finally writes a certificate to ``outfile``.
    """
    feature_config = get_config(feature_config_path)

    universe = SON([('__config_hash__', hash)])

    # Not used further down; kept so a missing 'image' key still fails here.
    image_params = SON([('image', feature_config['image'])])
    model_specs = feature_config['models']

    n_train = task_config['ntrain']
    n_test = task_config['ntest']
    n_train_pos = task_config.get('ntrain_pos')
    n_splits = task_config.get('N', 10)
    task_query = task_config['query']
    universe.update(reach_in('config', task_config.get('universe', SON([]))))

    print('\n')
    print('BASE',universe)
    print('\n')

    connection = pm.Connection(document_class=SON)
    database = connection['v1']
    performance_fs = gridfs.GridFS(database, collection='model_performance')

    config_query = reach_in('config', task_query)
    for model in model_specs:
        model_universe = universe.copy()
        model_universe.update(reach_in('config.model', model))
        splitdata, results = train_test(config_query, 'v1', 'features',
                                        n_train, n_test,
                                        ntrain_pos=n_train_pos,
                                        N=n_splits,
                                        universe=model_universe)

        pickled_splits = cPickle.dumps(splitdata)

        record = SON([
            ('feature_config_path', feature_config_path),
            ('model', model),
            ('task', son_escape(task_config)),
            ('image__aggregate__', son_escape(feature_config['image'])),
        ])
        # The file name is derived before the results are merged in.
        record_filename = get_filename(record)
        record.update(results)
        record['filename'] = record_filename

        performance_fs.put(pickled_splits, **record)

    certificate = {'task_config': task_config,
                   'feature_config': feature_config,
                   'feature_config_path': feature_config_path}
    createCertificateDict(outfile, certificate)
Exemple #16
0
def put_in_performance(split_results,image_config_gen,m,model_hash,image_hash,perf_coll,task,ext_hash):
    """Insert one aggregated performance record for model ``m`` into ``perf_coll``.

    For every name in ``STATS`` that is present and not ``None`` in the first
    split result, the mean of that stat over all split results is stored.
    The record also carries the model config, hashes, file name, escaped
    image and task configs, and ``ext_hash`` under ``'__hash__'``.

    ``split_results`` must be non-empty (its first element is inspected).
    """
    first_split = split_results[0]
    model_results = SON([])
    for stat in STATS:
        # Identity test: ``!= None`` would compare element-wise on arrays.
        if stat in first_split and first_split[stat] is not None:
            model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()

    out_record = SON([('model',m['config']['model']),
                      ('model_hash',model_hash),
                      ('model_filename',m['filename']),
                      ('images',son_escape(image_config_gen['images'])),
                      ('image_hash',image_hash),
                      ('task',son_escape(task)),
                      ('__hash__',ext_hash)
                 ])

    out_record.update(model_results)

    perf_coll.insert(out_record)
Exemple #17
0
def put_in_split_result(res,image_config_gen,m,task,ext_hash,split_id,splitres_fs):
    """Store the result of one split in ``splitres_fs``.

    The pickled payload is ``SON([('split_result', res)])``. Its metadata
    holds the model config, escaped image and task configs, ``split_id``, a
    file name derived from those fields, ``ext_hash`` under ``'__hash__'``,
    and every stat from ``STATS`` that is present and not ``None`` in ``res``.
    """
    out_record = SON([('model',m['config']['model']),
                      ('images',son_escape(image_config_gen['images'])),
                      ('task',son_escape(task)),
                      ('split_id',split_id),
                 ])

    split_result = SON([])
    for stat in STATS:
        # Identity test: ``!= None`` would compare element-wise on arrays.
        if stat in res and res[stat] is not None:
            split_result[stat] = res[stat]

    # The file name is derived before the hash and stats are added.
    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record['__hash__'] = ext_hash
    out_record.update(split_result)
    print('dump out split result...')
    out_data = cPickle.dumps(SON([('split_result',res)]))
    splitres_fs.put(out_data,**out_record)
Exemple #18
0
    def get_last_error(self, db, **options):
        """Run ``getlasterror`` on the database part of ``db``.

        Generator-style coroutine: yields the pending query and hands the
        decoded reply document back through ``defer.returnValue``.

        Raises ``DuplicateKeyError`` when the reply reports an error with
        code 11000, ``OperationFailure`` for any other reported error.
        """
        command = SON([("getlasterror", 1)])
        cmd_collection = "%s.$cmd" % db.split('.', 1)[0]
        command.update(options)

        reply = yield self.send_QUERY(Query(collection=cmd_collection,
                                            query=command))

        assert len(reply.documents) == 1

        document = reply.documents[0].decode()
        err = document.get("err", None)
        code = document.get("code", None)

        if err is None:
            defer.returnValue(document)

        error_class = DuplicateKeyError if code == 11000 else OperationFailure
        raise error_class(err, code=code)
Exemple #19
0
    def get_last_error(self, db, **options):
        """Run ``getlasterror`` on the database part of ``db``.

        Returns the result of ``send_QUERY`` with a callback attached that
        decodes the single reply document. The callback raises
        ``DuplicateKeyError`` when the reply reports an error with code
        11000 and ``OperationFailure`` for any other reported error.
        """
        command = SON([("getlasterror", 1)])
        cmd_collection = "%s.$cmd" % db.split('.', 1)[0]
        command.update(options)

        def decode_reply(reply):
            assert len(reply.documents) == 1

            document = reply.documents[0].decode()
            err = document.get("err", None)
            code = document.get("code", None)

            if err is None:
                return document

            error_class = (DuplicateKeyError if code == 11000
                           else OperationFailure)
            raise error_class(err, code=code)

        pending = self.send_QUERY(Query(collection=cmd_collection,
                                        query=command))
        return pending.addCallback(decode_reply)
Exemple #20
0
def get_op_gen(op, oplist):
    """Fill in ``out_args`` and certificate paths for ``op`` (and its parents).

    Does nothing when ``op`` already has a non-``None`` ``"outcertpaths"``.
    Otherwise:

    - for an ``"inject"`` function, ``out_args`` maps every out-root to the
      op's ``"params"``;
    - for any other function, the first op in ``oplist`` producing each
      in-root is taken as its parent, parents are processed recursively,
      ``op["incertpaths"]`` is set from the parents' ``out_args``, and
      ``out_args`` is the union of those arguments updated with ``params``.

    ``op`` is modified in place; nothing is returned.

    Raises ``IndexError`` when no op in ``oplist`` produces one of the
    in-roots.
    """
    if op.get("outcertpaths") is not None:
        return

    func = op["func"]
    inroots = func.inroots
    outroots = func.outroots
    if func.action_name == "inject":
        # Indexing (not .get) so a missing "params" still raises KeyError.
        params = op["params"]
        out_args = SON([(outroot, params) for outroot in outroots])

    else:
        params = op.get("params", SON([]))

        parents = []
        for ir in inroots:
            producers = [op0 for op0 in oplist if ir in op0["func"].outroots]
            if not producers:
                # Name the in-root that has no producer, not some other op.
                raise IndexError("No parent found for input collection " + repr(ir))
            parents.append(producers[0])

        for parent in parents:
            get_op_gen(parent, oplist)

        in_args = [parent["out_args"] for parent in parents]
        op["incertpaths"] = [
            get_cert_path(func.dbname, inroot, get_config_string(in_arg))
            for (inroot, in_arg) in zip(inroots, in_args)
        ]
        out_args = dict_union(in_args)
        out_args.update(params)

    op["out_args"] = out_args
    op["outcertpaths"] = [
        get_cert_path(func.dbname, outroot, get_config_string(out_args)) for outroot in func.outroots
    ]
Exemple #21
0
def oid_date_range_filter(dt_from=None, dt_upto=None, field_name='_id'):
    """
    Construct a range query useful for querying an ObjectId field by date.
    :Parameters:
        - dt_from (datetime or tuple): lower bound; a tuple is expanded into datetime(*tuple)
        - dt_upto (datetime or tuple): upper bound; a tuple is expanded into datetime(*tuple)
        - field_name (str): field to query, defaults to '_id'; if None only the bare range document is returned
    :Returns:
        - a SON with '$gte' (when dt_from is given) and '$lte' (when dt_upto is given) ObjectId bounds,
          wrapped as {field_name: range} unless field_name is None
    :Raises:
        - TypeError if a bound is neither a datetime nor a tuple
    """
    def as_datetime(value):
        if isinstance(value, datetime):
            return value
        if isinstance(value, tuple):
            return datetime(*value)
        raise TypeError('dt must be a date or tuple')

    bounds = SON()
    for operator, limit in (('$gte', dt_from), ('$lte', dt_upto)):
        if limit is not None:
            bounds[operator] = ObjectId.from_datetime(as_datetime(limit))
    if field_name is None:
        return bounds
    return SON([(field_name, bounds)])
Exemple #22
0
 async def set_fail_point(self, client, command_args):
     """Send ``configureFailPoint: failCommand`` plus ``command_args`` to the admin database."""
     fail_point = SON([('configureFailPoint', 'failCommand')])
     fail_point.update(command_args)
     admin = client.admin
     await admin.command(fail_point)
Exemple #23
0
def mr(
    coll,                       # a pymongo collection instance
    fun_map,                    # js function used for map
    fun_reduce=None,            # js function used for reduce defaults to one counting values
    query={},                   # a pymongo query dictionary to query coll defaults to {}
    out={"replace": 'mr_tmp'},  # output dict {'replace'|'merge'|'reduce'|'inline':collection_name|'database':db_name}
    fun_finalize=None,          # js function to run on finalize
    scope={},                   # vars available during map-reduce
    sort=None,                  # i.e: sort= { "_id":1 } short dict to sort before map
    jsMode=False,               # True|False (don't convert to Bson between map & reduce if True)
    verbose=1                   # if 1 includes timing info on output if 2,3  more details
        ):
    """Simplified generic Map Reduce.
    `see MongoDB Map Reduce <http://docs.mongodb.org/manual/reference/command/mapReduce/>`_

    :Parameters:
      - coll (object): a pymongo collection instance
      - fun_map: js function used for map
      - fun_reduce: js function used for reduce; when None it is loaded with
        ``parse_js_default('MapReduce.js', 'GroupCountsReduce')``
      - query: a pymongo query dictionary to query the collection, defaults to {}
      - out: a dictionary for output specification; must contain one of the keys
        'replace', 'merge', 'reduce' or 'inline' (the first present, in that order, is used),
        and may contain 'db' (output database name) and 'nonAtomic' (defaults to False);
        defaults to {"replace": 'mr_tmp'}
      - fun_finalize: js function to run on finalize
      - scope: vars available during map-reduce-finalize
      - sort: dictionary to sort before map  i.e: sort= { "_id":1 }
      - jsMode: True|False (don't convert to Bson between map & reduce if True)
        should be False if we expect more than 500K distinct results
      - verbose: values > 0 print a summary after the run, values > 1 also print the parameters before it

    :Returns: tuple (results collection or results list if out={"inline" :1}, MR response statistics)
    :Raises: IndexError if ``out`` contains none of the four output keys
    :Example: see :func:`group_counts` function
    """
    # NOTE(review): query/out/scope use mutable defaults; they are not mutated
    # in this function (``out`` is rebound, not modified).
    def mr_cmd():
        """returns the actual command from output parameter"""
        return [i for i in ['replace', 'merge', 'reduce', 'inline']
                if i in list(out.keys())][0]
    command = mr_cmd()
    # Remember the requested output db before ``out`` is rebuilt below.
    out_db = out.get('db', None)
    out = SON([(command, out[command]), ('nonAtomic', out.get('nonAtomic', False))])
    if out_db:
        out.update(SON([('db', out_db)]))
#     out = SON([(command, out[command]), ('db', out.get('db')),
#                    ('nonAtomic', out.get('nonAtomic', False))])
        # nonAtomic not allowed on replace
    fun_map = Code(fun_map, {})
    if fun_reduce is None:
        fun_reduce = parse_js_default('MapReduce.js', 'GroupCountsReduce')
    fun_reduce = Code(fun_reduce, {})

    if sort:
        sort = SON(sort)
    if verbose > 1:
        # The named fields below are filled from locals(), so the local
        # variable names in this function are part of the format contract.
        frmt = "Map Reduce {}\n\
                collection={coll!s:}\n\
                query=     {query!s:}\n\
                sort=      {sort!s:}\n\
                scope=     {scope!s}\n\
                out=       {out!s:}\n\
                jsMode=    {jsMode!s:}\n\
                map=       {fun_map!s:}\n\
                reduce=    {fun_reduce!s:}\n\
                finalize=  {fun_finalize!s:}\n"
        print(frmt.format('Starting...', **locals()))
    r = coll.map_reduce(fun_map, fun_reduce, out=out, query=query,
                        finalize=fun_finalize, scope=scope, sort=sort,
                        full_response=True, jsMode=jsMode)
    if verbose > 0:
        # Fields ok/timeMillis/counts are read from the full response ``r``.
        frmt = "Map Reduce {}\n\
                ok=        {ok:}\n\
                millisecs = {timeMillis:,d}\n\
                counts=    {counts!s:}\n\
                out=       {!s:}\n"
        print(frmt.format('End', out, **r))

    if command == 'inline':
        # if 'results' in list(r.keys()):              # @note:  results is a list if inline else a coll
        # Inline runs: move the result list out of the response document.
        results = r['results']
        del r['results']
    else:
        db_out = out.get('db', coll.database.name)     # if db not specified it is the collection_db
        results = coll.database.client[db_out][out[command]]
    return results, r
 def set_fail_point(self, command_args):
     """Send ``configureFailPoint: failCommand`` plus ``command_args`` via ``self.client``."""
     fail_point = SON([("configureFailPoint", "failCommand")])
     fail_point.update(command_args)
     admin = self.client.admin
     admin.command(fail_point)
Exemple #25
0
def greedy_optimization(outfile,task,image_certificate_file,initial_model,convolve_func,rep_limit, modifier_args,modifier):
    """Greedily search model configurations for the best test accuracy.

    Starting from ``initial_model``, each round evaluates the not-yet-seen
    configurations returned by ``get_consistent_deltas`` for the current
    centre and moves the centre to the best one if it beats every accuracy
    recorded so far. The search stops when nothing improves, when there are
    no new configurations, or after ``rep_limit`` rounds (``None`` means no
    limit). The pickled filterbanks and a summary record are stored in the
    ``optimized_performance`` GridFS and a certificate is written to
    ``outfile``.
    """
    conn = pm.Connection(document_class=bson.SON)
    db = conn['v1']

    opt_fs = gridfs.GridFS(db,'optimized_performance')

    image_fs = gridfs.GridFS(db,'raw_images')

    # NOTE(review): unpickling runs arbitrary code; the certificate file must
    # come from a trusted source.
    with open(image_certificate_file) as cert_file:
        image_certdict = cPickle.load(cert_file)
    print('using image certificate', image_certificate_file)

    image_hash = image_certdict['run_hash']
    image_args = image_certdict['out_args']

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.setup_pyfft()

    filterbanks = []
    perfs = []
    model_configs = []
    center_config = initial_model

    i = 0

    # Test for None first so an unlimited run never compares int with None.
    while rep_limit is None or i < rep_limit:
        i += 1
        print('Round', i)
        next_configs = [m for m in get_consistent_deltas(center_config,modifier) if m not in model_configs]

        if next_configs:
            next_results = [get_performance(task,image_hash,image_fs,m,convolve_func) for m in next_configs]
            next_perfs = [x[0] for x in next_results]
            next_filterbanks = [x[1] for x in next_results]
            next_perf_ac_max = np.array([x['test_accuracy'] for x in next_perfs]).max()
            perf_ac_max = max([x['test_accuracy'] for x in perfs]) if perfs else 0
            if next_perf_ac_max > perf_ac_max:
                next_perf_ac_argmax = np.array([x['test_accuracy'] for x in next_perfs]).argmax()
                center_config = next_configs[next_perf_ac_argmax]
                print('\n\n')
                print('new best performance is', next_perf_ac_max, 'from model', center_config)
                print('\n\n')
                perfs.extend(next_perfs)
                model_configs.extend(next_configs)
                filterbanks.extend(next_filterbanks)
            else:
                print('Breaking because no further optimization could be done.  Best existing performance was', perf_ac_max, 'while best next performance was', next_perf_ac_max)
                break

        else:
            print('Breaking because no next configs')
            break

    # NOTE(review): if no round recorded any performance, ``perfs`` is empty
    # and argmax raises ValueError here -- confirm whether that can occur.
    perfargmax = np.array([p['test_accuracy'] for p in perfs]).argmax()
    best_model = model_configs[perfargmax]
    best_performance = perfs[perfargmax]

    out_record = SON([('initial_model',initial_model),
                       ('task',son_escape(task)),
                       ('images',son_escape(image_args)),
                       ('images_hash',image_hash),
                       ('modifier_args',son_escape(modifier_args)),
                       ('modifier',modifier.__class__.__module__ + '.' + modifier.__class__.__name__)
                     ])
    # The file name is derived from the identifying fields only, before the
    # result fields are appended.
    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record.update(SON([('performances',perfs)]))
    out_record.update(SON([('best_model',best_model)]))
    out_record.update(SON([('best_performance',best_performance)]))
    out_record.update(SON([('num_steps',len(model_configs))]))
    out_record.update(SON([('models',model_configs)]))
    outdata = cPickle.dumps(filterbanks)

    opt_fs.put(outdata,**out_record)

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.cleanup_pyfft()

    createCertificateDict(outfile,{'image_file':image_certificate_file})
Exemple #26
0
 def find_cmd(self, select=None, project=None, sort=None, take=None, skip=None, tailable=False, reduce_by=None):
     """Run a single-batch ``find`` command and return its first batch.

     Only truthy options are added to the command. The command always asks
     for a single batch of at most 1000 documents. When ``reduce_by`` is
     given, each document is passed to it as keyword arguments.
     """
     cmd = SON([('find', self._mongo_collection.name)])
     optional_fields = (
         ('filter', select),
         ('projection', project),
         ('sort', sort),
         ('skip', skip),
         ('limit', take),
         ('tailable', tailable),
     )
     for field, setting in optional_fields:
         if setting:
             cmd[field] = setting
     cmd['singleBatch'] = True
     cmd['batchSize'] = 1000

     docs = self.db_command(cmd)['cursor']['firstBatch']
     if not hasattr(docs, '__iter__'):
         return docs if not reduce_by else reduce_by(**docs)
     return [d if not reduce_by else reduce_by(**d) for d in docs]
Exemple #27
0
 def distinct_cmd(self, key, query=None):
     """Run the ``distinct`` command for ``key`` and return its ``values`` list.

     :Parameters:
       - `key`: field whose distinct values are requested.
       - `query` (optional): filter document; only sent when truthy.
     """
     cmd = SON([('distinct', self._mongo_collection.name)])
     cmd['key'] = key
     if query:
         # Send the caller's filter (the field name was being sent instead).
         cmd['query'] = query
     return self.db_command(cmd)['values']
Exemple #28
0
 def count_cmd(self, select=None, take=None, skip=None):
     """Run the ``count`` command and return the number of matching documents.

     :Parameters:
       - `select` (optional): filter document, sent as ``query`` when truthy.
       - `take` (optional): sent as ``limit`` when truthy.
       - `skip` (optional): sent as ``skip`` when truthy.
     """
     # ``count`` is the command that accepts query/limit/skip and answers
     # with ``n``; ``aggregate`` takes a pipeline and has no ``n`` field.
     cmd = SON([('count', self._mongo_collection.name)])
     if select:
         cmd['query'] = select
     if take:
         cmd['limit'] = take
     if skip:
         cmd['skip'] = skip
     return self.db_command(cmd)['n']
Exemple #29
0
class ObjectField(Field):
    """A field holding an object (sub-document) made of named child fields.
    """

    # TODO until V0.3.0: support ordering properties.
    # TODO until V0.4.0: support non boolean additional_properties.
    additional_properties = True

    def __init__(self,
                 properties=None,
                 additional_properties=None,
                 name=None,
                 nullable=None,
                 required=None,
                 get_default=None,
                 **kwargs):
        """Define an object of fields in MongoDB.

        :Parameters:
          - `properties` (optional): A mapping of a key to a field.
          - `additional_properties` (optional): If false, validation raises
            :class:`TypeError` when the value contains properties that are
            not in the object's `properties`. When omitted, the class-level
            default is used.
          - `**kwargs` (optional): Forwarded, together with `name`,
            `nullable`, `required` and `get_default`, to the base
            :class:`~mongomodals.field.Field` initializer.
        """
        super(ObjectField, self).__init__(
            name=name,
            nullable=nullable,
            required=required,
            get_default=get_default,
            **kwargs)

        if additional_properties is not None:
            self.additional_properties = additional_properties

        declared = SON()
        if properties is not None:
            declared.update(properties)
        self.properties = declared

    def __repr__(self):
        # Required properties print as key=field, optional ones in brackets.
        rendered = []
        for key, prop in iteritems(self.properties):
            template = "%s=%s" if prop.required else "[%s=%s]"
            rendered.append(template % (key, prop))
        repr_fields = u', '.join(rendered)
        if self.additional_properties:
            suffix = ', ...' if repr_fields else '...'
            repr_fields = repr_fields + suffix
        return "%s<%s>" % (super(ObjectField, self).__repr__(), repr_fields)

    def get_field(self, key):
        """Return the child field declared for `key`, or
        ``Field.ANONYMOUS`` when none is declared.
        """
        return self.properties.get(key, Field.ANONYMOUS)

    def resolve(self, value):
        """Resolve `value` through the base class, then resolve each declared
        property in place; a missing required property that has a
        `get_default` is filled with its resolved default.

        :Returns:
          The resolved `value`.
        """
        value = super(ObjectField, self).resolve(value)
        if value is None:
            return value
        for key, prop in iteritems(self.properties):
            name = self.get_field_name(key)
            if name in value:
                value[name] = prop.resolve(value[name])
                continue
            if prop.required and prop.get_default:
                value[name] = prop.resolve(prop.get_default())
        return value

    def validate(self, value):
        """Raise :class:`TypeError` if `value` is not a :class:`dict`, if a
        required property is missing, or if undeclared properties are
        present while `additional_properties` is false. Child validators
        are run on every declared property that is present.

        Returns ``True`` when the base class validation already accepted
        the value.
        """
        if super(ObjectField, self).validate(value):
            return True

        if not isinstance(value, dict):
            raise TypeError("value %r must be an instance of dict" % value)

        # Start from every key present and strike out the declared ones.
        undeclared = set(value)
        for key, prop in iteritems(self.properties):
            name = self.get_field_name(key)
            undeclared.discard(name)
            if name in value:
                prop.validate(value[name])
            elif prop.required:
                raise TypeError("required property '%s' is missing for %r" %
                                (name, self))

        if undeclared and not self.additional_properties:
            raise TypeError("properties %s are not excepted for %r" %
                            (', '.join(repr(n) for n in undeclared), self))
Exemple #30
0
def evaluate(outfile,feature_certificate,cpath,task,ext_hash):
    """Train and score a classifier on stored features and record the results.

    For every model configuration matching the certificate's model hash and
    for every task, the function generates train/test splits, loads the
    features of each split from the ``features`` GridFS, runs
    ``svm.classify`` and averages the per-split statistics.  One record per
    (model, task) pair is written to the ``performance`` GridFS, holding the
    pickled split results and splits plus descriptive metadata.  Finally a
    certificate is written to `outfile`.

    :Parameters:
      - `outfile`: passed to ``createCertificateDict`` as the certificate
        destination.
      - `feature_certificate`: path of a pickled dict providing the keys
        ``feature_hash``, ``image_hash``, ``model_hash`` and
        ``args['images']``.
      - `cpath`: stored verbatim as ``config_path`` in each output record.
      - `task`: a single task mapping or a list of them.  Each task is
        modified in place: its ``universe`` entry is created if missing
        and ``universe['model']`` is set to the model being evaluated.
      - `ext_hash`: stored as ``__hash__`` in each output record and passed
        to ``remove_existing`` before any new record is written.

    :Raises:
      :class:`AssertionError` if a split shares a filename between its train
      and test data; :class:`IndexError` if a task produces no splits.
    """
    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]

    perf_fs = gridfs.GridFS(db,'performance')
    perf_coll = db['performance.files']

    # Presumably drops results stored by an earlier run under the same hash
    # so that reruns do not accumulate duplicates -- confirm in the helper.
    remove_existing(perf_coll,perf_fs,ext_hash)

    # Read the certificate inside a context manager so the file handle is
    # closed deterministically.
    # NOTE: unpickling runs arbitrary code; the certificate must be trusted.
    with open(feature_certificate) as cert_file:
        feature_certdict = cPickle.load(cert_file)
    feature_hash = feature_certdict['feature_hash']
    image_hash = feature_certdict['image_hash']
    model_hash = feature_certdict['model_hash']
    image_config_gen = feature_certdict['args']['images']
    model_col = db['models.files']
    feature_fs = gridfs.GridFS(db,'features')

    # NOTE(review): this local is never read; the aggregation loop below
    # iterates the module-level STATS instead.  Confirm the two are meant to
    # be the same list and drop one of them.
    stats = ['test_accuracy','ap','auc','mean_ap','mean_auc','train_accuracy']

    task_list = task if isinstance(task,list) else [task]

    model_configs = get_most_recent_files(model_col,{'__hash__':model_hash})

    for m in model_configs:
        print('Evaluating model',m)
        for task_spec in task_list:
            # Tag the task with the model under evaluation; this mutates the
            # caller's task mapping and is overwritten for each model.
            task_spec['universe'] = task_spec.get('universe',SON([]))
            task_spec['universe']['model'] = m['config']['model']
            print('task', task_spec)
            classifier_kwargs = task_spec.get('classifier_kwargs',{})
            split_results = []
            splits = generate_splits(task_spec,feature_hash,'features')
            for (ind,split) in enumerate(splits):
                print ('split', ind)
                train_data = split['train_data']
                test_data = split['test_data']

                # Train and test sets must not share any file.
                train_filenames = [t['filename'] for t in train_data]
                test_filenames = [t['filename'] for t in test_data]
                assert set(train_filenames).intersection(test_filenames) == set([])

                print('train feature extraction ...')
                train_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task_spec) for f in train_data])
                print('test feature extraction ...')
                test_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task_spec) for f in test_data])
                train_labels = split['train_labels']
                test_labels = split['test_labels']

                print('classifier ...')
                res = svm.classify(train_features,train_labels,test_features,test_labels,classifier_kwargs)
                print('Split test accuracy', res['test_accuracy'])
                split_results.append(res)

            # Average each statistic over the splits.  The first split decides
            # which statistics are available; identity comparison with None
            # avoids an element-wise comparison when the value is an array.
            model_results = SON([])
            for stat in STATS:
                if stat in split_results[0] and split_results[0][stat] is not None:
                    model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()

            out_record = SON([('model',m['config']['model']),
                              ('model_hash',model_hash),
                              ('model_filename',m['filename']),
                              ('images',son_escape(image_config_gen)),
                              ('image_hash',image_hash),
                              ('task',son_escape(task_spec)),
                         ])

            # The filename is derived from the descriptive fields only, before
            # the bookkeeping fields and statistics are added.
            filename = get_filename(out_record)
            out_record['filename'] = filename
            out_record['config_path'] = cpath
            out_record['__hash__'] = ext_hash
            out_record.update(model_results)
            print('dump out ...')
            out_data = cPickle.dumps(SON([('split_results',split_results),('splits',splits)]))

            perf_fs.put(out_data,**out_record)

    createCertificateDict(outfile,{'feature_file':feature_certificate})