Ejemplo n.º 1
0
def mongo_cache(app, func, name, duration, force=False):
    """
    Return a cached result of `func`, recomputing it when stale.

    The 'cache' collection is searched for records called `name` that
    are younger than `duration` seconds; the most recent one supplies
    the return value. When no such record exists, or when `force` is
    set, `func()` is called and its result is stored as a new record.
    """
    from mad3.db import get_db
    collection = get_db(app)['cache']

    lg.debug("cache : %s", name)

    # only records dated after this moment count as fresh
    oldest_allowed = datetime.now() - timedelta(seconds=duration)
    fresh = collection.find({
        'name': name,
        'date': {"$gt": oldest_allowed},
    }).sort('date', -1)

    if not force:
        hit = next(fresh, None)
        if hit is not None:
            return hit['result']
        # no record in cache (?)
        lg.debug("no record found")

    lg.debug("run function {}".format(name))
    result = func()
    collection.insert({
        'name': name,
        'date': datetime.now(),
        'result': result,
    })
    return result
Ejemplo n.º 2
0
def create_index(app, args):
    """Create the configured mongodb indexes.

    For the 'transient' and then the 'transaction' collection, an
    ascending index is created on every field listed under the same
    name in ``app.conf['index']``.
    """
    db = get_db(app)
    for collection_name in ('transient', 'transaction'):
        collection = getattr(db, collection_name)
        for field in app.conf['index'][collection_name]:
            collection.create_index([(field, pymongo.ASCENDING)])
Ejemplo n.º 3
0
    def find_in_db(self, strict=False):
        """
        Look up relations in the db that resemble this one.

        A relation matches when it has the same template sha256 and,
        for every io record of this object, an io entry with identical
        sha256, category and group. The matches are returned as a list.
        An empty list is returned without querying when `self.data` has
        no 'io' key or when one of the io records lacks a sha256.

        `strict` is accepted but not used here.
        """
        clauses = [{'template_sha256': self.template_sha256}]
        if 'io' not in self.data:
            self.app.warning('Cannot query db for a relation '
                             'with no IO files')
            return []

        for entry in self.data['io']:
            if not entry.get('sha256'):
                lg.info('Can\'t query, no input shasum for {}'
                        .format(entry['filename']))
                return []

            wanted = {'sha256': entry['sha256'],
                      'category': entry['category'],
                      'group': entry['group']}
            clauses.append({'io': {'$elemMatch': wanted}})

        # every clause has to hold for a relation to count as a match
        relations = get_db(self.app).relation
        return list(relations.find({'$and': clauses}))
Ejemplo n.º 4
0
def drop(app, args):
    """Drop all data from one or more collections.

    Nothing is dropped unless ``args.i_know_what_im_doing`` is set; in
    that case a warning is issued and the function returns. Otherwise
    the 'transient', 'core' and 'transaction' collections are dropped
    when the argument of the same name is set.
    """
    if not args.i_know_what_im_doing:
        app.warning("Really drop? Add another command line flag")
        # bail out: without the confirmation flag nothing may be dropped
        return

    db = get_db(app)
    if args.transient:
        db.transient.drop()
    if args.core:
        db.core.drop()
    if args.transaction:
        db.transaction.drop()
0
Archivo: stats.py Proyecto: mfiers/mad3
 def get_allkeys():
     """Return per-key statistics of the transient collection.

     A map/reduce emits every key of each transient document with a
     count of 1 and the document's size, and sums both per key. The
     result documents are returned as a list sorted by summed size,
     largest first. The intermediate '_allkeys' collection is dropped
     before returning.

     NOTE(review): `app` is not a parameter; it has to come from an
     enclosing or module scope that is not visible here.
     """
     from bson.code import Code
     map_js = Code('''
         function () { for (var key in this) {
             emit(key, {count: 1, size: this.size}); };
         }''')
     reduce_js = Code('''
         function(key, values) {
             result = {count: 0, size: 0};
             values.forEach(function(value) {
                 result.count += value.count;
                 result.size += value.size; });
             return result; }''')
     db = get_db(app)
     outcome = db.transient.map_reduce(map_js, reduce_js, out="_allkeys")
     stats = list(outcome.find())
     stats.sort(key=lambda doc: doc['value']['size'], reverse=True)
     db['_allkeys'].drop()
     return stats
Ejemplo n.º 6
0
def tfind(app: Type[leip.app], args: Type[argparse.Namespace]):
    """Find relations and print them.

    The search is narrowed by whichever of the `id`, `file`, `hostname`
    and `state` arguments are set. A 64 character id has to match
    exactly, any other id is treated as a prefix. For `file`, the
    relation must have an io entry with that file's sha256. With
    ``args.human`` a summary line plus one line per io entry is
    printed, otherwise the raw record.
    """
    relations = get_db(app)['relation']
    options = vars(args)  # noqa: T484
    query = {}

    # id is a special case: full ids match exactly, others by prefix
    wanted_id = options.get('id')
    if wanted_id:
        if len(wanted_id) == 64:
            query['_id'] = wanted_id
        else:
            query['_id'] = {'$regex': '^{}'.format(wanted_id)}

    path = options.get('file')
    if path:
        query['io.sha256'] = MadFile(app, path).sha256

    # these arguments are copied into the query unchanged when set
    for key in ('hostname', 'state'):
        if options.get(key):
            query[key] = options[key]

    for rel in relations.find(query):
        if not args.human:
            nicedictprint(rel)
            continue

        print("{:>6s} {} {:8} {}:{}".format(
            nicetimedelta(rel['time']),
            rel['_id'][:10],
            rel['state'],
            rel['hostname'],
            rel['pwd'],
        ))
        for io in sorted(rel['io'], key=lambda rec: rec['category']):
            if io['category'] == 'input':
                marker = '<i'
            else:
                marker = 'o>'
            print('  {} {} ({})'.format(marker, io['filename'],
                                        io['sha256'][:8]))
Ejemplo n.º 7
0
def find(app, args):
    """Print the filename of every transient record matching the terms.

    Each entry of ``args.term`` is a ``key=value`` string. The key is
    resolved with `key_info` and the value converted with the key's
    'transformer'. A key of shape 'one' has to equal the value; a key
    of shape 'set' has to contain the value. Repeating a 'set' key
    requires all given values to be present.

    Raises:
        NotImplementedError: when a key of shape 'one' is given more
            than once.
    """
    query = {}

    db = get_db(app)

    for term in args.term:
        rawkey, rawval = term.split('=', 1)
        # NOTE(review): other commands call key_info(app.conf, ...) -
        # confirm which first argument key_info expects.
        keyname, keyinfo = key_info(app, rawkey)
        val = keyinfo['transformer'](rawval)
        lg.info('search: {} {}'.format(keyname, val))
        if keyinfo['shape'] == 'one':
            if keyname in query:
                # NotImplemented is a constant, not an exception; calling
                # it would fail with an unrelated TypeError
                raise NotImplementedError(
                    "more than one search term for key '{}'".format(keyname))
            query[keyname] = {"$in": [val]}
        elif keyinfo['shape'] == 'set':
            # collect repeated keys instead of silently dropping them
            query.setdefault(keyname, {'$all': []})['$all'].append(val)

    for rec in db.transient.find(query):
        print(rec['filename'])
Ejemplo n.º 8
0
def forget(app, args):
    """Forget a key, or key value combination.

    Only removing a single value from a key of shape 'set' is
    supported: the value is pulled from that key in every document of
    the 'transient' and 'core' collections.

    Raises:
        NotImplementedError: when no value is given, or when the key
            is not of shape 'set'.
    """
    key, kinfo = key_info(app.conf, args.key)

    db = get_db(app)
    if args.value:
        lg.warning("forget key=value {}={}".format(key, args.value))
        if kinfo.get('shape', 'one') == 'set':
            pull_value = {"$pull": {key: args.value}}
            # an empty filter with multi=True touches every document
            db.transient.update({}, pull_value, upsert=False, multi=True)
            db.core.update({}, pull_value, upsert=False, multi=True)
        else:
            # NotImplemented is a constant, not an exception; calling it
            # would fail with an unrelated TypeError
            raise NotImplementedError(
                "forgetting a value is only supported for 'set' keys")
    else:
        lg.warning("forget key {}".format(key))
        raise NotImplementedError("forgetting a whole key is not supported")
Ejemplo n.º 9
0
Archivo: stats.py Proyecto: mfiers/mad3
def _single_sum(app, group_by=None, force=False):
    """Sum size and count of the transient records per `group_by` value.

    The `group_by` field is unwound first, so documents holding several
    values in that field are counted once for each value. Returns a
    list of ``{'_id': value, 'total': summed size, 'count': records}``
    documents ordered by total, largest first.

    `force` is accepted but not used here.
    """
    field_ref = "${}".format(group_by)
    db = get_db(app)

    unwind_stage = {'$unwind': field_ref}
    group_stage = {
        '$group': {
            "_id": field_ref,
            "total": {"$sum": "$size"},
            "count": {"$sum": 1},
        }
    }
    sort_stage = {"$sort": {"total": -1}}

    pipeline = [unwind_stage, group_stage, sort_stage]
    return list(db.transient.aggregate(pipeline))
Ejemplo n.º 10
0
def scan(app, args):
    """Synchronise the transient collection with the files below the cwd.

    The (filename, mtime, size) triples stored in the database for the
    current directory tree are compared with what unix `find` reports.
    Records whose triple is no longer on disk are removed; files whose
    triple is not in the database are loaded as `MadFile` and saved
    when dirty. With ``args.refresh`` every file on disk is reloaded.
    ``args.quick`` is passed on to `MadFile`. Lines matching a pattern
    in ``~/.madignore`` are filtered from the `find` output when that
    file exists.
    """
    import re
    import shlex

    basedir = os.getcwd().rstrip('/') + '/'
    db = get_db(app)
    starttime = time.time()

    app.bulk_init()

    # escape the path: characters such as '.', '+' or '(' would
    # otherwise be interpreted as regex operators
    ff_regex = "^{}".format(re.escape(basedir))

    lg.info("Query database for files below\n    {}".format(basedir))
    allfilesdb = db.transient.find({'filename': {"$regex": ff_regex}},
                                   projection=['filename', 'mtime', 'size'])

    file2id = {}
    allfiles = set()
    for x in allfilesdb:
        allfiles.add((x['filename'], x['mtime'], x['size']))
        file2id[x['filename']] = x['_id']

    lg.info("Found {} files in db".format(len(allfiles)))

    madignore = os.path.expanduser('~/.madignore')
    cwd = os.path.abspath(os.path.normpath(os.getcwd()))
    # quote the path so that spaces or shell metacharacters in the
    # directory name cannot break (or alter) the command line
    cl = r"find {} -type f -printf '%p\t%T@\t%s\n'".format(shlex.quote(cwd))

    if os.path.exists(madignore):
        cl += ' | grep -v -f ~/.madignore'

    lg.info('running unix find')
    P = sp.Popen(cl, shell=True, stdout=sp.PIPE, stderr=sp.DEVNULL)
    o, _ = P.communicate()

    def cnv2(l):
        # one line of find output: path <tab> mtime (epoch) <tab> size
        p, m, s = l.rsplit("\t", 2)
        # whole seconds only, so the value compares equal to what is
        # stored in the database
        m = datetime.fromtimestamp(int(math.floor(float(m))))
        s = int(s)
        return (p, m, s)

    lines = filter(None, map(str.strip, o.decode().split('\n')))
    o = list(map(cnv2, lines))
    lg.info("unix find found {} files".format(len(o)))
    now = set(o)

    if args.refresh:
        changed = list(now)
    else:
        changed = list(now - allfiles)

    deleted = list(allfiles - now)

    app.counter['indb'] = len(allfiles)
    app.counter['onfs'] = len(now)
    app.counter['check'] = len(changed)
    app.counter['rm'] = len(deleted)

    lg.info('in database       : {:>8d}'.format(len(allfiles)))
    lg.info('on filesystem     : {:>8d}'.format(len(now)))
    lg.info('total new/changed : {:>8d}'.format(len(changed)))
    lg.info('total deleted     : {:>8d}'.format(len(deleted)))

    # log a small sample of the changed files
    for i, c in enumerate(sorted(changed)):
        lg.info('changed: {} path  {}'.format(i, c[0]))
        lg.info('           mtime {}'.format(c[1]))
        lg.info('           size  {}'.format(c[2]))
        if i > 3:
            break

    delids = [file2id[x[0]] for x in deleted]

    db.transient.remove({'_id': {"$in": delids}})

    # from here on only the filenames matter
    changed = {f[0] for f in changed}

    lg.info("{} files seem changed".format(len(changed)))

    lastscreenupdate = time.time()

    # store in database
    for filename in changed:

        app.counter['changed'] += 1
        try:
            mfile = MadFile(app, filename, quick=args.quick)
        except PermissionError:
            app.counter['noaccess'] += 1
            continue

        if mfile.dirty:
            mfile.save()

        # refresh the on-screen counters at most every two seconds
        if time.time() - lastscreenupdate > 2:
            print_counter(app.counter)
            lastscreenupdate = time.time()

    app.bulk_execute()

    print_counter(app.counter)
    # ensure we end on a newline
    print("\nruntime: {:.4f}".format(time.time() - starttime))
0
def scan2(app, args):
    """Synchronise the transient collection with the files below the cwd.

    Variant of the scan that processes the output of unix `find` line
    by line. A file whose (filename, mtime, size) triple is not in the
    database is loaded as `MadFile` and saved when dirty; with
    ``args.refresh`` known files are reloaded as well. ``args.quick``
    is passed on to `MadFile`. Afterwards, records whose triple was
    not seen on disk are removed from the database. Lines matching a
    pattern in ``~/.madignore`` are filtered from the `find` output
    when that file exists.
    """
    import re
    import shlex

    basedir = os.getcwd().rstrip('/') + '/'
    db = get_db(app)
    lastscreenupdate = starttime = time.time()
    app.bulk_init()

    # escape the path: characters such as '.', '+' or '(' would
    # otherwise be interpreted as regex operators
    ff_regex = "^{}".format(re.escape(basedir))

    lg.info("Query database for files below\n    {}".format(basedir))
    allfilesdb = db.transient.find({'filename': {"$regex": ff_regex}},
                                   projection=['filename', 'mtime', 'size'])

    file2id = {}
    allfiles = set()
    for x in allfilesdb:
        allfiles.add((x['filename'], x['mtime'], x['size']))
        file2id[x['filename']] = x['_id']

    lg.info("Found {} files in db".format(len(allfiles)))
    app.counter['indb'] = len(allfiles)
    app.message('Files in db: {}'.format(len(allfiles)))

    # run Unix FIND (seems the fastest way, plus we get the benefit
    # of adding in grep)
    madignore = os.path.expanduser('~/.madignore')
    cwd = os.path.abspath(os.path.normpath(os.getcwd()))
    # quote the path so that spaces or shell metacharacters in the
    # directory name cannot break (or alter) the command line
    cl = r"find {} -type f -printf '%p\t%T@\t%s\n'".format(shlex.quote(cwd))

    if os.path.exists(madignore):
        cl += ' | grep -v -f ~/.madignore'

    lg.info('running unix find')

    def cnv2(l):
        # one line of find output: path <tab> mtime (epoch) <tab> size
        p, m, s = l.rsplit("\t", 2)
        # whole seconds only, so the value compares equal to what is
        # stored in the database
        m = datetime.fromtimestamp(int(math.floor(float(m))))
        s = int(s)
        return (p, m, s)

    changed = set()
    onfs = set()

    P = sp.Popen(cl, shell=True, bufsize=1, stdout=sp.PIPE, stderr=sp.DEVNULL)
    with P.stdout as uxfind:
        # iterate the pipe directly so files are handled while find runs
        for rawline in uxfind:

            line = rawline.strip().decode()
            if not line:
                # cnv2 cannot parse an empty line - skip it up front
                continue
            ffile = cnv2(line)

            onfs.add(ffile)
            app.counter['seenonfs'] += 1
            in_allfiles = ffile in allfiles
            if in_allfiles and not args.refresh:
                # ignoring this file, it exists, and has not changed
                app.counter['notnew'] += 1
                continue

            # needs refreshing
            filename = ffile[0]
            changed.add(filename)
            if in_allfiles:
                app.counter['refresh'] += 1
            else:
                app.counter['new'] += 1

            try:
                mfile = MadFile(app, filename, quick=args.quick)
            except PermissionError:
                app.counter['noxs'] += 1
                continue

            if mfile.dirty:
                mfile.save()

            # refresh the on-screen counters at most every two seconds
            if time.time() - lastscreenupdate > 2:
                print_counter(app.counter)
                lastscreenupdate = time.time()

    # reap the child so it does not linger as a zombie process
    P.wait()

    app.bulk_execute()

    deleted = list(allfiles - onfs)

    app.counter['onfs'] = len(onfs)
    app.counter['rm'] = len(deleted)
    delids = [file2id[x[0]] for x in deleted]

    db.transient.remove({'_id': {"$in": delids}})
    lg.info("{} files seem changed".format(len(changed)))

    print_counter(app.counter)
    # ensure we end on a newline
    print("\nruntime: {:.4f}".format(time.time() - starttime))
Ejemplo n.º 12
0
    def __init__(self, app, filename, quick=False):
        """Prepare the MadFile.

        Loads the transient record for `filename` from the database, or
        builds a new one, brings it up to date and writes it back when
        it changed. Finally the 'onload' hook is run.

        With `quick` set no checksums are calculated: a new record gets
        the placeholder value '0' for sha1 and sha256, and the core
        record is not consulted.

        Raises:
            FileNotFound: when `filename` does not exist.
            PermissionError: when `filename` is not readable.
        """
        self.app = app
        self.quick = quick
        self.dirty = False

        self.app.counter['init_madfile'] += 1

        filename = os.path.abspath(os.path.expanduser(filename))
        if not os.path.exists(filename):
            lg.warning("{} does not exist:".format(filename))
            raise FileNotFound()

        if not os.access(filename, os.R_OK):
            raise PermissionError("m3 cannot read file {}".format(filename))

        self.filename = filename
        self.filestat = os.stat(filename)

        # step one - determine transient id of this file
        lg.debug('calc transient id for {}'.format(self.filename))
        self.transient_id = self.get_transient_id()
        lg.debug(' -- transient id is {}'.format(self.transient_id))

        # check the database if a record with the transient id exists
        self.db = get_db(app)
        lg.debug('check transient rec for {}'.format(self.filename))

        self.transient_rec = self.db.transient.find_one(
            {'_id': self.transient_id})

        # if there is no transient rec, calculate core id

        if self.transient_rec is None:
            self.app.counter['-trans'] += 1
            lg.debug('transient record does not exist')
            # needs to be saved now
            self.dirty = True

            if self.quick:
                # quick mode: store '0' placeholders instead of checksums
                self.app.counter['nochksum'] += 1
                self.sha1, self.sha256 = '0', '0'
            else:  # not quick, calculate all shasums
                # calculate fresh sha256
                self.app.counter['chksum'] += 1
                self.sha1, self.sha256 = self.calculate_checksum()

            # create a stub transient rec
            self.transient_rec = {
                '_id': self.transient_id,
                'sha256': self.sha256,
                'sha1': self.sha1,
                'filename': self.filename,
                'hostname': self.app.conf['hostname']
            }

            # and fill up the transient record, see if there are changes
            self.refresh()

        else:
            lg.debug('transient record found!')
            self.app.counter['transload'] += 1
            self.sha256 = self.transient_rec['sha256']
            self.sha1 = self.transient_rec['sha1']

            # check if this was a Q&D (quick) record and this is not a
            # Q&D call: replace the '0' placeholders by real checksums
            if not self.quick and (self.sha256 == '0' or self.sha1 == '0'):
                self.app.counter['unquicken'] += 1
                self.sha1, self.sha256 = self.calculate_checksum()
                self.transient_rec['sha256'] = self.sha256
                self.transient_rec['sha1'] = self.sha1
                self.dirty = True

            # refresh the transient record - see if there are changes
            # (presumably refresh() sets self.dirty when it finds any;
            # its body is not visible here)
            self.refresh()

            # if something changed - recalc the sha256
            if self.dirty:
                if self.quick:
                    # quick mode: only count the record, no checksum
                    self.app.counter['~dirty'] += 1
                    #print('dirty?', self.filename)
                else:
                    self.app.counter['re-chksum'] += 1
                    newsha1, newsha256 = self.calculate_checksum()
                    if newsha256 == self.transient_rec['sha256']:
                        self.app.counter['sha256_ok'] += 1
                    else:
                        # NOTE(review): only transient_rec receives the
                        # new checksums; self.sha1 / self.sha256 keep
                        # the stored values, and the core lookup below
                        # uses self.sha256 - confirm this is intended.
                        self.app.counter['sha256_change!'] += 1
                        self.transient_rec['sha1'] = newsha1
                        self.transient_rec['sha256'] = newsha256
                        # TODO: Create a transaction!!!
                        # TODO: copy core record data??

        if not self.quick:
            assert self.sha256 != "0"
            # the core record is keyed on the sha256 of the file
            self.core_rec = self.db.core.find_one({'_id': self.sha256})

            if self.core_rec is None:
                lg.debug('Core rec not found')
                self.core_rec = {'_id': self.sha256, 'sha1': self.sha1}
            else:
                # copy every core field that is missing from, or
                # different in, the transient record
                for k, v in self.core_rec.items():
                    if k == '_id': continue
                    if (k not in self.transient_rec) or \
                            (self.transient_rec[k] != v):
                        self.transient_rec[k] = v
                        self.dirty = True

        if self.dirty:
            lg.debug('dirty transient rec, saving')
            if getattr(self.app, 'bulk_mode', False):
                # bulk mode: queue the upsert on the app's bulk operation
                lg.debug('pepare bulk insert for {}'.format(self.filename))
                self.app.bulk_transient\
                    .find({'_id': self.transient_id})\
                    .upsert()\
                    .update({"$set": self.transient_rec})
            else:
                # no bulk mode: write the record straight away
                self.db.transient.update({'_id': self.transient_id},
                                         self.transient_rec,
                                         upsert=True)
            self.dirty = False

        self.app.run_hook('onload', self)
Ejemplo n.º 13
0
def bulk_init(app):
    """Switch `app` to bulk mode.

    Sets ``app.bulk_mode`` and stores a fresh unordered bulk operation
    for the 'transient' and the 'core' collection on ``app`` as
    ``bulk_transient`` and ``bulk_core``.
    """
    lg.debug("start bulk mode")
    database = get_db(app)
    app.bulk_mode = True
    for collection_name in ('transient', 'core'):
        collection = getattr(database, collection_name)
        setattr(app, 'bulk_' + collection_name,
                collection.initialize_unordered_bulk_op())
Ejemplo n.º 14
0
Archivo: stats.py Proyecto: mfiers/mad3
def sum(app, args):
    """
    Print size and count totals of the transient records.

    Without ``args.key`` the number of transient records, their total
    size and the number of core records are printed. With a key, one
    line per value of that key is printed (total size and record
    count), followed by a grand total. ``args.human`` selects aligned,
    human readable output instead of tab separated raw numbers.
    """
    if not args.key:
        db = get_db(app)
        notrans = db.transient.count()
        print("No Transient records: ", notrans)
        if notrans > 0:
            # a single group with _id None sums over all records
            totals = list(db.transient.aggregate([{
                "$group": {
                    "_id": None,
                    "total": {"$sum": "$size"}
                }
            }]))
            print("Total data Transient: ", nicesize(totals[0]['total']))
        # report the core collection here, not the transient one again
        print("     No Core records: ", db.core.count())
        return

    undefined_label = "<undefined>"

    kname, kinfo = key_info(app.conf, args.key)
    res = _single_sum(app, group_by=kname, force=args.force)
    total_size = 0
    total_count = 0

    # width of the first column: the longest label that gets printed
    mgn = len("Total")
    for reshost in res:
        gid = reshost['_id']
        if gid is None:
            # shown as undefined_label in the human readable output
            mgn = max(len(undefined_label), mgn)
        else:
            mgn = max(len(str(gid)), mgn)

    fms = "{:" + str(mgn) + "}\t{:>10}\t{:>9}"
    if args.human:
        print("# {}:".format(kname))
    for reshost in res:
        total = reshost['total']
        count = reshost['count']
        total_size += int(total)
        total_count += count
        if args.human:
            total_human = nicesize(total)
            count_human = nicenumber(count)
            categ = reshost['_id']
            if categ is None:
                categ = undefined_label

            print(fms.format(categ, total_human, count_human))
        else:
            print("{}\t{}\t{}".format(reshost['_id'], total, count))

    if args.human:
        total_size_human = nicesize(total_size)
        total_count_human = nicenumber(total_count)
        print(fms.format('', '-' * 10, '-' * 9))
        print(fms.format("Total", total_size_human, total_count_human))
    else:
        print("Total\t{}\t{}".format(total_size, total_count))
Ejemplo n.º 15
0
 def save(self):
     """Insert this relation's data as a document in the 'relation' collection."""
     relations = get_db(self.app)['relation']
     relations.insert(self.data)