Ejemplo n.º 1
0
def update_nodes_from_path(sess,
                           root,
                           oldest_refresh=None,
                           auto_tag_folder_tail=False,
                           auto_tag_words=None):
    """
    Walk the directory tree below ``root`` and refresh the node of every
    file that changed since the last scan.

    :param sess: database session. It is committed after every directory in
        which at least one file was updated, and once more at the end.
    :param root: directory to walk.
    :param oldest_refresh: files whose mtime *and* ctime are both older than
        this value are skipped. When falsy, the newest ``Node.updated``
        value found below ``root`` is used instead.
    :param auto_tag_folder_tail: handed unchanged to ``update_one_node``.
    :param auto_tag_words: handed to ``update_one_node``; ``None`` (the
        default) is replaced by a fresh empty list.
    """
    import os
    from os.path import join, abspath

    # A list literal as default value would be shared between calls.
    if auto_tag_words is None:
        auto_tag_words = []

    root_ltree = uri_to_ltree(root)
    if not oldest_refresh:
        refresh_qry = select([func.max(Node.updated)])
        refresh_qry = refresh_qry.where(Node.path.op("<@")(root_ltree))
        oldest_refresh = refresh_qry.execute().first()[0]

    LOG.info("Rescanning files that changed since %s" % oldest_refresh)

    ignored_dirs = ('CVS', '.git', '.svn')
    ignored_files = ('Thumbs.db',)

    for dirpath, dirs, files in os.walk(root):
        # Prune in place so that os.walk does not descend into version
        # control directories.
        dirs[:] = [name for name in dirs if name not in ignored_dirs]

        scanned_files = 0
        for filename in files:
            if filename in ignored_files:
                continue

            path = abspath(join(dirpath, filename))

            # Ignore files which have not been modified since the last scan.
            # This test has to run *before* the node is updated, otherwise
            # it has no effect and every file is rescanned.
            file_stat = os.stat(path)
            mod_time = datetime.fromtimestamp(
                max(file_stat.st_mtime, file_stat.st_ctime))
            if oldest_refresh and mod_time < oldest_refresh:
                continue

            update_one_node(sess, path, auto_tag_folder_tail, auto_tag_words)
            scanned_files += 1

        if scanned_files > 0:
            LOG.info("commit")
            sess.commit()

    LOG.info("commit")
    sess.commit()
Ejemplo n.º 2
0
def remove_orphans(sess, root):
    """
    Delete the rows of all nodes selected below ``root`` whose ``uri`` fails
    the ``exists`` check (presumably ``os.path.exists`` -- confirm against
    the file's imports), then call ``remove_empty_dirs`` for ``root``.

    Each removal is committed on its own. A failing removal is logged and
    rolled back, and processing continues with the next row.

    :param sess: database session used for commit/rollback.
    :param root: URI converted with ``uri_to_ltree`` to select the nodes.
    """
    root_ltree = uri_to_ltree(root)
    qry = select([Node.uri, Node.mimetype])
    qry = qry.where(Node.path.op("<@")(root_ltree))
    for row in qry.execute():
        uri = row[0]
        if exists(uri):
            continue

        LOG.info('Removing orphan %r' % uri)
        try:
            nodes_table.delete(nodes_table.c.uri == uri).execute()
            LOG.info("commit")
            sess.commit()
        except Exception:
            # Best effort: keep going with the remaining rows, but leave a
            # trace of what went wrong instead of hiding it.
            LOG.exception('Unable to remove orphan %r' % uri)
            sess.rollback()

    remove_empty_dirs(sess, root)
Ejemplo n.º 3
0
def remove_orphans(sess, root):
    """
    Delete the rows of all nodes selected below ``root`` whose ``uri`` fails
    the ``exists`` check (presumably ``os.path.exists`` -- confirm against
    the file's imports), then call ``remove_empty_dirs`` for ``root``.

    Each removal is committed on its own. A failing removal is logged and
    rolled back, and processing continues with the next row.

    :param sess: database session used for commit/rollback.
    :param root: URI converted with ``uri_to_ltree`` to select the nodes.
    """
    root_ltree = uri_to_ltree(root)
    qry = select([Node.uri, Node.mimetype])
    qry = qry.where(Node.path.op("<@")(root_ltree))
    for row in qry.execute():
        uri = row[0]
        if exists(uri):
            continue

        LOG.info('Removing orphan %r' % uri)
        try:
            nodes_table.delete(nodes_table.c.uri == uri).execute()
            LOG.info("commit")
            sess.commit()
        except Exception:
            # Best effort: keep going with the remaining rows, but leave a
            # trace of what went wrong instead of hiding it.
            LOG.exception('Unable to remove orphan %r' % uri)
            sess.rollback()

    remove_empty_dirs(sess, root)
Ejemplo n.º 4
0
def update_nodes_from_path(sess, root, oldest_refresh=None,
                           auto_tag_folder_tail=False, auto_tag_words=None):
    """
    Walk the directory tree below ``root`` and refresh the node of every
    file that changed since the last scan.

    :param sess: database session. It is committed after every directory in
        which at least one file was updated, and once more at the end.
    :param root: directory to walk.
    :param oldest_refresh: files whose mtime *and* ctime are both older than
        this value are skipped. When falsy, the newest ``Node.updated``
        value found below ``root`` is used instead.
    :param auto_tag_folder_tail: handed unchanged to ``update_one_node``.
    :param auto_tag_words: handed to ``update_one_node``; ``None`` (the
        default) is replaced by a fresh empty list.
    """
    import os
    from os.path import join, abspath

    # A list literal as default value would be shared between calls.
    if auto_tag_words is None:
        auto_tag_words = []

    root_ltree = uri_to_ltree(root)
    if not oldest_refresh:
        refresh_qry = select([func.max(Node.updated)])
        refresh_qry = refresh_qry.where(Node.path.op("<@")(root_ltree))
        oldest_refresh = refresh_qry.execute().first()[0]

    LOG.info("Rescanning files that changed since %s" % oldest_refresh)

    ignored_dirs = ('CVS', '.git', '.svn')
    ignored_files = ('Thumbs.db',)

    for dirpath, dirs, files in os.walk(root):
        # Prune in place so that os.walk does not descend into version
        # control directories.
        dirs[:] = [name for name in dirs if name not in ignored_dirs]

        scanned_files = 0
        for filename in files:
            if filename in ignored_files:
                continue

            path = abspath(join(dirpath, filename))

            # Ignore files which have not been modified since the last scan.
            # This test has to run *before* the node is updated, otherwise
            # it has no effect and every file is rescanned.
            file_stat = os.stat(path)
            mod_time = datetime.fromtimestamp(
                max(file_stat.st_mtime, file_stat.st_ctime))
            if oldest_refresh and mod_time < oldest_refresh:
                continue

            update_one_node(sess, path, auto_tag_folder_tail, auto_tag_words)
            scanned_files += 1

        if scanned_files > 0:
            LOG.info("commit")
            sess.commit()

    LOG.info("commit")
    sess.commit()
Ejemplo n.º 5
0
def all(sess, nodes, flatten=False):
    """
    Build a query for the ``Node`` rows located directly under the path
    described by ``nodes``.

    :param sess: session used to build the queries.
    :param nodes: path segments; they are joined with ``/`` to form the
        parent URI.
    :param flatten: accepted but not used by this function.
    :return: an unexecuted query over ``Node`` whose ``path`` is one of the
        distinct sub-paths, truncated to one level more than the depth of
        the parent URI, found among the rows matching ``<@`` against the
        parent path (presumably the PostgreSQL ltree operator -- confirm).
    """
    base_uri = '/'.join(nodes)
    base_path = uri_to_ltree(base_uri)
    level = uri_depth(base_uri)

    truncated = distinct(
        func.subpath(Node.path, 0, level + 1).label("subpath"))
    sub_paths = sess.query(truncated)
    sub_paths = sub_paths.filter(Node.path.op("<@")(base_path)).subquery()

    return sess.query(Node).filter(Node.path.in_(sub_paths))
Ejemplo n.º 6
0
def all(sess, nodes, flatten=False):
    """
    Build a query for the ``Node`` rows located directly under the path
    described by ``nodes``.

    :param sess: session used to build the queries.
    :param nodes: path segments; they are joined with ``/`` to form the
        parent URI.
    :param flatten: accepted but not used by this function.
    :return: an unexecuted query over ``Node`` whose ``path`` is one of the
        distinct sub-paths, truncated to one level more than the depth of
        the parent URI, found among the rows matching ``<@`` against the
        parent path (presumably the PostgreSQL ltree operator -- confirm).
    """
    base_uri = '/'.join(nodes)
    base_path = uri_to_ltree(base_uri)
    level = uri_depth(base_uri)

    truncated = distinct(
        func.subpath(Node.path, 0, level + 1).label("subpath"))
    sub_paths = sess.query(truncated)
    sub_paths = sub_paths.filter(Node.path.op("<@")(base_path)).subquery()

    return sess.query(Node).filter(Node.path.in_(sub_paths))
Ejemplo n.º 7
0
def remove_empty_dirs(sess, root):
    """
    Delete directory nodes below ``root`` that have nothing beneath them.

    Directory nodes are the rows with mimetype ``other/directory``. One is
    deleted when the number of rows matching ``<@`` against its path is
    exactly 1 (presumably only the directory row itself -- confirm against
    the ltree semantics).

    :param sess: not used here; statements are run through ``execute()`` on
        the constructs themselves and nothing is committed in this function.
    :param root: URI converted with ``uri_to_ltree`` to select the nodes.
    """
    segments = uri_to_ltree(root).split('.')

    # NOTE(review): for a plain string, ``split`` always yields at least one
    # element, so this guard looks like it can never trigger -- confirm the
    # intent (maybe "skip when the root is empty").
    if not segments:
        return

    dir_query = select([Node.path])
    dir_query = dir_query.where(Node.path.op("<@")('.'.join(segments)))
    dir_query = dir_query.where(Node.mimetype == 'other/directory')
    dir_paths = [record[0] for record in dir_query.execute()]

    for dir_path in dir_paths:
        count_query = select([func.count(Node.uri)])
        count_query = count_query.where(Node.path.op("<@")(dir_path))
        for record in count_query.execute():
            if record[0] != 1:
                continue
            LOG.debug('Removing empty dir: %r' % dir_path)
            nodes_table.delete(nodes_table.c.path == dir_path).execute()
Ejemplo n.º 8
0
def remove_empty_dirs(sess, root):
    """
    Delete directory nodes below ``root`` that have nothing beneath them.

    Directory nodes are the rows with mimetype ``other/directory``. One is
    deleted when the number of rows matching ``<@`` against its path is
    exactly 1 (presumably only the directory row itself -- confirm against
    the ltree semantics).

    :param sess: not used here; statements are run through ``execute()`` on
        the constructs themselves and nothing is committed in this function.
    :param root: URI converted with ``uri_to_ltree`` to select the nodes.
    """
    segments = uri_to_ltree(root).split('.')

    # NOTE(review): for a plain string, ``split`` always yields at least one
    # element, so this guard looks like it can never trigger -- confirm the
    # intent (maybe "skip when the root is empty").
    if not segments:
        return

    dir_query = select([Node.path])
    dir_query = dir_query.where(Node.path.op("<@")('.'.join(segments)))
    dir_query = dir_query.where(Node.mimetype == 'other/directory')
    dir_paths = [record[0] for record in dir_query.execute()]

    for dir_path in dir_paths:
        count_query = select([func.count(Node.uri)])
        count_query = count_query.where(Node.path.op("<@")(dir_path))
        for record in count_query.execute():
            if record[0] != 1:
                continue
            LOG.debug('Removing empty dir: %r' % dir_path)
            nodes_table.delete(nodes_table.c.path == dir_path).execute()
Ejemplo n.º 9
0
def calc_md5(sess, root, since=None):
    """
    Store a freshly computed md5 on the non-directory nodes selected below
    ``root``.

    Nodes whose ``uri`` fails the ``exists`` check are left untouched. The
    session is committed after every 500 updated nodes and once more at the
    end.

    :param sess: database session used for querying and committing.
    :param root: URI converted with ``uri_to_ltree`` to select the nodes.
    :param since: when truthy, only nodes with ``updated >= since`` are
        considered.
    """
    subtree = uri_to_ltree(root)
    candidates = sess.query(Node)
    candidates = candidates.filter(Node.path.op("<@")(subtree))
    candidates = candidates.filter(Node.mimetype != 'other/directory')
    if since:
        candidates = candidates.filter(Node.updated >= since)

    updated = 0
    for entry in candidates:
        if exists(entry.uri):
            entry.md5 = file_md5(entry.uri)
            LOG.info('Updated md5 of %s' % entry)
            updated += 1

            # commit from time to time
            if updated % 500 == 0:
                LOG.info('commit')
                sess.commit()

    LOG.info('commit')
    sess.commit()
Ejemplo n.º 10
0
def calc_md5(sess, root, since=None):
    """
    Store a freshly computed md5 on the non-directory nodes selected below
    ``root``.

    Nodes whose ``uri`` fails the ``exists`` check are left untouched. The
    session is committed after every 500 updated nodes and once more at the
    end.

    :param sess: database session used for querying and committing.
    :param root: URI converted with ``uri_to_ltree`` to select the nodes.
    :param since: when truthy, only nodes with ``updated >= since`` are
        considered.
    """
    subtree = uri_to_ltree(root)
    candidates = sess.query(Node)
    candidates = candidates.filter(Node.path.op("<@")(subtree))
    candidates = candidates.filter(Node.mimetype != 'other/directory')
    if since:
        candidates = candidates.filter(Node.updated >= since)

    updated = 0
    for entry in candidates:
        if exists(entry.uri):
            entry.md5 = file_md5(entry.uri)
            LOG.info('Updated md5 of %s' % entry)
            updated += 1

            # commit from time to time
            if updated % 500 == 0:
                LOG.info('commit')
                sess.commit()

    LOG.info('commit')
    sess.commit()
Ejemplo n.º 11
0
def from_incremental_query(sess, query):
    """
    Resolve an incremental query string.

    The query is a ``/`` separated string. Its first segment holds one or
    more comma separated query types; the following segments are the
    parameters of those types (``expected_params`` tells how many), and the
    rest forms the parent URI. A trailing ``__flat__`` segment asks for a
    flattened result.

    :param sess: session used to build and run the queries.
    :param query: the query string; empty, ``root`` and ``/`` list the
        available query schemes.
    :return: a list of ``DummyNode`` when the call only lists things (query
        schemes, named queries, or the next possible segment of an
        incomplete query); otherwise an unexecuted query over ``Node``
        ordered by ``Node.uri``.
    """
    LOG.debug('parsing incremental query %r' % query)

    if not query or query == 'root' or query == '/':
        # list the available query schemes
        return [
            DummyNode('all'),
            DummyNode('date'),
            DummyNode('in_path'),
            DummyNode('md5'),
            DummyNode('named_queries'),
            DummyNode('rating'),
            DummyNode('tag'),
            DummyNode('tag_group'),
        ]

    if query.startswith('root'):
        # NOTE(review): drops five characters, i.e. this assumes "root" is
        # followed by exactly one separator character -- confirm.
        query = query[5:]
    query_nodes = query.split('/')

    LOG.debug('Query nodes: %r' % query_nodes)

    # pop the query type off the beginning
    query_types = query_nodes.pop(0).lower()
    query_types = [x.strip() for x in query_types.split(',')]

    # handle flattened queries
    flatten = bool(query_nodes) and query_nodes[-1] == "__flat__"
    if flatten:
        query_nodes.pop()

    # Construct the different queries
    if len(query_types) == 1 and query_types[0] == 'all':
        return all(sess, query_nodes, flatten).order_by(Node.uri)

    if 'named_queries' in query_types and not query_nodes:
        nq_qry = sess.query(Query)
        # "!= None" is intentional: it is a SQLAlchemy column expression,
        # "is not None" would be evaluated by Python instead.
        nq_qry = nq_qry.filter(Query.label != None)  # noqa: E711
        nq_qry = nq_qry.order_by(Query.label)
        return [DummyNode(x.label) for x in nq_qry.all()]
    elif query_types[0] == 'named_queries':
        # fetch the saved query and replace the named query by that string
        # NOTE(review): ``query_types`` is not re-derived from the saved
        # query, whose own first segment ends up in ``query_nodes`` --
        # confirm this is what ``expected_params`` and the filters expect.
        query_name = query_nodes.pop(0)
        nq_qry = sess.query(Query)
        nq_qry = nq_qry.filter(Query.label == query_name).first()
        if not nq_qry:
            return []

        prepend_nodes = nq_qry.query.split('/')
        query_nodes = prepend_nodes + query_nodes

    num_params = expected_params(query_types)
    if not query_nodes or len(query_nodes) < num_params:
        # not all query parameters are known yet. Find appropriate queries
        output = []
        stmt = sess.query(Query.query)
        LOG.debug('Listing nodes starting with %r' % query)
        stmt = stmt.filter(query_table.c.query.startswith(query))
        stmt = stmt.order_by(query_table.c.query)
        # we only return the element following the ones already given (the
        # "+ 1" accounts for the query type segment popped above)
        next_index = len(query_nodes) + 1
        for row in stmt:
            sub_nodes = row.query.split('/')
            if len(sub_nodes) <= next_index:
                # the stored query has no further segment to offer
                continue
            output.append(DummyNode(sub_nodes[next_index]))
        return output

    parent_uri = '/'.join(query_nodes[num_params:])

    parent_path = uri_to_ltree(parent_uri)
    depth = uri_depth(parent_uri)

    if flatten:
        stmt = sess.query(Node)
    else:
        stmt = sess.query(
            distinct(func.subpath(Node.path, 0, depth + 1).label("subpath")))

    stmt = stmt.filter(Node.path.op("<@")(parent_path))

    # apply all filters in sequence
    for query_type in query_types:
        if query_type == 'date':
            stmt = dated(sess, stmt, parent_uri, query_nodes)
        elif query_type == 'major_mimetype':
            stmt = major_mimetype(stmt, parent_uri, query_nodes)
        elif query_type == 'mimetype':
            stmt = mimetype(stmt, parent_uri, query_nodes)
        elif query_type == 'rating':
            stmt = rated(stmt, parent_uri, query_nodes)
        elif query_type == 'aspect':
            stmt = aspect(stmt, parent_uri, query_nodes)
        elif query_type == 'aspect_range':
            stmt = aspect_range(stmt, parent_uri, query_nodes)
        elif query_type == 'md5':
            stmt = has_md5(stmt, parent_uri, query_nodes)
        elif query_type == 'in_path':
            stmt = in_path(stmt, query_nodes)
        elif query_type == 'tag':
            stmt = tagged(sess, stmt, parent_uri, query_nodes)
        elif query_type == 'tag_group':
            stmt = in_tag_group(sess, stmt, parent_uri, query_nodes)

    # goes to the debug log instead of being printed to stdout
    LOG.debug('Incremental query statement: %s', stmt)

    if not flatten:
        stmt = stmt.subquery()
        qry = sess.query(Node)
        qry = qry.filter(Node.path.in_(stmt))
        qry = qry.order_by(Node.uri)
        return qry

    return stmt.order_by(Node.uri)
Ejemplo n.º 12
0
def subdirs(sess, query):
    """
    List the sub-folders available below the parent URI of ``query``.

    The query string has the same layout as the one taken by
    ``from_incremental_query``: comma separated query types in the first
    ``/`` separated segment, then the parameters of those types, then the
    parent URI.

    :param sess: session used to build and run the query.
    :param query: the query string.
    :return: a list of ``DummyNode``, one per distinct sub-path, named after
        the last ``.`` separated label of that sub-path. The list is empty
        for the root query, for flattened (``__flat__``) queries, for named
        queries and for queries whose parameters are incomplete.
    """
    LOG.debug('subfolders in %s' % query)

    if not query or query == 'root' or query == '/':
        # handled by from_incremental_query
        return []

    if query.startswith('root'):
        # NOTE(review): drops five characters, i.e. this assumes "root" is
        # followed by exactly one separator character -- confirm.
        query = query[5:]
    query_nodes = query.split('/')

    LOG.debug('Query nodes: %r' % query_nodes)

    # pop the query type off the beginning
    query_types = query_nodes.pop(0).lower()
    query_types = [x.strip() for x in query_types.split(',')]

    # flattened queries have no sub-folders
    if query_nodes and query_nodes[-1] == "__flat__":
        return []

    if 'named_queries' in query_types and not query_nodes:
        # handled by from_incremental_query
        return []
    elif query_types[0] == 'named_queries':
        # handled by from_incremental_query
        return []

    num_params = expected_params(query_types)
    if not query_nodes or len(query_nodes) < num_params:
        # todo: query not complete: offer some virtual folders
        return []

    parent_uri = '/'.join(query_nodes[num_params:])

    parent_path = uri_to_ltree(parent_uri)
    depth = uri_depth(parent_uri)

    stmt = sess.query(
        distinct(func.subpath(Node.path, 0, depth + 1).label("subpath")))

    stmt = stmt.filter(Node.path.op("<@")(parent_path))
    # only rows with more levels than ``depth + 1`` are considered
    stmt = stmt.filter(func.nlevel(Node.path) > depth + 1)

    if not (len(query_types) == 1 and query_types[0] == 'all'):
        # apply all filters in sequence
        for query_type in query_types:
            if query_type == 'date':
                stmt = dated(sess, stmt, parent_uri, query_nodes)
            elif query_type == 'rating':
                stmt = rated(stmt, parent_uri, query_nodes)
            elif query_type == 'major_mimetype':
                stmt = major_mimetype(stmt, parent_uri, query_nodes)
            elif query_type == 'mimetype':
                stmt = mimetype(stmt, parent_uri, query_nodes)
            elif query_type == 'aspect_range':
                stmt = aspect_range(stmt, parent_uri, query_nodes)
            elif query_type == 'aspect':
                stmt = aspect(stmt, parent_uri, query_nodes)
            elif query_type == 'md5':
                stmt = has_md5(stmt, parent_uri, query_nodes)
            elif query_type == 'in_path':
                stmt = in_path(stmt, query_nodes)
            elif query_type == 'tag':
                stmt = tagged(sess, stmt, parent_uri, query_nodes)
            elif query_type == 'tag_group':
                stmt = in_tag_group(sess, stmt, parent_uri, query_nodes)

    # keep only the last label of each sub-path
    return [DummyNode(x[0].rsplit('.', 1)[-1]) for x in stmt]
Ejemplo n.º 13
0
 def __init__(self, uri):
     """
     Keep ``uri`` on the instance together with the value returned by
     ``uri_to_ltree`` for it.

     :param uri: stored as ``self.uri``; its converted form is stored as
         ``self.path``.
     """
     ltree_path = uri_to_ltree(uri)
     self.path = ltree_path
     self.uri = uri
Ejemplo n.º 14
0
def from_incremental_query(sess, query):
    """
    Resolve an incremental query string.

    The query is a ``/`` separated string. Its first segment holds one or
    more comma separated query types; the following segments are the
    parameters of those types (``expected_params`` tells how many), and the
    rest forms the parent URI. A trailing ``__flat__`` segment asks for a
    flattened result.

    :param sess: session used to build and run the queries.
    :param query: the query string; empty, ``root`` and ``/`` list the
        available query schemes.
    :return: a list of ``DummyNode`` when the call only lists things (query
        schemes, named queries, or the next possible segment of an
        incomplete query); otherwise an unexecuted query over ``Node``
        ordered by ``Node.uri``.
    """
    LOG.debug('parsing incremental query %r' % query)

    if not query or query == 'root' or query == '/':
        # list the available query schemes
        return [
            DummyNode('all'),
            DummyNode('date'),
            DummyNode('in_path'),
            DummyNode('md5'),
            DummyNode('named_queries'),
            DummyNode('rating'),
            DummyNode('tag'),
            DummyNode('tag_group'),
        ]

    if query.startswith('root'):
        # NOTE(review): drops five characters, i.e. this assumes "root" is
        # followed by exactly one separator character -- confirm.
        query = query[5:]
    query_nodes = query.split('/')

    LOG.debug('Query nodes: %r' % query_nodes)

    # pop the query type off the beginning
    query_types = query_nodes.pop(0).lower()
    query_types = [x.strip() for x in query_types.split(',')]

    # handle flattened queries
    flatten = bool(query_nodes) and query_nodes[-1] == "__flat__"
    if flatten:
        query_nodes.pop()

    # Construct the different queries
    if len(query_types) == 1 and query_types[0] == 'all':
        return all(sess, query_nodes, flatten).order_by(Node.uri)

    if 'named_queries' in query_types and not query_nodes:
        nq_qry = sess.query(Query)
        # "!= None" is intentional: it is a SQLAlchemy column expression,
        # "is not None" would be evaluated by Python instead.
        nq_qry = nq_qry.filter(Query.label != None)  # noqa: E711
        nq_qry = nq_qry.order_by(Query.label)
        return [DummyNode(x.label) for x in nq_qry.all()]
    elif query_types[0] == 'named_queries':
        # fetch the saved query and replace the named query by that string
        # NOTE(review): ``query_types`` is not re-derived from the saved
        # query, whose own first segment ends up in ``query_nodes`` --
        # confirm this is what ``expected_params`` and the filters expect.
        query_name = query_nodes.pop(0)
        nq_qry = sess.query(Query)
        nq_qry = nq_qry.filter(Query.label == query_name).first()
        if not nq_qry:
            return []

        prepend_nodes = nq_qry.query.split('/')
        query_nodes = prepend_nodes + query_nodes

    num_params = expected_params(query_types)
    if not query_nodes or len(query_nodes) < num_params:
        # not all query parameters are known yet. Find appropriate queries
        output = []
        stmt = sess.query(Query.query)
        LOG.debug('Listing nodes starting with %r' % query)
        stmt = stmt.filter(query_table.c.query.startswith(query))
        stmt = stmt.order_by(query_table.c.query)
        # we only return the element following the ones already given (the
        # "+ 1" accounts for the query type segment popped above)
        next_index = len(query_nodes) + 1
        for row in stmt:
            sub_nodes = row.query.split('/')
            if len(sub_nodes) <= next_index:
                # the stored query has no further segment to offer
                continue
            output.append(DummyNode(sub_nodes[next_index]))
        return output

    parent_uri = '/'.join(query_nodes[num_params:])

    parent_path = uri_to_ltree(parent_uri)
    depth = uri_depth(parent_uri)

    if flatten:
        stmt = sess.query(Node)
    else:
        stmt = sess.query(
            distinct(func.subpath(Node.path, 0, depth + 1).label("subpath")))

    stmt = stmt.filter(Node.path.op("<@")(parent_path))

    # apply all filters in sequence
    for query_type in query_types:
        if query_type == 'date':
            stmt = dated(sess, stmt, parent_uri, query_nodes)
        elif query_type == 'major_mimetype':
            stmt = major_mimetype(stmt, parent_uri, query_nodes)
        elif query_type == 'mimetype':
            stmt = mimetype(stmt, parent_uri, query_nodes)
        elif query_type == 'rating':
            stmt = rated(stmt, parent_uri, query_nodes)
        elif query_type == 'aspect':
            stmt = aspect(stmt, parent_uri, query_nodes)
        elif query_type == 'aspect_range':
            stmt = aspect_range(stmt, parent_uri, query_nodes)
        elif query_type == 'md5':
            stmt = has_md5(stmt, parent_uri, query_nodes)
        elif query_type == 'in_path':
            stmt = in_path(stmt, query_nodes)
        elif query_type == 'tag':
            stmt = tagged(sess, stmt, parent_uri, query_nodes)
        elif query_type == 'tag_group':
            stmt = in_tag_group(sess, stmt, parent_uri, query_nodes)

    # goes to the debug log instead of being printed to stdout
    LOG.debug('Incremental query statement: %s', stmt)

    if not flatten:
        stmt = stmt.subquery()
        qry = sess.query(Node)
        qry = qry.filter(Node.path.in_(stmt))
        qry = qry.order_by(Node.uri)
        return qry

    return stmt.order_by(Node.uri)
Ejemplo n.º 15
0
def subdirs(sess, query):
    """
    List the sub-folders available below the parent URI of ``query``.

    The query string has the same layout as the one taken by
    ``from_incremental_query``: comma separated query types in the first
    ``/`` separated segment, then the parameters of those types, then the
    parent URI.

    :param sess: session used to build and run the query.
    :param query: the query string.
    :return: a list of ``DummyNode``, one per distinct sub-path, named after
        the last ``.`` separated label of that sub-path. The list is empty
        for the root query, for flattened (``__flat__``) queries, for named
        queries and for queries whose parameters are incomplete.
    """
    LOG.debug('subfolders in %s' % query)

    if not query or query == 'root' or query == '/':
        # handled by from_incremental_query
        return []

    if query.startswith('root'):
        # NOTE(review): drops five characters, i.e. this assumes "root" is
        # followed by exactly one separator character -- confirm.
        query = query[5:]
    query_nodes = query.split('/')

    LOG.debug('Query nodes: %r' % query_nodes)

    # pop the query type off the beginning
    query_types = query_nodes.pop(0).lower()
    query_types = [x.strip() for x in query_types.split(',')]

    # flattened queries have no sub-folders
    if query_nodes and query_nodes[-1] == "__flat__":
        return []

    if 'named_queries' in query_types and not query_nodes:
        # handled by from_incremental_query
        return []
    elif query_types[0] == 'named_queries':
        # handled by from_incremental_query
        return []

    num_params = expected_params(query_types)
    if not query_nodes or len(query_nodes) < num_params:
        # todo: query not complete: offer some virtual folders
        return []

    parent_uri = '/'.join(query_nodes[num_params:])

    parent_path = uri_to_ltree(parent_uri)
    depth = uri_depth(parent_uri)

    stmt = sess.query(
        distinct(func.subpath(Node.path, 0, depth + 1).label("subpath")))

    stmt = stmt.filter(Node.path.op("<@")(parent_path))
    # only rows with more levels than ``depth + 1`` are considered
    stmt = stmt.filter(func.nlevel(Node.path) > depth + 1)

    if not (len(query_types) == 1 and query_types[0] == 'all'):
        # apply all filters in sequence
        for query_type in query_types:
            if query_type == 'date':
                stmt = dated(sess, stmt, parent_uri, query_nodes)
            elif query_type == 'rating':
                stmt = rated(stmt, parent_uri, query_nodes)
            elif query_type == 'major_mimetype':
                stmt = major_mimetype(stmt, parent_uri, query_nodes)
            elif query_type == 'mimetype':
                stmt = mimetype(stmt, parent_uri, query_nodes)
            elif query_type == 'aspect_range':
                stmt = aspect_range(stmt, parent_uri, query_nodes)
            elif query_type == 'aspect':
                stmt = aspect(stmt, parent_uri, query_nodes)
            elif query_type == 'md5':
                stmt = has_md5(stmt, parent_uri, query_nodes)
            elif query_type == 'in_path':
                stmt = in_path(stmt, query_nodes)
            elif query_type == 'tag':
                stmt = tagged(sess, stmt, parent_uri, query_nodes)
            elif query_type == 'tag_group':
                stmt = in_tag_group(sess, stmt, parent_uri, query_nodes)

    # keep only the last label of each sub-path
    return [DummyNode(x[0].rsplit('.', 1)[-1]) for x in stmt]
Ejemplo n.º 16
0
 def __init__(self, uri):
     """
     Keep ``uri`` on the instance together with the value returned by
     ``uri_to_ltree`` for it.

     :param uri: stored as ``self.uri``; its converted form is stored as
         ``self.path``.
     """
     ltree_path = uri_to_ltree(uri)
     self.path = ltree_path
     self.uri = uri