コード例 #1
0
ファイル: nodes.py プロジェクト: pombredanne/metafilter
def calc_md5(sess, root, since=None):
    """
    Recompute and store the MD5 checksum of the nodes below ``root``.

    Selects every ``Node`` whose ``path`` satisfies the ``<@`` operator
    against the ltree form of ``root`` and whose mimetype is not
    ``'other/directory'``. Each selected node whose ``uri`` still passes
    ``exists()`` gets its ``md5`` attribute replaced by ``file_md5(uri)``.

    :param sess: database session used for querying and committing.
    :param root: URI of the subtree to process (converted with
        ``uri_to_ltree``).
    :param since: optional lower bound; when truthy only nodes with
        ``updated >= since`` are processed.
    """
    subtree = uri_to_ltree(root)
    query = sess.query(Node).filter(
        Node.path.op("<@")(subtree)
    ).filter(
        Node.mimetype != 'other/directory'
    )

    if since:
        query = query.filter(Node.updated >= since)

    processed = 0
    for entry in query:
        if exists(entry.uri):
            entry.md5 = file_md5(entry.uri)
            LOG.info('Updated md5 of %s' % entry)
            processed += 1

            # Flush work in batches of 500 updated nodes so a long run
            # does not pile everything into one transaction.
            if processed % 500 == 0:
                LOG.info('commit')
                sess.commit()

    # Final commit picks up whatever remains after the last full batch.
    LOG.info('commit')
    sess.commit()
コード例 #2
0
ファイル: nodes.py プロジェクト: exhuma/metafilter
def calc_md5(sess, root, since=None):
    """
    Refresh the stored MD5 of all non-directory nodes under ``root``.

    The query is restricted to nodes whose ``path`` matches the ``<@``
    operator against ``uri_to_ltree(root)`` and whose mimetype differs from
    ``'other/directory'``; if ``since`` is truthy, also to nodes with
    ``updated >= since``. Nodes whose ``uri`` fails ``exists()`` are left
    untouched and do not count towards the commit batches.

    :param sess: database session used for the query and the commits.
    :param root: URI identifying the subtree to scan.
    :param since: optional cut-off compared against ``Node.updated``.
    """
    ltree_root = uri_to_ltree(root)
    candidates = sess.query(Node)
    candidates = candidates.filter(Node.path.op("<@")(ltree_root))
    candidates = candidates.filter(Node.mimetype != 'other/directory')
    if since:
        candidates = candidates.filter(Node.updated >= since)

    # Lazily drop nodes whose file is gone; numbering starts at 1 so the
    # counter equals the number of nodes updated so far.
    on_disk = (item for item in candidates if exists(item.uri))
    for updated, item in enumerate(on_disk, 1):
        item.md5 = file_md5(item.uri)
        LOG.info('Updated md5 of %s' % item)

        if updated % 500 == 0:
            # commit from time to time
            LOG.info('commit')
            sess.commit()

    # Commit the tail of the run (also executed when nothing was updated).
    LOG.info('commit')
    sess.commit()
コード例 #3
0
ファイル: nodes.py プロジェクト: pombredanne/metafilter
def update_one_node(sess, path, auto_tag_folder_tail=False, auto_tag_words=None):
    """
    Create or refresh the ``Node`` for one file and apply automatic tags.

    The node is looked up by its decoded path (``uri``); a new ``Node`` is
    created when none exists. Mimetype, creation time and modification time
    are refreshed, sparse metadata is added via ``add_sparse_metadata`` and
    the MD5 is computed when the node has none yet. Collected tags are
    applied with ``set_tags`` and the node is added to the session. This
    function does not commit.

    Tags come from three sources:

    * the name of the containing folder (``auto_tag_folder_tail``),
    * ``auto_tag_words`` that occur as a component of the folder path,
    * a ``tag.hints`` file located next to the file.

    ``tag.hints`` contains comma-separated tags, one list per line. A plain
    line applies to every file in the folder. A line containing ``::``
    applies only to the file named before the separator, e.g.::

        thefile.txt::documentation, project a, draft

    :param sess: database session used to query and add the node.
    :param path: filesystem path of the file (decoded with the filesystem
        encoding before it is stored).
    :param auto_tag_folder_tail: when true, tag the file with the name of
        its parent folder if that name is longer than
        ``TAIL_DIR_THRESHOLD``.
    :param auto_tag_words: optional iterable of words; each word that
        matches (case-insensitively) a component of the folder path is
        added as a tag. ``None`` or empty disables this.
    :return: ``None``. Returns early, without touching the database, when
        ``path`` is not a regular file, is itself a ``tag.hints`` file, or
        cannot be decoded.
    """
    from os.path import basename, isfile, join
    import mimetypes
    mimetypes.init()
    if not isfile(path):
        LOG.warning("Not a regular file: %r" % path)
        return

    # Compare the file's own base name; the bare name ``file`` would refer
    # to the builtin type and never equal a string.
    filename = basename(path)
    if filename == 'tag.hints':
        LOG.debug('Skipping tag.hints file')
        return

    # One stat call serves both timestamps.
    stat_info = os.stat(path)
    create_time = datetime.fromtimestamp(stat_info.st_ctime)
    mod_time = max(datetime.fromtimestamp(stat_info.st_mtime), create_time)

    mimetype, _ = mimetypes.guess_type(path)

    auto_tags = set()
    try:
        unipath = path.decode(getfilesystemencoding())
    except UnicodeError:
        # UnicodeError covers both the decode failure and the implicit
        # encode failure that occurs when ``path`` is already unicode.
        LOG.error('Unable to encode %r using %s' %
                  (path, getfilesystemencoding()))
        return

    unidir = dirname(unipath)

    if auto_tag_folder_tail:
        tailname = split(unidir)[-1]
        if tailname and len(tailname) > TAIL_DIR_THRESHOLD:
            auto_tags.add(tailname)
        else:
            LOG.warning(
                "Not using %r as auto-tag-name. Either it's empty or too short",
                tailname)

    if auto_tag_words:
        # Lower-case the folder components once instead of once per word.
        folder_parts = set(part.lower() for part in splitpath(unidir))
        for word in auto_tag_words:
            if word.lower() in folder_parts:
                auto_tags.add(word)

    db_node = sess.query(Node).filter_by(uri=unipath).first()
    if not db_node:
        db_node = Node(unipath)
        LOG.info("New node: %s" % (db_node, ))
    db_node.mimetype = mimetype
    add_sparse_metadata(db_node)
    db_node.created = create_time
    db_node.updated = mod_time

    if not db_node.md5:
        LOG.info('Updating MD5')
        db_node.md5 = file_md5(unipath)

    # process "tag.hints"
    hints_file = join(unidir, 'tag.hints')
    if exists(hints_file):
        with open(hints_file) as hints:
            for line in hints:
                if '::' in line:
                    # Split on the first separator only so tags may
                    # themselves contain '::'.
                    target, raw_tags = line.split('::', 1)
                    if target.strip() != filename:
                        continue
                else:
                    raw_tags = line
                # Blank lines and stray commas must not create empty tags.
                for tag in raw_tags.split(','):
                    tag = tag.strip()
                    if tag:
                        auto_tags.add(tag)

    if auto_tags:
        set_tags(sess, db_node.md5, auto_tags, False)
    sess.add(db_node)
    LOG.info("Updated %s with tags %r" % (db_node, auto_tags))
コード例 #4
0
ファイル: nodes.py プロジェクト: pombredanne/metafilter
    """
    Adds additional metadata into the HSTORE table
    """
    import Image
    if node.mimetype not in ('image/jpeg', ):
        return

    try:
        im = Image.open(node.uri)
    except IOError, exc:
        LOG.warning('Unable to add sparse metadata for %r (%s)' % (node, exc))
        return

    md5 = node.md5
    if not md5:
        md5 = file_md5(node.uri)
        upd = nodes_table.update().where(nodes_table.c.uri == node.uri).values(
            md5=md5)
    aspect_ratio = "%.3f" % (float(im.size[0]) / float(im.size[1]))
    values = dict(md5=md5,
                  metadata=dict(
                      dimensions="%s, %s" % im.size,
                      aspect_ratio=aspect_ratio,
                  ))
    try:
        ins = node_meta_table.insert().values(**values)
        ins.execute()
    except IntegrityError:
        upd = node_meta_table.update().where(
            node_meta_table.c.md5 == md5).values(**values)
        upd.execute()
コード例 #5
0
ファイル: nodes.py プロジェクト: exhuma/metafilter
def update_one_node(sess, path, auto_tag_folder_tail=False, auto_tag_words=None):
    """
    Insert or update the ``Node`` describing ``path`` and auto-tag it.

    Looks the node up by the decoded path, creating it when missing, then
    refreshes mimetype, created/updated timestamps and sparse metadata
    (``add_sparse_metadata``). The MD5 is computed only when the node has
    none. Any collected tags are passed to ``set_tags`` and the node is
    added to ``sess``; nothing is committed here.

    Automatic tags are gathered from:

    * the parent folder name, when ``auto_tag_folder_tail`` is true and the
      name is longer than ``TAIL_DIR_THRESHOLD``;
    * every entry of ``auto_tag_words`` that equals (ignoring case) one of
      the components of the parent folder path;
    * a ``tag.hints`` file in the same folder. Each line is a
      comma-separated tag list that applies to all files in the folder,
      unless the line has the form ``name::tag, tag`` in which case it only
      applies to the file called ``name``. Example::

          thefile.txt::documentation, project a, draft

    :param sess: database session used for the lookup and ``add``.
    :param path: path of the file on disk; decoded with the filesystem
        encoding to form the node URI.
    :param auto_tag_folder_tail: enable tagging with the parent folder name.
    :param auto_tag_words: optional iterable of candidate tag words;
        ``None`` or empty means no word matching.
    :return: ``None``. Nothing is stored when ``path`` is not a regular
        file, is a ``tag.hints`` file, or cannot be decoded.
    """
    from os.path import basename, isfile, join
    import mimetypes
    mimetypes.init()
    if not isfile(path):
        LOG.warning("Not a regular file: %r" % path)
        return

    # Use the real base name of the file; the unqualified name ``file`` is
    # the builtin type and can never compare equal to a string.
    filename = basename(path)
    if filename == 'tag.hints':
        LOG.debug('Skipping tag.hints file')
        return

    # Stat once and derive both timestamps from the same snapshot.
    stat_info = os.stat(path)
    create_time = datetime.fromtimestamp(stat_info.st_ctime)
    mod_time = max(datetime.fromtimestamp(stat_info.st_mtime), create_time)

    mimetype, _ = mimetypes.guess_type(path)

    auto_tags = set()
    try:
        unipath = path.decode(getfilesystemencoding())
    except UnicodeError:
        # Catch the common base class: decoding raises UnicodeDecodeError,
        # while a unicode ``path`` can raise UnicodeEncodeError here.
        LOG.error('Unable to encode %r using %s' % (path, getfilesystemencoding()))
        return

    unidir = dirname(unipath)

    if auto_tag_folder_tail:
        tailname = split(unidir)[-1]
        if tailname and len(tailname) > TAIL_DIR_THRESHOLD:
            auto_tags.add(tailname)
        else:
            LOG.warning("Not using %r as auto-tag-name. Either it's empty or too short", tailname)

    if auto_tag_words:
        # Build the lower-cased component set once for all words.
        folder_parts = set(x.lower() for x in splitpath(unidir))
        auto_tags.update(
            word for word in auto_tag_words if word.lower() in folder_parts)

    db_node = sess.query(Node).filter_by(uri=unipath).first()
    if not db_node:
        db_node = Node(unipath)
        LOG.info("New node: %s" % (db_node, ))
    db_node.mimetype = mimetype
    add_sparse_metadata(db_node)
    db_node.created = create_time
    db_node.updated = mod_time

    if not db_node.md5:
        LOG.info('Updating MD5')
        db_node.md5 = file_md5(unipath)

    # process "tag.hints"
    hints_file = join(unidir, 'tag.hints')
    if exists(hints_file):
        # The context manager closes the hints file even if parsing fails.
        with open(hints_file) as hints:
            for line in hints:
                if '::' not in line:
                    raw_tags = line
                else:
                    # maxsplit=1 keeps a second '::' from breaking the
                    # two-value unpacking.
                    target, raw_tags = line.split('::', 1)
                    if target.strip() != filename:
                        continue
                stripped = [tag.strip() for tag in raw_tags.split(',')]
                # Skip empty entries produced by blank lines or ',,'.
                auto_tags.update(tag for tag in stripped if tag)

    if auto_tags:
        set_tags(sess, db_node.md5, auto_tags, False)
    sess.add(db_node)
    LOG.info("Updated %s with tags %r" % (db_node, auto_tags))
コード例 #6
0
ファイル: nodes.py プロジェクト: exhuma/metafilter
    """
    Adds additional metadata into the HSTORE table
    """
    import Image
    if node.mimetype not in ('image/jpeg', ):
        return

    try:
        im = Image.open(node.uri)
    except IOError, exc:
        LOG.warning('Unable to add sparse metadata for %r (%s)' % (node, exc))
        return

    md5 = node.md5
    if not md5:
        md5 = file_md5(node.uri)
        upd = nodes_table.update().where(
                nodes_table.c.uri == node.uri).values(
                        md5=md5)
    aspect_ratio = "%.3f" % (float(im.size[0]) / float(im.size[1]))
    values = dict(
        md5 = md5,
        metadata = dict(
            dimensions = "%s, %s" % im.size,
            aspect_ratio = aspect_ratio,
            ))
    try:
        ins = node_meta_table.insert().values(
                **values
                )
        ins.execute()