Ejemplo n.º 1
0
def _redis_dirtime_key(dirpath):
    """Return the Redis key (base64 of the UTF-8 path) for a directory."""
    # base64.encodestring was removed in Python 3.9.  encodebytes produces
    # the same bytes, so keys written by older runs are still found; fall
    # back to encodestring only on interpreters that lack encodebytes.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    return encode(dirpath.encode('utf-8', errors='ignore'))


def get_dir_meta(worker_name, path, cliargs, reindex_dict, statsembeded=False):
    """Build the metadata document for a single directory.

    Args:
        worker_name: Stored verbatim in the document's "worker_name" field.
        path: The directory path, or, when ``statsembeded`` is true, a
            ``(dirpath, stat_values)`` pair where ``stat_values`` unpacks to
            ``mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime``.
        cliargs: Mapping of command line options; the keys ``'index2'`` and
            ``'autotag'`` are read here.
        reindex_dict: Mapping whose ``'directory'`` entry is a sequence of
            ``[path, tag, tag_custom]`` items; tags of a matching path are
            copied into the new document.
        statsembeded: When true the stat values come from ``path`` instead of
            an ``os.lstat`` call.

    Returns:
        The directory metadata dict, the string ``"sametimes"`` when
        ``cliargs['index2']`` is set and the cached ``mtime + ctime`` in Redis
        equals the current one, or ``False`` if any exception was raised
        while gathering the metadata (a warning is emitted in that case).
    """

    try:
        if statsembeded:
            # stat values travel alongside the path
            dirpath, metadata = path[0], path[1]
            mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime = metadata
        else:
            dirpath = path
            # lstat so that a symlinked directory is not followed
            mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime = os.lstat(dirpath)

        # convert times to utc for es
        mtime_utc = datetime.utcfromtimestamp(mtime).isoformat()
        atime_utc = datetime.utcfromtimestamp(atime).isoformat()
        ctime_utc = datetime.utcfromtimestamp(ctime).isoformat()

        if cliargs['index2']:
            # skip directories whose cached mtime + ctime is unchanged
            redis_dirtime = redis_conn.get(_redis_dirtime_key(dirpath))
            if redis_dirtime:
                cached_times = float(redis_dirtime.decode('utf-8'))
                current_times = float(mtime + ctime)
                if cached_times == current_times:
                    return "sametimes"

        # get time now in utc
        indextime_utc = datetime.utcnow().isoformat()

        # get owner and group names
        owner, group = get_owner_group_names(uid, gid)

        filename = os.path.basename(dirpath)
        parentdir = os.path.abspath(os.path.join(dirpath, os.pardir))

        dirmeta_dict = {
            "filename": filename,
            "path_parent": parentdir,
            "filesize": 0,
            "items": 1,  # 1 for itself
            "items_files": 0,
            "items_subdirs": 0,
            "last_modified": mtime_utc,
            "last_access": atime_utc,
            "last_change": ctime_utc,
            "hardlinks": nlink,
            "inode": str(ino),
            "owner": owner,
            "group": group,
            "tag": "",
            "tag_custom": "",
            "crawl_time": 0,
            "change_percent_filesize": "",
            "change_percent_items": "",
            "change_percent_items_files": "",
            "change_percent_items_subdirs": "",
            "costpergb": "",
            "worker_name": worker_name,
            "indexing_date": indextime_utc,
            "_type": "directory"
        }

        # check plugins for adding extra meta data to dirmeta_dict
        for plugin in plugins:
            try:
                # A plugin is handed a mapping that only contains the
                # 'directory' doc type; a KeyError from it is treated as
                # "this plugin does not apply" and the plugin is skipped.
                mappings = {'mappings': {'directory': {'properties': {}}}}
                plugin.add_mappings(mappings)
                dirmeta_dict.update(plugin.add_meta(dirpath))
            except KeyError:
                pass

        # add any autotags to dirmeta_dict
        if cliargs['autotag'] and len(config['autotag_dirs']) > 0:
            dirmeta_dict = auto_tag(dirmeta_dict, 'directory', mtime, atime, ctime)

        # search for and copy over any existing tags from reindex_dict
        for sublist in reindex_dict['directory']:
            if sublist[0] == dirpath:
                dirmeta_dict['tag'] = sublist[1]
                dirmeta_dict['tag_custom'] = sublist[2]
                break

    except (OSError, IOError) as e:
        warnings.warn("OS/IO Exception caused by: %s" % e)
        return False
    except Exception as e:
        # best-effort crawl: report the problem and let the caller move on
        warnings.warn("Exception caused by: %s" % e)
        return False

    # cache directory times in Redis, encode path (key) using base64
    if config['redis_cachedirtimes'] == 'true':
        redis_conn.set(_redis_dirtime_key(dirpath), mtime + ctime,
                       ex=config['redis_dirtimesttl'])

    return dirmeta_dict
Ejemplo n.º 2
0
def _redis_dirtime_key(dirpath):
    """Return the Redis key (base64 of the UTF-8 path) for a directory."""
    # base64.encodestring was removed in Python 3.9.  encodebytes produces
    # the same bytes, so keys written by older runs are still found; fall
    # back to encodestring only on interpreters that lack encodebytes.
    encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    return encode(dirpath.encode('utf-8', errors='ignore'))


def _strip_domain(name):
    """Drop a single leading ``DOMAIN\\`` prefix from an account name."""
    parts = name.split('\\')
    return parts[1] if len(parts) == 2 else parts[0]


def _owner_name(uid):
    """Return the user name for ``uid``, using the module-level cache."""
    if uid in uids:
        return owners[uid]
    try:
        owner = _strip_domain(pwd.getpwuid(uid).pw_name)
    except KeyError:
        # if we can't find the owner's user name, use the uid number
        owner = uid
    uids.append(uid)
    owners[uid] = owner
    return owner


def _group_name(gid):
    """Return the group name for ``gid``, using the module-level cache."""
    if gid in gids:
        return groups[gid]
    try:
        group = _strip_domain(grp.getgrgid(gid).gr_name)
    except KeyError:
        # if we can't find the group name, use the gid number
        group = gid
    gids.append(gid)
    groups[gid] = group
    return group


def get_dir_meta(worker_name, path, cliargs, reindex_dict, statsembeded=False):
    """Build the metadata document for a single directory.

    Args:
        worker_name: Stored verbatim in the document's "worker_name" field.
        path: The directory path, or, when ``statsembeded`` is true, a
            ``(dirpath, stat_values)`` pair where ``stat_values`` unpacks to
            ``mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime``.
        cliargs: Mapping of command line options; the keys ``'index2'`` and
            ``'autotag'`` are read here.
        reindex_dict: Mapping whose ``'directory'`` entry is a sequence of
            ``[path, tag, tag_custom]`` items; tags of a matching path are
            copied into the new document.
        statsembeded: When true the stat values come from ``path`` instead of
            an ``os.lstat`` call.

    Returns:
        The directory metadata dict, the string ``"sametimes"`` when
        ``cliargs['index2']`` is set and the cached ``mtime + ctime`` in Redis
        equals the current one, or ``False`` when an ``OSError``/``IOError``
        was raised while gathering the metadata.
    """

    try:
        if statsembeded:
            # stat values travel alongside the path
            dirpath, metadata = path[0], path[1]
            mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime = metadata
        else:
            dirpath = path
            # lstat so that a symlinked directory is not followed
            mode, ino, dev, nlink, uid, gid, size, atime, mtime, ctime = os.lstat(
                dirpath)

        # convert times to utc for es
        mtime_utc = datetime.utcfromtimestamp(mtime).isoformat()
        atime_utc = datetime.utcfromtimestamp(atime).isoformat()
        ctime_utc = datetime.utcfromtimestamp(ctime).isoformat()

        if cliargs['index2']:
            # skip directories whose cached mtime + ctime is unchanged
            redis_dirtime = redis_conn.get(_redis_dirtime_key(dirpath))
            if redis_dirtime:
                cached_times = float(redis_dirtime.decode('utf-8'))
                current_times = float(mtime + ctime)
                if cached_times == current_times:
                    return "sametimes"

        # get time now in utc
        indextime_utc = datetime.utcnow().isoformat()

        # resolve owner and group names (falls back to the numeric id)
        owner = _owner_name(uid)
        group = _group_name(gid)

        filename = os.path.basename(dirpath)
        parentdir = os.path.abspath(os.path.join(dirpath, os.pardir))

        dirmeta_dict = {
            "filename": filename,
            "path_parent": parentdir,
            "filesize": 0,
            "items": 1,  # 1 for itself
            "items_files": 0,
            "items_subdirs": 0,
            "last_modified": mtime_utc,
            "last_access": atime_utc,
            "last_change": ctime_utc,
            "hardlinks": nlink,
            "inode": ino,
            "owner": owner,
            "group": group,
            "tag": "",
            "tag_custom": "",
            "crawl_time": 0,
            "change_percent_filesize": "",
            "change_percent_items": "",
            "change_percent_items_files": "",
            "change_percent_items_subdirs": "",
            "worker_name": worker_name,
            "indexing_date": indextime_utc,
            "_type": "directory"
        }

        # check plugins for adding extra meta data to dirmeta_dict
        for plugin in plugins:
            try:
                # A plugin is handed a mapping that only contains the
                # 'directory' doc type; a KeyError from it is treated as
                # "this plugin does not apply" and the plugin is skipped.
                mappings = {'mappings': {'directory': {'properties': {}}}}
                plugin.add_mappings(mappings)
                dirmeta_dict.update(plugin.add_meta(dirpath))
            except KeyError:
                pass

        # add any autotags to dirmeta_dict (return value is not used here)
        if cliargs['autotag'] and len(config['autotag_dirs']) > 0:
            auto_tag(dirmeta_dict, 'directory', mtime, atime, ctime)

        # search for and copy over any existing tags from reindex_dict
        for sublist in reindex_dict['directory']:
            if sublist[0] == dirpath:
                dirmeta_dict['tag'] = sublist[1]
                dirmeta_dict['tag_custom'] = sublist[2]
                break

    except (IOError, OSError):
        # FileNotFoundError is a subclass of OSError, so it is covered here
        return False

    # cache directory times in Redis, encode path (key) using base64
    if config['redis_cachedirtimes'] in ('True', 'true'):
        redis_conn.set(_redis_dirtime_key(dirpath),
                       mtime + ctime,
                       ex=config['redis_dirtimesttl'])

    return dirmeta_dict