Example #1
0
def qumulo_get_file_meta(worker_name, path, cliargs, reindex_dict):
    """Build the index document for one file from its Qumulo attribute dict.

    Args:
        worker_name: Value stored in the document's ``worker_name`` field.
        path: Dict of file attributes. Keys read here: ``name``, ``path``,
            ``size``, ``modification_time``, ``change_time``,
            ``creation_time``, ``owner``, ``group``, ``num_links``, ``id``.
            Timestamps must match ``%Y-%m-%dT%H:%M:%S`` and are treated as
            UTC (NOTE(review): implied by the ``*_utc`` naming -- confirm
            against the caller that trims the API response).
        cliargs: Dict of CLI options; ``minsize`` (bytes), ``mtime`` (days)
            and ``autotag`` are read.
        reindex_dict: Dict whose ``'file'`` entry is an iterable of
            ``[fullpath, tag, tag_custom]`` items carried over from a
            previous index.

    Returns:
        The file metadata dict, or ``None`` when the file is excluded, is
        smaller than ``cliargs['minsize']``, or was modified less than
        ``cliargs['mtime']`` days ago.
    """
    # Local import so this function does not depend on the module's import
    # block already providing `calendar`.
    import calendar

    time_format = '%Y-%m-%dT%H:%M:%S'
    filename = path['name']

    # check if file is in excluded_files list
    extension = os.path.splitext(filename)[1][1:].strip().lower()
    if file_excluded(filename, extension):
        return None

    # get file size (bytes)
    size = int(path['size'])

    # Skip files smaller than minsize cli flag
    if size < cliargs['minsize']:
        return None

    # check file modified time
    mtime_utc = path['modification_time']
    # timegm() interprets the parsed struct_time as UTC; mktime() would treat
    # it as local time and skew the result by the host's UTC offset. The
    # float() cast keeps the value type (and the hash string below) the same
    # as what mktime() produced.
    mtime_unix = float(calendar.timegm(time.strptime(mtime_utc, time_format)))

    # Convert time in days (mtime cli arg) to seconds
    time_sec = cliargs['mtime'] * 86400
    file_mtime_sec = time.time() - mtime_unix
    # Only process files modified at least x days ago
    if file_mtime_sec < time_sec:
        return None

    # get change time
    ctime_utc = path['change_time']
    ctime_unix = float(calendar.timegm(time.strptime(ctime_utc, time_format)))
    # get creation time
    creation_time_utc = path['creation_time']

    # create md5 hash of file using metadata filesize and mtime
    filestring = str(size) + str(mtime_unix)
    filehash = hashlib.md5(filestring.encode('utf-8')).hexdigest()
    # get time
    indextime_utc = datetime.utcnow().isoformat()
    # get absolute path of parent directory
    parentdir = os.path.abspath(os.path.join(path['path'], os.pardir))

    # get user id of owner; no name resolution is done here, so the id itself
    # is what gets stored in the shared uids/owners cache
    uid = path['owner']
    if uid in uids:
        owner = owners[uid]
    else:
        owner = uid
        uids.append(uid)
        owners[uid] = owner

    # get group id; same caching scheme as for the owner
    gid = path['group']
    if gid in gids:
        group = groups[gid]
    else:
        group = gid
        gids.append(gid)
        groups[gid] = group

    # create file metadata dictionary
    filemeta_dict = {
        "filename": filename,
        "extension": extension,
        "path_parent": parentdir,
        "filesize": size,
        "owner": owner,
        "group": group,
        "last_modified": mtime_utc,
        "creation_time": creation_time_utc,
        "last_change": ctime_utc,
        "hardlinks": path['num_links'],
        "inode": str(path['id']),
        "filehash": filehash,
        "tag": "",
        "tag_custom": "",
        "dupe_md5": "",
        "indexing_date": indextime_utc,
        "worker_name": worker_name,
        "_type": "file"
    }

    # check plugins for adding extra meta data to filemeta_dict
    for plugin in plugins:
        try:
            # check if plugin is for file doc; a KeyError from the plugin is
            # taken to mean it does not apply and the plugin is skipped
            mappings = {'mappings': {'file': {'properties': {}}}}
            plugin.add_mappings(mappings)
            filemeta_dict.update(plugin.add_meta(path['path']))
        except KeyError:
            pass

    # add any autotags to filemeta_dict
    if cliargs['autotag'] and len(config['autotag_files']) > 0:
        auto_tag(filemeta_dict, 'file', mtime_unix, None, ctime_unix)

    # search for and copy over any existing tags from reindex_dict
    for sublist in reindex_dict['file']:
        if sublist[0] == path['path']:
            filemeta_dict['tag'] = sublist[1]
            filemeta_dict['tag_custom'] = sublist[2]
            break

    return filemeta_dict
Example #2
0
def qumulo_get_dir_meta(worker_name, path, cliargs, reindex_dict, redis_conn):
    """Build the index document for one directory from its Qumulo attribute dict.

    Args:
        worker_name: Value stored in the document's ``worker_name`` field.
        path: Dict of directory attributes. Keys read here: ``name``,
            ``path``, ``modification_time``, ``change_time``,
            ``creation_time``, ``owner``, ``group``, ``num_links``, ``id``.
            Timestamps must match ``%Y-%m-%dT%H:%M:%S`` and are treated as
            UTC (NOTE(review): implied by the ``*_utc`` naming -- confirm
            against the caller).
        cliargs: Dict of CLI options; ``index2`` and ``autotag`` are read.
        reindex_dict: Dict whose ``'directory'`` entry is an iterable of
            ``[fullpath, tag, tag_custom]`` items carried over from a
            previous index.
        redis_conn: Object providing ``get(key)`` and
            ``set(key, value, ex=ttl)``; used to cache directory times.

    Returns:
        The string ``"sametimes"`` when ``cliargs['index2']`` is set and the
        cached mtime+ctime sum for this directory equals the current one;
        otherwise the directory metadata dict.
    """
    # Local import so this function does not depend on the module's import
    # block already providing `calendar`.
    import calendar

    time_format = '%Y-%m-%dT%H:%M:%S'

    if path['path'] != '/':
        fullpath = path['path'].rstrip(os.path.sep)
    else:
        fullpath = path['path']

    # timegm() interprets the parsed struct_time as UTC; mktime() would treat
    # it as local time and skew the result by the host's UTC offset. The
    # float() cast keeps the value type the same as what mktime() produced.
    mtime_utc = path['modification_time']
    mtime_unix = float(calendar.timegm(time.strptime(mtime_utc, time_format)))
    ctime_utc = path['change_time']
    ctime_unix = float(calendar.timegm(time.strptime(ctime_utc, time_format)))
    creation_time_utc = path['creation_time']

    # Redis key for this directory. base64.encodestring was removed in
    # Python 3.9; encodebytes yields the same bytes. Fall back to
    # encodestring on interpreters that lack encodebytes.
    b64encode = getattr(base64, 'encodebytes', None) or base64.encodestring
    redis_key = b64encode(fullpath.encode('utf-8', errors='ignore'))

    if cliargs['index2']:
        # check if directory times cached in Redis
        redis_dirtime = redis_conn.get(redis_key)
        if redis_dirtime:
            cached_times = float(redis_dirtime.decode('utf-8'))
            # check if cached times are the same as on disk
            current_times = float(mtime_unix + ctime_unix)
            if cached_times == current_times:
                return "sametimes"

    # get time now in utc
    indextime_utc = datetime.utcnow().isoformat()

    # get user id of owner; no name resolution is done here, so the id itself
    # is what gets stored in the shared uids/owners cache
    uid = path['owner']
    if uid in uids:
        owner = owners[uid]
    else:
        owner = uid
        uids.append(uid)
        owners[uid] = owner

    # get group id; same caching scheme as for the owner
    gid = path['group']
    if gid in gids:
        group = groups[gid]
    else:
        group = gid
        gids.append(gid)
        groups[gid] = group

    filename = path['name']
    parentdir = os.path.abspath(os.path.join(fullpath, os.pardir))

    dirmeta_dict = {
        "filename": filename,
        "path_parent": parentdir,
        "filesize": 0,
        "items": 1,  # 1 for itself
        "items_files": 0,
        "items_subdirs": 0,
        "last_modified": mtime_utc,
        "creation_time": creation_time_utc,
        "last_change": ctime_utc,
        "hardlinks": path['num_links'],
        "inode": str(path['id']),
        "owner": owner,
        "group": group,
        "tag": "",
        "tag_custom": "",
        "indexing_date": indextime_utc,
        "worker_name": worker_name,
        "change_percent_filesize": "",
        "change_percent_items": "",
        "change_percent_items_files": "",
        "change_percent_items_subdirs": "",
        "_type": "directory"
    }

    # check plugins for adding extra meta data to dirmeta_dict
    for plugin in plugins:
        try:
            # check if plugin is for directory doc; a KeyError from the plugin
            # is taken to mean it does not apply and the plugin is skipped
            mappings = {'mappings': {'directory': {'properties': {}}}}
            plugin.add_mappings(mappings)
            dirmeta_dict.update(plugin.add_meta(fullpath))
        except KeyError:
            pass

    # add any autotags to dirmeta_dict
    if cliargs['autotag'] and len(config['autotag_dirs']) > 0:
        auto_tag(dirmeta_dict, 'directory', mtime_unix, None, ctime_unix)

    # search for and copy over any existing tags from reindex_dict
    for sublist in reindex_dict['directory']:
        if sublist[0] == fullpath:
            dirmeta_dict['tag'] = sublist[1]
            dirmeta_dict['tag_custom'] = sublist[2]
            break

    # cache directory times in Redis
    if config['redis_cachedirtimes'] in ('True', 'true'):
        redis_conn.set(redis_key, mtime_unix + ctime_unix,
                       ex=config['redis_dirtimesttl'])

    return dirmeta_dict