Example #1
    def __init__(self):
        self.at_change_id = AccountInfo.get_instance().largest_change_id
        _logger.debug("Latest change-ID at startup is (%d)." %
                      (self.at_change_id))

        self.__t = None
        self.__t_quit_ev = threading.Event()
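The thread handle and quit event initialized above suggest a background worker that periodically polls for changes newer than at_change_id. That worker isn't shown in this example, so the following is only a minimal sketch of how such a loop could be wired to the quit event; the __check_changes name, the start/stop method names, and the 10-second interval are assumptions, not code from this project.

    def __start_monitor(self):
        # Sketch: run the poll loop on a daemon thread.
        self.__t = threading.Thread(target=self.__monitor_loop)
        self.__t.daemon = True
        self.__t.start()

    def __monitor_loop(self):
        # Event.wait() doubles as the sleep; it returns True once the quit
        # event is set, which ends the loop.
        while not self.__t_quit_ev.wait(timeout=10):
            self.__check_changes()  # hypothetical per-cycle change check

    def stop(self):
        # Ask the loop to exit and wait for the thread to finish.
        self.__t_quit_ev.set()
        self.__t.join()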
Example #2
    def find_path_components_goandget(self, path):
        """Do the same thing that find_path_components() does, except that 
        when we don't have record of a path-component, try to go and find it 
        among the children of the previous path component, and then try again.
        """

        gd = get_gdrive()

        with PathRelations.rlock:
            previous_results = []
            i = 0
            while True:
                #                self.__log.debug("Attempting to find path-components (go and "
                #                                 "get) for path [%s].  CYCLE= (%d)", path, i)

                # See how many components can be found in our current cache.

                result = self.__find_path_components(path)

                # If we could resolve the entire path, return success.

                if result[2]:
                    return result

                # If we could not resolve the entire path, and we're no more
                # successful than a prior attempt, we'll just have to return a
                # partial.

                num_results = len(result[0])
                if num_results in previous_results:
                    return result

                previous_results.append(num_results)

                # Else, we've encountered a component/depth of the path that we
                # don't currently know about.
                # TODO: This is going to be the general area that we'd have to adjust to
                #        support multiple, identical entries. This currently only considers the
                #        first result. We should rewrite this to be recursive in order to make
                #        it easier to keep track of a list of results.
                # The parent is the last one found, or the root if none.
                parent_id = result[0][num_results - 1] \
                                if num_results \
                                else AccountInfo.get_instance().root_id

                # The child will be the first part that was not found.
                child_name = result[1][num_results]

                children = gd.list_files(parent_id=parent_id,
                                         query_is_string=child_name)

                for child in children:
                    self.register_entry(child)

                filenames_phrase = ', '.join(
                    [candidate.id for candidate in children])
                #                self.__log.debug("(%d) candidate children were found: %s",
                #                                 len(children), filenames_phrase)

                i += 1
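As a concrete trace of the loop above (hypothetical path and IDs), resolving '/a/b' when only the root is cached takes three cycles:

# Cycle 0: __find_path_components('a/b') -> ([], ['a', 'b'], False)
#   num_results = 0, so parent_id = root_id and child_name = 'a';
#   gd.list_files() fetches the matching children and they get registered.
# Cycle 1: -> (['<id-a>'], ['a', 'b'], False)
#   parent_id = '<id-a>', child_name = 'b'; fetch and register again.
# Cycle 2: -> (['<id-a>', '<id-b>'], ['a', 'b'], True), which is returned.
# If a cycle ever repeats an earlier result count, no progress is being made
# (the path doesn't exist remotely either) and the partial result is returned.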
Example #3
    def __init__(self):
        self.__log = logging.getLogger().getChild('ChangeMan')

        try:
            self.at_change_id = AccountInfo.get_instance().largest_change_id
        except:
            self.__log.exception("Could not get largest change-ID.")
            raise

        self.__log.info("Latest change-ID at startup is (%d)." % 
                     (self.at_change_id))
Example #4
    def statfs(self, filepath):
        """Return filesystem status info (for df).

        The given file-path seems to always be '/'.

        REF: http://www.ibm.com/developerworks/linux/library/l-fuse/
        REF: http://stackoverflow.com/questions/4965355/converting-statvfs-to-percentage-free-correctly
        """

        block_size = 512

        try:
            account_info = AccountInfo.get_instance()
            total = account_info.quota_bytes_total // block_size
            used = account_info.quota_bytes_used // block_size
            free = total - used
        except:
            self.__log.exception("Could not get account-info.")
            raise FuseOSError(EIO)

        return {
            # Optimal transfer block size.
            'f_bsize': block_size,

            # Total data blocks in file system.
            'f_blocks': total,

            # Fragment size.
            'f_frsize': block_size,

            # Free blocks in filesystem.
            'f_bfree': free,

            # Free blocks avail to non-superuser.
            'f_bavail': free

            # Total file nodes in filesystem.
#            'f_files': 0,

            # Free file nodes in filesystem.
#            'f_ffree': 0,

            # Free inodes for unprivileged users.
#            'f_favail': 0
        }
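Per the second reference in the docstring, df derives its usage figures from these fields rather than reading byte counts directly. A minimal sketch of that arithmetic; the stat variable stands for the dict returned by statfs() and is not part of this module:

size_bytes = stat['f_blocks'] * stat['f_frsize']    # total capacity
avail_bytes = stat['f_bavail'] * stat['f_frsize']   # usable by non-superuser
used_bytes = (stat['f_blocks'] - stat['f_bfree']) * stat['f_frsize']

# GNU df reports Use% as used / (used + available), not used / total.
use_percent = 100.0 * used_bytes / (used_bytes + avail_bytes)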
Example #5
    def __find_path_components(self, path):
        """Given a path, return a list of all Google Drive entries that 
        comprise each component, or as many as can be found. As we've ensured 
        that all sibling filenames are unique, there can not be multiple 
        matches.
        """

        self.__log.debug("Searching for path components of [%s]. Now "
                         "resolving entry_clause." % (path))

        if path[0] == '/':
            path = path[1:]

        if len(path) and path[-1] == '/':
            path = path[:-1]

        if path in self.path_cache:
            return self.path_cache[path]

        with PathRelations.rlock:
            self.__log.debug("Locating entry information for path [%s]." % (path))

            try:
                root_id = AccountInfo.get_instance().root_id
            except:
                self.__log.exception("Could not get root-ID.")
                raise

            # Ensure that the root node is loaded.

            try:
                self.__get_entry_clause_by_id(root_id)
            except:
                self.__log.exception("Could not ensure root-node with entry-ID "
                                  "[%s]." % (root_id))
                raise

            path_parts = path.split('/')

            entry_ptr = root_id
            parent_id = None
            i = 0
            num_parts = len(path_parts)
            results = [ ]
            while i < num_parts:
                child_filename_to_search_fs = get_utility(). \
                    translate_filename_charset(path_parts[i])

                self.__log.debug("Checking for part (%d) [%s] under parent with "
                              "ID [%s]." % (i, child_filename_to_search_fs, 
                                            entry_ptr))

                try:
                    current_clause = self.entry_ll[entry_ptr]
                except:
                    # TODO: If entry with ID entry_ptr is not registered, update 
                    #       children of parent parent_id. Throttle how often this 
                    #       happens.

                    self.__log.exception("Could not find current subdirectory.  "
                                      "ENTRY_ID= [%s]" % (entry_ptr))
                    raise
            
                # Search this entry's children for the next filename further
                # down in the path. Any duplicates should've already been
                # handled as entries were stored. The variable name is just to
                # emphasize that neither ambiguity nor error will occur in the
                # traversal process.
                first_matching_child_clause = None
                children = current_clause[2]
            
                # If they just wanted the "" path (root), return the root-ID.
                if path == "":
                    found = [ root_id ]
                else:
#                    self.__log.debug("Looking for child [%s] among (%d): %s" % 
#                                  (child_filename_to_search_fs, len(children),
#                                   [ child_tuple[0] for child_tuple 
#                                     in children ]))

                    found = [ child_tuple[1][3] 
                              for child_tuple 
                              in children 
                              if child_tuple[0] == child_filename_to_search_fs ]

                if found:
                    self.__log.debug("Found matching child with ID [%s]." % (found[0]))
                    results.append(found[0])
                else:
                    self.__log.debug("Did not find matching child.")
                    return (results, path_parts, False)

                # Have we traveled far enough into the linked list?
                if (i + 1) >= num_parts:
                    self.__log.debug("Path has been completely resolved: %s" % (', '.join(results)))

                    self.path_cache[path] = (results, path_parts, True)
                    final_entry_id = results[-1]
                    self.path_cache_byid[final_entry_id] = path

                    return self.path_cache[path]

                parent_id = entry_ptr
                entry_ptr = found[0]
                i += 1
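From the indexing above, an entry "clause" evidently keeps its children at index 2 and its entry-ID at index 3, and the method returns a 3-tuple: the entry-IDs found so far, the split path parts, and a flag indicating whether the whole path resolved. A hedged usage sketch of that tuple, via the go-and-get wrapper shown earlier; the path_relations variable and the example path are illustrative only:

entry_ids, path_parts, fully_resolved = \
    path_relations.find_path_components_goandget('/photos/2013/img.jpg')

if fully_resolved:
    target_entry_id = entry_ids[-1]   # entry-ID of the final path component
else:
    # Only a prefix of the path resolved; entry_ids lines up with
    # path_parts[:len(entry_ids)].
    target_entry_id = None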
Example #6
    def find_path_components_goandget(self, path):
        """Do the same thing that find_path_components() does, except that 
        when we don't have record of a path-component, try to go and find it 
        among the children of the previous path component, and then try again.
        """

        with PathRelations.rlock:
            previous_results = []
            i = 0
            while True:
                self.__log.info("Attempting to find path-components (go and "
                                "get) for path [%s].  CYCLE= (%d)" % (path, i))

                # See how many components can be found in our current cache.

                try:
                    result = self.__find_path_components(path)
                except:
                    self.__log.exception("There was a problem doing an "
                                         "iteration of find_path_components() "
                                         "on [%s]." % (path))
                    raise

                self.__log.debug("Path resolution cycle (%d) results: %s" % 
                                 (i, result))

                # If we could resolve the entire path, return success.

                self.__log.debug("Found within current cache? %s" % 
                                 (result[2]))

                if result[2]:
                    return result

                # If we could not resolve the entire path, and we're no more 
                # successful than a prior attempt, we'll just have to return a 
                # partial.

                num_results = len(result[0])
                if num_results in previous_results:
                    self.__log.debug("We couldn't improve our results. This "
                                     "path most likely does not exist.")
                    return result

                previous_results.append(num_results)

                self.__log.debug("(%d) path-components were found, but not "
                                 "all." % (num_results))

                # Else, we've encountered a component/depth of the path that we 
                # don't currently know about.
                # TODO: This is going to be the general area that we'd have to
                #       adjust to support multiple, identical entries. This
                #       currently only considers the first result. We should
                #       rewrite this to be recursive in order to make it easier
                #       to keep track of a list of results.
                # The parent is the last one found, or the root if none.
                parent_id = result[0][num_results - 1] \
                                if num_results \
                                else AccountInfo.get_instance().root_id

                # The child will be the first part that was not found.
                child_name = result[1][num_results]

                self.__log.debug("Trying to reconcile child named [%s] under "
                                 "folder with entry-ID [%s]." % (child_name, 
                                                                 parent_id))

                try:
                    children = drive_proxy('list_files', parent_id=parent_id, 
                                           query_is_string=child_name)
                except:
                    self.__log.exception("Could not retrieve children for "
                                         "parent with ID [%s]." % (parent_id))
                    raise
                
                for child in children:
                    try:
                        self.register_entry(child)
                    except:
                        self.__log.exception("Could not register child entry "
                                             "for entry with ID [%s] in path-"
                                             "cache." % (child.id))
                        raise

                filenames_phrase = ', '.join([ candidate.id for candidate
                                                            in children ])
                self.__log.debug("(%d) candidate children were found: %s" % 
                                 (len(children), filenames_phrase))

                i += 1
Example #7
class EntryCache(CacheClientBase):
    """Manages our knowledge of file entries."""

    __log = None
    about = AccountInfo.get_instance()

    def __init__(self):
        self.__log = logging.getLogger().getChild('EntryCache')
        CacheClientBase.__init__(self)

    def __get_entries_to_update(self, requested_entry_id):
        # Get more entries than just what was requested, while we're at it.

        parent_ids = drive_proxy('get_parents_containing_id', 
                                 child_id=requested_entry_id)

#        self.__log.debug("Found (%d) parents.", len(parent_ids))

        affected_entries = [ requested_entry_id ]
        considered_entries = { }
        max_readahead_entries = Conf.get('max_readahead_entries')
        for parent_id in parent_ids:
#            self.__log.debug("Retrieving children for parent with ID [%s].",
#                             parent_id)

            child_ids = drive_proxy('get_children_under_parent_id', 
                                    parent_id=parent_id)

#            self.__log.debug("(%d) children found under parent with ID [%s].",
#                             len(child_ids), parent_id)

            for child_id in child_ids:
                if child_id == requested_entry_id:
                    continue

                # We've already looked into this entry.

                if child_id in considered_entries:
                    continue

                considered_entries[child_id] = True

                # Is it already cached?

                if self.cache.exists(child_id):
                    continue

                affected_entries.append(child_id)

                if len(affected_entries) >= max_readahead_entries:
                    break

        return affected_entries

    def __do_update_for_missing_entry(self, requested_entry_id):

        # Get the entries to update.

        affected_entries = self.__get_entries_to_update(requested_entry_id)

        # Read the entries, now.

#        self.__log.debug("(%d) primary and secondary entry/entries will be "
#                        "updated." % (len(affected_entries)))

        # TODO: We have to determine when this is called, and either remove it 
        # (if it's not), or find another way to not have to load them 
        # individually.

        retrieved = drive_proxy('get_entries', entry_ids=affected_entries)

        # Update the cache.

        path_relations = PathRelations.get_instance()

        for entry_id, entry in retrieved.items():
            path_relations.register_entry(entry)

#        self.__log.debug("(%d) entries were loaded.", len(retrieved))

        return retrieved

    def fault_handler(self, resource_name, requested_entry_id):
        """A requested entry wasn't stored."""

#        self.__log.debug("EntryCache has faulted on entry with ID [%s].",
#                         requested_entry_id)

        retrieved = self.__do_update_for_missing_entry(requested_entry_id)

        # Return the requested entry.
        return retrieved[requested_entry_id]

    def cleanup_pretrigger(self, resource_name, entry_id, force):
        """The core entry cache has a clean-up process that will remove old "
        entries. This is called just before any record is removed.
        """

        # Before the local cache-item is removed, remove the corresponding
        # entry from the PathRelations cache, too.

        path_relations = PathRelations.get_instance()

        if path_relations.is_cached(entry_id):
#            self.__log.debug("Removing PathRelations entry for cleaned-up entry "
#                             "with ID [%s]." % (entry_id))

            path_relations.remove_entry_recursive(entry_id)

    def get_max_cache_age_seconds(self):
        return Conf.get('cache_entries_max_age')
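EntryCache is built around a cache-miss ("fault") pattern: a lookup that isn't stored triggers fault_handler(), which batch-loads the missing entry plus up to Conf.get('max_readahead_entries') of its siblings and registers them with PathRelations. CacheClientBase isn't shown here, so the sketch below calls the handler directly just to illustrate the flow; normally the underlying cache invokes it on a miss, and the resource name and entry-ID used here are made up:

entry_cache = EntryCache()

# A miss in the underlying cache would route here; the handler fetches the
# entry (and readahead siblings) via drive_proxy and returns the one asked for.
entry = entry_cache.fault_handler('entries', 'some-entry-id')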
Example #8
    def __find_path_components(self, path):
        """Given a path, return a list of all Google Drive entries that 
        comprise each component, or as many as can be found. As we've ensured 
        that all sibling filenames are unique, there can not be multiple 
        matches.
        """

#        self.__log.debug("Searching for path components of [%s]. Now "
#                         "resolving entry_clause." % (path))

        if path[0] == '/':
            path = path[1:]

        if len(path) and path[-1] == '/':
            path = path[:-1]

        if path in self.path_cache:
            return self.path_cache[path]

        with PathRelations.rlock:
#            self.__log.debug("Locating entry information for path [%s].", path)
            root_id = AccountInfo.get_instance().root_id

            # Ensure that the root node is loaded.
            self.__get_entry_clause_by_id(root_id)

            path_parts = path.split('/')

            entry_ptr = root_id
            parent_id = None
            i = 0
            num_parts = len(path_parts)
            results = [ ]
            while i < num_parts:
                child_filename_to_search_fs = utility. \
                    translate_filename_charset(path_parts[i])

#                self.__log.debug("Checking for part (%d) [%s] under parent "
#                                 "with ID [%s].",
#                                 i, child_filename_to_search_fs, entry_ptr)

                current_clause = self.entry_ll[entry_ptr]
            
                # Search this entry's children for the next filename further
                # down in the path. Any duplicates should've already been
                # handled as entries were stored. The variable name is just to
                # emphasize that neither ambiguity nor error will occur in the
                # traversal process.
                first_matching_child_clause = None
                children = current_clause[2]
            
                # If they just wanted the "" path (root), return the root-ID.
                if path == "":
                    found = [ root_id ]
                else:
#                    self.__log.debug("Looking for child [%s] among (%d): %s" % 
#                                  (child_filename_to_search_fs, len(children),
#                                   [ child_tuple[0] for child_tuple 
#                                     in children ]))

                    found = [ child_tuple[1][3] 
                              for child_tuple 
                              in children 
                              if child_tuple[0] == child_filename_to_search_fs ]

                if found:
#                    self.__log.debug("Found matching child with ID [%s]." % (found[0]))
                    results.append(found[0])
                else:
#                    self.__log.debug("Did not find matching child.")
                    return (results, path_parts, False)

                # Have we traveled far enough into the linked list?
                if (i + 1) >= num_parts:
#                    self.__log.debug("Path has been completely resolved: %s" % (', '.join(results)))

                    self.path_cache[path] = (results, path_parts, True)
                    final_entry_id = results[-1]
                    self.path_cache_byid[final_entry_id] = path

                    return self.path_cache[path]

                parent_id = entry_ptr
                entry_ptr = found[0]
                i += 1
Example #9
    def __init__(self):
        self.at_change_id = AccountInfo.get_instance().largest_change_id
        _logger.debug("Latest change-ID at startup is (%d)." %
                      (self.at_change_id))
Example #10
    def __find_path_components(self, path):
        """Given a path, return a list of all Google Drive entries that 
        comprise each component, or as many as can be found. As we've ensured 
        that all sibling filenames are unique, there can not be multiple 
        matches.
        """

        if path[0] == '/':
            path = path[1:]

        if len(path) and path[-1] == '/':
            path = path[:-1]

        if path in self.path_cache:
            return self.path_cache[path]

        with PathRelations.rlock:
#            self.__log.debug("Locating entry information for path [%s].", path)

            root_id = AccountInfo.get_instance().root_id

            # Ensure that the root node is loaded.
            self.__get_entry_clause_by_id(root_id)

            path_parts = path.split('/')

            entry_ptr = root_id
            parent_id = None
            i = 0
            num_parts = len(path_parts)
            results = [ ]
            while i < num_parts:
                child_filename_to_search_fs = utility. \
                    translate_filename_charset(path_parts[i])

#                self.__log.debug("Checking for part (%d) [%s] under parent "
#                                 "with ID [%s].",
#                                 i, child_filename_to_search_fs, entry_ptr)

                current_clause = self.entry_ll[entry_ptr]
            
                # Search this entry's children for the next filename further
                # down in the path. Any duplicates should've already been
                # handled as entries were stored. The variable name is just to
                # emphasize that neither ambiguity nor error will occur in the
                # traversal process.
                first_matching_child_clause = None
                children = current_clause[2]
            
                # If they just wanted the "" path (root), return the root-ID.
                if path == "":
                    found = [ root_id ]
                else:
                    found = [ child_tuple[1][3] 
                              for child_tuple 
                              in children 
                              if child_tuple[0] == child_filename_to_search_fs ]

                if found:
                    results.append(found[0])
                else:
                    return (results, path_parts, False)

                # Have we traveled far enough into the linked list?
                if (i + 1) >= num_parts:
                    self.path_cache[path] = (results, path_parts, True)
                    final_entry_id = results[-1]
                    self.path_cache_byid[final_entry_id] = path

                    return self.path_cache[path]

                parent_id = entry_ptr
                entry_ptr = found[0]
                i += 1