Exemple #1
0
  def __init__(self):
    """
    Wire the registry up to its config file, storage directory, download
    history and messenger.
    """
    super(Registry, self).__init__()

    # The YAML config file is optional; it is addressed by a relative path
    config_file = path("cosmid.yaml")
    self.config_path = config_file
    self.config = ConfigReader(config_file)

    # Settings pulled out of the config
    self.email = self.config.find("email")

    # Where downloaded resources are stored ("resources" unless configured)
    storage = self.config.find("directory", default="resources")
    self.directory = path(storage)

    # The history file lives inside the storage directory and records the
    # resources that have already been downloaded
    history_file = path(self.directory + "/.cosmid.yaml")
    self.history_path = history_file
    self.history = HistoryReader(history_file)

    # A :class:`cosmid.messenger.Messenger` for user-facing output
    self.messenger = Messenger("cosmid")
Exemple #2
0
  def __init__(self):
    """
    Set up everything the registry talks to: the optional config file, the
    resource storage directory, the download history and a messenger.
    """
    super(Registry, self).__init__()

    # --- Config (optional YAML file) ---
    self.config_path = path("cosmid.yaml")
    config = ConfigReader(self.config_path)
    self.config = config

    # --- Values read from the config ---
    self.email = config.find("email")

    # Resource storage directory; falls back to "resources"
    self.directory = path(config.find("directory", default="resources"))

    # --- History of already downloaded resources ---
    self.history_path = path(self.directory + "/.cosmid.yaml")
    self.history = HistoryReader(self.history_path)

    # --- :class:`cosmid.messenger.Messenger` instance ---
    self.messenger = Messenger("cosmid")
Exemple #3
0
class Registry(object):
  """
  Hub of-sorts to talk with different `Cosmid` related files and resources. Can
  be seen as the API endpoint for `Cosmid`.
  """
  def __init__(self):
    super(Registry, self).__init__()

    # Set up YAML parser for optional config file
    self.config_path = path("cosmid.yaml")
    self.config = ConfigReader(self.config_path)

    # Extract stuff from config
    self.email = self.config.find("email")

    # Path to resource storage directory
    self.directory = path(self.config.find("directory", default="resources"))

    # Load history file consisting of already downloaded resources
    self.history_path = path(self.directory + "/.cosmid.yaml")
    self.history = HistoryReader(self.history_path)

    # Set up a :class:`cosmid.messenger.Messenger`
    self.messenger = Messenger("cosmid")

  def get(self, resource_id, type_="class"):
    """
    <public> Returns an instance of the specified resource class (or the
    resource module itself). Dodges an ``ImportError`` when failing to import
    a resource and returns ``None`` instead.

    .. code-block:: python

      >>> resource = registry.get("ccds")
      >>> resource.latest()
      'Hs104'

    :param str resource_id: The resource key (name of module)
    :param str type_: ``"class"`` for a ``Resource`` instance, ``"module"``
                      for the imported module
    :returns: A class instance of the resource, the module, or ``None`` if
              the import failed
    :raises ValueError: If ``type_`` is neither ``"class"`` nor ``"module"``
    """
    # Reject bad arguments up front so the ``try`` only guards the import
    if type_ not in ("class", "module"):
      raise ValueError("Argument must be either 'class' or 'module'.")

    try:
      if type_ == "class":
        # Instantiation stays inside the ``try``: an ``ImportError`` raised
        # while constructing the resource is dodged as well
        return load_class("cosmid.resources.{}.Resource".format(resource_id))()

      return importlib.import_module("cosmid.resources." + resource_id)

    except ImportError:
      return None

  def grab(self, resource_id, target, collapse=False):
    """
    <public> Returns all that's necessary to download a specific resource.
    The method will try to correct both ``resource_id`` and the ``target``
    release tag.

    :param str resource_id: What resource to download
    :param str target: What release of the resource to download
    :param bool collapse: (optional) Store all files directly in the storage
                          directory instead of one subdirectory per resource
    :returns: ``(resource, dl_paths, save_paths, version)`` or four ``None``
              when nothing should be downloaded
    :rtype: tuple
    """
    nothing = (None, None, None, None)

    # Keep the user's query around; ``matchOne`` returns ``None`` on failure
    # and the warning below should still show what was asked for
    query = resource_id
    resource_ids = [item[0] for item in self.ls()]
    resource_id = self.matchOne(query, resource_ids)

    if resource_id is None:
      message = "Couldn't match resource ID: '{}'".format(query)
      self.messenger.send("warning", message)

      return nothing

    # Get the resource; ``get`` returns ``None`` when the import fails
    resource = self.get(resource_id)

    if resource is None:
      message = "Couldn't import resource: '{}'".format(resource_id)
      self.messenger.send("warning", message)

      return nothing

    # Now let's figure out the version
    # No specified version will match to the latest resource release
    if target == "latest":
      versions = []
      version = resource.latest()
    else:
      versions = resource.versions()
      version = self.matchOne(target, versions)

    if version is None:
      # ``map(str, ...)`` since version tags aren't guaranteed to be strings
      message = ("Couldn't match version '{id}#{v}'; {vers}"
                 .format(v=target, id=resource.id,
                         vers=", ".join(map(str, versions))))

      self.messenger.send("warning", message)

      return nothing

    # Get the goahead! (that we haven't already downloaded it)
    if not self.goahead(resource, version):
      # The resource was already downloaded
      return nothing

    # Finally we can determine the paths to download and save the files
    dl_paths = resource.paths(version)

    # The user can select to store all downloaded files in the same
    # directory, or by default separate resources into subdirectories
    resource_dir = "" if collapse else "/" + resource.id

    save_paths = [("{dir}{mid}/{file}"
                   .format(dir=self.directory, mid=resource_dir, file=name))
                  for name in resource.names]

    # Add the resource to the history file as downloaded
    # NOTE(review): this is recorded before the caller has actually
    # downloaded anything -- confirm that is intended.
    self.history.add(resource_id, {
      "version": version,
      "target": target,
      "names": resource.names,
      "sources": dl_paths
    })

    return resource, dl_paths, save_paths, version

  def ls(self):
    """
    <public> Returns a list of resource IDs and docstrings for all the
    included resource modules.

    *Reference*: http://stackoverflow.com/questions/1707709/list-all-the-modules-that-are-part-of-a-python-package

    .. code-block:: python

      >>> registry.ls()
      [('ccds', 'A curated database of generic element'), ...]

    :returns: A list of tuples: ``(resource_id, docstring)``; the docstring
              is ``None`` for a module that couldn't be imported
    :rtype: list
    """
    # Store everything here
    items = []

    prefix = resources.__name__ + "."
    # Fetch all the resource modules
    modules = pkgutil.iter_modules(resources.__path__, prefix)

    # Loop over all resource modules
    for _importer, modpath, _ispkg in modules:
      # Strip path
      modname = modpath.split(".")[-1]

      # Import the resource module (``None`` if the import failed)
      module = self.get(modname, type_="module")

      # Save name and docstring
      docstring = module.__doc__ if module is not None else None
      items.append((modname, docstring))

    return items

  def search(self, query, limit=5):
    """
    <public> Fuzzy matches a query string against each of the resource IDs and
    returns a limited number of results in order of match score.

    .. code-block:: python

      >>> registry.search("asmebly", limit=2)
      [('ensembl_assembly', 68),
       ('ncbi_assembly', 68)]

    :param str query: A string to match against the resource IDs
    :param int limit: (optional) A maximum number of results to return
    :returns: A list of tuples: ``(resource_id, score)``
    :rtype: list
    """
    # Focus on the resource IDs of all the available resources
    resource_ids = [item[0] for item in self.ls()]

    # Fuzzy match against the resource IDs and return in order of best match
    return process.extract(query, resource_ids, limit=limit)

  def matchOne(self, target, options, threshold=60):
    """
    <public> Fuzzy matches e.g. a target version tag against a list of options.
    Returns the most likely match if the match score is sufficient.

    .. code-block:: python

      >>> resource = registry.get("ccds")
      >>> registry.matchOne(104, resource.versions())
      'Hs104'

    :param object target: Any Python object to match with
    :param list options: A list of possible options to match against
    :param int threshold: A lower threshold for accepting a best match
    :returns: The best matching option as a string, or ``None`` when there
              are no options or the score is below the threshold
    :rtype: str or None
    """
    # Materialize the options: ``map`` is lazy on Python 3 and there is
    # nothing to match against when the list is empty
    candidates = [str(option) for option in options]
    if not candidates:
      return None

    # Match against the options and extract the top match only
    best = process.extractOne(target, candidates)
    if best is None:
      return None

    result, score = best

    # Arbitrary lower limit for returning a *matching* result
    if score >= threshold:
      return result

    return None

  def goahead(self, resource, version):
    """
    Determines whether there's any point in going ahead with a download.

    :param resource: A resource instance exposing an ``id`` attribute
    :param version: The release tag about to be downloaded
    :returns: ``False`` if that version is already in the history
    :rtype: bool
    """
    # Get any currently downloaded resources
    current = self.history.find(resource.id, default={})

    # Make sure we haven't already downloaded the resource
    if current.get("version") == version:
      message = "'{}' already downloaded and up-to-date.".format(resource.id)
      self.messenger.send("update", message)

      return False

    return True
Exemple #4
0
class Registry(object):
    """
    Hub of-sorts to talk with different `Cosmid` related files and resources.
    Can be seen as the API endpoint for `Cosmid`.
    """
    def __init__(self):
        super(Registry, self).__init__()

        # Set up YAML parser for optional config file
        self.config_path = path("cosmid.yaml")
        self.config = ConfigReader(self.config_path)

        # Extract stuff from config
        self.email = self.config.find("email")

        # Path to resource storage directory
        self.directory = path(
            self.config.find("directory", default="resources"))

        # Load history file consisting of already downloaded resources
        self.history_path = path(self.directory + "/.cosmid.yaml")
        self.history = HistoryReader(self.history_path)

        # Set up a :class:`cosmid.messenger.Messenger`
        self.messenger = Messenger("cosmid")

    def get(self, resource_id, type_="class"):
        """
        <public> Returns an instance of the specified resource class (or the
        resource module itself). Dodges an ``ImportError`` when failing to
        import a resource and returns ``None`` instead.

        .. code-block:: python

          >>> resource = registry.get("ccds")
          >>> resource.latest()
          'Hs104'

        :param str resource_id: The resource key (name of module)
        :param str type_: ``"class"`` for a ``Resource`` instance,
                          ``"module"`` for the imported module
        :returns: A class instance of the resource, the module, or ``None``
                  if the import failed
        :raises ValueError: If ``type_`` is neither ``"class"`` nor
                            ``"module"``
        """
        # Reject bad arguments up front so the ``try`` only guards the import
        if type_ not in ("class", "module"):
            raise ValueError("Argument must be either 'class' or 'module'.")

        try:
            if type_ == "class":
                # Instantiation stays inside the ``try``: an ``ImportError``
                # raised while constructing the resource is dodged as well
                return load_class(
                    "cosmid.resources.{}.Resource".format(resource_id))()

            return importlib.import_module("cosmid.resources." + resource_id)

        except ImportError:
            return None

    def grab(self, resource_id, target, collapse=False):
        """
        <public> Returns all that's necessary to download a specific resource.
        The method will try to correct both ``resource_id`` and the ``target``
        release tag.

        :param str resource_id: What resource to download
        :param str target: What release of the resource to download
        :param bool collapse: (optional) Store all files directly in the
                              storage directory instead of one subdirectory
                              per resource
        :returns: ``(resource, dl_paths, save_paths, version)`` or four
                  ``None`` when nothing should be downloaded
        :rtype: tuple
        """
        nothing = (None, None, None, None)

        # Keep the user's query around; ``matchOne`` returns ``None`` on
        # failure and the warning below should still show what was asked for
        query = resource_id
        resource_ids = [item[0] for item in self.ls()]
        resource_id = self.matchOne(query, resource_ids)

        if resource_id is None:
            message = "Couldn't match resource ID: '{}'".format(query)
            self.messenger.send("warning", message)

            return nothing

        # Get the resource; ``get`` returns ``None`` when the import fails
        resource = self.get(resource_id)

        if resource is None:
            message = "Couldn't import resource: '{}'".format(resource_id)
            self.messenger.send("warning", message)

            return nothing

        # Now let's figure out the version
        # No specified version will match to the latest resource release
        if target == "latest":
            versions = []
            version = resource.latest()
        else:
            versions = resource.versions()
            version = self.matchOne(target, versions)

        if version is None:
            # ``map(str, ...)`` since version tags aren't guaranteed to be
            # strings
            message = ("Couldn't match version '{id}#{v}'; {vers}".format(
                v=target, id=resource.id,
                vers=", ".join(map(str, versions))))

            self.messenger.send("warning", message)

            return nothing

        # Get the goahead! (that we haven't already downloaded it)
        if not self.goahead(resource, version):
            # The resource was already downloaded
            return nothing

        # Finally we can determine the paths to download and save the files
        dl_paths = resource.paths(version)

        # The user can select to store all downloaded files in the same
        # directory, or by default separate resources into subdirectories
        resource_dir = "" if collapse else "/" + resource.id

        save_paths = [("{dir}{mid}/{file}".format(dir=self.directory,
                                                  mid=resource_dir,
                                                  file=name))
                      for name in resource.names]

        # Add the resource to the history file as downloaded
        # NOTE(review): this is recorded before the caller has actually
        # downloaded anything -- confirm that is intended.
        self.history.add(
            resource_id, {
                "version": version,
                "target": target,
                "names": resource.names,
                "sources": dl_paths
            })

        return resource, dl_paths, save_paths, version

    def ls(self):
        """
        <public> Returns a list of resource IDs and docstrings for all the
        included resource modules.

        *Reference*: http://stackoverflow.com/questions/1707709/list-all-the-modules-that-are-part-of-a-python-package

        .. code-block:: python

          >>> registry.ls()
          [('ccds', 'A curated database of generic element'), ...]

        :returns: A list of tuples: ``(resource_id, docstring)``; the
                  docstring is ``None`` for a module that couldn't be imported
        :rtype: list
        """
        # Store everything here
        items = []

        prefix = resources.__name__ + "."
        # Fetch all the resource modules
        modules = pkgutil.iter_modules(resources.__path__, prefix)

        # Loop over all resource modules
        for _importer, modpath, _ispkg in modules:
            # Strip path
            modname = modpath.split(".")[-1]

            # Import the resource module (``None`` if the import failed)
            module = self.get(modname, type_="module")

            # Save name and docstring
            docstring = module.__doc__ if module is not None else None
            items.append((modname, docstring))

        return items

    def search(self, query, limit=5):
        """
        <public> Fuzzy matches a query string against each of the resource IDs
        and returns a limited number of results in order of match score.

        .. code-block:: python

          >>> registry.search("asmebly", limit=2)
          [('ensembl_assembly', 68),
           ('ncbi_assembly', 68)]

        :param str query: A string to match against the resource IDs
        :param int limit: (optional) A maximum number of results to return
        :returns: A list of tuples: ``(resource_id, score)``
        :rtype: list
        """
        # Focus on the resource IDs of all the available resources
        resource_ids = [item[0] for item in self.ls()]

        # Fuzzy match against the resource IDs and return in order of best
        # match
        return process.extract(query, resource_ids, limit=limit)

    def matchOne(self, target, options, threshold=60):
        """
        <public> Fuzzy matches e.g. a target version tag against a list of
        options. Returns the most likely match if the match score is
        sufficient.

        .. code-block:: python

          >>> resource = registry.get("ccds")
          >>> registry.matchOne(104, resource.versions())
          'Hs104'

        :param object target: Any Python object to match with
        :param list options: A list of possible options to match against
        :param int threshold: A lower threshold for accepting a best match
        :returns: The best matching option as a string, or ``None`` when
                  there are no options or the score is below the threshold
        :rtype: str or None
        """
        # Materialize the options: ``map`` is lazy on Python 3 and there is
        # nothing to match against when the list is empty
        candidates = [str(option) for option in options]
        if not candidates:
            return None

        # Match against the options and extract the top match only
        best = process.extractOne(target, candidates)
        if best is None:
            return None

        result, score = best

        # Arbitrary lower limit for returning a *matching* result
        if score >= threshold:
            return result

        return None

    def goahead(self, resource, version):
        """
        Determines whether there's any point in going ahead with a download.

        :param resource: A resource instance exposing an ``id`` attribute
        :param version: The release tag about to be downloaded
        :returns: ``False`` if that version is already in the history
        :rtype: bool
        """
        # Get any currently downloaded resources
        current = self.history.find(resource.id, default={})

        # Make sure we haven't already downloaded the resource
        if current.get("version") == version:
            message = "'{}' already downloaded and up-to-date.".format(
                resource.id)
            self.messenger.send("update", message)

            return False

        return True