Example #1
0
    def __init__(self, organism="Homo sapiens", cache=False, verbose=True):
        """.. rubric:: Constructor

        :param str organism: the organism to look at. Homo sapiens
            is the default. Other possible organisms can be found
            in :attr:`valid_organisms`.
        :param bool cache: forwarded as is to :class:`IntactComplex`
        :param bool verbose: forwarded as is to :class:`IntactComplex`
            (it used to be read without being defined, which raised a
            NameError)

        """
        self.logging = Logging()

        self.devtools = DevTools()
        self.webserv = IntactComplex(verbose=verbose, cache=cache)
        df = self.webserv.search("*", frmt="pandas")
        self.df = df

        #: raw organism names as found in the ``organismName`` column
        self.organisms = list(set(df["organismName"]))

        #: organism names with everything after the first ";" removed
        self.valid_organisms = [x.split(";")[0] for x in self.organisms]

        self._organism = None
        # Compare against the stripped names: the raw entries carry a
        # "; <suffix>" part, so a plain organism name never matched them.
        if organism in self.valid_organisms:
            self.organism = organism
        else:
            print("Organism not set yet. ")

        # This will be populated on request as a cache/buffer
        self._details = None
        self._complexes = None
Example #2
0
    def __init__(self, name, url=None, verbose=True, requests_per_sec=10):
        """.. rubric:: Constructor

        :param str name: a name for this service
        :param str url: its URL
        :param bool verbose: prints informative messages if True (default is
            True)
        :param requests_per_sec: maximum number of requests per second
            (default is 10). You can change that value. If you reach the
            limit, an error is raised. The reason for this limitation is
            that some services (e.g., NCBI) may blacklist your IP.
            If you need or can do more (e.g., ChEMBL does not seem to have
            restrictions), change the value. You can also have several
            instances but again, if you send too many requests at the same
            time, your future requests may be restricted. Currently
            implemented for REST only


        All instances have an attribute called :attr:`~Service.logging` that
        is an instance of the :mod:`logging` module. It can be used to print
        information, warning, error messages::

            self.logging.info("informative message")
            self.logging.warning("warning message")
            self.logging.error("error message")

        The attribute :attr:`~Service.debugLevel` can be used to set the
        behaviour of the logging messages. If the argument verbose is True,
        the debugLevel is set to INFO. If verbose is False, the debugLevel is
        set to WARNING. However, you can use the :attr:`debugLevel` attribute
        to change it to one of DEBUG, INFO, WARNING, ERROR, CRITICAL.
        debugLevel=WARNING means that only WARNING, ERROR and CRITICAL
        messages are shown.

        """
        super(Service, self).__init__()
        self.requests_per_sec = requests_per_sec
        self.name = name
        self.logging = Logging("bioservices:%s" % self.name, verbose)

        self._url = url
        try:
            if self.url is not None:
                # This is only a reachability probe: close the response
                # right away so that the connection is not leaked.
                urlopen(self.url).close()
        except Exception:
            # Best effort: an unreachable URL is reported, not fatal.
            self.logging.warning("The URL (%s) provided cannot be reached." %
                                 self.url)
        self._easyXMLConversion = True

        # used by HGNC where some XML contains non-utf-8 characters !!
        # should be able to fix it with requests once HGNC works again
        #self._fixing_unicode = False
        #self._fixing_encoding = "utf-8"

        self.devtools = DevTools()
        self.settings = BioServicesConfig()

        self._last_call = 0
Example #3
0
    def __init__(self, name, url=None, verbose=True, requests_per_sec=3):
        """.. rubric:: Constructor

        :param str name: a name for this service
        :param str url: its URL
        :param bool verbose: prints informative messages if True (default is
            True)
        :param requests_per_sec: maximum number of requests per second
            (default is 3). You can change that value. If you reach the
            limit, an error is raised. The reason for this limitation is
            that some services (e.g., NCBI) may blacklist your IP.
            If you need or can do more (e.g., ChEMBL does not seem to have
            restrictions), change the value. You can also have several
            instances but again, if you send too many requests at the same
            time, your future requests may be restricted. Currently
            implemented for REST only


        All instances have an attribute called :attr:`~Service.logging` that
        is an instance of the :mod:`logging` module. It can be used to print
        information, warning, error messages::

            self.logging.info("informative message")
            self.logging.warning("warning message")
            self.logging.error("error message")

        The attribute :attr:`~Service.debugLevel` can be used to set the
        behaviour of the logging messages. If the argument verbose is True,
        the debugLevel is set to INFO. If verbose is False, the debugLevel is
        set to WARNING. However, you can use the :attr:`debugLevel` attribute
        to change it to one of DEBUG, INFO, WARNING, ERROR, CRITICAL.
        debugLevel=WARNING means that only WARNING, ERROR and CRITICAL
        messages are shown.

        """
        super(Service, self).__init__()
        self.requests_per_sec = requests_per_sec

        self.name = name
        self.logging = Logging("bioservices:%s" % self.name, verbose)

        self._url = url
        try:
            if self.url is not None:
                # This is only a reachability probe: close the response
                # right away so that the connection is not leaked.
                urlopen(self.url).close()
        except Exception:
            # Best effort: an unreachable URL is reported, not fatal.
            self.logging.warning("The URL (%s) provided cannot be reached." % self.url)
        self._easyXMLConversion = True

        # used by HGNC where some XML contains non-utf-8 characters !!
        # should be able to fix it with requests once HGNC works again
        #self._fixing_unicode = False
        #self._fixing_encoding = "utf-8"

        self.devtools = DevTools()
        self.settings = BioServicesConfig()
Example #4
0
    def __init__(self, package, build_options="--no-build-vignettes"):
        """Record the package to distribute and the build settings.

        :param str package: name of a valid package (e.g., CellNOptR)
        :param build_options: additional build options for R (default is
            --no-build-vignettes)

        The logging level can be changed afterwards
        (e.g., self.logging.debugLevel="WARNING")

        """
        # what has to be built, and how
        self.package = package
        self.build_options = build_options
        self.package_path = os.sep.join(("packages", package))

        # where the sources come from and what must be left out
        self.url = "https://github.com/cellnopt/CellNOptR"
        self.exclude = [".git"]

        # working locations: current directory now, temporary one unset
        self.cwd = os.getcwd()
        self.dtemp = None

        self.logging = Logging("INFO")
Example #5
0
def test_logging():
    """Exercise the ``level`` property of :class:`Logging` and its copies.

    The original version compared the level (``l.level == "INFO"``) instead
    of setting it, and its negative check relied on a misspelled name
    (``assert Fales``) inside a bare ``except``, so it could never fail.
    """
    logger = Logging("INFO")
    logger.info("test")

    # The setter is called with each level name used by the original test.
    for level in ("WARNING", "INFO", "CRITICAL", "ERROR", "DEBUG"):
        logger.level = level

    # Booleans are passed to the setter as well.
    logger.level = True
    logger.level = False

    # The original test expects "WARN" to be rejected by the setter.
    # NOTE(review): confirm that Logging really refuses this spelling.
    try:
        logger.level = "WARN"
    except Exception:
        pass
    else:
        raise AssertionError('setting level to "WARN" should have failed')

    # FIXME: is this working? Weird syntax in logging_tools.
    import copy
    copy.copy(logger)
    copy.deepcopy(logger)
Example #6
0
 def __init__(self, verbose=True):
     """Create the R session and the logger, then call :meth:`update`.

     :param verbose: forwarded as is to :class:`Logging`
     """
     self.session = RSession()
     log = Logging(verbose)
     self.logging = log
     log.info('Fetching package information')
     self.update()
Example #7
0
class RPackageManager(object):
    """Implements a R package manager from Python

    So far you can install a package (from source, or CRAN, or biocLite)

    ::

        pm = RPackageManager()
        pm.installed['Version']


    You can access all the information within a dataframe called
    **packages** where indices are the package names. Some aliases are
    provided as attributes (e.g., available, installed)


    """
    cran_repos = "http://cran.univ-lyon1.fr/"

    def __init__(self, verbose=True):
        """Create the R session and the logger, then call :meth:`update`.

        :param verbose: forwarded as is to :class:`Logging`
        """
        self.session = RSession()
        self.logging = Logging(verbose)
        self.logging.info('Fetching package information')
        self.update()

    def _update(self):
        """Query R for the installed and available packages.

        Fills ``self._packages`` (installed packages, indexed by name) and
        ``self._status`` (the ``inst`` and ``avail`` tables, indexed by name).
        """
        # Local imports kept from the original code; presumably the strings
        # evaluated below refer to these modules -- TODO confirm.
        import numpy
        import pandas
        # figure out the installed packages first
        code = """rvar_packages = as.data.frame(installed.packages())"""
        self.session.run(code)
        s = self.session.rvar_packages
        # FIXME. these lines are needed as a hack related to pyper.
        # NOTE(review): eval() runs whatever the R session returned; this is
        # only acceptable as long as that session is trusted.
        try:
            s = s.replace("\n", "")
            df = eval(s)
        except Exception:
            # not a string to evaluate: use the value as it is
            df = s

        df.set_index('Package', inplace=True)
        self._packages = df.copy()

        # Now, fetch what is possible to install from the default cran repo.
        # The trailing slash is dropped to avoid a "//" in the final URL.
        code = """rvar_status=packageStatus(repos="%s/src/contrib")"""
        code = code % self.cran_repos.rstrip("/")

        self.session.run(code)
        s = self.session.rvar_status

        # FIXME. Same pyper-related hack (and same eval caveat) as above.
        try:
            s = s.replace("\n", "")
            res = eval(s)
        except Exception:
            res = s
        res['inst'].set_index('Package', inplace=True)
        res['avail'].set_index('Package', inplace=True)
        self._status = res

    def update(self):
        """If you install/remove packages yourself elsewhere, you may need to
        call this function to update the package manager"""
        try:
            self._update()
        except Exception:
            # best effort: a failed refresh is reported, not raised
            self.logging.warning("Could not update the packages. Call update() again")

    def _compat_version(self, version):
        """Return *version* with every "-" replaced by "a"."""
        return version.replace("-", "a")

    def _get_installed(self):
        # we do not buffer because packages may be removed manually or from R
        # or using the remove method, ....
        return self._status['inst']
    # The description must be given as ``doc``: passed positionally it was
    # taken as the setter of the property.
    installed = property(_get_installed,
                         doc="returns list of packages installed as a dataframe")

    def _get_available(self):
        # we do not buffer because packages may be removed manually or from R
        # or using the remove method, ....
        return self._status['avail']
    available = property(_get_available,
                         doc="returns list of packages available as a dataframe")

    def _get_packages(self):
        # do not buffer since it may change in many places
        return self._packages
    packages = property(_get_packages)

    def get_package_latest_version(self, package):
        """Get latest version available of a package

        :raises KeyError: if the package is not in :attr:`available`
        """
        # .loc replaces the .ix indexer, which was removed from pandas
        return self.available['Version'].loc[package]

    def get_package_version(self, package):
        """Get version of an installed package

        An error is logged if the package is not installed; the lookup then
        raises a KeyError.
        """
        if package not in self.installed.index:
            self.logging.error("package {0} not installed".format(package))
        return self.installed['Version'].loc[package]

    def biocLite(self, package=None, suppressUpdates=True, verbose=False):
        """Installs one or more biocLite packages

        :param package: a package name (string) or list of package names (list
            of strings) that will be installed from BioConductor. If package
            is set to None, all packages already installed will be updated.
        :param suppressUpdates: forwarded to the ``biocLite`` function
        :param verbose: forwarded to the ``biocLite`` function

        Packages that are already installed are skipped.
        """
        if isinstance(package, str):
            if package not in self.installed.index:
                biocLite(package, suppressUpdates, verbose=verbose)
        elif isinstance(package, list):
            for pkg in package:
                self.logging.info("Installing %s" % pkg)
                if self.is_installed(pkg) is False:
                    biocLite(pkg, suppressUpdates, verbose=verbose)
        else:  # trying other cases (e.g., None updates biocLite itself).
            biocLite(package, suppressUpdates, verbose=verbose)
        self.update()

    def _isLocal(self, pkg):
        """Return True if *pkg* is an existing path on the file system."""
        return os.path.exists(pkg)

    def remove(self, package):
        """Remove a package (or list) from local repository"""
        rcode = """remove.packages("%s")"""
        if isinstance(package, str):
            package = [package]
        for pkg in package:
            if pkg in self.installed.index:
                self.session(rcode % pkg)
            else:
                self.logging.warning("Package not found. Nothing to remove")
        self.update()

    def require(self, pkg, version):
        """Check if a package with given version is available

        :return: True if *pkg* is installed with a version greater than or
            equal to *version*, False otherwise.
        """
        if pkg not in self.installed.index:
            self.logging.info("Package %s not installed" % pkg)
            return False
        # the original code called a method that does not exist
        # (packageVersion)
        currentVersion = self.get_package_version(pkg)
        return self._get_version(currentVersion) >= self._get_version(version)

    def _install_package(self, packageName, dependencies=True):
        """Installs one or more CRAN packages

        :param packageName: a package name or a list of package names
        :param dependencies: forwarded to the ``install_package`` function

        .. todo:: check if it is already available to prevent reinstallation ?
        """
        repos = self.cran_repos
        if isinstance(packageName, str):
            packageName = [packageName]
        for pkg in packageName:
            # The package is installed in both cases; only the message differs.
            if self.is_installed(pkg) is False:
                self.logging.info("Package not found. Installing %s..." % pkg)
            else:
                self.logging.info("Package %s found. " % pkg)
            install_package(pkg, dependencies=dependencies,
                            repos=repos)
        self.update()

    def install(self, pkg, require=None, update=True, reinstall=False):
        """install a package automatically scanning CRAN and biocLite repos

        if require is not set and update is True, when a newer version of a
        package is available, it is installed

        :param pkg: a package name or a list of package names
        :param require: minimal version required (optional)
        :param bool update: see above
        :param bool reinstall: install again even if the installed version
            fulfills the requirement
        """
        from easydev import to_list
        for name in to_list(pkg):
            self._install(name, require=require, update=update, reinstall=reinstall)

    def _install(self, pkg, require=None, update=True, reinstall=False):
        """Install a single package; see :meth:`install` for the parameters."""
        # ``update`` used to default to the name ``update``, i.e. the method
        # defined above, so ``update is True`` never held by default.

        # LOCAL file
        if self._isLocal(pkg):
            # if a local file, we do not want to jump to biocLite or CRAN. Let
            # us install it directly. We cannot check version yet so we will
            # overwrite what is already installed
            self.logging.warning("Installing from source")
            self._install_package(pkg)
            return

        # From CRAN
        if self.is_installed(pkg):
            currentVersion = self.get_package_version(pkg)
            # if not provided, require should be the latest version
            if require is None and update is True:
                try:
                    require = self.get_package_latest_version(pkg)
                except Exception:
                    # a non-cran package (bioclite maybe)
                    pass

            if require is None:
                self.logging.info("%s already installed with version %s" %
                                  (pkg, currentVersion))
                return

            # from here on, require is set: is it the required version ?
            fulfilled = self._get_version(currentVersion) >= self._get_version(require)
            if fulfilled and reinstall is False:
                # if so, nothing to do
                self.logging.info("%s already installed with required version %s"
                                  % (pkg, currentVersion))
            else:
                # Try updating
                self.logging.info("Updating")
                self._install_package(pkg)
                currentVersion = self.get_package_version(pkg)
                if self._get_version(currentVersion) < self._get_version(require):
                    self.logging.warning("%s installed but current version (%s) does not fulfill your requirement" %
                                         (pkg, currentVersion))

        elif pkg in self.available.index:
            self._install_package(pkg)
        else:
            # maybe a biocLite package ?
            # require is ignored. The latest will be installed
            self.logging.info("Trying to find the package on bioconductor")
            self.biocLite(pkg)
            if require is None:
                return
            currentVersion = self.get_package_version(pkg)
            # Warn when the installed version is still too small. The test
            # used to be inverted and the message had two placeholders for
            # three values.
            if self._get_version(currentVersion) < self._get_version(require):
                self.logging.warning("%s installed but version %s is smaller than required %s (even after update)" %
                                     (pkg, currentVersion, require))

    def _get_version(self, version):
        """Convert a version string into a ``StrictVersion``.

        Falls back on replacing "-" by "a" for versions that do not follow
        the expected convention.
        """
        # some packages do not use the correct version convention
        try:
            return StrictVersion(version)
        except Exception:
            try:
                return StrictVersion(version.replace("-", "a"))
            except Exception:
                # snowfall package example was 1.86-6.1
                # This becomes 1.86a61 which is not great but no workaround
                # for now
                left, right = version.split("-")
                version = left + "a" + right.replace('.', '')
                return StrictVersion(version)

    def is_installed(self, pkg_name):
        """Return True if *pkg_name* is in the index of :attr:`installed`."""
        return pkg_name in self.installed.index
Example #8
0
class Complexes(object):
    """Manipulate complexes of Proteins

    This class uses Intact Complex database to extract information about
    complexes of proteins.

    When creating an instance, the default organism is "Homo sapiens".
    The organism can be set to another one during the instantiation or later::

        >>> from biokit.network.complexes import Complexes
        >>> c = Complexes(organism='Homo sapiens')
        >>> c.organism = 'Rattus norvegicus'

    Valid organisms can be found in :attr:`valid_organisms`. When changing the
    organism, a request to the Intact database is sent, which may take some
    time to update. Once done, information related to this organism is stored
    in the :attr:`df` attribute, which is a Pandas dataframe. It
    contains 4 columns. Here is for example one row::

        complexAC                                             EBI-2660609
        complexName                            COP9 signalosome variant 1
        description     Essential regulator of the ubiquitin (Ubl) con...
        organismName                                   Homo sapiens; 9606

    This is basic information but once a complex accession (e.g., EBI-2660609)
    is known, you can retrieve detailed information. This is done
    automatically for all the accessions when needed. The first time, it will
    take a while (20 seconds for 250 accessions) but will be cached for this
    instance.

    The :attr:`complexes` contains all details about the entries found in
    :attr:`df`. It is a dictionary where keys are the complex accessions. For
    instance::

        >>> c.complexes['EBI-2660609']

    In general, one is interested in the participants of the complex, that is
    the proteins that form the complex. Another attribute is set for you::

        >>> c.participants['EBI-2660609']

    Finally, you may even want to obtain just the identifier of the
    participants for each complex. This is stored in the
    :attr:`identifiers`::

        >>> c.identifiers['EBI-2660609']

    Note however, that the identifiers are not necessarily uniprot
    identifiers. Could be ChEBI or sometimes even set to None. The
    :meth:`remove_homodimers` method removes the complexes with fewer than 2
    identifiers that are not None.

    Some basic statistics can be printed with :meth:`stats` that indicates
    the number of complexes, number of identifiers in those complexes, and
    number of unique identifiers. A histogram of number of appearances of each
    identifier is also shown.

    The :meth:`hist_participants` shows the number of participants per complex.

    Finally, the :meth:`search_complexes` can be used in the context of
    logic modelling to infer the AND gates from a list of uniprot identifiers
    provided by the user. See :meth:`search_complexes` for details.

    Access to the Intact Complex database is performed using the
    package BioServices provided in Pypi.
    """

    def __init__(self, organism="Homo sapiens", cache=False, verbose=True):
        """.. rubric:: Constructor

        :param str organism: the organism to look at. Homo sapiens
            is the default. Other possible organisms can be found
            in :attr:`valid_organisms`.
        :param bool cache: forwarded as is to :class:`IntactComplex`
        :param bool verbose: forwarded as is to :class:`IntactComplex`
            (it used to be read without being defined, which raised a
            NameError)

        """
        self.logging = Logging()

        self.devtools = DevTools()
        self.webserv = IntactComplex(verbose=verbose, cache=cache)
        df = self.webserv.search("*", frmt="pandas")
        self.df = df

        #: raw organism names as found in the ``organismName`` column
        self.organisms = list(set(df["organismName"]))

        #: organism names with everything after the first ";" removed
        self.valid_organisms = [x.split(";")[0] for x in self.organisms]

        self._organism = None
        # Compare against the stripped names: the raw entries carry a
        # "; <suffix>" part, so a plain organism name never matched them.
        if organism in self.valid_organisms:
            self.organism = organism
        else:
            print("Organism not set yet. ")

        # This will be populated on request as a cache/buffer
        self._details = None
        self._complexes = None

    def _get_organism(self):
        return self._organism

    def _set_organism(self, organism):
        # validate the name, then reload the dataframe for that organism
        self.devtools.check_param_in_list(
            organism, [str(x.split(";")[0]) for x in self.valid_organisms]
        )
        self._organism = organism

        self.df = self.webserv.search(
            "*", frmt="pandas", filters='species_f:("%s")' % self.organism
        )
        # the cached details belong to the previous organism
        self._complexes = None

    organism = property(
        _get_organism, _set_organism, doc="Getter/Setter of the organism"
    )

    def hist_participants(self):
        """Histogram of the number of participants per complex

        :return: a dictionary with complex identifiers as keys and
            number of participants as values

        ::

            from biokit.network.complexes import Complexes
            c = Complexes()
            c.hist_participants()

        """
        count = {}
        for identifier, details in self.complexes.items():
            count[identifier] = len(details["participants"])

        N = list(count.values())
        if N:
            # max() of an empty list raises: nothing to draw in that case
            pylab.hist(N, bins=range(0, max(N)))
        pylab.title("Number of participants per complex")
        pylab.grid()
        return count

    def stats(self):
        """Prints some stats about the number of complexes and histogram of the
        number of appearances of each species"""
        from collections import Counter

        species = []
        for participants in self.participants.values():
            species.extend(x["identifier"] for x in participants)

        # one pass instead of calling list.count for every unique species
        appearances = Counter(species)
        N = list(appearances.values())
        if N:
            pylab.hist(N, bins=range(0, max(N)))
        pylab.title("Number of appaerances of each species")
        pylab.grid()
        print(
            """There are %s complexes involving %s participants with %s unique species. """
            % (len(self.complexes), len(species), len(appearances))
        )

    def _get_participants(self):
        participants = {}
        for k, v in self.complexes.items():
            participants[k] = v["participants"]
        return participants

    participants = property(
        _get_participants, doc="""Getter of the complex participants (full details)"""
    )

    def _get_identifiers(self):
        identifiers = {}
        for k, v in self.participants.items():
            identifiers[k] = [x["identifier"] for x in v]
        return identifiers

    identifiers = property(
        _get_identifiers,
        doc="""Getter of the identifiers of the complex participants""",
    )

    def _get_complexes(self):
        # loaded on first access; a shallow copy is handed out
        if self._complexes is None:
            self._load_complexes()
        return self._complexes.copy()

    complexes = property(
        _get_complexes, doc="""Getter of the complexes (full details)"""
    )

    def _load_complexes(self, show_progress=True):
        """Fetch the details of every accession found in :attr:`df`.

        :param bool show_progress: animate a progress bar while loading
        """
        from easydev import Progress

        pb = Progress(len(self.df.complexAC))
        complexes = {}
        self.logging.info("Loading all details from the IntactComplex database")
        for i, identifier in enumerate(self.df.complexAC):
            complexes[identifier] = self.webserv.details(identifier)
            if show_progress:
                pb.animate(i + 1)
        self._complexes = complexes

    def remove_homodimers(self):
        """Remove the complexes that have at most one identifier that is
        not None


        :return: list of complex identifiers that have been removed.
        """

        # None are actually homo dimers
        toremove = []
        for k, this in self.identifiers.items():
            remains = [x for x in this if x is not None]
            if len(remains) <= 1:
                toremove.append(k)
        self.logging.info("removing %s homodimers complexes" % len(toremove))
        for this in toremove:
            del self._complexes[this]
        return toremove

    def search_complexes(self, user_species, verbose=False):
        """Given a list of uniprot identifiers, return complexes and
            possible complexes.

        :param list user_species: list of uniprot identifiers to be
            found in the complexes
        :param bool verbose: set the logging level to INFO (True) or ERROR
            (False) for the duration of the search
        :return: two dictionaries. First one contains the complexes
            for which all participants have been found in the user_species
            list. The second one contains complexes for which some participants
            (not all) have been found in the user_species list.

        """
        # The level lives on the logger; this class has no debugLevel
        # attribute, which the previous code tried to read.
        previous_level = self.logging.level
        self.logging.level = "INFO" if verbose else "ERROR"

        and_gates = {}
        candidates = {}
        try:
            for k, identifiers in self.identifiers.items():

                # get rid of suffixes such as -1 or -PRO_xxx
                # You may have a complex with ['P12222', 'P33333-PRO1',
                # 'P33333-PRO2'], in which case P33333 is found only once and
                # therefore the final number of found participants is not the
                # length of the complex... so duplicates are dropped
                prefixes = set(
                    x.split("-")[0] if x is not None else x for x in identifiers
                )
                N = len(prefixes)
                found = [spec for spec in user_species if spec in prefixes]

                if len(found) == N:
                    self.logging.info("Found entire complex %s " % k)
                    and_gates[k] = identifiers[:]
                elif len(found) >= 1:
                    self.logging.info(
                        "Found partial complex %s with %s participants out of %s"
                        % (k, len(found), len(identifiers))
                    )
                    candidates[k] = {"participants": identifiers, "found": found}
        finally:
            # restore the caller's level even if the search fails
            self.logging.level = previous_level
        return and_gates, candidates

    def search(self, name):
        """Search for a unique identifier (e.g. uniprot) in all complexes

        :return: list of complex identifiers where the name was found
        """
        found = []
        for k, identifiers in self.identifiers.items():
            prefixes = [x.split("-")[0] if x is not None else x for x in identifiers]
            if name in prefixes:
                self.logging.info(
                    "Found %s in complex %s (%s)" % (name, k, identifiers)
                )
                found.append(k)
        return found

    def chebi2name(self, name):
        """Return the ASCII name of a CHEBI identifier"""
        from bioservices import ChEBI

        c = ChEBI()
        name = dict(c.getLiteEntity(name)[0])["chebiAsciiName"]
        return name

    def uniprot2genename(self, name):
        """Return the gene names of a UniProt identifier

        :return: list of gene names, or None (after printing a message) if
            the lookup failed
        """
        from bioservices import UniProt

        c = UniProt(cache=True)

        try:
            res = pd.read_csv(StringIO(c.search(name, limit=1)), sep="\t")
            return list(res["Gene names"].values)
        except Exception:
            print("Could not find %s" % name)

    def report(self, species):
        """Summarise the result of :meth:`search_complexes` in a dataframe

        :param list species: list of identifiers given to
            :meth:`search_complexes`
        :return: a dataframe with one row per complete or partial complex
        """
        complete, partial = self.search_complexes(species, verbose=False)

        # Both properties rebuild their dictionary on each access: read once.
        complexes = self.complexes
        identifiers = self.identifiers

        columns = [
            "Found",
            "Participants",
            "Identifier",
            "Name",
            "Number found",
            "Number of participants",
            "Complete",
        ]
        res = dict((column, []) for column in columns)

        rows = [(k, v, True) for k, v in complete.items()]
        rows.extend((k, v["found"], False) for k, v in partial.items())

        for k, found, is_complete in rows:
            res["Name"].append(complexes[k]["name"])
            # identifiers may be None, hence the conversion before joining
            res["Found"].append(";".join(str(x) for x in found))
            res["Number found"].append(len(found))
            res["Participants"].append(";".join(str(x) for x in identifiers[k]))
            res["Number of participants"].append(len(identifiers[k]))
            res["Complete"].append(is_complete)
            res["Identifier"].append(k)

        return pd.DataFrame(res, columns=columns)
Example #9
0
class DistributeRPackage(object):
    """Class to ease distribution of CellNOptR packages

    Can be used for any repository containing valid R packages by setting the
    repository URL

        >>> d = DistributeRPackage("CNORdt")
        >>> d.distribute()

    You can also use the executable provided in cellnopt.admin package itself::

        cellnopt_distribute --package CNORdt --revision HEAD

    equivalent to (if you have the sources)::

        python distribute.py --package CNORdt --revision 666

    Version of cellnopt.admin were based on private SVN but we moved to github and
    therefore this class is now related to the github repository only.

    In practice, this is more complicated than SVN:

    - cannot get a nice revision number and be able to compare
      between revisions
    - Cannot checkout a sub directory (e.g., CNORdt)

    So, we will therefore build up all packages in one go and unfortunately
    add the commit hash long number as a tag....although the --short option
    seems to be a solution.


    .. todo:: MEIGOR
    """
    #: mapping between package name and actual directory name
    _valid_packages = {"CellNOptR": "CellNOptR",
                       "CNORdt": pj("CNOR_dt", "CNORdt"),
                       "CNORode": pj("CNOR_ode", "CNORode"),
                       "CNORfuzzy": "CNOR_fuzzy",
                       "CNORfeeder": "CNOR_add_links",
                       "MEIGOR": pj('essR', 'MEIGOR')}

    def __init__(self, package, build_options="--no-build-vignettes"):
        """

        :param str package: name of a valid package (e.g., CellNOptR)
        :param build_options: additional build options for R (default is
            --no-build-vignettes)

        You can also change the logging level (e.g., self.logging.debugLevel="WARNING")

        """
        self.url = "https://github.com/cellnopt/CellNOptR"

        self.exclude = [".git"]
        self.package = package
        # location of the package inside the checked-out repository
        self.package_path = 'packages' + os.sep + package
        # temporary working directory; created lazily
        self.dtemp = None
        self.cwd = os.getcwd()
        self.build_options = build_options
        self.logging = Logging("INFO")

    def _get_version(self):
        """Return the ``Version`` field of the package's DESCRIPTION file.

        The file is read from ``<dtemp>/<package_path>/DESCRIPTION``.

        :raises IndexError: if no line starts with ``Version``.
        """
        filename = os.path.join(self.dtemp, self.package_path, "DESCRIPTION")
        with open(filename, "r") as fh:
            data = fh.read()
        res = [x.split(':')[1].strip() for x in data.split("\n")
               if x.startswith('Version')]
        return res[0]

    def _create_temp_directory(self):
        """Create the temporary working directory and store it in ``dtemp``."""
        self.dtemp = tempfile.mkdtemp()

    def _checkout_git(self):
        """Sparse-checkout ``packages/<package>`` from :attr:`url` into ``dtemp``.

        :raises Exception: if the shell command exits with a non-zero status.
        """
        self.logging.info("1. Gettting the source from GIT --------------")
        if self.dtemp is None:
            self._create_temp_directory()

        cmd = """
         git init %(directory)s;
         cd %(directory)s;
         git remote add -f origin %(repo)s ;
         echo "packages/%(package_name)s" >> .git/info/sparse-checkout;
         git pull origin master
        """
        print(self.package, self.url, self.dtemp)
        cmd = cmd % {'package_name': self.package, 'repo': self.url,
                     'directory': self.dtemp}

        self.logging.info(cmd)
        # NOTE(review): the command is run through the shell with the package
        # name interpolated; only pass trusted package names.
        proc = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        # communicate() drains stdout; wait() alone may block forever once
        # the child has filled the pipe buffer.
        proc.communicate()
        if proc.returncode != 0:
            raise Exception("git checkout failed (exit status %s)"
                            % proc.returncode)
        self.logging.info('...done')

    @staticmethod
    def help():
        """Print a usage help message"""
        # __doc__ resolves to the *module* docstring here, which may be None
        print("\nPURPOSE:" + (__doc__ or ""))
        print("USAGE: python distribute.py --package valid_package_name")
        print("")
        print("Possible package names are %s ."
              % sorted(DistributeRPackage._valid_packages))
        #sys.exit(1)

    def distribute(self):
        """This is the main method to create package distribution.

        It follows these steps:

         #. creates temp directoy
         #. checkout of the sources from the git repository
         #. calls R CMD build

        """
        # NOTE(review): _stop and _build_R_package are not defined in this
        # excerpt of the class; they are expected to be provided elsewhere.
        if self.dtemp is None:
            self._create_temp_directory()
        try:
            self._checkout_git()
        except Exception:
            self._stop()
            raise

        self._build_R_package()
        self._stop()
Example #10
0
 def __init__(self, verbose=True):
     """Open an R session and fetch the package information.

     :param verbose: passed unchanged to ``Logging``.

     The constructor creates the ``RSession``, then the logger, logs a
     message and finally calls :meth:`update`.
     """
     self.session = RSession()
     self.logging = Logging(verbose)
     self.logging.info('Fetching package information')
     self.update()
Example #11
0
class RPackageManager(object):
    """Implements a R package manager from Python

    So far you can install a package (from source, or CRAN, or biocLite)

    ::

        pm = PackageManager()
        [(x, pm.installed[x][2]) for x in pm.installed.keys()]


    You can access to all information within a dataframe called **packages** where
    indices are the name packages. Some aliases are provided as attributes (e.g., available,
    installed)


    """
    #: CRAN mirror used to list and install packages
    cran_repos = "http://cran.univ-lyon1.fr/"

    def __init__(self, verbose=True):
        """Open an R session and fetch the package information.

        :param verbose: passed unchanged to ``Logging``.
        """
        self.session = RSession()
        self.logging = Logging(verbose)
        self.logging.info('Fetching package information')
        self.update()

    @staticmethod
    def _eval_rvar(value):
        """Convert a variable fetched from the R session (pyper hack).

        If *value* is a string it is stripped from its newlines and
        evaluated; if anything fails the value is returned as it is.
        """
        # These names must be in scope: the evaluated string may refer to them.
        import numpy  # noqa: F401
        import pandas  # noqa: F401
        try:
            value = value.replace("\n", "")
            # NOTE(review): eval() on text produced by the R session; only
            # safe as long as that session is trusted.
            return eval(value)
        except Exception:
            return value

    def _update(self):
        """Refresh ``_packages`` and ``_status`` from the R session."""
        # figure out the installed packages first
        code = """rvar_packages = as.data.frame(installed.packages())"""
        self.session.run(code)
        df = self._eval_rvar(self.session.rvar_packages)

        df.set_index('Package', inplace=True)
        self._packages = df.copy()

        # Now, fetch what is possible to install from the default cran repo
        code = """rvar_status=packageStatus(repos="%s/src/contrib")"""
        code = code % self.cran_repos

        self.session.run(code)
        res = self._eval_rvar(self.session.rvar_status)

        res['inst'].set_index('Package', inplace=True)
        res['avail'].set_index('Package', inplace=True)
        self._status = res

    def update(self):
        """If you install/remove packages yourself elsewhere, you may need to
        call this function to update the package manager"""
        try:
            #self.session.reconnect()
            self._update()
        except Exception:
            self.logging.warning("Could not update the packages. Call update() again")

    def _compat_version(self, version):
        """Return *version* with every dash replaced by the letter ``a``."""
        return version.replace("-", "a")

    def _get_installed(self):
        # we do not buffer because packages may be removed manually or from R of
        # using remove_packages method, ....
        #self._package_status()
        return self._status['inst']
    # doc must be given by keyword: the 2nd positional argument is the setter
    installed = property(_get_installed,
                         doc="returns list of packages installed as a dataframe")

    def _get_available(self):
        # we do not buffer because packages may be removed manually or from R of
        # using remove_packages method, ....
        #self._package_status()
        return self._status['avail']
    available = property(_get_available,
                         doc="returns list of packages available as a dataframe")

    def _get_packages(self):
        # do not buffer since it may change in many places
        return self._packages
    packages = property(_get_packages)

    def get_package_latest_version(self, package):
        """Get latest version available of a package

        :raises KeyError: if the package is not in :attr:`available`.
        """
        # .loc replaces the .ix indexer that was removed from pandas
        return self.available['Version'].loc[package]

    def get_package_version(self, package):
        """Get version of an install package

        :raises KeyError: if the package is not installed (an error is
            logged first).
        """
        if package not in self.installed.index:
            self.logging.error("package {0} not installed".format(package))
        return self.installed['Version'].loc[package]

    def biocLite(self, package=None, suppressUpdates=True, verbose=False):
        """Installs one or more biocLite packages

        :param package: a package name (string) or list of package names (list of
            strings) that will be installed from BioConductor. If package is set
            to None, all packages already installed will be updated.

        Packages that are already installed are skipped.
        """
        if isinstance(package, str):
            if not self.is_installed(package):
                biocLite(package, suppressUpdates, verbose=verbose)
        elif isinstance(package, list):
            for pkg in package:
                self.logging.info("Installing %s" % pkg)
                if not self.is_installed(pkg):
                    biocLite(pkg, suppressUpdates, verbose=verbose)
        else:  # trying other cases (e.g., None updates biocLite itself).
            biocLite(package, suppressUpdates, verbose=verbose)
        self.update()

    def _isLocal(self, pkg):
        """Return True if *pkg* is an existing path on the file system."""
        return os.path.exists(pkg)

    def remove(self, package):
        """Remove a package (or list) from local repository"""
        rcode = """remove.packages("%s")"""
        if isinstance(package, str):
            package = [package]
        for pkg in package:
            if pkg in self.installed.index:
                self.session(rcode % pkg)
            else:
                self.logging.warning("Package not found. Nothing to remove")
        self.update()

    def require(self, pkg, version):
        """Check if a package with given version is available

        :return: True if *pkg* is installed with a version greater than or
            equal to *version*, False otherwise.
        """
        if pkg not in self.installed.index:
            self.logging.info("Package %s not installed" % pkg)
            return False
        currentVersion = self.get_package_version(pkg)
        return self._get_version(currentVersion) >= self._get_version(version)

    def _install_package(self, packageName, dependencies=True):
        """Installs one or more CRAN packages

        Packages are (re)installed whether or not they are already present;
        only the logged message differs.

        .. todo:: check if it is already available to prevent renstallation ?
        """
        repos = self.cran_repos
        if isinstance(packageName, str):
            packageName = [packageName]
        for pkg in packageName:
            if self.is_installed(pkg):
                self.logging.info("Package %s found. " % pkg)
            else:
                self.logging.info("Package not found. Installing %s..." % pkg)
            install_package(pkg, dependencies=dependencies, repos=repos)
        self.update()

    def install(self, pkg, require=None, update=True, reinstall=False):
        """install a package automatically scanning CRAN and biocLite repos

        if require is not set and update is True, when a newest version of a package
        is available, it is installed

        :param pkg: a package name or a list of package names.
        """
        from easydev import to_list
        for name in to_list(pkg):
            self._install(name, require=require, update=update,
                          reinstall=reinstall)

    def _install(self, pkg, require=None, update=True, reinstall=False):
        """Install a single package (local file, CRAN or bioconductor)."""
        # LOCAL file
        if self._isLocal(pkg):
            # if a local file, we do not want to jump to biocLite or CRAN. Let
            # us install it directly. We cannot check version yet so we will
            # overwrite what is already installed
            self.logging.warning("Installing from source")
            self._install_package(pkg)
            return

        # From CRAN
        if self.is_installed(pkg):
            currentVersion = self.get_package_version(pkg)
            # if not provided, require should be the latest version
            if require is None and update is True:
                try:
                    require = self.get_package_latest_version(pkg)
                except Exception:
                    # a non-cran package (bioclite maybe)
                    pass

            if require is None:
                self.logging.info("%s already installed with version %s" %
                                  (pkg, currentVersion))
                return

            # is it the required version ?
            up_to_date = (self._get_version(currentVersion) >=
                          self._get_version(require))
            if up_to_date and reinstall is False:
                # nothing to do
                self.logging.info("%s already installed with required version %s"
                                  % (pkg, currentVersion))
            else:
                # Try updating
                self.logging.info("Updating")
                self._install_package(pkg)
                currentVersion = self.get_package_version(pkg)
                if self._get_version(currentVersion) < self._get_version(require):
                    self.logging.warning("%s installed but current version (%s) does not fulfill your requirement" %
                                         (pkg, currentVersion))

        elif pkg in self.available.index:
            self._install_package(pkg)
        else:
            # maybe a biocLite package ?
            # require is ignored. The latest will be installed
            self.logging.info("Trying to find the package on bioconductor")
            self.biocLite(pkg)
            if require is None:
                return
            currentVersion = self.get_package_version(pkg)
            # warn only when the installed version is older than required
            if self._get_version(currentVersion) < self._get_version(require):
                self.logging.warning("%s installed but version %s is too small, %s required (even after update)" %
                                     (pkg, currentVersion, require))

    def _get_version(self, version):
        """Convert a version string into a ``StrictVersion``.

        Dashes are not accepted by ``StrictVersion``, so two fallbacks are
        tried when the plain conversion fails.
        """
        # some packages do not use the correct version convention
        try:
            return StrictVersion(version)
        except Exception:
            try:
                return StrictVersion(version.replace("-", "a"))
            except Exception:
                # snowfall package example was 1.86-6.1
                # This becomes 1.86a61  which is not great but not workaround
                # for now
                left, right = version.split("-")
                version = left + "a" + right.replace('.', '')
                return StrictVersion(version)

    def is_installed(self, pkg_name):
        """Return True if *pkg_name* is one of the installed packages."""
        return pkg_name in self.installed.index
Example #12
0
class Service(object):
    """Base class for WSDL and REST classes

    .. seealso:: :class:`REST`, :class:`WSDLService`
    """

    #: some useful response codes
    response_codes = {
        200: 'OK',
        201: 'Created',
        400: 'Bad Request. There is a problem with your input',
        404: 'Not found. The resource you requests does not exist',
        405: 'Method not allowed',
        406: "Not Acceptable. Usually headers issue",
        410: 'Gone. The resource you requested was removed.',
        415: "Unsupported Media Type",
        500: 'Internal server error. Most likely a temporary problem',
        503:
        'Service not available. The server is being updated, try again later'
    }

    def __init__(self, name, url=None, verbose=True, requests_per_sec=10):
        """.. rubric:: Constructor

        :param str name: a name for this service
        :param str url: its URL
        :param bool verbose: prints informative messages if True (default is
            True)
        :param requests_per_sec: maximum number of requests per second
            (default is 10). You can change that value. The reason for this
            limitation is that some services (e.g., NCBI) may black list
            your IP. If you need or can do more (e.g., ChEMBL does not seem
            to have restrictions), change the value. You can also have
            several instances but again, if you send too many requests at
            the same time, your future requests may be restricted.
            Currently implemented for REST only


        All instances have an attribute called :attr:`~Service.logging` that
        is an instanceof the :mod:`logging` module. It can be used to print
        information, warning, error messages::

            self.logging.info("informative message")
            self.logging.warning("warning message")
            self.logging.error("error message")

        The attribute :attr:`~Service.debugLevel`  can be used to set the behaviour
        of the logging messages. If the argument verbose is True, the debugLebel
        is set to INFO. If verbose if False, the debugLevel is set to WARNING.
        However, you can use the :attr:`debugLevel` attribute to change it to
        one of DEBUG, INFO, WARNING, ERROR, CRITICAL. debugLevel=WARNING means
        that only WARNING, ERROR and CRITICAL messages are shown.

        """
        super(Service, self).__init__()
        self.requests_per_sec = requests_per_sec
        self.name = name
        self.logging = Logging("bioservices:%s" % self.name, verbose)

        self._url = url
        try:
            if self.url is not None:
                # only reachability matters: release the connection at once
                urlopen(self.url).close()
        except Exception:
            self.logging.warning("The URL (%s) provided cannot be reached." %
                                 self.url)
        self._easyXMLConversion = True

        # used by HGNC where some XML contains non-utf-8 characters !!
        # should be able to fix it with requests once HGNC works again
        #self._fixing_unicode = False
        #self._fixing_encoding = "utf-8"

        self.devtools = DevTools()
        self.settings = BioServicesConfig()

        # time of the last call registered by _calls (0 means no call yet)
        self._last_call = 0

    def _calls(self):
        """Throttle the calls according to :attr:`requests_per_sec`.

        Sleeps for the remaining time if the previous call was registered
        less than ``1 / requests_per_sec`` seconds ago. Nothing is delayed
        on the very first call.
        """
        time_lapse = 1. / self.requests_per_sec
        elapsed = time.time() - self._last_call

        if self._last_call != 0 and elapsed <= time_lapse:
            time.sleep(time_lapse - elapsed)
        # Register the call *after* the pause; a timestamp taken before
        # sleeping would let the next call through too early.
        self._last_call = time.time()

    def _get_caching(self):
        return self.settings.params['cache.on'][0]

    def _set_caching(self, caching):
        self.devtools.check_param_in_list(caching, [True, False])
        self.settings.params['cache.on'][0] = caching
        # reset the session, which will be automatically created if we
        # access to the session attribute
        self._session = None

    CACHING = property(_get_caching, _set_caching)

    def _get_url(self):
        return self._url

    def _set_url(self, url):
        # something more clever here to check the URL e.g. starts with http
        # None is ignored: the current URL is kept
        if url is not None:
            url = url.rstrip("/")
            self._url = url

    url = property(_get_url, _set_url, doc="URL of this service")

    def _get_easyXMLConversion(self):
        return self._easyXMLConversion

    def _set_easyXMLConversion(self, value):
        if isinstance(value, bool) is False:
            raise TypeError("value must be a boolean value (True/False)")
        self._easyXMLConversion = value

    easyXMLConversion = property(
        _get_easyXMLConversion,
        _set_easyXMLConversion,
        doc=
        """If True, xml output from a request are converted to easyXML object (Default behaviour)."""
    )

    def easyXML(self, res):
        """Use this method to convert a XML document into an
            :class:`~bioservices.xmltools.easyXML` object

        The easyXML object provides utilities to ease access to the XML
        tag/attributes.

        Here is a simple example starting from the following XML

        .. doctest::

            >>> from bioservices import *
            >>> doc = "<xml> <id>1</id> <id>2</id> </xml>"
            >>> s = Service("name")
            >>> res = s.easyXML(doc)
            >>> res.findAll("id")
            [<id>1</id>, <id>2</id>]

        """
        from bioservices import xmltools
        return xmltools.easyXML(res)

    def __str__(self):
        txt = "This is an instance of %s service" % self.name
        return txt

    def pubmed(self, Id):
        """Open a pubmed Id into a browser tab

        :param Id: a valid pubmed Id in string or integer format.

        The URL is a concatenation of the pubmed URL
        http://www.ncbi.nlm.nih.gov/pubmed/ and the provided Id.

        """
        url = "http://www.ncbi.nlm.nih.gov/pubmed/"
        import webbrowser
        webbrowser.open(url + str(Id))

    def on_web(self, url):
        """Open a URL into a browser"""
        import webbrowser
        webbrowser.open(url)

    def save_str_to_image(self, data, filename):
        """Save string object into a file converting into binary

        :param data: base64 encoded content (text or bytes).
        :param str filename: file in which the decoded bytes are written.
        """
        import binascii
        try:
            # python3 text needs to be encoded first
            payload = bytes(data, "utf-8")
        except TypeError:
            # already bytes (or python2 str)
            payload = data
        # decode before opening the file so that invalid input does not
        # leave an empty file behind
        newres = binascii.a2b_base64(payload)
        with open(filename, 'wb') as f:
            f.write(newres)
Example #13
0
class Service(object):
    """Base class for WSDL and REST classes

    .. seealso:: :class:`REST`, :class:`WSDLService`
    """

    #: some useful response codes
    response_codes = {
        200: 'OK',
        201: 'Created',
        400: 'Bad Request. There is a problem with your input',
        404: 'Not found. The resource you requests does not exist',
        405: 'Method not allowed',
        406: "Not Acceptable. Usually headers issue",
        410: 'Gone. The resource you requested was removed.',
        415: "Unsupported Media Type",
        500: 'Internal server error. Most likely a temporary problem',
        503: 'Service not available. The server is being updated, try again later'
        }

    def __init__(self, name, url=None, verbose=True, requests_per_sec=3):
        """.. rubric:: Constructor

        :param str name: a name for this service
        :param str url: its URL
        :param bool verbose: prints informative messages if True (default is
            True)
        :param requests_per_sec: maximum number of requests per second
            (default is 3). You can change that value. If you reach the
            limit, an error is raised. The reason for this limitation is
            that some services (e.g., NCBI) may black list your IP.
            If you need or can do more (e.g., ChEMBL does not seem to have
            restrictions), change the value. You can also have several
            instances but again, if you send too many requests at the same
            time, your future requests may be restricted. Currently
            implemented for REST only


        All instances have an attribute called :attr:`~Service.logging` that
        is an instanceof the :mod:`logging` module. It can be used to print
        information, warning, error messages::

            self.logging.info("informative message")
            self.logging.warning("warning message")
            self.logging.error("error message")

        The attribute :attr:`~Service.debugLevel`  can be used to set the behaviour
        of the logging messages. If the argument verbose is True, the debugLebel
        is set to INFO. If verbose if False, the debugLevel is set to WARNING.
        However, you can use the :attr:`debugLevel` attribute to change it to
        one of DEBUG, INFO, WARNING, ERROR, CRITICAL. debugLevel=WARNING means
        that only WARNING, ERROR and CRITICAL messages are shown.

        """
        super(Service, self).__init__()
        self.requests_per_sec = requests_per_sec

        self.name = name
        self.logging = Logging("bioservices:%s" % self.name, verbose)

        self._url = url
        try:
            if self.url is not None:
                # only reachability matters: release the connection at once
                urlopen(self.url).close()
        except Exception:
            self.logging.warning("The URL (%s) provided cannot be reached." % self.url)
        self._easyXMLConversion = True

        # used by HGNC where some XML contains non-utf-8 characters !!
        # should be able to fix it with requests once HGNC works again
        #self._fixing_unicode = False
        #self._fixing_encoding = "utf-8"

        self.devtools = DevTools()
        self.settings = BioServicesConfig()

    def _get_caching(self):
        return self.settings.params['cache.on'][0]

    def _set_caching(self, caching):
        self.devtools.check_param_in_list(caching, [True, False])
        self.settings.params['cache.on'][0] = caching
        # reset the session, which will be automatically created if we
        # access to the session attribute
        self._session = None
    CACHING = property(_get_caching, _set_caching)

    def _get_url(self):
        return self._url

    def _set_url(self, url):
        # something more clever here to check the URL e.g. starts with http
        # None is ignored: the current URL is kept
        if url is not None:
            url = url.rstrip("/")
            self._url = url
    url = property(_get_url, _set_url, doc="URL of this service")

    def _get_easyXMLConversion(self):
        return self._easyXMLConversion

    def _set_easyXMLConversion(self, value):
        if isinstance(value, bool) is False:
            raise TypeError("value must be a boolean value (True/False)")
        self._easyXMLConversion = value
    easyXMLConversion = property(_get_easyXMLConversion,
            _set_easyXMLConversion,
            doc="""If True, xml output from a request are converted to easyXML object (Default behaviour).""")

    def easyXML(self, res):
        """Use this method to convert a XML document into an
            :class:`~bioservices.xmltools.easyXML` object

        The easyXML object provides utilities to ease access to the XML
        tag/attributes.

        Here is a simple example starting from the following XML

        .. doctest::

            >>> from bioservices import *
            >>> doc = "<xml> <id>1</id> <id>2</id> </xml>"
            >>> s = Service("name")
            >>> res = s.easyXML(doc)
            >>> res.findAll("id")
            [<id>1</id>, <id>2</id>]

        """
        from bioservices import xmltools
        return xmltools.easyXML(res)

    def __str__(self):
        txt = "This is an instance of %s service" % self.name
        return txt

    def pubmed(self, Id):
        """Open a pubmed Id into a browser tab

        :param Id: a valid pubmed Id in string or integer format.

        The URL is a concatenation of the pubmed URL
        http://www.ncbi.nlm.nih.gov/pubmed/ and the provided Id.

        """
        url = "http://www.ncbi.nlm.nih.gov/pubmed/"
        import webbrowser
        webbrowser.open(url + str(Id))

    def on_web(self, url):
        """Open a URL into a browser"""
        import webbrowser
        webbrowser.open(url)

    def save_str_to_image(self, data, filename):
        """Save string object into a file converting into binary

        :param data: base64 encoded content (text or bytes).
        :param str filename: file in which the decoded bytes are written.
        """
        import binascii
        try:
            # python3 text needs to be encoded first
            payload = bytes(data, "utf-8")
        except TypeError:
            # already bytes (or python2 str)
            payload = data
        # decode before opening the file so that invalid input does not
        # leave an empty file behind
        newres = binascii.a2b_base64(payload)
        with open(filename, 'wb') as f:
            f.write(newres)
Example #14
0
def test_logging():
    """Exercise Logging: messages, level get/set and copying."""
    log = Logging("INFO")
    log.name = "test"
    log.info("test")
    log.debug("test")
    log.warning("test")
    log.error("test")
    log.critical("test")

    # level names are stored as given
    for level in ['DEBUG', 'INFO', 'ERROR', 'WARNING', 'CRITICAL']:
        log.level = level
        assert log.level == level
    # booleans and numeric levels are accepted as well
    log.level = True
    log.level = False
    for x in [10, 20, 30, 40, 50]:
        log.level = x

    # An unknown level name is expected to be rejected. try/except/else is
    # required here: an assert placed inside the try block would be
    # swallowed by the except clause and the check could never fail.
    try:
        log.level = "WARN"
    except Exception:
        pass
    else:
        raise AssertionError('setting the level to "WARN" should fail')

    # FIXME is this working ?? weird syntax in logging_tools.
    import copy
    copy.copy(log)
    copy.deepcopy(log)