class Tools(object):
    """Small helper bundling coloured printing and directory creation.

    Messages are printed when the instance is verbose, or when the caller
    passes ``force=True`` for a given message.
    """

    # One DevTools instance shared by every Tools instance
    dv = DevTools()

    def __init__(self, verbose=True):
        """Store the verbosity flag consulted by the printing methods."""
        self.verbose = verbose

    def _emit(self, colorize, txt, force):
        """Print ``colorize(txt)`` unless the message is silenced."""
        if not self.verbose and force is not True:
            return
        print(colorize(txt))

    def purple(self, txt, force=False):
        """Print *txt* through the module-level ``purple`` function."""
        self._emit(purple, txt, force)

    def red(self, txt, force=False):
        """Print *txt* through the module-level ``red`` function."""
        self._emit(red, txt, force)

    def green(self, txt, force=False):
        """Print *txt* through the module-level ``green`` function."""
        self._emit(green, txt, force)

    def blue(self, txt, force=False):
        """Print *txt* through the module-level ``blue`` function."""
        self._emit(blue, txt, force)

    def mkdir(self, name):
        """Delegate the creation of directory *name* to ``DevTools.mkdir``."""
        self.dv.mkdir(name)
def __init__(self, organism="Homo sapiens", cache=False, verbose=True):
    """.. rubric:: Constructor

    :param str organism: the organism to look at. Homo sapiens is the
        default. Other possible organisms can be found in
        :attr:`organisms`.
    :param cache: forwarded as is to :class:`IntactComplex`.
    :param verbose: forwarded as is to :class:`IntactComplex`. It was
        previously read without being defined, which raised a NameError;
        it is now a trailing keyword so existing callers are unaffected.
    """
    self.logging = Logging()
    self.devtools = DevTools()
    self.webserv = IntactComplex(verbose=verbose, cache=cache)

    # Fetch every entry once; the organism lists are derived from it
    df = self.webserv.search("*", frmt="pandas")
    self.df = df

    unique_names = set(df["organismName"])

    #: organism names found in the database, truncated at the first ";"
    self.valid_organisms = [x.split(";")[0] for x in unique_names]

    #: organism names exactly as found in the database
    self.organisms = list(unique_names)

    self._organism = None
    # NOTE(review): membership is tested against the raw names in
    # ``organisms`` and not against ``valid_organisms`` -- confirm this
    # is the intended list.
    if organism in self.organisms:
        self.organism = organism
    else:
        print("Organism not set yet. ")

    # This will be populated on request as a cache/buffer
    self._details = None
    self._complexes = None
def __init__(self, organism='Homo sapiens', verbose=True, cache=False):
    """.. rubric:: Constructor

    :param str organism: the organism to look at. Homo sapiens is the
        default. Other possible organisms can be found in
        :attr:`organisms`.
    :param str verbose: a verbose level in ERROR/DEBUG/INFO/WARNING
        compatible with those used in BioServices. It is passed to the
        parent constructor as ``level`` and to :class:`IntactComplex`.
    :param cache: forwarded as is to :class:`IntactComplex`.
    """
    super(Complexes, self).__init__(level=verbose)
    self.devtools = DevTools()
    self.webserv = IntactComplex(verbose=verbose, cache=cache)

    # Fetch every entry once; the organism lists are derived from it
    df = self.webserv.search('*', frmt='pandas')
    self.df = df

    unique_names = set(df['organismName'])

    #: organism names found in the database, truncated at the first ";"
    self.valid_organisms = [x.split(';')[0] for x in unique_names]

    #: organism names exactly as found in the database
    self.organisms = list(unique_names)

    self._organism = None
    # NOTE(review): membership is tested against the raw names in
    # ``organisms`` and not against ``valid_organisms`` -- confirm this
    # is the intended list.
    if organism in self.organisms:
        self.organism = organism
    else:
        print("Organism not set yet. ")

    # This will be populated on request as a cache/buffer
    self._details = None
    self._complexes = None
def __init__(self, fastq, database, threads=4, output_directory="kraken",
             dbname=None):
    """.. rubric:: Constructor

    :param fastq: either a fastq filename or a list of 2 fastq filenames
    :param database: the path to a valid Kraken database
    :param threads: number of threads to be used by Kraken
    :param output_directory: directory handed to ``DevTools.mkdir`` and
        stored on the instance
    :param dbname: name stored in :attr:`dbname`; when omitted, the
        basename of *database* is used

    Description: internally, once Kraken has performed an analysis, reads
    are associated to a taxon (or not). We then find the corresponding
    lineage and scientific names to be stored within a Krona formatted
    file. KtImportTex is then used to create the Krona page.
    """
    # Output directory first, so that it exists before the analysis is set up
    self._devtools = DevTools()
    self.output_directory = output_directory
    self._devtools.mkdir(output_directory)

    self.ka = KrakenAnalysis(fastq, database, threads)

    self.dbname = os.path.basename(database) if dbname is None else dbname
def __init__(self, fastq, database, threads=4):
    """.. rubric:: Constructor

    :param fastq: either a fastq filename or a list of 2 fastq filenames
    :param database: the path to a valid Kraken database
    :param threads: number of threads to be used by Kraken
    :raises ValueError: if *fastq* is neither a string nor a list

    The database and every fastq file are passed to
    ``DevTools.check_exists``.
    """
    self._devtools = DevTools()
    self._devtools.check_exists(database)
    self.database = database
    self.threads = threads

    # Fastq input: normalise to a list and record whether it is paired
    if isinstance(fastq, str):
        self.paired = False
        self.fastq = [fastq]
    elif isinstance(fastq, list):
        self.paired = len(fastq) == 2
        self.fastq = fastq
    else:
        raise ValueError(
            "Expected a fastq filename or list of 2 fastq filenames")

    # Check each input file (the database was checked again here before,
    # so a missing fastq file went unnoticed)
    for this in self.fastq:
        self._devtools.check_exists(this)
def __init__(self, pattern="**/summary.json", output_filename=None,
             verbose=True, **kargs):
    """Collect the summary files matching *pattern* and build the report.

    :param str pattern: glob pattern, searched recursively
    :param output_filename: passed to :meth:`create_html`
    :param bool verbose: when exactly ``False`` the sequana logger level is
        set to WARNING, otherwise to INFO
    :param kargs: accepted but not used here
    """
    super().__init__()

    from sequana import logger
    logger.level = "WARNING" if verbose is False else "INFO"
    logger.info(
        "Sequana Summary is still a tool in progress and have been " +
        " tested with the quality_control pipeline only for now.")

    self.title = "Sequana multiple summary"
    self.devtools = DevTools()

    self.filenames = list(glob.iglob(pattern, recursive=True))
    self.summaries = [ReadSummary(filename) for filename in self.filenames]
    # Reuse the summaries built above rather than parsing each file again
    self.projects = [summary.data['project'] for summary in self.summaries]

    self.create_report_content()
    self.create_html(output_filename)
def __init__(self, name, url=None, verbose=True, requests_per_sec=10):
    """.. rubric:: Constructor

    :param str name: a name for this service
    :param str url: its URL
    :param bool verbose: prints informative messages if True (default is
        True)
    :param requests_per_sec: maximum number of requests per second
        (default is 10). You can change that value. If you reach the
        limit, an error is raised. The reason for this limitation is that
        some services (e.g., NCBI) may blacklist your IP. If you need or
        can do more (e.g., ChEMBL does not seem to have restrictions),
        change the value. You can also have several instances but again,
        if you send too many requests at the same time, your future
        requests may be restricted. Currently implemented for REST only.

    All instances have an attribute called :attr:`~Service.logging` that
    is an instance of the :mod:`logging` module. It can be used to print
    information, warning, error messages::

        self.logging.info("informative message")
        self.logging.warning("warning message")
        self.logging.error("error message")

    The attribute :attr:`~Service.debugLevel` can be used to set the
    behaviour of the logging messages. If the argument verbose is True,
    the debugLevel is set to INFO. If verbose is False, the debugLevel is
    set to WARNING. However, you can use the :attr:`debugLevel` attribute
    to change it to one of DEBUG, INFO, WARNING, ERROR, CRITICAL.
    debugLevel=WARNING means that only WARNING, ERROR and CRITICAL
    messages are shown.
    """
    super(Service, self).__init__()
    self.requests_per_sec = requests_per_sec
    self.name = name
    self.logging = Logging("bioservices:%s" % self.name, verbose)

    self._url = url
    try:
        if self.url is not None:
            # Reachability probe only: close the response immediately
            # instead of leaving the connection open until collection.
            urlopen(self.url).close()
    except Exception:
        # Best effort: an unreachable URL is reported, not fatal
        self.logging.warning("The URL (%s) provided cannot be reached." %
                             self.url)
    self._easyXMLConversion = True

    # used by HGNC where some XML contains non-utf-8 characters !!
    # should be able to fix it with requests once HGNC works again
    # self._fixing_unicode = False
    # self._fixing_encoding = "utf-8"

    self.devtools = DevTools()
    self.settings = BioServicesConfig()

    self._last_call = 0
def init(self):
    """Derive the database sub-paths from ``self.dbname`` and create them.

    Sets ``library_path``, ``taxon_path``, ``fasta_path`` and
    ``_devtools``, then calls ``DevTools.mkdir`` on the database
    directory and on each of the three sub-paths.
    """
    root = self.dbname
    self.library_path = root + os.sep + "library"
    self.taxon_path = root + os.sep + "taxonomy"
    self.fasta_path = self.library_path + os.sep + "added"

    self._devtools = DevTools()
    # Parent directories come first in this sequence
    for directory in (root, self.library_path, self.fasta_path,
                      self.taxon_path):
        self._devtools.mkdir(directory)
def _download_minikraken(self, verbose=True):
    """Fetch ``minikraken.tgz`` unless a copy with the expected md5 exists.

    :param verbose: accepted but not used here

    The target directories are built from ``sequana_config_path``.
    """
    tools = DevTools()
    # NOTE(review): nothing is appended after the separator, so the
    # archive lands at the root of the configuration path -- confirm.
    root = sequana_config_path + os.sep
    tools.mkdir(root)
    tools.mkdir(root + os.sep + "taxonomy")

    logger.info("Downloading minikraken (4Gb)")

    archive = root + os.sep + "minikraken.tgz"
    up_to_date = (os.path.exists(archive)
                  and md5(archive) == "30eab12118158d0b31718106785195e2")
    if not up_to_date:
        wget("https://ccb.jhu.edu/software/kraken/dl/minikraken.tgz", archive)
    else:
        logger.warning("%s already present" % archive)
def __init__(self):
    """Create the ``busco`` directory and list the known dataset names.

    ``self.base`` is ``sequana_config_path`` followed by ``busco``;
    ``self.filenames`` is the alphabetically sorted list of dataset names.
    """
    tools = DevTools()
    self.base = sequana_config_path + os.sep + "busco"
    tools.mkdir(self.base)

    # Names are stored without their suffix and completed below
    odb9_names = (
        "bacteria", "proteobacteria", "rhizobiales", "betaproteobacteria",
        "gammaproteobacteria", "enterobacteriales", "deltaepsilonsub",
        "actinobacteria", "cyanobacteria", "firmicutes", "clostridia",
        "lactobacillales", "bacillales", "bacteroidetes", "spirochaetes",
        "tenericutes", "eukaryota", "fungi", "microsporidia", "dikarya",
        "ascomycota", "pezizomycotina", "eurotiomycetes", "sordariomyceta",
        "saccharomyceta", "saccharomycetales", "basidiomycota", "metazoa",
        "nematoda", "arthropoda", "insecta", "endopterygota", "hymenoptera",
        "diptera", "vertebrata", "actinopterygii", "tetrapoda", "aves",
        "mammalia", "euarchontoglires", "laurasiatheria", "embryophyta",
    )
    ensembl_names = ("protists", "alveolata_stramenophiles")

    datasets = [name + "_odb9" for name in odb9_names]
    datasets.extend(name + "_ensembl" for name in ensembl_names)
    datasets.sort()
    self.filenames = datasets
def _download_kraken_toydb(self, verbose=True):
    """Download the kraken DB toy example from sequana_data into
    .config/sequana directory

    Checks the md5 checksums. About 32Mb of data

    :param verbose: accepted but not used here
    """
    tools = DevTools()
    target = sequana_config_path + os.sep + "kraken_toydb"
    tools.mkdir(target)
    tools.mkdir(target + os.sep + "taxonomy")

    remote_root = "https://github.com/sequana/data/raw/master/"

    # download only if required
    logger.info("Downloading the database into %s" % target)

    # (name relative to the database directory, expected md5 checksum)
    expected = (
        ("database.idx", "28661f8baf0514105b0c6957bec0fc6e"),
        ("database.kdb", "97a39d44ed86cadea470352d6f69748d"),
        ("taxonomy/names.dmp", "d91a0fcbbc0f4bbac918755b6400dea6"),
        ("taxonomy/nodes.dmp", "c8bae69565af2170ece194925b5fdeb9"),
    )

    for relative_name, checksum in expected:
        url = remote_root + "kraken_toydb/%s" % relative_name
        local_copy = target + os.sep + relative_name
        if not (os.path.exists(local_copy) and md5(local_copy) == checksum):
            logger.info("Downloading %s" % url)
            wget(url, local_copy)
        else:
            logger.warning("%s already present" % local_copy)
def main(args=None):
    """Command-line entry point running Kraken on one or several databases.

    :param list args: command-line arguments, program name first; defaults
        to a copy of ``sys.argv``.
    :raises ValueError: if a requested database is found neither in the
        sequana configuration path nor locally.

    With a single database :class:`KrakenPipeline` is used, otherwise
    :class:`KrakenSequential`. A ``summary.html`` page is then built in
    the output directory, and opened in a browser when ``--html`` is set.
    """
    if args is None:
        args = sys.argv[:]

    user_options = Options(prog="sequana")

    # If --help or no options provided, show the help
    if len(args) == 1:
        user_options.parse_args(["prog", "--help"])
    else:
        options = user_options.parse_args(args[1:])

    logger.level = options.level

    if options.update_taxonomy is True:
        from sequana.taxonomy import Taxonomy
        tax = Taxonomy()
        from sequana import sequana_config_path as cfg
        logger.info(
            "Will overwrite the local database taxonomy.dat in {}".format(cfg))
        tax.download_taxonomic_file(overwrite=True)
        sys.exit(0)

    # We put the import here to make the --help faster
    from sequana import KrakenPipeline
    from sequana.kraken import KrakenSequential
    devtools = DevTools()

    if options.download:
        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download(options.download)
        sys.exit()

    fastq = []
    if options.file1:
        devtools.check_exists(options.file1)
        fastq.append(options.file1)
    if options.file2:
        devtools.check_exists(options.file2)
        fastq.append(options.file2)

    from sequana import sequana_config_path as scfg
    if options.databases is None:
        logger.critical("You must provide a database")
        sys.exit(1)

    databases = []
    for database in options.databases:
        # Short aliases for the databases handled by --download
        if database == "toydb":
            database = "kraken_toydb"
        elif database == "minikraken":
            database = "minikraken_20141208"

        if os.path.exists(scfg + os.sep + database):  # in Sequana path
            databases.append(scfg + os.sep + database)
        elif os.path.exists(database):  # local database
            databases.append(database)
        else:
            msg = "Invalid database name (%s). Neither found locally "
            msg += "or in the sequana path %s; Use the --download option"
            raise ValueError(msg % (database, scfg))

    output_directory = options.directory + os.sep + "kraken"
    devtools.mkdirs(output_directory)

    def _pathto(name):
        """Prefix *name* with the kraken output path; falsy values pass through."""
        return "{}/kraken/{}".format(options.directory, name) if name else name

    # if there is only one database, use the pipeline else KrakenSequential
    if len(databases) == 1:
        logger.info("Using 1 database")
        k = KrakenPipeline(fastq, databases[0], threads=options.thread,
                           output_directory=output_directory,
                           confidence=options.confidence)
        k.run(output_filename_classified=_pathto(options.classified_out),
              output_filename_unclassified=_pathto(options.unclassified_out))
    else:
        logger.info("Using %s databases" % len(databases))
        k = KrakenSequential(fastq, databases, threads=options.thread,
                             output_directory=output_directory + os.sep,
                             force=True,
                             keep_temp_files=options.keep_temp_files,
                             output_filename_unclassified=_pathto(
                                 options.unclassified_out),
                             confidence=options.confidence)
        k.run(output_prefix="kraken")

    # This statement sets the directory where HTML will be saved
    from sequana.utils import config
    config.output_dir = options.directory

    # output_directory first argument: the directory where to find the data
    # output_filename is relative to the config.output_dir defined above
    KrakenModule(output_directory, output_filename="summary.html")

    logger.info("Open ./%s/summary.html" % options.directory)
    logger.info("or ./%s/kraken/kraken.html" % options.directory)

    if options.html is True:
        # ``ss.onweb()`` was called here on a name that is never defined,
        # so --html raised a NameError. Open the summary page instead.
        import pathlib
        import webbrowser
        summary = pathlib.Path(options.directory, "summary.html")
        webbrowser.open(summary.resolve().as_uri())
# source
# http://nbviewer.ipython.org/github/tritemio/notebooks/blob/master/Mixture_Model_Fitting.ipynb
from easydev import DevTools
devtools = DevTools()
from scipy.optimize import minimize, show_options
import scipy.stats as ss
import numpy as np
import pylab
from easydev import AttrDict
from . import criteria
import numpy as np

# Precomputed 0.5 * log(2 * pi)
half_log_two_pi = np.log(2 * np.pi) * 0.5


class Model(object):
    """New base model

    Defines the three methods a concrete model has to override; each of
    them raises :class:`NotImplementedError` here.
    """

    def __init__(self):
        """Nothing to initialise in the base class."""

    def log_density(self, data):
        """To be overridden by subclasses."""
        raise NotImplementedError()

    def estimate(self, data, weights):
        """To be overridden by subclasses."""
        raise NotImplementedError()

    def generate(self):
        """To be overridden by subclasses."""
        raise NotImplementedError()
class KrakenDownload(object):
    """Utility to download Kraken DB and place them in a local directory

    ::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download('toydb')
        kd.download('minikraken')

    A large database (8Gb) is available on synapse and has the following
    DOI::

        doi:10.7303/syn6171000

    It can be downloaded manually or if you have a Synapse login
    (https://www.synapse.org), you can use::

        from sequana import KrakenDownload
        kd = KrakenDownload()
        kd.download("sequana_db1")
    """
    dv = DevTools()

    def download(self, name, verbose=True):
        """Download the database called *name*.

        :param str name: one of ``toydb``, ``minikraken``, ``sequana_db1``
        :param verbose: forwarded to the specific download method
        :raises ValueError: if *name* is not one of the known databases
        """
        if name == "minikraken":
            self._download_minikraken(verbose=verbose)
        elif name == "toydb":
            self._download_kraken_toydb(verbose=verbose)
        elif name == "sequana_db1":
            self._download_sequana_db1(verbose=verbose)
        else:
            raise ValueError(
                "name must be toydb or minikraken, or sequana_db1")

    def _download_kraken_toydb(self, verbose=True):
        """Download the kraken DB toy example from sequana_data into
        .config/sequana directory

        Checks the md5 checksums. About 32Mb of data
        """
        base = sequana_config_path + os.sep + "kraken_toydb"
        taxondir = base + os.sep + "taxonomy"
        self.dv.mkdir(base)
        self.dv.mkdir(taxondir)

        baseurl = "https://github.com/sequana/data/raw/master/"

        # download only if required
        logger.info("Downloading the database into %s" % base)

        md5sums = [
            "28661f8baf0514105b0c6957bec0fc6e",
            "97a39d44ed86cadea470352d6f69748d",
            "d91a0fcbbc0f4bbac918755b6400dea6",
            "c8bae69565af2170ece194925b5fdeb9",
        ]
        filenames = [
            "database.idx",
            "database.kdb",
            "taxonomy/names.dmp",
            "taxonomy/nodes.dmp",
        ]

        for filename, md5sum in zip(filenames, md5sums):
            url = baseurl + "kraken_toydb/%s" % filename
            filename = base + os.sep + filename
            if os.path.exists(filename) and md5(filename) == md5sum:
                logger.warning("%s already present" % filename)
            else:
                logger.info("Downloading %s" % url)
                wget(url, filename)

    def _download_minikraken(self, verbose=True):
        """Fetch ``minikraken.tgz`` unless a copy with the expected md5 exists."""
        # NOTE(review): nothing is appended after the separator, so the
        # archive lands at the root of the configuration path -- confirm.
        base = sequana_config_path + os.sep + ""
        taxondir = base + os.sep + "taxonomy"
        self.dv.mkdir(base)
        self.dv.mkdir(taxondir)

        logger.info("Downloading minikraken (4Gb)")

        filename = base + os.sep + "minikraken.tgz"
        if os.path.exists(filename) and md5(
                filename) == "30eab12118158d0b31718106785195e2":
            logger.warning("%s already present" % filename)
        else:
            wget("https://ccb.jhu.edu/software/kraken/dl/minikraken.tgz",
                 filename)
        # unzipping. requires tar and gzip

    def _download_from_synapse(self, synid, target_dir):
        """Download the Synapse entity *synid* into *target_dir*.

        The client stored in ``self._synapse`` is tried first; if that
        fails for any reason a new client is created, logged in, and the
        download is attempted once more.

        :raises ImportError: if ``synapseclient`` is not installed
        """
        try:
            from synapseclient import Synapse
        except ImportError:
            raise ImportError(
                "Please install synapseclient using 'pip install synapseclient'"
            )
        try:
            self._synapse.get(synid, downloadLocation=target_dir)
        except Exception:
            # No usable client yet: create one, log in and retry once.
            # ``Exception`` (not a bare except) lets Ctrl-C propagate.
            self._synapse = Synapse()
            self._synapse.login()
            self._synapse.get(synid, downloadLocation=target_dir)

    def _download_sequana_db1(self, verbose=True):
        """Download the ``sequana_db1`` database from Synapse.

        A file is downloaded only when it is missing from its target
        directory or when its md5 checksum differs from the expected one.
        The annotations file is always fetched from GitHub at the end.
        """
        dbname = "sequana_db1"
        from easydev import md5
        dir1 = sequana_config_path + os.sep + dbname
        dir2 = dir1 + os.sep + "taxonomy"
        self.dv.mkdir(dir1)
        self.dv.mkdir(dir2)

        logger.info(
            "Downloading about 8Gb of data (if not already downloaded) from"
            " Synapse into %s" % dir1)

        # (file name, expected md5, Synapse id, directory it is saved in).
        # Each file is looked up in the directory it is downloaded to; the
        # previous checks concatenated dir1 and the name without a path
        # separator, and looked for the taxonomy files outside dir2, so
        # nothing was ever found and everything was downloaded again.
        entries = (
            ("ena_list.txt", "a9cc6268f3338d1632c4712a412593f2",
             "syn6171700", dir1),
            ("database.idx", "2fa4a99a4f52f2f04c5a965adb1534ac",
             "syn6171017", dir1),
            # this one is large (8Gb)
            ("database.kdb", "ff698696bfc88fe83bc201937cd9cbdf",
             "syn6171107", dir1),
            # Then, the taxonomy directory
            ("names.dmp", "10bc7a63c579de02112d125a51fd65d0",
             "syn6171286", dir2),
            ("nodes.dmp", "a68af5a60434e2067c4a0a16df873980",
             "syn6171289", dir2),
            ("taxons.txt", "e78fbb43b3b41cbf4511d6af16c0287f",
             "syn6171290", dir2),
        )
        for basename, checksum, synid, directory in entries:
            filename = os.path.join(directory, basename)
            if os.path.exists(filename) and md5(filename) == checksum:
                continue
            self._download_from_synapse(synid, directory)

        logger.info('done. You should have a kraken DB in %s' % dir1)

        # The annotations
        wget(
            "https://github.com/sequana/data/raw/master/sequana_db1/annotations.csv",
            dir1 + os.sep + "annotations.csv")
def scoring(args=None):
    """Entry point of the standalone application called dreamscoring

    ::

        dreamscoring --help

    :param list args: command-line arguments, program name first; defaults
        to a copy of ``sys.argv``.
    :raises IOError: if one of the submitted files does not exist.

    Most failures are reported on screen and followed by ``sys.exit``.
    """
    checker = DevTools()

    argv = sys.argv[:] if args is None else args
    parser = Options(prog="dreamtools")
    if len(argv) == 1:
        parser.parse_args(["prog", "--help"])
    else:
        options = parser.parse_args(argv[1:])

    if options.version is True:
        print("%s" % dreamtools.version)
        sys.exit()

    # Check on the challenge name
    if options.challenge is None:
        print_color('--challenge must be provided', red)
        sys.exit()

    # Names are upper case, except for the "dot" part
    options.challenge = options.challenge.upper().replace('DOT', 'dot')

    from dreamtools.admin.download_data import get_challenge_list
    if options.challenge not in get_challenge_list():
        print_color(
            "This challenge %s is not registered in dreamtools." %
            options.challenge, red)
        print("Here is the list of registered challenges: " +
              ", ".join(get_challenge_list()))
        sys.exit()

    # Check that the challenge can be loaded
    challenge = get_challenge(options.challenge)
    try:
        scorer = challenge.import_scoring_class()
    except NotImplementedError as err:
        print("\n" + str(err))
        sys.exit()

    # User may just request some information about the challenge.
    if options.info is True:
        print(scorer)
        sys.exit()
    elif options.onweb is True:
        scorer.onweb()
        sys.exit()

    # Checks name of the sub-challenges
    subchallenges = get_subchallenges(options.challenge)
    sub_name = options.sub_challenge

    if len(subchallenges) and sub_name is None:
        message = "This challenge requires a sub challenge name. "
        message += ("Please use --sub-challenge followed by one value in %s "
                    % subchallenges)
        print_color(message, red)
        sys.exit(0)

    if sub_name is not None and len(subchallenges) != 0:
        try:
            checker.check_param_in_list(sub_name, subchallenges)
        except ValueError as err:
            message = ("DREAMTools error: unknown sub challenge or not "
                       "implemented")
            message += "--->" + str(err)
            print_color(message, red)
            sys.exit()

    # maybe users just need a template
    if options.download_template is True:
        scoring_class = Challenge(options.challenge).import_scoring_class()
        if options.sub_challenge is not None:
            print(scoring_class.download_template(options.sub_challenge))
        else:
            print(scoring_class.download_template())
        return

    # similarly for the gold standard
    if options.download_goldstandard is True:
        scoring_class = Challenge(options.challenge).import_scoring_class()
        if options.sub_challenge is not None:
            print(scoring_class.download_goldstandard(options.sub_challenge))
        else:
            print(scoring_class.download_goldstandard())
        return

    # finally, we need a submission
    if options.filename is None:
        message = ("---> filename not provided. You must provide a filename "
                   "with correct format\n")
        message += "You may get a template using --download-template \n"
        message += ("Alternatively, you can user either --info or --onweb "
                    "option to get information about the challenge.\n")
        message += ("https://github.com/dreamtools/dreamtools, or "
                    "http://dreamchallenges.org\n")
        print_color(message, red)
        sys.exit()

    # filename in general is a single string but could be a list of
    # filenames. The parser always gives a list, so a list of length 1 is
    # converted back into a single string.
    for path in options.filename:
        if os.path.exists(path) is False:
            raise IOError("file %s does not seem to exists" % path)
    if len(options.filename) == 1:
        options.filename = options.filename[0]

    print_color("DREAMTools scoring", purple, underline=True)
    print('Challenge %s (sub challenge %s)\n\n' %
          (options.challenge, options.sub_challenge))

    res = generic_scoring(options.challenge, options.filename,
                          subname=options.sub_challenge,
                          goldstandard=options.goldstandard)

    report = "Solution for %s in challenge %s" % (options.filename,
                                                  options.challenge)
    if options.sub_challenge is not None:
        report += " (sub-challenge %s)" % options.sub_challenge
    report += " is :\n"
    report += "".join(
        darkgreen(" %s:\n %s\n" % (key, res[key]))
        for key in sorted(res.keys()))
    print(report)