Esempio n. 1
0
 def __init__(
     self,
     query_path,               # The input sequences
     db_path=pfam.hmm_db,      # The database to search
     seq_type='prot',          # Either 'prot' or 'nucl' (type of the query file)
     e_value=0.001,            # The search threshold
     params=None,              # Add extra params for the command line
     out_path=None,            # Where the results will be dropped
     executable=None,          # If you want a specific binary give the path
     cpus=None):               # The number of threads to use
     """Prepare an HMMER search of `query_path` against `db_path`.

     Note: the previous default `'prot' or 'nucl'` always evaluated to
     `'prot'`, so the default is now written explicitly (same value,
     less misleading).
     """
     # Save attributes #
     self.query = FASTA(query_path)
     self.db = FilePath(db_path)
     self.params = params if params else {}
     self.e_value = e_value
     self.seq_type = seq_type
     self.executable = FilePath(executable)
     # Cores to use: default to all available, capped at 32 #
     if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
     else: self.cpus = cpus
     # Auto detect database short name (overrides the path set above) #
     if db_path == 'pfam':    self.db = pfam.hmm_db
     if db_path == 'tigrfam': self.db = tigrfam.hmm_db
     # Output: default is next to the query, a trailing slash means
     # "into this directory", anything else is taken verbatim #
     if out_path is None:
         self.out_path = FilePath(self.query.prefix_path + '.hmmout')
     elif out_path.endswith('/'):
         self.out_path = FilePath(out_path + self.query.prefix + '.hmmout')
     else:
         self.out_path = FilePath(out_path)
Esempio n. 2
0
 def save(self, **kw):
     """
     Save the dataframe `self.df` to disk in every format listed in
     `self.formats` ('tex' and 'csv' are handled here) and return the
     path that was written to last.
     """
     # Work on a copy so the original dataframe stays untouched #
     df = self.df.copy()
     # Modify the index name #
     if self.capital_index and df.index.name is not None:
         df.index.name = df.index.name.capitalize()
     # Modify column names #
     if self.upper_columns: df.columns = df.columns.str.upper()
     # Possibility to overwrite path #
     if 'path' in kw: path = FilePath(kw['path'])
     else:            path = self.path
     # Special cases for float formatting #
     # NOTE(review): this mutates the instance attribute, so the
     # substitution is permanent after the first call.
     if self.float_format_tex == 'split_thousands':
         self.float_format_tex = self.split_thousands
     # Make sure the directory exists #
     self.base_dir.create_if_not_exists()
     # Latex version #
     if 'tex' in self.formats:
         df.to_latex(str(path),
                     float_format  = self.float_format_tex,
                     na_rep        = self.na_rep,
                     index         = self.index,
                     bold_rows     = self.bold_rows,
                     column_format = self.column_format,
                     escape        = self.escape_tex)
     # CSV version (plain text) #
     # The csv destination always carries a '.csv' extension, even when
     # `path` pointed at the tex destination above.
     if 'csv' in self.formats:
         path = path.replace_extension('csv')
         df.to_csv(str(path),
                   float_format = self.float_format_csv,
                   index        = self.index)
     # Return the path #
     return path
Esempio n. 3
0
 def files_to_retrieve(self):
     """Map each remote URL to the local destination for its download."""
     proteins_url = self.base_url + "protein.sequences.v9.1.fa.gz"
     mappings_url = self.base_url + "COG.mappings.v9.1.txt.gz"
     result = OrderedDict()
     result[proteins_url] = FilePath(self.p.raw_proteins)
     result[mappings_url] = FilePath(self.p.raw_mappings)
     return result
Esempio n. 4
0
 def overwrite_cache(self, value):
     """
     Invalidate the pickled cache for this property: assigning `None`
     deletes the cache file, assigning anything else raises.
     """
     # Where should we look in the file system ? #
     # NOTE(review): `f` is not defined inside this snippet -- it is
     # presumably the wrapped function captured by an enclosing
     # decorator closure (`f.func_name` is Python 2 for `f.__name__`);
     # confirm against the surrounding code.
     if 'cache_dir' in self.__dict__:
         path = FilePath(self.__dict__['cache_dir'] + f.func_name + '.pickle')
     else:
         path = getattr(self.p, f.func_name)
     # Only deletion is supported #
     if value is None: path.remove()
     else: raise Exception("You can't set a pickled property, you can only delete it")
Esempio n. 5
0
 def files_to_retrieve(self):
     """The files we want to download with their destinations."""
     # Case 1: a glob-style pattern filters the remote FTP listing #
     if hasattr(self, "pattern"):
         listing = self.ftp.listdir(self.ftp.curdir)
         listing.sort(key=natural_sort)
         matching = (f for f in listing if fnmatch.fnmatch(f, self.pattern))
         return OrderedDict((f, FilePath(self.autopaths.raw_dir + f))
                            for f in matching)
     # Case 2: an explicit list of file names #
     if hasattr(self, "files"):
         return OrderedDict((f, FilePath(self.autopaths.raw_dir + f))
                            for f in self.files)
Esempio n. 6
0
 def __init__(self, data_dir=None):
     """Set up every path used by this reference database."""
     # Default location: under the user's home #
     if data_dir is None: data_dir = home + 'databases/'
     # Base directory for paths #
     self.base_dir  = data_dir + self.short_name + '/'
     self.autopaths = AutoPaths(self.base_dir, self.all_paths)
     # Remote and local location of the archive #
     self.url  = self.base_url + self.base_name + ".tgz"
     self.dest = self.autopaths.tgz
     # The files produced once the archive is unpacked #
     prefix         = self.base_dir + self.base_name + '/' + self.base_name
     self.alignment = FilePath(prefix + ".fasta")
     self.taxonomy  = FilePath(prefix + ".tax")
Esempio n. 7
0
def check_setup_py(path_of_setup):
    """
    Parses the required modules from a `setup.py` file and checks they are
    importable and have the minimum required version installed.

    Some ideas for extracting dependency information from a `setup.py` file:
    https://stackoverflow.com/questions/24236266/

    Instead let's try the `parsesetup` package.
    Note: The code in the setup.py will be evaluated.

    Other interesting projects:
    https://pypi.org/project/requirements-parser/

    Typically you can use this function like this:

        >>> from plumbing.dependencies import check_setup_py
        >>> check_setup_py('~/module_name/setup.py')
    """
    # First let's check we have that module #
    check_module('parsesetup')
    import parsesetup
    # Make the input a FilePath #
    from autopaths.file_path import FilePath
    path_of_setup = FilePath(path_of_setup)
    # Run it. Security note: `trusted=True` means the code inside the
    # setup.py file is actually executed -- only use on files you trust #
    setup_args = parsesetup.parse_setup(path_of_setup, trusted=True)
    requires = setup_args.get('install_requires', [])
    # Split every requirement into (package, version). Previously only
    # '==' and '>=' were understood; now all common PEP 440 comparison
    # operators are handled. Split only on the first operator so that
    # e.g. 'pkg>=1,<2' still yields a usable version part #
    operators = r'\s*(?:===|==|>=|<=|~=|!=|<|>)\s*'
    requires = [re.split(operators, req, maxsplit=1) for req in requires]
    requires = [req if len(req) == 2 else (req[0], None) for req in requires]
    requires = dict((name.strip(), version) for name, version in requires)
    # Loop #
    for package, version in requires.items():
        check_module(package, version)
Esempio n. 8
0
 def build_tree_raxml(self,
                new_path    = None,
                seq_type    = 'nucl',   # Either 'nucl' or 'prot'
                num_threads = None,
                free_cores  = 2,
                keep_dir    = False):
     """Make a tree with RAxML.

     Fixes over the previous version: the default `'nucl' or 'prot'`
     always evaluated to `'nucl'` and is now explicit; an invalid
     `seq_type` used to leave `model` unbound (NameError) and now raises
     a clear ValueError; overwriting a previous output directory no
     longer crashes when it does not exist yet.
     """
     # Check output #
     if new_path is None: new_path = self.prefix_path + '.tree'
     # What model to choose #
     if   seq_type == 'nucl': model = "GTRGAMMA"
     elif seq_type == 'prot': model = "PROTGAMMAJTTF"
     else: raise ValueError("seq_type must be 'nucl' or 'prot', got %r" % (seq_type,))
     # Threads: leave `free_cores` for the rest of the machine #
     if num_threads is None: num_threads = multiprocessing.cpu_count() - free_cores
     else:                   num_threads = int(num_threads) - free_cores
     num_threads = max(1, num_threads)
     # Run it inside a temporary directory #
     temp_dir = new_temp_dir()
     sh.raxml811('-m', model, "-T", num_threads, '-p', 1, '-s', self.path,
                 '-n', 'tree', '-w', temp_dir, '-f', 'a', '-x', 1,
                 '-N', 'autoMR')
     # Move into place #
     if keep_dir:
         # Remove any previous output directory (ignore if absent) #
         shutil.rmtree(new_path, ignore_errors=True)
         shutil.move(temp_dir, new_path)
     if not keep_dir:
         shutil.move(temp_dir + 'RAxML_bestTree.tree', new_path)
     # Return #
     return FilePath(new_path)
Esempio n. 9
0
 def set_defaults(self):
     """
     Replace every empty attribute by an appropriate default value
     where one exists.
     """
     # Special objects carry a ready-made database attribute: use it #
     if self.algorithm == 'blast' and hasattr(self.database, 'blast_db'):
         self.database = self.database.blast_db
     if self.algorithm == 'vsearch' and hasattr(self.database,
                                                'vsearch_db'):
         self.database = self.database.vsearch_db
     # A bare path given for blast: wrap it in a BLASTdb object #
     is_blast_db = isinstance(self.database, BLASTdb)
     if self.algorithm == 'blast' and not is_blast_db:
         self.database = BLASTdb(self.database)
     # Filtering options default to an empty dict #
     if self.filtering is None: self.filtering = {}
     # Output path defaults to a file next to the input fasta #
     if self.out_path is None:
         default_out = self.input_fasta.prefix_path + '.' + \
                       self.algorithm + 'out'
         self.out_path = default_out
     # Always end up with a FilePath object #
     self.out_path = FilePath(self.out_path)
     # Number of cores: default to all available, capped at 32 #
     if self.num_threads is None or self.num_threads is True:
         self.num_threads = min(multiprocessing.cpu_count(), 32)
     # Extra params for the search algorithm default to an empty dict #
     if self.params is None: self.params = {}
 def index_bowtie(self):
     """Create an index on the fasta file compatible with bowtie2."""
     # Bowtie2 exits with code 1 on an empty fasta, so refuse that case #
     assert self
     # Build the index (input path doubles as the output prefix) #
     sh.bowtie2_build(self.path, self.path)
     # Return the first index file produced #
     return FilePath(self.path + '.1.bt2')
 def to_qual(self, path, verbose=False):
     """Write every record to `path` in 'qual' format."""
     import tqdm
     # Show a progress bar only when asked to #
     progress = tqdm.tqdm if verbose else (lambda iterable: iterable)
     # Write all records one by one #
     with open(path, 'w') as handle:
         for record in progress(self):
             SeqIO.write(record, handle, 'qual')
     # Return the destination #
     return FilePath(path)
Esempio n. 12
0
 def __init__(self, data_dir=None):
     """Set up every path used by this database."""
     # Default location: under the user's home #
     if data_dir is None: data_dir = home + 'databases/'
     # Base directory for paths #
     self.base_dir  = DirectoryPath(data_dir + self.short_name + '/')
     self.autopaths = AutoPaths(self.base_dir, self.all_paths)
     # Remote locations of the two archives #
     self.ref_url = self.base_url + "gg_13_8_99.refalign.tgz"
     self.tax_url = self.base_url + "gg_13_8_99.taxonomy.tgz"
     # Local locations of the two archives #
     self.ref_dest = self.autopaths.alignment
     self.tax_dest = self.autopaths.taxonomy
     # The results after download, as FilePath objects #
     self.alignment = FilePath(self.base_dir + "gg_13_8_99.refalign")
     self.taxonomy  = FilePath(self.base_dir + "gg_13_8_99.gg.tax")
Esempio n. 13
0
 def __init__(self, data_dir=None):
     """Set up every path used by this SILVA release."""
     # Default location: under the user's home #
     if data_dir is None: data_dir = home + 'databases/'
     # Base directory for paths #
     self.base_dir  = data_dir + self.short_name + '/'
     self.autopaths = AutoPaths(self.base_dir, self.all_paths)
     # Remote and local location of the archive #
     self.url  = self.base_url + "silva.nr_v%s.tgz" % self.version
     self.dest = self.autopaths.tgz
     # The results after download, as FilePath objects #
     self.alignment = FilePath(self.base_dir + "silva.nr_v%s.align" % self.version)
     self.taxonomy  = FilePath(self.base_dir + "silva.nr_v%s.tax"   % self.version)
     # The part that mothur will use for naming files #
     self.nickname = "nr_v%s" % self.version
Esempio n. 14
0
 def __init__(self,
              query_path,
              db_path,
              seq_type     = 'prot',      # Either 'prot' or 'nucl' (type of the query file)
              params       = None,        # Add extra params for the command line
              algorithm    = "blastn",    # E.g. "blastn" or "blastp"
              out_path     = None,        # Where the results will be dropped
              executable   = None,        # If you want a specific binary give the path
              cpus         = None,        # The number of threads to use
              num          = None,        # When parallelized, the number of this thread
              _out         = None,        # Store the stdout at this path
              _err         = None):       # Store the stderr at this path
     """Prepare a similarity search of `query_path` against `db_path`.

     Note: the old defaults `'prot' or 'nucl'` and `"blastn" or "blastp"`
     always evaluated to their first operand, so they are now written
     explicitly as `'prot'` and `"blastn"` (same values, less misleading).
     """
     # Main input #
     self.query = FASTA(query_path)
     # The database to search against #
     self.db = FilePath(db_path)
     # Other attributes #
     self.seq_type     = seq_type
     self.algorithm    = algorithm
     self.num          = num
     self.params       = params if params else {}
     # The standard output and error #
     self._out         = _out
     self._err         = _err
     # Output defaults: next to the query, or inside a given directory #
     if out_path is None:
         self.out_path = self.query.prefix_path + self.extension
     elif out_path.endswith('/'):
         self.out_path = out_path + self.query.prefix + self.extension
     else:
         self.out_path = out_path
     # Make it a file path #
     self.out_path = FilePath(self.out_path)
     # Executable #
     self.executable = FilePath(executable)
     # Cores to use: default to all available, capped at 32 #
     if cpus is None: self.cpus = min(multiprocessing.cpu_count(), 32)
     else:            self.cpus = cpus
     # `True` means "pick a default path next to the output" #
     if self._out is True:
         self._out = self.out_path + '.stdout'
     if self._err is True:
         self._err = self.out_path + '.stderr'
Esempio n. 15
0
 def __init__(self, version, base_dir=None):
     """Record the version requested and set up every path."""
     # Attributes #
     self.version    = version
     self.short_name = self.short_name + "_" + self.version
     # Base directory #
     if base_dir is None: base_dir = home
     self.base_dir = base_dir + 'databases/' + self.short_name + '/'
     self.p = AutoPaths(self.base_dir, self.all_paths)
     # URL #
     self.url = self.base_url + self.version
     # The archive #
     self.dest = self.p.archive
     # The results, as FilePath objects #
     align_name = "pr_two.gb203_v%s.align" % self.version
     tax_name   = "pr_two.gb203_v%s.tax"   % self.version
     self.alignment = FilePath(self.base_dir + align_name)
     self.taxonomy  = FilePath(self.base_dir + tax_name)
     # The part that mothur will use for naming files #
     self.nickname = "gb203_v%s" % self.version
Esempio n. 16
0
 def build_tree_fast(self, new_path=None, seq_type='nucl'):
     """Make a tree with FastTree. Names will be truncated however.

     Note: the previous default `'nucl' or 'prot'` always evaluated to
     `'nucl'`, so the default is now written explicitly (same value,
     less misleading). Pass 'prot' to skip the `-nt` flag.
     """
     # Check output #
     if new_path is None: new_path = self.prefix_path + '.tree'
     # Build the command #
     command_args = []
     if seq_type == 'nucl': command_args += ['-nt']   # nucleotide mode
     command_args += ['-gamma']
     command_args += ['-out', new_path]
     command_args += [self.path]
     # Run it #
     sh.FastTree(*command_args)
     # Return #
     return FilePath(new_path)
Esempio n. 17
0
 def set_paths(self, base_dir, script_path):
     """Set the directory, the script path and the outfile path."""
     # Normalize any user-supplied paths to absolute ones #
     if 'change_dir' in self.kwargs:
         abs_dir = os.path.abspath(self.kwargs['change_dir'])
         self.kwargs['change_dir'] = DirectoryPath(abs_dir)
     if 'out_file' in self.kwargs:
         abs_file = os.path.abspath(self.kwargs['out_file'])
         self.kwargs['out_file'] = FilePath(abs_file)
     # A base directory forces script, cwd and outfile inside it #
     if base_dir is not None:
         self.base_dir = DirectoryPath(os.path.abspath(base_dir))
         extension = self.extensions[self.language]
         self.script_path = FilePath(base_dir + "run." + extension)
         self.kwargs['change_dir'] = base_dir
         self.kwargs['out_file'] = FilePath(base_dir + "run.out")
     # Without a base directory the script goes to a temporary file... #
     if base_dir is None and script_path is None:
         self.script_path = FilePath(new_temp_path())
     # ...unless an explicit script path was given #
     if script_path is not None:
         self.script_path = FilePath(os.path.abspath(script_path))
Esempio n. 18
0
 def mappings(self):
     """Path to the COG mappings file."""
     return FilePath(self.p.unzipped_mappings)
Esempio n. 19
0
 def save_plot(self, fig=None, axes=None, **kwargs):
     """
     Apply every styling option found in `self.params` to the given
     figure/axes and save the result in all requested formats.
     Option precedence: `kwargs` > instance attribute > default_params.
     """
     # Missing figure #
     if fig is None:   fig = pyplot.gcf()
     # Missing axes #
     if axes is None: axes = pyplot.gca()
     # Parameters: merge kwargs, instance attrs and defaults in order #
     self.params = {}
     for key in self.default_params:
         if key in kwargs:                          self.params[key] = kwargs[key]
         elif hasattr(self, key):                   self.params[key] = getattr(self, key)
         elif self.default_params[key] is not None: self.params[key] = self.default_params[key]
     # Backwards compatibility #
     if kwargs.get('x_log', False): self.params['x_scale'] = 'symlog'
     if kwargs.get('y_log', False): self.params['y_scale'] = 'symlog'
     # Log #
     if 'x_scale' in self.params: axes.set_xscale(self.params['x_scale'])
     if 'y_scale' in self.params: axes.set_yscale(self.params['y_scale'])
     # Axis limits #
     if 'x_min' in self.params: axes.set_xlim(self.params['x_min'], axes.get_xlim()[1])
     if 'x_max' in self.params: axes.set_xlim(axes.get_xlim()[0], self.params['x_max'])
     if 'y_min' in self.params: axes.set_ylim(self.params['y_min'], axes.get_ylim()[1])
     if 'y_max' in self.params: axes.set_ylim(axes.get_ylim()[0], self.params['y_max'])
     # Minimum delta on axis limits #
     if 'y_lim_min' in self.params:
         top, bottom = axes.get_ylim()
         minimum     = self.params['y_lim_min']
         delta       = top - bottom
         if delta < minimum:
             center = bottom + delta/2
             axes.set_ylim(center - minimum/2, center + minimum/2)
     # Title #
     title = self.params.get('title', False)
     if title: axes.set_title(title)
     # Axes labels  #
     if self.params.get('x_label'): axes.set_xlabel(self.params['x_label'])
     if self.params.get('y_label'): axes.set_ylabel(self.params['y_label'])
     # Set height and width #
     if self.params.get('width'):  fig.set_figwidth(self.params['width'])
     if self.params.get('height'): fig.set_figheight(self.params['height'])
     # Adjust #
     # NOTE(review): when 'bottom' is set, 'top', 'left' and 'right' are
     # read unconditionally -- presumably guaranteed by default_params;
     # confirm.
     if self.params.get('bottom'):
         fig.subplots_adjust(hspace=0.0, bottom = self.params['bottom'], top   = self.params['top'],
                                         left   = self.params['left'],   right = self.params['right'])
     # Grid #
     if 'x_grid' in self.params: axes.xaxis.grid(self.params['x_grid'])
     if 'y_grid' in self.params: axes.yaxis.grid(self.params['y_grid'])
     # Data and source extra text #
     if hasattr(self, 'dev_mode') and self.dev_mode is True:
         fig.text(0.99, 0.98, time.asctime(), horizontalalignment='right')
         job_name = os.environ.get('SLURM_JOB_NAME', 'Unnamed')
         user_msg = 'user: %s, job: %s' % (getpass.getuser(), job_name)
         fig.text(0.01, 0.98, user_msg, horizontalalignment='left')
     # Nice digit grouping #
     # NOTE(review): 'sep' and 'formats' below are accessed without a
     # default, so default_params must always provide them -- confirm.
     if 'x' in self.params['sep']:
         separate = lambda x,pos: split_thousands(x)
         axes.xaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(separate))
     if 'y' in self.params['sep']:
         separate = lambda y,pos: split_thousands(y)
         axes.yaxis.set_major_formatter(matplotlib.ticker.FuncFormatter(separate))
     # Add custom labels #
     if 'x_labels' in self.params: axes.set_xticklabels(self.params['x_labels'])
     if 'x_labels_rot' in self.params: pyplot.setp(axes.xaxis.get_majorticklabels(), rotation=self.params['x_labels_rot'])
     # Possibility to overwrite path #
     if 'path' in self.params:   path = FilePath(self.params['path'])
     elif hasattr(self, 'path'): path = FilePath(self.path)
     else:                       path = FilePath(self.short_name + '.pdf')
     # Save it as different formats #
     for ext in self.params['formats']: fig.savefig(path.replace_extension(ext))
     # Close it #
     pyplot.close(fig)
Esempio n. 20
0
    ipython -i -- ~/deploy/plumbing/tests/database/access_db/test_copy_table.py
"""

# Built-in module #
import inspect, os

# Internal modules #
from plumbing.databases.access_database import AccessDatabase
from autopaths.file_path import FilePath

# Third party modules #
import pandas

# Constants #
# Constants: locate the directory this script lives in #
file_name = inspect.getframeinfo(inspect.currentframe()).filename
this_dir  = os.path.dirname(os.path.abspath(file_name)) + '/'

# Never modify the original: work on a throw-away copy instead #
orig_path    = FilePath(this_dir + 'orig.mdb')
testing_path = FilePath(this_dir + 'testing.mdb')
orig_path.copy(testing_path)

# The source database #
source_db = AccessDatabase(testing_path)

# The destination database #
# NOTE(review): `AccessDatabase.create` presumably makes a fresh empty
# database at `dest_path` -- confirm it overwrites leftovers from a
# previous run.
dest_path = FilePath(this_dir + 'copied_table.mdb')
dest_db   = AccessDatabase.create(dest_path)

# Copy a table #
dest_db.import_table(source_db, "tblClassifierSets")
Esempio n. 21
0
 def __init__(self, path):
     """Wrap `path` and remember where its companion FASTA should be."""
     # Call parent constructor #
     FilePath.__init__(self, path)
     # The corresponding FASTA lives at the same path with a .fasta extension #
     self.fasta_path = self.replace_extension('fasta')
Esempio n. 22
0
Or in the Ubuntu WSL:

    ipython -i -- ~/deploy/plumbing/tests/database/access_db/test_conversion.py
"""

# Built-in module #
import inspect, os

# Internal modules #
from plumbing.databases.access_database import AccessDatabase
from autopaths.file_path import FilePath

# Third party modules #
import pandas

# Constants #
# Constants: locate the directory this script lives in #
file_name = inspect.getframeinfo(inspect.currentframe()).filename
this_dir  = os.path.dirname(os.path.abspath(file_name)) + '/'

# Never modify the original: work on a throw-away copy instead #
orig_db    = FilePath(this_dir + 'orig.mdb')
testing_db = FilePath(this_dir + 'testing.mdb')
orig_db.copy(testing_db)

# The database #
db = AccessDatabase(testing_db)

# Convert #
# NOTE(review): the destination of the sqlite file is decided inside
# `convert_to_sqlite` -- see its definition.
db.convert_to_sqlite()
Esempio n. 23
0
class JobSLURM(object):
    """Makes launching SLURM jobs easy to write and easy to use. Here are some
    examples on how to use this class:

        for command in ['print "hi"', 'print "hello"']:
            job = JobSLURM(command, time='00:01:00', qos='short')
            job.run()

        for path in ['~/data/scafolds1.txt', '~/data/scafolds2.txt', '~/data/scafolds3.txt']:
            command =  ['import sh\n']
            command += ['script = sh.Command("analyze.py")\n']
            command += ['script(%s)' % path]
            job = JobSLURM(command, time='00:01:00', qos='short', job_name=path[-25:])
            job.run()
            print "Job %i is running !" % job.id

    Then you can easily check the status of your job and the standard out that is associated:

        print job.status
        print job.log_tail
        print job.info['time_left']

    etc.
    """

    # Script file extension per supported language #
    extensions = {'bash': "sh", 'python': "py"}

    shebang_headers = {
        'bash': ["#!/bin/bash -le"],  # As a login shell and stop on error
        'python': ["#!/usr/bin/env python"]
    }

    # Every supported #SBATCH option: its tag template, whether it must
    # always be emitted, and the default used when the caller gave none #
    slurm_headers = OrderedDict((
        ('job_name', {
            'tag': '#SBATCH -J %s',
            'needed': True
        }),
        ('change_dir', {
            'tag': '#SBATCH -D %s',
            'needed': True,
            'default': os.path.abspath(os.getcwd())
        }),
        ('out_file', {
            'tag': '#SBATCH -o %s',
            'needed': True,
            'default': '/dev/null'
        }),
        ('project', {
            'tag': '#SBATCH -A %s',
            'needed': False,
            'default': 'b2011035'
        }),
        ('time', {
            'tag': '#SBATCH -t %s',
            'needed': True,
            'default': '7-00:00:00'
        }),
        ('machines', {
            'tag': '#SBATCH -N %s',
            'needed': True,
            'default': '1'
        }),
        ('cores', {
            'tag': '#SBATCH -n %s',
            'needed': True,
            'default': num_processors
        }),
        ('partition', {
            'tag': '#SBATCH -p %s',
            'needed': True,
            'default': 'node'
        }),
        ('email', {
            'tag': '#SBATCH --mail-user %s',
            'needed': False,
            'default': os.environ.get('EMAIL')
        }),
        ('email-when', {
            'tag': '#SBATCH --mail-type=%s',
            'needed': True,
            'default': 'END'
        }),
        ('qos', {
            'tag': '#SBATCH --qos=%s',
            'needed': False,
            'default': 'short'
        }),
        ('dependency', {
            'tag': '#SBATCH -d %s',
            'needed': False,
            'default': 'afterok:1'
        }),
        ('constraint', {
            'tag': '#SBATCH -C %s',
            'needed': False,
            'default': 'fat'
        }),
        ('cluster', {
            'tag': '#SBATCH -M %s',
            'needed': False,
            'default': 'milou'
        }),
        ('alloc', {
            'tag': '#SBATCH --reservation=%s',
            'needed': False,
            'default': 'workstation'
        }),
        ('jobid', {
            'tag': '#SBATCH --jobid=%i',
            'needed': False,
            'default': 2173455
        }),
        ('memory', {
            'tag': '#SBATCH --mem=%i',
            'needed': False,
            'default': 120000
        }),
        ('mem_per_cpu', {
            'tag': '#SBATCH --mem-per-cpu=%i',
            'needed': False,
            'default': 512
        }),
        ('threads', {
            'tag': '#SBATCH --cpus-per-task=%i',
            'needed': False,
            'default': num_processors
        }),
    ))

    # Lines emitted at the top of the generated script (after headers) #
    script_headers = {
        'bash': ['echo "SLURM: start at $(date) on $(hostname)"'],
        'python': [
            'import dateutil.tz, datetime, platform',
            'now = datetime.datetime.now(dateutil.tz.tzlocal())',
            r'now = now.strftime("%Y-%m-%d %Hh%Mm%Ss %Z%z")',
            'node = platform.node()',
            'print "SLURM: start at {0} on {1}".format(now, node)'
        ]
    }

    # Lines emitted at the bottom of the generated script; the status
    # property looks for the "SLURM: end at" marker to detect success #
    script_footers = {
        'bash': ['echo "SLURM: end at $(date)"'],
        'python': [
            'now = datetime.datetime.now(dateutil.tz.tzlocal())',
            r'now = now.strftime("%Y-%m-%d %Hh%Mm%Ss %Z%z")',
            'print "SLURM: end at {0}".format(now)'
        ]
    }

    def __repr__(self):
        return '<%s object "%s">' % (self.__class__.__name__, self.name)

    @property
    def name(self):
        """The job name as submitted to SLURM."""
        return self.kwargs['job_name']

    # NOTE(review): the mutable default for `command` is never mutated
    # (it is only joined/iterated), so it is safe, but treat with care.
    def __init__(self,
                 command=["print 'Hello world'"],
                 language='python',
                 base_dir=None,
                 script_path=None,
                 **kwargs):
        """Store the command and language, then compute all file paths."""
        # Required attributes #
        self.command = command
        self.language = language
        self.kwargs = kwargs
        # Set the file paths #
        self.set_paths(base_dir, script_path)
        # Check command type #
        if not isinstance(self.command, list): self.command = [self.command]
        # Get the name: derive a stable one from the command if not given #
        if 'job_name' not in self.kwargs:
            hashed = hashlib.md5(''.join(self.command)).digest()
            encoded = base64.urlsafe_b64encode(hashed)
            self.kwargs['job_name'] = encoded
        # Check we have a project otherwise choose the one with less hours #
        if hostname.startswith('milou'):
            if 'project' not in self.kwargs and 'SBATCH_ACCOUNT' not in os.environ:
                if projects: self.kwargs['project'] = projects[0]['name']

    def set_paths(self, base_dir, script_path):
        """Set the directory, the script path and the outfile path"""
        # Make absolute paths #
        if 'change_dir' in self.kwargs:
            self.kwargs['change_dir'] = DirectoryPath(
                os.path.abspath(self.kwargs['change_dir']))
        if 'out_file' in self.kwargs:
            self.kwargs['out_file'] = FilePath(
                os.path.abspath(self.kwargs['out_file']))
        # In case there is a base directory #
        if base_dir is not None:
            self.base_dir = DirectoryPath(os.path.abspath(base_dir))
            self.script_path = FilePath(base_dir + "run." +
                                        self.extensions[self.language])
            self.kwargs['change_dir'] = base_dir
            self.kwargs['out_file'] = FilePath(base_dir + "run.out")
        # Other cases #
        if base_dir is None and script_path is None:
            self.script_path = FilePath(new_temp_path())
        if script_path is not None:
            self.script_path = FilePath(os.path.abspath(script_path))

    @property_cached
    def slurm_params(self):
        """The list of parameters to give to the `sbatch` command."""
        # Main loop #
        result = OrderedDict()
        for param, info in self.slurm_headers.items():
            if not info['needed'] and param not in self.kwargs: continue
            if param in self.kwargs: result[param] = self.kwargs.get(param)
            else: result[param] = info['default']
        # Special cases #
        if result.get('cluster') == 'halvan': result['partition'] = 'halvan'
        # Return #
        return result

    @property
    def script(self):
        """The script to be submitted to the SLURM queue."""
        self.shebang_header = self.shebang_headers[self.language]
        self.slurm_header = [
            self.slurm_headers[k]['tag'] % v
            for k, v in self.slurm_params.items()
        ]
        self.script_header = self.script_headers[self.language]
        self.script_footer = self.script_footers[self.language]
        return '\n'.join(
            flatter([
                self.shebang_header, self.slurm_header, self.script_header,
                self.command, self.script_footer
            ]))

    def make_script(self):
        """Make the script and return a FilePath object pointing to the script above."""
        self.script_path.write(self.script)
        self.script_path.permissions.make_executable()
        return self.script_path

    @property
    def log(self):
        """The log as a FilePath object"""
        return self.slurm_params['out_file']

    @property
    def log_tail(self):
        """If we have a log file, what is its tail"""
        if not self.kwargs['out_file'].exists: return False
        else: return tail(self.slurm_params['out_file'])

    @property
    def status(self):
        """What is the status of the job ?"""
        # If there is no script it is either ready or a lost duplicate #
        if not self.script_path.exists:
            if self.name in jobs.names: return "DUPLICATE"
            if self.name not in jobs.names: return "READY"
        # It is submitted already #
        if self.name in jobs.names:
            if jobs[self.name]['type'] == 'queued': return "QUEUED"
            if jobs[self.name]['type'] == 'running': return "RUNNING"
        # So the script exists for sure but it is not in the queue #
        if not self.kwargs['out_file'].exists: return "ABORTED"
        # Let's look in log file #
        # NOTE(review): SLURM logs normally spell "CANCELLED" (two L's),
        # which does NOT contain the substring checked here; the
        # 'slurmstepd: error' check below is what usually catches
        # cancellations -- confirm against real log output before
        # changing.
        if 'CANCELED' in self.log_tail: return "CANCELLED"
        if 'slurmstepd: error' in self.log_tail: return "CANCELLED"
        # It all looks good #
        if 'SLURM: end at' in self.log_tail: return "FINISHED"
        # At this point we have no idea #
        return "INTERUPTED"

    @property
    def info(self):
        """Get the existing job information dictionary"""
        if self.name not in jobs: return {'status': self.status}
        else: return jobs[self.name]

    #-------------------------------------------------------------------------#
    def run(self):
        """Will call self.launch() after performing some checks"""
        # Check already exists #
        if self.status == "READY": return self.launch()
        # Check name conflict #
        if self.status == "DUPLICATE":
            message = "Job with same name '%s' already in queue, but we lost the script."
        if self.status == "QUEUED": message = "Job '%s' already in queue."
        if self.status == "RUNNING": message = "Job '%s' already running."
        if self.status == "FINISHED":
            message = "Job '%s' already ended successfully."
        if self.status == "ABORTED":
            message = "Job '%s' was killed without any output file (?)."
        # Bug fix: the status property returns "CANCELLED" (two L's);
        # the previous comparison against "CANCELED" never matched and
        # left `message` unbound, causing a NameError below #
        if self.status == "CANCELLED":
            message = "Job '%s' was canceled or killed while running."
        if self.status == "INTERUPTED":
            message = "Job '%s' is not running. We don't know why. Look at the log file."
        print(Color.i_red + message % (self.name, ) + Color.end)
        print("Job might have run already (?). Not starting.")

    def launch(self):
        """Make the script file and return the newly created job id"""
        # Make script file #
        self.make_script()
        # Do it #
        sbatch_out = sh.sbatch(self.script_path)
        jobs.expire()
        # Message #
        print(Color.i_blu + "SLURM:" + Color.end + " " + str(sbatch_out), )
        # Return id #
        self.id = int(
            re.findall("Submitted batch job ([0-9]+)", str(sbatch_out))[0])
        return self.id

    def cancel(self):
        """Cancel the job if it is currently queued or running."""
        if self.status != "QUEUED" and self.status != "RUNNING":
            raise Exception("Can't cancel job '%s'" % self.name)
        sh.scancel(self.info['jobid'])

    def wait(self):
        """Wait until the job is finished"""
        pass

    #-------------------------------------------------------------------------#
    def run_locally(self):
        """A convenience method to run the same result as a SLURM job
        but locally in a non-blocking way. Useful for testing."""
        self.thread = threading.Thread(target=self.execute_locally)
        self.thread.daemon = True  # So that they die when we die
        self.thread.start()

    def execute_locally(self):
        """Runs the equivalent command locally in a blocking way."""
        # Make script file #
        self.make_script()
        # Do it #
        with open(self.kwargs['out_file'], 'w') as handle:
            sh.python(self.script_path, _out=handle, _err=handle)

    def wait_locally(self):
        """If you have run the query in a non-blocking way, call this method to pause
        until the query is finished."""
        try:
            self.thread.join(
                sys.maxint)  # maxint timeout so that we can Ctrl-C them
        except KeyboardInterrupt:
            print("Stopped waiting on job '%s'" % self.kwargs['job_name'])
 def index_samtools(self):
     """Create an index on the fasta file compatible with samtools."""
     sh.samtools('faidx', self.path)
     # Samtools writes the .fai index next to the fasta file #
     return FilePath(self.path + '.fai')
Esempio n. 25
0
def new_temp_file(**kwargs):
    """A new temporary path as a FilePath object."""
    # delete=False makes the file survive being closed #
    with tempfile.NamedTemporaryFile(delete=False, **kwargs) as handle:
        path = handle.name
    return FilePath(path)
 def __init__(self, parent):
     """Remember the owner object and where the graph will be saved."""
     self.parent = parent
     # The PDF goes next to the parent's other files #
     self.path = FilePath(self.parent.prefix_path + '_len_hist.pdf')