예제 #1
0
    def build_genome_index_path(cls, genome, *args, **kwargs):
        """Derive the index directory name for the configured aligner.

        The aligner named by ``Config().aligner`` is summarised with
        ``ProgramSummary`` using the alternative-host SSH settings. Its
        program/version pair must be recorded as current in the
        repository; if it is not, the process terminates via
        ``sys.exit`` with an explanatory message.

        ``genome`` and the extra positional/keyword arguments are
        accepted but not referenced by the code below.
        """

        # Import here rather than main file as otherwise cluster operations fail.
        from ..models import Program

        settings = Config()

        # Summarise the default aligner (program name, path, version) as
        # reported through the alternative host connection settings.
        alignerinfo = ProgramSummary(settings.aligner,
                                     ssh_host=settings.althost,
                                     ssh_user=settings.althostuser,
                                     ssh_path=settings.althostpath,
                                     ssh_port=settings.althostport)

        # The aligner version has to be registered as current in the
        # repository. Inserting a missing version automatically is a
        # FIXME: not yet implemented while we see how this works.
        try:
            Program.objects.get(program=alignerinfo.program,
                                version=alignerinfo.version,
                                current=True)
        except Program.DoesNotExist:
            found = ("""Aligner "%s" version "%s" found at path "%s" """ %
                     (alignerinfo.program,
                      alignerinfo.version,
                      alignerinfo.path))
            sys.exit(found + "not recorded as current in repository! Quitting.")
        else:
            # Only reached when the lookup succeeded.
            indexdir = "%s-%s" % (alignerinfo.program, alignerinfo.version)
예제 #2
0
    def build_genome_index_path(cls, genome, *args, **kwargs):
        """Derive the index directory name for the STAR aligner.

        STAR is summarised with ``ProgramSummary`` using the cluster SSH
        settings from ``Config()``. Its program/version pair must be
        recorded as current in the repository; if it is not, the process
        terminates via ``sys.exit`` with an explanatory message.

        ``genome`` and the extra positional/keyword arguments are
        accepted but not referenced by the code below.
        """

        # Import here rather than main file as otherwise cluster operations fail.
        from ..models import Program

        settings = Config()

        # Summarise STAR (program name, path, version) as reported
        # through the cluster connection settings.
        alignerinfo = ProgramSummary('STAR',
                                     ssh_host=settings.cluster,
                                     ssh_port=settings.clusterport,
                                     ssh_user=settings.clusteruser,
                                     ssh_path=settings.clusterpath)
        indexdir = None

        # The aligner version has to be registered as current in the
        # repository before an index directory name is assigned.
        try:
            Program.objects.get(program=alignerinfo.program,
                                version=alignerinfo.version,
                                current=True)
        except Program.DoesNotExist:
            found = ("""Aligner "%s" version "%s" found at path "%s" """ %
                     (alignerinfo.program,
                      alignerinfo.version,
                      alignerinfo.path))
            sys.exit(found + "not recorded as current in repository! Quitting.")
        else:
            # Only reached when the lookup succeeded.
            indexdir = "%s_%s" % ('STAR', alignerinfo.version)
예제 #3
0
    def __init__(self,
                 target,
                 program_name,
                 path=None,
                 program_params='',
                 workdir=None,
                 move_files=True):
        """Store the run settings and look up the program's repository record.

        Args:
            target: stored unchanged on ``self.target``.
            program_name: name passed to ``ProgramSummary``.
            path: optional path passed to ``ProgramSummary``.
            program_params: stored unchanged on ``self.program_params``.
            workdir: optional working directory; when given,
                ``self._delete_workdir`` is set to False.
            move_files: stored on ``self.move_files``; must not equal
                False when no ``workdir`` is supplied.

        Raises:
            StandardError: if ``workdir`` is None while ``move_files``
                equals False, or if no current ``Program`` record
                matches the program name and version reported by
                ``ProgramSummary``.
        """

        self.target = target
        self.program_name = program_name
        self.program_params = program_params
        self.path = path

        # Filled in later; both start out empty.
        self.output_files = []
        self.output_md5s = []

        self.move_files = move_files
        self.workdir = workdir

        if workdir is not None:
            # A caller-supplied working directory is never deleted.
            self._delete_workdir = False
        elif self.move_files == False:
            # Without a workdir the message below implies a temporary
            # directory that gets deleted, so leaving files there is
            # rejected.
            raise StandardError(
                "Not moving files from temporary directory to be deleted does not make sense!"
            )

        # ProgramSummary reports the program name and version that are
        # used for the repository lookup below.
        summary = ProgramSummary(program_name, path=path)

        # This is a little vulnerable to correct version parsing by
        # progsum.
        try:
            self._dbprog = Program.objects.get(program=summary.program,
                                               version=summary.version,
                                               current=True)
        except Program.DoesNotExist:
            template = ("Unable to find current %s program (version %s)" +
                        " record in the repository")
            raise StandardError(template % (summary.program, summary.version))
    def _save_aln_to_database(self, aln, alnfiles, progname, progvers):
        """Save an alignment with its files and find the current Program record.

        Args:
            aln: alignment object; its ``save()`` method is called first.
            alnfiles: mapping whose values are file objects; each gets
                ``alignment`` set to ``aln`` and is then saved.
            progname: program name used for the repository lookup.
            progvers: program version, or None to take both the name and
                the version from a ``ProgramSummary`` built with the
                cluster SSH settings in ``self.config``.

        Raises:
            StandardError: if no current ``Program`` record matches the
                program name and version.
        """
        # Handle the alignment.
        aln.save()
        for alf in alnfiles.values():
            alf.alignment = aln  # ensure alignment_id has been set.
            alf.save()

        # Identity test: only a genuinely missing version triggers the
        # ProgramSummary lookup.
        if progvers is None:
            alignerinfo = ProgramSummary(progname,
                                         ssh_host=self.config.cluster,
                                         ssh_user=self.config.clusteruser,
                                         ssh_path=self.config.clusterpath,
                                         ssh_port=self.config.clusterport)
            progname = alignerinfo.program
            progvers = alignerinfo.version

        try:
            program = Program.objects.get(program=progname,
                                          version=progvers,
                                          current=True)
        except Program.DoesNotExist:
            raise StandardError(
                "Unable to find current program in database: %s %s" %
                (progname, progvers))
# as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# The osqutil python package is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with the osqutil python package.  If not, see
# <http://www.gnu.org/licenses/>.

# N.B. The interesting code was moved to its own library under libpy,
# so we don't need to put the bin directory on PYTHONPATH all the
# time.

from osqutil.progsum import ProgramSummary;

if __name__ == '__main__':
    # Command-line entry point: summarise one program named on the
    # command line and print its name, path and version.
    import argparse

    PARSER = argparse.ArgumentParser(
        description='Finds program in file system. Reports program name, program path and program version (if available).')

    PARSER.add_argument('program', metavar='<program>', type=str,
                        help='Name of the program')
    ARGS = PARSER.parse_args()

    # NOTE(review): path="ukulele" and version="whoknows" look like
    # leftover test values rather than real defaults -- confirm whether
    # they should be dropped so ProgramSummary works them out itself.
    p = ProgramSummary(ARGS.program, path="ukulele", version="whoknows")

    # A single parenthesised argument prints the same text as the old
    # print statement on Python 2 and is also valid on Python 3.
    print("Program: %s\nPath: %s\nVersion: %s" % (p.program, p.path, p.version))
예제 #6
0
class AlignmentHandler(object):

  '''Class designed to manage the insertion of alignment-related files
  into the repository.

  Instances hold the program(s) used for an alignment together with
  their parameters and versions (prog, params, progvers), the genome
  code, the head/tail trimming values and a Config object (conf).'''

  # Instances may only carry the attributes named here; they are all
  # assigned in __init__.
  __slots__ = ('params', 'prog', 'progvers', 'genome', 'headtrim', 'tailtrim', 'conf')

  def __init__(self, genome, prog, params='', progvers=None, headtrim=0, tailtrim=0):
    '''Normalise the program arguments into parallel lists and store
    the genome code, trimming values and a Config object.

    prog and params may each be a scalar or a list. Params elements
    should always be strings; a program can be either a string or an
    osqpipe.models.Program.

    When prog is a scalar, prog, params and progvers are each wrapped
    in a one-element list. When prog is a list, params must be either
    the default empty string (expanded to one empty string per
    program) or a list/tuple of the same length, and progvers must be
    either None (expanded to one None per program) or a list/tuple of
    the same length.

    Raises ValueError when prog is a list and params or progvers
    cannot be matched to it, and TypeError when params is a list but
    prog is a scalar.'''

    prog_is_list = isinstance(prog, list)

    if not prog_is_list and not isinstance(params, list):

      # Scalar arguments
      self.prog    = [ prog ]
      self.params  = [ params ]

      # FIXME consider throwing an error here if progvers is already a list.
      self.progvers = [ progvers ]

    elif prog_is_list:

      # List arguments (params may be the default empty string;
      # progvers may simply be a scalar None)
      self.prog    = prog

      # The default is tested first, and only real sequences are length
      # checked, so a string whose character count happens to equal
      # len(prog) is rejected instead of being stored as the params.
      if params == '': # handle the empty default.
        self.params = [ '' for _x in prog ]
      elif isinstance(params, (list, tuple)) and len(params) == len(prog):
        self.params = params
      else:
        raise ValueError("Lengths of prog and params list arguments"
                         + " must match.")

      if progvers is None: # handle the empty default.
        self.progvers = [ None for _x in prog ]
      elif isinstance(progvers, (list, tuple)) and len(progvers) == len(prog):
        self.progvers = progvers
      else:
        raise ValueError("Lengths of prog and progvers list arguments"
                         + " must match.")

    else:
      raise TypeError("The params argument cannot be a list if prog is a scalar")

    self.genome  = genome
    self.headtrim = headtrim
    self.tailtrim = tailtrim
    self.conf = Config()

  ## FIXME unused code here?
  @staticmethod
  def calculate_trimming(_fname, name, fq_seq, aln_seq):
    '''Work out how many bases flank aln_seq within fq_seq.

    The comparison ignores case. Returns a (left, right) tuple: the
    number of fastq bases before and after the aligned sequence. If
    the two sequences are identical, or the aligned sequence is not
    found in the fastq sequence (which is logged as an error against
    name), the result is (0, 0). _fname is not used.

    Possibly unused; see the FIXME above this method.'''
    read = fq_seq.upper()
    aligned = aln_seq.upper()

    # Identical sequences: nothing was trimmed.
    if read == aligned:
      return (0, 0)

    offset = read.find(aligned)
    if offset < 0:
      LOGGER.error(
        "%s: aln sequence '%s' does not match fastq sequence '%s'",
        name, aligned, read)
      return (0, 0)

    trailing = len(read) - offset - len(aligned)
    return (offset, trailing)

  def aln_from_bedfile(self, bed):

    '''Given a bed file name, retrieve the associated Lane object from
    the database and build an Alignment object for it.

    Returns an (alignment, lane) tuple; the alignment comes from
    self._create_alignment.

    Raises ValueError if self.genome does not occur in the file name
    (case-insensitive regular expression search), or if the file name
    does not identify exactly one lane.'''

    LOGGER.info("Processing bed file: %s", bed)

    # A quick sanity check to try and make sure we don't load a file
    # against the wrong genome. This remains fallible since some
    # genome codes are quite short and might occur in a filename by
    # chance.
    if not re.search(self.genome, bed, re.I): # Note do not merge this re.I change into repackaging branch (it is unnecessary).
      raise ValueError("Genome code not found in bed file name."
                     + " Loading against the wrong genome (%s)?" % self.genome)

    (code, facility, lanenum, _pipeline) = parse_repository_filename(bed)
    lanelist  = Lane.objects.filter(library__code=code,
                                    facility__code=facility,
                                    lanenum=lanenum)

    # Count once and reuse the result for both checks.
    nlanes = lanelist.count()
    if nlanes == 0:
      raise ValueError("Could not find lane for '%s'" % (bed,))
    if nlanes > 1:
      # join() only accepts strings, so the ids are converted first;
      # otherwise a TypeError would mask this ValueError.
      raise ValueError(("Found multiple lanes for '%s': "
                       % (bed,)) + ", ".join([str(x.id) for x in lanelist]))

    lane = lanelist[0]
    aln = self._create_alignment(bed, lane)

    return (aln, lane)

  def _create_alignment(self, bed, lane):

    '''Build an unsaved Alignment object for the given bed file and
    lane.

    The mapped and unique read counts come from count_reads(bed), the
    genome is looked up by self.genome, the total read count is taken
    from lane.total_passedpf, and the trimming values are those held
    by this handler.'''

    n_mapped, n_unique = count_reads(bed)
    genome_obj = Genome.objects.get(code=self.genome)

    # We don't save this yet because we're not currently within a
    # transaction.
    return Alignment(lane=lane,
                     genome=genome_obj,
                     mapped=n_mapped,
                     munique=n_unique,
                     total_reads=lane.total_passedpf,
                     headtrim=self.headtrim,
                     tailtrim=self.tailtrim)

  def _find_versioned_program(self, subprog, factor, subvers=None):

    '''Return the program object for subprog, as retrieved by
    self._retrieve_program_object.

    If subvers is given it is used directly (after stripping
    whitespace). Otherwise the program name and version are taken from
    a ProgramSummary, built in one of three ways:

      - with path=self.conf.hostpath, when subprog is reallocateReads
        or samtools, factor is truthy and factor.name is listed in
        self.conf.reallocation_factors;
      - with the althost SSH settings, when self.conf.althost exists
        and subprog equals self.conf.aligner;
      - with the cluster SSH settings in every other case.'''

    subprog = subprog.strip()

    # If the version has been pre-specified, just use that.
    if subvers is not None:
      wanted = subvers.strip()
      return self._retrieve_program_object(program=subprog,
                                           version=wanted)

    # If no version specified, look for the program on cluster or althost.
    # A missing althost setting means "no alternative host". Note the
    # assert is not caught here: an empty althost raises AssertionError
    # (and the check disappears when Python runs with -O).
    try:
      althost = self.conf.althost
      assert(althost != '')
    except AttributeError:
      althost = None

    # FIXME come up with a better heuristic than this.
    runs_locally = (subprog in ('reallocateReads', 'samtools')
                    and factor
                    and factor.name in self.conf.reallocation_factors)

    if runs_locally:
      # These programs used on the local server.
      summary = ProgramSummary(subprog, path=self.conf.hostpath)

    elif althost is not None and subprog == self.conf.aligner:
      # bwa, maq, gsnap et al. launched on the alternative alignment host.
      summary = ProgramSummary(subprog, ssh_host=althost,
                               ssh_user=self.conf.althostuser,
                               ssh_path=self.conf.althostpath,
                               ssh_port=self.conf.althostport)

    else:
      # Using the compute cluster as standard.
      summary = ProgramSummary(subprog, ssh_host=self.conf.cluster,
                               ssh_user=self.conf.clusteruser,
                               ssh_path=self.conf.clusterpath,
                               ssh_port=self.conf.clusterport)

    return self._retrieve_program_object(program=summary.program,
                                         version=summary.version)