def build_genome_index_path(cls, genome, *args, **kwargs):
    '''Check that the configured default aligner is registered as the
    current version in the repository, and derive the name of its
    index directory ("<program>-<version>").

    The process is terminated via sys.exit if no current Program
    record matches the detected aligner name and version.

    NOTE(review): genome, args, kwargs and the computed indexdir are
    not used in the portion of the function visible here -- confirm
    against the full definition.
    '''
    # Import here rather than main file as otherwise cluster operations fail.
    from ..models import Program

    conf = Config()

    # Get information about default aligner, check that the program is
    # in path and try to predict its version. The aligner is looked up
    # using the alternative host connection settings.
    ssh_options = {
        'ssh_host': conf.althost,
        'ssh_user': conf.althostuser,
        'ssh_path': conf.althostpath,
        'ssh_port': conf.althostport,
    }
    alignerinfo = ProgramSummary(conf.aligner, **ssh_options)

    # Check that the version of aligner has been registered in
    # repository.
    try:
        Program.objects.get(program=alignerinfo.program,
                            version=alignerinfo.version,
                            current=True)

    # If aligner version is missing, try to insert it into the database
    # (FIXME not yet implemented while we see how this works).
    except Program.DoesNotExist:
        found = ('Aligner "%s" version "%s" found at path "%s" '
                 % (alignerinfo.program,
                    alignerinfo.version,
                    alignerinfo.path))
        sys.exit(found + "not recorded as current in repository! Quitting.")

    indexdir = "%s-%s" % (alignerinfo.program, alignerinfo.version)
def build_genome_index_path(cls, genome, *args, **kwargs):
    '''Check that the STAR aligner found on the cluster is registered as
    the current version in the repository, and derive the name of its
    index directory ("STAR_<version>").

    The process is terminated via sys.exit if no current Program
    record matches the detected program name and version.

    NOTE(review): genome, args, kwargs and the computed indexdir are
    not used in the portion of the function visible here -- confirm
    against the full definition.
    '''
    # Import here rather than main file as otherwise cluster operations fail.
    from ..models import Program

    conf = Config()

    # Get information about default aligner, check that the program is
    # in path and try to predict its version. STAR is looked up using
    # the cluster connection settings.
    ssh_options = {
        'ssh_host': conf.cluster,
        'ssh_port': conf.clusterport,
        'ssh_user': conf.clusteruser,
        'ssh_path': conf.clusterpath,
    }
    alignerinfo = ProgramSummary('STAR', **ssh_options)

    indexdir = None

    # Check that the version of aligner has been registered in
    # repository.
    try:
        Program.objects.get(program=alignerinfo.program,
                            version=alignerinfo.version,
                            current=True)
    except Program.DoesNotExist:
        found = ('Aligner "%s" version "%s" found at path "%s" '
                 % (alignerinfo.program,
                    alignerinfo.version,
                    alignerinfo.path))
        sys.exit(found + "not recorded as current in repository! Quitting.")
    else:
        indexdir = "%s_%s" % ('STAR', alignerinfo.version)
def __init__(self, target, program_name, path=None, program_params='',
             workdir=None, move_files=True):
    '''Record the run settings and look up the repository record for
    the program to be run.

    target, program_name, program_params, path, workdir and move_files
    are stored on the instance unchanged; output_files and output_md5s
    start out as empty lists.

    Raises StandardError if no workdir is given while move_files is
    False, or if the repository holds no current Program record for
    the detected program name and version.
    '''
    self.target = target
    self.path = path
    self.workdir = workdir
    self.move_files = move_files
    self.program_name = program_name
    self.program_params = program_params
    self.output_files = []
    self.output_md5s = []

    if workdir is None:
        # No working directory supplied: leaving the outputs where
        # they are is rejected as a contradictory request.
        if self.move_files == False:
            raise StandardError(
                "Not moving files from temporary directory to be deleted does not make sense!"
            )
    else:
        # A caller-supplied working directory is not ours to delete.
        self._delete_workdir = False

    # This checks that the specified program exists, and where it
    # yields some kind of meaningful version info will record that.
    summary = ProgramSummary(program_name, path=path)

    # This is a little vulnerable to correct version parsing by
    # progsum.
    try:
        self._dbprog = Program.objects.get(program=summary.program,
                                           version=summary.version,
                                           current=True)
    except Program.DoesNotExist:
        template = ("Unable to find current %s program (version %s)"
                    " record in the repository")
        raise StandardError(template % (summary.program, summary.version))
def _save_aln_to_database(self, aln, alnfiles, progname, progvers):
    '''Save an alignment and its files, then look up the repository
    record for the program which produced them.

    aln is saved first; every value of the alnfiles mapping is then
    attached to it and saved. If progvers is None, the program name and
    version are both taken from a ProgramSummary query made with the
    cluster connection settings in self.config.

    Raises StandardError if there is no current Program record for the
    resulting program name and version.

    NOTE(review): the retrieved program object is not used in the
    portion of the method visible here -- confirm against the full
    definition.
    '''
    # Handle the alignment.
    aln.save()
    for alf in alnfiles.values():
        alf.alignment = aln  # ensure alignment_id has been set.
        alf.save()

    # Identity test rather than "== None", so that the outcome cannot
    # depend on how the supplied version object implements equality.
    if progvers is None:
        alignerinfo = ProgramSummary(progname,
                                     ssh_host=self.config.cluster,
                                     ssh_user=self.config.clusteruser,
                                     ssh_path=self.config.clusterpath,
                                     ssh_port=self.config.clusterport)
        progname = alignerinfo.program
        progvers = alignerinfo.version

    try:
        program = Program.objects.get(program=progname,
                                      version=progvers,
                                      current=True)
    except Program.DoesNotExist:
        raise StandardError(
            "Unable to find current program in database: %s %s"
            % (progname, progvers))
# as published by the Free Software Foundation, either version 3 of
# the License, or (at your option) any later version.
#
# The osqutil python package is distributed in the hope that it will
# be useful, but WITHOUT ANY WARRANTY; without even the implied
# warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with the osqutil python package.  If not, see
# <http://www.gnu.org/licenses/>.

# N.B. The interesting code was moved to its own library under libpy,
# so we don't need to put the bin directory on PYTHONPATH all the
# time.
from osqutil.progsum import ProgramSummary

if __name__ == '__main__':

    import argparse

    # Command-line front end: takes a single program name and prints
    # the name, path and version reported by ProgramSummary.
    PARSER = argparse.ArgumentParser(
        description='Finds program in file system. Reports program name, program path and program version (if available).')

    PARSER.add_argument('program',
                        metavar='<program>',
                        type=str,
                        help='Name of the program')

    ARGS = PARSER.parse_args()

    # NOTE(review): the path and version arguments are hard-coded
    # literals that look like placeholders -- confirm they are intended.
    summary = ProgramSummary(ARGS.program, path="ukulele", version="whoknows")

    # A single parenthesised argument prints identically under the
    # print statement and the print function.
    print("Program: %s\nPath: %s\nVersion: %s"
          % (summary.program, summary.path, summary.version))
class AlignmentHandler(object):
    '''Class designed to manage the insertion of alignment-related
    files into the repository.

    The prog, params and progvers attributes are held as lists so that
    one or several programs can be handled in the same way.
    '''
    __slots__ = ('params', 'prog', 'progvers', 'genome',
                 'headtrim', 'tailtrim', 'conf')

    def __init__(self, genome, prog, params='', progvers=None,
                 headtrim=0, tailtrim=0):
        '''Store the genome code, program details and trimming values.

        prog and params may each be given as a scalar or as a list.
        Scalars are wrapped into single-element lists. When prog is a
        list, params and progvers must either be left at their defaults
        or have the same length as prog.

        Raises ValueError if the lengths disagree, and TypeError if
        params is a list while prog is a scalar.
        '''
        # Program and parameters can be a list or scalar. Params elements
        # should always be string; program can be either string or
        # osqpipe.models.Program.
        if not isinstance(prog, list) and not isinstance(params, list):

            # Scalar arguments
            self.prog = [prog]
            self.params = [params]

            # FIXME consider throwing an error here if progvers is
            # already a list.
            self.progvers = [progvers]

        elif isinstance(prog, list):

            # List arguments (params may be the default empty string;
            # progvers may simply by a scalar None)
            self.prog = prog
            if len(prog) == len(params):
                self.params = params
            elif params == '':  # handle the empty default.
                self.params = ['' for _x in prog]
            else:
                raise ValueError("Lengths of prog and params list arguments"
                                 + " must match.")

            if progvers is None:  # handle the empty default.
                self.progvers = [None for _x in prog]
            elif len(prog) == len(progvers):
                self.progvers = progvers
            else:
                raise ValueError("Lengths of prog and progvers list arguments"
                                 + " must match.")

        else:
            raise TypeError("The params argument cannot be a list if prog is a scalar")

        self.genome = genome
        self.headtrim = headtrim
        self.tailtrim = tailtrim
        self.conf = Config()

    ## FIXME unused code here?
    @staticmethod
    def calculate_trimming(_fname, name, fq_seq, aln_seq):
        '''Currently unused function?

        Compare the two sequences case-insensitively and return a tuple
        (left, right) giving how many characters of fq_seq lie before
        and after the first occurrence of aln_seq. Returns (0, 0) when
        the sequences are identical, and also (after logging an error)
        when aln_seq does not occur in fq_seq.
        '''
        fq_seq = fq_seq.upper()
        aln_seq = aln_seq.upper()
        if fq_seq == aln_seq:
            return (0, 0)

        pos = fq_seq.find(aln_seq)
        if pos == -1:
            LOGGER.error(
                "%s: aln sequence '%s' does not match fastq sequence '%s'",
                name, aln_seq, fq_seq)
            return (0, 0)

        return (pos, len(fq_seq) - pos - len(aln_seq))

    def aln_from_bedfile(self, bed):
        '''Given a bed file name, retrieve the associated database
        Alignment and Lane objects.

        Returns a tuple (aln, lane); the Alignment is newly constructed
        and has not been saved.

        Raises ValueError if the genome code does not occur in the file
        name, or if the file name does not identify exactly one lane.
        '''
        LOGGER.info("Processing bed file: %s", bed)

        # A quick sanity check to try and make sure we don't load a file
        # against the wrong genome. This remains fallible since some
        # genome codes are quite short and might occur in a filename by
        # chance.
        # Note do not merge this re.I change into repackaging branch
        # (it is unnecessary).
        if not re.search(self.genome, bed, re.I):
            raise ValueError("Genome code not found in bed file name."
                             + " Loading against the wrong genome (%s)?"
                             % self.genome)

        (code, facility, lanenum, _pipeline) = parse_repository_filename(bed)
        lanelist = Lane.objects.filter(library__code=code,
                                       facility__code=facility,
                                       lanenum=lanenum)

        # Count once rather than once per branch.
        num_lanes = lanelist.count()
        if num_lanes == 0:
            raise ValueError("Could not find lane for '%s'" % (bed,))
        if num_lanes > 1:
            # The ids are converted explicitly: str.join only accepts
            # strings, so joining the raw ids could fail with a
            # TypeError instead of raising the intended ValueError.
            raise ValueError(("Found multiple lanes for '%s': " % (bed,))
                             + ", ".join([str(x.id) for x in lanelist]))

        lane = lanelist[0]
        aln = self._create_alignment(bed, lane)
        return (aln, lane)

    def _create_alignment(self, bed, lane):
        '''Build (but do not save) an Alignment for the given bed file
        and lane, using the read counts returned by count_reads and the
        Genome whose code matches self.genome.
        '''
        (mapped, unique) = count_reads(bed)
        gen = Genome.objects.get(code=self.genome)

        # We don't save this yet because we're not currently within a
        # transaction.
        aln = Alignment(lane=lane,
                        genome=gen,
                        mapped=mapped,
                        munique=unique,
                        total_reads=lane.total_passedpf,
                        headtrim=self.headtrim,
                        tailtrim=self.tailtrim)
        return aln

    def _find_versioned_program(self, subprog, factor, subvers=None):
        '''Return the program object for subprog, as given by
        self._retrieve_program_object.

        If subvers is supplied it is used directly. Otherwise the
        version is detected with ProgramSummary, querying the local
        host path, the alternative alignment host or the compute
        cluster depending on the program and factor.
        '''
        subprog = subprog.strip()

        # If the version has been pre-specified, just use that.
        if subvers is not None:
            subvers = subvers.strip()
            return self._retrieve_program_object(program=subprog,
                                                 version=subvers)

        # If no version specified, look for the program on cluster or
        # althost.
        # NOTE(review): an empty althost raises AssertionError, which is
        # not caught here, and the check disappears under "python -O".
        # Confirm whether an empty value should instead be treated as
        # "no alternative host".
        try:
            althost = self.conf.althost
            assert althost != ''
        except AttributeError:
            althost = None

        # FIXME come up with a better heuristic than this.
        if subprog in ('reallocateReads', 'samtools') \
           and factor \
           and factor.name in self.conf.reallocation_factors:

            # These programs used on the local server.
            alignerinfo = ProgramSummary(subprog, path=self.conf.hostpath)

        elif althost is not None and subprog == self.conf.aligner:

            # bwa, maq, gsnap et al. as launched on the remote alignment
            # host: using the alternative alignment host.
            alignerinfo = ProgramSummary(subprog,
                                         ssh_host=althost,
                                         ssh_user=self.conf.althostuser,
                                         ssh_path=self.conf.althostpath,
                                         ssh_port=self.conf.althostport)

        else:

            # Using the compute cluster as standard.
            alignerinfo = ProgramSummary(subprog,
                                         ssh_host=self.conf.cluster,
                                         ssh_user=self.conf.clusteruser,
                                         ssh_path=self.conf.clusterpath,
                                         ssh_port=self.conf.clusterport)

        return self._retrieve_program_object(program=alignerinfo.program,
                                             version=alignerinfo.version)