def _create_archive_dir_on_host(fobj):
    '''
    Create a folder on a foreign host over ssh.
    '''
    arch = fobj.archive
    if arch is None:
        raise ValueError(
            "Attempting to transfer file to null archive location.")

    folder = os.path.join(arch.host_path, fobj.libcode)

    # Popen is given an argument list, so no local shell is involved;
    # we quote just once, for the shell on the remote server.
    cmd = ['ssh']
    if arch.host_port is not None:
        cmd += ['-p', str(arch.host_port)]
    if arch.host_user is not None:
        cmd += ['%s@%s' % (arch.host_user, arch.host)]
    else:
        cmd += [arch.host]
    cmd += ['mkdir', bash_quote(folder), '&& chmod 750', bash_quote(folder)]

    subproc = Popen(cmd, stdout=PIPE, stderr=PIPE)
    (stdout, stderr) = subproc.communicate()
    retcode = subproc.wait()
    if retcode != 0:
        if 'File exists' in stderr:
            LOGGER.info("Directory %s@%s:%s already exists.",
                        arch.host_user, arch.host, folder)
        else:
            raise StandardError(
                "ERROR. Failed to create directory in archive"
                + (" (cmd=\"%s\").\nSTDOUT: %s\nSTDERR: %s\n"
                   % (" ".join(cmd), stdout, stderr)))
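
# Illustration (not part of the original pipeline): a minimal sketch of
# the quoting rule used above. Popen receives an argv list, so only the
# remote shell parses the command; sshd joins the remote arguments with
# spaces before handing them to the login shell. All values here are
# hypothetical.
def _example_archive_mkdir_argv():
    from pipes import quote  # Python 2 stdlib equivalent of shlex.quote.
    folder = '/archive/lab data/lib001'
    return ['ssh', '-p', '2222', 'archuser@archive.example.org',
            'mkdir', quote(folder), '&& chmod 750', quote(folder)]
    # The remote shell then executes:
    #   mkdir '/archive/lab data/lib001' && chmod 750 '/archive/lab data/lib001'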
def return_file_to_localhost(self, clusterout, outfile, execute=True,
                             donefile=False):
    '''
    If execute is False, returns a command string that can be used to
    transfer a cluster output file back to our local working
    directory. If execute is True, the command will also be run on the
    cluster.
    '''
    myhost = getfqdn()
    myuser = getuser()
    sshcmd = "scp"

    # Transferring the files back to localhost requires an appropriate
    # passwordless ssh key to be given access on our localhost. The
    # alternative is some horrendous pexpect hack which is only a
    # little more secure (see: sshSangerTunnel.py).
    if self.ssh_key is not None:
        sshcmd += " -i %s" % self.ssh_key

    # Note that we need quoting of e.g. file paths containing
    # spaces. Also, the initial './' allows filenames to contain
    # colons.
    if not os.path.isabs(clusterout):
        clusterout = './%s' % (clusterout,)
    sshcmd += (r' %s %s@%s:\"' % (bash_quote(clusterout), myuser, myhost)
               + bash_quote(bash_quote(self.local_workdir
                                       + r'/%s' % outfile))
               + r'\"')

    if donefile:
        sshcmd += " && ssh"
        if self.ssh_key is not None:
            sshcmd += " -i %s" % self.ssh_key
        sshcmd += (r' %s@%s touch ' % (myuser, myhost)
                   + bash_quote(bash_quote(self.local_workdir
                                           + r'/%s.done' % outfile)))

    if execute is True:
        # This *should* die on failure.
        self.runner.run_command(sshcmd)

    return sshcmd
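
# For illustration only (hypothetical user, host and paths): with
# clusterout='12345_chr1_chr2.lav', outfile='chr1_chr2.lav' and
# donefile=True, the command string assembled above looks roughly like:
#
#   scp -i KEY ./12345_chr1_chr2.lav jdoe@local.example.org:\"/data/work/chr1_chr2.lav\"
#     && ssh -i KEY jdoe@local.example.org touch /data/work/chr1_chr2.lav.done
#
# The escaped quotes survive the cluster-side shell so that the path is
# quoted once more on the destination machine.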
def rename_files(files, fromFun):
    '''
    Move each file on the remote server from its old location (as
    returned by fromFun) to its current repository path.
    '''
    for fobj in files:
        old = fromFun(fobj)
        old = "/".join((SERVERDIR, old))
        new = fobj.repository_file_path
        new = "/".join((SERVERDIR, new))
        if old != new:
            d = os.path.dirname(new)
            # print "Creating directory %s" % (d,)
            cmd = ssh_command(('mkdir', '-p', quote(d)))
            call_subprocess(cmd, shell=True, path=CONFIG.hostpath)
            # print "Moving %s to %s" % (old, new)
            old = bash_quote(old)
            new = bash_quote(new)
            cmd = ssh_command(('mv', quote(old), quote(new)))
            try:
                call_subprocess(cmd, shell=True, path=CONFIG.hostpath)
            except CalledProcessError, err:
                print "Warning: move failed for file %s: %s" % (old, err)
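
# Hedged sketch (helper names are ours, not the original API): a command
# that passes through two shells -- the local one implied by shell=True,
# and the remote one spawned by sshd -- needs one layer of quoting for
# each, which is why rename_files quotes its paths twice.
from pipes import quote as _shquote  # Python 2 equivalent of shlex.quote.

def _example_remote_mv(old, new, host='server.example.org'):
    '''Return a locally shell-safe command running `mv old new` on host.'''
    remote_cmd = 'mv %s %s' % (_shquote(old), _shquote(new))  # remote layer
    return 'ssh %s %s' % (host, _shquote(remote_cmd))         # local layer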
def _archive_file_via_scp(fobj, attempts=1, sleeptime=2):
    '''
    A wrapper for scp allowing multiple attempts for the transfer in
    case of recoverable error.
    '''
    unrecoverable = ['No such file or directory',
                     'Failed to add the host to the list of known hosts',
                     'Operation not permitted']

    arch = fobj.archive
    if arch is None:
        raise ValueError(
            "Attempting to transfer file to null archive location.")

    # NOTE: We may still need to double-quote spaces in the destination
    # passed to scp. Double-quoting brackets ([]) does not work, though.
    host_archdir = os.path.join(arch.host_path, fobj.libcode)
    dest = os.path.join(host_archdir,
                        os.path.basename(fobj.repository_file_path))

    cmd = 'scp -p -o StrictHostKeyChecking=no'
    if arch.host_port is not None:
        cmd += ' -P %s' % str(arch.host_port)

    # Assume we're copying from the main repository to the archive.
    # Note that we need quoting of e.g. file paths containing spaces.
    cmd += ' %s' % bash_quote(fobj.original_repository_file_path)

    # Double-quote the destination, as it has to get past (a) our local
    # bash, and (b) the bash on the destination machine.
    if arch.host_user is not None:
        cmd += ' %s@%s:%s' % (arch.host_user, arch.host,
                              bash_quote(bash_quote(dest)))
    else:
        cmd += ' %s:%s' % (arch.host, bash_quote(bash_quote(dest)))

    start_time = time.time()

    while attempts > 0:
        subproc = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
        (stdout, stderr) = subproc.communicate()
        retcode = subproc.wait()
        if stdout is not None:
            sys.stdout.write(stdout)
        if stderr is not None:
            sys.stderr.write(stderr)
        if retcode != 0:
            for mesg in unrecoverable:
                if mesg in stderr:
                    LOGGER.error(stderr)
                    attempts = 0
                    break
            attempts -= 1
            if attempts <= 0:
                break
            LOGGER.warning(
                'Transfer failed with error: %s\nTrying again (max %d times)',
                stderr, attempts)
            time.sleep(sleeptime)
        else:
            break

    if retcode != 0:
        raise StandardError(
            "ERROR. Failed to transfer file. Command was:\n %s\n" % cmd)

    time_diff = time.time() - start_time
    LOGGER.info("Copying to archive (scp) completed in %d seconds.",
                time_diff)
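
# The retry strategy above, reduced to a standalone sketch (helper names
# are hypothetical). The real function additionally echoes the child
# process output and inspects stderr for unrecoverable messages before
# deciding whether a retry is worthwhile.
def _example_retry(run, attempts=3, sleeptime=2,
                   fatal=('No such file or directory',)):
    '''run() returns (retcode, stderr); assumes attempts >= 1.'''
    retcode, stderr = run()
    while retcode != 0:
        if attempts <= 1 or any(mesg in stderr for mesg in fatal):
            raise StandardError("Transfer failed: %s" % stderr)
        attempts -= 1
        time.sleep(sleeptime)
        retcode, stderr = run()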
class LastzAligner(ClusterJobManager):
    '''
    Class to handle all the steps required for generating an axt-format
    net alignment file using lastz as the aligner.
    '''
    # local_tempdir will need to be able to handle around 75GB when
    # aligning two typical mammalian genomes.
    def __init__(self, from_genome, to_genome, hsp_thresh=3000,
                 length_limit=None, linear_gap='loose',
                 local_tempdir=None, resume=False, *args, **kwargs):

        super(LastzAligner, self).__init__(*args, **kwargs)

        self.from_genome = from_genome
        self.to_genome = to_genome
        self.hsp_thresh = hsp_thresh
        self.length_limit = length_limit
        self.linear_gap = linear_gap

        # Flag used to tell the object to fill in missing lav files by
        # resubmitting to the cluster, rather than just working with
        # what's available.
        self.resume = resume

        systempdir = gettempdir() if local_tempdir is None else local_tempdir
        self.local_tempdir = os.path.join(systempdir, str(os.getpid()))
        os.mkdir(self.local_tempdir)  # Fails on a pre-existing directory.

        self._tempfiles = []
        self._chr_sizes = {}

    def split_chrs(self, fasta, dryrun=False):
        '''
        Split a designated fasta file by chromosome. Returns a list of
        the generated fasta files. Any chromosome whose sequence exceeds
        self.length_limit will be split appropriately. Calling with
        dryrun=True returns a list of the files which would have been
        created; this may be useful when deciding on an appropriate
        length_limit parameter.
        '''
        LOGGER.info("Splitting fasta by chromosome: %s", fasta)

        # N.B. the trailing '/' is important here:
        wdir = os.path.join(self.local_tempdir,
                            '%s_chr_split/' % filebasename(fasta))
        if not dryrun:
            os.mkdir(wdir)  # Fails on a pre-existing directory.
            self._tempfiles.append(wdir)

        outfiles = []
        with open(fasta, 'rU') as handle:
            for chromosome in SeqIO.parse(handle, 'fasta'):

                # Check whether we need to split the chromosome.
                seqlen = len(chromosome.seq)
                if self.length_limit and seqlen > self.length_limit:

                    # Figure out how many chunks we need.
                    denom = 2
                    while (float(seqlen) / denom) > self.length_limit:
                        denom += 1

                    # Output the sequences. The final segment must run to
                    # seqlen; integer division would otherwise silently
                    # drop up to denom-1 trailing bases.
                    for segnum in range(denom):
                        start = (segnum * (seqlen // denom)) + 1
                        end = (seqlen if segnum == denom - 1
                               else (segnum + 1) * (seqlen // denom))

                        # This filename format will be parsed later, in
                        # process_lavs_to_psl. The filename coordinate
                        # needs to be added to the output psl coords.
                        new_id = "%s_+%d" % (chromosome.id, start - 1)
                        chrfile = os.path.join(wdir, "%s.fa" % new_id)
                        chrseg = chromosome[start - 1:end]
                        chrseg.id = new_id
                        if not dryrun:
                            with open(chrfile, 'w') as chrfh:
                                SeqIO.write([chrseg], chrfh, 'fasta')
                        outfiles.append(chrfile)

                else:
                    # If the chromosome is small enough, just dump it out
                    # in a single file.
                    chrfile = os.path.join(wdir, "%s.fa" % chromosome.id)
                    if not dryrun:
                        with open(chrfile, 'w') as chrfh:
                            SeqIO.write([chromosome], chrfh, 'fasta')
                    outfiles.append(chrfile)

        return outfiles

    def mask_tandem_repeats(self, fasta):
        '''
        Runs trfBig over the designated fasta file. Returns the
        newly-generated masked fasta file name. Runs quite slowly, so we
        keep the outputs following 2bit conversion.
        '''
        LOGGER.info("Masking tandem repeats for fasta: %s", fasta)
        curdir = os.getcwd()
        # trfBig writes to the current working directory a lot.
        os.chdir(self.local_tempdir)
        maskfn = os.path.splitext(fasta)[0] + MASKTAG + '.fa'
        cmd = ['trfBig', fasta, maskfn]
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'])
        os.chdir(curdir)
        return maskfn

    def convert_to_2bit(self, fasta, workdir=None):
        '''
        Runs faToTwoBit on the designated fasta file; returns the name
        of the output 2bit file.
''' LOGGER.info("Converting fasta to 2bit: %s", fasta) if workdir is None: workdir = self.local_workdir twobitfn = os.path.join(workdir, filebasename(fasta) + '.2bit') cmd = ['faToTwoBit', '-noMask', fasta, twobitfn] call_subprocess(cmd, tmpdir=self.local_tempdir, path=os.environ['PATH']) return twobitfn def make_cluster_filename(self, localfile): ''' Generate a unique filename to be used on the cluster, without unnecessarily divulging local file paths. ''' pathbits = os.path.split(localfile) hasher = md5() hasher.update(pathbits[0]) clusterfile = "%d_%s_%s" % (os.getpid(), hasher.hexdigest(), pathbits[1]) return clusterfile def align(self, from_list, to_list, omit_list=None): ''' Actually run the alignment. Requires lastz on the cluster, and of course bsub/LSF et al. Note that we will generate one lastz process for each chr-chr combination, so this should lend itself well to a clustered solution. ''' # Just in case, for convenience. if type(from_list) in (str, unicode): from_list = [from_list] if type(to_list) in (str, unicode): to_list = [to_list] if omit_list is None: omit_list = [] # Make sure the filenames on the cluster won't easily collide. cluster_from = [self.make_cluster_filename(x) for x in from_list] cluster_to = [self.make_cluster_filename(x) for x in to_list] # Note that we copy all the files to the cluster even if we only # want to repeat a handful of alignments; managing the files is # simpler that way. LOGGER.info("Copying files to cluster server.") self.submitter.remote_copy_files(filenames=from_list + to_list, destnames=cluster_from + cluster_to) job_ids = [] lavfiles = [] for from_num in range(len(from_list)): for to_num in range(len(to_list)): # Files on localhost from_file = from_list[from_num] to_file = to_list[to_num] # Files on the cluster from_clust = cluster_from[from_num] to_clust = cluster_to[to_num] outfile = "%s_%s.lav" % (filebasename(from_file), filebasename(to_file)) if outfile in omit_list: LOGGER.warning("Skipping pre-existing lav file %s...", outfile) lavfiles.append(outfile) continue ## FIXME consider the --inner option here (ensembl-compara ## appears to use --inner=2200). LOGGER.info("Launching alignment (%s : %s).", from_file, to_file) clusterout = "%d_%s" % (os.getpid(), outfile) ## We use this file to monitor lastz completion, to ## disambiguate lastz failure from scp failure. FIXME if this ## turns out to be scp failure we can add a final re-try to ## the monitor job. clusterdone = clusterout + '.done' ## Note that using --chain here appears to be undesirable ## since the lastz chaining implementation is rather too ## simplistic for our purposes (see lastz docs). cmd = [ 'lastz', to_clust, from_clust, # This is the correct order. '--format=lav', '--hspthresh=%d' % self.hsp_thresh, '--output=%s' % clusterout ] sshcmd = self.return_file_to_localhost(clusterout, outfile, execute=False) LOGGER.debug(sshcmd) cmd = " ".join(cmd) + ( ' && touch %s && %s && rm %s %s' % (clusterdone, sshcmd, clusterout, clusterdone)) # 4GB is the default max mem for lastz. Setting mem=4000 means # some larger alignments fail silently; using 5000 seems much # more robust on our cluster. job_ids.append( self.submitter.submit_command(cmd=cmd, mem=self.memsize * 1024, auto_requeue=False, time_limit=self.time_limit)) lavfiles.append(outfile) # Reduce the rate of cluster job submission, if desired. sleep(self.throttle) # Caller code tends to assume these paths are absolute. 
        lavfiles = [os.path.join(self.local_workdir, x) for x in lavfiles]

        return (job_ids, lavfiles, cluster_from + cluster_to)

    def convert_to_psl(self, lav):
        '''
        Converts an input lav file to a temporary psl file.
        '''
        pslfn = os.path.join(self.local_tempdir, filebasename(lav) + '.psl')
        cmd = ['lavToPsl', lav, pslfn]
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'])
        return pslfn  # Delete this file in the caller code.

    def process_lavs_to_psl(self, lavs):
        '''
        Convert .lav files to .psl, swap query and target, and split on
        target.
        '''
        # Convert lav files to psl, then concatenate.
        LOGGER.info("Reorganising lav files into psl files.")
        psls = [self.convert_to_psl(x) for x in lavs]

        allpsl = os.path.join(self.local_tempdir, 'all.psl')

        # Concatenate the files. We take this opportunity to strip out
        # the junk we've added to the chromosome names.
        def repl(match):
            '''
            Regex replace function.
            '''
            return "\t%s\t" % match.group(1)

        from_sizes = self.get_chr_sizes_dict(self.from_genome)
        to_sizes = self.get_chr_sizes_dict(self.to_genome)

        # We allow for any pid prefix so we can restart in a new process
        # if needed. Also allow for genome/chrN_trfBig_masked to support
        # fill-in files generated locally.
        genstr = (r'(?:%s|%s)' % (filebasename(self.from_genome),
                                  filebasename(self.to_genome)))
        strip_re = re.compile(r'\t(?:\d+_%s_)?([^\t]*)%s\t'
                              % (genstr, MASKTAG))

        # Keep this regex in sync with the file naming scheme used in
        # split_chrs.
        subchr_re = re.compile(r'^(.*)_\+(\d+)$')

        with open(allpsl, 'wb') as allfh:
            for inp in psls:
                with open(inp, 'rb') as pfh:
                    for line in pfh:

                        # We need to rewrite the chrnames here. Also
                        # remove the trailing newline so it doesn't
                        # confuse the processing below.
                        newline = strip_re.sub(repl, line).rstrip('\n')

                        # Parse out sub-chromosome coordinates from
                        # filenames and fix coords appropriately. This is
                        # heavily dependent on the PSL file following the
                        # specification.
                        fields = newline.split("\t")
                        if len(fields) > 1:

                            # Sort out the query positions.
                            chrA_match = subchr_re.match(fields[9])
                            if chrA_match:
                                fields[9] = chrA_match.group(1)
                                basecoord = int(chrA_match.group(2))
                                for fnum in (11, 12):
                                    fields[fnum] = str(int(fields[fnum])
                                                       + basecoord)
                                fields[19] = ','.join(
                                    [str(int(x) + basecoord)
                                     for x in fields[19].split(',')
                                     if x != '']) + ','
                                fields[10] = from_sizes[fields[9]]

                            # Sort out the target positions.
                            chrB_match = subchr_re.match(fields[13])
                            if chrB_match:
                                fields[13] = chrB_match.group(1)
                                basecoord = int(chrB_match.group(2))
                                for fnum in (15, 16):
                                    fields[fnum] = str(int(fields[fnum])
                                                       + basecoord)
                                fields[20] = ','.join(
                                    [str(int(x) + basecoord)
                                     for x in fields[20].split(',')
                                     if x != '']) + ','
                                fields[14] = to_sizes[fields[13]]

                            # Quick check on our output. This is
                            # essentially cribbed from the pslToBed code.
                            if (int(fields[11]) >= int(fields[12])
                                    or int(fields[12]) > int(fields[10])
                                    or int(fields[15]) >= int(fields[16])
                                    or int(fields[16]) > int(fields[14])):
                                raise StandardError(
                                    ("Mangled PSL format output. Offending"
                                     + " input line was in file %s:"
                                     + "\n\n%s\n\nMunged to:\n%s\n\n")
                                    % (inp, line, "\t".join(fields)))

                        newline = "\t".join(fields) + "\n"
                        allfh.write(newline)

                os.unlink(inp)  # Attempt to save some temp space.

        # Swap target and source annotation, such that splitting on the
        # target actually splits on the query.
        swppsl = os.path.join(self.local_tempdir, 'all-swap.psl')
        cmd = ['pslSwap', allpsl, swppsl]
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'])
        os.unlink(allpsl)

        # Split psl files by target chromosome.
        psldir = os.path.join(self.local_tempdir, 'psl/')
        os.mkdir(psldir)
        # Consider -lump option for scaffolds FIXME
        cmd = ['pslSplitOnTarget', swppsl, psldir]
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'])
        target_psls = [os.path.join(psldir, x) for x in os.listdir(psldir)]
        self._tempfiles.extend(target_psls + [psldir])
        os.unlink(swppsl)

        return target_psls

    def get_chr_sizes(self, fasta):
        '''
        Runs faSize on a fasta file to generate chr size data.
        '''
        # We keep a cache because we'll be using this more than once.
        if fasta in self._chr_sizes:
            return self._chr_sizes[fasta]

        LOGGER.info("Calculating chr sizes for %s", fasta)
        sizefn = os.path.join(self.local_tempdir,
                              filebasename(fasta) + '.sizes')
        cmd = 'faSize %s -detailed > %s' % (bash_quote(fasta),
                                            bash_quote(sizefn))
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'], shell=True)
        self._tempfiles.append(sizefn)
        self._chr_sizes[fasta] = sizefn

        return sizefn

    def get_chr_sizes_dict(self, fasta):
        '''
        As for get_chr_sizes, but also parses the file and returns a
        dict for convenience.
        '''
        sizefn = self.get_chr_sizes(fasta)
        sizes = dict()
        with open(sizefn, 'r') as sizefh:
            for row in sizefh:
                (chrom, size) = [x.strip() for x in row.split()]
                sizes[chrom] = size
        return sizes

    def chain(self, lavs):
        '''
        Chains the lastz output .lav files together.
        '''
        # We keep the filtered chain file.
        gen_from = filebasename(self.from_genome)
        gen_to = filebasename(self.to_genome)
        prechain = os.path.join(self.local_workdir,
                                '%s_vs_%s.pre.chain' % (gen_from, gen_to))
        if os.path.exists(prechain):
            LOGGER.warning("Prechain file already exists. Assuming we can"
                           + " start from this point: %s", prechain)
            return prechain

        # Convert lavs to appropriately-organised psl files.
        psls = self.process_lavs_to_psl(lavs)
        # FIXME at some point we need to add these psls to self._tempfiles.

        # Run the initial chaining.
        LOGGER.info("Running the initial chaining.")
        chaindir = os.path.join(self.local_tempdir, 'chain/')
        os.mkdir(chaindir)
        chains = []
        for psl in psls:
            chfn = os.path.join(chaindir, filebasename(psl) + '.chain')
            cmd = ['axtChain', '-psl', '-linearGap=%s' % self.linear_gap,
                   psl, '-faQ', self.from_genome,
                   '-faT', self.to_genome, chfn]
            call_subprocess(cmd, tmpdir=self.local_tempdir,
                            path=os.environ['PATH'])
            chains.append(chfn)
            self._tempfiles.append(chfn)
        self._tempfiles.append(chaindir)

        # Filter the chained alignments before returning.
        allchain = os.path.join(self.local_tempdir, 'all.chain')
        cmd = ('chainMergeSort -tempDir=%s %s > %s'
               % (bash_quote(self.local_tempdir),
                  " ".join([bash_quote(x) for x in chains]),
                  bash_quote(allchain)))
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'], shell=True)
        self._tempfiles.append(allchain)

        from_sizes = self.get_chr_sizes(self.from_genome)
        to_sizes = self.get_chr_sizes(self.to_genome)

        # Actually create the prechain file.
        cmd = ['chainPreNet', allchain, from_sizes, to_sizes, prechain]
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'])

        return prechain

    def get_2bit(self, fasta):
        '''
        Simply generate a temporary 2bit file from the specified fasta
        file. Note the differences between this and convert_to_2bit.
        FIXME refactor so there's only one of these functions.
        '''
        outfn = os.path.join(self.local_tempdir,
                             filebasename(fasta) + '.2bit')
        if os.path.exists(outfn):
            return outfn

        LOGGER.info("Generating 2bit file for %s", fasta)
        cmd = ['faToTwoBit', fasta, outfn]
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'])
        self._tempfiles.append(outfn)
        return outfn

    def net(self, prechain):
        '''
        Create nets from the chained alignments and convert them to axt
        format. Also generate a liftOver file.
        '''
        from_sizes = self.get_chr_sizes(self.from_genome)
        to_sizes = self.get_chr_sizes(self.to_genome)

        net = os.path.join(self.local_workdir, prechain + '.net')
        cmd = (('chainNet %s -minSpace=1 %s %s stdout /dev/null'
                + ' | netSyntenic stdin %s')
               % (bash_quote(prechain), bash_quote(from_sizes),
                  bash_quote(to_sizes), bash_quote(net)))

        # This may fail for spurious reasons (e.g. absence of
        # /proc/self/stat on non-linux machines).
        try:
            LOGGER.info("Running chainNet and netSyntenic on prechain file.")
            call_subprocess(cmd, tmpdir=self.local_tempdir,
                            path=os.environ['PATH'], shell=True)
        except CalledProcessError, err:
            LOGGER.warning("chainNet or netSyntenic raised exception: %s",
                           err)

        if not os.path.exists(net):
            raise StandardError(
                "chainNet/netSyntenic failed to create output net file %s"
                % net)

        axt = os.path.join(self.local_workdir,
                           "%s.%s.net.axt"
                           % (filebasename(self.from_genome),
                              filebasename(self.to_genome)))
        from_2bit = self.get_2bit(self.from_genome)
        to_2bit = self.get_2bit(self.to_genome)

        LOGGER.info('Converting to axt format.')
        cmd = ('netToAxt %s %s %s %s stdout | axtSort stdin %s'
               % (bash_quote(net), bash_quote(prechain),
                  bash_quote(from_2bit), bash_quote(to_2bit),
                  bash_quote(axt)))
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'], shell=True)

        # These are cheap to generate and store, but potentially very
        # useful later.
        LOGGER.info('Creating liftOver file.')
        liftover = os.path.join(self.local_workdir, prechain + '.liftOver')
        cmd = ['netChainSubset', net, prechain, liftover]
        call_subprocess(cmd, tmpdir=self.local_tempdir,
                        path=os.environ['PATH'])

        return axt
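
# Hypothetical end-to-end usage of LastzAligner (constructor arguments
# beyond those defined in __init__ above, e.g. anything consumed by
# ClusterJobManager such as memsize or throttle, are assumptions, so
# this driver is only sketched in comment form):
#
#   aligner = LastzAligner(from_genome='/data/mm10.fa',
#                          to_genome='/data/hg38.fa',
#                          length_limit=300000000)
#   from_fas = aligner.split_chrs(aligner.from_genome)
#   to_fas = aligner.split_chrs(aligner.to_genome)
#   (job_ids, lavs, remotes) = aligner.align(from_fas, to_fas)
#   # ... wait here for the cluster jobs and the returned lav files ...
#   prechain = aligner.chain(lavs)
#   axt = aligner.net(prechain)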