def align_bwa_mem(global_config, read1, read2, reference, threads, dryrun):
    aligner = "bwa"
    if "bwa" in global_config["Tools"]:
        aligner = global_config["Tools"]["bwa"]["bin"]
    elif not common.which("bwa"):
        sys.exit("error while trying to run bwa mem: bwa not present in the "
                 "path and not in global config, please make sure to install "
                 "bwa properly")
    samtools = "samtools"
    if "samtools" in global_config["Tools"]:
        samtools = global_config["Tools"]["samtools"]["bin"]
    elif not common.which("samtools"):
        sys.exit("error while trying to run samtools: samtools not present "
                 "in the path and not in global config, please make sure to "
                 "install samtools properly")
    # extract base name
    if read2:
        libraryBase = os.path.basename(read1).split("_1.fastq")[0]
    else:
        libraryBase = os.path.basename(read1).split(".fastq")[0]
    if not os.path.exists(libraryBase):
        os.makedirs(libraryBase)
    os.chdir(libraryBase)
    mappingBase = "{}_to_{}".format(
        libraryBase, os.path.basename(reference).split(".fasta")[0])
    BAMsorted = "{}.bam".format(mappingBase)
    BAMunsorted = "{}.unsorted.bam".format(mappingBase)
    SAMMapped = "{}.unsorted.sam".format(mappingBase)
    if os.path.exists(os.path.abspath(BAMsorted)):
        BAMsorted = os.path.abspath(BAMsorted)
        os.chdir("..")
        return BAMsorted
    bwa_mem_command = [aligner, "mem", "-M", "-t", "{}".format(threads),
                       reference, read1, read2]
    samtools_view_command = [samtools, "view", "-b", "-S", "-u", "-"]
    if not os.path.exists(BAMunsorted):
        command = "{} | {} > {}".format(" ".join(bwa_mem_command),
                                        " ".join(samtools_view_command),
                                        BAMunsorted)
        bwa_stdOut = open("bwa.stdOut", "w")
        bwa_stdErr = open("bwa.stdErr", "w")
        common.print_command(command)
        if not dryrun:
            subprocess.call(command, shell=True,
                            stdout=bwa_stdOut, stderr=bwa_stdErr)
    samtools_sort_command = [samtools, "sort", "-@", "{}".format(threads),
                             "-m", "1G", BAMunsorted, mappingBase]
    command = " ".join(samtools_sort_command)
    if not os.path.exists(BAMsorted):
        stdOut = open("sam_sort.stdOut", "w")
        stdErr = open("sam_sort.stdErr", "w")
        common.print_command(command)
        if not dryrun:
            subprocess.call(command, shell=True, stdout=stdOut, stderr=stdErr)
    if os.path.exists(BAMsorted) and os.path.exists(BAMunsorted):
        subprocess.call(["rm", BAMunsorted])
    BAMsorted = os.path.abspath(BAMsorted)
    os.chdir("..")
    return BAMsorted

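# A minimal usage sketch for align_bwa_mem, assuming the pipeline's
# global-config layout; every path and value below is hypothetical.
config = {"Tools": {}}  # empty -> fall back to bwa/samtools on $PATH
sorted_bam = align_bwa_mem(config, "libA_1.fastq", "libA_2.fastq",
                           "ref.fasta", threads=8, dryrun=True)
# paired-end read files are expected to be named <lib>_1.fastq / <lib>_2.fastq
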
def get_hmmscan_task(input_filename, output_filename, db_filename, cutoff,
                     n_threads, hmmer_cfg, n_nodes=None):
    name = 'hmmscan:' + os.path.basename(input_filename) + '.x.' + \
           os.path.basename(db_filename)

    hmmscan_exc = which('hmmscan')
    if n_nodes is None:
        parallel_cmd = parallel_fasta(input_filename, n_threads)
    else:
        parallel_cmd = multinode_parallel_fasta(input_filename, n_threads,
                                                n_nodes)

    stat = output_filename + '.out'
    cmd = [parallel_cmd, hmmscan_exc, '--cpu', '1',
           '--domtblout', '/dev/stdout', '-E', str(cutoff), '-o', stat,
           db_filename, '/dev/stdin', '>', output_filename]
    cmd = ' '.join(cmd)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename, db_filename, db_filename + '.h3p'],
            'targets': [output_filename, stat],
            'clean': [clean_targets]}

def make_doc(source_path, doc_path, packages):
    if not common.which(DOC_PROGRAM):
        console.error('[' + DOC_PROGRAM + '] is not available.')
        console.error('Please make sure [' + DOC_PROGRAM +
                      '] is in your python path')
        return
    if not os.path.exists(doc_path):
        os.mkdir(doc_path)
    # List up packages with their absolute paths
    packages_by_name = {p.name: source_path + '/' + path
                        for path, p in packages.iteritems()}
    doc_output = {}
    console.pretty_println('Generating documents in ' + doc_path,
                           console.cyan)
    for name, path in packages_by_name.items():
        console.pretty_println(' ' + name)
        output = generate_doc(name, path, doc_path)
        doc_output[name] = output
    generates_index_page(doc_path, packages_by_name.keys())
    console.pretty_println('')
    console.pretty_println('Document generation results (0 may indicate an '
                           'error, but is usually fine).',
                           console.bold_white)
    for name, err in doc_output.items():
        console.pretty_print(name, console.cyan)
        console.pretty_print(' : ')
        console.pretty_println(str(err))

def main():
    args = parse_args()
    print("Building hermes using {} into {}".format(
        args.build_system, args.hermes_build_dir + os.path.sep))

    try:
        os.mkdir(args.hermes_build_dir)
    except OSError:
        # It's alright if the directory already exists.
        pass

    cmake_flags = args.cmake_flags.split() + [
        "-DLLVM_BUILD_DIR=" + args.llvm_build_dir,
        "-DLLVM_SRC_DIR=" + args.llvm_src_dir,
        "-DCMAKE_BUILD_TYPE=" + args.build_type,
    ]
    if args.is_32_bit:
        cmake_flags += ["-DLLVM_BUILD_32_BITS=On"]
    if (platform.system() == "Windows"
            and platform.machine().endswith("64")
            and is_visual_studio(args.build_system)):
        cmake_flags += ["-Thost=x64"]
    if not args.distribute:
        cmake_flags += ["-DLLVM_ENABLE_ASSERTIONS=On"]
    if args.enable_asan:
        cmake_flags += ["-DLLVM_USE_SANITIZER=Address"]
    if args.opcode_stats:
        cmake_flags += ["-DHERMESVM_PROFILER_OPCODE=On"]
    if args.basic_block_profiler:
        cmake_flags += ["-DHERMESVM_PROFILER_BB=On"]
    if args.warnings_as_errors:
        cmake_flags += ["-DHERMES_ENABLE_WERROR=On"]
    if args.static_link:
        cmake_flags += ["-DHERMES_STATIC_LINK=On"]
    if args.fbsource_dir:
        cmake_flags += ["-DFBSOURCE_DIR=" + args.fbsource_dir]
    if args.icu_root:
        cmake_flags += ["-DICU_ROOT=" + args.icu_root]
    elif (os.environ.get("SANDCASTLE")
          and platform.system() != "macos"
          and platform.system() != "Windows"):
        raise Exception("No ICU path provided on sandcastle")

    print("CMake flags: {}".format(" ".join(cmake_flags)))

    hermes_src_dir = os.path.realpath(__file__)
    # The hermes directory is three directories up from this file.
    # If this file is moved, make sure to update this.
    for _ in range(3):
        hermes_src_dir = os.path.dirname(hermes_src_dir)

    cmake = which("cmake")
    # Print the CMake version to assist in diagnosing issues.
    print("CMake version:\n{}".format(
        subprocess.check_output([cmake, "--version"],
                                stderr=subprocess.STDOUT)))

    run_command(
        [cmake, hermes_src_dir, "-G", args.build_system] + cmake_flags,
        env=os.environ,
        cwd=args.hermes_build_dir,
    )

def main(): """ Script to push the content of the build folder to a publishing site using rsync. The tool is assued to be installed in the system. Config variables: publish_src_folder: source folder to publish (Rsync notation) publish_dst_folder: destination folder to publish (Rsync notation) """ # Check that rsync exists and can be executed if common.which('rsync') == None: print 'This script requires the application rsync to be installed and' print 'available in your system.' sys.exit(1) # Load config and arguments (config, args) = common.load_config_file(cmd_args=sys.argv[1:]) # This script does not need any additional arguments if args != []: print 'WARNING: Script ignoring the following arguments:' print ' ', '\n '.join(args) do(config)
def get_lastal_task(query, db, out_fn, translate, n_threads, lastal_cfg):
    '''Create a pydoit task to run lastal

    Args:
        query (str): The file with the query sequences.
        db (str): The database file prefix.
        out_fn (str): Destination file for alignments.
        translate (bool): True if query is a nucleotide FASTA.
        n_threads (int): Number of threads to run with.
        lastal_cfg (dict): Config, must contain key params holding str.

    Returns:
        dict: A pydoit task.
    '''
    exc = which('lastal')
    params = lastal_cfg['params']
    if translate:
        params += ' -F' + str(lastal_cfg['frameshift'])
    cmd = '{exc} {params} {db} {query} > {out_fn}'.format(**locals())

    name = 'lastal:' + os.path.join(out_fn)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'targets': [out_fn],
            'file_dep': [db + '.sds'],
            'clean': [clean_targets]}

def read_winetricks():
    winetricks = common.which('winetricks')
    with open(winetricks, 'r') as _file:
        data = _file.read()
    if 'w_metadata' not in data:
        raise EnvironmentError("Winetricks is too old, can't parse.")
    return data

def get_lastal_task(query, db, out_fn, translate, cutoff, n_threads,
                    lastal_cfg):
    '''Create a pydoit task to run lastal

    Args:
        query (str): The file with the query sequences.
        db (str): The database file prefix.
        out_fn (str): Destination file for alignments.
        translate (bool): True if query is a nucleotide FASTA.
        n_threads (int): Number of threads to run with.
        lastal_cfg (dict): Config, must contain key params holding str.

    Returns:
        dict: A pydoit task.
    '''
    exc = which('lastal')
    params = lastal_cfg['params']
    if translate:
        params += ' -F' + str(lastal_cfg['frameshift'])
    if cutoff is not None:
        cutoff = 1.0 / cutoff
        params += ' -D' + str(cutoff)
    cmd = '{exc} {params} {db} {query} > {out_fn}'.format(**locals())

    name = 'lastal:' + os.path.join(out_fn)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'targets': [out_fn],
            'file_dep': [db + '.prj'],
            'clean': [clean_targets]}

def get_lastdb_task(db_fn, db_out_prefix, lastdb_cfg, prot=True):
    '''Create a pydoit task to run lastdb.

    WARNING: This does not define a file_dep, to make sure it doesn't get
    executed when the dependency and targets already exist. This means that
    if a task acquires the database, it MUST be defined before the lastdb
    task.

    Args:
        db_fn (str): The FASTA file to format.
        db_out_prefix (str): Prefix for the database files.
        lastdb_cfg (dict): Config for the command. Should contain an entry
            named "params" storing a str.
        prot (bool): True if a protein FASTA, False otherwise.

    Returns:
        dict: A pydoit task.
    '''
    exc = which('lastdb')
    params = lastdb_cfg['params']
    if prot:
        params += ' -p'
    cmd = '{exc} {params} {db_out_prefix} {db_fn}'.format(**locals())

    name = 'lastdb:' + os.path.basename(db_out_prefix)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'targets': [db_out_prefix + ext
                        for ext in
                        ['.bck', '.des', '.prj', '.sds', '.ssp',
                         '.suf', '.tis']],
            'uptodate': [True],
            'clean': [clean_targets]}

def get_cmscan_task(input_filename, output_filename, db_filename, cutoff,
                    n_threads, infernal_cfg, n_nodes=None):
    name = 'cmscan:' + os.path.basename(input_filename) + '.x.' + \
           os.path.basename(db_filename)

    exc = which('cmscan')
    if n_nodes is None:
        parallel_cmd = parallel_fasta(input_filename, n_threads)
    else:
        parallel_cmd = multinode_parallel_fasta(input_filename, n_threads,
                                                n_nodes)

    stat = output_filename + '.cmscan.out'
    cmd = [parallel_cmd, exc, '--cpu', '1', '--rfam', '--nohmmonly',
           '-E', str(cutoff), '--tblout', '/dev/stdout', '-o', stat,
           db_filename, '/dev/stdin', '>', output_filename]
    cmd = ' '.join(cmd)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename, db_filename, db_filename + '.i1p'],
            'targets': [output_filename, stat],
            'clean': [clean_targets]}

def get_hmmscan_task(input_filename, output_filename, db_filename, cutoff,
                     n_threads, hmmer_cfg, pbs=None):
    name = 'hmmscan:' + os.path.basename(input_filename) + '.x.' + \
           os.path.basename(db_filename)

    hmmscan_exc = which('hmmscan')
    stat = output_filename + '.out'
    cmd = [hmmscan_exc, '--cpu', '1', '--domtblout', '/dev/stdout',
           '-E', str(cutoff), '-o', stat, db_filename, '/dev/stdin']
    cmd = parallel_fasta(input_filename, output_filename, cmd, n_threads,
                         pbs=pbs)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename, db_filename, db_filename + '.h3p'],
            'targets': [output_filename, stat],
            'clean': [clean_targets]}

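# A hedged sketch of using the task factory from a pydoit dodo.py; the
# input files and cutoff below are hypothetical.
def task_hmmscan():
    # doit collects task_* functions and executes the returned task dict
    return get_hmmscan_task('orfs.pep', 'orfs.pep.pfam', 'Pfam-A.hmm',
                            cutoff=1e-05, n_threads=4, hmmer_cfg={})
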
def _run_pileup(global_config, bamfile):
    """
    Perform samtools pileup on a .bam-file
    """
    samtools = "samtools"  # default to the binary on $PATH
    if "samtools" in global_config["Tools"]:
        samtools = global_config["Tools"]["samtools"]["bin"]
    elif not common.which("samtools"):
        sys.exit("error while trying to run samtools: samtools not present "
                 "in the path and not in global config, please make sure to "
                 "install samtools properly")
    pileupfile = bamfile.replace('.bam', '_coverage.csv')
    # NB: the awk braces are doubled so str.format() does not treat them
    # as replacement fields.
    pileup_cmd = "{} mpileup {} | awk '{{print $2, $4}}' > {}".format(
        samtools, bamfile, pileupfile)
    p1 = subprocess.Popen(pileup_cmd, shell=True,
                          stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    p1.wait()
    if p1.returncode == 0:
        return pileupfile
    else:
        print "Could not perform mpileup"
        return 1

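# Why the awk braces above are doubled: str.format() treats single braces
# as replacement fields, so literal braces must be escaped.
assert "awk '{{print $2, $4}}'".format() == "awk '{print $2, $4}'"
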
def get_lastdb_task(db_fn, db_out_prefix, lastdb_cfg, prot=True):
    '''Create a pydoit task to run lastdb.

    WARNING: This does not define a file_dep, to make sure it doesn't get
    executed when the dependency and targets already exist. This means that
    if a task acquires the database, it MUST be defined before the lastdb
    task.

    Args:
        db_fn (str): The FASTA file to format.
        db_out_prefix (str): Prefix for the database files.
        lastdb_cfg (dict): Config for the command. Should contain an entry
            named "params" storing a str.
        prot (bool): True if a protein FASTA, False otherwise.

    Returns:
        dict: A pydoit task.
    '''
    exc = which('lastdb')
    params = lastdb_cfg['params']
    if prot:
        params += ' -p'
    cmd = '{exc} {params} {db_out_prefix} {db_fn}'.format(**locals())

    name = 'lastdb:' + os.path.basename(db_out_prefix)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'targets': ['{0}.prj'.format(db_out_prefix)],
            'uptodate': [True],
            'clean': [clean_targets]}

def main(): """ Script to push the content of the build folder to a publishing site using rsync. The tool is assued to be installed in the system. Config variables: publish_src_folder: source folder to publish (Rsync notation) publish_dst_folder: destination folder to publish (Rsync notation) """ # Check that rsync exists and can be executed if common.which('rsync') == None: print 'This script requires the application rsync to be installed and' print 'available in your system.' sys.exit(1) # Load config and arguments (config, args) = common.load_config_file(cmd_args = sys.argv[1:]) # This script does not need any additional arguments if args != []: print 'WARNING: Script ignoring the following arguments:' print ' ', '\n '.join(args) do(config)
def parallel_fasta(input_filename, n_jobs):
    file_size = 'S=`stat -c "%%s" {0}`; B=`expr $S / {1}`;'.format(
        input_filename, n_jobs)
    exc = which('parallel')
    cmd = [file_size, 'cat', input_filename, '|', exc, '--block', '$B',
           '--pipe', '--recstart', '">"', '--gnu', '-j', str(n_jobs)]
    return ' '.join(cmd)

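# Hedged illustration: parallel_fasta() returns a shell fragment, roughly
# (FASTA name hypothetical, parallel path resolved from $PATH):
#   S=`stat -c "%%s" transcripts.fa`; B=`expr $S / 4`;
#   cat transcripts.fa | parallel --block $B --pipe --recstart ">" --gnu -j 4
print(parallel_fasta('transcripts.fa', 4))
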
def __init__(self, command_arg, name=None, env=None, cwd=None,
             executable='wine', output_to_shell=False, use_log=True,
             cpu_limit=None):
    """ Run a program in a separate thread and monitor its state."""
    if output_to_shell is True and use_log is True:
        raise ValueError("output_to_shell and use_log can't both be True")
    if env is None:
        env = common.ENV
    if name is None:
        #name = command_arg.split()[0].split('\\')[-1]
        if type(command_arg) in (str, unicode):
            # derive the display name from the command string
            name = programs.isolate_executable_from_command(command_arg)
        else:
            name = command_arg[0]
        try:
            name = util.get_program_name(util.wintounix(name))
        except (IOError, TypeError):
            name = ''
    self.name = name

    print(executable)
    if executable is not None and len(executable):
        if executable == 'wine' and 'WINE' in env:
            executable = common.ENV['WINE']
        if type(command_arg) in (__builtin__.list, tuple):
            command_arg = [executable] + command_arg
        else:
            command_arg = "%s '%s'" % (executable, command_arg)
    print(executable)

    if cpu_limit is not None and type(cpu_limit) is int and cpu_limit > 0:
        if common.which('taskset'):
            command_arg = ['taskset', str(cpu_limit)] + command_arg
            print("Limiting process to {0} CPUs.".format(cpu_limit))
        else:
            print(("Couldn't limit process to {0} CPUs, "
                   "taskset isn't installed.").format(cpu_limit),
                  file=sys.stderr)

    self.has_standard_output = True
    self.prefix = env.get('WINEPREFIX', None)
    self.child = command.run(command_arg, name=name, env=env, cwd=cwd,
                             output_to_shell=output_to_shell,
                             use_logfiles=use_log)
    self.pid = self.child.pid
    self.start_time = self.child.start_time
    if use_log is True:
        self._create_info_file()
    RUNNING_PROGRAMS[self.child.log_filename_base] = self
    # Clean up RUNNING_PROGRAMS
    update_list()

def build_git_command(http_proxy):
    # Otherwise, trust that the user has git on the path.
    command = [which("git")]
    if http_proxy:
        command += ["-c", "http.proxy={}".format(http_proxy)]
    if platform.system() == "Windows":
        command += ["-c", "core.filemode=false"]
        command += ["-c", "core.autocrlf=false"]
    return command

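# Hedged usage sketch: the proxy URL and repository are hypothetical.
cmd = build_git_command("http://proxy.example.com:8080")
subprocess.check_call(cmd + ["clone",
                             "https://github.com/facebook/hermes.git"])
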
def findCMake():
    if which("cmake") is None:
        possiblePaths = ["/Applications/CMake.app/Contents/bin/cmake"]
        for path in possiblePaths:
            if os.path.exists(path):
                return path
    # Fall back to PATH - may fail later
    return "cmake"

def multinode_parallel_fasta(input_filename, ppn, nodes):
    file_size = 'S=`stat -c "%%s" {0}`; B=`expr $S / {1}`;'.format(
        input_filename, nodes * ppn)
    exc = which('parallel')
    cmd = [file_size, 'cat', input_filename, '|', exc, '--block', '$B',
           '--pipe', '--recstart', '">"', '--gnu', '--jobs', str(ppn),
           '--sshloginfile $PBS_NODEFILE', '--workdir $PWD']
    return ' '.join(cmd)

def get_cmpress_task(db_filename, infernal_cfg):
    exc = which('cmpress')
    cmd = '{exc} {db_filename}'.format(**locals())

    return {'name': 'cmpress:' + os.path.basename(db_filename),
            'title': title_with_actions,
            'actions': [cmd],
            'targets': [db_filename + ext
                        for ext in ['.i1f', '.i1i', '.i1m', '.i1p']],
            'uptodate': [True],
            'clean': [clean_targets]}

def open_window(xterm, gdb, journal, name, print_tokens=False, valgrind=None):
    if valgrind is None:
        executable, args = split_first(gdb.open_window_cmd(name), ' ')
    else:
        executable = which('valgrind')
        args = '--log-file=%s' % valgrind + ' ' + gdb.open_window_cmd(name)
    return XtermMcgdbWin(xterm, executable, args, journal, name,
                         print_tokens=print_tokens)

def get_hmmpress_task(db_filename, hmmer_cfg):
    name = 'hmmpress:' + os.path.basename(db_filename)

    exc = which('hmmpress')
    cmd = '{exc} {db_filename}'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'targets': [db_filename + ext
                        for ext in ['.h3f', '.h3i', '.h3m', '.h3p']],
            'uptodate': [True],
            'clean': [clean_targets]}

def get_lastal_task(query, db, out_fn, cfg, translate=False, cutoff=0.00001,
                    n_threads=1, n_nodes=None):
    '''Create a pydoit task to run lastal

    Args:
        query (str): The file with the query sequences.
        db (str): The database file prefix.
        out_fn (str): Destination file for alignments.
        translate (bool): True if query is a nucleotide FASTA.
        n_threads (int): Number of threads to run with.
        cfg (dict): Config, must contain key params holding str.

    Returns:
        dict: A pydoit task.
    '''
    lastal_exc = which('lastal')
    params = cfg['params']
    lastal_cmd = [lastal_exc]
    if translate:
        lastal_cmd.append('-F' + str(cfg['frameshift']))
    if cutoff is not None:
        cutoff = round(1.0 / cutoff, 2)
        lastal_cmd.append('-D' + str(cutoff))
    lastal_cmd.append(db)
    lastal_cmd = '"{0}"'.format(' '.join(lastal_cmd))

    if n_nodes is None:
        parallel = parallel_fasta(query, n_threads)
    else:
        parallel = multinode_parallel_fasta(query, n_threads, n_nodes)

    cmd = [parallel, lastal_cmd, '<', query, '>', out_fn]
    cmd = ' '.join(cmd)

    name = 'lastal:{0}'.format(os.path.join(out_fn))

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'targets': [out_fn],
            'file_dep': [db + '.prj'],
            'clean': [clean_targets]}

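# Hedged sketch chaining the lastdb and lastal factories as a pydoit task
# generator; the file names and config values are hypothetical.
def task_last_pipeline():
    yield get_lastdb_task('uniprot.fa', 'uniprot', {'params': ''}, prot=True)
    yield get_lastal_task('transcripts.fa', 'uniprot', 'transcripts.maf',
                          {'params': '', 'frameshift': 15}, translate=True,
                          n_threads=4)
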
def get_crb_blast_task(query, target, output, cutoff, crb_blast_cfg,
                       n_threads):
    name = 'crb-blast:{0}.x.{1}'.format(query, target)

    exc = which('crb-blast')
    cmd = '{exc} --query {query} --target {target} --output {output} '\
          '--evalue {cutoff} --threads {n_threads}'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'targets': [output],
            'file_dep': [query, target],
            'clean': [clean_targets]}

def getCommand():
    global command
    global checked
    if not checked:
        checked = True
        (id, release) = common.getLinuxDistrib()
        if id == "LinuxMint":
            command = "/usr/bin/highlight"
            common.onWarning("LinuxMint detected. Workaround to find "
                             "'highlight' command in /usr/bin/")
        else:
            command = common.which("highlight")
        if not command:
            common.onWarning("no highlight command found: "
                             "code will not be colored.")
    return command

def get_transdecoder_orf_task(input_filename, transdecoder_cfg):
    name = 'TransDecoder.LongOrfs:' + os.path.basename(input_filename)

    min_prot_len = transdecoder_cfg['min_prot_len']
    exc = which('TransDecoder.LongOrfs')
    cmd = '{exc} -t {input_filename} -m {min_prot_len}'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename],
            'targets': [input_filename +
                        '.transdecoder_dir/longest_orfs.pep'],
            'clean': [(clean_folder, [input_filename + '.transdecoder_dir'])]}

def compute_point_cloud(cloud, heights, rpc, H=None, crop_colorized='',
                        off_x=None, off_y=None, ascii_ply=False,
                        with_normals=False):
    """
    Computes a color point cloud from a height map.

    Args:
        cloud: path to the output points cloud (ply format)
        heights: height map, sampled on the same grid as the crop_colorized
            image. In particular, its size is the same as crop_colorized.
        rpc: path to xml file containing RPC data for the current Pleiade
            image
        H (optional, default None): path to the file containing the
            coefficients of the homography transforming the coordinates
            system of the original full size image into the coordinates
            system of the crop we are dealing with.
        crop_colorized (optional, default ''): path to a colorized crop of
            a Pleiades image
        off_{x,y} (optional, default None): coordinates of the point we want
            to use as origin in the local coordinate system of the computed
            cloud
        ascii_ply (optional, default false): boolean flag to tell if the
            output ply file should be encoded in plain text (ascii).
    """
    hij = " ".join([str(x) for x in np.loadtxt(H).flatten()]) if H else ""
    asc = "--ascii" if ascii_ply else ""
    nrm = "--with-normals" if with_normals else ""
    command = "colormesh %s %s %s %s -h \"%s\" %s %s" % (
        cloud, heights, rpc, crop_colorized, hij, asc, nrm)
    if off_x:
        command += " --offset_x %d" % off_x
    if off_y:
        command += " --offset_y %d" % off_y
    common.run(command)

    # if LidarViewer is installed, convert the point cloud to its format
    # this is useful for huge point clouds
    if crop_colorized and common.which('LidarPreprocessor'):
        tmp = cfg['temporary_dir']
        nthreads = multiprocessing.cpu_count()
        cloud_lidar_viewer = "%s.lidar_viewer" % os.path.splitext(cloud)[0]
        common.run("LidarPreprocessor -to %s/LidarO -tp %s/LidarP -nt %d "
                   "%s -o %s" % (tmp, tmp, nthreads, cloud,
                                 cloud_lidar_viewer))
    return

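# Hedged usage sketch for compute_point_cloud; all paths and offsets are
# hypothetical and assume an s2p-style tile layout.
compute_point_cloud('tile/cloud.ply', 'tile/height_map.tif',
                    'tile/rpc.xml', H='tile/H.txt',
                    crop_colorized='tile/roi_color.tif',
                    off_x=350000, off_y=6750000)
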
def build_reference_bwa(global_config, sample_config):
    # build the reference if not available
    reference = sample_config["reference"]
    program = "bwa"
    if "bwa" in global_config["Tools"]:
        program = global_config["Tools"]["bwa"]["bin"]
    elif not common.which("bwa"):
        sys.exit("error while trying to run bwa index: bwa not present in "
                 "the path and not in global config, please make sure to "
                 "install bwa properly")
    # check if the provided reference exists
    reference = os.path.abspath(reference)
    if not os.path.exists(reference):
        sys.exit("error, reference file {} does not exist".format(reference))
    # check if bwa index already created
    current_dir = os.getcwd()
    path_name, base_name = os.path.split(reference)
    bwa_index_folder = os.path.join(path_name, "bwa")
    # if needed create directory
    if not os.path.exists(bwa_index_folder):
        os.makedirs(bwa_index_folder)
    os.chdir(bwa_index_folder)
    # if needed soft link the reference
    if not os.path.exists(base_name):
        returnValue = subprocess.call(["ln", "-s", reference, base_name])
        if not returnValue == 0:
            sys.exit("error while trying to soft link reference sequence")
    reference = os.path.join(path_name, "bwa", base_name)
    # now I have a soft-linked copy
    # now check if the index is already built or not
    if not os.path.exists("{}.bwt".format(reference)):
        # then create the index sequence
        bwa_stdOut = open("bwa_index.stdOut", "w")
        bwa_stdErr = open("bwa_index.stdErr", "w")
        command = [program, "index", reference]
        common.print_command(command)
        if not common.check_dryrun(sample_config):
            returnValue = subprocess.call(command, stdout=bwa_stdOut,
                                          stderr=bwa_stdErr)
            if not returnValue == 0:
                sys.exit("error while indexing reference file {} with "
                         "bwa index".format(reference))
            # extra control to avoid problems with an unexpected return value
            if not os.path.exists("{}.bwt".format(reference)):
                sys.exit("bwa index failed")
    os.chdir(current_dir)
    return reference

def mount_iso(iso_path):
    """Mounts the iso file given and returns the path to the mount point
    or None if the mount failed."""
    if common.which('fuseiso') and (
            os.path.exists(iso_path) and os.access(iso_path, os.R_OK)):
        mount_dir = get_mount_iso_path(iso_path)
        return_output, return_error, return_code = common.run(
            ['fuseiso', '-p', iso_path, mount_dir],
            include_return_code=True)
        print(return_output)
        print(return_error)
        if return_code == 0:
            return mount_dir
    return None

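# Hedged usage sketch: mount an image, inspect it, then unmount with
# fusermount. The iso path is hypothetical; assumes fuseiso is installed.
mount_point = mount_iso('/tmp/install_disc.iso')
if mount_point is not None:
    print(os.listdir(mount_point))
    subprocess.call(['fusermount', '-u', mount_point])
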
def parallel_fasta(input_filename, output_filename, command, n_jobs,
                   pbs=None):
    exc = which('parallel')
    cmd = ['cat', input_filename, '|', exc, '--round-robin', '--pipe',
           '-L', 2, '-N', 10000, '--gnu']
    if pbs is not None:
        cmd.extend(['--sshloginfile', pbs, '--workdir $PWD'])
    else:
        cmd.extend(['-j', n_jobs])
    cmd.extend(['-a', input_filename])

    if isinstance(command, list):
        command = ' '.join(command)

    cmd.extend([command, '>', output_filename])

    return ' '.join(map(str, cmd))

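# Hedged example: this variant wraps an arbitrary command; with the
# hypothetical names below it yields roughly
#   cat orfs.pep | parallel --round-robin --pipe -L 2 -N 10000 --gnu -j 4
#   -a orfs.pep hmmscan --cpu 1 Pfam-A.hmm /dev/stdin > orfs.pep.out
print(parallel_fasta('orfs.pep', 'orfs.pep.out',
                     ['hmmscan', '--cpu', '1', 'Pfam-A.hmm', '/dev/stdin'],
                     n_jobs=4))
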
def build_reference_bwa(global_config, sample_config):
    # build the reference if not available
    reference = sample_config["reference"]
    program = "bwa"
    if "bwa" in global_config["Tools"]:
        program = global_config["Tools"]["bwa"]["bin"]
    elif not common.which("bwa"):
        sys.exit("error while trying to run bwa index: bwa not present in "
                 "the path and not in global config, please make sure to "
                 "install bwa properly")
    # check if the provided reference exists
    reference = os.path.abspath(reference)
    # check if I already have the bwt index
    if os.path.exists("{}.bwt".format(reference)):
        # index already present, nothing to do
        return reference
    # otherwise I need to build the reference, in this case I build it locally
    if not os.path.exists(reference):
        sys.exit("error, reference file {} does not exist".format(reference))
    # check if bwa index already created
    current_dir = os.getcwd()
    path_name, base_name = os.path.split(reference)
    bwa_index_folder = os.path.join(path_name, "bwa")
    # if needed create directory
    if not os.path.exists(bwa_index_folder):
        os.makedirs(bwa_index_folder)
    os.chdir(bwa_index_folder)
    # if needed soft link the reference
    if not os.path.exists(base_name):
        returnValue = subprocess.call(["ln", "-s", reference, base_name])
        if not returnValue == 0:
            sys.exit("error while trying to soft link reference sequence")
    reference = os.path.join(path_name, "bwa", base_name)
    # now I have a soft-linked copy
    # now check if the index is already built or not
    if not os.path.exists("{}.bwt".format(reference)):
        # then create the index sequence
        bwa_stdOut = open("bwa_index.stdOut", "w")
        bwa_stdErr = open("bwa_index.stdErr", "w")
        command = [program, "index", reference]
        common.print_command(command)
        if not common.check_dryrun(sample_config):
            returnValue = subprocess.call(command, stdout=bwa_stdOut,
                                          stderr=bwa_stdErr)
            if not returnValue == 0:
                sys.exit("error while indexing reference file {} with "
                         "bwa index".format(reference))
            # extra control to avoid problems with an unexpected return value
            if not os.path.exists("{}.bwt".format(reference)):
                sys.exit("bwa index failed")
    os.chdir(current_dir)
    return reference

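# Hedged end-to-end sketch chaining the two bwa helpers above; the config
# dicts, file names, and the "dryrun" key are all hypothetical.
global_config = {"Tools": {}}  # empty -> use bwa/samtools from $PATH
sample_config = {"reference": "genome.fasta", "dryrun": True}
ref = build_reference_bwa(global_config, sample_config)
bam = align_bwa_mem(global_config, "lib_1.fastq", "lib_2.fastq",
                    ref, threads=8, dryrun=True)
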
def get_cmscan_task(input_filename, output_filename, db_filename, n_threads,
                    infernal_cfg):
    name = 'cmscan:' + os.path.basename(input_filename) + '.x.' + \
           os.path.basename(db_filename)

    exc = which('cmscan')
    cmd = '{exc} --cpu {n_threads} --cut_ga --rfam --nohmmonly '\
          '--tblout {output_filename} {db_filename} {input_filename}'\
          ' > {output_filename}.cmscan'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename, db_filename, db_filename + '.i1p'],
            'targets': [output_filename, output_filename + '.cmscan'],
            'clean': [clean_targets]}

def get_hmmscan_task(input_filename, output_filename, db_filename, n_threads,
                     hmmer_cfg):
    name = 'hmmscan:' + os.path.basename(input_filename) + '.x.' + \
           os.path.basename(db_filename)

    exc = which('hmmscan')
    stat = output_filename + '.out'
    cmd = '{exc} --cpu {n_threads} --domtblout {output_filename}'\
          ' -o {stat} {db_filename} {input_filename}'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename, db_filename, db_filename + '.h3p'],
            'targets': [output_filename, stat],
            'clean': [clean_targets]}

def get_busco_task(input_filename, output_name, busco_db_dir, input_type,
                   n_threads, busco_cfg):
    name = 'busco:' + os.path.basename(input_filename) + '-' + \
           os.path.basename(busco_db_dir)

    assert input_type in ['genome', 'OGS', 'trans']
    exc = which('BUSCO_v1.1b1.py')
    # BUSCO chokes on file paths as output names
    output_name = os.path.basename(output_name)

    cmd = 'python3 {exc} -in {input_filename} -f -o {output_name} '\
          '-l {busco_db_dir} -m {input_type} '\
          '-c {n_threads}'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename],
            'uptodate': [run_once],
            'clean': [(clean_folder, ['run_' + output_name])]}

def get_hmmscan_task(input_filename, output_filename, db_filename, cutoff,
                     n_threads, hmmer_cfg):
    name = 'hmmscan:' + os.path.basename(input_filename) + '.x.' + \
           os.path.basename(db_filename)

    exc = which('hmmscan')
    stat = output_filename + '.out'
    cmd = '{exc} --cpu {n_threads} --domtblout {output_filename} '\
          '-E {cutoff} -o {stat} {db_filename} '\
          '{input_filename}'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename, db_filename, db_filename + '.h3p'],
            'targets': [output_filename, stat],
            'clean': [clean_targets]}

def get_transdecoder_predict_task(input_filename, db_filename,
                                  transdecoder_cfg):
    name = 'TransDecoder.Predict:' + os.path.basename(input_filename)

    orf_cutoff = transdecoder_cfg['orf_cutoff']
    exc = which('TransDecoder.Predict')
    cmd = '{exc} -t {input_filename} --retain_pfam_hits {db_filename} '\
          '--retain_long_orfs {orf_cutoff}'.format(**locals())

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename,
                         input_filename +
                         '.transdecoder_dir/longest_orfs.pep',
                         db_filename],
            'targets': [input_filename + '.transdecoder' + ext
                        for ext in ['.bed', '.cds', '.pep', '.gff3',
                                    '.mRNA']],
            'clean': [clean_targets,
                      (clean_folder, [input_filename + '.transdecoder_dir'])]}

def winetricks_installed():
    """Return the path to winetricks, if installed, else return False.

    Also checks if winetricks is actually a shell script."""
    winetricks_path = common.which('winetricks')
    if winetricks_path:
        return winetricks_path
    winetricks_path = '%s/winetricks.sh' % common.ENV['VINEYARDPATH']
    if (os.access(winetricks_path, os.R_OK)
            and os.path.getsize(winetricks_path)):
        with open(winetricks_path, 'r') as file_obj:
            content = file_obj.read()
        if '#!' in content:
            runner = content[content.find('#!'):].split('\n')[0]
            if (runner.endswith('sh')
                    or runner.endswith('bash')
                    or runner.endswith('dash')):
                return winetricks_path
    return False

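# Hedged usage sketch: only invoke winetricks when a usable script was
# found; the 'corefonts' verb is just an example.
path = winetricks_installed()
if path:
    subprocess.call([path, 'corefonts'])
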
def get_cmscan_task(input_filename, output_filename, db_filename, cutoff,
                    n_threads, infernal_cfg, pbs=None):
    name = 'cmscan:' + os.path.basename(input_filename) + '.x.' + \
           os.path.basename(db_filename)

    exc = which('cmscan')
    stat = output_filename + '.cmscan.out'
    cmd = [exc, '--cpu', '1', '--rfam', '--nohmmonly', '-E', str(cutoff),
           '--tblout', '/dev/stdout', '-o', stat, db_filename, '/dev/stdin']
    cmd = parallel_fasta(input_filename, output_filename, cmd, n_threads,
                         pbs=pbs)

    return {'name': name,
            'title': title_with_actions,
            'actions': [cmd],
            'file_dep': [input_filename, db_filename, db_filename + '.i1p'],
            'targets': [output_filename, stat],
            'clean': [clean_targets]}

def main(): """ Script to run linkchecker in a folder. It expects the following variables: linkchecker_ignore_urls linkchecker_src_folder """ # Check that rsync exists and can be executed if common.which('linkchecker') == None: print 'This script requires the application linkchecker to be', print 'installed and available in your system.' sys.exit(1) # Load config and arguments (config, args) = common.load_config_file(cmd_args = sys.argv[1:]) # This script does not need any additional arguments if args != []: print 'WARNING: Script ignoring the following arguments:' print ' ', '\n '.join(args) do(config)