def cmd_build(args, remainder):
    common_args = []
    common_args += ['--config={}'.format(args.variant)]
    common_args += ['--define=version={}'.format(args.version)]
    common_args += ['--explain={}'.format(os.path.abspath('explain.log'))]
    common_args += ['--verbose_failures']
    common_args += ['--verbose_explanations']
    if platform.system() == 'Windows':
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env = os.environ.copy()
        env.update(cenv.env())
    else:
        env = None
    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)
    archive_dir = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), archive_dir)

def DoSCPFile(file, remote_path, user, host, port=None, ssh_key=None):
    """Upload file to user@host:remote_path using scp.

    Optionally use port and ssh_key, if provided.
    """
    cmdline = ["scp"]
    AppendOptionalArgsToSSHCommandline(cmdline, port, ssh_key)
    cmdline.extend([WindowsPathToMsysPath(file),
                    "%s@%s:%s" % (user, host, remote_path)])
    check_call(cmdline)

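# A hedged usage sketch (not part of the original source): DoSCPFile just
# composes a plain `scp` command line, so a call looks like the one below.
# The host, paths, port, and key here are illustrative only, and the exact
# flags emitted for port/ssh_key depend on AppendOptionalArgsToSSHCommandline,
# which is defined elsewhere in this module.
#
#   DoSCPFile("out/build.log", "/var/log/build.log", "buildbot",
#             "build.example.com", port=2222, ssh_key="~/.ssh/ci_key")
#
# With port and ssh_key set, the resulting command is presumably roughly:
#   scp -P 2222 -i ~/.ssh/ci_key out/build.log buildbot@build.example.com:/var/log/build.log
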
def run_iss(rc, iss_command):
    check_call(iss_command)
    counters = defaultdict(int)
    for tmp_fastq, output_fastq, r in zip(rc.tmp_files, rc.output_files,
                                          ["_1\n", "_2\n"]):
        annotate_and_count_reads(tmp_fastq, output_fastq, r, counters)
    output_summary_counters(rc.summary_file, counters)
    rc.cleanup()

def activate(generated_root=None, logger=None):
    if logger is None:
        logger = util.setup_logging('console')
    conf_file = os.path.join(generated_root, 'conserver.cf')
    shutil.copy(conf_file, '/etc/conserver.cf')
    try:
        util.check_call(['/etc/init.d/conserver', 'restart'])
    except (RuntimeError, SystemExit):
        raise

def main() -> None:
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")
    p = argparse.ArgumentParser()
    p.add_argument('--cache_dir',
                   default=util.DEFAULT_CACHE_DIR,
                   help='Benchmark cache dir')
    p.add_argument('--result_dir',
                   default=util.DEFAULT_RESULT_DIR,
                   help='Benchmark result dir')
    args = p.parse_args()
    util.s3_cache_files([
        util.REFERENCE_DIR + '/gencode.v26.whole_genes.fa',
        util.REFERENCE_DIR + '/all_pair_art_lod_gpair_merged.txt'
    ], args.cache_dir)
    for sample in util.TITRATION_SAMPLES:
        logging.info('Start benchmark %s', sample.name)
        result_dir = args.result_dir + '/' + sample.name
        try:
            os.makedirs(result_dir, 0o755)
        except OSError:
            logging.error("mkdir %s failed", result_dir)
        if os.path.exists(result_dir + "/filtered.fa"):
            logging.info("Skip %s", result_dir)
            continue
        util.s3_cache_files(util.expand_fastq_files(sample.paths),
                            args.cache_dir)
        cached_r1 = ",".join([
            args.cache_dir + '/' + os.path.basename(fq.r1)
            for fq in sample.paths
        ])
        cached_r2 = ",".join([
            args.cache_dir + '/' + os.path.basename(fq.r2)
            for fq in sample.paths
        ])
        cached_ref = args.cache_dir + '/gencode.v26.whole_genes.fa'
        cached_cosmic_fusion = args.cache_dir + '/all_pair_art_lod_gpair_merged.txt'
        af4_args = [
            str(util.af4_path()),
            f'-log_dir={result_dir}',
            '-pprof=:12345',
            '-mutex-profile-rate=1000',
            '-block-profile-rate=1000',
            f'-r1={cached_r1}',
            f'-r2={cached_r2}',
            f'-fasta-output={result_dir}/all.fa',
            f'-filtered-output={result_dir}/filtered.fa',
            f'-transcript={cached_ref}',
            '-max-genes-per-kmer=2',
            '-max-proximity-distance=1000',
            '-max-proximity-genes=5',
            '-unstranded-prep',
            f'-cosmic-fusion={cached_cosmic_fusion}'
        ]
        util.check_call(af4_args)
        logging.info('Finished benchmark: %s', sample.name)
        logging.info("Runtime stats: %s", util.run_stats(Path(result_dir)))
    for path in glob.glob(f'{args.cache_dir}/*rerun*'):
        try:
            os.remove(path)
        except OSError:
            logging.error("failed to remove %s", path)

def cmd_build(args, remainder):
    import yaml
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)
    env = os.environ.copy()
    variant = plan['VARIANTS'][args.variant]
    for key, value in variant['env'].items():
        env[key] = str(value)
    util.printf('--- :snake: pre-build steps... ')
    util.printf('delete any old whl files...')
    wheel_dirs = [
        wheel_path('plaidml').resolve(),
        wheel_path('plaidml/keras').resolve(),
        wheel_path('plaidbench').resolve(),
    ]
    wheel_clean(wheel_dirs)
    explain_log = 'explain.log'
    profile_json = 'profile.json.gz'
    bazel_config = variant.get('bazel_config', args.variant)
    common_args = []
    common_args += ['--config={}'.format(bazel_config)]
    common_args += ['--define=version={}'.format(args.version)]
    common_args += ['--experimental_generate_json_trace_profile']
    common_args += ['--experimental_json_trace_compression']
    common_args += ['--experimental_profile_cpu_usage']
    common_args += ['--explain={}'.format(explain_log)]
    common_args += ['--profile={}'.format(profile_json)]
    common_args += ['--verbose_failures']
    common_args += ['--verbose_explanations']
    util.printf('--- :bazel: Running Build...')
    if platform.system() == 'Windows':
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env.update(cenv.env())
    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)
    util.printf('--- :buildkite: Uploading artifacts...')
    buildkite_upload(explain_log)
    buildkite_upload(profile_json)
    for wheel_dir in wheel_dirs:
        buildkite_upload('*.whl', cwd=wheel_dir)
    archive_dir = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), archive_dir)

def cmd_report(args, remainder):
    archive_dir = os.path.join(args.root, args.pipeline, args.build_id)
    cmd = ['bazelisk', 'run', '//ci:report']
    cmd += ['--']
    cmd += ['--pipeline', args.pipeline]
    cmd += ['--annotate']
    cmd += [archive_dir]
    cmd += remainder
    util.check_call(cmd)

def cmd_build(args, remainder):
    import yaml
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)
    env = os.environ.copy()
    variant = plan['VARIANTS'][args.variant]
    for key, value in variant['env'].items():
        env[key] = str(value)
    explain_log = 'explain.log'
    profile_json = 'profile.json.gz'
    bazel_config = variant.get('bazel_config', args.variant)
    common_args = []
    common_args += ['--config={}'.format(bazel_config)]
    common_args += ['--define=version={}'.format(args.version)]
    common_args += ['--experimental_generate_json_trace_profile']
    common_args += ['--experimental_json_trace_compression']
    common_args += ['--experimental_profile_cpu_usage']
    common_args += ['--explain={}'.format(explain_log)]
    common_args += ['--profile={}'.format(profile_json)]
    common_args += ['--verbose_failures']
    common_args += ['--verbose_explanations']
    util.printf('--- :bazel: Running Build...')
    if platform.system() == 'Windows':
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env.update(cenv.env())
    util.check_call(['bazelisk', 'test', '...'] + common_args, env=env)
    util.printf('--- :buildkite: Uploading artifacts...')
    buildkite_upload(explain_log)
    buildkite_upload(profile_json)
    shutil.rmtree('tmp', ignore_errors=True)
    tarball = os.path.join('bazel-bin', 'pkg.tar.gz')
    # Extract only the .whl members from the package tarball for upload.
    with tarfile.open(tarball, "r") as tar:
        wheels = []
        for item in tar.getmembers():
            if item.name.endswith('.whl'):
                wheels.append(item)
        tar.extractall('tmp', members=wheels)
    buildkite_upload('*.whl', cwd='tmp')
    archive_dir = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(tarball, archive_dir)

def copy_files(src_files: List[str], dest_dir: str) -> None:
    basenames: Set[str] = set()
    for path in src_files:
        basename = os.path.basename(path)
        if basename in basenames:
            raise Exception("Duplicate filename: " + path)
        basenames.add(basename)
    logging.info("%s -> %s", src_files, dest_dir)
    util.check_call([str(util.grail_file_path()), "cp", "-v"] + src_files +
                    [dest_dir])

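# Example call (hypothetical paths, for illustration only): every source file
# lands flat in dest_dir, so copy_files fails fast on basename collisions
# before invoking the external copy tool.
#
#   copy_files(["s3://bucket/a/reads.fq", "s3://bucket/b/ref.fa"], "/tmp/cache")
#   copy_files(["a/x.fq", "b/x.fq"], "/tmp/cache")
#   # -> raises Exception("Duplicate filename: b/x.fq")
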
def cmd_report(args, remainder):
    workdir = pathlib.Path('tmp').resolve()
    make_all_wheels(workdir)
    archive_dir = os.path.join(args.root, args.pipeline, args.build_id)
    cmd = ['bazelisk', 'run', '//ci:report']
    cmd += ['--']
    cmd += ['--pipeline', args.pipeline]
    cmd += ['--annotate']
    cmd += [archive_dir]
    cmd += remainder
    util.check_call(cmd, stderr=subprocess.DEVNULL)

def cmd_report(args, remainder):
    workdir = pathlib.Path('tmp').resolve()
    make_all_wheels(workdir)
    download_test_artifacts('tmp/test/**/*')
    cmd = ['bazelisk', 'run', '//ci:report']
    cmd += ['--']
    cmd += ['--pipeline', args.pipeline]
    cmd += ['--annotate']
    cmd += [str(workdir)]
    cmd += remainder
    util.check_call(cmd, stderr=subprocess.DEVNULL)

def fetch_versioned_accession_id(vaccid):  # e.g., "NC_004325.2"
    output_file = f"{vaccid}.fa"
    if os.path.isfile(output_file):
        print(f"{output_file} already exists, nice")
    else:
        try:
            command = f"ncbi-acc-download --format fasta {vaccid} -e all"
            check_call(command)
        except:
            remove_safely(output_file)
            raise
    return output_file

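# Usage sketch, following the accession example in the comment above:
#
#   fa = fetch_versioned_accession_id("NC_004325.2")  # -> "NC_004325.2.fa"
#
# On failure the partial download is removed so a later retry starts clean;
# check_call and remove_safely are helpers assumed from the surrounding module.
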
def run(self):
    failed = False
    if self.distribution.subpackages is not None:
        for idx in range(len(sys.argv)):
            if 'setup.py' in sys.argv[idx]:
                break
        argv = list(sys.argv[idx + 1:])
        build = self.get_finalized_command('build')
        failed = process_subpackages(build.distribution.parallel_build,
                                     'test', build.build_base,
                                     self.distribution.subpackages,
                                     argv, False)

    ## PYTHON
    if self._has_python_tests():
        self.run_command('build')
        build = self.get_finalized_command('build')
        build_dir = build.build_base
        environ = self.distribution.environment
        pkg_dirs = [build_dir, build.build_lib,
                    os.path.join(build_dir, 'python')]
        lib_dirs = [build.build_temp]
        try:
            lib_dirs += environ['PATH']  # FIXME need boost, etc dlls for windows
        except:
            pass
        try:
            lib_dirs.append(os.path.join(environ['MINGW_DIR'], 'bin'))
            lib_dirs.append(os.path.join(environ['MSYS_DIR'], 'bin'))
            lib_dirs.append(os.path.join(environ['MSYS_DIR'], 'lib'))
        except:
            pass
        postfix = '.'.join(build.build_temp.split('.')[1:])
        for pkg, units in self._get_python_tests():
            test_dir = os.path.join(build_dir, 'test_' + pkg)
            if not os.path.exists(test_dir):
                util.copy_tree('test', test_dir, excludes=['.svn*', 'CVS*'])
            with open(os.path.join(test_dir, '__init__.py'), 'w') as f:
                f.write("__all__ = ['" + "', '".join(units) + "']\n")
            outfile = os.path.join(build_dir, 'test_' + pkg + '.py')
            util.create_testscript('test_' + pkg, units, outfile, pkg_dirs)
            wrap = util.create_test_wrapper(outfile, build_dir, lib_dirs)
            log.info('Python unit tests for ' + pkg)
            try:
                util.check_call([wrap])
            except Exception as e:
                failed = True
                print(e)

def cmd_report(args, remainder):
    cmd_pack('tmp')
    #shutil.rmtree('tmp')
    archive_dir = os.path.join(args.root, args.pipeline, args.build_id)
    cmd = ['bazelisk', 'run', '//ci:report']
    cmd += ['--']
    cmd += ['--pipeline', args.pipeline]
    cmd += ['--annotate']
    cmd += [archive_dir]
    cmd += remainder
    util.check_call(cmd, stderr=subprocess.DEVNULL)

def cmd_report(args, remainder):
    workdir = pathlib.Path('tmp').resolve()
    make_all_wheels(workdir)
    download_test_artifacts('tmp/test/**/*')
    startup_args = ['--output_base={}'.format(output_base())]
    cmd = ['bazelisk'] + startup_args + ['run', '//ci:report']
    cmd += ['--']
    cmd += ['--pipeline', args.pipeline]
    cmd += ['--annotate']
    cmd += [str(workdir)]
    cmd += remainder
    util.check_call(cmd)

def cmd_build(args, remainder):
    util.printf('--- :snake: pre-build steps... ')
    util.printf('bazel shutdown...')
    util.check_output(['bazelisk', 'shutdown'])
    util.printf('delete any old whl files...')
    wheel_clean('plaidml')
    wheel_clean('plaidbench')
    wheel_clean('plaidml/keras')
    common_args = []
    common_args += ['--config={}'.format(args.variant)]
    common_args += ['--define=version={}'.format(args.version)]
    common_args += ['--explain={}'.format(os.path.abspath('explain.log'))]
    common_args += ['--verbose_failures']
    common_args += ['--verbose_explanations']
    util.printf('--- :bazel: Running Build ...')
    if platform.system() == 'Windows':
        util.check_call(['git', 'config', 'core.symlinks', 'true'])
        cenv = util.CondaEnv(pathlib.Path('.cenv'))
        cenv.create('environment-windows.yml')
        env = os.environ.copy()
        env.update(cenv.env())
    else:
        env = None
    util.check_call(['bazelisk', 'test', '...'] + common_args,
                    env=env,
                    stderr=subprocess.DEVNULL)
    archive_dir = os.path.join(
        args.root,
        args.pipeline,
        args.build_id,
        'build',
        args.variant,
    )
    util.printf('--- :buildkite: Uploading artifacts...')
    pw = wheel_path('plaidml').resolve()
    pbw = wheel_path('plaidbench').resolve()
    pkw = wheel_path('plaidml/keras').resolve()
    util.check_call(['buildkite-agent', 'artifact', 'upload', '*.whl'], cwd=pw)
    util.check_call(['buildkite-agent', 'artifact', 'upload', '*.whl'], cwd=pbw)
    util.check_call(['buildkite-agent', 'artifact', 'upload', '*.whl'], cwd=pkw)
    os.makedirs(archive_dir, exist_ok=True)
    shutil.copy(os.path.join('bazel-bin', 'pkg.tar.gz'), archive_dir)
    util.printf('bazel shutdown...')
    util.check_output(['bazelisk', 'shutdown'])

def run_af4(
        sample_name: str,
        cached_file_pairs: List[util.FASTQPair],
        cosmic_fusion_path: str,
        args: Any,
):
    ref_path = "s3://grail-publications/resources/gencode.v26.whole_genes.fa"
    util.s3_cache_files([ref_path, cosmic_fusion_path], args.cache_dir)
    cached_r1 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])
    for mode in ["denovo", "targeted"]:
        result_dir = args.result_dir + "/" + os.path.basename(
            sample_name + "-" + mode)
        if os.path.exists(result_dir + "/filtered.fa"):
            logging.info("Skipping benchmark: %s", result_dir)
            continue
        logging.info("Start af4 benchmark: %s", result_dir)
        try:
            os.makedirs(result_dir, 0o755)
        except OSError:
            logging.error("mkdir %s failed", result_dir)
        af4_args = [
            str(util.af4_path()),
            f"-log_dir={result_dir}",
            "-pprof=:12345",
            "-mutex-profile-rate=1000",
            "-block-profile-rate=1000",
            f"-r1={cached_r1}",
            f"-r2={cached_r2}",
            "-max-genes-per-kmer=2",
            "-max-proximity-distance=1000",
            "-max-proximity-genes=5",
            f"-fasta-output={result_dir}/all.fa",
            f"-filtered-output={result_dir}/filtered.fa",
            f"-transcript={args.cache_dir}/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa",
        ]
        if mode == "targeted":
            af4_args.append(
                f"-cosmic-fusion={args.cache_dir}/all_pair_art_lod_gpair_merged.txt"
            )
        util.check_call(af4_args)
        logging.info("Finished benchmark: %s", result_dir)
        logging.info("Runtime stats: %s", util.run_stats(Path(result_dir)))

def fetch_all():
    for g in Genome.all.values():
        remove_safely(g.filename)
        accession_fas = []
        for f in g.versioned_accession_ids:
            af = Genome.fetch_versioned_accession_id(f)
            accession_fas.append(af)
        accession_fastas = " ".join(accession_fas)
        command = f"cat {accession_fastas} > {g.filename}"
        check_call(command)
        assert os.path.isfile(g.filename), f"Failed to download genome {g.filename}"
        # Count non-header bases: wc prints "lines words chars", so field [2]
        # (chars, after newlines are deleted) is the genome size in bases.
        command = f"grep -v '^>' {g.filename} | tr -d '\n' | wc > {g.key}.size"
        check_call(command)
        with open(f"{g.key}.size") as f:
            line = f.readline().rstrip()
        g.size = int(line.split()[2])
        print(f"Genome {g.key} size {g.size} bases.")

def run_af4(sample_name: str, cached_file_pairs: List[util.FASTQPair],
            args: Any):
    ref_path = "s3://grail-publications/resources/gencode.v26.whole_genes.fa"
    cosmic_fusion_path = "s3://grail-publications/resources/all_pair_art_lod_gpair_merged.txt"
    util.s3_cache_files([ref_path, cosmic_fusion_path], args.cache_dir)
    cached_r1 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])
    for mode in ['denovo', 'targeted']:
        result_dir = args.result_dir + '/' + os.path.basename(
            sample_name + '-' + mode)
        if os.path.exists(result_dir + "/filtered.fa"):
            logging.info('Skipping benchmark: %s', result_dir)
            continue
        logging.info('Start af4 benchmark: %s', result_dir)
        try:
            os.makedirs(result_dir, 0o755)
        except OSError:
            logging.error("mkdir %s failed", result_dir)
        af4_args = [
            str(util.af4_path()),
            f'-log_dir={result_dir}',
            '-pprof=:12345',
            '-mutex-profile-rate=1000',
            '-block-profile-rate=1000',
            '-umi-in-read',
            f'-r1={cached_r1}',
            f'-r2={cached_r2}',
            f'-fasta-output={result_dir}/all.fa',
            f'-filtered-output={result_dir}/filtered.fa',
            f'-transcript={args.cache_dir}/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa'
        ]
        if mode == 'targeted':
            af4_args.append(
                f'-cosmic-fusion={args.cache_dir}/all_pair_art_lod_gpair_merged.txt'
            )
        util.check_call(af4_args)
        logging.info('Finished benchmark: %s', result_dir)
        logging.info("Runtime stats: %s", util.run_stats(Path(result_dir)))

def cmd_pack(arg):
    pathlib.Path(arg).mkdir(parents=True, exist_ok=True)
    dir_list = ['windows_x86_64', 'macos_x86_64', 'linux_x86_64', 'common']
    whl_type = ['win_amd64', 'macosx', 'manylinux', 'none-any']
    os.chdir(arg)
    print('downloading wheels...')
    util.check_output(
        ['buildkite-agent', 'artifact', 'download', '*.whl', '.'], cwd='.')
    whl_files = list(pathlib.Path('.').glob('*.whl'))
    for d in dir_list:
        pathlib.Path(d).mkdir(parents=True, exist_ok=True)
    # Sort each wheel into its platform directory by filename tag.
    for whl, dest in zip(whl_type, dir_list):
        for f in whl_files:
            if whl in str(f):
                shutil.move(str(f), dest)
    print('packing all_wheels...')
    make_tarfile('all_wheels.tar.gz', '.')
    util.check_call(
        ['buildkite-agent', 'artifact', 'upload', 'all_wheels.tar.gz'])
    os.chdir('..')

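# make_tarfile is not defined in this section. A minimal sketch of what
# cmd_pack appears to expect -- a gzip-compressed tarball of a directory,
# using the (output_filename, source_dir) argument order seen above; the
# filter guards against archiving the in-progress tarball itself:
import os
import tarfile

def make_tarfile(output_filename, source_dir):
    def skip_self(info):
        # Drop the output archive if the directory walk picks it up mid-write.
        return None if info.name.endswith(output_filename) else info

    with tarfile.open(output_filename, "w:gz") as tar:
        tar.add(source_dir, arcname=os.path.basename(source_dir),
                filter=skip_self)
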
def main() -> None:
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")
    p = argparse.ArgumentParser()
    p.add_argument('--cache_dir',
                   default=util.DEFAULT_CACHE_DIR,
                   help='Benchmark cache dir')
    p.add_argument('--result_dir',
                   default=util.DEFAULT_RESULT_DIR,
                   help='Benchmark result dir')
    p.add_argument(
        '--rerun_af4',
        action='store_true',
        help='Always run AF4 even if the result file already exists')
    p.add_argument(
        '--recache_files',
        action='store_true',
        help='Always copy benchmark data files, even if they already exist locally.')
    args = p.parse_args()
    util.s3_cache_files([
        util.REFERENCE_DIR +
        '/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa',
        util.REFERENCE_DIR + '/all_pair_art_lod_gpair_merged.txt',
        util.REFERENCE_DIR + '/liu_gpair.txt'
    ], args.cache_dir)
    for mode in ['denovo', 'targeted']:
        for sample in util.SIMULATED_SAMPLES:
            util.s3_cache_files([sample.path.r1, sample.path.r2],
                                args.cache_dir)
            result_dir = f'{args.result_dir}/synthetic-{mode}-{sample.n}-{sample.coverage}'
            try:
                os.makedirs(result_dir, 0o755)
            except OSError:
                logging.error("mkdir %s failed", result_dir)
            if not os.path.exists(f'{result_dir}/filtered.fa') or args.rerun_af4:
                logging.info('running benchmark in %s', result_dir)
                af4_args = [
                    str(util.af4_path()),
                    f'-log_dir={result_dir}',
                    f'-r1={args.cache_dir}/{sample.path.r1}',
                    f'-r2={args.cache_dir}/{sample.path.r2}',
                    f'-fasta-output={result_dir}/all.fa',
                    f'-filtered-output={result_dir}/filtered.fa',
                    '-transcript=' + args.cache_dir +
                    '/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa'
                ]
                if mode == 'targeted':
                    af4_args.append('-cosmic-fusion=' + args.cache_dir +
                                    '/all_pair_art_lod_gpair_merged.txt')
                util.check_call(af4_args)
                logging.info("Runtime stats: %s",
                             util.run_stats(Path(result_dir)))
            stats = TargetedFusionStats(
                Path(f'{args.cache_dir}/liu_gpair.txt'),
                Path(f'{result_dir}/filtered.fa'))
            s = stats.stats()
            tp = "%d" % (s.tp, )
            fp = "%d" % (s.fp, )
            fn = "%d" % (s.fn, )
            print(f'{mode} & {sample.n} & {sample.coverage} & {tp} & {fp} & {fn}\\\\')

def concatenate_fasta(genomes, genomes_file):
    genome_fastas = " ".join(g.filename for g in genomes)
    command = f"cat {genome_fastas} > {genomes_file}"
    check_call(command)

def run_starfusion(sample_name: str, cached_file_pairs: List[util.FASTQPair],
                   args: Any):
    match = re.match(r'.*/([^/]+)\.FULL\.tar\.gz$', args.starfusion_targz)
    assert match
    local_starfusion_dir = match[1]
    logging.info("LOCAL: %s", local_starfusion_dir)
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)):
        util.check_call([
            'tar', 'xzf', args.starfusion_targz, '-C', args.starfusion_data_dir
        ])
        util.check_call([
            'make', '-C',
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)
        ])
    match = re.match(r'.*/([^/]+)\.tar\.gz$', args.starfusion_plug_n_play_targz)
    assert match
    local_plugnplay_dir = match[1]
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_plugnplay_dir)):
        util.check_call([
            'tar', 'xzf', args.starfusion_plug_n_play_targz, '-C',
            args.starfusion_data_dir
        ])
    result_dir = args.result_dir + '/' + os.path.basename(
        sample_name + '-starfusion')
    logging.info('Start starfusion benchmark: %s', result_dir)
    try:
        os.makedirs(result_dir, 0o755)
    except OSError:
        logging.error("mkdir %s failed", result_dir)
    cached_r1 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + '/' + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])
    starfusion_args = ['docker', 'run']
    mounted: Set[str] = set()
    for dir in [args.starfusion_data_dir, args.result_dir, args.cache_dir]:
        if dir not in mounted:
            mounted.add(dir)
            starfusion_args += ['-v', f'{dir}:{dir}']
    # Note: the path components must not start with '/', or os.path.join
    # would discard the preceding directories.
    starfusion_args += [
        '--rm', 'trinityctat/ctatfusion',
        os.path.join(args.starfusion_data_dir, local_starfusion_dir,
                     'STAR-Fusion'),
        '--left_fq', cached_r1,
        '--right_fq', cached_r2,
        '--CPU', '56',
        '--genome_lib_dir',
        os.path.join(args.starfusion_data_dir, local_plugnplay_dir,
                     'ctat_genome_lib_build_dir'),
        '-O', result_dir,
        '--FusionInspector', 'validate'
    ]
    try:
        util.check_call(starfusion_args)
    except Exception as e:
        logging.error("Starfusion failed (ignoring): %s", e)
    logging.info('Finished starfusion benchmark: %s', result_dir)

def main() -> None:
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")
    p = argparse.ArgumentParser()
    p.add_argument("--cache_dir",
                   default=util.DEFAULT_CACHE_DIR,
                   help="Benchmark cache dir")
    p.add_argument("--result_dir",
                   default=util.DEFAULT_RESULT_DIR,
                   help="Benchmark result dir")
    p.add_argument(
        "--starfusion_data_dir",
        default="/scratch-nvme/starfusion",
        help="Directory for expanding starfusion plug-n-play files",
    )
    p.add_argument(
        "--run",
        action="append",
        choices=["af4", "starfusion"],
        help="List of systems to run. If unset, run all the configured systems",
    )
    p.add_argument(
        "--starfusion_plug_n_play_targz",
        default=os.environ["HOME"] +
        "/GRCh38_v27_CTAT_lib_Feb092018.plug-n-play.tar.gz",
        help="Tar.gz file of starfusion plug-n-play file. "
        "https://github.com/STAR-Fusion/STAR-Fusion/wiki#data-resources-required",
    )
    p.add_argument(
        "--starfusion_targz",
        default=os.environ["HOME"] + "/STAR-Fusion-v1.5.0.FULL.tar.gz",
        help="Tar.gz file of starfusion source package. "
        "https://github.com/STAR-Fusion/STAR-Fusion/wiki#data-resources-required",
    )
    p.add_argument(
        "--brca_data_dir",
        default="/scratch-nvme/xyang/brca_rnaseq_data",
        help="BT474, KPL4, MCF7, SKBR3 Breast cancer data directory",
    )
    args = p.parse_args()
    if not args.run:
        args.run = ["af4", "starfusion"]

    ## brca rna-seq for af4
    brca_samples = [
        os.path.join(args.brca_data_dir, s)
        for s in ["BT474", "KPL4", "MCF7", "SKBR3"]
    ]
    for s in brca_samples:
        if not os.path.exists(s):
            util.check_call([
                "download_brca_data.py",
                "--odir",
                args.brca_data_dir,
            ])
    cosmic_fusion_path = (
        "s3://grail-publications/2019-ISMB/references/all_art_lod_brca.txt")
    for sample in brca_samples:
        r1s: List[str] = []
        for fq in os.listdir(sample):
            if "_1" in fq:
                r1s.append(os.path.join(sample, fq))
        cached_file_pairs: List[util.FASTQPair] = []
        for r1 in r1s:
            assert os.path.exists(r1.replace("_1", "_2"))
            cached_file_pairs.append(
                util.FASTQPair(r1=r1, r2=r1.replace("_1", "_2")))
        print(os.path.basename(sample))
        print(cached_file_pairs)
        run_af4(os.path.basename(sample), cached_file_pairs,
                cosmic_fusion_path, args)

    ## cfrna for af4 and starfusion
    cosmic_fusion_path = (
        "s3://grail-publications/2019-ISMB/references/all_pair_art_lod_gpair_merged.txt"
    )
    for sample in util.RNA_SAMPLES:
        fastq_files: List[str] = []
        cached_file_pairs = []
        for fp in sample.paths:
            assert fp.r1.replace("R1", "R2") == fp.r2, fp.r2
            fastq_files += [fp.r1, fp.r2]
            cached_file_pairs.append(
                util.FASTQPair(
                    r1=args.cache_dir + "/" + os.path.basename(fp.r1),
                    r2=args.cache_dir + "/" + os.path.basename(fp.r2),
                ))
        util.s3_cache_files(fastq_files, args.cache_dir)
        if "af4" in args.run:
            run_af4(sample.name, cached_file_pairs, cosmic_fusion_path, args)
        if "starfusion" in args.run:
            run_starfusion(sample.name, cached_file_pairs, args)

def run_starfusion(sample_name: str, cached_file_pairs: List[util.FASTQPair],
                   args: Any):
    match = re.match(r".*/([^/]+)\.FULL\.tar\.gz$", args.starfusion_targz)
    assert match
    local_starfusion_dir = match[1]
    logging.info("LOCAL: %s", local_starfusion_dir)
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)):
        util.check_call([
            "tar", "xzf", args.starfusion_targz, "-C", args.starfusion_data_dir
        ])
        util.check_call([
            "make", "-C",
            os.path.join(args.starfusion_data_dir, local_starfusion_dir)
        ])
    match = re.match(r".*/([^/]+)\.tar\.gz$", args.starfusion_plug_n_play_targz)
    assert match
    local_plugnplay_dir = match[1]
    if not os.path.exists(
            os.path.join(args.starfusion_data_dir, local_plugnplay_dir)):
        util.check_call([
            "tar", "xzf", args.starfusion_plug_n_play_targz, "-C",
            args.starfusion_data_dir,
        ])
    result_dir = args.result_dir + "/" + os.path.basename(
        sample_name + "-starfusion")
    logging.info("Start starfusion benchmark: %s", result_dir)
    try:
        os.makedirs(result_dir, 0o755)
    except OSError:
        logging.error("mkdir %s failed", result_dir)
    cached_r1 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r1)
        for fp in cached_file_pairs
    ])
    cached_r2 = ",".join([
        args.cache_dir + "/" + os.path.basename(fp.r2)
        for fp in cached_file_pairs
    ])
    starfusion_args = ["docker", "run"]
    mounted: Set[str] = set()
    for dir in [args.starfusion_data_dir, args.result_dir, args.cache_dir]:
        if dir not in mounted:
            mounted.add(dir)
            starfusion_args += ["-v", f"{dir}:{dir}"]
    starfusion_args += [
        "--rm", "trinityctat/ctatfusion",
        os.path.join(args.starfusion_data_dir, local_starfusion_dir,
                     "STAR-Fusion"),
        "--left_fq", cached_r1,
        "--right_fq", cached_r2,
        "--CPU", "56",
        "--genome_lib_dir",
        os.path.join(args.starfusion_data_dir, local_plugnplay_dir,
                     "ctat_genome_lib_build_dir"),
        "-O", result_dir,
        "--FusionInspector", "validate",
    ]
    try:
        util.check_call(starfusion_args)
    except Exception as e:
        logging.error("Starfusion failed (ignoring): %s", e)
    logging.info("Finished starfusion benchmark: %s", result_dir)

def buildkite_upload(pattern, **kwargs):
    util.check_call(['buildkite-agent', 'artifact', 'upload', pattern],
                    **kwargs)

        unit.include_dirs.append(env['CUNIT_INCLUDE_DIR'])
        unit.libraries.append(env['CUNIT_LIBRARIES'])
        unit.library_dirs.append(env['CUNIT_LIB_DIR'])
        self.distribution.native_executables.append(unit)

    ## build w/ distutils thru backdoor
    cmd_obj = self.get_command_obj('build_exe')
    cmd_obj.ensure_finalized()
    cmd_obj.run()
    self.distribution.native_executables = orig_exes
    for pkg, units in self._get_c_tests():
        log.info('C unit tests for ' + pkg)
        for unit in units:
            try:
                util.check_call([os.path.join(lib_dir, unit.name)])
            except Exception as e:
                failed = True
                print(e)

## C++
if self._has_cpp_tests():
    sys.stderr.write("C++ unit testing is untested!")  # FIXME
    from configure import cppunit
    env = dict()
    if not cppunit.is_installed(env, None):
        cppunit.install(env, None)
    orig_exes = self.distribution.native_executables

def buildkite_download(pattern, destination, **kwargs):
    util.check_call(
        ['buildkite-agent', 'artifact', 'download', pattern, destination],
        **kwargs)

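# Together these two helpers wrap the buildkite-agent artifact CLI; a typical
# round trip in this pipeline (the cwd/destination values are illustrative):
#
#   buildkite_upload('*.whl', cwd=wheel_dir)      # from a build step
#   buildkite_download('*.whl', '.', cwd='tmp')   # from a later report step
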
def cmd_build(args, remainder):
    with open('ci/plan.yml') as file_:
        plan = yaml.safe_load(file_)
    env = os.environ.copy()
    variant = plan['VARIANTS'][args.variant]
    for key, value in variant.get('env', {}).items():
        env[key] = str(value)
    build_root = variant.get('build_root', 'build-x86_64')
    build_type = variant.get('build_type', 'Release')
    check = variant.get('check', 'smoke')
    system = variant.get('system', 'Linux')
    temp_dir = Path('/tmp') / os.getenv('BUILDKITE_AGENT_NAME')
    build_dir = Path(build_root) / build_type
    logs_dir = Path('logs').resolve()
    logs_dir.mkdir(parents=True, exist_ok=True)

    util.printf('--- :building_construction: configure')
    configure_log = logs_dir / 'configure.log'
    with configure_log.open('wb') as fp:
        util.check_call([
            'python', 'configure', '--ci', f'--temp={temp_dir}',
            f'--type={build_type}'
        ],
                        env=env,
                        stdout=fp,
                        stderr=subprocess.STDOUT)

    util.printf('--- :hammer_and_wrench: ninja')
    util.check_call(['ninja', '-C', build_dir], env=env)

    util.printf('--- :hammer_and_wrench: ninja package')
    util.check_call(['ninja', '-C', build_dir, 'package'], env=env)

    util.printf(f'--- :hammer_and_wrench: ninja check-{check}')
    check_log = logs_dir / f'check-{check}.log'
    with check_log.open('wb') as fp:
        util.check_call(['ninja', '-C', build_dir, f'check-{check}'],
                        env=env,
                        stdout=fp,
                        stderr=subprocess.STDOUT)

    util.printf('--- Test devkit')
    devkit_dir = build_dir / '_CPack_Packages' / system / 'TGZ' / f'PlaidML-1.0.0-{system}' / 'devkit'
    devkit_build_dir = devkit_dir / 'build'
    cmd = ['cmake']
    cmd += ['-S', devkit_dir]
    cmd += ['-B', devkit_build_dir]
    cmd += ['-G', 'Ninja']
    util.check_call(cmd, env=env)
    util.check_call(['ninja', '-C', devkit_build_dir], env=env)
    util.check_call([devkit_build_dir / 'edsl_test'], env=env)

    if 'dbg' not in args.variant:
        util.buildkite_upload(build_dir / '*.whl')
        util.buildkite_upload(build_dir / '*.tar.gz')

def main() -> None:
    logging.basicConfig(level=logging.DEBUG,
                        format="%(asctime)s:%(levelname)s: %(message)s")
    p = argparse.ArgumentParser()
    p.add_argument("--cache_dir",
                   default=util.DEFAULT_CACHE_DIR,
                   help="Benchmark cache dir")
    p.add_argument("--result_dir",
                   default=util.DEFAULT_RESULT_DIR,
                   help="Benchmark result dir")
    p.add_argument(
        "--rerun_af4",
        action="store_true",
        help="Always run AF4 even if the result file already exists",
    )
    p.add_argument(
        "--recache_files",
        action="store_true",
        help="Always copy benchmark data files, even if they already exist locally.",
    )
    args = p.parse_args()
    util.s3_cache_files(
        [
            util.REFERENCE_DIR +
            "/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa",
            util.REFERENCE_DIR + "/all_pair_art_lod_gpair_merged.txt",
            util.REFERENCE_DIR + "/liu_gpair.txt",
        ],
        args.cache_dir,
    )
    for mode in ["denovo", "targeted"]:
        for sample in util.SIMULATED_SAMPLES:
            util.s3_cache_files([sample.path.r1, sample.path.r2],
                                args.cache_dir)
            result_dir = (
                f"{args.result_dir}/synthetic-{mode}-{sample.n}-{sample.coverage}"
            )
            try:
                os.makedirs(result_dir, 0o755)
            except OSError:
                logging.error("mkdir %s failed", result_dir)
            if not os.path.exists(f"{result_dir}/filtered.fa") or args.rerun_af4:
                logging.info("running benchmark in %s", result_dir)
                af4_args = [
                    str(util.af4_path()),
                    f"-log_dir={result_dir}",
                    f"-r1={args.cache_dir}/{sample.path.r1}",
                    f"-r2={args.cache_dir}/{sample.path.r2}",
                    f"-fasta-output={result_dir}/all.fa",
                    f"-filtered-output={result_dir}/filtered.fa",
                    "-max-genes-per-kmer=2",
                    "-max-proximity-distance=1000",
                    "-max-proximity-genes=5",
                    "-transcript=" + args.cache_dir +
                    "/gencode.v26.250padded_separate_jns_transcripts_parsed_no_mt_no_overlap_no_pary_no_versioned.fa",
                ]
                if mode == "targeted":
                    af4_args.append("-cosmic-fusion=" + args.cache_dir +
                                    "/all_pair_art_lod_gpair_merged.txt")
                util.check_call(af4_args)
                logging.info("Runtime stats: %s",
                             util.run_stats(Path(result_dir)))
            stats = TargetedFusionStats(
                Path(f"{args.cache_dir}/liu_gpair.txt"),
                Path(f"{result_dir}/filtered.fa"),
            )
            s = stats.stats()
            tp = "%d" % (s.tp, )
            fp = "%d" % (s.fp, )
            fn = "%d" % (s.fn, )
            print(f"{mode} & {sample.n} & {sample.coverage} & {tp} & {fp} & {fn}\\\\")
