def config_logbook_pp(log_dir: str, infile_name: str, log_to_file: bool = True,
                      file_level: str = 'INFO', log_type: str = "pp",
                      stream_level: str = 'DEBUG') -> None:
    """ Configure logging via logbook for assorted preprocessing scripts. """
    logbook.set_datetime_format("local")
    logname = "{}_{}.log".format(infile_name, log_type)
    log_path = norman_path(log_dir, logname)
    fileutils.mkdir_p(log_dir)
    if log_to_file:
        file_handler = TimedRotatingFileHandler(log_path, level=file_level, backup_count=7)
    else:
        file_handler = NullHandler()
    stream_handler = StreamHandler(stream=stdout, level=stream_level, bubble=True)
    with file_handler, stream_handler:
        yield
def get_tmp_path(arg):
    if arg.startswith(get_generated_path()):
        mkdir_p(get_generated_path('tmp'))
        tmp_basename = arg[len(get_generated_path()):].lstrip(os.path.sep).replace(os.path.sep, '-')
        return get_generated_path('tmp', tmp_basename)
    elif arg.startswith(os.path.sep):
        return arg + '.tmp'
    else:
        mkdir_p(get_generated_path('tmp'))
        return get_generated_path('tmp', arg)
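# Hedged usage sketch for get_tmp_path above. get_generated_path() and the
# on-disk layout are assumptions read off the snippet itself: paths under the
# generated directory are flattened into <generated>/tmp/ by replacing path
# separators with '-', absolute paths elsewhere just gain a '.tmp' suffix,
# and bare names land in <generated>/tmp/ directly.
#
#   get_tmp_path(get_generated_path('pheno', '1.json'))  # -> <generated>/tmp/pheno-1.json
#   get_tmp_path('/data/out.tsv')                        # -> /data/out.tsv.tmp
#   get_tmp_path('scratch.tsv')                          # -> <generated>/tmp/scratch.tsv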
def save_traffic_stats(lang, project, query_date, limit=DEFAULT_LIMIT):
    '''\
    1. Get articles
    2. Add images and summaries
    3. Prepare and save results
    '''
    articles = make_article_list(query_date, lang=lang, project=project)
    total_traffic = get_project_traffic(query_date, lang, project)
    articles = articles[:limit]
    articles = add_extras(articles, lang=lang, project=project)
    ret = {'articles': articles,
           'formatted_date': format_date(query_date, format='d MMMM yyyy', locale=lang),
           'date': {'day': query_date.day,
                    'month': query_date.month,
                    'year': query_date.year},
           'lang': lang,
           'full_lang': LOCAL_LANG_MAP[lang],
           'total_traffic': total_traffic,
           'total_traffic_short': shorten_number(total_traffic),
           'examples': [articles[0], articles[1], articles[2],
                        articles[query_date.day * 2]],  # haha ok..
           'project': project.capitalize(),
           'permalink': DATE_PERMALINK_TMPL.format(lang=lang,
                                                   project=project,
                                                   year=query_date.year,
                                                   month=query_date.month,
                                                   day=query_date.day),
           'meta': {'fetched': datetime.utcnow().isoformat()}}
    outfile_name = DATA_PATH_TMPL.format(lang=lang,
                                         project=project,
                                         year=query_date.year,
                                         month=query_date.month,
                                         day=query_date.day)
    with tlog.critical('saving_single_day_stats') as rec:
        rec['out_file'] = os.path.abspath(outfile_name)
        try:
            out_file = codecs.open(outfile_name, 'w')
        except IOError:
            mkdir_p(os.path.dirname(outfile_name))
            out_file = codecs.open(outfile_name, 'w')
        with out_file:
            data_bytes = json.dumps(ret, indent=2, sort_keys=True)
            rec['len_bytes'] = len(data_bytes)
            out_file.write(data_bytes)
        rec.success('wrote {len_bytes} bytes to {out_file}')
    return
def move_file(self, old_filename, new_filename):
    self.tree_modifier.move(old_filename, new_filename)
    if not self.repo.is_bare and self.update_working_copy:
        real_old_filename = os.path.join(self.path, old_filename)
        real_new_filename = os.path.join(self.path, new_filename)
        mkdir_p(os.path.dirname(real_new_filename))
        os.rename(real_old_filename, real_new_filename)
        remove_file_with_empty_parents(self.path, old_filename)
    self.messages.append('  R {} -> {}'.format(old_filename, new_filename))
def save_rendered(outfile_name, template_name, context):
    global ASHES_ENV  # retain laziness
    if not ASHES_ENV:
        ASHES_ENV = ashes.AshesEnv([TEMPLATE_PATH], keep_whitespace=True)
    rendered = ASHES_ENV.render(template_name, context)
    try:
        out_file = codecs.open(outfile_name, 'w', 'utf-8')
    except IOError:
        mkdir_p(dirname(outfile_name))
        out_file = codecs.open(outfile_name, 'w', 'utf-8')
    with out_file:
        out_file.write(rendered)
    print 'successfully generated %s' % outfile_name
def _create_dir(directory_path):
    """Create a directory for us.

    Args:
        directory_path (:obj:`str`): The path to the directory to create.

    Returns:
        int: Status code of success or failure. Anything except 0 is a failure.
    """
    try:
        fileutils.mkdir_p(directory_path)
    except OSError:
        LOGGER.exception('Could not create directory: %s', directory_path, exc_info=True)
        return 1
    return 0
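# Hedged usage sketch for _create_dir: its 0/1 return-code convention folds
# naturally into a process exit status. The wrapper name below is hypothetical
# and only illustrates how a caller might consume that convention.
import sys

def ensure_output_dir_or_exit(directory_path):
    # Abort the whole run if the output directory cannot be created.
    status = _create_dir(directory_path)
    if status != 0:
        sys.exit(status)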
def _configure_cache():
    conf.set_default_value('cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache')))
    if conf.cache is False:
        return
    if conf.has_own_property('cache'):
        conf.cache = os.path.abspath(os.path.join(conf.data_dir, os.path.expanduser(conf.cache)))
    if not os.path.isdir(conf.cache):
        try:
            mkdir_p(conf.cache)
        except PermissionError:
            print("Warning: caching is disabled because the directory {!r} can't be created.\n".format(conf.cache) +
                  "If you don't want caching, set `cache = False` in your config.py.")
            conf.cache = False
            return
    if not os.access(conf.cache, os.R_OK):
        print('Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n'.format(conf.cache) +
              "If you don't want caching, set `cache = False` in your config.py.")
        conf.cache = False
def write_file(self, filename, content):
    # TODO: combine writing many files
    assert isinstance(content, text_type)
    data = content.encode('utf-8')
    existing_entry = get_tree_entry(self.repo, self.working_tree, filename)
    if existing_entry:
        type = 'M'
        if existing_entry.id == git_hash(data):
            return
    else:
        type = 'A'
    blob_id = self.repo.create_blob(data)
    self.insert_into_working_tree(blob_id, filename)
    if not self.repo.is_bare and self.update_working_copy:
        real_filename = os.path.join(self.path, filename)
        mkdir_p(os.path.dirname(real_filename))
        with codecs.open(real_filename, 'w', encoding='utf-8') as outfile:
            outfile.write(content)
    self.messages.append('  {} {}'.format(type, filename))
def run(argv):
    if not os.path.exists(clean_file):
        print('dbsnp will be stored at {clean_file!r}'.format(clean_file=clean_file))
        mkdir_p(dbsnp_dir)

        if not os.path.exists(raw_file):
            print('Downloading dbsnp!')
            wget = utils.get_path('wget')
            dbsnp_url = 'ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b147_GRCh37p13/VCF/All_20160601.vcf.gz'
            #dbsnp_url = 'ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b147_GRCh37p13/database/organism_data/b147_SNPChrPosOnRef_105.bcp.gz'
            utils.run_cmd([wget, '-O', raw_tmpfile, dbsnp_url])
            os.rename(raw_tmpfile, raw_file)

        utils.run_script(r'''
        gzip -cd '{raw_file}' |
        grep -v '^#' |
        perl -F'\t' -nale 'print "$F[0]\t$F[1]\t$F[2]\t$F[3]\t$F[4]"' | # Gotta declare that it's tab-delimited, else it's '\s'-delimited I think.
        gzip > '{clean_tmpfile}'
        '''.format(raw_file=raw_file, clean_tmpfile=clean_tmpfile))
        os.rename(clean_tmpfile, clean_file)
    else:
        print("dbsnp is at '{clean_file}'".format(clean_file=clean_file))
def main():
    global DR_LOG_FILE
    global FRRP_LOG_FILE
    global GENERAL_LOG_FILE
    global MUX
    global PREFIX
    global EXP_ID
    global producer
    global CONST_WAIT_TIME
    mkdir_p('graph_results')
    mkdir_p('general_logs')
    mkdir_p('dr_logs')
    mkdir_p('frrp_logs')
    # Five positional arguments plus the script name, so argv must have at least 6 entries.
    if len(sys.argv) < 6:
        print('usage: long_path.py <EXP_ID> <ATLAS_AS_FILE> <PREFIX> <WAIT_TIME> <MUX>')
        sys.exit(1)
    producer = KafkaProducer(value_serializer=lambda m: json.dumps(m).encode('ascii'))
    atexit.register(end_collector_call)
    EXP_ID = str(sys.argv[1])
    MUX = str(sys.argv[5])
    PREFIX = str(sys.argv[3])
    CONST_WAIT_TIME = int(sys.argv[4])
    source_as_file = str(sys.argv[2])
    exp_start = arrow.utcnow().isoformat()
    GENERAL_LOG_FILE = "general_logs/general_log_{}_{}_{}.txt".format(EXP_ID, MUX, exp_start)
    DR_LOG_FILE = "dr_logs/default_route_log_{}_{}_{}.txt".format(EXP_ID, MUX, exp_start)
    FRRP_LOG_FILE = "frrp_logs/frrp_log_{}_{}_{}.txt".format(EXP_ID, MUX, exp_start)
    run_measurements(producer, source_as_file, exp_start)
def before_container_hook(self, app):
    if is_quast(app):
        fu.mkdir_p(fs.get_task_dir_path(app, 'tmp/assembly_metrics'))
def make_basedir(path):
    mkdir_p(os.path.dirname(path))
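# The snippets in this collection lean on a mkdir_p helper (boltons'
# fileutils.mkdir_p, pheweb's file_utils.mkdir_p, or a local equivalent).
# A minimal sketch of the behaviour they assume: create the directory and any
# missing parents, and treat "already exists" as success. The empty-path guard
# is an addition here, not necessarily part of the real helpers.
import os

def mkdir_p(path):
    # An empty path (e.g. os.path.dirname('file.txt')) means "current
    # directory", which always exists, so there is nothing to create.
    if not path:
        return
    os.makedirs(path, exist_ok=True)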
def _find_and_submit(tasks: List[Task], work_folder: str, concurrent_jobs: int, submit_limit: int, submitter: SyncSubmission): submitter.warm_cache(tasks) submitted = 0 # To maintain concurrent_jobs limit, we set a pbs dependency on previous jobs. # mapping of concurrent slot number to the last job id to be submitted in it. last_job_slots = {} # type: Dict[int, str] for task in tasks: if submitted == submit_limit: click.echo("Submit limit ({}) reached, done.".format(submit_limit)) break require_job_id = last_job_slots.get(submitted % concurrent_jobs) run_path = task.resolve_path(work_folder).joinpath('{:03d}'.format(submitted)) if run_path.exists(): raise RuntimeError("Calculated job folder should be unique? Got %r" % (run_path,)) fileutils.mkdir_p(run_path) job_id, command = submitter.submit( task=task, output_file=(run_path.joinpath('out.log')), error_file=run_path.joinpath('err.log'), job_name='{}-{:02}'.format(task.collection.name, submitted), require_job_id=require_job_id, ) if job_id: # Not used by the job, but useful for our reference, and potentially by future monitoring. run_path.joinpath('submission-info.yaml').write_text( yaml.safe_dump( { 'pbs_command': ' '.join(shlex.quote(arg) for arg in command), 'pbs_job_id': job_id, 'input_paths': [str(p) for p in task.input_paths], 'file_dataset_count': task.dataset_count, 'collection_name': task.collection.name }, default_flow_style=False, indent=4 ) ) last_job_slots[submitted % concurrent_jobs] = job_id submitted += 1 click.echo( "{prefix}: submitted {job_id} with {dataset_count} datasets using directory {run_path}".format( prefix=style( "[{:02d} {}]".format(submitted, task.collection.name), fg='blue', bold=True ), job_id=style(job_id, bold=True), dataset_count=style(str(task.dataset_count), bold=True), run_path=style(str(run_path), bold=True) ) ) time.sleep(SUBMIT_THROTTLE_SECS)
def main(random_seed, test_on_gt, only_test, overfit, fusion, weighted_aggregation): random.seed(random_seed) np.random.seed(random_seed) torch.manual_seed(random_seed) torch.cuda.manual_seed_all(random_seed) n_epochs = 3 lr = 1e-2 wd = 0 lr_scheduler = True # graph settings h_dim = 128 x_dim = 128 c_dim = 90 phi_dim = 2048 max_steps = 3 train_db = JointCocoTasks() initializer = InitializerMul(h_dim=h_dim, phi_dim=phi_dim, c_dim=c_dim) if weighted_aggregation: aggregator = AllLinearAggregatorWeightedWithDetScore( in_features=h_dim, out_features=x_dim) else: aggregator = AllLinearAggregator(in_features=h_dim, out_features=x_dim) output_model = OutputModelFirstLast(h_dim=h_dim, num_tasks=len(TASK_NUMBERS)) network = GGNNDiscLoss( initializer=initializer, aggregator=aggregator, output_model=output_model, max_steps=max_steps, h_dim=h_dim, x_dim=x_dim, class_dim=c_dim, fusion=fusion, ) optimizer = SGD(network.parameters(), lr=lr, weight_decay=wd) experiment = JointGraphExperiment( network=network, optimizer=optimizer, dataset=train_db, tensorboard=True, seed=random_seed, ) train_folder = "ggnn-full-seed:{s}".format(s=random_seed) folder = os.path.join(SAVING_DIRECTORY, train_folder) mkdir_p(folder) if not only_test: experiment.train_n_epochs(n_epochs, overfit=overfit, lr_scheduler=lr_scheduler) torch.save(network.state_dict(), os.path.join(folder, "model.mdl")) else: network.load_state_dict(torch.load(os.path.join(folder, "model.mdl"))) for task_number in TASK_NUMBERS: if test_on_gt: test_db = CocoTasksTestGT(task_number) else: test_db = CocoTasksTest(task_number) print("testing task {}".format(task_number), "---------------------") # test_model detections = experiment.do_test(test_db, task_number=task_number) detection_file_name = "detections_wa:{}_tn:{}_tgt:{}_f:{}.json".format( weighted_aggregation, task_number, test_on_gt, fusion) # save detections with open(os.path.join(folder, detection_file_name), "w") as f: json.dump(detections, f) # perform evaluation with redirect_stdout(open(os.devnull, "w")): gtCOCO = test_db.task_coco dtCOCO = gtCOCO.loadRes(os.path.join(folder, detection_file_name)) cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox") cocoEval.params.catIds = 1 cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() print("mAP:\t\t %1.6f" % cocoEval.stats[0]) print("[email protected]:\t\t %1.6f" % cocoEval.stats[1]) # save evaluation performance result_file_name = "result_wa:{}_tn:{}_tgt:{}_f:{}.txt".format( weighted_aggregation, task_number, test_on_gt, fusion) with open(os.path.join(folder, result_file_name), "w") as f: f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
def main(random_seed, test_on_gt, only_test, overfit): random.seed(random_seed) np.random.seed(random_seed) torch.manual_seed(random_seed) torch.cuda.manual_seed_all(random_seed) n_epochs = 3 lr = 1e-2 wd = 0 lr_scheduler = True train_db = JointCocoTasks() network = JointClassifier() optimizer = SGD(network.parameters(), lr=lr, weight_decay=wd) experiment = JointClassifierExperiment( network=network, optimizer=optimizer, dataset=train_db, tensorboard=True, seed=random_seed, ) train_folder = "ablation-joint-classifier-seed:{s}".format(s=random_seed) folder = os.path.join(SAVING_DIRECTORY, train_folder) mkdir_p(folder) if not only_test: experiment.train_n_epochs(n_epochs, overfit=overfit, lr_scheduler=lr_scheduler) torch.save(network.state_dict(), os.path.join(folder, "model.mdl")) else: network.load_state_dict(torch.load(os.path.join(folder, "model.mdl"))) for task_number in TASK_NUMBERS: if test_on_gt: test_db = CocoTasksTestGT(task_number) else: test_db = CocoTasksTest(task_number) print("testing task {}".format(task_number), "---------------------") # test_model detections = experiment.do_test(test_db, task_number=task_number) detections_file_name = "detections_tn:{}_tgt:{}.json".format( task_number, test_on_gt) # save detections with open(os.path.join(folder, detections_file_name), "w") as f: json.dump(detections, f) # perform evaluation with redirect_stdout(open(os.devnull, "w")): gtCOCO = test_db.task_coco dtCOCO = gtCOCO.loadRes(os.path.join(folder, detections_file_name)) cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox") cocoEval.params.catIds = 1 cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() print("mAP:\t\t %1.6f" % cocoEval.stats[0]) print("[email protected]:\t\t %1.6f" % cocoEval.stats[1]) # save evaluation performance result_file_name = "result_tn:{}_tgt:{}.txt".format( task_number, test_on_gt) with open(os.path.join(folder, result_file_name), "w") as f: f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
def pypier(args, reqs): ''' only supports pure-python repos for now ''' parser = argparse.ArgumentParser(prog='pypier') parser.add_argument('cmd', choices=('config', 'publish', 'pip-index') + PIP_CMDS) cmd = parser.parse_args(args[1:2]).cmd cache = reqs.cache executor = reqs.executor site_config = reqs.site_config pypier_repo = site_config['pypier']['repo'] pypier_repo_ro = site_config['pypier']['repo_ro'] if cmd == 'config': print print 'PyPIER repos:' print ' ', pypier_repo_ro, '(fetch)' print ' ', pypier_repo, '(publish)' elif cmd == 'publish': parser.add_argument('--dry-run', action='store_true') arg_vals = parser.parse_args(args[1:]) setup_dir = find_project_dir(os.getcwd(), 'setup.py') pypier_read_write = cache.workon_project_git('pypier', pypier_repo) executor.python('setup.py', 'sdist').redirect(cwd=setup_dir) # TODO manylinux wheels? OSX wheels? version = executor.python('setup.py', version=None).batch()[0].strip() output = [ fn for fn in os.listdir(setup_dir + '/dist/') if version in fn ] name = output[0].split('-', 1)[0] # typical artifact: foo-ver.tar.gz dst = pypier_read_write.path + '/packages/' + name + '/' fileutils.mkdir_p(dst) # TODO: instead of just looking for anything in the dist # directory, query setup for the version and check for that. for result in output: if os.path.exists(os.path.join(dst, result)): raise EnvironmentError( "{} has already been published".format(result)) for result in output: shutil.copy(setup_dir + '/dist/' + result, dst) with fileutils.atomic_save(os.path.join(dst, 'pkg_info.json')) as f: pkg_info = get_pkg_info(executor, setup_dir) pkg_info_json = json.dumps(pkg_info, indent=2, sort_keys=True) f.write(pkg_info_json + '\n') update_index(pypier_read_write.path) source_metadata = get_source_metadata(executor, setup_dir) commit_msg = 'PyPIER publish: {}\n\n{}\n'.format( ', '.join(output), json.dumps(source_metadata, indent=2, sort_keys=True)) pypier_read_write.push(commit_msg, dry_run=arg_vals.dry_run) elif cmd == 'pip-index': pypier_read_only = cache.pull_project_git('pypier', pypier_repo_ro) link_path = pypier_read_only + '/packages/index.html' print link_path # NOTE: this print command is the primary purpose elif cmd in PIP_CMDS: pypier_read_only = cache.pull_project_git('pypier', pypier_repo_ro) link_path = pypier_read_only + '/packages/index.html' #env = dict(os.environ) #env['PIP_FIND_LINKS'] = ' '.join( # [link_path] + env.get('PIP_FIND_LINKS', '').split()) # TODO: figure out clean way to extend env # TODO: remove ALL_PROXY='' once urllib3 + requests # do a release and don't pre-emptively die # on socks5h:// proxy executor.patch_env(PIP_FIND_LINKS=link_path, ALL_PROXY='').command(['python', '-m', 'pip'] + args[1:]).redirect( stdout=sys.stdout, stderr=sys.stderr) else: # argparse should catch this above raise ValueError('unrecognized sub-command %r' % cmd)
def export(self): fal = self.fal self._call_custom_hook('pre_export') output_path = self.paths['output_path'] with chlog.critical('create output path'): mkdir_p(output_path) def export_entry(entry): entry_custom_base_path = os.path.split(entry.entry_root)[0] if entry_custom_base_path: mkdir_p(pjoin(output_path, entry_custom_base_path)) er = entry.entry_root entry_html_fn = er + EXPORT_HTML_EXT entry_gen_md_fn = er + '.gen.md' entry_data_fn = er + '.json' html_output_path = pjoin(output_path, entry_html_fn) data_output_path = pjoin(output_path, entry_data_fn) gen_md_output_path = pjoin(output_path, entry_gen_md_fn) #fal.write(html_output_path, entry.entry_html) # fal.write(html_output_path, entry.entry_html) fal.write(gen_md_output_path, entry.content_md) # TODO _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True) fal.write(data_output_path, _data) # TODO: copy file # fal.write(src_output_path, entry.source_text) return for entry in self.entries: export_entry(entry) for entry in self.draft_entries: export_entry(entry) for entry in self.special_entries: export_entry(entry) # index is just the most recent entry for now index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT) if self.entries: index_content = self.entries[0].entry_html else: index_content = 'No entries yet!' fal.write(index_path, index_content) archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT)) fal.write(archive_path, self.entries.rendered_html) # output feeds rss_path = pjoin(output_path, RSS_FEED_FILENAME) fal.write(rss_path, self.entries.rendered_rss_feed) atom_path = pjoin(output_path, ATOM_FEED_FILENAME) fal.write(atom_path, self.entries.rendered_atom_feed) for tag, entry_list in self.tag_map.items(): tag_path = pjoin(output_path, entry_list.path_part) mkdir_p(tag_path) rss_path = pjoin(tag_path, RSS_FEED_FILENAME) atom_path = pjoin(tag_path, ATOM_FEED_FILENAME) archive_path = pjoin(tag_path, 'index.html') fal.write(rss_path, entry_list.rendered_rss_feed) fal.write(atom_path, entry_list.rendered_atom_feed) fal.write(archive_path, entry_list.rendered_html) # copy assets, i.e., all directories under the theme path for sdn in get_subdirectories(self.theme_path): cur_src = pjoin(self.theme_path, sdn) cur_dest = pjoin(output_path, sdn) with chlog.critical('copy assets', src=cur_src, dest=cur_dest): copytree(cur_src, cur_dest) # optionally symlink the uploads directory. this is an # important step for sites with uploads because Chert's # default rsync behavior picks up on these uploads by # following the symlink. with chlog.critical('link uploads directory') as rec: uploads_link_path = pjoin(output_path, 'uploads') if not os.path.isdir(self.uploads_path): rec.failure('no uploads directory at {}', self.uploads_path) else: message = None if os.path.islink(uploads_link_path): os.unlink(uploads_link_path) message = 'refreshed existing uploads symlink' os.symlink(self.uploads_path, uploads_link_path) rec.success(message) self._call_custom_hook('post_export')
def download(self, path=LOCAL_DATA): # mostly taken from https://github.com/streamlit/demo-face-gan/ # blob/master/streamlit_app.py root = Path(path).resolve() path = root / self.filename # Don't download the file twice. (If possible, verify the # download using the file length.) if os.path.exists(path): if not self.size or os.path.getsize(path) == self.size: return path mkdir_p(path.parent) # These are handles to two visual elements to animate. status, progress_bar = None, None try: status = st.warning("Downloading %s..." % path) # handle cases where files hosted on gdrive sometimes fail # to download if "google.com" in self.url: _ = gdown.cached_download(self.url, path=path) else: progress_bar = st.progress(0) # with open(path, "wb") as output_file: with urllib.request.urlopen( self.url, cafile=certifi.where()) as response: if response.info()["Content-Length"] is not None: with open(path, "wb") as output_file: length = int(response.info()["Content-Length"]) counter = 0.0 MEGABYTES = 2.0**20.0 while True: data = response.read(8192) if not data: break counter += len(data) output_file.write(data) # We perform animation by overwriting the elements. status.warning( "Downloading %s... (%6.2f/%6.2f MB)" % (path, counter / MEGABYTES, length / MEGABYTES)) progress_bar.progress( min(counter / length, 1.0)) except urllib.error.URLError as e: logger.exception(f"Invalid URL: {self.url}", exc_info=e) # Finally, we remove these visual elements by calling .empty(). finally: if status is not None: status.empty() if progress_bar is not None: progress_bar.empty() if not path.exists(): raise FileNotFoundError(str(path)) elif os.path.getsize(path) == 0: os.remove(path) raise ValueError(f"Invalid URL: {self.url}") return path
def _ensure_conf(): if hasattr(conf, 'data_dir'): conf.data_dir = os.path.abspath(conf.data_dir) else: conf.set_default_value('data_dir', os.path.abspath(os.environ.get('PHEWEB_DATADIR', False) or os.path.curdir)) ## Get `conf.cache` working because it's needed for reporting errors def _configure_cache(): conf.set_default_value('cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache'))) if conf.cache is False: return if conf.has_own_property('cache'): conf.cache = os.path.abspath(os.path.join(conf.data_dir, os.path.expanduser(conf.cache))) if not os.path.isdir(conf.cache): try: mkdir_p(conf.cache) except PermissionError: print("Warning: caching is disabled because the directory {!r} can't be created.\n".format(conf.cache) + "If you don't want caching, set `cache = False` in your config.py.") conf.cache = False return if not os.access(conf.cache, os.R_OK): print('Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n'.format(conf.cache) + "If you don't want caching, set `cache = False` in your config.py.") conf.cache = False _configure_cache() def _load_config_file(): _config_filepath = os.path.join(conf.data_dir, 'config.py') if os.path.isfile(_config_filepath): try: _conf_module = imp.load_source('config', _config_filepath) except Exception: raise utils.PheWebError("PheWeb tried to load your config.py at {!r} but it failed.".format(_config_filepath)) else: for key in dir(_conf_module): if not key.startswith('_'): conf[key] = getattr(_conf_module, key) _load_config_file() conf.set_default_value('lzjs_version', '0.9.0') # Global setting, rarely needs configuration: which version of LZjs to fetch from the CDN conf.set_default_value('custom_templates', lambda: os.path.join(conf.data_dir, 'custom_templates'), is_function=True) conf.set_default_value('debug', False) conf.set_default_value('limit_num_variants', False) conf.set_default_value('assoc_min_maf', 0) conf.set_default_value('variant_inclusion_maf', 0) conf.set_default_value('within_pheno_mask_around_peak', int(500e3)) conf.set_default_value('between_pheno_mask_around_peak', int(1e6)) conf.set_default_value('manhattan_num_unbinned', 500) conf.set_default_value('manhattan_peak_max_count', 500) conf.set_default_value('manhattan_peak_pval_threshold', 1e-6) conf.set_default_value('manhattan_peak_sprawl_dist', int(200e3)) conf.set_default_value('top_hits_pval_cutoff', 1e-6) conf.set_default_value('allow_variant_json_cors', False) conf.set_default_value('urlprefix', '') if 'minimum_maf' in conf: raise utils.PheWebError("minimum_maf has been deprecated. 
Please remove it and use assoc_min_maf and/or variant_inclusion_maf instead") if conf.get('login', {}).get('whitelist', None): conf.login['whitelist'] = [addr.lower() for addr in conf.login['whitelist']] if not os.path.isdir(conf.data_dir): mkdir_p(conf.data_dir) if not os.access(conf.data_dir, os.R_OK): raise utils.PheWebError("Your data directory, {!r}, is not readable.".format(conf.data_dir)) ### Parsing def scientific_int(value): '''like int(value) but accepts "1.3e-4"''' try: return int(value) except ValueError: x = float(value) if x.is_integer(): return int(x) raise class Field: def __init__(self, d): self._d = d def parse(self, value): '''parse from input file''' # nullable if self._d['nullable'] and value in conf.parse.null_values: return '' # type x = self._d['type'](value) # range if 'range' in self._d: assert self._d['range'][0] is None or x >= self._d['range'][0] assert self._d['range'][1] is None or x <= self._d['range'][1] if 'sigfigs' in self._d: x = utils.round_sig(x, self._d['sigfigs']) if 'proportion_sigfigs' in self._d: if 0 <= x < 0.5: x = utils.round_sig(x, self._d['proportion_sigfigs']) elif 0.5 <= x <= 1: x = 1 - utils.round_sig(1-x, self._d['proportion_sigfigs']) else: raise utils.PheWebError('cannot use proportion_sigfigs on a number outside [0-1]') if 'decimals' in self._d: x = round(x, self._d['decimals']) return x def read(self, value): '''read from internal file''' if self._d['nullable'] and value == '': return '' x = self._d['type'](value) return x default_null_values = ['', '.', 'NA', 'N/A', 'n/a', 'nan', '-nan', 'NaN', '-NaN', 'null', 'NULL'] default_field = { 'aliases': [], 'required': False, 'type': str, 'nullable': False, 'from_assoc_files': True, # if this is False, then the field will not be parsed from input files, because annotation will add it. } default_per_variant_fields = OrderedDict([ ('chrom', { 'aliases': ['#CHROM', 'chr'], 'required': True, 'tooltip_underscoretemplate': '<b><%= d.chrom %>:<%= d.pos.toLocaleString() %> <%= d.ref %> / <%= d.alt %></b><br>', 'tooltip_lztemplate': False, }), ('pos', { 'aliases': ['BEG', 'BEGIN', 'BP'], 'required': True, 'type': scientific_int, 'range': [0, None], 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('ref', { 'aliases': ['reference', 'allele0'], 'required': True, 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('alt', { 'aliases': ['alternate', 'allele1'], 'required': True, 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('rsids', { 'from_assoc_files': False, 'tooltip_underscoretemplate': '<% _.each(_.filter((d.rsids||"").split(",")), function(rsid) { %>rsid: <%= rsid %><br><% }) %>', 'tooltip_lztemplate': {'condition': 'rsid', 'template': '<strong>{{rsid}}</strong><br>'}, }), ('nearest_genes', { 'from_assoc_files': False, 'tooltip_underscoretemplate': 'nearest gene<%= _.contains(d.nearest_genes, ",")? 
"s":"" %>: <%= d.nearest_genes %><br>', 'tooltip_lztemplate': False, }), ]) default_per_assoc_fields = OrderedDict([ ('pval', { 'aliases': ['PVALUE'], 'required': True, 'type': float, 'nullable': True, 'range': [0, 1], 'sigfigs': 2, 'tooltip_lztemplate': { 'condition': False, 'template': ('{{#if pvalue}}P-value: <strong>{{pvalue|scinotation}}</strong><br>{{/if}}\n' + '{{#if pval}}P-value: <strong>{{pval|scinotation}}</strong><br>{{/if}}'), }, 'display': 'P-value', }), ('beta', { 'type': float, 'nullable': True, 'sigfigs': 2, 'tooltip_underscoretemplate': 'Beta: <%= d.beta %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta %>)<% } %><br>', 'tooltip_lztemplate': 'Beta: <strong>{{beta}}</strong>{{#if sebeta}} ({{sebeta}}){{/if}}<br>', 'display': 'Beta', }), ('sebeta', { 'aliases': ['se'], 'type': float, 'nullable': True, 'sigfigs': 2, 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('or', { 'type': float, 'nullable': True, 'range': [0, None], 'sigfigs': 2, 'display': 'Odds Ratio', }), ('maf', { 'type': float, 'range': [0, 0.5], 'sigfigs': 2, 'tooltip_lztemplate': {'transform': '|percent'}, 'display': 'MAF', }), ('af', { 'aliases': ['A1FREQ'], 'type': float, 'range': [0, 1], 'proportion_sigfigs': 2, 'tooltip_lztemplate': {'transform': '|percent'}, 'display': 'AF', }), ('ac', { 'type': float, 'range': [0, None], 'decimals': 1, 'display': 'AC', }), ('r2', { 'type': float, 'proportion_sigfigs': 2, 'nullable': True, 'display': 'R2', }), ('tstat', { 'type': float, 'sigfigs': 2, 'nullable': True, 'display': 'Tstat', }), ]) default_per_pheno_fields = OrderedDict([ ('num_cases', { 'aliases': ['NS.CASE', 'N_cases'], 'type': int, 'nullable': True, 'range': [0, None], 'display': '#cases', }), ('num_controls', { 'aliases': ['NS.CTRL', 'N_controls'], 'type': int, 'nullable': True, 'range': [0, None], 'display': '#controls', }), ('num_samples', { 'aliases': ['NS', 'N'], 'type': int, 'nullable': True, 'range': [0, None], 'display': '#samples', }), # TODO: phenocode, phenostring, category, &c? # TODO: include `assoc_files` with {never_send: True}? ]) conf.parse.null_values = deepcopy(default_null_values) conf.parse.per_variant_fields = deepcopy(default_per_variant_fields) conf.parse.per_assoc_fields = deepcopy(default_per_assoc_fields) conf.parse.per_pheno_fields = deepcopy(default_per_pheno_fields) conf.parse.fields = OrderedDict(itertools.chain(conf.parse.per_variant_fields.items(), conf.parse.per_assoc_fields.items(), conf.parse.per_pheno_fields.items())) assert len(conf.parse.fields) == len(conf.parse.per_variant_fields) + len(conf.parse.per_assoc_fields) + len(conf.parse.per_pheno_fields) # no overlaps! 
if 'aliases' in conf: for alias, field in conf.aliases.items(): conf.parse.fields[field].setdefault('aliases', []).append(alias) if 'null_values' in conf: conf.parse.null_values.extend(conf.null_values) # make all aliases lowercase and add parsers for field_name, field_dict in conf.parse.fields.items(): for k,v in default_field.items(): field_dict.setdefault(k, v) field_dict['aliases'] = list(set([field_name.lower()] + [alias.lower() for alias in field_dict['aliases']])) field_dict['_parse'] = Field(field_dict).parse field_dict['_read'] = Field(field_dict).read _repeated_aliases = [alias for alias,count in Counter(itertools.chain.from_iterable(f['aliases'] for f in conf.parse.fields.values())).most_common() if count > 1] if _repeated_aliases: raise utils.PheWebError('The following aliases appear for multiple fields: {}'.format(_repeated_aliases)) def get_tooltip_underscoretemplate(): template = '' for fieldname, field in conf.parse.fields.items(): if 'tooltip_underscoretemplate' in field: if field['tooltip_underscoretemplate'] is False: continue else: template += '<% if(_.has(d, ' + repr(fieldname) + ')) { %>' + field['tooltip_underscoretemplate'] + '<% } %>\n' else: template += '<% if(_.has(d, ' + repr(fieldname) + ')) { %>' + field.get('display', fieldname) + ': <%= d[' + repr(fieldname) + '] %><br><% } %>\n' return template conf.parse.tooltip_underscoretemplate = get_tooltip_underscoretemplate() def get_tooltip_lztemplate(): template = '' for fieldname, field in conf.parse.fields.items(): lzt = field.get('tooltip_lztemplate', {}) if lzt is False: continue if isinstance(lzt, str): lzt = {'template': lzt} if 'template' not in lzt: lzt['template'] = field.get('display', fieldname) + ': <strong>{{' + fieldname + lzt.get('transform','') + '}}</strong><br>' if 'condition' not in lzt: lzt['condition'] = fieldname if not lzt['condition']: template += lzt['template'] + '\n' else: template += '{{#if ' + lzt['condition'] + '}}' + lzt['template'] + '{{/if}}\n' return template conf.parse.tooltip_lztemplate = get_tooltip_lztemplate()
def run(argv):
    # I need these genenames to be unique. So, if a SYMBOL is not unique, I use the ENSG instead.
    gene_dir = os.path.join(conf.data_dir, 'sites', 'genes')
    gencode_file = os.path.join(gene_dir, 'gencode.gtf.gz')
    bed_file = utils.get_cacheable_file_location(gene_dir, 'genes.bed')

    if not os.path.exists(bed_file):
        print('genes.bed will be stored at {bed_file!r}'.format(bed_file=bed_file))
        mkdir_p(gene_dir)

        if not os.path.exists(gencode_file):
            wget = utils.get_path('wget')
            # Link from <http://www.gencodegenes.org/releases/19.html>
            utils.run_cmd([wget, '-O', gencode_file,
                           "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz"])

        good_genetypes = set('''
        protein_coding
        IG_C_gene
        IG_D_gene
        IG_J_gene
        IG_V_gene
        TR_C_gene
        TR_D_gene
        TR_J_gene
        TR_V_gene
        '''.split())

        genes = []
        with gzip.open(gencode_file, 'rt') as f:
            for l in f:
                if l.startswith('#'):
                    continue
                r = l.split('\t')
                if r[2] != 'gene':
                    continue

                # Remove pseudogenes and other unwanted types of genes.
                genetype = re.search(r'gene_type "(.+?)"', r[8]).group(1)
                if genetype not in good_genetypes:
                    continue

                assert r[0].startswith('chr')
                chrom = r[0][3:]
                pos1, pos2 = int(r[3]), int(r[4])
                assert pos1 < pos2
                symbol = re.search(r'gene_name "(.+?)"', r[8]).group(1)
                ensg = re.search(r'gene_id "(ENSG[R0-9]+?)(?:\.[0-9]+)?"', r[8]).group(1)

                genes.append({
                    'chrom': chrom,
                    'start': pos1,
                    'end': pos2,
                    'symbol': symbol,
                    'ensg': ensg,
                })

        symbol_counts = collections.Counter(g['symbol'] for g in genes)
        for g in genes:
            if symbol_counts[g['symbol']] > 1:
                g['symbol'] = g['ensg']
        assert len(set(g['symbol'] for g in genes)) == len(genes)

        with open(bed_file, 'w') as f:
            writer = csv.DictWriter(f, delimiter='\t',
                                    fieldnames='chrom start end symbol ensg'.split(),
                                    lineterminator='\n')
            writer.writerows(genes)

    else:
        print("gencode is at {bed_file!r}".format(bed_file=bed_file))
            f_out.write('{chrom}\t{1}\t{2}\t{3}\n'.format(*next_cpra, chrom=utils.chrom_order_list[next_cpra[0]]))
            for phenocode in next_cpras.pop(next_cpra):
                try:
                    next_cpra = next(readers[phenocode])
                except StopIteration:
                    del readers[phenocode]
                else:
                    next_cpras.setdefault(next_cpra, []).append(phenocode)
    assert not readers, list(readers.items())
    print('{:8} variants in {} <- {}'.format(n_variants, os.path.basename(out_filename),
                                             [os.path.basename(path) for path in input_filenames]))

mkdir_p(conf.data_dir + '/sites')
mkdir_p(conf.data_dir + '/tmp')

def merge_files_in_queue(lock, manna_dict):
    # Keep a work queue of files that need to get merged.
    # Each process takes files off the queue, merges them, and pushes the result back onto the queue.
    # But if there are fewer than MIN_NUM_FILES_TO_MERGE_AT_ONCE on the work queue, and there are files currently being merged (ie, the process is not alone),
    # then the process just exits rather than merge a small number of files.
    while True:
        with lock:
            if len(manna_dict['files_to_merge']) <= 1:
                # no work to do.
                return
            elif len(manna_dict['files_to_merge']) >= MIN_NUM_FILES_TO_MERGE_AT_ONCE or len(manna_dict['files_being_merged']) == 0:
                # If there's a good amount of work to do (or if we're the only process left to do the work), we merge some files.
                files_to_merge_now = manna_dict['files_to_merge'][-NUM_FILES_TO_MERGE_AT_ONCE:]
def copy_to_file(src_file, dst_file, app):
    file_ = os.path.join(app['path'], dst_file)
    dir_ = os.path.dirname(file_)
    fu.mkdir_p(dir_)
    shutil.copy(src_file, file_)
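# Hedged usage sketch for copy_to_file: `app` only needs a 'path' key, and the
# intermediate directories under it are created on demand before the copy.
# The app dict and file names below are made up for illustration.
#
#   demo_app = {'path': '/tmp/demo-app'}
#   copy_to_file('fixtures/config.yml', 'conf/config.yml', demo_app)
#   # -> /tmp/demo-app/conf/config.yml, with /tmp/demo-app/conf created first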
def make_basedir(path: Union[str, Path]) -> None:
    mkdir_p(os.path.dirname(path))
def _ensure_conf(): if hasattr(conf, 'data_dir'): conf.data_dir = os.path.abspath(conf.data_dir) else: conf.set_default_value( 'data_dir', os.path.abspath( os.environ.get('PHEWEB_DATADIR', False) or os.path.curdir)) ## Get `conf.cache` working because it's needed for reporting errors def _configure_cache(): conf.set_default_value( 'cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache'))) if conf.cache is False: return if conf.has_own_property('cache'): conf.cache = os.path.abspath( os.path.join(conf.data_dir, os.path.expanduser(conf.cache))) if not os.path.isdir(conf.cache): try: mkdir_p(conf.cache) except PermissionError: print( "Warning: caching is disabled because the directory {!r} can't be created.\n" .format(conf.cache) + "If you don't want caching, set `cache = False` in your config.py." ) conf.cache = False return if not os.access(conf.cache, os.R_OK): print( 'Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n' .format(conf.cache) + "If you don't want caching, set `cache = False` in your config.py." ) conf.cache = False _configure_cache() def _load_config_file(): _config_filepath = os.path.join(conf.data_dir, 'config.py') if os.path.isfile(_config_filepath): try: _conf_module = imp.load_source('config', _config_filepath) except Exception: raise utils.PheWebError( "PheWeb tried to load your config.py at {!r} but it failed." .format(_config_filepath)) else: for key in dir(_conf_module): if not key.startswith('_'): conf[key] = getattr(_conf_module, key) _load_config_file() conf.set_default_value( 'lzjs_version', '0.9.0' ) # Global setting, rarely needs configuration: which version of LZjs to fetch from the CDN conf.set_default_value( 'custom_templates', lambda: os.path.join(conf.data_dir, 'custom_templates'), is_function=True) conf.set_default_value('debug', False) conf.set_default_value('limit_num_variants', False) conf.set_default_value('assoc_min_maf', 0) conf.set_default_value('variant_inclusion_maf', 0) conf.set_default_value('within_pheno_mask_around_peak', int(500e3)) conf.set_default_value('between_pheno_mask_around_peak', int(1e6)) conf.set_default_value('manhattan_num_unbinned', 500) conf.set_default_value('manhattan_peak_max_count', 500) conf.set_default_value('manhattan_peak_pval_threshold', 1e-6) conf.set_default_value('manhattan_peak_sprawl_dist', int(200e3)) conf.set_default_value('top_hits_pval_cutoff', 1e-6) # Whether to show a table of correlated phenotypes in the app conf.set_default_value('show_correlations', False) conf.set_default_value('pheno_correlations_pvalue_threshold', 0.05) conf.set_default_value('allow_variant_json_cors', False) conf.set_default_value('urlprefix', '') if 'minimum_maf' in conf: raise utils.PheWebError( "minimum_maf has been deprecated. 
Please remove it and use assoc_min_maf and/or variant_inclusion_maf instead" ) if conf.get('login', {}).get('whitelist', None): conf.login['whitelist'] = [ addr.lower() for addr in conf.login['whitelist'] ] if not os.path.isdir(conf.data_dir): mkdir_p(conf.data_dir) if not os.access(conf.data_dir, os.R_OK): raise utils.PheWebError( "Your data directory, {!r}, is not readable.".format( conf.data_dir)) ### Parsing def scientific_int(value): '''like int(value) but accepts "1.3e-4"''' try: return int(value) except ValueError: x = float(value) if x.is_integer(): return int(x) raise class Field: def __init__(self, d): self._d = d def parse(self, value): '''parse from input file''' # nullable if self._d['nullable'] and value in conf.parse.null_values: return '' # type x = self._d['type'](value) # range if 'range' in self._d: assert self._d['range'][0] is None or x >= self._d['range'][0] assert self._d['range'][1] is None or x <= self._d['range'][1] if 'sigfigs' in self._d: x = utils.round_sig(x, self._d['sigfigs']) if 'proportion_sigfigs' in self._d: if 0 <= x < 0.5: x = utils.round_sig(x, self._d['proportion_sigfigs']) elif 0.5 <= x <= 1: x = 1 - utils.round_sig(1 - x, self._d['proportion_sigfigs']) else: raise utils.PheWebError( 'cannot use proportion_sigfigs on a number outside [0-1]' ) if 'decimals' in self._d: x = round(x, self._d['decimals']) return x def read(self, value): '''read from internal file''' if self._d['nullable'] and value == '': return '' x = self._d['type'](value) return x default_null_values = [ '', '.', 'NA', 'N/A', 'n/a', 'nan', '-nan', 'NaN', '-NaN', 'null', 'NULL' ] default_field = { 'aliases': [], 'required': False, 'type': str, 'nullable': False, 'from_assoc_files': True, # if this is False, then the field will not be parsed from input files, because annotation will add it. } default_per_variant_fields = OrderedDict([ ('chrom', { 'aliases': ['#CHROM', 'chr'], 'required': True, 'tooltip_underscoretemplate': '<b><%= d.chrom %>:<%= d.pos.toLocaleString() %> <%= d.ref %> / <%= d.alt %></b><br>', 'tooltip_lztemplate': False, }), ('pos', { 'aliases': ['BEG', 'BEGIN', 'BP'], 'required': True, 'type': scientific_int, 'range': [0, None], 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('ref', { 'aliases': ['reference', 'allele0', 'A1'], 'required': True, 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('alt', { 'aliases': ['alternate', 'allele1', 'A2'], 'required': True, 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('rsids', { 'from_assoc_files': False, 'tooltip_underscoretemplate': '<% _.each(_.filter((d.rsids||"").split(",")), function(rsid) { %>rsid: <%= rsid %><br><% }) %>', 'tooltip_lztemplate': { 'condition': 'rsid', 'template': '<strong>{{rsid}}</strong><br>' }, }), ('nearest_genes', { 'from_assoc_files': False, 'tooltip_underscoretemplate': 'nearest gene<%= _.contains(d.nearest_genes, ",")? 
"s":"" %>: <%= d.nearest_genes %><br>', 'tooltip_lztemplate': False, }), ]) default_per_assoc_fields = OrderedDict([ ('pval', { 'aliases': ['PVALUE', 'P'], 'required': True, 'type': float, 'nullable': True, 'range': [0, 1], 'sigfigs': 2, 'tooltip_lztemplate': { 'condition': False, 'template': ('{{#if pvalue}}P-value: <strong>{{pvalue|scinotation}}</strong><br>{{/if}}\n' + '{{#if pval}}P-value: <strong>{{pval|scinotation}}</strong><br>{{/if}}' ), }, 'display': 'P-value', }), ('beta', { 'type': float, 'nullable': True, 'sigfigs': 2, 'tooltip_underscoretemplate': 'Beta: <%= d.beta %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta %>)<% } %><br>', 'tooltip_lztemplate': 'Beta: <strong>{{beta}}</strong>{{#if sebeta}} ({{sebeta}}){{/if}}<br>', 'display': 'Beta', }), ('sebeta', { 'aliases': ['se'], 'type': float, 'nullable': True, 'sigfigs': 2, 'tooltip_underscoretemplate': False, 'tooltip_lztemplate': False, }), ('or', { 'type': float, 'nullable': True, 'range': [0, None], 'sigfigs': 2, 'display': 'Odds Ratio', }), ('maf', { 'type': float, 'range': [0, 0.5], 'sigfigs': 2, 'tooltip_lztemplate': { 'transform': '|percent' }, 'display': 'MAF', }), ('af', { 'aliases': ['A1FREQ', 'FRQ'], 'type': float, 'range': [0, 1], 'proportion_sigfigs': 2, 'tooltip_lztemplate': { 'transform': '|percent' }, 'display': 'AF', }), ('ac', { 'type': float, 'range': [0, None], 'decimals': 1, 'display': 'AC', }), ('r2', { 'type': float, 'proportion_sigfigs': 2, 'nullable': True, 'display': 'R2', }), ('tstat', { 'type': float, 'sigfigs': 2, 'nullable': True, 'display': 'Tstat', }), ]) default_per_pheno_fields = OrderedDict([ ('num_cases', { 'aliases': ['NS.CASE', 'N_cases'], 'type': int, 'nullable': True, 'range': [0, None], 'display': '#cases', }), ('num_controls', { 'aliases': ['NS.CTRL', 'N_controls'], 'type': int, 'nullable': True, 'range': [0, None], 'display': '#controls', }), ('num_samples', { 'aliases': ['NS', 'N'], 'type': int, 'nullable': True, 'range': [0, None], 'display': '#samples', }), # TODO: phenocode, phenostring, category, &c? # TODO: include `assoc_files` with {never_send: True}? ]) conf.parse.null_values = deepcopy(default_null_values) conf.parse.per_variant_fields = deepcopy(default_per_variant_fields) conf.parse.per_assoc_fields = deepcopy(default_per_assoc_fields) conf.parse.per_pheno_fields = deepcopy(default_per_pheno_fields) conf.parse.fields = OrderedDict( itertools.chain(conf.parse.per_variant_fields.items(), conf.parse.per_assoc_fields.items(), conf.parse.per_pheno_fields.items())) assert len(conf.parse.fields) == len(conf.parse.per_variant_fields) + len( conf.parse.per_assoc_fields) + len( conf.parse.per_pheno_fields) # no overlaps! 
if 'aliases' in conf: for alias, field in conf.aliases.items(): conf.parse.fields[field].setdefault('aliases', []).append(alias) if 'null_values' in conf: conf.parse.null_values.extend(conf.null_values) # make all aliases lowercase and add parsers for field_name, field_dict in conf.parse.fields.items(): for k, v in default_field.items(): field_dict.setdefault(k, v) field_dict['aliases'] = list( set([field_name.lower()] + [alias.lower() for alias in field_dict['aliases']])) field_dict['_parse'] = Field(field_dict).parse field_dict['_read'] = Field(field_dict).read _repeated_aliases = [ alias for alias, count in Counter( itertools.chain.from_iterable( f['aliases'] for f in conf.parse.fields.values())).most_common() if count > 1 ] if _repeated_aliases: raise utils.PheWebError( 'The following aliases appear for multiple fields: {}'.format( _repeated_aliases)) def get_tooltip_underscoretemplate(): template = '' for fieldname, field in conf.parse.fields.items(): if 'tooltip_underscoretemplate' in field: if field['tooltip_underscoretemplate'] is False: continue else: template += '<% if(_.has(d, ' + repr( fieldname) + ')) { %>' + field[ 'tooltip_underscoretemplate'] + '<% } %>\n' else: template += '<% if(_.has(d, ' + repr( fieldname) + ')) { %>' + field.get( 'display', fieldname) + ': <%= d[' + repr( fieldname) + '] %><br><% } %>\n' return template conf.parse.tooltip_underscoretemplate = get_tooltip_underscoretemplate() def get_tooltip_lztemplate(): template = '' for fieldname, field in conf.parse.fields.items(): lzt = field.get('tooltip_lztemplate', {}) if lzt is False: continue if isinstance(lzt, str): lzt = {'template': lzt} if 'template' not in lzt: lzt['template'] = field.get( 'display', fieldname) + ': <strong>{{' + fieldname + lzt.get( 'transform', '') + '}}</strong><br>' if 'condition' not in lzt: lzt['condition'] = fieldname if not lzt['condition']: template += lzt['template'] + '\n' else: template += '{{#if ' + lzt['condition'] + '}}' + lzt[ 'template'] + '{{/if}}\n' return template conf.parse.tooltip_lztemplate = get_tooltip_lztemplate()
def create_benchmark_file(app, path, contents=''):
    loc = os.path.join(app["path"] + path)
    fu.mkdir_p(os.path.dirname(loc))
    with open(loc, 'w+') as f:
        f.write(contents)
    return loc
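# Hedged usage sketch for create_benchmark_file: note the location is built by
# string concatenation (app["path"] + path), so `path` is expected to carry its
# own leading separator. The app dict and file names below are made up.
#
#   app = {"path": "/tmp/bench-app"}
#   loc = create_benchmark_file(app, "/inputs/reads.fastq", contents="@r1\nACGT\n+\nIIII\n")
#   # -> returns "/tmp/bench-app/inputs/reads.fastq" after creating /tmp/bench-app/inputs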
def run_measurements(producer, source_as_file, exp_start, utc): global DR_LOG_FILE global GENERAL_LOG_FILE global FRRP_LOG_FILE global EXP_ID global PREFIX global CONST_WAIT_TIME global ROUTER_CLIENTS dir_name = "results/{}_{}".format(EXP_ID, utc.timestamp) mkdir_p(dir_name) with open(DR_LOG_FILE, 'w') as f: f.write('# Entry formats\n') f.write('# Error: -1|src,asn,msg,err_msg\n') f.write('# DR Entry: 0|asn,degree,has_default_route\n') with open(FRRP_LOG_FILE, 'w') as f: f.write('# Entry formats\n') f.write('# Error: -1|src,asn,msg,err_msg\n') f.write( '# Final Graph: 1|src,asn,graph_path,measured_graph_file\n' ) f.write( '# FRRP Result: 0|src,asn,atlas,m_id,oisons,poison_set,path,as-path,rtts,as-rtts\n' ) f.write( '# Lost Connectivity: 2|src,asn,atlas,m_id,poisons,poison_set,path,as-path,rtts,as-rtts\n' ) f.write( "# Invalid Result -2|src,asn,msg,first_path_not_connected\n") source_ases = parse_source_as_file(source_as_file) prefix_no_subnet = str(PREFIX.split('/')[0]) start_collector(producer, PREFIX, '{}_{}'.format(EXP_ID, prefix_no_subnet)) for source_asn, source_asn_degree in source_ases: try: with open( os.path.join(dir_name, 'run_file_{}.p'.format(source_asn)), 'wb') as run_filename: frrp_run = FRRPRun(source_asn) log_message("{}Starting Experiment for AS{}{}".format( '-' * 10, source_asn, '-' * 10)) all_probes = find_probes_by_asn(source_asn) if not all_probes or len(all_probes) == 0: log_message("AS{} does not have a stable probe!".format( source_asn)) write_frrp_entry( "-1|src,{},msg,no_stable_probe".format(source_asn)) write_default_route_entry( "-1|src,{},msg,no_stable_probe".format(source_asn)) frrp_run.add_error("No stable probe") frrp_run.no_stable_probe = True pickle.dump(frrp_run, run_filename) continue src_graph = nx.DiGraph() source_as = AS(source_asn) source_as.set_degree(source_asn_degree) src_graph.add_node(source_as) # Make normal announcement the very first time before doing a new ATLAS source if PORT2 is not None: ports = [PORT, PORT2] else: ports = [PORT] log_message( 'Making announcement to {} and waiting {} seconds for AS SEQ: {}' .format(PREFIX, CONST_WAIT_TIME, None)) make_announcement(ports) time.sleep(CONST_WAIT_TIME) poison_cache = PoisonCache() err = measure_single_source(frrp_run, src_graph, source_as, poison_cache=poison_cache, first=True) source_as.add_poison_cache(poison_cache) frrp_run.poison_cache = poison_cache if err == -1: log_message( "ATLAS AS {} returns error when trying to traceroute...moving on." 
.format(source_asn)) with open('misnomer_atlas_ases.txt', 'a') as f: f.write('{}\n'.format(source_asn)) write_frrp_entry( "-1|src,{},msg,atlas_source_traceroute_error".format( source_asn)) write_default_route_entry( "-1|src,{},msg,atlas_source_traceroute_error".format( source_asn)) frrp_run.add_error("source_traceroute_error") pickle.dump(frrp_run, run_filename) continue elif err == -2: log_message("ATLAS AS {} returns empty response.".format( source_asn)) with open('rerun_atlas_ases.txt', 'a') as f: f.write('{}\n'.format(source_asn)) write_frrp_entry( "-1|src,{},msg,atlas_source_empty_response".format( source_asn)) write_default_route_entry( "-1|src,{},msg,atlas_source_empty_response".format( source_asn)) frrp_run.add_error("empty_source_response") pickle.dump(frrp_run, run_filename) continue has_default_route = src_graph.out_degree(source_as) <= 1 log_message("ATLAS AS {} has a default route: {}".format( source_asn, has_default_route)) write_default_route_entry('0|{},{},{}'.format( source_asn, source_as.degree, has_default_route)) frrp_run.has_default_route = has_default_route mkdir_p('{}/graph_results'.format(dir_name)) draw_graph(src_graph, '{}/graph_results'.format(dir_name), 'atlas-as-{}-measured-graph'.format(source_asn)) gpickle_name = '{}/graph_results/atlas-as-{}-{}-{}-measured-graph.gpickle'.format( dir_name, source_asn, exp_start, EXP_ID) nx.write_gpickle(src_graph, gpickle_name) frrp_run.graph_path = gpickle_name write_frrp_entry('1|src,{},graph_path,{}'.format( source_asn, gpickle_name)) pickle.dump(frrp_run, run_filename) except Exception as e: log_message("Receieved exception: {}".format(e), print_stack=True)
def ensure_path(path):
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        with log.sky_log.debug('ensure_path', path=path):
            fileutils.mkdir_p(path)
    return path
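# Hedged usage sketch for ensure_path: '~' is expanded and the directory is
# created only when missing, so the returned path is always safe to use
# directly. The cache location below is illustrative only.
#
#   cache_dir = ensure_path('~/.mytool/cache')
#   state_file = os.path.join(cache_dir, 'state.json')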
def test_dir():
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'tmp', 'tests')
    fu.mkdir_p(path)
    return tempfile.mkdtemp(dir=path)
def extract_zip(zip_file, dest_dir, members=None):
    if not zipfile.is_zipfile(zip_file):
        return
    mkdir_p(dest_dir)
    with zipfile.ZipFile(zip_file, 'r') as zf:
        zf.extractall(dest_dir, members)
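# Hedged, self-contained usage sketch for extract_zip: non-zip inputs are
# skipped silently, the destination directory is created if needed, and
# `members` restricts extraction to a subset of archive entries. The demo
# builds a throwaway archive first; all paths here are temporary.
import os
import tempfile
import zipfile

def _demo_extract_zip():
    workdir = tempfile.mkdtemp()
    archive = os.path.join(workdir, 'bundle.zip')
    with zipfile.ZipFile(archive, 'w') as zf:
        zf.writestr('README', 'hello\n')
        zf.writestr('data/values.txt', '1\n2\n')
    extract_zip(archive, os.path.join(workdir, 'all'))                      # everything
    extract_zip(archive, os.path.join(workdir, 'one'), members=['README'])  # one entry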
def _writer(self, file, relate_dir, out_dir):
    out_dir = join_path(out_dir, relate_dir)
    mkdir_p(out_dir)
    out_file = join_path(out_dir, os.path.basename(file))
    return Line_Writer(out_file)
def main(random_seed, task_number, test_on_gt, only_test, overfit): random.seed(random_seed) np.random.seed(random_seed) torch.manual_seed(random_seed) torch.cuda.manual_seed_all(random_seed) n_epochs = 3 lr = 1e-4 wd = 0 train_db = CocoTasksRanker(task_number) if test_on_gt: test_db = CocoTasksRankerTestGT(task_number) else: test_db = CocoTasksRankerTest(task_number) network = Ranker() optimizer = Adam(network.parameters(), lr=lr, weight_decay=wd) experiment = Experiment(network, train_db, optimizer=optimizer, tensorboard=True) folder = "single-task-ranker-baseline-tn:{tn}-seed:{s}".format( tn=task_number, s=random_seed ) folder = os.path.join(SAVING_DIRECTORY, folder) mkdir_p(folder) if not only_test: # train experiment.train_n_epochs(n_epochs, lr_scheduler=True, overfit=overfit) # save model torch.save(network.state_dict(), os.path.join(folder, "model.mdl")) else: # load model network.load_state_dict(torch.load(os.path.join(folder, "model.mdl"))) # test model detections = experiment.do_test(test_db) # save detections with open( os.path.join(folder, "detections_teg:{teg}.json".format(teg=test_on_gt)), "w" ) as f: json.dump(detections, f) detections_per_image = defaultdict(list) for d in detections: detections_per_image[d["image_id"]].append(d) fusion = "top_k" fused_detections = fuse(detections_per_image=detections_per_image) with open( os.path.join( folder, "detections_teg:{teg}_f:{f}.json".format(teg=test_on_gt, f=fusion) ), "w", ) as f: json.dump(fused_detections, f) # perform evaluation with redirect_stdout(open(os.devnull, "w")): gtCOCO = test_db.task_coco dtCOCO = gtCOCO.loadRes( os.path.join( folder, "detections_teg:{teg}_f:{f}.json".format(teg=test_on_gt, f=fusion), ) ) cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox") cocoEval.params.catIds = 1 cocoEval.evaluate() cocoEval.accumulate() cocoEval.summarize() print("fusion: %s - mAP:\t\t %1.6f" % (fusion, cocoEval.stats[0])) print("fusion: %s - [email protected]:\t\t %1.6f" % (fusion, cocoEval.stats[1])) # save evaluation performance with open( os.path.join( folder, "result_teg:{teg}_f:{f}.json".format(teg=test_on_gt, f=fusion) ), "w", ) as f: f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
def create_app_structure(path_app):
    bf.mkdir_p(path_app)
    bf.mkdir_p(path_app + "/dockerfiles")
def main(random_seed, task_number, test_on_gt, only_test, overfit):
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    n_epochs = 3
    lr = 1e-2
    wd = 0

    train_db = CocoTasksGT(task_number, "train")
    if test_on_gt:
        test_db = CocoTasksTestGT(task_number)
    else:
        test_db = CocoTasksTest(task_number)

    network = ClassifierBaselineNetwork()
    optimizer = SGD(network.parameters(), lr=lr, weight_decay=wd)
    experiment = ClassifierExperiment(network, train_db, optimizer=optimizer)

    folder = "single-task-classifier-baseline-tn:{tn}-seed:{s}".format(
        tn=task_number, s=random_seed)
    folder = os.path.join(SAVING_DIRECTORY, folder)
    mkdir_p(folder)

    if not only_test:
        # train
        experiment.train_n_epochs(n_epochs, lr_scheduler=True, overfit=overfit)
        # save model
        torch.save(network.state_dict(), os.path.join(folder, "model.mdl"))
    else:
        # load model
        network.load_state_dict(torch.load(os.path.join(folder, "model.mdl")))

    # test_model
    detections = experiment.do_test(test_db)

    # save detections
    with open(
            os.path.join(folder, "detections-teg:{teg}.json".format(teg=test_on_gt)),
            "w") as f:
        json.dump(detections, f)

    # perform evaluation
    with redirect_stdout(open(os.devnull, "w")):
        gtCOCO = test_db.task_coco
        dtCOCO = gtCOCO.loadRes(
            os.path.join(folder, "detections-teg:{teg}.json".format(teg=test_on_gt)))
        cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox")
        cocoEval.params.catIds = 1
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

    print("mAP:\t\t %1.6f" % cocoEval.stats[0])
    print("mAP@0.5:\t\t %1.6f" % cocoEval.stats[1])

    # save evaluation performance
    with open(
            os.path.join(folder, "result-teg:{teg}.txt".format(teg=test_on_gt)),
            "w") as f:
        f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
def get_cacheable_file_location(default_relative_dir, basename):
    if conf.cache:
        return os.path.join(conf.cache, basename)
    mkdir_p(get_generated_path(default_relative_dir))
    return get_generated_path(default_relative_dir, basename)
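For illustration, the behaviour splits on whether a cache directory is configured; the exact paths below are hypothetical and depend on how `conf.cache` and `get_generated_path` are set up elsewhere:

# With a cache configured, the basename goes straight into it:
#   conf.cache = '/home/user/.pheweb/cache'
#   get_cacheable_file_location('sites', 'sites.tsv')  ->  '/home/user/.pheweb/cache/sites.tsv'
# Without a cache, it falls back to the generated-files tree, creating the
# subdirectory on demand via mkdir_p:
#   conf.cache = False
#   get_cacheable_file_location('sites', 'sites.tsv')  ->  <generated dir>/sites/sites.tsv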
def run_game():
    parser = argparse.ArgumentParser(description="Legend of Harren")
    parser.add_argument(
        "-l",
        "--log-level",
        default="INFO",
        choices=("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"),
        help="Logging level for command output.",
    )
    parser.add_argument(
        "-L",
        "--logfile",
        dest="logfile",
        default=None,
        help="Location to place a log of the process output",
    )
    parser.add_argument(
        "-V",
        "--version",
        action="version",
        version=get_version(),
        help="Display the version number.",
    )
    parser.add_argument(
        "-g",
        "--fullscreen",
        dest="fullscreen",
        action="store_true",
        help="Launch the new game in fullscreen mode",
    )
    parser.add_argument(
        "--no-splash",
        dest="no_splash",
        action="store_true",
        help="Skip the initial loading splash screen",
    )
    parser.add_argument(
        "--no-sound",
        dest="no_sound",
        action="store_true",
        help="Disable sound",
    )
    parsed_args = parser.parse_args()

    # Get logging related arguments & then configure logging
    if parsed_args.logfile:
        logfile = os.path.abspath(parsed_args.logfile)
    else:
        logfile = None

    # Don't bother with a file handler if we're not logging to a file
    handlers = ["console", "filehandler"] if logfile else ["console"]

    # The base logging configuration
    BASE_CONFIG = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "ConsoleFormatter": {
                "()": ColorFormatter,
                "format": "%(levelname)s: %(message)s",
                "datefmt": "%Y-%m-%d %H:%M:%S",
            },
            "VerboseFormatter": {
                "()": ColorFormatter,
                "format": ("%(levelname)-8s: %(asctime)s '%(message)s' "
                           "%(name)s:%(lineno)s"),
                "datefmt": "%Y-%m-%d %H:%M:%S",
            },
            "FileFormatter": {
                "()": ColorStripper,
                "format": ("%(levelname)-8s: %(asctime)s '%(message)s' "
                           "%(name)s:%(lineno)s"),
                "datefmt": "%Y-%m-%d %H:%M:%S",
            },
        },
        "handlers": {
            "console": {
                "level": "DEBUG",
                "class": "logging.StreamHandler",
                "formatter": "ConsoleFormatter",
            },
        },
        "loggers": {
            "harren": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
            "pygame": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
            "pytmx": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
            "pyscroll": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
        },
    }

    # If we have a log file, modify the dict to add in the filehandler conf
    if logfile:
        BASE_CONFIG["handlers"]["filehandler"] = {
            "level": parsed_args.log_level,
            "class": "logging.handlers.RotatingFileHandler",
            "filename": logfile,
            "formatter": "FileFormatter",
        }

    if parsed_args.log_level == "DEBUG":
        # Set a more noisy formatter
        BASE_CONFIG["handlers"]["console"]["formatter"] = "VerboseFormatter"

    # Setup the loggers
    dictConfig(BASE_CONFIG)

    LOG.info("#g<Launching Harren RPG!>")

    # Setup SDL Environment Variables
    os.environ["SDL_VIDEO_CENTERED"] = "1"

    try:
        import pygame  # noqa
    except ImportError:
        LOG.exception("#y<PyGame not found... exiting.>")
        sys.exit(1)

    # Make the config folder if it doesn't already exist
    mkdir_p(resources.CONFIG_FOLDER)

    # Disable or enable sound
    if parsed_args.no_sound:
        sound_enabled = False
    else:
        sound_enabled = True

    from harren.game_loop import GameState
    game = GameState(
        fullscreen=parsed_args.fullscreen,
        no_splash=parsed_args.no_splash,
        sound_enabled=sound_enabled,
    )
    game.main()
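The logging setup above is the standard `logging.config.dictConfig` pattern: build one configuration dict, graft in a file handler only when a log file was requested, then apply it once. A stripped-down, self-contained sketch of the same pattern (standard-library classes only; the handler and logger names are illustrative):

import logging
from logging.config import dictConfig

def configure_logging(logfile=None, level="INFO"):
    # Reference the file handler only when it will actually be defined.
    handlers = ["console", "filehandler"] if logfile else ["console"]
    config = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {"plain": {"format": "%(levelname)s: %(message)s"}},
        "handlers": {
            "console": {"class": "logging.StreamHandler", "formatter": "plain"},
        },
        "loggers": {"harren": {"handlers": handlers, "level": level}},
    }
    if logfile:
        # Added only when a log file was requested, mirroring run_game() above.
        config["handlers"]["filehandler"] = {
            "class": "logging.handlers.RotatingFileHandler",
            "filename": logfile,
            "formatter": "plain",
        }
    dictConfig(config)
    return logging.getLogger("harren")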
def _ensure_conf():
    if not hasattr(conf, 'authentication'):
        conf.set_default_value('authentication', False)

    if hasattr(conf, 'data_dir'):
        conf.data_dir = os.path.abspath(conf.data_dir)
    else:
        conf.set_default_value(
            'data_dir',
            os.path.abspath(
                os.environ.get('PHEWEB_DATADIR', False) or os.path.curdir))

    ## Get `conf.cache` working because it's needed for reporting errors
    def _configure_cache():
        conf.set_default_value(
            'cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache')))
        if conf.cache is False:
            return
        if conf.has_own_property('cache'):
            conf.cache = os.path.abspath(
                os.path.join(conf.data_dir, os.path.expanduser(conf.cache)))
        if not os.path.isdir(conf.cache):
            try:
                mkdir_p(conf.cache)
            except PermissionError:
                print(
                    "Warning: caching is disabled because the directory {!r} can't be created.\n"
                    .format(conf.cache) +
                    "If you don't want caching, set `cache = False` in your config.py.")
                conf.cache = False
                return
        if not os.access(conf.cache, os.R_OK):
            print(
                'Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n'
                .format(conf.cache) +
                "If you don't want caching, set `cache = False` in your config.py.")
            conf.cache = False

    _configure_cache()

    def _load_config_file():
        _config_filepath = os.path.join(conf.data_dir, 'config.py')
        if os.path.isfile(_config_filepath):
            try:
                _conf_module = imp.load_source('config', _config_filepath)
            except Exception:
                raise utils.PheWebError(
                    "PheWeb tried to load your config.py at {!r} but it failed."
                    .format(_config_filepath))
            else:
                for key in dir(_conf_module):
                    if not key.startswith('_'):
                        conf[key] = getattr(_conf_module, key)
        print(str(conf))
        if conf.authentication:
            try:
                _auth_module = imp.load_source('config', conf.authentication_file)
            except Exception:
                raise utils.PheWebError(
                    "PheWeb tried to load your authentication file at {!r} but it failed."
                    .format(conf.authentication_file))
            else:
                for key in dir(_auth_module):
                    if not key.startswith('_'):
                        conf[key] = getattr(_auth_module, key)

    _load_config_file()

    conf.set_default_value(
        'custom_templates',
        lambda: os.path.join(conf.data_dir, 'custom_templates'),
        is_function=True)
    conf.set_default_value('debug', False)
    conf.set_default_value('quick', False)
    conf.set_default_value('assoc_min_maf', 0)
    conf.set_default_value('variant_inclusion_maf', 0)
    conf.set_default_value('within_pheno_mask_around_peak', int(500e3))
    conf.set_default_value('between_pheno_mask_around_peak', int(1e6))
    conf.set_default_value('manhattan_num_unbinned', 2000)
    conf.set_default_value('manhattan_unbin_anyway_pval', 5e-8)
    conf.set_default_value('manhattan_hla_num_unbinned', 200)
    conf.set_default_value('hla_begin', 26000000)
    conf.set_default_value('hla_end', 36000000)
    conf.set_default_value("n_query_threads", 4)
    conf.set_default_value('peak_pval_cutoff', 1e-6)

    if 'minimum_maf' in conf:
        raise utils.PheWebError(
            "minimum_maf has been deprecated. Please remove it and use "
            "assoc_min_maf and/or variant_inclusion_maf instead")

    if conf.get('login', {}).get('whitelist', None):
        conf.login['whitelist'] = [
            addr.lower() for addr in conf.login['whitelist']
        ]

    if not os.path.isdir(conf.data_dir):
        mkdir_p(conf.data_dir)
    if not os.access(conf.data_dir, os.R_OK):
        raise utils.PheWebError(
            "Your data directory, {!r}, is not readable.".format(conf.data_dir))

    ### Parsing

    def scientific_int(value):
        '''like int(value) but accepts "1.3e-4"'''
        try:
            return int(value)
        except ValueError:
            x = float(value)
            if x.is_integer():
                return int(x)
            raise

    class Field:
        def __init__(self, d):
            self._d = d

        def parse(self, value):
            '''parse from input file'''
            # nullable
            if self._d['nullable'] and value in conf.parse.null_values:
                return ''
            # type
            x = self._d['type'](value)
            # range
            if 'range' in self._d:
                assert self._d['range'][0] is None or x >= self._d['range'][0]
                assert self._d['range'][1] is None or x <= self._d['range'][1]
            if 'sigfigs' in self._d:
                x = utils.round_sig(x, self._d['sigfigs'])
            if 'decimals' in self._d:
                x = round(x, self._d['decimals'])
            return x

        def read(self, value):
            '''read from internal file'''
            if self._d['nullable'] and (value == '' or value == 'NA'):
                return ''
            x = self._d['type'](value)
            if 'range' in self._d:
                assert self._d['range'][0] is None or x >= self._d['range'][0]
                assert self._d['range'][1] is None or x <= self._d['range'][1]
            return x

    default_null_values = ['', '.', 'NA', 'nan', 'NaN']

    default_field = {
        'aliases': [],
        'required': False,
        'type': str,
        'nullable': False,
        'from_assoc_files': True,  # if this is False, then the field will not be parsed from input files, because annotation will add it.
    }

    default_per_variant_fields = OrderedDict([
        ('chrom', {
            'aliases': ['#CHROM', 'chr'],
            'required': True,
            'tooltip_underscoretemplate': '<b><%= d.chrom %>:<%= d.pos.toLocaleString() %> <%= d.ref %> / <%= d.alt %></b><br>',
            'tooltip_lztemplate': False,
        }),
        ('pos', {
            'aliases': ['BEG', 'BEGIN', 'BP'],
            'required': True,
            'type': scientific_int,
            'range': [0, None],
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('ref', {
            'aliases': ['reference', 'allele0'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('alt', {
            'aliases': ['alternate', 'allele1'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('rsids', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate': '<% _.each(_.filter((d.rsids||"").split(",")), function(rsid) { %>rsid: <%= rsid %><br><% }) %>',
            'tooltip_lztemplate': {
                'condition': 'rsid',
                'template': '<strong>{{rsid}}</strong><br>'
            },
        }),
        ('nearest_genes', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate': 'nearest gene<%= _.contains(d.nearest_genes, ",")? "s":"" %>: <%= d.nearest_genes %><br>',
            'tooltip_lztemplate': False,
        }),
    ])

    default_per_assoc_fields = OrderedDict([
        ('pheno', {
            'tooltip_lztemplate': 'phenotype: <strong>{{trait:pheno}}</strong><br>',
        }),
        ('pval', {
            'aliases': ['PVALUE'],
            'required': True,
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'p-value: <%= pValueToReadable(d.pval) %><br>',
            'tooltip_lztemplate': {
                'condition': False,
                'template': ('{{#if trait:pvalue}}p-value: <strong>{{trait:pvalue|scinotation}}</strong><br>{{/if}}\n' +
                             '{{#if trait:pval}}p-value: <strong>{{trait:pval|scinotation}}</strong><br>{{/if}}'),
            },
            'display': 'P-value',
        }),
        ('beta', {
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'beta: <%= d.beta.toFixed(2) %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta.toFixed(2) %>)<% } %><br>',
            'tooltip_lztemplate': 'beta: <strong>{{trait:beta}}</strong>{{#if trait:sebeta}} ({{trait:sebeta}}){{/if}}<br>',
            'display': 'Beta',
        }),
        ('sebeta', {
            'aliases': ['se'],
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('or', {
            'type': float,
            'nullable': True,
            'range': [0, None],
            'sigfigs': 2,
            'display': 'Odds Ratio',
        }),
        ('maf', {
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF: <%= d.maf.toFixed(4) %><br>',
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF',
        }),
        ('maf_case', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF cases: <%= d.maf_case.toFixed(4) %><br>',
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF cases',
        }),
        ('maf_control', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF controls: <%= d.maf_control.toFixed(4) %><br>',
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF controls',
        }),
        ('maf_cases', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF cases: <%= d.maf_cases.toFixed(4) %><br>',
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF cases',
        }),
        ('maf_controls', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF controls: <%= d.maf_controls.toFixed(4) %><br>',
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF controls',
        }),
        ('af', {
            'aliases': ['A1FREQ'],
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,  # TODO: never round 99.99% to 100%. Make sure MAF would have the right sigfigs.
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'AF',
        }),
        ('ac', {
            'type': float,
            'range': [0, None],
            'decimals': 1,
            'tooltip_underscoretemplate': 'AC: <%= d.ac.toFixed(1) %> <br>',
            'display': 'AC',
        }),
        ('r2', {
            'type': float,
            'sigfigs': 2,
            'nullable': True,
            'display': 'R2',
        }),
        ('tstat', {
            'type': float,
            'sigfigs': 2,
            'nullable': True,
            'display': 'Tstat',
        }),
        ('n_cohorts', {'type': int}),
        ('n_hom_cases', {'type': float}),
        ('n_het_cases', {'type': float}),
        ('n_hom_controls', {'type': float}),
        ('n_het_controls', {'type': float}),
    ])

    default_per_pheno_fields = OrderedDict([
        ('n_case', {
            'aliases': ['NS.CASE', 'N_cases'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#cases',
        }),
        ('n_control', {
            'aliases': ['NS.CTRL', 'N_controls'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#controls',
        }),
        ('num_samples', {
            'aliases': ['NS', 'N'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#samples',
        }),
        # TODO: phenocode, phenostring, category, &c?
        # TODO: include `assoc_files` with {never_send: True}?
    ])

    default_GWAS_catalog_fields = OrderedDict([
        ('pheno', {
            'tooltip_lztemplate': 'phenotype: <strong>{{pheno}}</strong><br>',
        }),
        ('pval', {
            'aliases': ['PVALUE'],
            'required': True,
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'p-value: <%= pValueToReadable(d.pval) %><br>',
            'tooltip_lztemplate': {
                'condition': False,
                'template': ('{{#if pvalue}}p-value: <strong>{{pvalue|scinotation}}</strong><br>{{/if}}\n' +
                             '{{#if pval}}p-value: <strong>{{pval|scinotation}}</strong><br>{{/if}}'),
            },
            'display': 'P-value',
        }),
        ('beta', {
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'beta: <%= d.beta.toFixed(2) %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta.toFixed(2) %>)<% } %><br>',
            'tooltip_lztemplate': 'beta: <strong>{{beta}}</strong>{{#if sebeta}} ({{sebeta}}){{/if}}<br>',
            'display': 'Beta',
        }),
        ('sebeta', {
            'aliases': ['se'],
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('or', {
            'type': float,
            'nullable': True,
            'range': [0, None],
            'sigfigs': 2,
            'display': 'Odds Ratio',
        }),
        ('maf', {
            'type': float,
            'range': [0, 0.5],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF: <%= d.maf.toFixed(4) %><br>',
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF',
        }),
        ('maf_cases', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF cases: <%= d.maf_cases.toFixed(4) %><br>',
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF cases',
        }),
    ])

    conf.parse.null_values = deepcopy(default_null_values)
    conf.parse.per_variant_fields = deepcopy(default_per_variant_fields)
    conf.parse.per_assoc_fields = deepcopy(default_per_assoc_fields)
    conf.parse.per_pheno_fields = deepcopy(default_per_pheno_fields)
    conf.parse.fields = OrderedDict(
        itertools.chain(conf.parse.per_variant_fields.items(),
                        conf.parse.per_assoc_fields.items(),
                        conf.parse.per_pheno_fields.items()))
    assert len(conf.parse.fields) == (
        len(conf.parse.per_variant_fields) + len(conf.parse.per_assoc_fields) +
        len(conf.parse.per_pheno_fields))  # no overlaps!

    if 'aliases' in conf:
        for alias, field in conf.aliases.items():
            conf.parse.fields[field].setdefault('aliases', []).append(alias)

    if 'null_values' in conf:
        conf.parse.null_values.extend(conf.null_values)

    # make all aliases lowercase and add parsers
    for field_name, field_dict in conf.parse.fields.items():
        for k, v in default_field.items():
            field_dict.setdefault(k, v)
        field_dict['aliases'] = list(
            set([field_name.lower()] +
                [alias.lower() for alias in field_dict['aliases']]))
        field_dict['_parse'] = Field(field_dict).parse
        field_dict['_read'] = Field(field_dict).read

    _repeated_aliases = [
        alias for alias, count in Counter(
            itertools.chain.from_iterable(
                f['aliases'] for f in conf.parse.fields.values())).most_common()
        if count > 1
    ]
    if _repeated_aliases:
        raise utils.PheWebError(
            'The following aliases appear for multiple fields: {}'.format(
                _repeated_aliases))

    def get_tooltip_underscoretemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            if 'tooltip_underscoretemplate' in field:
                if field['tooltip_underscoretemplate'] is False:
                    continue
                else:
                    template += ('<% if(_.has(d, ' + repr(fieldname) + ')) { %>' +
                                 field['tooltip_underscoretemplate'] + '<% } %>\n')
            else:
                template += ('<% if(_.has(d, ' + repr(fieldname) + ')) { %>' +
                             field.get('display', fieldname) + ': <%= d[' +
                             repr(fieldname) + '] %><br><% } %>\n')
        return template

    conf.parse.tooltip_underscoretemplate = get_tooltip_underscoretemplate()

    def get_tooltip_lztemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            lzt = field.get('tooltip_lztemplate', {})
            if lzt is False:
                continue
            if isinstance(lzt, str):
                lzt = {'template': lzt}
            if 'template' not in lzt:
                lzt['template'] = (field.get('display', fieldname) +
                                   ': <strong>{{' + fieldname +
                                   lzt.get('transform', '') + '}}</strong><br>')
            if 'condition' not in lzt:
                lzt['condition'] = fieldname

            if not lzt['condition']:
                template += lzt['template'] + '\n'
            else:
                template += ('{{#if ' + lzt['condition'] + '}}' +
                             lzt['template'] + '{{/if}}\n')
        return template

    conf.parse.tooltip_lztemplate = get_tooltip_lztemplate()

    ## these fields will be exported in this order when exporting variants to TSV.
    conf.set_default_value("var_export_fields", [
        'chrom', 'pos', 'ref', 'alt', 'maf', 'maf_cases', 'maf_controls',
        'most_severe', 'nearest_genes', 'rsids', "annotation.ac",
        "annotation.ac_hemi", "annotation.ac_het", "annotation.ac_hom",
        'annotation.an', "annotation.info", "annotation.hc_lof"
    ])

    ## these fields will be exported in this order when exporting variants to TSV.
    conf.set_default_value("var_top_pheno_export_fields", [
        "phenocode", "phenostring", "category", "pval", "beta", "maf",
        "maf_case", "maf_control", "n_case", "n_control"
    ])
    conf.set_default_value("gene_pheno_export_fields", [
        "variant.varid", "assoc.pval", "assoc.beta", "assoc.variant.rsids",
        "pheno.category", "pheno.num_cases", "pheno.num_controls",
        "pheno.phenocode", "pheno.phenostring",
        "variant.annotation.gnomad.AF_fin", "variant.annotation.gnomad.AF_nfe"
    ])
    conf.set_default_value("drug_export_fields", [
        "drug.molecule_name", "drug.molecule_type",
        "evidence.target2drug.action_type", "disease.efo_info.label",
        "evidence.drug2clinic.clinical_trial_phase.label", "drug.id"
    ])
    conf.set_default_value("lof_export_fields", [
        "pheno", "variants", "p_value", "beta", "ref_alt_cases", "ref_alt_ctrls"
    ])
    conf.set_default_value("report_conf", {"func_var_assoc_threshold": 0.0001})
    conf.set_default_value(
        "vis_conf", {
            "loglog_threshold": 10,
            "info_tooltip_threshold": 0.8,
            "manhattan_colors": ['rgb(53,0,212)', 'rgb(40, 40, 40)']
        })
    conf.set_default_value(
        "locuszoom_conf", {
            "p_threshold": 0.05,
            "prob_threshold": 0.0001,
            "ld_service": "finngen",
            "ld_max_window": 5000000
        })
    conf.set_default_value("lof_threshold", 1e-3)
    conf.set_default_value("noindex", True)
    conf.set_default_value("anno_cpra", True)
    conf.set_default_value("show_ukbb", False)
    conf.set_default_value("show_risteys", False)
    conf.set_default_value("lof_threshold", 1e-3)