Example #1
def config_logbook_pp(log_dir: str,
                      infile_name: str,
                      log_to_file: bool=True,
                      file_level: str='INFO',
                      log_type: str="pp",
                      stream_level: str='DEBUG') -> Iterator[None]:
    """
    Configure logging via logbook for assorted preprocessing scripts.

    Yields once, so it is meant to be used as a context manager
    (e.g. wrapped with ``contextlib.contextmanager``).
    """

    logbook.set_datetime_format("local")

    logname = "{}_{}.log".format(infile_name, log_type)
    log_path = norman_path(log_dir, logname)
    fileutils.mkdir_p(log_dir)

    if log_to_file:
        file_handler = TimedRotatingFileHandler(log_path,
                                                level=file_level,
                                                backup_count=7)
    else:
        file_handler = NullHandler()

    stream_handler = StreamHandler(stream=stdout,
                                   level=stream_level,
                                   bubble=True)

    with file_handler, stream_handler:
        yield
Example #2
def get_tmp_path(arg):
    if arg.startswith(get_generated_path()):
        mkdir_p(get_generated_path('tmp'))
        tmp_basename = arg[len(get_generated_path()):].lstrip(os.path.sep).replace(os.path.sep, '-')
        return get_generated_path('tmp', tmp_basename)
    elif arg.startswith(os.path.sep):
        return arg + '.tmp'
    else:
        mkdir_p(get_generated_path('tmp'))
        return get_generated_path('tmp', arg)
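
A quick usage sketch for get_tmp_path above. The concrete paths are hypothetical and assume get_generated_path() resolves to '/data/generated':

# Illustrative only; assumes get_generated_path() returns '/data/generated'.
get_tmp_path('/data/generated/pheno/a.tsv')  # -> '/data/generated/tmp/pheno-a.tsv'
get_tmp_path('/other/absolute/path.tsv')     # -> '/other/absolute/path.tsv.tmp'
get_tmp_path('relative.tsv')                 # -> '/data/generated/tmp/relative.tsv'
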
Example #3
def save_traffic_stats(lang, project, query_date, limit=DEFAULT_LIMIT):
    '''\
    1. Get articles
    2. Add images and summaries
    3. Prepare and save results
    '''
    articles = make_article_list(query_date,
                                 lang=lang,
                                 project=project)
    total_traffic = get_project_traffic(query_date, lang, project)
    articles = articles[:limit]
    articles = add_extras(articles, lang=lang, project=project)
    ret = {'articles': articles,
           'formatted_date': format_date(query_date,
                                         format='d MMMM yyyy',
                                         locale=lang),
           'date': {'day': query_date.day,
                    'month': query_date.month,
                    'year': query_date.year},
           'lang': lang,
           'full_lang': LOCAL_LANG_MAP[lang],
           'total_traffic': total_traffic,
           'total_traffic_short': shorten_number(total_traffic),
           'examples': [articles[0],
                        articles[1],
                        articles[2],
                        articles[query_date.day * 2]],  # haha ok..
           'project': project.capitalize(),
           'permalink': DATE_PERMALINK_TMPL.format(lang=lang,
                                                   project=project,
                                                   year=query_date.year,
                                                   month=query_date.month,
                                                   day=query_date.day),
           'meta': {'fetched': datetime.utcnow().isoformat()}}
    outfile_name = DATA_PATH_TMPL.format(lang=lang,
                                         project=project,
                                         year=query_date.year,
                                         month=query_date.month,
                                         day=query_date.day)

    with tlog.critical('saving_single_day_stats') as rec:
        rec['out_file'] = os.path.abspath(outfile_name)
        try:
            out_file = codecs.open(outfile_name, 'w')
        except IOError:
            mkdir_p(os.path.dirname(outfile_name))
            out_file = codecs.open(outfile_name, 'w')
        with out_file:
            data_bytes = json.dumps(ret, indent=2, sort_keys=True)
            rec['len_bytes'] = len(data_bytes)
            out_file.write(data_bytes)

        rec.success('wrote {len_bytes} bytes to {out_file}')

    return
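
The try/except IOError block above (the same pattern appears in Examples #5 and #24) lazily creates the output file's parent directory on the first failed open. A standalone sketch of that idea, with hypothetical names and using only the standard library:

import codecs
import errno
import os


def open_creating_parents(path, mode='w', encoding=None):
    # Try to open the file; if that fails, create the parent directory
    # (ignoring "already exists") and try once more.
    try:
        return codecs.open(path, mode, encoding)
    except IOError:
        try:
            os.makedirs(os.path.dirname(path))
        except OSError as exc:
            if exc.errno != errno.EEXIST:
                raise
        return codecs.open(path, mode, encoding)
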
Example #4
    def move_file(self, old_filename, new_filename):
        self.tree_modifier.move(old_filename, new_filename)

        if not self.repo.is_bare and self.update_working_copy:
            real_old_filename = os.path.join(self.path, old_filename)
            real_new_filename = os.path.join(self.path, new_filename)
            mkdir_p(os.path.dirname(real_new_filename))
            os.rename(real_old_filename, real_new_filename)
            remove_file_with_empty_parents(self.path, old_filename)

        self.messages.append('    R  {} -> {}'.format(old_filename,
                                                      new_filename))
Example #5
def save_rendered(outfile_name, template_name, context):
    global ASHES_ENV  # retain laziness
    if not ASHES_ENV:
        ASHES_ENV = ashes.AshesEnv([TEMPLATE_PATH], keep_whitespace=True)
    rendered = ASHES_ENV.render(template_name, context)
    try:
        out_file = codecs.open(outfile_name, 'w', 'utf-8')
    except IOError:
        mkdir_p(dirname(outfile_name))
        out_file = codecs.open(outfile_name, 'w', 'utf-8')
    with out_file:
        out_file.write(rendered)
    print('successfully generated %s' % outfile_name)
Example #6
def _create_dir(directory_path):
    """Create a directory for us.

    Args:
        directory_path (:obj:`str`): The path to the directory to create.

    Returns:
        int: Status code of success or failure. Anything except 0 is a failure.

    """
    try:
        fileutils.mkdir_p(directory_path)
    except OSError:
        LOGGER.exception('Could not create directory: %s', directory_path, exc_info=True)
        return 1
    return 0
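
A caller would typically branch on the status code returned by _create_dir. A minimal, hypothetical usage:

# Hypothetical caller; the path is an example value.
if _create_dir('/tmp/example-output') != 0:
    raise SystemExit('could not create the output directory')
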
Example #7
def _configure_cache():
    conf.set_default_value('cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache')))
    if conf.cache is False:
        return
    if conf.has_own_property('cache'):
        conf.cache = os.path.abspath(os.path.join(conf.data_dir, os.path.expanduser(conf.cache)))
    if not os.path.isdir(conf.cache):
        try:
            mkdir_p(conf.cache)
        except PermissionError:
            print("Warning: caching is disabled because the directory {!r} can't be created.\n".format(conf.cache) +
                  "If you don't want caching, set `cache = False` in your config.py.")
            conf.cache = False
            return
    if not os.access(conf.cache, os.R_OK):
        print('Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n'.format(conf.cache) +
              "If you don't want caching, set `cache = False` in your config.py.")
        conf.cache = False
Example #8
    def write_file(self, filename, content):
        # TODO: combine writing many files
        assert isinstance(content, text_type)
        data = content.encode('utf-8')
        existing_entry = get_tree_entry(self.repo, self.working_tree, filename)
        if existing_entry:
            type = 'M'
            if existing_entry.id == git_hash(data):
                return
        else:
            type = 'A'
        blob_id = self.repo.create_blob(data)
        self.insert_into_working_tree(blob_id, filename)

        if not self.repo.is_bare and self.update_working_copy:
            real_filename = os.path.join(self.path, filename)
            mkdir_p(os.path.dirname(real_filename))
            with codecs.open(real_filename, 'w', encoding='utf-8') as outfile:
                outfile.write(content)

        self.messages.append('    {}  {}'.format(type, filename))
Example #9
        def export_entry(entry):
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            #fal.write(html_output_path, entry.entry_html)
            #
            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)  # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return
Example #10
def run(argv):
    if not os.path.exists(clean_file):
        print('dbsnp will be stored at {clean_file!r}'.format(
            clean_file=clean_file))
        mkdir_p(dbsnp_dir)
        if not os.path.exists(raw_file):
            print('Downloading dbsnp!')
            wget = utils.get_path('wget')
            dbsnp_url = 'ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b147_GRCh37p13/VCF/All_20160601.vcf.gz'
            #dbsnp_url= 'ftp://ftp.ncbi.nlm.nih.gov/snp/organisms/human_9606_b147_GRCh37p13/database/organism_data/b147_SNPChrPosOnRef_105.bcp.gz'
            utils.run_cmd([wget, '-O', raw_tmpfile, dbsnp_url])
            os.rename(raw_tmpfile, raw_file)

        utils.run_script(r'''
        gzip -cd '{raw_file}' |
        grep -v '^#' |
        perl -F'\t' -nale 'print "$F[0]\t$F[1]\t$F[2]\t$F[3]\t$F[4]"' | # Gotta declare that it's tab-delimited, else it's '\s'-delimited I think.
        gzip > '{clean_tmpfile}'
        '''.format(raw_file=raw_file, clean_tmpfile=clean_tmpfile))
        os.rename(clean_tmpfile, clean_file)

    else:
        print("dbsnp is at '{clean_file}'".format(clean_file=clean_file))
Example #11
File: core.py Project: ra2003/chert
        def export_entry(entry):
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            #fal.write(html_output_path, entry.entry_html)
            #
            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)  # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return
Example #12
def main():
    global DR_LOG_FILE
    global FRRP_LOG_FILE
    global GENERAL_LOG_FILE
    global MUX
    global PREFIX
    global EXP_ID
    global producer
    global CONST_WAIT_TIME

    mkdir_p('graph_results')
    mkdir_p('general_logs')
    mkdir_p('dr_logs')
    mkdir_p('frrp_logs')

    if len(sys.argv) < 6:  # script name plus the five arguments listed below
        print(
            'usage: long_path.py <EXP_ID> <ATLAS_AS_FILE> <PREFIX> <WAIT_TIME> <MUX>'
        )
        sys.exit(1)
    producer = KafkaProducer(
        value_serializer=lambda m: json.dumps(m).encode('ascii'))

    atexit.register(end_collector_call)

    EXP_ID = str(sys.argv[1])
    MUX = str(sys.argv[5])
    PREFIX = str(sys.argv[3])
    CONST_WAIT_TIME = int(sys.argv[4])
    source_as_file = str(sys.argv[2])

    exp_start = arrow.utcnow().isoformat()
    GENERAL_LOG_FILE = "general_logs/general_log_{}_{}_{}.txt".format(
        EXP_ID, MUX, exp_start)
    DR_LOG_FILE = "dr_logs/default_route_log_{}_{}_{}.txt".format(
        EXP_ID, MUX, exp_start)
    FRRP_LOG_FILE = "frrp_logs/frrp_log_{}_{}_{}.txt".format(
        EXP_ID, MUX, exp_start)

    run_measurements(producer, source_as_file, exp_start)
Example #13
    def before_container_hook(self, app):
        if is_quast(app):
            fu.mkdir_p(fs.get_task_dir_path(app, 'tmp/assembly_metrics'))
Example #14
def make_basedir(path):
    mkdir_p(os.path.dirname(path))
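
Every example in this listing delegates directory creation to some mkdir_p helper (boltons.fileutils.mkdir_p, a project-local file_utils module, etc.). As a rough sketch of what such a helper usually does, using only the standard library (on Python 3 this is roughly os.makedirs(path, exist_ok=True)):

import errno
import os


def mkdir_p(path):
    # Behave like `mkdir -p`: create intermediate directories as needed
    # and ignore the error if the directory already exists.
    try:
        os.makedirs(path)
    except OSError as exc:
        if exc.errno != errno.EEXIST or not os.path.isdir(path):
            raise
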
Example #15
def _find_and_submit(tasks: List[Task],
                     work_folder: str,
                     concurrent_jobs: int,
                     submit_limit: int,
                     submitter: SyncSubmission):
    submitter.warm_cache(tasks)

    submitted = 0
    # To maintain concurrent_jobs limit, we set a pbs dependency on previous jobs.
    # mapping of concurrent slot number to the last job id to be submitted in it.
    last_job_slots = {}  # type: Dict[int, str]

    for task in tasks:
        if submitted == submit_limit:
            click.echo("Submit limit ({}) reached, done.".format(submit_limit))
            break

        require_job_id = last_job_slots.get(submitted % concurrent_jobs)

        run_path = task.resolve_path(work_folder).joinpath('{:03d}'.format(submitted))
        if run_path.exists():
            raise RuntimeError("Calculated job folder should be unique? Got %r" % (run_path,))

        fileutils.mkdir_p(run_path)

        job_id, command = submitter.submit(
            task=task,
            output_file=(run_path.joinpath('out.log')),
            error_file=run_path.joinpath('err.log'),
            job_name='{}-{:02}'.format(task.collection.name, submitted),
            require_job_id=require_job_id,
        )

        if job_id:
            # Not used by the job, but useful for our reference, and potentially by future monitoring.
            run_path.joinpath('submission-info.yaml').write_text(
                yaml.safe_dump(
                    {
                        'pbs_command': ' '.join(shlex.quote(arg) for arg in command),
                        'pbs_job_id': job_id,
                        'input_paths': [str(p) for p in task.input_paths],
                        'file_dataset_count': task.dataset_count,
                        'collection_name': task.collection.name
                    },
                    default_flow_style=False,
                    indent=4
                )
            )

            last_job_slots[submitted % concurrent_jobs] = job_id
            submitted += 1

            click.echo(
                "{prefix}: submitted {job_id} with {dataset_count} datasets using directory {run_path}".format(
                    prefix=style(
                        "[{:02d} {}]".format(submitted, task.collection.name),
                        fg='blue', bold=True
                    ),
                    job_id=style(job_id, bold=True),
                    dataset_count=style(str(task.dataset_count), bold=True),
                    run_path=style(str(run_path), bold=True)
                )
            )

        time.sleep(SUBMIT_THROTTLE_SECS)
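
The last_job_slots dict above caps concurrency by chaining each new job onto whichever job previously occupied the same slot (submitted % concurrent_jobs). A toy illustration of the slot assignment, with made-up job ids:

# Illustrative only: shows which earlier job each submission would depend on
# when concurrent_jobs == 2.
concurrent_jobs = 2
last_job_slots = {}
for submitted, job_id in enumerate(['job-a', 'job-b', 'job-c', 'job-d']):
    depends_on = last_job_slots.get(submitted % concurrent_jobs)
    print(submitted, job_id, 'depends on', depends_on)
    last_job_slots[submitted % concurrent_jobs] = job_id
# 0 job-a depends on None
# 1 job-b depends on None
# 2 job-c depends on job-a
# 3 job-d depends on job-b
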
Example #16
def main(random_seed, test_on_gt, only_test, overfit, fusion,
         weighted_aggregation):
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    n_epochs = 3
    lr = 1e-2
    wd = 0
    lr_scheduler = True

    # graph settings
    h_dim = 128
    x_dim = 128
    c_dim = 90
    phi_dim = 2048
    max_steps = 3

    train_db = JointCocoTasks()
    initializer = InitializerMul(h_dim=h_dim, phi_dim=phi_dim, c_dim=c_dim)
    if weighted_aggregation:
        aggregator = AllLinearAggregatorWeightedWithDetScore(
            in_features=h_dim, out_features=x_dim)
    else:
        aggregator = AllLinearAggregator(in_features=h_dim, out_features=x_dim)
    output_model = OutputModelFirstLast(h_dim=h_dim,
                                        num_tasks=len(TASK_NUMBERS))

    network = GGNNDiscLoss(
        initializer=initializer,
        aggregator=aggregator,
        output_model=output_model,
        max_steps=max_steps,
        h_dim=h_dim,
        x_dim=x_dim,
        class_dim=c_dim,
        fusion=fusion,
    )
    optimizer = SGD(network.parameters(), lr=lr, weight_decay=wd)
    experiment = JointGraphExperiment(
        network=network,
        optimizer=optimizer,
        dataset=train_db,
        tensorboard=True,
        seed=random_seed,
    )

    train_folder = "ggnn-full-seed:{s}".format(s=random_seed)
    folder = os.path.join(SAVING_DIRECTORY, train_folder)
    mkdir_p(folder)

    if not only_test:
        experiment.train_n_epochs(n_epochs,
                                  overfit=overfit,
                                  lr_scheduler=lr_scheduler)

        torch.save(network.state_dict(), os.path.join(folder, "model.mdl"))
    else:
        network.load_state_dict(torch.load(os.path.join(folder, "model.mdl")))

    for task_number in TASK_NUMBERS:
        if test_on_gt:
            test_db = CocoTasksTestGT(task_number)
        else:
            test_db = CocoTasksTest(task_number)

        print("testing task {}".format(task_number), "---------------------")

        # test_model
        detections = experiment.do_test(test_db, task_number=task_number)

        detection_file_name = "detections_wa:{}_tn:{}_tgt:{}_f:{}.json".format(
            weighted_aggregation, task_number, test_on_gt, fusion)

        # save detections
        with open(os.path.join(folder, detection_file_name), "w") as f:
            json.dump(detections, f)

        # perform evaluation
        with redirect_stdout(open(os.devnull, "w")):
            gtCOCO = test_db.task_coco
            dtCOCO = gtCOCO.loadRes(os.path.join(folder, detection_file_name))
            cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox")
            cocoEval.params.catIds = 1
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()

        print("mAP:\t\t %1.6f" % cocoEval.stats[0])
        print("[email protected]:\t\t %1.6f" % cocoEval.stats[1])

        # save evaluation performance
        result_file_name = "result_wa:{}_tn:{}_tgt:{}_f:{}.txt".format(
            weighted_aggregation, task_number, test_on_gt, fusion)

        with open(os.path.join(folder, result_file_name), "w") as f:
            f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
Example #17
def make_basedir(path):
    mkdir_p(os.path.dirname(path))
Example #18
def main(random_seed, test_on_gt, only_test, overfit):
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    n_epochs = 3
    lr = 1e-2
    wd = 0
    lr_scheduler = True

    train_db = JointCocoTasks()

    network = JointClassifier()
    optimizer = SGD(network.parameters(), lr=lr, weight_decay=wd)
    experiment = JointClassifierExperiment(
        network=network,
        optimizer=optimizer,
        dataset=train_db,
        tensorboard=True,
        seed=random_seed,
    )

    train_folder = "ablation-joint-classifier-seed:{s}".format(s=random_seed)
    folder = os.path.join(SAVING_DIRECTORY, train_folder)
    mkdir_p(folder)

    if not only_test:
        experiment.train_n_epochs(n_epochs,
                                  overfit=overfit,
                                  lr_scheduler=lr_scheduler)

        torch.save(network.state_dict(), os.path.join(folder, "model.mdl"))
    else:
        network.load_state_dict(torch.load(os.path.join(folder, "model.mdl")))

    for task_number in TASK_NUMBERS:
        if test_on_gt:
            test_db = CocoTasksTestGT(task_number)
        else:
            test_db = CocoTasksTest(task_number)

        print("testing task {}".format(task_number), "---------------------")

        # test_model
        detections = experiment.do_test(test_db, task_number=task_number)

        detections_file_name = "detections_tn:{}_tgt:{}.json".format(
            task_number, test_on_gt)

        # save detections
        with open(os.path.join(folder, detections_file_name), "w") as f:
            json.dump(detections, f)

        # perform evaluation
        with redirect_stdout(open(os.devnull, "w")):
            gtCOCO = test_db.task_coco
            dtCOCO = gtCOCO.loadRes(os.path.join(folder, detections_file_name))
            cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox")
            cocoEval.params.catIds = 1
            cocoEval.evaluate()
            cocoEval.accumulate()
            cocoEval.summarize()

        print("mAP:\t\t %1.6f" % cocoEval.stats[0])
        print("[email protected]:\t\t %1.6f" % cocoEval.stats[1])

        # save evaluation performance
        result_file_name = "result_tn:{}_tgt:{}.txt".format(
            task_number, test_on_gt)

        with open(os.path.join(folder, result_file_name), "w") as f:
            f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
Example #19
def pypier(args, reqs):
    '''
    only supports pure-python repos for now
    '''
    parser = argparse.ArgumentParser(prog='pypier')
    parser.add_argument('cmd',
                        choices=('config', 'publish', 'pip-index') + PIP_CMDS)
    cmd = parser.parse_args(args[1:2]).cmd
    cache = reqs.cache
    executor = reqs.executor
    site_config = reqs.site_config
    pypier_repo = site_config['pypier']['repo']
    pypier_repo_ro = site_config['pypier']['repo_ro']
    if cmd == 'config':
        print()
        print('PyPIER repos:')
        print('  ', pypier_repo_ro, '(fetch)')
        print('  ', pypier_repo, '(publish)')
    elif cmd == 'publish':
        parser.add_argument('--dry-run', action='store_true')
        arg_vals = parser.parse_args(args[1:])
        setup_dir = find_project_dir(os.getcwd(), 'setup.py')
        pypier_read_write = cache.workon_project_git('pypier', pypier_repo)
        executor.python('setup.py', 'sdist').redirect(cwd=setup_dir)
        # TODO manylinux wheels?  OSX wheels?
        version = executor.python('setup.py', version=None).batch()[0].strip()
        output = [
            fn for fn in os.listdir(setup_dir + '/dist/') if version in fn
        ]
        name = output[0].split('-', 1)[0]
        # typical artifact: foo-ver.tar.gz
        dst = pypier_read_write.path + '/packages/' + name + '/'
        fileutils.mkdir_p(dst)
        # TODO: instead of just looking for anything in the dist
        # directory, query setup for the version and check for that.
        for result in output:
            if os.path.exists(os.path.join(dst, result)):
                raise EnvironmentError(
                    "{} has already been published".format(result))
        for result in output:
            shutil.copy(setup_dir + '/dist/' + result, dst)
        with fileutils.atomic_save(os.path.join(dst, 'pkg_info.json')) as f:
            pkg_info = get_pkg_info(executor, setup_dir)
            pkg_info_json = json.dumps(pkg_info, indent=2, sort_keys=True)
            f.write(pkg_info_json + '\n')
        update_index(pypier_read_write.path)
        source_metadata = get_source_metadata(executor, setup_dir)
        commit_msg = 'PyPIER publish: {}\n\n{}\n'.format(
            ', '.join(output),
            json.dumps(source_metadata, indent=2, sort_keys=True))
        pypier_read_write.push(commit_msg, dry_run=arg_vals.dry_run)
    elif cmd == 'pip-index':
        pypier_read_only = cache.pull_project_git('pypier', pypier_repo_ro)
        link_path = pypier_read_only + '/packages/index.html'
        print(link_path)  # NOTE: this print command is the primary purpose
    elif cmd in PIP_CMDS:
        pypier_read_only = cache.pull_project_git('pypier', pypier_repo_ro)
        link_path = pypier_read_only + '/packages/index.html'
        #env = dict(os.environ)
        #env['PIP_FIND_LINKS'] = ' '.join(
        #    [link_path] + env.get('PIP_FIND_LINKS', '').split())
        # TODO: figure out clean way to extend env
        # TODO: remove ALL_PROXY='' once urllib3 + requests
        #       do a release and don't pre-emptively die
        #       on socks5h:// proxy
        executor.patch_env(PIP_FIND_LINKS=link_path,
                           ALL_PROXY='').command(['python', '-m', 'pip'] +
                                                 args[1:]).redirect(
                                                     stdout=sys.stdout,
                                                     stderr=sys.stderr)
    else:
        # argparse should catch this above
        raise ValueError('unrecognized sub-command %r' % cmd)
Example #20
File: core.py Project: ra2003/chert
    def export(self):
        fal = self.fal
        self._call_custom_hook('pre_export')
        output_path = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(output_path)

        def export_entry(entry):
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            #fal.write(html_output_path, entry.entry_html)
            #
            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)  # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry in self.entries:
            export_entry(entry)
        for entry in self.draft_entries:
            export_entry(entry)
        for entry in self.special_entries:
            export_entry(entry)

        # index is just the most recent entry for now
        index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT)
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        fal.write(index_path, index_content)
        archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT))
        fal.write(archive_path, self.entries.rendered_html)

        # output feeds
        rss_path = pjoin(output_path, RSS_FEED_FILENAME)
        fal.write(rss_path, self.entries.rendered_rss_feed)
        atom_path = pjoin(output_path, ATOM_FEED_FILENAME)
        fal.write(atom_path, self.entries.rendered_atom_feed)

        for tag, entry_list in self.tag_map.items():
            tag_path = pjoin(output_path, entry_list.path_part)
            mkdir_p(tag_path)
            rss_path = pjoin(tag_path, RSS_FEED_FILENAME)
            atom_path = pjoin(tag_path, ATOM_FEED_FILENAME)
            archive_path = pjoin(tag_path, 'index.html')
            fal.write(rss_path, entry_list.rendered_rss_feed)
            fal.write(atom_path, entry_list.rendered_atom_feed)
            fal.write(archive_path, entry_list.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for sdn in get_subdirectories(self.theme_path):
            cur_src = pjoin(self.theme_path, sdn)
            cur_dest = pjoin(output_path, sdn)
            with chlog.critical('copy assets', src=cur_src, dest=cur_dest):
                copytree(cur_src, cur_dest)

        # optionally symlink the uploads directory.  this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            uploads_link_path = pjoin(output_path, 'uploads')
            if not os.path.isdir(self.uploads_path):
                rec.failure('no uploads directory at {}', self.uploads_path)
            else:
                message = None
                if os.path.islink(uploads_link_path):
                    os.unlink(uploads_link_path)
                    message = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, uploads_link_path)
                rec.success(message)

        self._call_custom_hook('post_export')
Example #21
    def export(self):
        fal = self.fal
        self._call_custom_hook('pre_export')
        output_path = self.paths['output_path']

        with chlog.critical('create output path'):
            mkdir_p(output_path)

        def export_entry(entry):
            entry_custom_base_path = os.path.split(entry.entry_root)[0]
            if entry_custom_base_path:
                mkdir_p(pjoin(output_path, entry_custom_base_path))
            er = entry.entry_root
            entry_html_fn = er + EXPORT_HTML_EXT
            entry_gen_md_fn = er + '.gen.md'
            entry_data_fn = er + '.json'

            html_output_path = pjoin(output_path, entry_html_fn)
            data_output_path = pjoin(output_path, entry_data_fn)
            gen_md_output_path = pjoin(output_path, entry_gen_md_fn)

            #fal.write(html_output_path, entry.entry_html)
            #
            fal.write(html_output_path, entry.entry_html)
            fal.write(gen_md_output_path, entry.content_md)  # TODO
            _data = json.dumps(entry.loaded_parts, indent=2, sort_keys=True)
            fal.write(data_output_path, _data)

            # TODO: copy file
            # fal.write(src_output_path, entry.source_text)
            return

        for entry in self.entries:
            export_entry(entry)
        for entry in self.draft_entries:
            export_entry(entry)
        for entry in self.special_entries:
            export_entry(entry)

        # index is just the most recent entry for now
        index_path = pjoin(output_path, 'index' + EXPORT_HTML_EXT)
        if self.entries:
            index_content = self.entries[0].entry_html
        else:
            index_content = 'No entries yet!'
        fal.write(index_path, index_content)
        archive_path = pjoin(output_path, ('archive' + EXPORT_HTML_EXT))
        fal.write(archive_path, self.entries.rendered_html)

        # output feeds
        rss_path = pjoin(output_path, RSS_FEED_FILENAME)
        fal.write(rss_path, self.entries.rendered_rss_feed)
        atom_path = pjoin(output_path, ATOM_FEED_FILENAME)
        fal.write(atom_path, self.entries.rendered_atom_feed)

        for tag, entry_list in self.tag_map.items():
            tag_path = pjoin(output_path, entry_list.path_part)
            mkdir_p(tag_path)
            rss_path = pjoin(tag_path, RSS_FEED_FILENAME)
            atom_path = pjoin(tag_path, ATOM_FEED_FILENAME)
            archive_path = pjoin(tag_path, 'index.html')
            fal.write(rss_path, entry_list.rendered_rss_feed)
            fal.write(atom_path, entry_list.rendered_atom_feed)
            fal.write(archive_path, entry_list.rendered_html)

        # copy assets, i.e., all directories under the theme path
        for sdn in get_subdirectories(self.theme_path):
            cur_src = pjoin(self.theme_path, sdn)
            cur_dest = pjoin(output_path, sdn)
            with chlog.critical('copy assets', src=cur_src, dest=cur_dest):
                copytree(cur_src, cur_dest)

        # optionally symlink the uploads directory.  this is an
        # important step for sites with uploads because Chert's
        # default rsync behavior picks up on these uploads by
        # following the symlink.
        with chlog.critical('link uploads directory') as rec:
            uploads_link_path = pjoin(output_path, 'uploads')
            if not os.path.isdir(self.uploads_path):
                rec.failure('no uploads directory at {}', self.uploads_path)
            else:
                message = None
                if os.path.islink(uploads_link_path):
                    os.unlink(uploads_link_path)
                    message = 'refreshed existing uploads symlink'
                os.symlink(self.uploads_path, uploads_link_path)
                rec.success(message)

        self._call_custom_hook('post_export')
Example #22
    def download(self, path=LOCAL_DATA):
        # mostly taken from https://github.com/streamlit/demo-face-gan/
        #   blob/master/streamlit_app.py
        root = Path(path).resolve()
        path = root / self.filename

        # Don't download the file twice. (If possible, verify the
        # download using the file length.)
        if os.path.exists(path):
            if not self.size or os.path.getsize(path) == self.size:
                return path

        mkdir_p(path.parent)

        # These are handles to two visual elements to animate.
        status, progress_bar = None, None
        try:
            status = st.warning("Downloading %s..." % path)

            # handle cases where files hosted on gdrive sometimes fail
            # to download
            if "google.com" in self.url:
                _ = gdown.cached_download(self.url, path=path)
            else:
                progress_bar = st.progress(0)
                # with open(path, "wb") as output_file:
                with urllib.request.urlopen(
                        self.url, cafile=certifi.where()) as response:
                    if response.info()["Content-Length"] is not None:
                        with open(path, "wb") as output_file:
                            length = int(response.info()["Content-Length"])
                            counter = 0.0
                            MEGABYTES = 2.0**20.0
                            while True:
                                data = response.read(8192)
                                if not data:
                                    break
                                counter += len(data)
                                output_file.write(data)

                                # We perform animation by overwriting the elements.
                                status.warning(
                                    "Downloading %s... (%6.2f/%6.2f MB)" %
                                    (path, counter / MEGABYTES,
                                     length / MEGABYTES))
                                progress_bar.progress(
                                    min(counter / length, 1.0))

        except urllib.error.URLError as e:
            logger.exception(f"Invalid URL: {self.url}", exc_info=e)
        # Finally, we remove these visual elements by calling .empty().
        finally:
            if status is not None:
                status.empty()
            if progress_bar is not None:
                progress_bar.empty()

        if not path.exists():
            raise FileNotFoundError(str(path))

        elif os.path.getsize(path) == 0:
            os.remove(path)
            raise ValueError(f"Invalid URL: {self.url}")

        return path
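
Stripped of the Streamlit/gdown handling, the download loop above is an ordinary chunked read with progress reporting. A condensed, standalone sketch (the URL and destination are caller-supplied):

import urllib.request

MEGABYTES = 2.0 ** 20.0


def download_with_progress(url, dest_path):
    # Minimal sketch: stream the response in 8 KiB chunks and report progress
    # when the server provides a Content-Length header.
    with urllib.request.urlopen(url) as response, open(dest_path, 'wb') as out:
        length = int(response.info()['Content-Length'] or 0)
        done = 0
        while True:
            chunk = response.read(8192)
            if not chunk:
                break
            out.write(chunk)
            done += len(chunk)
            if length:
                print('%.2f / %.2f MB' % (done / MEGABYTES, length / MEGABYTES))
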
Example #23
def _ensure_conf():

    if hasattr(conf, 'data_dir'):
        conf.data_dir = os.path.abspath(conf.data_dir)
    else:
        conf.set_default_value('data_dir', os.path.abspath(os.environ.get('PHEWEB_DATADIR', False) or os.path.curdir))

    ## Get `conf.cache` working because it's needed for reporting errors
    def _configure_cache():
        conf.set_default_value('cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache')))
        if conf.cache is False:
            return
        if conf.has_own_property('cache'):
            conf.cache = os.path.abspath(os.path.join(conf.data_dir, os.path.expanduser(conf.cache)))
        if not os.path.isdir(conf.cache):
            try:
                mkdir_p(conf.cache)
            except PermissionError:
                print("Warning: caching is disabled because the directory {!r} can't be created.\n".format(conf.cache) +
                      "If you don't want caching, set `cache = False` in your config.py.")
                conf.cache = False
                return
        if not os.access(conf.cache, os.R_OK):
            print('Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n'.format(conf.cache) +
                  "If you don't want caching, set `cache = False` in your config.py.")
            conf.cache = False
    _configure_cache()

    def _load_config_file():
        _config_filepath = os.path.join(conf.data_dir, 'config.py')
        if os.path.isfile(_config_filepath):
            try:
                _conf_module = imp.load_source('config', _config_filepath)
            except Exception:
                raise utils.PheWebError("PheWeb tried to load your config.py at {!r} but it failed.".format(_config_filepath))
            else:
                for key in dir(_conf_module):
                    if not key.startswith('_'):
                        conf[key] = getattr(_conf_module, key)
    _load_config_file()

    conf.set_default_value('lzjs_version', '0.9.0')  # Global setting, rarely needs configuration: which version of LZjs to fetch from the CDN
    conf.set_default_value('custom_templates', lambda: os.path.join(conf.data_dir, 'custom_templates'), is_function=True)
    conf.set_default_value('debug', False)
    conf.set_default_value('limit_num_variants', False)
    conf.set_default_value('assoc_min_maf', 0)
    conf.set_default_value('variant_inclusion_maf', 0)
    conf.set_default_value('within_pheno_mask_around_peak', int(500e3))
    conf.set_default_value('between_pheno_mask_around_peak', int(1e6))
    conf.set_default_value('manhattan_num_unbinned', 500)
    conf.set_default_value('manhattan_peak_max_count', 500)
    conf.set_default_value('manhattan_peak_pval_threshold', 1e-6)
    conf.set_default_value('manhattan_peak_sprawl_dist', int(200e3))
    conf.set_default_value('top_hits_pval_cutoff', 1e-6)

    conf.set_default_value('allow_variant_json_cors', False)
    conf.set_default_value('urlprefix', '')

    if 'minimum_maf' in conf:
        raise utils.PheWebError("minimum_maf has been deprecated.  Please remove it and use assoc_min_maf and/or variant_inclusion_maf instead")

    if conf.get('login', {}).get('whitelist', None):
        conf.login['whitelist'] = [addr.lower() for addr in conf.login['whitelist']]

    if not os.path.isdir(conf.data_dir):
        mkdir_p(conf.data_dir)
    if not os.access(conf.data_dir, os.R_OK):
        raise utils.PheWebError("Your data directory, {!r}, is not readable.".format(conf.data_dir))

    ### Parsing

    def scientific_int(value):
        '''like int(value) but accepts "1.3e-4"'''
        try:
            return int(value)
        except ValueError:
            x = float(value)
            if x.is_integer():
                return int(x)
            raise

    class Field:
        def __init__(self, d):
            self._d = d
        def parse(self, value):
            '''parse from input file'''
            # nullable
            if self._d['nullable'] and value in conf.parse.null_values:
                return ''
            # type
            x = self._d['type'](value)
            # range
            if 'range' in self._d:
                assert self._d['range'][0] is None or x >= self._d['range'][0]
                assert self._d['range'][1] is None or x <= self._d['range'][1]
            if 'sigfigs' in self._d:
                x = utils.round_sig(x, self._d['sigfigs'])
            if 'proportion_sigfigs' in self._d:
                if 0 <= x < 0.5:
                    x = utils.round_sig(x, self._d['proportion_sigfigs'])
                elif 0.5 <= x <= 1:
                    x = 1 - utils.round_sig(1-x, self._d['proportion_sigfigs'])
                else:
                    raise utils.PheWebError('cannot use proportion_sigfigs on a number outside [0-1]')
            if 'decimals' in self._d:
                x = round(x, self._d['decimals'])
            return x
        def read(self, value):
            '''read from internal file'''
            if self._d['nullable'] and value == '':
                return ''
            x = self._d['type'](value)
            return x

    default_null_values = ['', '.', 'NA', 'N/A', 'n/a', 'nan', '-nan', 'NaN', '-NaN', 'null', 'NULL']

    default_field = {
        'aliases': [],
        'required': False,
        'type': str,
        'nullable': False,
        'from_assoc_files': True, # if this is False, then the field will not be parsed from input files, because annotation will add it.
    }

    default_per_variant_fields = OrderedDict([
        ('chrom', {
            'aliases': ['#CHROM', 'chr'],
            'required': True,
            'tooltip_underscoretemplate': '<b><%= d.chrom %>:<%= d.pos.toLocaleString() %> <%= d.ref %> / <%= d.alt %></b><br>',
            'tooltip_lztemplate': False,
        }),
        ('pos', {
            'aliases': ['BEG', 'BEGIN', 'BP'],
            'required': True,
            'type': scientific_int,
            'range': [0, None],
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('ref', {
            'aliases': ['reference', 'allele0'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('alt', {
            'aliases': ['alternate', 'allele1'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('rsids', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate': '<% _.each(_.filter((d.rsids||"").split(",")), function(rsid) { %>rsid: <%= rsid %><br><% }) %>',
            'tooltip_lztemplate': {'condition': 'rsid', 'template': '<strong>{{rsid}}</strong><br>'},
        }),
        ('nearest_genes', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate': 'nearest gene<%= _.contains(d.nearest_genes, ",")? "s":"" %>: <%= d.nearest_genes %><br>',
            'tooltip_lztemplate': False,
        }),
    ])

    default_per_assoc_fields = OrderedDict([
        ('pval', {
            'aliases': ['PVALUE'],
            'required': True,
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_lztemplate': {
                'condition': False,
                'template': ('{{#if pvalue}}P-value: <strong>{{pvalue|scinotation}}</strong><br>{{/if}}\n' +
                             '{{#if pval}}P-value: <strong>{{pval|scinotation}}</strong><br>{{/if}}'),
            },
            'display': 'P-value',
        }),
        ('beta', {
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'Beta: <%= d.beta %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta %>)<% } %><br>',
            'tooltip_lztemplate': 'Beta: <strong>{{beta}}</strong>{{#if sebeta}} ({{sebeta}}){{/if}}<br>',
            'display': 'Beta',
        }),
        ('sebeta', {
            'aliases': ['se'],
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('or', {
            'type': float,
            'nullable': True,
            'range': [0, None],
            'sigfigs': 2,
            'display': 'Odds Ratio',
        }),
        ('maf', {
            'type': float,
            'range': [0, 0.5],
            'sigfigs': 2,
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'MAF',
        }),
        ('af', {
            'aliases': ['A1FREQ'],
            'type': float,
            'range': [0, 1],
            'proportion_sigfigs': 2,
            'tooltip_lztemplate': {'transform': '|percent'},
            'display': 'AF',
        }),
        ('ac', {
            'type': float,
            'range': [0, None],
            'decimals': 1,
            'display': 'AC',
        }),
        ('r2', {
            'type': float,
            'proportion_sigfigs': 2,
            'nullable': True,
            'display': 'R2',
        }),
        ('tstat', {
            'type': float,
            'sigfigs': 2,
            'nullable': True,
            'display': 'Tstat',
        }),
    ])

    default_per_pheno_fields = OrderedDict([
        ('num_cases', {
            'aliases': ['NS.CASE', 'N_cases'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#cases',
        }),
        ('num_controls', {
            'aliases': ['NS.CTRL', 'N_controls'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#controls',
        }),
        ('num_samples', {
            'aliases': ['NS', 'N'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#samples',
        }),
        # TODO: phenocode, phenostring, category, &c?
        # TODO: include `assoc_files` with {never_send: True}?
    ])

    conf.parse.null_values = deepcopy(default_null_values)
    conf.parse.per_variant_fields = deepcopy(default_per_variant_fields)
    conf.parse.per_assoc_fields = deepcopy(default_per_assoc_fields)
    conf.parse.per_pheno_fields = deepcopy(default_per_pheno_fields)
    conf.parse.fields = OrderedDict(itertools.chain(conf.parse.per_variant_fields.items(),
                                                    conf.parse.per_assoc_fields.items(),
                                                    conf.parse.per_pheno_fields.items()))
    assert len(conf.parse.fields) == len(conf.parse.per_variant_fields) + len(conf.parse.per_assoc_fields) + len(conf.parse.per_pheno_fields) # no overlaps!

    if 'aliases' in conf:
        for alias, field in conf.aliases.items():
            conf.parse.fields[field].setdefault('aliases', []).append(alias)

    if 'null_values' in conf:
        conf.parse.null_values.extend(conf.null_values)

    # make all aliases lowercase and add parsers
    for field_name, field_dict in conf.parse.fields.items():
        for k,v in default_field.items():
            field_dict.setdefault(k, v)
        field_dict['aliases'] = list(set([field_name.lower()] + [alias.lower() for alias in field_dict['aliases']]))
        field_dict['_parse'] = Field(field_dict).parse
        field_dict['_read']  = Field(field_dict).read

    _repeated_aliases = [alias for alias,count in Counter(itertools.chain.from_iterable(f['aliases'] for f in conf.parse.fields.values())).most_common() if count > 1]
    if _repeated_aliases:
        raise utils.PheWebError('The following aliases appear for multiple fields: {}'.format(_repeated_aliases))


    def get_tooltip_underscoretemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            if 'tooltip_underscoretemplate' in field:
                if field['tooltip_underscoretemplate'] is False:
                    continue
                else:
                    template += '<% if(_.has(d, ' + repr(fieldname) + ')) { %>' + field['tooltip_underscoretemplate'] + '<% } %>\n'
            else:
                template += '<% if(_.has(d, ' + repr(fieldname) + ')) { %>' + field.get('display', fieldname) + ': <%= d[' + repr(fieldname) + '] %><br><% } %>\n'
        return template
    conf.parse.tooltip_underscoretemplate = get_tooltip_underscoretemplate()

    def get_tooltip_lztemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            lzt = field.get('tooltip_lztemplate', {})
            if lzt is False:
                continue
            if isinstance(lzt, str):
                lzt = {'template': lzt}
            if 'template' not in lzt:
                lzt['template'] = field.get('display', fieldname) + ': <strong>{{' + fieldname + lzt.get('transform','') + '}}</strong><br>'
            if 'condition' not in lzt:
                lzt['condition'] = fieldname

            if not lzt['condition']:
                template += lzt['template'] + '\n'
            else:
                template += '{{#if ' + lzt['condition'] + '}}' + lzt['template'] + '{{/if}}\n'
        return template
    conf.parse.tooltip_lztemplate = get_tooltip_lztemplate()
Example #24
def save_traffic_stats(lang, project, query_date, limit=DEFAULT_LIMIT):
    '''\
    1. Get articles
    2. Add images and summaries
    3. Prepare and save results
    '''
    articles = make_article_list(query_date, lang=lang, project=project)
    total_traffic = get_project_traffic(query_date, lang, project)
    articles = articles[:limit]
    articles = add_extras(articles, lang=lang, project=project)
    ret = {'articles': articles,
           'formatted_date': format_date(query_date,
                                         format='d MMMM yyyy',
                                         locale=lang),
           'date': {'day': query_date.day,
                    'month': query_date.month,
                    'year': query_date.year},
           'lang': lang,
           'full_lang': LOCAL_LANG_MAP[lang],
           'total_traffic': total_traffic,
           'total_traffic_short': shorten_number(total_traffic),
           'examples': [articles[0], articles[1], articles[2],
                        articles[query_date.day * 2]],  # haha ok..
           'project': project.capitalize(),
           'permalink': DATE_PERMALINK_TMPL.format(lang=lang,
                                                   project=project,
                                                   year=query_date.year,
                                                   month=query_date.month,
                                                   day=query_date.day),
           'meta': {'fetched': datetime.utcnow().isoformat()}}
    outfile_name = DATA_PATH_TMPL.format(lang=lang,
                                         project=project,
                                         year=query_date.year,
                                         month=query_date.month,
                                         day=query_date.day)

    with tlog.critical('saving_single_day_stats') as rec:
        rec['out_file'] = os.path.abspath(outfile_name)
        try:
            out_file = codecs.open(outfile_name, 'w')
        except IOError:
            mkdir_p(os.path.dirname(outfile_name))
            out_file = codecs.open(outfile_name, 'w')
        with out_file:
            data_bytes = json.dumps(ret, indent=2, sort_keys=True)
            rec['len_bytes'] = len(data_bytes)
            out_file.write(data_bytes)

        rec.success('wrote {len_bytes} bytes to {out_file}')

    return
Example #25
def run(argv):
    # I need these genenames to be unique. So, if a SYMBOL is not unique, I use the ENSG instead.

    gene_dir = os.path.join(conf.data_dir, 'sites', 'genes')
    gencode_file = os.path.join(gene_dir, 'gencode.gtf.gz')
    bed_file = utils.get_cacheable_file_location(gene_dir, 'genes.bed')

    if not os.path.exists(bed_file):
        print('genes.bed will be stored at {bed_file!r}'.format(
            bed_file=bed_file))
        mkdir_p(gene_dir)
        if not os.path.exists(gencode_file):
            wget = utils.get_path('wget')
            # Link from <http://www.gencodegenes.org/releases/19.html>
            utils.run_cmd([
                wget, '-O', gencode_file,
                "ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz"
            ])

        good_genetypes = set('''
        protein_coding
        IG_C_gene
        IG_D_gene
        IG_J_gene
        IG_V_gene
        TR_C_gene
        TR_D_gene
        TR_J_gene
        TR_V_gene
        '''.split())

        genes = []
        with gzip.open(gencode_file, 'rt') as f:

            for l in f:
                if l.startswith('#'): continue
                r = l.split('\t')
                if r[2] != 'gene': continue

                # Remove pseudogenes and other unwanted types of genes.
                genetype = re.search(r'gene_type "(.+?)"', r[8]).group(1)
                if genetype not in good_genetypes: continue

                assert r[0].startswith('chr')
                chrom = r[0][3:]
                pos1, pos2 = int(r[3]), int(r[4])
                assert pos1 < pos2
                symbol = re.search(r'gene_name "(.+?)"', r[8]).group(1)
                ensg = re.search(r'gene_id "(ENSG[R0-9]+?)(?:\.[0-9]+)?"',
                                 r[8]).group(1)

                genes.append({
                    'chrom': chrom,
                    'start': pos1,
                    'end': pos2,
                    'symbol': symbol,
                    'ensg': ensg,
                })

        symbol_counts = collections.Counter(g['symbol'] for g in genes)
        for g in genes:
            if symbol_counts[g['symbol']] > 1:
                g['symbol'] = g['ensg']
        assert len(set(g['symbol'] for g in genes)) == len(genes)

        with open(bed_file, 'w') as f:
            writer = csv.DictWriter(
                f,
                delimiter='\t',
                fieldnames='chrom start end symbol ensg'.split(),
                lineterminator='\n')
            writer.writerows(genes)

    else:
        print("gencode is at {bed_file!r}".format(bed_file=bed_file))
Example #26
            f_out.write('{chrom}\t{1}\t{2}\t{3}\n'.format(*next_cpra, chrom=utils.chrom_order_list[next_cpra[0]]))

            for phenocode in next_cpras.pop(next_cpra):
                try:
                    next_cpra = next(readers[phenocode])
                except StopIteration:
                    del readers[phenocode]
                else:
                    next_cpras.setdefault(next_cpra, []).append(phenocode)

        assert not readers, list(readers.items())

    print('{:8} variants in {} <- {}'.format(n_variants, os.path.basename(out_filename), [os.path.basename(path) for path in input_filenames]))


mkdir_p(conf.data_dir + '/sites')
mkdir_p(conf.data_dir + '/tmp')

def merge_files_in_queue(lock, manna_dict):
    # Keep a work queue of files that need to get merged.
    # Each process takes files off the queue, merges them, and pushes the result back onto the queue.
    # But if there are fewer than MIN_NUM_FILES_TO_MERGE_AT_ONCE on the work queue, and there are files currently being merged (ie, the process is not alone),
    #    then the process just exits rather than merge a small number of files.

    while True:
        with lock:
            if len(manna_dict['files_to_merge']) <= 1: # no work to do.
                return
            elif len(manna_dict['files_to_merge']) >= MIN_NUM_FILES_TO_MERGE_AT_ONCE or len(manna_dict['files_being_merged']) == 0:
                # If there's a good amount of work to do (or if we're the only process left to do the work), we merge some files.
                files_to_merge_now = manna_dict['files_to_merge'][-NUM_FILES_TO_MERGE_AT_ONCE:]
Example #27
def copy_to_file(src_file, dst_file, app):
    file_ = os.path.join(app['path'], dst_file)
    dir_ = os.path.dirname(file_)
    fu.mkdir_p(dir_)
    shutil.copy(src_file, file_)
Example #28
def make_basedir(path: Union[str, Path]) -> None:
    mkdir_p(os.path.dirname(path))
Example #29
def _ensure_conf():

    if hasattr(conf, 'data_dir'):
        conf.data_dir = os.path.abspath(conf.data_dir)
    else:
        conf.set_default_value(
            'data_dir',
            os.path.abspath(
                os.environ.get('PHEWEB_DATADIR', False) or os.path.curdir))

    ## Get `conf.cache` working because it's needed for reporting errors
    def _configure_cache():
        conf.set_default_value(
            'cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache')))
        if conf.cache is False:
            return
        if conf.has_own_property('cache'):
            conf.cache = os.path.abspath(
                os.path.join(conf.data_dir, os.path.expanduser(conf.cache)))
        if not os.path.isdir(conf.cache):
            try:
                mkdir_p(conf.cache)
            except PermissionError:
                print(
                    "Warning: caching is disabled because the directory {!r} can't be created.\n"
                    .format(conf.cache) +
                    "If you don't want caching, set `cache = False` in your config.py."
                )
                conf.cache = False
                return
        if not os.access(conf.cache, os.R_OK):
            print(
                'Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n'
                .format(conf.cache) +
                "If you don't want caching, set `cache = False` in your config.py."
            )
            conf.cache = False

    _configure_cache()

    def _load_config_file():
        _config_filepath = os.path.join(conf.data_dir, 'config.py')
        if os.path.isfile(_config_filepath):
            try:
                _conf_module = imp.load_source('config', _config_filepath)
            except Exception:
                raise utils.PheWebError(
                    "PheWeb tried to load your config.py at {!r} but it failed."
                    .format(_config_filepath))
            else:
                for key in dir(_conf_module):
                    if not key.startswith('_'):
                        conf[key] = getattr(_conf_module, key)

    _load_config_file()

    conf.set_default_value(
        'lzjs_version', '0.9.0'
    )  # Global setting, rarely needs configuration: which version of LZjs to fetch from the CDN
    conf.set_default_value(
        'custom_templates',
        lambda: os.path.join(conf.data_dir, 'custom_templates'),
        is_function=True)
    conf.set_default_value('debug', False)
    conf.set_default_value('limit_num_variants', False)
    conf.set_default_value('assoc_min_maf', 0)
    conf.set_default_value('variant_inclusion_maf', 0)
    conf.set_default_value('within_pheno_mask_around_peak', int(500e3))
    conf.set_default_value('between_pheno_mask_around_peak', int(1e6))
    conf.set_default_value('manhattan_num_unbinned', 500)
    conf.set_default_value('manhattan_peak_max_count', 500)
    conf.set_default_value('manhattan_peak_pval_threshold', 1e-6)
    conf.set_default_value('manhattan_peak_sprawl_dist', int(200e3))
    conf.set_default_value('top_hits_pval_cutoff', 1e-6)

    # Whether to show a table of correlated phenotypes in the app
    conf.set_default_value('show_correlations', False)
    conf.set_default_value('pheno_correlations_pvalue_threshold', 0.05)

    conf.set_default_value('allow_variant_json_cors', False)
    conf.set_default_value('urlprefix', '')

    if 'minimum_maf' in conf:
        raise utils.PheWebError(
            "minimum_maf has been deprecated.  Please remove it and use assoc_min_maf and/or variant_inclusion_maf instead"
        )

    if conf.get('login', {}).get('whitelist', None):
        conf.login['whitelist'] = [
            addr.lower() for addr in conf.login['whitelist']
        ]

    if not os.path.isdir(conf.data_dir):
        mkdir_p(conf.data_dir)
    if not os.access(conf.data_dir, os.R_OK):
        raise utils.PheWebError(
            "Your data directory, {!r}, is not readable.".format(
                conf.data_dir))

    ### Parsing

    def scientific_int(value):
        '''like int(value) but accepts "1.3e-4"'''
        try:
            return int(value)
        except ValueError:
            x = float(value)
            if x.is_integer():
                return int(x)
            raise

    class Field:
        def __init__(self, d):
            self._d = d

        def parse(self, value):
            '''parse from input file'''
            # nullable
            if self._d['nullable'] and value in conf.parse.null_values:
                return ''
            # type
            x = self._d['type'](value)
            # range
            if 'range' in self._d:
                assert self._d['range'][0] is None or x >= self._d['range'][0]
                assert self._d['range'][1] is None or x <= self._d['range'][1]
            if 'sigfigs' in self._d:
                x = utils.round_sig(x, self._d['sigfigs'])
            if 'proportion_sigfigs' in self._d:
                if 0 <= x < 0.5:
                    x = utils.round_sig(x, self._d['proportion_sigfigs'])
                elif 0.5 <= x <= 1:
                    x = 1 - utils.round_sig(1 - x,
                                            self._d['proportion_sigfigs'])
                else:
                    raise utils.PheWebError(
                        'cannot use proportion_sigfigs on a number outside [0-1]'
                    )
            if 'decimals' in self._d:
                x = round(x, self._d['decimals'])
            return x

        def read(self, value):
            '''read from internal file'''
            if self._d['nullable'] and value == '':
                return ''
            x = self._d['type'](value)
            return x

    default_null_values = [
        '', '.', 'NA', 'N/A', 'n/a', 'nan', '-nan', 'NaN', '-NaN', 'null',
        'NULL'
    ]

    default_field = {
        'aliases': [],
        'required': False,
        'type': str,
        'nullable': False,
        'from_assoc_files':
        True,  # if this is False, then the field will not be parsed from input files, because annotation will add it.
    }

    default_per_variant_fields = OrderedDict([
        ('chrom', {
            'aliases': ['#CHROM', 'chr'],
            'required': True,
            'tooltip_underscoretemplate':
            '<b><%= d.chrom %>:<%= d.pos.toLocaleString() %> <%= d.ref %> / <%= d.alt %></b><br>',
            'tooltip_lztemplate': False,
        }),
        ('pos', {
            'aliases': ['BEG', 'BEGIN', 'BP'],
            'required': True,
            'type': scientific_int,
            'range': [0, None],
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('ref', {
            'aliases': ['reference', 'allele0', 'A1'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('alt', {
            'aliases': ['alternate', 'allele1', 'A2'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('rsids', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate':
            '<% _.each(_.filter((d.rsids||"").split(",")), function(rsid) { %>rsid: <%= rsid %><br><% }) %>',
            'tooltip_lztemplate': {
                'condition': 'rsid',
                'template': '<strong>{{rsid}}</strong><br>'
            },
        }),
        ('nearest_genes', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate':
            'nearest gene<%= _.contains(d.nearest_genes, ",")? "s":"" %>: <%= d.nearest_genes %><br>',
            'tooltip_lztemplate': False,
        }),
    ])

    default_per_assoc_fields = OrderedDict([
        ('pval', {
            'aliases': ['PVALUE', 'P'],
            'required': True,
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_lztemplate': {
                'condition':
                False,
                'template':
                ('{{#if pvalue}}P-value: <strong>{{pvalue|scinotation}}</strong><br>{{/if}}\n'
                 +
                 '{{#if pval}}P-value: <strong>{{pval|scinotation}}</strong><br>{{/if}}'
                 ),
            },
            'display': 'P-value',
        }),
        ('beta', {
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'Beta: <%= d.beta %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta %>)<% } %><br>',
            'tooltip_lztemplate':
            'Beta: <strong>{{beta}}</strong>{{#if sebeta}} ({{sebeta}}){{/if}}<br>',
            'display': 'Beta',
        }),
        ('sebeta', {
            'aliases': ['se'],
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('or', {
            'type': float,
            'nullable': True,
            'range': [0, None],
            'sigfigs': 2,
            'display': 'Odds Ratio',
        }),
        ('maf', {
            'type': float,
            'range': [0, 0.5],
            'sigfigs': 2,
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF',
        }),
        ('af', {
            'aliases': ['A1FREQ', 'FRQ'],
            'type': float,
            'range': [0, 1],
            'proportion_sigfigs': 2,
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'AF',
        }),
        ('ac', {
            'type': float,
            'range': [0, None],
            'decimals': 1,
            'display': 'AC',
        }),
        ('r2', {
            'type': float,
            'proportion_sigfigs': 2,
            'nullable': True,
            'display': 'R2',
        }),
        ('tstat', {
            'type': float,
            'sigfigs': 2,
            'nullable': True,
            'display': 'Tstat',
        }),
    ])

    default_per_pheno_fields = OrderedDict([
        ('num_cases', {
            'aliases': ['NS.CASE', 'N_cases'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#cases',
        }),
        ('num_controls', {
            'aliases': ['NS.CTRL', 'N_controls'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#controls',
        }),
        ('num_samples', {
            'aliases': ['NS', 'N'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#samples',
        }),
        # TODO: phenocode, phenostring, category, &c?
        # TODO: include `assoc_files` with {never_send: True}?
    ])

    conf.parse.null_values = deepcopy(default_null_values)
    conf.parse.per_variant_fields = deepcopy(default_per_variant_fields)
    conf.parse.per_assoc_fields = deepcopy(default_per_assoc_fields)
    conf.parse.per_pheno_fields = deepcopy(default_per_pheno_fields)
    conf.parse.fields = OrderedDict(
        itertools.chain(conf.parse.per_variant_fields.items(),
                        conf.parse.per_assoc_fields.items(),
                        conf.parse.per_pheno_fields.items()))
    assert len(conf.parse.fields) == len(conf.parse.per_variant_fields) + len(
        conf.parse.per_assoc_fields) + len(
            conf.parse.per_pheno_fields)  # no overlaps!

    if 'aliases' in conf:
        for alias, field in conf.aliases.items():
            conf.parse.fields[field].setdefault('aliases', []).append(alias)

    if 'null_values' in conf:
        conf.parse.null_values.extend(conf.null_values)

    # make all aliases lowercase and add parsers
    for field_name, field_dict in conf.parse.fields.items():
        for k, v in default_field.items():
            field_dict.setdefault(k, v)
        field_dict['aliases'] = list(
            set([field_name.lower()] +
                [alias.lower() for alias in field_dict['aliases']]))
        field_dict['_parse'] = Field(field_dict).parse
        field_dict['_read'] = Field(field_dict).read

    _repeated_aliases = [
        alias for alias, count in Counter(
            itertools.chain.from_iterable(
                f['aliases']
                for f in conf.parse.fields.values())).most_common()
        if count > 1
    ]
    if _repeated_aliases:
        raise utils.PheWebError(
            'The following aliases appear for multiple fields: {}'.format(
                _repeated_aliases))

    def get_tooltip_underscoretemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            if 'tooltip_underscoretemplate' in field:
                if field['tooltip_underscoretemplate'] is False:
                    continue
                else:
                    template += '<% if(_.has(d, ' + repr(
                        fieldname) + ')) { %>' + field[
                            'tooltip_underscoretemplate'] + '<% } %>\n'
            else:
                template += '<% if(_.has(d, ' + repr(
                    fieldname) + ')) { %>' + field.get(
                        'display', fieldname) + ': <%= d[' + repr(
                            fieldname) + '] %><br><% } %>\n'
        return template

    conf.parse.tooltip_underscoretemplate = get_tooltip_underscoretemplate()

    def get_tooltip_lztemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            lzt = field.get('tooltip_lztemplate', {})
            if lzt is False:
                continue
            if isinstance(lzt, str):
                lzt = {'template': lzt}
            if 'template' not in lzt:
                lzt['template'] = field.get(
                    'display',
                    fieldname) + ': <strong>{{' + fieldname + lzt.get(
                        'transform', '') + '}}</strong><br>'
            if 'condition' not in lzt:
                lzt['condition'] = fieldname

            if not lzt['condition']:
                template += lzt['template'] + '\n'
            else:
                template += '{{#if ' + lzt['condition'] + '}}' + lzt[
                    'template'] + '{{/if}}\n'
        return template

    conf.parse.tooltip_lztemplate = get_tooltip_lztemplate()
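
A minimal, hedged sketch of the cache-directory fallback implemented by _configure_cache above: try to create the directory, and treat caching as disabled (returning None here, where the original sets conf.cache = False) when creation or read access fails:

import os

def resolve_cache_dir(preferred='~/.pheweb/cache'):
    cache = os.path.abspath(os.path.expanduser(preferred))
    try:
        os.makedirs(cache, exist_ok=True)
    except PermissionError:
        return None  # caching disabled: the directory cannot be created
    if not os.access(cache, os.R_OK):
        return None  # caching disabled: the directory exists but is not readable
    return cache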
Example #30
0
def create_benchmark_file(app, path, contents = ''):
    loc = app["path"] + path  # the single-argument os.path.join was a no-op, so plain concatenation preserves the original behavior
    fu.mkdir_p(os.path.dirname(loc))
    with open(loc, 'w+') as f:
        f.write(contents)
    return loc
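
A hedged pathlib variant of the same "create the parent directories, then write the file" pattern; the lstrip mirrors the assumption above that `path` begins with a separator:

from pathlib import Path

def create_file_with_parents(base_dir, rel_path, contents=''):
    loc = Path(base_dir) / rel_path.lstrip('/')
    loc.parent.mkdir(parents=True, exist_ok=True)  # equivalent of fu.mkdir_p
    loc.write_text(contents)
    return loc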
Example #31
0
def run_measurements(producer, source_as_file, exp_start, utc):
    global DR_LOG_FILE
    global GENERAL_LOG_FILE
    global FRRP_LOG_FILE
    global EXP_ID
    global PREFIX
    global CONST_WAIT_TIME
    global ROUTER_CLIENTS

    dir_name = "results/{}_{}".format(EXP_ID, utc.timestamp)
    mkdir_p(dir_name)

    with open(DR_LOG_FILE, 'w') as f:
        f.write('# Entry formats\n')
        f.write('# Error:       -1|src,asn,msg,err_msg\n')
        f.write('# DR Entry:     0|asn,degree,has_default_route\n')

    with open(FRRP_LOG_FILE, 'w') as f:
        f.write('# Entry formats\n')
        f.write('# Error:              -1|src,asn,msg,err_msg\n')
        f.write(
            '# Final Graph:         1|src,asn,graph_path,measured_graph_file\n'
        )
        f.write(
            '# FRRP Result:         0|src,asn,atlas,m_id,poisons,poison_set,path,as-path,rtts,as-rtts\n'
        )
        f.write(
            '# Lost Connectivity:   2|src,asn,atlas,m_id,poisons,poison_set,path,as-path,rtts,as-rtts\n'
        )
        f.write(
            "# Invalid Result:     -2|src,asn,msg,first_path_not_connected\n")

    source_ases = parse_source_as_file(source_as_file)

    prefix_no_subnet = str(PREFIX.split('/')[0])
    start_collector(producer, PREFIX, '{}_{}'.format(EXP_ID, prefix_no_subnet))

    for source_asn, source_asn_degree in source_ases:
        try:
            with open(
                    os.path.join(dir_name, 'run_file_{}.p'.format(source_asn)),
                    'wb') as run_filename:
                frrp_run = FRRPRun(source_asn)

                log_message("{}Starting Experiment for AS{}{}".format(
                    '-' * 10, source_asn, '-' * 10))

                all_probes = find_probes_by_asn(source_asn)
                if not all_probes or len(all_probes) == 0:
                    log_message("AS{} does not have a stable probe!".format(
                        source_asn))
                    write_frrp_entry(
                        "-1|src,{},msg,no_stable_probe".format(source_asn))
                    write_default_route_entry(
                        "-1|src,{},msg,no_stable_probe".format(source_asn))
                    frrp_run.add_error("No stable probe")
                    frrp_run.no_stable_probe = True
                    pickle.dump(frrp_run, run_filename)
                    continue

                src_graph = nx.DiGraph()
                source_as = AS(source_asn)
                source_as.set_degree(source_asn_degree)

                src_graph.add_node(source_as)

                # Make normal announcement the very first time before doing a new ATLAS source
                if PORT2 is not None:
                    ports = [PORT, PORT2]
                else:
                    ports = [PORT]

                log_message(
                    'Making announcement to {} and waiting {} seconds for AS SEQ: {}'
                    .format(PREFIX, CONST_WAIT_TIME, None))
                make_announcement(ports)

                time.sleep(CONST_WAIT_TIME)

                poison_cache = PoisonCache()
                err = measure_single_source(frrp_run,
                                            src_graph,
                                            source_as,
                                            poison_cache=poison_cache,
                                            first=True)
                source_as.add_poison_cache(poison_cache)
                frrp_run.poison_cache = poison_cache

                if err == -1:
                    log_message(
                        "ATLAS AS {} returns error when trying to traceroute...moving on."
                        .format(source_asn))
                    with open('misnomer_atlas_ases.txt', 'a') as f:
                        f.write('{}\n'.format(source_asn))
                    write_frrp_entry(
                        "-1|src,{},msg,atlas_source_traceroute_error".format(
                            source_asn))
                    write_default_route_entry(
                        "-1|src,{},msg,atlas_source_traceroute_error".format(
                            source_asn))
                    frrp_run.add_error("source_traceroute_error")
                    pickle.dump(frrp_run, run_filename)
                    continue
                elif err == -2:
                    log_message("ATLAS AS {} returns empty response.".format(
                        source_asn))
                    with open('rerun_atlas_ases.txt', 'a') as f:
                        f.write('{}\n'.format(source_asn))
                    write_frrp_entry(
                        "-1|src,{},msg,atlas_source_empty_response".format(
                            source_asn))
                    write_default_route_entry(
                        "-1|src,{},msg,atlas_source_empty_response".format(
                            source_asn))
                    frrp_run.add_error("empty_source_response")
                    pickle.dump(frrp_run, run_filename)
                    continue

                has_default_route = src_graph.out_degree(source_as) <= 1
                log_message("ATLAS AS {} has a default route: {}".format(
                    source_asn, has_default_route))
                write_default_route_entry('0|{},{},{}'.format(
                    source_asn, source_as.degree, has_default_route))
                frrp_run.has_default_route = has_default_route

                mkdir_p('{}/graph_results'.format(dir_name))
                draw_graph(src_graph, '{}/graph_results'.format(dir_name),
                           'atlas-as-{}-measured-graph'.format(source_asn))

                gpickle_name = '{}/graph_results/atlas-as-{}-{}-{}-measured-graph.gpickle'.format(
                    dir_name, source_asn, exp_start, EXP_ID)
                nx.write_gpickle(src_graph, gpickle_name)
                frrp_run.graph_path = gpickle_name
                write_frrp_entry('1|src,{},graph_path,{}'.format(
                    source_asn, gpickle_name))

                pickle.dump(frrp_run, run_filename)
        except Exception as e:
            log_message("Received exception: {}".format(e), print_stack=True)
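
A hedged sketch of the per-run results layout used above: one timestamped directory per experiment and one pickle per source ASN. The names here are illustrative, not the script's actual globals:

import os
import pickle
import time

def save_run(exp_id, source_asn, run_obj, base='results'):
    # Create results/<exp>_<timestamp>/ and dump one pickle per source ASN into it.
    run_dir = os.path.join(base, '{}_{}'.format(exp_id, int(time.time())))
    os.makedirs(run_dir, exist_ok=True)
    run_path = os.path.join(run_dir, 'run_file_{}.p'.format(source_asn))
    with open(run_path, 'wb') as f:
        pickle.dump(run_obj, f)
    return run_path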
Example #32
0
def ensure_path(path):
    path = os.path.expanduser(path)
    if not os.path.exists(path):
        with log.sky_log.debug('ensure_path', path=path):
            fileutils.mkdir_p(path)
    return path
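
A hedged stdlib equivalent of ensure_path; os.makedirs(exist_ok=True) avoids the check-then-create race in the exists() test above, at the cost of dropping the project-specific debug logging:

import os

def ensure_path_stdlib(path):
    path = os.path.expanduser(path)
    os.makedirs(path, exist_ok=True)
    return path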
Example #33
0
def test_dir():
    path = os.path.join(os.path.dirname(os.path.realpath(__file__)), '..', '..', 'tmp', 'tests')
    fu.mkdir_p(path)
    return tempfile.mkdtemp(dir = path)
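
A hedged sketch of the same idea with an explicit root argument: pre-create a shared root, then hand out a unique per-test directory via tempfile.mkdtemp:

import os
import tempfile

def make_test_dir(root='tmp/tests'):
    os.makedirs(root, exist_ok=True)
    return tempfile.mkdtemp(dir=root)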
Example #34
0
def extract_zip(zip_file, dest_dir, members=None):
    if not zipfile.is_zipfile(zip_file): return
    mkdir_p(dest_dir)
    with zipfile.ZipFile(zip_file, 'r') as zf:
        zf.extractall(dest_dir, members)
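
A hedged, stdlib-only variant of extract_zip that raises on non-zip input instead of returning silently, which makes failures easier to spot in calling code:

import os
import zipfile

def extract_zip_strict(zip_file, dest_dir, members=None):
    if not zipfile.is_zipfile(zip_file):
        raise ValueError('{!r} is not a zip archive'.format(zip_file))
    os.makedirs(dest_dir, exist_ok=True)
    with zipfile.ZipFile(zip_file, 'r') as zf:
        zf.extractall(dest_dir, members)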
Example #35
0
def _writer(self, file, relate_dir, out_dir):
    out_dir = join_path(out_dir, relate_dir)
    mkdir_p(out_dir)
    out_file = join_path(out_dir, os.path.basename(file))
    return Line_Writer(out_file)
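
A hedged sketch of the same "mirror the input's relative directory under out_dir" idea, returning an ordinary file handle instead of the project-specific Line_Writer:

import os

def open_mirrored_output(src_file, relate_dir, out_dir):
    target_dir = os.path.join(out_dir, relate_dir)
    os.makedirs(target_dir, exist_ok=True)
    return open(os.path.join(target_dir, os.path.basename(src_file)), 'w')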
Example #36
0
def main(random_seed, task_number, test_on_gt, only_test, overfit):
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    n_epochs = 3
    lr = 1e-4
    wd = 0

    train_db = CocoTasksRanker(task_number)

    if test_on_gt:
        test_db = CocoTasksRankerTestGT(task_number)
    else:
        test_db = CocoTasksRankerTest(task_number)

    network = Ranker()
    optimizer = Adam(network.parameters(), lr=lr, weight_decay=wd)
    experiment = Experiment(network, train_db, optimizer=optimizer, tensorboard=True)

    folder = "single-task-ranker-baseline-tn:{tn}-seed:{s}".format(
        tn=task_number, s=random_seed
    )

    folder = os.path.join(SAVING_DIRECTORY, folder)
    mkdir_p(folder)

    if not only_test:
        # train
        experiment.train_n_epochs(n_epochs, lr_scheduler=True, overfit=overfit)

        # save model
        torch.save(network.state_dict(), os.path.join(folder, "model.mdl"))
    else:
        # load model
        network.load_state_dict(torch.load(os.path.join(folder, "model.mdl")))

    # test model
    detections = experiment.do_test(test_db)

    # save detections
    with open(
        os.path.join(folder, "detections_teg:{teg}.json".format(teg=test_on_gt)), "w"
    ) as f:
        json.dump(detections, f)

    detections_per_image = defaultdict(list)
    for d in detections:
        detections_per_image[d["image_id"]].append(d)

    fusion = "top_k"

    fused_detections = fuse(detections_per_image=detections_per_image)

    with open(
        os.path.join(
            folder, "detections_teg:{teg}_f:{f}.json".format(teg=test_on_gt, f=fusion)
        ),
        "w",
    ) as f:
        json.dump(fused_detections, f)

    # perform evaluation
    with redirect_stdout(open(os.devnull, "w")):
        gtCOCO = test_db.task_coco
        dtCOCO = gtCOCO.loadRes(
            os.path.join(
                folder,
                "detections_teg:{teg}_f:{f}.json".format(teg=test_on_gt, f=fusion),
            )
        )
        cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox")
        cocoEval.params.catIds = 1
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

    print("fusion: %s - mAP:\t\t %1.6f" % (fusion, cocoEval.stats[0]))
    print("fusion: %s - mAP@0.5:\t\t %1.6f" % (fusion, cocoEval.stats[1]))

    # save evaluation performance
    with open(
        os.path.join(
            folder, "result_teg:{teg}_f:{f}.json".format(teg=test_on_gt, f=fusion)
        ),
        "w",
    ) as f:
        f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
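
A hedged sketch of the per-experiment folder convention used in these training scripts: the folder name encodes the task number and seed, and the directory is created up front. SAVING_DIRECTORY and the name template are assumptions passed in as arguments here:

import os

def experiment_folder(saving_directory, name_tmpl, task_number, seed):
    folder = os.path.join(saving_directory,
                          name_tmpl.format(tn=task_number, s=seed))
    os.makedirs(folder, exist_ok=True)
    return folder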
Example #37
0
def create_app_structure(path_app):
    bf.mkdir_p(path_app)
    bf.mkdir_p(path_app + "/dockerfiles")
Example #38
0
def main(random_seed, task_number, test_on_gt, only_test, overfit):
    random.seed(random_seed)
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)
    torch.cuda.manual_seed_all(random_seed)

    n_epochs = 3
    lr = 1e-2
    wd = 0

    train_db = CocoTasksGT(task_number, "train")

    if test_on_gt:
        test_db = CocoTasksTestGT(task_number)
    else:
        test_db = CocoTasksTest(task_number)

    network = ClassifierBaselineNetwork()
    optimizer = SGD(network.parameters(), lr=lr, weight_decay=wd)
    experiment = ClassifierExperiment(network, train_db, optimizer=optimizer)

    folder = "single-task-classifier-baseline-tn:{tn}-seed:{s}".format(
        tn=task_number, s=random_seed)

    folder = os.path.join(SAVING_DIRECTORY, folder)
    mkdir_p(folder)

    if not only_test:
        # train
        experiment.train_n_epochs(n_epochs, lr_scheduler=True, overfit=overfit)

        # save model
        torch.save(network.state_dict(), os.path.join(folder, "model.mdl"))
    else:
        # load model
        network.load_state_dict(torch.load(os.path.join(folder, "model.mdl")))

    # test_model
    detections = experiment.do_test(test_db)

    # save detections
    with open(
            os.path.join(folder,
                         "detections-teg:{teg}.json".format(teg=test_on_gt)),
            "w") as f:
        json.dump(detections, f)

    # perform evaluation
    with redirect_stdout(open(os.devnull, "w")):
        gtCOCO = test_db.task_coco
        dtCOCO = gtCOCO.loadRes(
            os.path.join(folder,
                         "detections-teg:{teg}.json".format(teg=test_on_gt)))
        cocoEval = COCOeval(gtCOCO, dtCOCO, "bbox")
        cocoEval.params.catIds = 1
        cocoEval.evaluate()
        cocoEval.accumulate()
        cocoEval.summarize()

    print("mAP:\t\t %1.6f" % cocoEval.stats[0])
    print("mAP@0.5:\t\t %1.6f" % cocoEval.stats[1])

    # save evaluation performance
    with open(
            os.path.join(folder,
                         "result-teg:{teg}.txt".format(teg=test_on_gt)),
            "w") as f:
        f.write("%1.6f, %1.6f" % (cocoEval.stats[0], cocoEval.stats[1]))
Example #39
0
def get_cacheable_file_location(default_relative_dir, basename):
    if conf.cache:
        return os.path.join(conf.cache, basename)
    mkdir_p(get_generated_path(default_relative_dir))
    return get_generated_path(default_relative_dir, basename)
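
A hedged, self-contained version of the "prefer the cache directory, otherwise fall back to a generated-files directory" pattern; cache_dir=None stands in for conf.cache being disabled:

import os

def cacheable_location(cache_dir, generated_root, rel_dir, basename):
    if cache_dir:
        return os.path.join(cache_dir, basename)
    target = os.path.join(generated_root, rel_dir)
    os.makedirs(target, exist_ok=True)
    return os.path.join(target, basename)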
Example #40
0
def run_game():
    parser = argparse.ArgumentParser(description="Legend of Harren")
    parser.add_argument(
        "-l",
        "--log-level",
        default="INFO",
        choices=("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"),
        help="Logging level for command output.",
    )
    parser.add_argument("-L",
                        "--logfile",
                        dest="logfile",
                        default=None,
                        help="Location to place a log of the process output")
    parser.add_argument("-V",
                        "--version",
                        action="version",
                        version=get_version(),
                        help="Display the version number.")
    parser.add_argument(
        "-g",
        "--fullscreen",
        dest="fullscreen",
        action="store_true",
        help="Launch the new game in fullscreen mode",
    )
    parser.add_argument(
        "--no-splash",
        dest="no_splash",
        action="store_true",
        help="Skip the initial loading splash screen",
    )
    parser.add_argument(
        "--no-sound",
        dest="no_sound",
        action="store_true",
        help="Disable sound",
    )

    parsed_args = parser.parse_args()

    # Get logging related arguments & the configure logging
    if parsed_args.logfile:
        logfile = os.path.abspath(parsed_args.logfile)
    else:
        logfile = None

    # Don't bother with a file handler if we're not logging to a file
    handlers = (["console", "filehandler"] if logfile else [
        "console",
    ])

    # The base logging configuration
    BASE_CONFIG = {
        "version": 1,
        "disable_existing_loggers": False,
        "formatters": {
            "ConsoleFormatter": {
                "()": ColorFormatter,
                "format": "%(levelname)s: %(message)s",
                "datefmt": "%Y-%m-%d %H:%M:%S",
            },
            "VerboseFormatter": {
                "()":
                ColorFormatter,
                "format": ("%(levelname)-8s: %(asctime)s '%(message)s' "
                           "%(name)s:%(lineno)s"),
                "datefmt":
                "%Y-%m-%d %H:%M:%S",
            },
            "FileFormatter": {
                "()":
                ColorStripper,
                "format": ("%(levelname)-8s: %(asctime)s '%(message)s' "
                           "%(name)s:%(lineno)s"),
                "datefmt":
                "%Y-%m-%d %H:%M:%S",
            },
        },
        "handlers": {
            "console": {
                "level": "DEBUG",
                "class": "logging.StreamHandler",
                "formatter": "ConsoleFormatter",
            },
        },
        "loggers": {
            "harren": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
            "pygame": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
            "pytmx": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
            "pyscroll": {
                "handlers": handlers,
                "level": parsed_args.log_level,
            },
        },
    }

    # If we have a log file, modify the dict to add in the filehandler conf
    if logfile:
        BASE_CONFIG["handlers"]["filehandler"] = {
            "level": parsed_args.log_level,
            "class": "logging.handlers.RotatingFileHandler",
            "filename": logfile,
            "formatter": "FileFormatter",
        }

    if parsed_args.log_level == "DEBUG":
        # Set a more noisy formatter
        BASE_CONFIG["handlers"]["console"]["formatter"] = "VerboseFormatter"

    # Setup the loggers
    dictConfig(BASE_CONFIG)

    LOG.info("#g<Launching Harren RPG!>")
    # Setup SDL Environment Variables
    os.environ["SDL_VIDEO_CENTERED"] = "1"

    try:
        import pygame  # noqa
    except ImportError:
        LOG.exception("#y<PyGame not found... exiting.>")
        sys.exit(1)

    # Make the config folder if it doesn't already exist
    mkdir_p(resources.CONFIG_FOLDER)

    # Disable or enable sound
    if parsed_args.no_sound:
        sound_enabled = False
    else:
        sound_enabled = True

    from harren.game_loop import GameState

    game = GameState(
        fullscreen=parsed_args.fullscreen,
        no_splash=parsed_args.no_splash,
        sound_enabled=sound_enabled,
    )
    game.main()
    __exit()
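
A hedged, minimal sketch of the logging setup above: only wire a file handler into dictConfig when a logfile path was actually supplied (formatters and the extra library loggers are omitted for brevity):

import logging.config

def configure_logging(level='INFO', logfile=None):
    handlers = ['console'] + (['filehandler'] if logfile else [])
    config = {
        'version': 1,
        'disable_existing_loggers': False,
        'handlers': {
            'console': {'class': 'logging.StreamHandler', 'level': 'DEBUG'},
        },
        'loggers': {'harren': {'handlers': handlers, 'level': level}},
    }
    if logfile:
        config['handlers']['filehandler'] = {
            'class': 'logging.handlers.RotatingFileHandler',
            'filename': logfile,
            'level': level,
        }
    logging.config.dictConfig(config)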
Example #41
0
def _ensure_conf():

    if not hasattr(conf, 'authentication'):
        conf.set_default_value('authentication', False)

    if hasattr(conf, 'data_dir'):
        conf.data_dir = os.path.abspath(conf.data_dir)
    else:
        conf.set_default_value(
            'data_dir',
            os.path.abspath(
                os.environ.get('PHEWEB_DATADIR', False) or os.path.curdir))

    ## Get `conf.cache` working because it's needed for reporting errors
    def _configure_cache():
        conf.set_default_value(
            'cache', os.path.abspath(os.path.expanduser('~/.pheweb/cache')))
        if conf.cache is False:
            return
        if conf.has_own_property('cache'):
            conf.cache = os.path.abspath(
                os.path.join(conf.data_dir, os.path.expanduser(conf.cache)))
        if not os.path.isdir(conf.cache):
            try:
                mkdir_p(conf.cache)
            except PermissionError:
                print(
                    "Warning: caching is disabled because the directory {!r} can't be created.\n"
                    .format(conf.cache) +
                    "If you don't want caching, set `cache = False` in your config.py."
                )
                conf.cache = False
                return
        if not os.access(conf.cache, os.R_OK):
            print(
                'Warning: the directory {!r} is configured to be your cache directory but it is not readable.\n'
                .format(conf.cache) +
                "If you don't want caching, set `cache = False` in your config.py."
            )
            conf.cache = False

    _configure_cache()

    def _load_config_file():
        _config_filepath = os.path.join(conf.data_dir, 'config.py')
        if os.path.isfile(_config_filepath):
            try:
                _conf_module = imp.load_source('config', _config_filepath)
            except Exception:
                raise utils.PheWebError(
                    "PheWeb tried to load your config.py at {!r} but it failed."
                    .format(_config_filepath))
            else:
                for key in dir(_conf_module):
                    if not key.startswith('_'):
                        conf[key] = getattr(_conf_module, key)
        print(str(conf))
        if conf.authentication:
            try:
                _auth_module = imp.load_source('config',
                                               conf.authentication_file)
            except Exception:
                raise utils.PheWebError(
                    "PheWeb tried to load your authentication file at {!r} but it failed."
                    .format(conf.authentication_file))
            else:
                for key in dir(_auth_module):
                    if not key.startswith('_'):
                        conf[key] = getattr(_auth_module, key)

    _load_config_file()

    conf.set_default_value(
        'custom_templates',
        lambda: os.path.join(conf.data_dir, 'custom_templates'),
        is_function=True)
    conf.set_default_value('debug', False)
    conf.set_default_value('quick', False)
    conf.set_default_value('assoc_min_maf', 0)
    conf.set_default_value('variant_inclusion_maf', 0)
    conf.set_default_value('within_pheno_mask_around_peak', int(500e3))
    conf.set_default_value('between_pheno_mask_around_peak', int(1e6))
    conf.set_default_value('manhattan_num_unbinned', 2000)
    conf.set_default_value('manhattan_unbin_anyway_pval', 5e-8)
    conf.set_default_value('manhattan_hla_num_unbinned', 200)
    conf.set_default_value('hla_begin', 26000000)
    conf.set_default_value('hla_end', 36000000)
    conf.set_default_value("n_query_threads", 4)
    conf.set_default_value('peak_pval_cutoff', 1e-6)

    if 'minimum_maf' in conf:
        raise utils.PheWebError(
            "minimum_maf has been deprecated.  Please remove it and use assoc_min_maf and/or variant_inclusion_maf instead"
        )

    if conf.get('login', {}).get('whitelist', None):
        conf.login['whitelist'] = [
            addr.lower() for addr in conf.login['whitelist']
        ]

    if not os.path.isdir(conf.data_dir):
        mkdir_p(conf.data_dir)
    if not os.access(conf.data_dir, os.R_OK):
        raise utils.PheWebError(
            "Your data directory, {!r}, is not readable.".format(
                conf.data_dir))

    ### Parsing

    def scientific_int(value):
        '''like int(value) but accepts "1.3e-4"'''
        try:
            return int(value)
        except ValueError:
            x = float(value)
            if x.is_integer():
                return int(x)
            raise

    class Field:
        def __init__(self, d):
            self._d = d

        def parse(self, value):
            '''parse from input file'''
            # nullable
            if self._d['nullable'] and value in conf.parse.null_values:
                return ''
            # type
            x = self._d['type'](value)
            # range
            if 'range' in self._d:
                assert self._d['range'][0] is None or x >= self._d['range'][0]
                assert self._d['range'][1] is None or x <= self._d['range'][1]
            if 'sigfigs' in self._d:
                x = utils.round_sig(x, self._d['sigfigs'])
            if 'decimals' in self._d:
                x = round(x, self._d['decimals'])
            return x

        def read(self, value):
            '''read from internal file'''
            if self._d['nullable'] and (value == '' or value == 'NA'):
                return ''
            x = self._d['type'](value)
            if 'range' in self._d:
                assert self._d['range'][0] is None or x >= self._d['range'][0]
                assert self._d['range'][1] is None or x <= self._d['range'][1]
            return x

    default_null_values = ['', '.', 'NA', 'nan', 'NaN']

    default_field = {
        'aliases': [],
        'required': False,
        'type': str,
        'nullable': False,
        'from_assoc_files':
        True,  # if this is False, then the field will not be parsed from input files, because annotation will add it.
    }

    default_per_variant_fields = OrderedDict([
        ('chrom', {
            'aliases': ['#CHROM', 'chr'],
            'required': True,
            'tooltip_underscoretemplate':
            '<b><%= d.chrom %>:<%= d.pos.toLocaleString() %> <%= d.ref %> / <%= d.alt %></b><br>',
            'tooltip_lztemplate': False,
        }),
        ('pos', {
            'aliases': ['BEG', 'BEGIN', 'BP'],
            'required': True,
            'type': scientific_int,
            'range': [0, None],
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('ref', {
            'aliases': ['reference', 'allele0'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('alt', {
            'aliases': ['alternate', 'allele1'],
            'required': True,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('rsids', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate':
            '<% _.each(_.filter((d.rsids||"").split(",")), function(rsid) { %>rsid: <%= rsid %><br><% }) %>',
            'tooltip_lztemplate': {
                'condition': 'rsid',
                'template': '<strong>{{rsid}}</strong><br>'
            },
        }),
        ('nearest_genes', {
            'from_assoc_files': False,
            'tooltip_underscoretemplate':
            'nearest gene<%= _.contains(d.nearest_genes, ",")? "s":"" %>: <%= d.nearest_genes %><br>',
            'tooltip_lztemplate': False,
        })
    ])

    default_per_assoc_fields = OrderedDict([
        ('pheno', {
            'tooltip_lztemplate':
            'phenotype: <strong>{{trait:pheno}}</strong><br>',
        }),
        ('pval', {
            'aliases': ['PVALUE'],
            'required': True,
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'p-value: <%= pValueToReadable(d.pval) %><br>',
            'tooltip_lztemplate': {
                'condition':
                False,
                'template':
                ('{{#if trait:pvalue}}p-value: <strong>{{trait:pvalue|scinotation}}</strong><br>{{/if}}\n'
                 +
                 '{{#if trait:pval}}p-value: <strong>{{trait:pval|scinotation}}</strong><br>{{/if}}'
                 ),
            },
            'display': 'P-value',
        }),
        ('beta', {
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'beta: <%= d.beta.toFixed(2) %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta.toFixed(2) %>)<% } %><br>',
            'tooltip_lztemplate':
            'beta: <strong>{{trait:beta}}</strong>{{#if trait:sebeta}} ({{trait:sebeta}}){{/if}}<br>',
            'display': 'Beta',
        }),
        ('sebeta', {
            'aliases': ['se'],
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('or', {
            'type': float,
            'nullable': True,
            'range': [0, None],
            'sigfigs': 2,
            'display': 'Odds Ratio',
        }),
        ('maf', {
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF: <%= d.maf.toFixed(4) %><br>',
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF',
        }),
        ('maf_case', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'MAF cases: <%= d.maf_case.toFixed(4) %><br>',
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF cases',
        }),
        ('maf_control', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'MAF controls: <%= d.maf_control.toFixed(4) %><br>',
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF controls',
        }),
        ('maf_cases', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'MAF cases: <%= d.maf_cases.toFixed(4) %><br>',
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF cases',
        }),
        ('maf_controls', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'MAF controls: <%= d.maf_controls.toFixed(4) %><br>',
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF controls',
        }),
        (
            'af',
            {
                'aliases': ['A1FREQ'],
                'type': float,
                'range': [0, 1],
                'sigfigs':
                2,  # TODO: never round 99.99% to 100%.  Make sure MAF would have the right sigfigs.
                'tooltip_lztemplate': {
                    'transform': '|percent'
                },
                'display': 'AF',
            }),
        ('ac', {
            'type': float,
            'range': [0, None],
            'decimals': 1,
            'tooltip_underscoretemplate': 'AC: <%= d.ac.toFixed(1) %> <br>',
            'display': 'AC',
        }),
        ('r2', {
            'type': float,
            'sigfigs': 2,
            'nullable': True,
            'display': 'R2',
        }),
        ('tstat', {
            'type': float,
            'sigfigs': 2,
            'nullable': True,
            'display': 'Tstat',
        }),
        ('n_cohorts', {
            'type': int
        }),
        ('n_hom_cases', {
            'type': float
        }),
        ('n_het_cases', {
            'type': float
        }),
        ('n_hom_controls', {
            'type': float
        }),
        ('n_het_controls', {
            'type': float
        })
    ])

    default_per_pheno_fields = OrderedDict([
        ('n_case', {
            'aliases': ['NS.CASE', 'N_cases'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#cases',
        }),
        ('n_control', {
            'aliases': ['NS.CTRL', 'N_controls'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#controls',
        }),
        ('num_samples', {
            'aliases': ['NS', 'N'],
            'type': int,
            'nullable': True,
            'range': [0, None],
            'display': '#samples',
        }),
        # TODO: phenocode, phenostring, category, &c?
        # TODO: include `assoc_files` with {never_send: True}?
    ])

    default_GWAS_catalog_fields = OrderedDict([
        ('pheno', {
            'tooltip_lztemplate': 'phenotype: <strong>{{pheno}}</strong><br>',
        }),
        ('pval', {
            'aliases': ['PVALUE'],
            'required': True,
            'type': float,
            'nullable': True,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'p-value: <%= pValueToReadable(d.pval) %><br>',
            'tooltip_lztemplate': {
                'condition':
                False,
                'template':
                ('{{#if pvalue}}p-value: <strong>{{pvalue|scinotation}}</strong><br>{{/if}}\n'
                 +
                 '{{#if pval}}p-value: <strong>{{pval|scinotation}}</strong><br>{{/if}}'
                 ),
            },
            'display': 'P-value',
        }),
        ('beta', {
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'beta: <%= d.beta.toFixed(2) %><% if(_.has(d, "sebeta")){ %> (<%= d.sebeta.toFixed(2) %>)<% } %><br>',
            'tooltip_lztemplate':
            'beta: <strong>{{beta}}</strong>{{#if sebeta}} ({{sebeta}}){{/if}}<br>',
            'display': 'Beta',
        }),
        ('sebeta', {
            'aliases': ['se'],
            'type': float,
            'nullable': True,
            'sigfigs': 2,
            'tooltip_underscoretemplate': False,
            'tooltip_lztemplate': False,
        }),
        ('or', {
            'type': float,
            'nullable': True,
            'range': [0, None],
            'sigfigs': 2,
            'display': 'Odds Ratio',
        }),
        ('maf', {
            'type': float,
            'range': [0, 0.5],
            'sigfigs': 2,
            'tooltip_underscoretemplate': 'MAF: <%= d.maf.toFixed(4) %><br>',
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF',
        }),
        ('maf_cases', {
            'type': float,
            'range': [0, 1],
            'sigfigs': 2,
            'tooltip_underscoretemplate':
            'MAF cases: <%= d.maf_cases.toFixed(4) %><br>',
            'tooltip_lztemplate': {
                'transform': '|percent'
            },
            'display': 'MAF cases',
        })
    ])

    conf.parse.null_values = deepcopy(default_null_values)
    conf.parse.per_variant_fields = deepcopy(default_per_variant_fields)
    conf.parse.per_assoc_fields = deepcopy(default_per_assoc_fields)
    conf.parse.per_pheno_fields = deepcopy(default_per_pheno_fields)
    conf.parse.fields = OrderedDict(
        itertools.chain(conf.parse.per_variant_fields.items(),
                        conf.parse.per_assoc_fields.items(),
                        conf.parse.per_pheno_fields.items()))
    assert len(conf.parse.fields) == len(conf.parse.per_variant_fields) + len(
        conf.parse.per_assoc_fields) + len(
            conf.parse.per_pheno_fields)  # no overlaps!

    if 'aliases' in conf:
        for alias, field in conf.aliases.items():
            conf.parse.fields[field].setdefault('aliases', []).append(alias)

    if 'null_values' in conf:
        conf.parse.null_values.extend(conf.null_values)

    # make all aliases lowercase and add parsers
    for field_name, field_dict in conf.parse.fields.items():
        for k, v in default_field.items():
            field_dict.setdefault(k, v)
        field_dict['aliases'] = list(
            set([field_name.lower()] +
                [alias.lower() for alias in field_dict['aliases']]))
        field_dict['_parse'] = Field(field_dict).parse
        field_dict['_read'] = Field(field_dict).read

    _repeated_aliases = [
        alias for alias, count in Counter(
            itertools.chain.from_iterable(
                f['aliases']
                for f in conf.parse.fields.values())).most_common()
        if count > 1
    ]
    if _repeated_aliases:
        raise utils.PheWebError(
            'The following aliases appear for multiple fields: {}'.format(
                _repeated_aliases))

    def get_tooltip_underscoretemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            if 'tooltip_underscoretemplate' in field:
                if field['tooltip_underscoretemplate'] is False:
                    continue
                else:
                    template += '<% if(_.has(d, ' + repr(
                        fieldname) + ')) { %>' + field[
                            'tooltip_underscoretemplate'] + '<% } %>\n'
            else:
                template += '<% if(_.has(d, ' + repr(
                    fieldname) + ')) { %>' + field.get(
                        'display', fieldname) + ': <%= d[' + repr(
                            fieldname) + '] %><br><% } %>\n'
        return template

    conf.parse.tooltip_underscoretemplate = get_tooltip_underscoretemplate()

    def get_tooltip_lztemplate():
        template = ''
        for fieldname, field in conf.parse.fields.items():
            lzt = field.get('tooltip_lztemplate', {})
            if lzt is False:
                continue
            if isinstance(lzt, str):
                lzt = {'template': lzt}
            if 'template' not in lzt:
                lzt['template'] = field.get(
                    'display',
                    fieldname) + ': <strong>{{' + fieldname + lzt.get(
                        'transform', '') + '}}</strong><br>'
            if 'condition' not in lzt:
                lzt['condition'] = fieldname

            if not lzt['condition']:
                template += lzt['template'] + '\n'
            else:
                template += '{{#if ' + lzt['condition'] + '}}' + lzt[
                    'template'] + '{{/if}}\n'
        return template

    conf.parse.tooltip_lztemplate = get_tooltip_lztemplate()

    ## these fields will be exported in this order when exporting variants to TSV.
    conf.set_default_value("var_export_fields", [
        'chrom', 'pos', 'ref', 'alt', 'maf', 'maf_cases', 'maf_controls',
        'most_severe', 'nearest_genes', 'rsids', "annotation.ac",
        "annotation.ac_hemi", "annotation.ac_het", "annotation.ac_hom",
        'annotation.an', "annotation.info", "annotation.hc_lof"
    ])

    ## these fields will be exported in this order when exporting a variant's top phenotypes to TSV.
    conf.set_default_value("var_top_pheno_export_fields", [
        "phenocode", "phenostring", "category", "pval", "beta", "maf",
        "maf_case", "maf_control", "n_case", "n_control"
    ])
    conf.set_default_value("gene_pheno_export_fields", [
        "variant.varid", "assoc.pval", "assoc.beta", "assoc.variant.rsids",
        "pheno.category", "pheno.num_cases", "pheno.num_controls",
        "pheno.phenocode", "pheno.phenostring",
        "variant.annotation.gnomad.AF_fin", "variant.annotation.gnomad.AF_nfe"
    ])
    conf.set_default_value("drug_export_fields", [
        "drug.molecule_name", "drug.molecule_type",
        "evidence.target2drug.action_type", "disease.efo_info.label",
        "evidence.drug2clinic.clinical_trial_phase.label", "drug.id"
    ])
    conf.set_default_value("lof_export_fields", [
        "pheno", "variants", "p_value", "beta", "ref_alt_cases",
        "ref_alt_ctrls"
    ])

    conf.set_default_value("report_conf", {"func_var_assoc_threshold": 0.0001})
    conf.set_default_value(
        "vis_conf", {
            "loglog_threshold": 10,
            "info_tooltip_threshold": 0.8,
            "manhattan_colors": ['rgb(53,0,212)', 'rgb(40, 40, 40)']
        })
    conf.set_default_value(
        "locuszoom_conf", {
            "p_threshold": 0.05,
            "prob_threshold": 0.0001,
            "ld_service": "finngen",
            "ld_max_window": 5000000
        })
    conf.set_default_value("lof_threshold", 1e-3)
    conf.set_default_value("noindex", True)
    conf.set_default_value("anno_cpra", True)
    conf.set_default_value("show_ukbb", False)
    conf.set_default_value("show_risteys", False)
Example #42
0
def get_cacheable_file_location(default_relative_dir, basename):
    if conf.cache:
        return os.path.join(conf.cache, basename)
    mkdir_p(get_generated_path(default_relative_dir))
    return get_generated_path(default_relative_dir, basename)