def test_get_dist_matrix(self):
        """Test dnadiff pairwise on multiple bins and get the resulting
        distance matrix"""
        names = [
            "sample0_gt1000_bin0",
            "sample0_gt1000_bin10",
            "sample0_gt1000_bin11",
            "sample0_gt1000_bin1",
            "sample0_gt1000_bin2",
            "sample1_gt1000_bin51",
            "sample1_gt1000_bin67",
            "sample1_gt1000_bin6",
        ]
        files = [ospj(DATA_PATH, "{}.fa".format(n)) for n in names]
        dnadiff_dist_matrix.run_dnadiff_pairwise(files, names,
                TMP_BASENAME_DIR)
        for f in [
            ospj(TMP_BASENAME_DIR,
                "{}_vs_{}".format(names[i], names[j]), "out.report")
                for i in range(len(names))
                for j in range(i + 1, len(names))]:
            ok_(os.path.exists(f))

        matrix = dnadiff_dist_matrix.get_dist_matrix(TMP_BASENAME_DIR, names,
                50)
        matrix_exp = np.genfromtxt(ospj(DATA_PATH, "expected_dist_matrix.tsv"),
                delimiter="\t")
        np.testing.assert_almost_equal(matrix, matrix_exp, decimal=2)
Example #2
 def render_story(self, story):
     subdir = ospj(OUTPUT_DIR, 'stories', story.author.fs_name)
     makedirs(subdir, exist_ok=True)
     filename = '{}.html'.format(story.fs_name)
     template = self.env.get_template('story.html')
     with open(ospj(subdir, filename), 'w') as f:
         print(template.render(story=story, depth=2), file=f)
Example #3
 def render_story_list_by_author(self, author):
     subdir = ospj(OUTPUT_DIR, 'authors')
     makedirs(subdir, exist_ok=True)
     filename = '{}.html'.format(author.fs_name)
     template = self.env.get_template('stories_by_author.html')
     stories = sorted(author.stories, key=attrgetter('date_sort_key'))
     with open(ospj(subdir, filename), 'w') as f:
         print(template.render(author=author, stories=stories, depth=1), file=f)
Example #4
def main(args):
    with open(os.path.expanduser(args.config_file), 'r') as fh:
        config = yaml.safe_load(fh)

    # Create an archive
    dirname = get_dirname(args.repo)

    logger.info("Using repo {}".format(dirname))

    repo = Repo(args.repo)
    assert not repo.is_dirty()

    archive_name = dirname
    git_tag = next((tag for tag in repo.tags if tag.commit == repo.head.commit), None)
    if git_tag:
        archive_name += '_' + git_tag.name
    else:
        archive_name += '_' + repo.head.object.hexsha

    if args.extra_tag:
        archive_name += '_' + args.extra_tag

    logger.info("Creating repo archive {}".format(archive_name))
    archive = "{0}.tar.gz".format(archive_name)
    archive_path = ospj(args.repo, archive)

    run_dir = os.getcwd()
    os.chdir(args.repo)
    os.system("git-archive-all {}".format(archive))
    os.chdir(run_dir)

    logger.info("Archive created.")

    # Transfer archive to remote
    remote_dir = config['hosts']['irma']['archive_dir']

    c = Connection('irma')
    c.put(archive_path, remote=remote_dir)
    logger.info("Archive successfully transferred to irma")

    # Extract remote archive
    remote_archive_path = ospj(remote_dir, archive)
    remote_extracted_path = remote_archive_path.replace('.tar.gz', '')

    c.run('rm -r {} || true'.format(remote_extracted_path))
    c.run('cd {}; tar -xvzf {}'.format(remote_dir, remote_archive_path))

    # Create a link from dev or latest to the new archive
    if args.mode == 'dev':
        link_name = "{}_dev".format(dirname)
    else:
        link_name = "{}_latest".format(dirname)
    c.run('cd {}; ln -sfn {} {}'.format(remote_dir, remote_extracted_path, link_name))
    logger.info("Linking: {} {}".format(remote_extracted_path, link_name))
    logger.info("{} successfully linked as the new {}".format(dirname, link_name))
Example #5
 def render_story_list_all(self):
     filename = 'stories_all.html'
     template = self.env.get_template(filename)
     stories = []
     for author in self.story_data.values():
         stories.extend(author.stories)
     with open(ospj(OUTPUT_DIR, 'stories_all_date.html'), 'w') as f:
         print(template.render(stories=sorted(stories, key=attrgetter('date_sort_key')), depth=0), file=f)
     with open(ospj(OUTPUT_DIR, 'stories_all_title.html'), 'w') as f:
         print(template.render(stories=sorted(stories, key=attrgetter('title_sort_key')), depth=0), file=f)
Example #6
 def setUp(self):
     self.prefs = Prefs()
     work_dir = ospj(dirname(__file__), 'data/purgecounter/dynamic')
     self.filename = ospj(work_dir, PURGE_HISTORY)
     self.prefs._Prefs__data['WORK_DIR'] = work_dir
     self.counter = Counter()
     host = 'host'
     count = 1
     self.counter[host] += count
     self.test_string = '%s:%s\n' % (host, self.counter[host])
Example #7
 def test_verify_del(self):
     out = StringIO()
     verify.print_revertant_mutations_info(
         ospj(DATA_PATH, "to_be_reverted_mutations.txt"),
         ospj(DATA_PATH, "oncotator.del.txt"),
         ospj(DATA_PATH, "BRCA_transcripts.fa"),
         revmuts_file_format='hgvs',
         outfile=out
     )
     assert_equals(open(ospj(DATA_PATH, "output", "oncotator.del.maf.out.tsv")).read(), out.getvalue())
Example #8
 def test_validate_mutations(self):
     out = StringIO()
     sufam.__main__.validate_mutations(ospj(DATA_PATH, "mutations.vcf"),
                                         ospj(DATA_PATH, "subset1.bam"),
                                         ospj(DATA_PATH, "human_g1k_v37_chr17.fa"),
                                         "test",
                                         "matrix",
                                         out
                                         )
     assert_equals("0\n1\n", out.getvalue())
Example #9
 def test_multi_bam_vcf(self):
     out = StringIO()
     sufam.__main__.validate_mutations(ospj(DATA_PATH, "mutations.vcf"),
                                       [ospj(DATA_PATH, "subset{}.bam".format(i)) for i in range(1, 4)],
                                       ospj(DATA_PATH, "human_g1k_v37_chr17.fa"),
                                       ["subset{}".format(i) for i in range(1, 4)],
                                       "vcf",
                                       out
                                       )
     assert_equals(open(ospj(DATA_PATH, "multi_bam.vcf")).read(), out.getvalue())
Example #10
 def test_sum_bases_in_bins(self):
     """Test sum_bases_in_bins"""
     scg_tsv = ospj(DATA_PATH, "sample0_gt500_scg.tsv")
     b = sum_bases_in_bins(pd.read_csv(scg_tsv, sep="\t"),
             ospj(DATA_PATH, "sample0_gt500.fa"))
     assert_equal(12, b)
     df = get_approved_bins(ospj(DATA_PATH, "sample0_gt500_scg.tsv"),
             max_missing_scg=2, max_multicopy_scg=4)
     b = sum_bases_in_bins(df, ospj(DATA_PATH, "sample0_gt500.fa"))
     assert_equal(4, b)
Example #11
def run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Runs MUMmer's dnadiff pairwise for given fasta_files. Uses fasta_names
    to organize output folders for dnadiff as fastaname1_vs_fastaname2."""
    assert len(fasta_files) == len(fasta_names)

    for i in range(len(fasta_files)):
        for j in range(i + 1, len(fasta_files)):
            out_dir = ospj(output_folder, "{fn1}_vs_{fn2}".format(
                fn1=fasta_names[i], fn2=fasta_names[j]))
            dir_utils.mkdir_p(out_dir)
            run_dnadiff(fasta_files[i], fasta_files[j], ospj(out_dir, "out"))
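
run_dnadiff itself is not shown in this listing; a minimal sketch, assuming MUMmer's dnadiff binary is on PATH and that -p sets the output prefix:

import subprocess

def run_dnadiff(fasta_file_1, fasta_file_2, out_prefix):
    # dnadiff writes out_prefix.report among other output files
    subprocess.check_call(["dnadiff", "-p", out_prefix, fasta_file_1,
                           fasta_file_2])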
Example #12
 def test_plot_dist_matrix_88_bins(self):
     """Plot a distance matrix with 88 samples"""
     names = [
         "sample0_gt1000_bin{}".format(i) for i in range(88)
     ]
     matrix = np.genfromtxt(ospj(DATA_PATH, "expected_dist_matrix_88_bins.tsv"),
             delimiter="\t")
     heatmap = ospj(TMP_BASENAME_DIR, "hclust_heatmap.pdf")
     dendrogram = ospj(TMP_BASENAME_DIR, "hclust_dendrogram.pdf")
     dnadiff_dist_matrix.plot_dist_matrix(matrix, names, heatmap, dendrogram)
     ok_(os.path.exists(heatmap))
     ok_(os.path.exists(dendrogram))
Example #13
 def test_write_approved_bins(self):
     """Test write_approved_bins"""
     df = get_approved_bins(ospj(DATA_PATH, "sample0_gt500_scg.tsv"),
             max_missing_scg=2, max_multicopy_scg=4)
     assert_equal(2, int(df.Cluster))
     write_approved_bins(df, ospj(DATA_PATH, "sample0_gt500.fa"),
             TMP_BASENAME_DIR, "sample0_gt500")
     ok_(os.path.exists(ospj(TMP_BASENAME_DIR, "sample0_gt500_bin2.fa")))
     # make sure both have equal amount of records
     assert_equal(
         open(ospj(TMP_BASENAME_DIR, "sample0_gt500_bin2.fa")).read().count(">"),
         open(ospj(DATA_PATH, "sample0_gt500_bin2.fa")).read().count(">"))
Example #14
    def setUp(self):
        self.directory = ospj(dirname(__file__), 'data/deny_hosts')
        self.work_dir = ospj(self.directory, 'work')
        self.logfile = ospj(self.work_dir, 'logfile')
        self.prefs = Prefs()

        self.lock_file = LockFile(ospj(self.directory, 'lockfile'))
        self.lock_file.remove(die_=False)
        self.lock_file.create()

        self.prefs._Prefs__data['ETC_DIR'] = ospj(self.directory, 'etc')
        self.prefs._Prefs__data['WORK_DIR'] = self.work_dir
Example #15
    def test_mummer_report_class(self):
        """Test mummer report class"""
        dnadiff_dist_matrix.run_dnadiff(ospj(DATA_PATH,
            "sample0_gt1000_bin0.fa"), ospj(DATA_PATH,
                "sample0_gt1000_bin1.fa"), ospj(TMP_BASENAME_DIR, "out"))
        ok_(os.path.exists(ospj(TMP_BASENAME_DIR, "out.report")))

        mumr = dnadiff_dist_matrix.MUMmerReport(ospj(TMP_BASENAME_DIR,
            "out.report"))
        assert_equal(mumr.tot_bases[0], 3213)
        assert_equal(mumr.tot_bases[1], 43514)
        assert_equal(mumr.aligned_bases[0], 0)
        assert_equal(mumr.aligned_bases[1], 0)
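
The MUMmerReport class itself is not included in this listing. A rough sketch of how it might parse TotalBases and AlignedBases from a dnadiff out.report, with the [reference, query] ordering the assertions above rely on; the parsing details are assumptions:

class MUMmerReport(object):
    # Sketch only; the real class in dnadiff_dist_matrix may differ.
    def __init__(self, report_path):
        with open(report_path) as fh:
            for line in fh:
                fields = line.split()
                if line.startswith("TotalBases"):
                    self.tot_bases = [int(v) for v in fields[1:3]]
                elif line.startswith("AlignedBases"):
                    # Values look like '3213(99.99%)'; keep the base count.
                    self.aligned_bases = [int(v.split("(")[0])
                                          for v in fields[1:3]]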
Example #16
 def test_get_winning_bins(self):
     """Test get_winning_bins"""
     scg_tsvs = [ospj(DATA_PATH, p) for p in ["sample0_gt300_scg.tsv",
         "sample0_gt500_scg.tsv"]]
     fasta_files = [ospj(DATA_PATH, p) for p in ["sample0_gt300.fa",
         "sample0_gt500.fa"]]
     winning_index, df = get_winning_bins(scg_tsvs, fasta_files,
             max_missing_scg=2, max_multicopy_scg=4)
     assert_equal(1, winning_index)
     winning_index, df = get_winning_bins(list(reversed(scg_tsvs)),
         list(reversed(fasta_files)), max_missing_scg=2,
         max_multicopy_scg=4)
     assert_equal(0, winning_index)
Example #17
 def test_parallel_run_dnadiff_pairwise(self):
     """Test dnadiff pairwise on multiple bins"""
     names = ["bin{0}".format(i) for i in range(3)]
     dnadiff_dist_matrix.parallel_run_dnadiff_pairwise(
         [ospj(DATA_PATH, b) for b in
             ["sample0_gt1000_bin0.fa", "sample0_gt1000_bin1.fa",
                 "sample0_gt1000_bin2.fa"]],
         names, TMP_BASENAME_DIR)
     for f in [
         ospj(TMP_BASENAME_DIR,
             "{}_vs_{}".format(names[i], names[j]), "out.report")
             for i in range(len(names))
             for j in range(i + 1, len(names))]:
         ok_(os.path.exists(f))
Example #18
    def test_find(self):
        out = StringIO()

        reffa = ospj(DATA_PATH, "human_g1k_v37_chr17.fa")
        mutations_tsv = ospj(DATA_PATH, "germline_mutations", "T1_test_mutation.tsv")
        search_bam = ospj(DATA_PATH, "T1.bam")
        normal_bam = ospj(DATA_PATH, "N1.bam")

        find_revertant_mutations(reffa, mutations_tsv, search_bam, normal_bam, out)
        out.seek(0)
        test = pd.read_csv(out, sep="\t")
        truth = pd.read_csv(ospj(DATA_PATH, "output", "T1_test.tsv"), sep="\t")
        assert_frame_equal(truth.drop("MAF", axis=1),
                           test.drop("MAF", axis=1))
        assert_array_almost_equal(truth.MAF, test.MAF, decimal=6)
Example #19
 def test_write_fasta_names(self):
     names = [
         "sample0_gt1000_bin0",
         "sample0_gt1000_bin10",
         "sample0_gt1000_bin11",
         "sample0_gt1000_bin1",
         "sample0_gt1000_bin2",
         "sample1_gt1000_bin51",
         "sample1_gt1000_bin67",
         "sample1_gt1000_bin6",
     ]
     files = [ospj(DATA_PATH, "{}.fa".format(n)) for n in names]
     dnadiff_dist_matrix.write_fasta_names(names, files,
             ospj(TMP_BASENAME_DIR, "fasta_names.tsv"), "\t")
     ok_(os.path.exists(ospj(TMP_BASENAME_DIR, "fasta_names.tsv")))
Example #20
    def __init__(self, args):
        super().__init__()
        self.args = args
        self.device = torch.device(
            'cuda' if torch.cuda.is_available() else 'cpu')

        self.nets, self.nets_ema = build_model(args)
        # below setattrs are to make networks be children of Solver, e.g., for self.to(self.device)
        for name, module in self.nets.items():
            utils.print_network(module, name)
            setattr(self, name, module)
        for name, module in self.nets_ema.items():
            setattr(self, name + '_ema', module)

        if args.mode == 'train':
            self.optims = Munch()
            for net in self.nets.keys():
                if net == 'fan':
                    continue
                self.optims[net] = torch.optim.Adam(
                    params=self.nets[net].parameters(),
                    lr=args.f_lr if net == 'mapping_network' else args.lr,
                    betas=[args.beta1, args.beta2],
                    weight_decay=args.weight_decay)

            self.ckptios = [
                CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_nets.ckpt'),
                             **self.nets),
                CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_nets_ema.ckpt'),
                             **self.nets_ema),
                CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_optims.ckpt'),
                             **self.optims)
            ]
        else:
            self.ckptios = [
                CheckpointIO(ospj(args.checkpoint_dir, '{:06d}_nets_ema.ckpt'),
                             **self.nets_ema)
            ]

        self.to(self.device)
        for name, network in self.named_children():
            # Do not initialize the FAN parameters
            if ('ema' not in name) and ('fan' not in name):
                print('Initializing %s...' % name)
                network.apply(utils.he_init)

        ### modify def sample
        self._load_checkpoint(args.resume_iter)
Example #21
 def test_plot_dist_matrix_88_bins(self):
     """Plot a distance matrix with 88 samples"""
     names = ["sample0_gt1000_bin{}".format(i) for i in range(88)]
     matrix = np.genfromtxt(ospj(DATA_PATH,
                                 "expected_dist_matrix_88_bins.tsv"),
                            delimiter="\t")
     heatmap = ospj(TMP_BASENAME_DIR, "hclust_heatmap.pdf")
     dendrogram = ospj(TMP_BASENAME_DIR, "hclust_dendrogram.pdf")
     clustering = ospj(TMP_BASENAME_DIR, "clustering.tsv")
     clustering_threshold = 0.05
     dnadiff_dist_matrix.plot_dist_matrix(matrix, names, heatmap,
                                          dendrogram, clustering_threshold,
                                          clustering)
     ok_(os.path.exists(heatmap))
     ok_(os.path.exists(dendrogram))
     ok_(os.path.exists(clustering))
Example #23
    def write_seqs(self, mytup):

        completename = ospj(self.aln_dir, self.glob_exon + ".unaligned.fasta")

        with open(completename, 'a') as f:

            f.write("{exon}\n{seq}\n".format(exon=mytup[0], seq=mytup[1]))
Example #24
    def check_corenames(self):
        names = self.corenames
        out = []

        for k, v in names.items():
            isreqs = check_reqs(self.int_reqs, v)
            islabel = self.step in v

            if isreqs and not islabel:
                stem = ospj(self.path, k)
                out.append((k, [
                    ospj(stem, i) for i in os.listdir(stem)
                    if re.findall(self.pattern, i)
                ]))

        return out
Example #25
    def export_agent(self, step):
        _dir = ospj(self._t_prof.path_agent_export_storage, str(self._t_prof.name), str(step))
        file_util.create_dir_if_not_exist(_dir)

        # """"""""""""""""""""""""""""
        # Deep CFR
        # """"""""""""""""""""""""""""
        if self._AVRG:
            MODE = EvalAgentDeepCFR.EVAL_MODE_AVRG_NET

            t_prof = copy.deepcopy(self._t_prof)
            t_prof.eval_modes_of_algo = [MODE]

            eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
            eval_agent.reset()

            w = {EvalAgentDeepCFR.EVAL_MODE_AVRG_NET: self._pull_avrg_net_eval_strat()}
            eval_agent.update_weights(w)
            eval_agent.set_mode(mode=MODE)
            eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)

        # """"""""""""""""""""""""""""
        # SD-CFR
        # """"""""""""""""""""""""""""
        if self._SINGLE:
            MODE = EvalAgentDeepCFR.EVAL_MODE_SINGLE
            t_prof = copy.deepcopy(self._t_prof)
            t_prof.eval_modes_of_algo = [MODE]

            eval_agent = EvalAgentDeepCFR(t_prof=t_prof)
            eval_agent.reset()

            eval_agent._strategy_buffers = self._strategy_buffers  # could copy - it's just for the export, so it's ok
            eval_agent.set_mode(mode=MODE)
            eval_agent.store_to_disk(path=_dir, file_name="eval_agent" + MODE)
Example #26
def find_html_files(directory):
    # I'm inclined to use a generator for os.walk usage, but listdir
    # already returns a list so there isn't much benefit here
    return [
        ospj(directory, filename) for filename in listdir(directory)
        if filename.endswith(HTML_FILE_EXTENSION)
    ]
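
For the recursive os.walk case the comment alludes to, a generator variant might look like this (HTML_FILE_EXTENSION is assumed to be '.html'):

from os import walk

def iter_html_files(root):
    # Sketch: recursive counterpart of find_html_files above
    for dirpath, _dirnames, filenames in walk(root):
        for filename in filenames:
            if filename.endswith(HTML_FILE_EXTENSION):
                yield ospj(dirpath, filename)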
Example #28
 def test_send_hosts(self):
     sync = Sync(self.prefs)
     self.assertEqual(sync.receive_new_hosts(), self.test_hosts)
     filename = ospj(self.work_dir, SYNC_RECEIVED_HOSTS)
     with open(filename) as f:
         hosts = [line.strip().split(':')[0] for line in f]
     self.assertEqual(self.test_hosts, hosts)
Example #29
 def export_all(self, iter_nr):
     """
     Exports all logs of the current run in Tensorboard's format and as json files.
     """
     if self._path_log_storage is not None:
         path_crayon = ospj(self._path_log_storage, str(self._name),
                            str(iter_nr), "crayon")
         path_json = ospj(self._path_log_storage, str(self._name),
                          str(iter_nr), "as_json")
         create_dir_if_not_exist(path=path_crayon)
         create_dir_if_not_exist(path=path_json)
         for e in self._experiments.values():
             e.to_zip(filename=ospj(path_crayon, e.xp_name + ".zip"))
             write_dict_to_file_json(dictionary=self._custom_logs,
                                     _dir=path_json,
                                     file_name="logs")
Example #30
    def celeba_preprocess(self):
        assert os.path.exists(
            self.celeba_image_dir
        ), f'Image data directory does not exist: {self.celeba_image_dir}'
        assert os.path.exists(
            self.celeba_attr_file
        ), f'Attribute file does not exist: {self.celeba_attr_file}'
        with open(self.celeba_attr_file, 'r') as f:
            img_name_attrs_lines = f.readlines()
        all_attr_names = img_name_attrs_lines[1].split()
        for i, attr_name in enumerate(all_attr_names):
            self.attr2idx[attr_name] = i
            self.idx2attr[i] = attr_name
        lines = img_name_attrs_lines[2:]

        random.seed(1234)
        random.shuffle(lines)
        for i, line in enumerate(lines):
            split = line.strip().split()
            filename = split[0]
            values = split[1:]
            label = []
            for attr_name in self.selected_attrs:
                idx = self.attr2idx[attr_name]
                label.append(values[idx] == '1')
            filepath = ospj(self.celeba_image_dir, filename)
            if i < 2000:
                self.celeba_test_dataset.append([filepath, label])
            else:  # 28000
                self.celeba_train_dataset.append([filepath, label])
        print(
            f'Finished preprocessing the {self.celeba_dataset_name} dataset...'
        )
Example #31
 def setUp(self):
     self.work_dir = mkdtemp()
     self.warned_hosts_filename = ospj(self.work_dir, ALLOWED_WARNED_HOSTS)
     self.prefs = Prefs()
     self.prefs._Prefs__data['WORK_DIR'] = self.work_dir
     self.prefs._Prefs__data['ALLOWED_HOSTS_HOSTNAME_LOOKUP'] = 'false'
     self.allowed_hosts = AllowedHosts(self.prefs)
Example #33
    def test_resolve_fetch_http_basic_auth_get(self):
        logger.info(
            self.getTestHeader('test resolve fetch http basic auth GET'))
        try:
            patched_requests_get = None

            def mocked_request_auth_get_success(*args, **kwargs):
                args[0].auth = None
                patched_requests_get.stop()
                return BaseTest.MockResponse({}, 200)

            patched_requests_get = mock.patch.multiple(
                "bdbag.fetch.transports.fetch_http.requests.Session",
                get=mocked_request_auth_get_success,
                auth=None,
                create=True)

            patched_requests_get.start()
            bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                              keychain_file=ospj(self.test_config_dir,
                                                 'test-keychain-1.json'),
                              cookie_scan=False)
            bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
            bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
        except Exception as e:
            self.fail(bdbag.get_typed_exception(e))
Example #34
    def get_split_info(self):
        '''
        Helper method to read image, attrs, objs samples

        Returns
            train_data, val_data, test_data: List of tuple of image, attrs, obj
        '''
        data = torch.load(ospj(self.root, 'metadata_{}.t7'.format(self.split)))

        train_data, val_data, test_data = [], [], []

        for instance in data:
            image, attr, obj, settype = instance['image'], instance['attr'], \
                instance['obj'], instance['set']
            curr_data = [image, attr, obj]

            if attr == 'NA' or (attr,
                                obj) not in self.pairs or settype == 'NA':
                # Skip incomplete pairs, unknown pairs and unknown set
                continue

            if settype == 'train':
                train_data.append(curr_data)
            elif settype == 'val':
                val_data.append(curr_data)
            else:
                test_data.append(curr_data)

        return train_data, val_data, test_data
Example #35
 def test_read_with_create_default_keychain(self):
     logger.info(self.getTestHeader('read keychain with create default if missing'))
     try:
         keychain_file = ospj(self.test_config_dir, ".bdbag", 'keychain.json')
         keychain.read_keychain(keychain_file=keychain_file)
     except Exception as e:
         self.fail(get_typed_exception(e))
Example #36
    def sample(self, loaders):
        args = self.args
        nets_ema = self.nets_ema
        os.makedirs(args.result_dir, exist_ok=True)
        self._load_checkpoint(args.resume_iter)

        src = next(InputFetcher(loaders.src, None, args.latent_dim, 'test'))
        ref = next(InputFetcher(loaders.ref, None, args.latent_dim, 'test'))

        fname = ospj(args.result_dir, 'reference.jpg')
        print('Working on {}...'.format(fname))
        utils.translate_using_reference(nets_ema, args, src.x, ref.x, ref.y, fname)

        fname = ospj(args.result_dir, 'video_ref.mp4')
        print('Working on {}...'.format(fname))
        utils.video_ref(nets_ema, args, src.x, ref.x, ref.y, fname)
Example #37
 def test_create_bag_duplicate_manifest_entry_from_remote(self):
     logger.info(
         self.getTestHeader(
             'create bag with fetch.txt entry for local file'))
     try:
         duplicate_file = "test-fetch-http.txt"
         shutil.copy(ospj(self.test_http_dir, duplicate_file),
                     ospj(self.test_data_dir, duplicate_file))
         with self.assertRaises(bdbagit.BagManifestConflict) as ar:
             bdb.make_bag(self.test_data_dir,
                          remote_file_manifest=ospj(
                              self.test_config_dir,
                              'test-fetch-manifest.json'))
         logger.error(bdbag.get_typed_exception(ar.exception))
     except Exception as e:
         self.fail(bdbag.get_typed_exception(e))
Example #38
    def add_new_iteration_strategy_model(self, owner, adv_net_state_dict,
                                         cfr_iter):
        iter_strat = IterationStrategy(t_prof=self._t_prof,
                                       env_bldr=self._env_bldr,
                                       owner=owner,
                                       device=self._t_prof.device_inference,
                                       cfr_iter=cfr_iter)

        iter_strat.load_net_state_dict(
            self._ray.state_dict_to_torch(
                adv_net_state_dict, device=self._t_prof.device_inference))
        self._strategy_buffers[iter_strat.owner].add(
            iteration_strat=iter_strat)

        #  Store to disk
        if self._t_prof.export_each_net:
            path = ospj(self._t_prof.path_strategy_nets, self._t_prof.name)
            file_util.create_dir_if_not_exist(path)
            file_util.do_pickle(obj=iter_strat.state_dict(),
                                path=path,
                                file_name=str(iter_strat.cfr_iteration) +
                                "_P" + str(iter_strat.owner) + ".pkl")

        if self._t_prof.log_verbose:
            if owner == 1:
                # Logs
                process = psutil.Process(os.getpid())
                self.add_scalar(self._exp_mem_usage,
                                "Debug/Memory Usage/Chief", cfr_iter,
                                process.memory_info().rss)
Example #39
    def ffhq_preprocess(self):
        assert os.path.exists(
            self.ffhq_image_dir
        ), f'Image data directory does not exist: {self.ffhq_image_dir}'
        assert os.path.exists(
            self.ffhq_attr_file
        ), f'Attribute file does not exist: {self.ffhq_attr_file}'
        with open(self.ffhq_attr_file, 'r') as f:
            img_name_attrs_lines = f.readlines()

        lines = img_name_attrs_lines[2:]
        for i, line in enumerate(lines):
            split = line.strip().split()
            filename = split[0]
            values = split[1:]
            label = []
            for attr_name in self.selected_attrs:
                idx = self.attr2idx[attr_name]
                label.append(values[idx] == '1')
            img_sub_dir = f'{(i // 1000):02d}000'
            filepath = ospj(self.ffhq_image_dir, img_sub_dir, filename)
            if i >= 66000:
                self.ffhq_test_dataset.append([filepath, label])
            else:  # 4000
                self.ffhq_train_dataset.append([filepath, label])
        print(
            f'Finished preprocessing the {self.ffhq_dataset_name} dataset...')
Example #40
 def test_read_with_update_base_config(self):
     logger.info(self.getTestHeader('read config with auto-upgrade version'))
     try:
         config_file = ospj(self.test_config_dir, 'base-config.json')
         bdbcfg.read_config(config_file=config_file, auto_upgrade=True)
     except Exception as e:
         self.fail(get_typed_exception(e))
Example #41
 def test_create_keychain(self):
     logger.info(self.getTestHeader('create keychain'))
     try:
         keychain_file = ospj(self.test_config_dir, ".bdbag", 'keychain.json')
         keychain.write_keychain(keychain_file=keychain_file)
     except Exception as e:
         self.fail(get_typed_exception(e))
Example #42
 def test_read_with_create_default_config(self):
     logger.info(self.getTestHeader('read config with create default if missing'))
     try:
         config_file = ospj(self.test_config_dir, ".bdbag", 'bdbag.json')
         bdbcfg.read_config(config_file=config_file)
     except Exception as e:
         self.fail(get_typed_exception(e))
Example #43
 def test_create_config(self):
     logger.info(self.getTestHeader('create config'))
     try:
         config_file = ospj(self.test_config_dir, ".bdbag", 'bdbag.json')
         bdbcfg.write_config(config_file=config_file)
     except Exception as e:
         self.fail(get_typed_exception(e))
Example #44
    def __get_last_offset(self):
        offset_file = ospj(self.work_dir, self.offset_file)
        first_line_of_file = ""
        offset = 0
        try:
            with open(offset_file, 'r') as fh:
                first_line_of_file = self.__get_first_line(fh)
                offset_line = fh.readline()
                if offset_line:
                    offset = int(offset_line)
        except IOError:
            pass

        self.log_message.send_message(
            'debug',
            '__get_last_offset():  first_line: {}  offset {}'.format(
                first_line_of_file,
                offset
            )
        )

        return first_line_of_file, offset
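
The matching writer is not shown; a sketch, assuming the offset file simply stores the log's first line followed by the byte offset reached:

    def __save_offset(self, first_line, offset):
        # Hypothetical counterpart to __get_last_offset (not from the source)
        offset_file = ospj(self.work_dir, self.offset_file)
        with open(offset_file, 'w') as fh:
            fh.write('{}\n{}\n'.format(first_line, offset))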
Example #45
    def test_resolve_fetch_http_auth_token_get(self):
        logger.info(self.getTestHeader('test resolve fetch http token auth'))
        try:
            patched_requests_get_auth = None

            def mocked_request_auth_token_get_success(*args, **kwargs):
                args[0].auth = None
                args[0].headers = {}
                patched_requests_get_auth.stop()
                return args[0].get(args[1], **kwargs)

            patched_requests_get_auth = mock.patch.multiple(
                "bdbag.fetch.transports.fetch_http.requests.Session",
                get=mocked_request_auth_token_get_success,
                auth=None,
                create=True)

            patched_requests_get_auth.start()
            bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                              keychain_file=ospj(self.test_config_dir,
                                                 'test-keychain-6.json'),
                              cookie_scan=False)
            bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
            bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
        except Exception as e:
            self.fail(bdbag.get_typed_exception(e))
Example #47
    def test_resolve_fetch_http_auth_token_get_with_disallowed_redirects(self):
        logger.info(
            self.getTestHeader(
                'test resolve fetch http token auth with allowed redirect'))
        try:
            patched_requests_get_auth = None

            def mocked_request_auth_token_get_success(*args, **kwargs):
                headers = args[0].headers or {}
                headers.update({"Location": args[1]})
                args[0].auth = None
                args[0].headers = {}
                patched_requests_get_auth.stop()
                return BaseTest.MockResponse({}, 302, headers=headers)

            patched_requests_get_auth = mock.patch.multiple(
                "bdbag.fetch.transports.fetch_http.requests.Session",
                get=mocked_request_auth_token_get_success,
                auth=None,
                create=True)

            patched_requests_get_auth.start()
            bdb.resolve_fetch(self.test_bag_fetch_http_dir,
                              keychain_file=ospj(self.test_config_dir,
                                                 'test-keychain-7.json'),
                              cookie_scan=False)
            bdb.validate_bag(self.test_bag_fetch_http_dir, fast=True)
            bdb.validate_bag(self.test_bag_fetch_http_dir, fast=False)
            output = self.stream.getvalue()
            self.assertExpectedMessages([
                "Authorization bearer token propagation on redirect is disabled"
            ], output)
        except Exception as e:
            self.fail(bdbag.get_typed_exception(e))
Example #48
def main(args):
    # Get fasta names

    # Get basename from fasta files and see if those are unique
    fasta_names_ref = [
        ".".join(os.path.basename(f).split(".")[0:-1])
        for f in args.fasta_files_ref
    ]
    fasta_names_mag = [
        os.path.basename(f).split(".")[0] for f in args.fasta_files_mag
    ]

    print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
        "ref_fasta_name", "mag_fasta_name", "aligned_bases_ref",
        "aligned_perc_ref", "aligned_bases_mag", "aligned_perc_mag",
        "avg_identity"))
    for ref_fasta_name in fasta_names_ref:
        for mag_fasta_name in fasta_names_mag:
            repfile = ospj(
                args.input_dir,
                "{fn1}_vs_{fn2}.report".format(fn1=ref_fasta_name,
                                               fn2=mag_fasta_name))
            mumr = MUMmerReport(repfile)
            if mumr.aligned_perc_mag >= args.min_coverage:
                print("{}\t{}\t{}\t{}\t{}\t{}\t{}".format(
                    ref_fasta_name, mag_fasta_name, mumr.aligned_bases_ref,
                    mumr.aligned_perc_ref, mumr.aligned_bases_mag,
                    mumr.aligned_perc_mag, mumr.avg_identity))
Example #49
 def test_update_existing_archive(self):
     args = ARGS + [ospj(self.test_archive_dir, 'test-bag.zip'), '--update']
     logfile.writelines(
         self.getTestHeader('--update an existing archive file', args))
     self._test_bad_argument_error_handling(
         args,
         ["Error: An existing bag archive cannot be updated in-place"])
Example #50
 def test_update_keychain_add_multi(self):
     logger.info(self.getTestHeader('test update keychain add multi'))
     keychain_file = ospj(self.test_config_dir, 'test-keychain-8.json')
     added_entries = [{
         "uri": "https://foo.bar.com/",
         "auth_type": "http-basic",
         "auth_params": {
             "auth_method": "get",
             "username": "******",
             "password": "******"
         }
     }, {
         "uri": "https://foo.bar.com/",
         "auth_type": "bearer-token",
         "auth_params": {
             "token": "bar",
             "allow_redirects_with_token": "True",
             "additional_request_headers": {
                 "X-Requested-With": "XMLHttpRequest"
             }
         }
     }]
     try:
         keychain = read_keychain(keychain_file, create_default=False)
         entries = get_auth_entries("https://foo.bar.com/", keychain)
         self.assertFalse(entries)
         updated_keychain = update_keychain(added_entries,
                                            keychain_file=keychain_file)
         logger.info("Updated keychain: %s" % json.dumps(updated_keychain))
         entries = get_auth_entries("https://foo.bar.com/",
                                    updated_keychain)
         self.assertTrue(len(entries) == 2)
     except Exception as e:
         self.fail(bdbag.get_typed_exception(e))
Example #51
 def test_update_keychain_single(self):
     logger.info(self.getTestHeader('test update keychain single'))
     keychain_file = ospj(self.test_config_dir, 'test-keychain-8.json')
     updated_entry = {
         "uri": "https://raw.githubusercontent.com/",
         "auth_type": "http-basic",
         "auth_params": {
             "auth_method": "get",
             "username": "******",
             "password": "******"
         }
     }
     try:
         updated_keychain = update_keychain(updated_entry,
                                            keychain_file=keychain_file)
         logger.info("Updated keychain: %s" % json.dumps(updated_keychain))
         entries = get_auth_entries("https://raw.githubusercontent.com/",
                                    updated_keychain)
         found = False
         for entry in entries:
             if entry["auth_type"] == "http-basic":
                 if entry["auth_params"]["username"] == "foo" and entry[
                         "auth_params"]["password"] == "bar!":
                     found = True
                     break
         self.assertTrue(found)
     except Exception as e:
         self.fail(bdbag.get_typed_exception(e))
Example #52
 def test_correct_mutation(self):
     mutations = mutation.parse_vcf(ospj(DATA_PATH, "mutation_tests.vcf"))
     assert_equals(mutations[0].type, ".")
     assert_equals(mutations[0].change, "G")
     assert_equals(mutations[2].type, "-")
     assert_equals(mutations[2].change, "A")
     assert_equals(mutations[3].type, "+")
     assert_equals(mutations[3].change, "A")
Example #53
    def test_normal_filter(self):
        nmuts = mutation.parse_vcf(ospj(DATA_PATH, "normal_mutation_tests.vcf"))
        for m in nmuts:
            m.ref = "C"
            m.count = 21
            m.cov = 100
        nmuts = mutation.MutationsAtSinglePosition.from_mutation_list(nmuts)

        muts = mutation.parse_vcf(ospj(DATA_PATH, "mutation_tests.vcf"))
        for m in muts:
            m.ref = "C"
            m.count = 21
            m.cov = 100
        muts = mutation.MutationsAtSinglePosition.from_mutation_list(muts)

        filt_muts = muts.filter_against_normal(nmuts)
        assert_equals(len(filt_muts), 1)
Example #54
 def test_mpileup_test2(self):
     test = open(ospj(DATA_PATH, "mpileup_test2.tsv")).read()
     bpdf = sufam.__main__.get_baseparser_extended_df("test", [mpileup_parser.parse(test)], "G", "GAA")
     assert_equals(int(bpdf['cov'].iloc[0]), int(bpdf.G.iloc[0]))
     assert_equals(test.count(",") + test.count("."), int(bpdf['cov'].iloc[0]))
     assert_almost_equals(0.4324, float(bpdf.val_maf.iloc[0]), places=3)
     assert_almost_equals(0.4324, float(bpdf.most_common_indel_maf.iloc[0]), places=3)
     assert_equals("+", bpdf.most_common_indel_type.iloc[0])
Example #55
def parallel_run_dnadiff_pairwise(fasta_files, fasta_names, output_folder):
    """Runs MUMmer's dnadiff pairwise for given fasta_files using
    multiprocessing. Uses fasta_names to organize output folders for dnadiff as
    fastaname1_vs_fastaname2."""
    assert len(fasta_files) == len(fasta_names)

    pool = Pool()
    args = []
    for i in range(len(fasta_files)):
        for j in range(i + 1, len(fasta_files)):
            out_dir = ospj(output_folder, "{fn1}_vs_{fn2}".format(
                fn1=fasta_names[i], fn2=fasta_names[j]))
            dir_utils.mkdir_p(out_dir)
            args.append((fasta_files[i], fasta_files[j], ospj(out_dir, "out")))
    pool.map(run_dnadiff_star, args)
    pool.close()
    pool.join()
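
run_dnadiff_star is the usual star-unpacking shim for Pool.map, which passes each element as a single argument; its definition is not shown here but is presumably equivalent to:

def run_dnadiff_star(args):
    # Unpack the (fasta_file_1, fasta_file_2, out_prefix) tuple
    return run_dnadiff(*args)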
Example #56
 def test_mpileup_test3(self):
     test = open(ospj(DATA_PATH, "mpileup_test3.tsv")).read()
     bpdf = sufam.__main__.get_baseparser_extended_df("test", [mpileup_parser.parse(test)], "G", "A")
     assert_equals(int(bpdf['cov'].iloc[0]), int(bpdf.G.iloc[0]) + int(bpdf.A.iloc[0]) + int(bpdf["T"].iloc[0]))
     assert_equals(1, int(bpdf["T"].iloc[0]))
     assert_equals("AA", bpdf.most_common_indel.iloc[0])
     assert_equals("+", bpdf.most_common_indel_type.iloc[0])
     assert_almost_equals(0.0139, float(bpdf.val_maf.iloc[0]), places=3)
     assert_almost_equals(0.0139, float(bpdf.most_common_al_maf.iloc[0]), places=3)
Example #57
 def test_find_best_per_group(self):
     fasta_files = [
         ospj(DATA_PATH, "sample0_gt300.fa"),
         ospj(DATA_PATH, "sample0_gt500.fa"),
     ]
     args = collections.namedtuple('Arguments', " ".join(["output_folder",
         "scg_tsvs", "fasta_files", "names", "max_missing_scg",
         "max_multicopy_scg", "groups"]))
     groupargs = args(
         output_folder=TMP_BASENAME_DIR,
         scg_tsvs=[os.path.splitext(f)[0] + "_scg.tsv" for f in fasta_files],
         fasta_files=fasta_files,
         names=[os.path.splitext(os.path.basename(f))[0] for f in fasta_files],
         max_missing_scg=2,
         max_multicopy_scg=4,
         groups=("gt300", "gt500")
     )
     main(groupargs)
Example #58
 def test_extract_bag_archive_tar(self):
     logger.info(self.getTestHeader('extract bag tar format'))
     try:
         bag_path = bdb.extract_bag(ospj(self.test_archive_dir, 'test-bag.tar'), temp=True)
         self.assertTrue(ospe(bag_path))
         self.assertTrue(bdb.is_bag(bag_path))
         bdb.cleanup_bag(os.path.dirname(bag_path))
     except Exception as e:
         self.fail(bdbag.get_named_exception(e))
Example #59
 def test_validate_profile_serialization(self):
     logger.info(self.getTestHeader('validate profile serialization'))
     try:
         bag_path = ospj(self.test_archive_dir, 'test-bag.zip')
         bdb.validate_bag_serialization(
             bag_path,
             bag_profile_path='https://raw.githubusercontent.com/ini-bdds/bdbag/master/profiles/bdbag-profile.json')
     except Exception as e:
         self.fail(bdbag.get_named_exception(e))
Example #60
 def setUp(self):
     data_dir = ospj(dirname(abspath(__file__)), 'data')
     # Initialize minimal preferences dict: just enough
     # for the AllowedHosts constructor
     prefs = {
         'WORK_DIR': data_dir,
         'ALLOWED_HOSTS_HOSTNAME_LOOKUP': 'false',
     }
     self.allowed_hosts = AllowedHosts(prefs)