Esempio n. 1
0
 def __init__(self,
              params: WorkerParams,
              id_: Id = None,
              bias: t.Union[str, Bias, None] = None,
              bias_input_type: str = 'tsv'):
     """
     Initialize the worker, enforce the `ADAPT` mode and optionally load a bias.

     :param params: Parameters passed on to the parent initializer. The first
         value of `params.config.mode` must be `ADAPT`.
     :param id_: Identifier passed on to the parent initializer.
     :param bias: A string is handed to a `Bias` reader selected by
         `bias_input_type`; any other value (including `None`) is stored as is.
     :param bias_input_type: Reader selector, consulted only when `bias` is
         a string: `tsv` uses `read_bias_df`, `dat` uses `read_adapt_output`.
     :raises ValueError: If the config mode is not `ADAPT`, or `bias` is
         a string and `bias_input_type` is neither `tsv` nor `dat`.
     """
     super().__init__(params, id_)
     if params.config.mode.field_values[0] != 'ADAPT':
         raise ValueError(
             f'Incorrect mode in config: {params.config.mode.field_values[0]} != ADAPT'
         )
     from_string = isinstance(bias, str)
     # A string input is replaced by a fresh container that the reader fills.
     self.bias = Bias() if from_string else bias
     if from_string:
         if bias_input_type not in ('tsv', 'dat'):
             raise ValueError(
                 f'Expected `bias_input_type` either `tsv` or `dat`. Got {bias_input_type} instead'
             )
         reader = (self.bias.read_bias_df if bias_input_type == 'tsv'
                   else self.bias.read_adapt_output)
         reader(bias, overwrite=True)
     logging.debug(f'ADAPT {self.id}: init')
Esempio n. 2
0
 def get_bias(self, worker: MC) -> t.Optional[pd.DataFrame]:
     """
     Load the bias table referenced by the worker's `Bias_Input_File` field.

     :param worker: Object whose `params.config` provides the bias path.
     :return: The `bias` attribute of the parsed `Bias`, centered at
         `self.ref_states` when those are set; `None` when the config field
         is empty.
     """
     path = worker.params.config.get_field_value('Bias_Input_File')
     if path:
         loaded = Bias().read_adapt_output(path)
         if self.ref_states is None:
             return loaded.bias
         # `overwrite=False`: the centered result is taken from the return value.
         return loaded.center_at_ref(self.ref_states, overwrite=False).bias
     logging.debug(f'Constrainer {self.id} -- no bias for worker {worker.id}')
     return None
Esempio n. 3
0
 def test_two_pos():
     """
     Center a two-step bias at reference states given for positions 1 and 2.

     Only the resulting `bias` column is checked against hand-computed values.
     """
     steps = [1] * 3 + [2] * 3
     variables = ['1-X-1-X', '1-X-2-Y', '1-X-2-Z'] * 2
     values = [10, 2, 3, 20, 4, 6]
     initial = Bias(pd.DataFrame({'step': steps, 'var': variables, 'bias': values}))
     centered = initial.center_at_ref({'1': 'X', '2': 'Y'}, overwrite=False)
     expected = [0, 0, 1, 0, 0, 2]
     assert list(centered.bias['bias']) == expected
Esempio n. 4
0
 def test_one_pos():
     """
     Center a bias whose variables involve position 1 only.

     A reference for position `2` is expected to raise `KeyError`. Centering
     at `{'1': 'X'}` is expected to keep the length and the steps and to
     produce an all-zero `bias` column.
     """
     frame = pd.DataFrame({
         'step': [1, 1, 2, 2],
         'var': ['1-X-1-X', '1-X-1-Y'] * 2,
         'bias': [1, 1, 2, 2],
     })
     initial = Bias(frame)

     with pytest.raises(KeyError):
         initial.center_at_ref({'2': 'X'}, overwrite=False)

     centered = initial.center_at_ref({'1': 'X'}, overwrite=False)
     assert len(centered) == len(initial)
     for column, expected in (('step', [1, 1, 2, 2]), ('bias', [0, 0, 0, 0])):
         assert list(centered.bias[column]) == expected
Esempio n. 5
0
def test_bias_init(bias_dat, bias_tsv):
    """
    Check that `Bias` holds data after each supported way of filling it.

    :param bias_dat: Text written to a temporary file and read through
        `read_adapt_output`.
    :param bias_tsv: Text wrapped in `StringIO` and read through `read_bias_df`.
    """
    # Nothing supplied: no table is stored
    assert Bias().bias is None

    # Table supplied directly to the constructor
    # NOTE(review): the frame is built from the `StringIO` object itself
    # rather than parsed as TSV -- confirm this is intended.
    frame = pd.DataFrame(StringIO(bias_tsv))
    assert Bias(frame).bias is not None

    # Table parsed from `dat` content stored in a temporary file
    with NamedTemporaryFile('w') as tmp:
        tmp.write(bias_dat)
        # Presumably pushes the buffered text to disk before the file is
        # reopened by name -- TODO confirm.
        tmp.seek(0)
        from_dat = Bias()
        from_dat.read_adapt_output(tmp.name)
        assert from_dat.bias is not None

    # Table parsed from in-memory TSV text
    from_tsv = Bias()
    from_tsv.read_bias_df(StringIO(bias_tsv))
    assert from_tsv.bias is not None
Esempio n. 6
0
    def test_min_step_larger():
        """Apply an update whose steps start at 3; expect eight rows over steps 1 to 4."""
        update_frame = pd.DataFrame({
            'step': [3, 3, 4, 4],
            'var': ['X', 'Y'] * 2,
            'bias': [1, -1, 2, -2],
        })
        # `overwrite=False`: the merged result is taken from the return value.
        merged = b_ini.update(Bias(update_frame), overwrite=False)

        assert len(merged) == 8
        expected_steps = [1, 1, 2, 2, 3, 3, 4, 4]
        expected_values = [0, 0, 1, -1, 1, -1, 2, -2]
        assert list(merged.bias['step']) == expected_steps
        assert list(merged.bias['bias']) == expected_values
Esempio n. 7
0
def random_bias() -> Bias:
    """
    Build a `Bias` with random size and random values.

    The number of steps is drawn from `randint(1, 10)` and the number of
    variables per step from `randint(3, 50)`. The same variables, produced by
    `random_variable`, are reused for every step, while the bias values are
    drawn afresh per step with `np.random.rand`.

    :return: A `Bias` wrapping the concatenated per-step frames.
    """
    n_steps = np.random.randint(1, 10)
    n_vars = np.random.randint(3, 50)
    names = [random_variable() for _ in range(n_vars)]
    frames = [
        pd.DataFrame({
            'step': [number] * n_vars,
            'var': names,
            'bias': np.random.rand(n_vars),
        })
        for number in range(1, n_steps + 1)
    ]
    return Bias(pd.concat(frames))
Esempio n. 8
0
    def test_less_upd_vars():
        """Apply an update that carries only variable `X`; expect six rows in total."""
        update_frame = pd.DataFrame({
            'step': [1, 2],
            'var': ['X'] * 2,
            'bias': [1, -1],
        })
        merged = b_ini.update(Bias(update_frame), overwrite=False)

        assert len(merged) == 6
        expected_columns = (
            ('step', [1, 1, 2, 2, 3, 4]),
            ('var', ['X', 'Y', 'X', 'Y', 'X', 'X']),
            ('bias', [0, 0, 1, -1, 2, 0]),
        )
        for column, expected in expected_columns:
            assert list(merged.bias[column]) == expected
Esempio n. 9
0
def test_random_bias_update(random_bias):
    """
    Update a random bias with itself and check the failure cases of `update`.

    When the bias spans several steps, updating it with itself
    (`overwrite=False`) must leave the original untouched and return a table
    of doubled length whose steps continue after the original last step.
    When it has a single step, `update` is expected to raise `ValueError`.
    Updating with the first half of the rows is expected to raise
    `ValueError` in either case.

    Objects are constructed outside the `pytest.raises` blocks so that only
    a `ValueError` coming from `update` itself can satisfy the expectation.

    :param random_bias: A `Bias` instance supplied by the caller/fixture.
    """
    b = random_bias.bias
    first = min(b['step'])
    last = max(b['step'])
    if first != last:
        b1 = Bias(b)
        # Update with the same bias
        upd = b1.update(random_bias, overwrite=False).bias
        # Nothing was overwritten
        assert (b1.bias == b).all().all()
        # The length is doubled
        assert len(upd) == len(b) * 2
        # The largest step of the update is twice the largest step of initial
        assert max(upd['step']) == max(b['step']) * 2
        # The steps of the update can be inferred via the last step of the original
        assert list(upd['step']) == list(b['step']) + list(b['step'] + last)
        # The bias value of the last step is twice this value of the original (1 + 1 = 2)
        assert list(upd['bias'])[-1] == list(b['bias'])[-1] * 2
    else:
        single_step = Bias(random_bias.bias)
        with pytest.raises(ValueError):
            single_step.update(random_bias)

    half = Bias(b.iloc[:len(b) // 2])
    full = Bias(b)
    with pytest.raises(ValueError):
        full.update(half)
Esempio n. 10
0
class ADAPT(Worker):
    """
    A subclass of a `Worker` where the mode `ADAPT` is explicitly specified in config.
    Compared to base `Worker`, has one extra attribute and one extra method
    to store and manipulate bias: `bias` and `store_bias`.
    """
    def __init__(self,
                 params: WorkerParams,
                 id_: Id = None,
                 bias: t.Union[str, Bias, None] = None,
                 bias_input_type: str = 'tsv'):
        """
        :param params: A dataclass containing all the parameters necessary to run the Worker.
            The first value of `params.config.mode` must be `ADAPT`.
        :param id_: Unique identifier of a Worker, passed on to the parent initializer.
        :param bias: Explicitly provide bias. A string is read into a new `Bias`
            using the reader selected by `bias_input_type`;
            any other value (including `None`) is stored as is.
        :param bias_input_type: Type of the object provided in `bias` argument.
            Can be either `tsv` or `dat`. Consulted only when `bias` is a string.
        :raises ValueError: If the config mode is not `ADAPT`, or if `bias` is a string
            and `bias_input_type` is neither `tsv` nor `dat`.
        """
        super().__init__(params, id_)
        if params.config.mode.field_values[0] != 'ADAPT':
            raise ValueError(
                f'Incorrect mode in config: {params.config.mode.field_values[0]} != ADAPT'
            )
        self.bias = bias
        if isinstance(bias, str):
            self.bias = Bias()
            if bias_input_type == 'tsv':
                self.bias.read_bias_df(bias, overwrite=True)
            elif bias_input_type == 'dat':
                self.bias.read_adapt_output(bias, overwrite=True)
            else:
                raise ValueError(
                    f'Expected `bias_input_type` either `tsv` or `dat`. Got {bias_input_type} instead'
                )
        logging.debug(f'ADAPT {self.id}: init')

    def store_bias(self):
        """
        Parse and store bias.
        The path to the bias is taken from the `Adapt_Output_File` config field.
        If no bias is held yet, the parsed one is stored;
        otherwise the held bias is updated with it in place.
        :raises ValueError: If no bias path is found in `config` or the path does not exist.
        """
        bias_path = self.params.config.get_field_value('Adapt_Output_File')
        # Treat an empty string like a missing value: `Path('')` points to the
        # current directory, which exists, so it would slip past the next check.
        if not bias_path:
            raise ValueError(
                f'ADAPT {self.id}: `Adapt_Output_File` is empty in the config')
        if not Path(bias_path).exists():
            raise ValueError(
                f'ADAPT {self.id}: `Adapt_Output_File` {bias_path} does not exist'
            )
        bias = Bias().read_adapt_output(bias_path, overwrite=True)
        if self.bias is None or self.bias.bias is None:
            self.bias = bias
            logging.info(f'ADAPT {self.id}: stored new bias {bias_path}')
        else:
            self.bias.update(bias, overwrite=True)
            logging.info(
                f'ADAPT {self.id}: updated existing bias with {bias_path}')
Esempio n. 11
0
    def test_more_upd_vars():
        """Apply an update carrying an extra variable `Z`; expect only `X` and `Y` rows."""
        # The extra variable is expected to be ignored by the update
        update_frame = pd.DataFrame({
            'step': [1] * 3 + [2] * 3,
            'var': ['X', 'Y', 'Z'] * 2,
            'bias': [1, -1, -2, 2, -2, 3],
        })
        merged = b_ini.update(Bias(update_frame), overwrite=False)

        assert len(merged) == 8
        expected_columns = (
            ('step', [1, 1, 2, 2, 3, 3, 4, 4]),
            ('var', ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y']),
            ('bias', [0, 0, 1, -1, 2, -2, 3, -3]),
        )
        for column, expected in expected_columns:
            assert list(merged.bias[column]) == expected
Esempio n. 12
0
    def test_common_case():
        """Apply an update with the same steps and variables as the initial bias."""
        update_frame = pd.DataFrame({
            'step': [1, 1, 2, 2],
            'var': ['X', 'Y'] * 2,
            'bias': [1, -1, 2, -2],
        })
        merged = b_ini.update(Bias(update_frame), overwrite=False)

        assert len(merged) == 8
        # The first four rows must still equal the initial table
        assert (b_ini.bias == merged.bias[:4]).all().all()
        expected_columns = (
            ('step', [1, 1, 2, 2, 3, 3, 4, 4]),
            ('var', ['X', 'Y'] * 4),
            ('bias', [0, 0, 1, -1, 2, -2, 3, -3]),
        )
        for column, expected in expected_columns:
            assert list(merged.bias[column]) == expected
Esempio n. 13
0
 def __call__(self, worker: MC) -> MC:
     """
     Remember the highest-coverage state seen for a worker and optionally dump it.

     The worker is returned unchanged in every case. Nothing is recorded when
     the worker has no summary, when its `Bias_Input_File` is empty or does
     not exist, or when it has no sequences; the latter two cases are logged
     as warnings.

     The state (summary, bias table, sequences) is stored in `self._memory`
     under `worker.id` when no state is stored for that id yet or when the
     current `summary.coverage` is strictly greater than the stored one.
     When `self.dump_to_workdir` is set, the current bias and sequences are
     written to the worker's working directory regardless of whether the
     stored state was replaced.

     :param worker: The worker to inspect.
     :return: The same `worker` object.
     """
     if worker.summary is None:
         return worker
     bias_path = worker.params.config.get_field_value('Bias_Input_File')
     if not bias_path or not Path(bias_path).exists():
         logging.warning(f'BestStateKeeper {self.id} -- no bias for worker {worker.id}')
         return worker
     # Check the cheap precondition first so the bias file is not parsed
     # only to be thrown away.
     if worker.seqs is None:
         logging.warning(f'BestStateKeeper {self.id} -- no seqs for worker {worker.id}')
         return worker
     bias = Bias().read_adapt_output(bias_path)

     should_store = worker.id not in self._memory
     if not should_store:
         prev_cov = self._memory[worker.id].Summary.coverage
         curr_cov = worker.summary.coverage
         should_store = curr_cov > prev_cov
     if should_store:
         self._memory[worker.id] = MCState(worker.summary, bias.bias, worker.seqs)

     if self.dump_to_workdir:
         bias.dump(f'{worker.params.working_dir}/{self.dump_name_bias}')
         worker.seqs.to_csv(
             f'{worker.params.working_dir}/{self.dump_name_seq_count}',
             sep='\t', index=False)
     return worker
Esempio n. 14
0
 def store_bias(self):
     """
     Parse and store bias.
     The path to the bias is taken from the `Adapt_Output_File` config field.
     If no bias is held yet, the parsed one is stored;
     otherwise the held bias is updated with it in place.
     :raises ValueError: If no bias path is found in `config` or the path does not exist.
     """
     bias_path = self.params.config.get_field_value('Adapt_Output_File')
     # Treat an empty string like a missing value: `Path('')` points to the
     # current directory, which exists, so it would slip past the next check.
     if not bias_path:
         raise ValueError(
             f'ADAPT {self.id}: `Adapt_Output_File` is empty in the config')
     if not Path(bias_path).exists():
         raise ValueError(
             f'ADAPT {self.id}: `Adapt_Output_File` {bias_path} does not exist'
         )
     bias = Bias().read_adapt_output(bias_path, overwrite=True)
     if self.bias is None or self.bias.bias is None:
         self.bias = bias
         logging.info(f'ADAPT {self.id}: stored new bias {bias_path}')
     else:
         self.bias.update(bias, overwrite=True)
         logging.info(
             f'ADAPT {self.id}: updated existing bias with {bias_path}')
Esempio n. 15
0
 def test_upd_empty():
     """Updating with an empty `Bias` must hand back the very same object."""
     assert b_ini.update(Bias()) is b_ini
Esempio n. 16
0
def test_manual_bias_update(df_ini):
    """
    Run hand-written update scenarios against one shared initial bias.

    Each scenario is a local function; all of them are invoked in sequence at
    the end. Every scenario except the empty update passes `overwrite=False`
    and inspects the returned object.

    :param df_ini: Table used to build the initial `Bias`.
    """
    b_ini = Bias(df_ini)

    def apply_update(steps, variables, values):
        # Build the update from plain columns and apply it without overwriting.
        frame = pd.DataFrame({'step': steps, 'var': variables, 'bias': values})
        return b_ini.update(Bias(frame), overwrite=False)

    def test_upd_empty():
        # An empty update must hand back the very same object
        assert b_ini.update(Bias()) is b_ini

    def test_common_case():
        merged = apply_update([1, 1, 2, 2], ['X', 'Y'] * 2, [1, -1, 2, -2])

        assert len(merged) == 8
        # The first four rows must still equal the initial table
        assert (b_ini.bias == merged.bias[:4]).all().all()
        assert list(merged.bias['step']) == [1, 1, 2, 2, 3, 3, 4, 4]
        assert list(merged.bias['var']) == ['X', 'Y'] * 4
        assert list(merged.bias['bias']) == [0, 0, 1, -1, 2, -2, 3, -3]

    def test_less_upd_vars():
        merged = apply_update([1, 2], ['X', 'X'], [1, -1])

        assert len(merged) == 6
        assert list(merged.bias['step']) == [1, 1, 2, 2, 3, 4]
        assert list(merged.bias['var']) == ['X', 'Y', 'X', 'Y', 'X', 'X']
        assert list(merged.bias['bias']) == [0, 0, 1, -1, 2, 0]

    def test_more_upd_vars():
        # The extra variable `Z` is expected to be ignored
        merged = apply_update(
            [1, 1, 1, 2, 2, 2],
            ['X', 'Y', 'Z', 'X', 'Y', 'Z'],
            [1, -1, -2, 2, -2, 3],
        )

        assert len(merged) == 8
        assert list(merged.bias['step']) == [1, 1, 2, 2, 3, 3, 4, 4]
        assert list(merged.bias['var']) == ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y']
        assert list(merged.bias['bias']) == [0, 0, 1, -1, 2, -2, 3, -3]

    def test_min_step_larger():
        merged = apply_update([3, 3, 4, 4], ['X', 'Y', 'X', 'Y'], [1, -1, 2, -2])

        assert len(merged) == 8
        assert list(merged.bias['step']) == [1, 1, 2, 2, 3, 3, 4, 4]
        assert list(merged.bias['bias']) == [0, 0, 1, -1, 1, -1, 2, -2]

    scenarios = (
        test_upd_empty,
        test_common_case,
        test_less_upd_vars,
        test_more_upd_vars,
        test_min_step_larger,
    )
    for scenario in scenarios:
        scenario()