def get_bias(self, worker: MC) -> t.Optional[pd.DataFrame]:
    bias_path = worker.params.config.get_field_value('Bias_Input_File')
    if not bias_path:
        logging.debug(f'Constrainer {self.id} -- no bias for worker {worker.id}')
        return None
    bias = Bias().read_adapt_output(bias_path)
    if self.ref_states is not None:
        bias = bias.center_at_ref(self.ref_states, overwrite=False)
    return bias.bias

def test_two_pos():
    df_ini = pd.DataFrame(
        {'step': [1] * 3 + [2] * 3,
         'var': ['1-X-1-X', '1-X-2-Y', '1-X-2-Z'] * 2,
         'bias': [10, 2, 3, 20, 4, 6]}
    )
    b_ini = Bias(df_ini)
    upd = b_ini.center_at_ref({'1': 'X', '2': 'Y'}, overwrite=False)
    assert list(upd.bias['bias']) == [0, 0, 1, 0, 0, 2]

def test_one_pos():
    df_ini = pd.DataFrame(
        {'step': [1, 1, 2, 2],
         'var': ['1-X-1-X', '1-X-1-Y'] * 2,
         'bias': [1, 1, 2, 2]}
    )
    b_ini = Bias(df_ini)
    with pytest.raises(KeyError):
        b_ini.center_at_ref({'2': 'X'}, overwrite=False)
    upd = b_ini.center_at_ref({'1': 'X'}, overwrite=False)
    assert len(upd) == len(b_ini)
    assert list(upd.bias['step']) == [1, 1, 2, 2]
    assert list(upd.bias['bias']) == [0, 0, 0, 0]

def test_bias_init(bias_dat, bias_tsv):
    # Empty init
    assert Bias().bias is None
    # Non-empty init (parse the tsv fixture into a DataFrame first)
    df = pd.read_csv(StringIO(bias_tsv), sep='\t')
    assert Bias(df).bias is not None
    # Init from dat
    with NamedTemporaryFile('w') as f:
        f.write(bias_dat)
        f.seek(0)
        bias = Bias()
        bias.read_adapt_output(f.name)
        assert bias.bias is not None
    # Init from tsv
    bias = Bias()
    bias.read_bias_df(StringIO(bias_tsv))
    assert bias.bias is not None

def random_bias() -> Bias:
    num_steps = np.random.randint(1, 10)
    step_size = np.random.randint(3, 50)
    variables = [random_variable() for _ in range(step_size)]
    dfs = []
    for step in range(1, num_steps + 1):
        dfs.append(pd.DataFrame({
            'step': [step] * step_size,
            'var': variables,
            'bias': np.random.rand(step_size)
        }))
    return Bias(pd.concat(dfs))

def test_random_bias_update(random_bias):
    b = random_bias.bias
    first = min(b['step'])
    last = max(b['step'])
    if first != last:
        b1 = Bias(b)
        # Update with the same bias
        upd = b1.update(random_bias, overwrite=False).bias
        # Nothing was overwritten
        assert (b1.bias == b).all().all()
        # The length is doubled
        assert len(upd) == len(b) * 2
        # The largest step of the update is twice the largest step of the initial
        assert max(upd['step']) == max(b['step']) * 2
        # The steps of the update can be inferred from the last step of the original
        assert list(upd['step']) == list(b['step']) + list(b['step'] + last)
        # The bias value of the last step is twice that of the original (x + x = 2x)
        assert list(upd['bias'])[-1] == list(b['bias'])[-1] * 2
    else:
        with pytest.raises(ValueError):
            Bias(random_bias.bias).update(random_bias)
        with pytest.raises(ValueError):
            b_ = Bias(b.iloc[:len(b) // 2])
            Bias(b).update(b_)

class ADAPT(Worker):
    """
    A subclass of `Worker` where the mode `ADAPT` is explicitly specified in the config.
    Compared to the base `Worker`, it has one extra attribute, `bias`, and a method,
    `store_bias`, to store and manipulate the bias.
    """

    def __init__(self, params: WorkerParams, id_: Id = None,
                 bias: t.Union[str, Bias, None] = None, bias_input_type: str = 'tsv'):
        """
        :param params: A dataclass containing all the parameters necessary to run the Worker.
        :param id_: Unique identifier of a Worker. If None, defaults to `id(self)`.
        :param bias: Explicitly provided bias.
        :param bias_input_type: Type of the object provided in the `bias` argument.
            Can be either `tsv` or `dat`.
        :raises ValueError: If the `validate_config` method returns `False`.
        """
        super().__init__(params, id_)
        if params.config.mode.field_values[0] != 'ADAPT':
            raise ValueError(
                f'Incorrect mode in config: {params.config.mode.field_values[0]} != ADAPT'
            )
        self.bias = bias
        if isinstance(bias, str):
            self.bias = Bias()
            if bias_input_type == 'tsv':
                self.bias.read_bias_df(bias, overwrite=True)
            elif bias_input_type == 'dat':
                self.bias.read_adapt_output(bias, overwrite=True)
            else:
                raise ValueError(
                    f'Expected `bias_input_type` either `tsv` or `dat`. '
                    f'Got {bias_input_type} instead'
                )
        logging.debug(f'ADAPT {self.id}: init')

    def store_bias(self):
        """
        Parse and store the bias. The path to the bias is provided via the config
        and is created automatically during the `setup_io` call.

        :raises ValueError: If no bias path is found in `config` or the path does not exist.
        """
        bias_path = self.params.config.get_field_value('Adapt_Output_File')
        if bias_path is None:
            raise ValueError(
                f'ADAPT {self.id}: `Adapt_Output_File` is empty in the config')
        if not Path(bias_path).exists():
            raise ValueError(
                f'ADAPT {self.id}: `Adapt_Output_File` {bias_path} does not exist'
            )
        bias = Bias().read_adapt_output(bias_path, overwrite=True)
        if self.bias is None or self.bias.bias is None:
            self.bias = bias
            logging.info(f'ADAPT {self.id}: stored new bias {bias_path}')
        else:
            self.bias.update(bias, overwrite=True)
            logging.info(
                f'ADAPT {self.id}: updated existing bias with {bias_path}')

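# --- Usage sketch (illustrative only, not part of the library) ---
# Assumes a prepared `WorkerParams` instance (`params`, with its config mode set
# to ADAPT) and a tab-separated bias file 'bias.tsv'; both names are hypothetical
# and are not defined here.
#
# worker = ADAPT(params, bias='bias.tsv', bias_input_type='tsv')
# # ... after the underlying run has written `Adapt_Output_File` ...
# worker.store_bias()              # parse the output and update `worker.bias`
# print(worker.bias.bias.head())   # accumulated bias as a pandas DataFrame
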
def __call__(self, worker: MC) -> MC:
    if worker.summary is None:
        return worker
    bias_path = worker.params.config.get_field_value('Bias_Input_File')
    if not bias_path or not Path(bias_path).exists():
        logging.warning(f'BestStateKeeper {self.id} -- no bias for worker {worker.id}')
        return worker
    bias = Bias().read_adapt_output(bias_path)
    if worker.seqs is None:
        logging.warning(f'BestStateKeeper {self.id} -- no seqs for worker {worker.id}')
        return worker
    if worker.id not in self._memory:
        self._memory[worker.id] = MCState(worker.summary, bias.bias, worker.seqs)
    else:
        prev_cov = self._memory[worker.id].Summary.coverage
        curr_cov = worker.summary.coverage
        if curr_cov > prev_cov:
            self._memory[worker.id] = MCState(worker.summary, bias.bias, worker.seqs)
    if self.dump_to_workdir:
        bias.dump(f'{worker.params.working_dir}/{self.dump_name_bias}')
        worker.seqs.to_csv(
            f'{worker.params.working_dir}/{self.dump_name_seq_count}',
            sep='\t', index=False)
    return worker

def test_manual_bias_update(df_ini):
    b_ini = Bias(df_ini)

    def test_upd_empty():
        upd = b_ini.update(Bias())
        assert upd is b_ini

    def test_common_case():
        df_upd = pd.DataFrame(
            {'step': [1, 1, 2, 2], 'var': ['X', 'Y', 'X', 'Y'], 'bias': [1, -1, 2, -2]}
        )
        upd = b_ini.update(Bias(df_upd), overwrite=False)
        assert len(upd) == 8
        assert (b_ini.bias == upd.bias[:4]).all().all()
        assert list(upd.bias['step']) == [1, 1, 2, 2, 3, 3, 4, 4]
        assert list(upd.bias['var']) == ['X', 'Y'] * 4
        assert list(upd.bias['bias']) == [0, 0, 1, -1, 2, -2, 3, -3]

    def test_less_upd_vars():
        df_upd = pd.DataFrame(
            {'step': [1, 2], 'var': ['X', 'X'], 'bias': [1, -1]}
        )
        upd = b_ini.update(Bias(df_upd), overwrite=False)
        assert len(upd) == 6
        assert list(upd.bias['step']) == [1, 1, 2, 2, 3, 4]
        assert list(upd.bias['var']) == ['X', 'Y', 'X', 'Y', 'X', 'X']
        assert list(upd.bias['bias']) == [0, 0, 1, -1, 2, 0]

    def test_more_upd_vars():
        # Variables in the update that are absent from the initial bias ('Z' here) are ignored
        df_upd = pd.DataFrame(
            {'step': [1, 1, 1, 2, 2, 2], 'var': ['X', 'Y', 'Z', 'X', 'Y', 'Z'],
             'bias': [1, -1, -2, 2, -2, 3]}
        )
        upd = b_ini.update(Bias(df_upd), overwrite=False)
        assert len(upd) == 8
        assert list(upd.bias['step']) == [1, 1, 2, 2, 3, 3, 4, 4]
        assert list(upd.bias['var']) == ['X', 'Y', 'X', 'Y', 'X', 'Y', 'X', 'Y']
        assert list(upd.bias['bias']) == [0, 0, 1, -1, 2, -2, 3, -3]

    def test_min_step_larger():
        df_upd = pd.DataFrame(
            {'step': [3, 3, 4, 4], 'var': ['X', 'Y', 'X', 'Y'], 'bias': [1, -1, 2, -2]}
        )
        upd = b_ini.update(Bias(df_upd), overwrite=False)
        assert len(upd) == 8
        assert list(upd.bias['step']) == [1, 1, 2, 2, 3, 3, 4, 4]
        assert list(upd.bias['bias']) == [0, 0, 1, -1, 1, -1, 2, -2]

    test_upd_empty()
    test_common_case()
    test_less_upd_vars()
    test_more_upd_vars()
    test_min_step_larger()