def _get_vencode(self, amount, sample_type, parsed, thresholds, n_samples=10000, using=None):
    """
    Search for VEnCodes with the algorithm named by ``self.algorithm``.

    The data set is obtained from ``self._get_data`` and handed to ``internals.Vencodes``;
    ``next(amount=amount)`` is then called on the resulting object to run the search.

    :param amount: forwarded to ``Vencodes.next`` as ``amount``.
    :param sample_type: forwarded to ``self._get_data``.
    :param parsed: forwarded to ``self._get_data``.
    :param thresholds: forwarded to ``self._get_data``.
    :param n_samples: forwarded to ``internals.Vencodes``; only used when the algorithm is "sampling".
    :param using: forwarded to ``internals.Vencodes``; only used when the algorithm is "sampling".
    :return: the ``internals.Vencodes`` object, when its ``vencodes`` attribute is non-empty.
    :raises AttributeError: if ``self.algorithm`` is neither "sampling" nor "heuristic".
    :raises exceptions.NoVencodeError: if the search leaves ``vencodes`` empty.
    """
    # The data is fetched before the algorithm name is validated; keep this order.
    data = self._get_data(sample_type, parsed, thresholds)

    # Collect the algorithm-specific keyword arguments first, then build the object once.
    if self.algorithm == "sampling":
        search_options = dict(algorithm="sampling", n_samples=n_samples, using=using)
    elif self.algorithm == "heuristic":
        search_options = dict(algorithm="heuristic", stop=3)
    else:
        raise AttributeError("Algorithm '{}' not recognized".format(self.algorithm))

    vencodes = internals.Vencodes(data, number_of_re=self.k, **search_options)
    vencodes.next(amount=amount)

    if not vencodes.vencodes:
        raise exceptions.NoVencodeError("No VEnCodes found for {}!".format(self.cell_type))
    return vencodes
def setUpClass(cls):
    """
    Build the class-level fixtures ``cls.data`` and ``cls.vencodes``.

    ``cls.data`` is an ``internals.DataTpm`` made specific to "Hepatocyte", filtered and sorted by
    sparseness. ``cls.vencodes`` is a heuristic ``internals.Vencodes`` search over that data
    (``number_of_re=4``, ``stop=3``) on which ``next(amount=2)`` has already been called.
    """
    cls.data = internals.DataTpm(
        file="parsed",
        sample_types="primary cells",
        data_type="promoters",
        nrows=20000,
    )

    # Prepare the data set; the calls are kept in their original order.
    dataset = cls.data
    dataset.make_data_celltype_specific("Hepatocyte")
    dataset.filter_by_target_celltype_activity(threshold=1)
    dataset.filter_by_reg_element_sparseness(threshold=0)
    dataset.define_non_target_celltypes_inactivity(threshold=0)
    dataset.sort_sparseness()

    # Publish the search object on the class before running it, as the original did.
    search = internals.Vencodes(dataset, algorithm="heuristic", number_of_re=4, stop=3)
    cls.vencodes = search
    search.next(amount=2)
# For every cell type and for k = 1 and 2 donors, run 50 trials. Each trial searches for one VEnCode
# using a random subset of k donors and records whether the donor values of the elements found are
# all non-zero. Results are stored in `results_final` under the key "<celltype><k>".
for celltype in tqdm(setup.re_list, desc="Completed: "):
    data.make_data_celltype_specific(celltype)
    # Snapshot taken before any filtering. The filter calls below discard their return values,
    # so they presumably modify `data` in place - the snapshot is what each trial restarts from.
    data_copy = data.copy()
    for k in [1, 2]:
        results_celltype = []
        for _ in range(50):
            # Pick k distinct donor indices out of 0, 1, 2.
            choice = random.sample(range(3), k=k)
            data.filter_by_target_celltype_activity(threshold=setup.target_celltype_activity, donors=choice)
            data.filter_by_reg_element_sparseness(threshold=setup.reg_element_sparseness)
            data.define_non_target_celltypes_inactivity(threshold=setup.non_target_celltypes_inactivity)
            if setup.algorithm != "sampling":
                data.sort_sparseness()

            if setup.algorithm == "sampling":
                vencodes = internals.Vencodes(data, algorithm="sampling", number_of_re=setup.ven_size,
                                              n_samples=10000)
            elif setup.algorithm == "heuristic":
                vencodes = internals.Vencodes(data, algorithm="heuristic", number_of_re=setup.ven_size, stop=3)
            else:
                raise AttributeError("Algorithm '{}' not recognized".format(setup.algorithm))
            vencodes.next(amount=1)

            if vencodes.vencodes:
                # Donor values for the elements of the first VEnCode found.
                donors_vencode_data = vencodes.celltype_donors_data.loc[vencodes.vencodes[0]]
                assess_if_not_vencode = np.any(donors_vencode_data == 0, axis=0)
                result = any(assess_if_not_vencode)
                # True when no donor value is 0 for the selected elements.
                results_celltype.append(not result)
            else:
                # No VEnCode found in this trial.
                results_celltype.append("")

            # Restart the next trial from a fresh copy of the snapshot. Rebinding with a plain
            # `data = data_copy` would make both names point to the same object, so the next
            # trial's filters would also alter the snapshot.
            # NOTE(review): the reset is applied at the end of every trial - confirm this matches
            # the intended nesting level.
            data = data_copy.copy()
        results_final[celltype + str(k)] = results_celltype
# Prepare the second data set for this cell type, using the "second_*" thresholds from setup.
data_second.make_data_celltype_specific(celltype)
data_second.filter_by_target_celltype_activity(
    threshold=setup.second_target_celltype_activity)
data_second.filter_by_reg_element_sparseness(
    threshold=setup.second_reg_element_sparseness)
data_second.define_non_target_celltypes_inactivity(
    threshold=setup.second_non_target_celltypes_inactivity)
data_second.sort_sparseness()
# Deal with possible dictionaries in celltype list: the first key is used as the results key.
if isinstance(celltype, dict):
    celltype = list(celltype.keys())[0]
# Launch VEnCode search:
vencodes = internals.Vencodes(data, algorithm="heuristic", number_of_re=setup.ven_size, stop=3)
vencodes.next_heuristic2_vencode(data_second, amount=setup.ven_number)
# Determine e-values:
if vencodes.vencodes:
    vencodes.determine_e_values()
    e_values = list(vencodes.e_values.values())
else:
    e_values = []
# Pad with empty strings up to setup.ven_number entries. This also covers the case where no
# VEnCode was found, which previously used a fixed length of 20 regardless of setup.ven_number.
# A non-positive difference adds nothing, so longer lists are left untouched.
e_values.extend([""] * (setup.ven_number - len(e_values)))
results[celltype] = e_values
# create a directory to store results
threshold=setup.second_target_celltype_activity) data_second.filter_by_reg_element_sparseness( threshold=setup.second_reg_element_sparseness) data_second.define_non_target_celltypes_inactivity( threshold=setup.second_non_target_celltypes_inactivity) data_second.sort_sparseness() # Deal with possible dictionaries in celltype list: if isinstance(celltype, dict): celltype = list(celltype.keys())[0] data_second_original = data_second.copy() results[celltype] = [] for k in range(1, 5): # Launch VEnCode search: vencodes = internals.Vencodes(data, algorithm="heuristic", number_of_re=k, stop=3) vencodes.next_heuristic2_vencode(data_second, amount=setup.ven_number) # Determine e-values: if vencodes.vencodes: vencodes.determine_e_values() best = [] e_values = vencodes.e_values.copy() for i in range(5): try: best_partial = max(e_values, key=lambda key: e_values[key]) except ValueError: break best.append(best_partial) try:
def setUp(self):
    """Create a heuristic ``internals.Vencodes`` over ``self.data`` (``number_of_re=4``) as ``self.vencodes``."""
    search_options = {"algorithm": "heuristic", "number_of_re": 4}
    self.vencodes = internals.Vencodes(self.data, **search_options)
def setUp(self):
    """
    Create a sampling ``internals.Vencodes`` over ``self.data`` and store it as ``self.vencodes``.

    The search is configured with ``number_of_re=4`` and ``n_samples=10000``.
    """
    search_options = {"algorithm": "sampling", "number_of_re": 4, "n_samples": 10000}
    self.vencodes = internals.Vencodes(self.data, **search_options)