예제 #1
0
 def _get_vencode(self,
                  amount,
                  sample_type,
                  parsed,
                  thresholds,
                  n_samples=10000,
                  using=None):
     """Search for VEnCodes with the algorithm configured on this instance.

     The data set is obtained from ``self._get_data(sample_type, parsed,
     thresholds)`` and handed to ``internals.Vencodes`` together with
     ``self.k`` as ``number_of_re``.

     Args:
         amount: Forwarded to ``Vencodes.next(amount=...)``.
         sample_type: Forwarded to ``self._get_data``.
         parsed: Forwarded to ``self._get_data``.
         thresholds: Forwarded to ``self._get_data``.
         n_samples: Passed to ``Vencodes`` only for the "sampling" algorithm.
         using: Passed to ``Vencodes`` only for the "sampling" algorithm.

     Returns:
         The ``internals.Vencodes`` object, once its ``vencodes`` attribute
         is non-empty.

     Raises:
         AttributeError: ``self.algorithm`` is neither "sampling" nor
             "heuristic".
         exceptions.NoVencodeError: The search produced no VEnCodes.
     """
     # The data is fetched before the algorithm is validated, exactly as
     # callers have come to expect (any error from _get_data surfaces first).
     data = self._get_data(sample_type, parsed, thresholds)

     # Collect the algorithm-specific keyword arguments, then build the
     # finder with one shared constructor call.
     if self.algorithm == "sampling":
         options = {
             "algorithm": "sampling",
             "number_of_re": self.k,
             "n_samples": n_samples,
             "using": using,
         }
     elif self.algorithm == "heuristic":
         options = {
             "algorithm": "heuristic",
             "number_of_re": self.k,
             "stop": 3,
         }
     else:
         message = "Algorithm '{}' not recognized".format(self.algorithm)
         raise AttributeError(message)

     finder = internals.Vencodes(data, **options)
     finder.next(amount=amount)

     # Guard clause: an empty result list is reported as an error.
     if not finder.vencodes:
         message = "No VEnCodes found for {}!".format(self.cell_type)
         raise exceptions.NoVencodeError(message)
     return finder
예제 #2
0
 def setUpClass(cls):
     """Prepare shared fixtures: Hepatocyte promoter data and two VEnCodes.

     The data set is loaded once, made specific to "Hepatocyte", filtered,
     and sorted; a heuristic ``Vencodes`` search is then run for 2 results
     with ``number_of_re=4``. Both objects are stored on the class as
     ``cls.data`` and ``cls.vencodes``.
     """
     dataset = internals.DataTpm(
         file="parsed",
         sample_types="primary cells",
         data_type="promoters",
         nrows=20000,
     )
     # Publish the data set on the class before preparing it, so the
     # attribute exists even if one of the preparation steps fails.
     cls.data = dataset

     # Preparation pipeline; each call works on the data set in place.
     dataset.make_data_celltype_specific("Hepatocyte")
     dataset.filter_by_target_celltype_activity(threshold=1)
     dataset.filter_by_reg_element_sparseness(threshold=0)
     dataset.define_non_target_celltypes_inactivity(threshold=0)
     dataset.sort_sparseness()

     finder = internals.Vencodes(
         dataset,
         algorithm="heuristic",
         number_of_re=4,
         stop=3,
     )
     cls.vencodes = finder
     finder.next(amount=2)
예제 #3
0
# For every cell type, repeatedly pick k random donors (k = 1, then 2), look
# for a single VEnCode using only those donors' data, and record whether the
# VEnCode found has no zero entry in ``vencodes.celltype_donors_data``.
# Results are stored in ``results_final`` under the key "<celltype><k>".
for celltype in tqdm(setup.re_list, desc="Completed: "):
    data.make_data_celltype_specific(celltype)
    # Pristine, cell-type-specific snapshot used to undo the in-place
    # filtering performed in each repetition below.
    data_copy = data.copy()
    for k in [1, 2]:
        results_celltype = []
        for n in range(50):
            # Pick k distinct donor indices out of {0, 1, 2}.
            choice = random.sample(range(3), k=k)
            data.filter_by_target_celltype_activity(threshold=setup.target_celltype_activity, donors=choice)
            data.filter_by_reg_element_sparseness(threshold=setup.reg_element_sparseness)
            data.define_non_target_celltypes_inactivity(threshold=setup.non_target_celltypes_inactivity)
            if setup.algorithm != "sampling":
                data.sort_sparseness()

            if setup.algorithm == "sampling":
                vencodes = internals.Vencodes(data, algorithm="sampling", number_of_re=setup.ven_size, n_samples=10000)
            elif setup.algorithm == "heuristic":
                vencodes = internals.Vencodes(data, algorithm="heuristic", number_of_re=setup.ven_size, stop=3)
            else:
                raise AttributeError("Algorithm '{}' not recognized".format(setup.algorithm))
            vencodes.next(amount=1)
            if vencodes.vencodes:
                # Rows of the first VEnCode found; presumably one column per
                # donor (pandas-style ``.loc`` lookup) -- TODO confirm.
                donors_vencode_data = vencodes.celltype_donors_data.loc[vencodes.vencodes[0]]
                # Any zero entry means the candidate fails the check.
                assess_if_not_vencode = np.any(donors_vencode_data == 0, axis=0)
                result = any(assess_if_not_vencode)
                results_celltype.append(not result)
            else:
                # No VEnCode found in this repetition: record a blank.
                results_celltype.append("")
            # Restore from a *fresh* copy of the snapshot. Rebinding ``data``
            # to ``data_copy`` itself would alias the two names, so the next
            # repetition's in-place filters would corrupt the snapshot and
            # every later repetition would run on already-filtered data.
            data = data_copy.copy()
        results_final[celltype + str(k)] = results_celltype
예제 #4
0
    # Prepare the secondary data set for this cell type, using the
    # "second_*" thresholds from the setup.
    data_second.make_data_celltype_specific(celltype)
    data_second.filter_by_target_celltype_activity(
        threshold=setup.second_target_celltype_activity)
    data_second.filter_by_reg_element_sparseness(
        threshold=setup.second_reg_element_sparseness)
    data_second.define_non_target_celltypes_inactivity(
        threshold=setup.second_non_target_celltypes_inactivity)
    data_second.sort_sparseness()

    # Deal with possible dictionaries in celltype list: from here on only
    # the first key is used as the cell type's name.
    if isinstance(celltype, dict):
        celltype = list(celltype.keys())[0]

    # Launch VEnCode search on the primary data, with the secondary data
    # set handed to the search method.
    vencodes = internals.Vencodes(data,
                                  algorithm="heuristic",
                                  number_of_re=setup.ven_size,
                                  stop=3)
    vencodes.next_heuristic2_vencode(data_second, amount=setup.ven_number)

    # Determine e-values. Every row stored in ``results`` is padded with
    # empty strings up to ``setup.ven_number`` entries, so rows have the
    # same length whether or not (enough) VEnCodes were found.
    if vencodes.vencodes:
        vencodes.determine_e_values()
        e_values = list(vencodes.e_values.values())
        # A non-positive shortfall multiplies to an empty list, so rows that
        # are already long enough are left untouched.
        e_values.extend([""] * (setup.ven_number - len(e_values)))
        results[celltype] = e_values
    else:
        # Use the configured number of VEnCodes rather than a fixed 20, to
        # match the padding applied in the branch above.
        results[celltype] = [""] * setup.ven_number

# create a directory to store results
예제 #5
0
        threshold=setup.second_target_celltype_activity)
    data_second.filter_by_reg_element_sparseness(
        threshold=setup.second_reg_element_sparseness)
    data_second.define_non_target_celltypes_inactivity(
        threshold=setup.second_non_target_celltypes_inactivity)
    data_second.sort_sparseness()

    # Deal with possible dictionaries in celltype list:
    if isinstance(celltype, dict):
        celltype = list(celltype.keys())[0]
    data_second_original = data_second.copy()
    results[celltype] = []
    for k in range(1, 5):
        # Launch VEnCode search:
        vencodes = internals.Vencodes(data,
                                      algorithm="heuristic",
                                      number_of_re=k,
                                      stop=3)
        vencodes.next_heuristic2_vencode(data_second, amount=setup.ven_number)

        # Determine e-values:
        if vencodes.vencodes:
            vencodes.determine_e_values()
            best = []
            e_values = vencodes.e_values.copy()
            for i in range(5):
                try:
                    best_partial = max(e_values, key=lambda key: e_values[key])
                except ValueError:
                    break
                best.append(best_partial)
                try:
예제 #6
0
 def setUp(self):
     """Create a fresh heuristic ``Vencodes`` (``number_of_re=4``) per test."""
     finder = internals.Vencodes(
         self.data,
         algorithm="heuristic",
         number_of_re=4,
     )
     self.vencodes = finder
예제 #7
0
 def setUp(self):
     """Create a fresh sampling ``Vencodes`` (``number_of_re=4``) per test.

     The sampler is configured with ``n_samples=10000``.
     """
     finder = internals.Vencodes(
         self.data,
         algorithm="sampling",
         number_of_re=4,
         n_samples=10000,
     )
     self.vencodes = finder