Beispiel #1
0
 def to_csv(self, path=None):
     """
     Write the results of this validation to a CSV file.

     The table is written with ";" as separator and without the index.

     :param path: destination handed to ``self.results.to_csv``. When it is
         falsy (e.g. None), a default location is built from "Validations",
         ``self.database`` and ``self._filename()`` and resolved through
         ``dhs.check_if_and_makefile`` with ``path_type="parent3"``.
     :return: None. The destination used is printed to stdout.
     """
     destination = path
     if not destination:
         # No explicit destination: fall back to the default location.
         default_name = self._filename()
         default_location = os.path.join("Validations", self.database,
                                         default_name)
         destination = dhs.check_if_and_makefile(default_location,
                                                 path_type="parent3")
     self.results.to_csv(destination, sep=";", index=False)
     print("Results stored in {}".format(destination))
Beispiel #2
0
from VEnCode import internals
from VEnCode.utils import dir_and_file_handling as dhs
from VEnCode.utils import general_utils as ghs

if __name__ == "__main__":
    # Script: for every combination of cell-type count and promoter count, run
    # the Monte Carlo simulation on an all-zero matrix of that shape, normalize
    # the resulting e-value and collect everything in one table
    # (rows = number of promoters, columns = number of cell types).
    celltype_number = [
        20, 80, 100, 154, 200, 250, 350, 450, 550, 650, 800, 1000
    ]
    promoter_number = range(1, 11)  # number of promoters ranging from x to y
    e_values = pd.DataFrame(index=promoter_number, columns=celltype_number)
    for i in celltype_number:
        print("Starting number of cell types: {}".format(i))
        for z in promoter_number:
            print("Starting number of promoters: {}".format(z))
            # z rows by i columns, all zeros.
            data = pd.DataFrame(np.zeros(shape=(z, i)), dtype=np.int8)
            e_value_raw = internals.Vencodes.vencode_mc_simulation(data,
                                                                   reps=1000)
            e_value = ghs.e_value_normalizer(
                e_value_raw, z,
                i)  # use this line if we want normalized e-values
            e_values.loc[z, i] = e_value
    try:
        file_name = dhs.check_if_and_makefile(os.path.join(
            "E-value statistics", "e_value statistics norm"),
                                              path_type="parent3")
    except Exception as err:
        # Best-effort fallback: keep the results by writing next to the
        # working directory. Only ordinary errors are caught, so Ctrl-C and
        # SystemExit still stop the script, and the reason is reported
        # instead of being silently discarded.
        file_name = "e_value statistics norm.csv"
        print("Could not prepare the default results path ({!r}); "
              "saving to '{}' instead.".format(err, file_name))
    # pandas asks for file objects to be opened with newline="" so that line
    # endings are not translated a second time (doubled "\r" on Windows).
    with open(file_name, 'w', newline="") as f:
        e_values.to_csv(f, sep=";")
Beispiel #3
0
                         primary_cell_list,
                         celltype_exclude=primary_exclude_list,
                         not_include=primary_not_include_codes,
                         partial_exclude=primary_cells_supersets,
                         sample_types="primary cells",
                         second_parser=None,
                         conservative=True,
                         log_level="info",
                         enhancers=enhancer_names_db,
                         skiprows=None,
                         nrows=None)

# Keep a copy of the data as it is before the merge on the next line, so that
# the pre-merge columns can be put back for one cell type at a time.
data_copy = init.data.copy()
init.data = init.merge_donors_into_celltypes()
for celltype in tqdm(primary_cell_list):
    # Output file for this cell type; the final path is resolved by the
    # project helper check_if_and_makefile.
    filename = "{}_tpm_enhancers".format(celltype)
    results_directory = dhs.check_if_and_makefile(os.path.join(
        "Files", "Dbs", filename),
                                                  path_type="parent3")
    # Temporarily swap this cell type's column for the columns listed in
    # init.codes[celltype], taken from the pre-merge copy
    # (presumably the individual donors of that cell type -- TODO confirm).
    data_celltype = init.data[celltype]
    init.data.drop(celltype, axis=1, inplace=True)
    init.data = pd.concat([init.data, data_copy[init.codes[celltype]]], axis=1)
    # Filter using those columns and a 0.0001 threshold, then save the result
    # as a ";"-separated file.
    data_set = util.df_filter_by_expression(init.data, init.codes[celltype],
                                            0.0001)
    data_set.to_csv(results_directory, sep=";")

    # Undo the swap for the next iteration: remove the columns added above and
    # append the saved cell-type column again (it ends up as the last column).
    # NOTE(review): a DataFrame is passed as the labels to drop; presumably it
    # is resolved to its column labels -- confirm.
    init.data.drop(data_copy[init.codes[celltype]], axis=1, inplace=True)
    init.data = pd.concat([init.data, data_celltype], axis=1)
Beispiel #4
0
            if setup.algorithm == "sampling":
                vencodes = internals.Vencodes(data, algorithm="sampling", number_of_re=setup.ven_size, n_samples=10000)
            elif setup.algorithm == "heuristic":
                vencodes = internals.Vencodes(data, algorithm="heuristic", number_of_re=setup.ven_size, stop=3)
            else:
                raise AttributeError("Algorithm '{}' not recognized".format(setup.algorithm))
            vencodes.next(amount=1)
            if vencodes.vencodes:
                donors_vencode_data = vencodes.celltype_donors_data.loc[vencodes.vencodes[0]]
                assess_if_not_vencode = np.any(donors_vencode_data == 0, axis=0)
                result = any(assess_if_not_vencode)
                results_celltype.append(not result)
            else:
                results_celltype.append("")
            data = data_copy
        results_final[celltype + str(k)] = results_celltype

# Resolve where the results are stored; the file name records the data type
# and the algorithm of this run.
results_file_name = "{} primary {} {}".format("Three donors", setup.data_type, setup.algorithm)
results_directory = d_f_handling.check_if_and_makefile(
    os.path.join("Z-values analysis", results_file_name),
    path_type="parent3",
)

# Collect every attribute of `setup` that is neither callable nor a dunder
# name, so the settings of the run are saved together with the results.
info_list = [
    name for name in dir(setup)
    if not (callable(getattr(setup, name)) or name.startswith("__"))
]
info_dict = {name: getattr(setup, name) for name in info_list}

# Write the settings first, then the results with method="a"
# (presumably append mode, so they follow the settings in the same file).
for payload, extra_options in ((info_dict, {}), (results_final, {"method": "a"})):
    VEnCode.utils.dir_and_file_handling.write_dict_to_csv(
        results_directory, payload, deprecated=False, **extra_options)
Beispiel #5
0
    # Determine e-values:
    if vencodes.vencodes:
        vencodes.determine_e_values()
        e_values = list(vencodes.e_values.values())
        if len(e_values) != setup.ven_number:
            for i in range(len(e_values), setup.ven_number):
                e_values.append("")
        results[celltype] = e_values
    else:
        results[celltype] = [""] * 20

# Resolve where the results are stored; the file name records the cell-type
# category, the VEnCode size (k) and the algorithm.
results_file_name = "{} {} k{} {}".format(
    setup.celltype_type, "Heuristic2", setup.ven_size, algorithm)
results_directory = d_f_handling.check_if_and_makefile(
    os.path.join("E-values analysis", results_file_name),
    path_type="parent3",
)

# Collect every attribute of `setup` that is neither callable nor a dunder
# name; these settings are written to the results file.
info_list = [
    name for name in dir(setup)
    if not (callable(getattr(setup, name)) or name.startswith("__"))
]
info_dict = {name: getattr(setup, name) for name in info_list}

# write the information and results
VEnCode.utils.dir_and_file_handling.write_dict_to_csv(results_directory,
                                                      info_dict,
Beispiel #6
0
    sample_types = "cell lines"
else:
    raise AttributeError("Celltype_type - {} - currently not supported".format(
        setup.celltype_type))

results = {}

# Load the two parsed data sets; they differ only in the data type requested.
data = internals.DataTpm(
    file="parsed",
    sample_types=sample_types,
    data_type=setup.first_data_type,
)
data_second = internals.DataTpm(
    file="parsed",
    sample_types=sample_types,
    data_type=setup.second_data_type,
)

# Resolve where the results are stored; the file name records the cell-type
# category.
results_file_name = "{} {}".format(setup.celltype_type, "Heuristic2")
results_directory = d_f_handling.check_if_and_makefile(
    os.path.join("E-values table", results_file_name),
    path_type="parent3",
)

# Collect every attribute of `setup` that is neither callable nor a dunder
# name.
info_list = [
    name for name in dir(setup)
    if not (callable(getattr(setup, name)) or name.startswith("__"))
]
info_dict = {name: getattr(setup, name) for name in info_list}

# Store the collected settings in the results file.
VEnCode.utils.dir_and_file_handling.write_dict_to_csv(
    results_directory, info_dict, deprecated=False)
                                      number_of_re=k,
                                      stop=3)
        vencodes.next_heuristic2_vencode(data_second)

        # Determine if a VEnCode was found:
        if vencodes.vencodes:
            for v in range(k, 11):
                results[celltype].append(1)
            break
        else:
            results[celltype].append(0)

# Resolve where the results are stored; the file name records both data types
# and the cell-type category.
results_file_name = "Heuristic2 {}-{} {}".format(
    setup.first_data_type, setup.second_data_type, setup.celltype_type)
results_directory = d_f_handling.check_if_and_makefile(
    os.path.join("VEnCode Search", results_file_name),
    path_type="parent3",
)

# Collect every attribute of `setup` that is neither callable nor a dunder
# name; these settings are written to the results file.
info_list = [
    name for name in dir(setup)
    if not (callable(getattr(setup, name)) or name.startswith("__"))
]
info_dict = {name: getattr(setup, name) for name in info_list}

# write the information and results
VEnCode.utils.dir_and_file_handling.write_dict_to_csv(results_directory,
                                                      info_dict,
Beispiel #8
0
# Now you don't need to change anything else
setup = SetUp()

# Primary-cell data set; `data_original` is the copy taken right after
# merge_donors_primary and is restored at the end of every iteration.
data = internals.DataTpm(
    file=setup.file_name,
    sample_types="primary cells",
    data_type=setup.data_type,
)
data.merge_donors_primary(exclude_target=False)
data_original = data.copy()

# Cell-line data set handed to add_celltype below.
data_cancer = internals.DataTpm(
    file=setup.file_name,
    sample_types="cell lines",
    data_type=setup.data_type,
)

for celltype in tqdm(cancer_celltype_list, desc="Completed: "):
    data.add_celltype(celltype, file=data_cancer)
    data.make_data_celltype_specific(celltype)
    data.filter_by_target_celltype_activity(threshold=0.0001, binarize=False)

    # An entry given as a dict is named after its first key.
    celltype = list(celltype.keys())[0] if isinstance(celltype, dict) else celltype
    # those symbols can't be in file names
    celltype = gen_utils.str_replace_multi(celltype, {":": "-", "/": "-"})
    database_name = "{}_tpm_{}-1".format(celltype, setup.data_type)
    directory = dir_handlers.check_if_and_makefile(
        os.path.join("Files", "Dbs", database_name),
        path_type="parent3",
    )

    data.data.to_csv(directory, sep=";")
    # Start the next cell type from the unmodified data again.
    data = data_original.copy()
Beispiel #9
0
        else:
            raise AttributeError("Algorithm '{}' not recognized".format(setup.algorithm))
        vencodes.next(amount=1)

        # Determine if a VEnCode was found:
        if vencodes.vencodes:
            for v in range(k, 11):
                results[celltype].append(1)
            break
        else:
            results[celltype].append(0)

# Resolve where the results are stored; the file name records the settings
# that define this search.
name_fields = (
    setup.celltype_type,
    setup.data_type,
    setup.algorithm,
    setup.reg_element_sparseness,
    setup.target_celltype_activity,
    setup.non_target_celltypes_inactivity,
)
results_file_name = "{} {} {} sp{} act{} inact{}".format(*name_fields)
results_directory = d_f_handling.check_if_and_makefile(
    os.path.join("VEnCode Search", results_file_name),
    path_type="parent3",
)

# Collect every attribute of `setup` that is neither callable nor a dunder
# name, so the settings of the run are saved together with the results.
info_list = [
    name for name in dir(setup)
    if not (callable(getattr(setup, name)) or name.startswith("__"))
]
info_dict = {name: getattr(setup, name) for name in info_list}

# Write the settings first, then the results with method="a"
# (presumably append mode, so they follow the settings in the same file).
VEnCode.utils.dir_and_file_handling.write_dict_to_csv(
    results_directory, info_dict, deprecated=False)
VEnCode.utils.dir_and_file_handling.write_dict_to_csv(
    results_directory, results, deprecated=False, method="a")
print("File saved in: {}".format(results_directory))