def reference():
    """Generate a reference file (used for the signature matrix in CIBERSORT).

    The file is built from the immune cell lines named in ``args.CELL_LINES``
    and, when any are given, the tumor cell lines in ``args.TUMORS``.

    Reads the module-level ``args``, ``CELL_LINES_INPUT``, ``TUMORS_INPUT``
    and ``config``.  Returns nothing; the result is handed to
    ``mixtures.save_separate_matrix`` together with ``config.REFERENCE`` and
    ``config.REFERENCE_TUMOR``.
    """
    # Tumor input is optional: both a missing option (None) and an empty
    # list count as "no tumor".
    tumor_present = args.TUMORS is not None and len(args.TUMORS) > 0

    # Collect the per-gene values into a dictionary, then view it as a matrix.
    np_gene_dictionary = mixtures.get_relevant_information(
        {}, tumor_present,
        args.CELL_LINES, CELL_LINES_INPUT,
        args.TUMORS, TUMORS_INPUT)
    separate_values_matrix = mixtures.from_dictionary_to_matrix(np_gene_dictionary)

    # First pass: split the matrix, quantile normalise the parts separately
    # and put the normalised parts back into the full matrix.
    cell_lines_matrix = mixtures.get_separated_for_normalization(
        separate_values_matrix, tumor_present, CELL_LINES_INPUT, TUMORS_INPUT)
    cell_lines_matrix = quantile_normalisation.quantile_normalize_separately(cell_lines_matrix)
    separate_values_matrix = mixtures.gather_separated_normalized_data(
        separate_values_matrix, cell_lines_matrix)

    # Second pass: normalise the regathered matrix as a whole before saving.
    separate_values_matrix = quantile_normalisation.algo(separate_values_matrix)

    mixtures.save_separate_matrix(
        np_gene_dictionary, separate_values_matrix, tumor_present,
        config.REFERENCE, config.REFERENCE_TUMOR,
        CELL_LINES_INPUT, TUMORS_INPUT)
def pure_cells():
    """Generate a pure cell lines file (used for LLSR).

    The file is built from the immune cell lines named in ``args.CELL_LINES``
    and, when any are given, the tumor cell lines in ``args.TUMORS``.

    Reads the module-level ``args``, ``CELL_LINES_INPUT``, ``TUMORS_INPUT``
    and ``config``.  Returns nothing; the result is handed to
    ``mixtures.save_combined_matrix`` together with ``config.COMBINED_CELLS``
    and ``config.COMBINED_CELLS_TUMOR``.
    """
    # Tumor input is optional: both a missing option (None) and an empty
    # list count as "no tumor".
    tumor_present = args.TUMORS is not None and len(args.TUMORS) > 0

    # Collect the per-gene values into a dictionary, then view it as a matrix.
    np_gene_dictionary = mixtures.get_relevant_information(
        {}, tumor_present,
        args.CELL_LINES, CELL_LINES_INPUT,
        args.TUMORS, TUMORS_INPUT)
    separate_values_matrix = mixtures.from_dictionary_to_matrix(np_gene_dictionary)

    # Split the matrix and quantile normalise the parts separately.
    cell_lines_matrix = mixtures.get_separated_for_normalization(
        separate_values_matrix, tumor_present, CELL_LINES_INPUT, TUMORS_INPUT)
    cell_lines_matrix = quantile_normalisation.quantile_normalize_separately(cell_lines_matrix)

    # Unlike reference(), the normalised parts are combined rather than
    # gathered and re-normalised.
    all_cell_lines_combined = mixtures.combine_separated_normalized_data(
        cell_lines_matrix, separate_values_matrix)

    mixtures.save_combined_matrix(
        np_gene_dictionary, all_cell_lines_combined, tumor_present,
        config.COMBINED_CELLS, config.COMBINED_CELLS_TUMOR,
        CELL_LINES_INPUT, TUMORS_INPUT)
def _spike_in_tumor(row, tumor_fraction):
    """Blend every value of *row* except the last with the row's last value.

    Each returned value is
    ``value * (1 - tumor_fraction) + row[-1] * tumor_fraction``.  The last
    column itself (presumably the tumor expression -- confirm against the
    ``mixtures`` helpers) is not part of the result.
    """
    mixture_fraction = 1 - tumor_fraction
    return [
        value * mixture_fraction + row[-1] * tumor_fraction
        for value in row[:-1]
    ]


def mixes():
    """Generate simulation files, optionally with spiked-in tumor.

    The input is the mixture files named in ``args.MIXTURES`` and, when any
    are given, the tumor files in ``args.TUMORS``.

    Without tumor input a single file is written through
    ``file_handler.write_combined_mixtures``.  With tumor input,
    ``args.ITERATION`` must hold six numbers -- start, stop and step for the
    tumor content followed by start, stop and step for the noise amount
    (e.g. ``-i 0 100 5 0 100 5``) -- and one file is written per
    (tumor content, noise amount) pair through
    ``file_handler.write_combined_mixtures_tumor``.  Both stop values are
    exclusive because the levels are produced with ``range``.

    Reads the module-level ``args``, ``MIXTURES_INPUT``, ``TUMORS_INPUT`` and
    ``config``.  Returns nothing.  Prints an error and exits with status 1
    when tumor input is given without exactly six iteration numbers.
    """
    # Tumor input is optional: both a missing option (None) and an empty
    # list count as "no tumor".
    tumor_present = args.TUMORS is not None and len(args.TUMORS) > 0

    # Collect the per-gene values into a dictionary, then view it as a matrix.
    np_gene_dictionary = mixtures.get_relevant_information(
        {}, tumor_present,
        args.MIXTURES, MIXTURES_INPUT,
        args.TUMORS, TUMORS_INPUT)
    separate_values_matrix = mixtures.from_dictionary_to_matrix(np_gene_dictionary)
    cell_lines_matrix = mixtures.get_separated_for_normalization(
        separate_values_matrix, tumor_present, MIXTURES_INPUT, TUMORS_INPUT)

    # The total mRNA of each mixture (used later in the Abbas algorithm) used
    # to be written out at this point; the call is currently disabled:
    # file_handler.write_probe_values([MIXA, MIXB, MIXC, MIXD], ["MIX A", "MIX B", "MIX C", "MIX D"], "probe_values_mixtures")

    cell_lines_matrix = quantile_normalisation.quantile_normalize_separately(cell_lines_matrix)
    all_cell_lines_combined = mixtures.combine_separated_normalized_data(
        cell_lines_matrix, separate_values_matrix)
    np_gene_dictionary = mixtures.from_matrix_to_dictionary(
        all_cell_lines_combined, np_gene_dictionary)

    if not tumor_present:
        file_handler.write_combined_mixtures(np_gene_dictionary, config.MIXTURE, MIXTURES_INPUT)
        return

    # A missing -i option (None) gets the same message as a wrong count
    # instead of failing on len(None).
    if args.ITERATION is None or len(args.ITERATION) != 6:
        print("\n[ ERROR ] - You have not given 6 iteration numbers for tumor and noise, e.g.: -i 0 100 5 0 100 5\n")
        sys.exit(1)

    (start_tumor, stop_tumor, step_tumor,
     start_noise, stop_noise, step_noise) = (int(number) for number in args.ITERATION)

    tumor_levels = range(start_tumor, stop_tumor, step_tumor)
    noise_levels = range(start_noise, stop_noise, step_noise)

    # The data is iterated over every tumor content and noise amount in the
    # requested ranges (percentages); each combination is written to file.
    for finished, tumor_content in enumerate(tumor_levels, start=1):
        tumor_fraction = tumor_content / 100

        for noise_amount in noise_levels:
            fixed_tumor_matrix = []
            for row in all_cell_lines_combined:
                spiked_row = _spike_in_tumor(row, tumor_fraction)
                if noise_amount > 0:
                    spiked_row = noise.add_noise_controlled(spiked_row, noise_amount)
                fixed_tumor_matrix.append(spiked_row)

            np_gene_dictionary = mixtures.from_matrix_to_dictionary(
                fixed_tumor_matrix, np_gene_dictionary)
            file_handler.write_combined_mixtures_tumor(
                np_gene_dictionary, config.MIXTURES, tumor_content, noise_amount, MIXTURES_INPUT)

        # Progress is counted in tumor levels; taking it from the range itself
        # makes the counter reach 0 after the last level.
        print("--- Generated simulation file with " + str(tumor_content) + "% tumor content. " + str(len(tumor_levels) - finished) + " files remaining.")