def reference():
    """
    Generate a reference file (used for signature matrix in CIBERSORT) using
    immune cell lines and tumor cell lines.

    Takes no parameters and returns nothing: all inputs come from the
    module-level names ``args``, ``CELL_LINES_INPUT``, ``TUMORS_INPUT`` and
    ``config``, and the result is handed to ``mixtures.save_separate_matrix``.
    """
    # Tumors take part only when at least one was supplied. The identity test
    # against None avoids invoking an overloaded ``!=`` on the argument.
    tumor_present = args.TUMORS is not None and len(args.TUMORS) > 0

    # Collect the gene values into a dictionary; the helper receives an empty
    # dictionary and its return value is used from here on.
    np_gene_dictionary = {}
    np_gene_dictionary = mixtures.get_relevant_information(
        np_gene_dictionary,
        tumor_present,
        args.CELL_LINES,
        CELL_LINES_INPUT,
        args.TUMORS,
        TUMORS_INPUT,
    )
    separate_values_matrix = mixtures.from_dictionary_to_matrix(np_gene_dictionary)

    # Normalize the separated data on its own, then merge it back into the
    # full matrix before the final normalization over everything.
    cell_lines_matrix = mixtures.get_separated_for_normalization(
        separate_values_matrix,
        tumor_present,
        CELL_LINES_INPUT,
        TUMORS_INPUT,
    )
    cell_lines_matrix = quantile_normalisation.quantile_normalize_separately(
        cell_lines_matrix
    )
    separate_values_matrix = mixtures.gather_separated_normalized_data(
        separate_values_matrix, cell_lines_matrix
    )
    separate_values_matrix = quantile_normalisation.algo(separate_values_matrix)

    # Output destinations are taken from config.REFERENCE and
    # config.REFERENCE_TUMOR.
    mixtures.save_separate_matrix(
        np_gene_dictionary,
        separate_values_matrix,
        tumor_present,
        config.REFERENCE,
        config.REFERENCE_TUMOR,
        CELL_LINES_INPUT,
        TUMORS_INPUT,
    )
def combine_separated_normalized_data(CELL_LINES_MATRIX, SEPARATE_VALUES_MATRIX):
    """
    Collapse the per-cell-line (or per-mixture) normalized values into one
    matrix by averaging the replicates, then quantile normalize that matrix.

    CELL_LINES_MATRIX is indexed as [cell_line][row][replicate].
    SEPARATE_VALUES_MATRIX is only used for its length, which decides how many
    rows get filled in (presumably one row per gene - TODO confirm with the
    caller).

    The intermediate matrix has len(CELL_LINES_MATRIX[0]) rows and
    len(CELL_LINES_MATRIX) columns; the value returned is whatever
    quantile_normalisation.algo produces from it.
    """
    column_count = len(CELL_LINES_MATRIX)
    row_count = len(CELL_LINES_MATRIX[0])
    combined = np.zeros(shape=(row_count, column_count))

    for row in range(len(SEPARATE_VALUES_MATRIX)):
        for column, cell_line_rows in enumerate(CELL_LINES_MATRIX):
            replicates = cell_line_rows[row]
            # Plain left-to-right accumulation starting from 0.0.
            total = 0.0
            for value in replicates:
                total += value
            combined[row][column] = total / float(len(replicates))

    return quantile_normalisation.algo(combined)