Esempio n. 1
0
def reference():

	""" Generate a reference file (used for the signature matrix in CIBERSORT) from immune
	cell lines and, when any are supplied, tumor cell lines.

	Reads the module-level ``args`` (``CELL_LINES``, ``TUMORS``), the ``CELL_LINES_INPUT`` and
	``TUMORS_INPUT`` values and the output targets ``config.REFERENCE`` /
	``config.REFERENCE_TUMOR``. Returns nothing; the result is handed to
	``mixtures.save_separate_matrix``.
	"""

	# Tumors count as present only when the option was given AND is non-empty.
	# `is not None` is an identity test, so it stays a plain bool even if TUMORS
	# is an object that overloads `!=` (e.g. a NumPy array).
	tumor_present = args.TUMORS is not None and len(args.TUMORS) > 0

	# The helper is seeded with an empty dictionary and returns the populated one.
	np_gene_dictionary = mixtures.get_relevant_information({}, tumor_present, args.CELL_LINES, CELL_LINES_INPUT, args.TUMORS, TUMORS_INPUT)

	separate_values_matrix = mixtures.from_dictionary_to_matrix(np_gene_dictionary)

	# Two normalisation passes: first on the separated matrices, then on the
	# matrix they are gathered back into.
	# NOTE(review): presumably "separated" means one matrix per cell line - confirm in mixtures.
	cell_lines_matrix = mixtures.get_separated_for_normalization(separate_values_matrix, tumor_present, CELL_LINES_INPUT, TUMORS_INPUT)
	cell_lines_matrix = quantile_normalisation.quantile_normalize_separately(cell_lines_matrix)

	separate_values_matrix = mixtures.gather_separated_normalized_data(separate_values_matrix, cell_lines_matrix)
	separate_values_matrix = quantile_normalisation.algo(separate_values_matrix)

	mixtures.save_separate_matrix(np_gene_dictionary, separate_values_matrix, tumor_present, config.REFERENCE, config.REFERENCE_TUMOR, CELL_LINES_INPUT, TUMORS_INPUT)
Esempio n. 2
0
def combine_separated_normalized_data(CELL_LINES_MATRIX, SEPARATE_VALUES_MATRIX):

	""" Collapse the replicates of every mixture/cell line into one averaged column and
	quantile normalize the combined matrix.

	CELL_LINES_MATRIX is indexed as [cell line][row][replicate]. The result has one row per
	entry of CELL_LINES_MATRIX[0] and one column per cell line; each cell holds the arithmetic
	mean of that cell line's replicates for the row.

	SEPARATE_VALUES_MATRIX is only used for its length, which decides how many rows are
	filled in. Rows beyond that length (if any) keep their initial value of zero.

	Returns whatever quantile_normalisation.algo returns for the averaged matrix.
	"""

	row_count = len(CELL_LINES_MATRIX[0])
	column_count = len(CELL_LINES_MATRIX)
	averaged = np.zeros(shape=(row_count, column_count))

	for row in range(len(SEPARATE_VALUES_MATRIX)):

		for column, cell_line_values in enumerate(CELL_LINES_MATRIX):

			replicates = cell_line_values[row]

			# Accumulate left to right starting from 0.0 so the floating point
			# result is reproducible.
			total = 0.0
			for value in replicates:
				total += value

			averaged[row][column] = total / float(len(replicates))

	return quantile_normalisation.algo(averaged)