Example #1
0
 # Normalise per-gene read depth by marker-gene coverage to get a copy-number
 # estimate. `(marker_coverages == 0)` adds 1 to the denominator wherever the
 # coverage is exactly zero, so the division never divides by zero; the
 # `* 1.0` forces floating-point division.
 # NOTE(review): assumes both operands are numpy arrays with compatible
 # (broadcastable) shapes -- confirm against the caller.
 gene_copynum_matrix = gene_depth_matrix * 1.0 / (marker_coverages +
                                                  (marker_coverages == 0))
 #
 # Convert gene_samples to a plain Python list (rebinds the name):
 gene_samples = gene_samples.tolist()
 #
 # Convert gene names to a numpy array (rebinds the name):
 gene_names = numpy.array(gene_names)
 #
 # Indexes for different subject pairs.
 # desired_samples is an alias of gene_samples (same list object, not a copy).
 desired_samples = gene_samples
 #
 # Split sample pairs into three groups: same sample, same subject and
 # different subject. The returned indices refer to positions in
 # desired_samples -- presumably; verify in parse_midas_data.
 desired_same_sample_idxs, desired_same_subject_idxs, desired_diff_subject_idxs = parse_midas_data.calculate_ordered_subject_pairs(
     sample_order_map, desired_samples)
 #
 # Build maps from desired_samples to the SNP and gene sample lists.
 # NOTE(review): because desired_samples is gene_samples itself, the gene map
 # is presumably an identity mapping; it is kept for symmetry with the SNP map.
 snp_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
     desired_samples, snp_samples)
 gene_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
     desired_samples, gene_samples)
 #
 # Same-subject pairs, re-expressed through the SNP and gene index maps.
 same_subject_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
     snp_sample_idx_map, desired_same_subject_idxs)
 same_subject_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
     gene_sample_idx_map, desired_same_subject_idxs)
 #
 # Different-subject pairs, re-expressed through the same two index maps.
 diff_subject_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
     snp_sample_idx_map, desired_diff_subject_idxs)
 diff_subject_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
     gene_sample_idx_map, desired_diff_subject_idxs)
 #
 # Starts out empty; the code that fills it is outside this excerpt.
 between_host_gene_idxs = [
 ]  # store idxs of genes that change between hosts
Example #2
0
# Report on stderr how many reference genes are held in reference_genes.
sys.stderr.write("Done! (%d genes)\n" % len(reference_genes))

# Debug output on stdout: the first ten entries of each gene list.
# The parenthesised single-argument form prints the same text under the
# Python 2 print statement and is also valid as the Python 3 print() call,
# so this no longer raises a SyntaxError on Python 3.
print(reference_genes[0:10])
print(gene_names[0:10])

# Count, for every pair of samples, how many genes differ between them.
sys.stderr.write("Calculate gene hamming matrix...\n")
# Active variant: consider every gene in the pan-genome.
gene_hamming_matrix, num_opportunities = (
    gene_diversity_utils.calculate_coverage_based_gene_hamming_matrix(
        gene_depth_matrix, marker_coverages, min_log2_fold_change=4))
#
# Disabled variant: restrict to the genes of the MIDAS reference genome.
# NOTE(review): this line names `diversity_utils` (not `gene_diversity_utils`)
# and does not unpack `num_opportunities`; update it before re-enabling.
#gene_hamming_matrix = diversity_utils.calculate_coverage_based_gene_hamming_matrix(gene_depth_matrix[reference_gene_idxs,:], marker_coverages, min_log2_fold_change=4)
#

# Map from the high-coverage sample list to the full `samples` list.
sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    high_coverage_samples, samples)

# Partition the pairs of high-coverage samples into three groups:
# same sample, same subject, and different subjects.
high_coverage_pair_idxs = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, high_coverage_samples)
(high_coverage_same_sample_idxs,
 high_coverage_same_subject_idxs,
 high_coverage_diff_subject_idxs) = high_coverage_pair_idxs

# Pass each group of pair indices through sample_idx_map.
apply_idx_map = parse_midas_data.apply_sample_index_map_to_indices
same_sample_idxs = apply_idx_map(
    sample_idx_map, high_coverage_same_sample_idxs)
same_subject_idxs = apply_idx_map(
    sample_idx_map, high_coverage_same_subject_idxs)
diff_subject_idxs = apply_idx_map(
    sample_idx_map, high_coverage_diff_subject_idxs)

# Gene differences between same-subject sample pairs, sorted in place.
hamming_timepoints = gene_hamming_matrix[same_subject_idxs]
hamming_timepoints.sort()