Ejemplo n.º 1
0
###################################################################################################### 

# Keep the samples that appear in snp_samples and that are also gene samples
# whose marker coverage exceeds min_coverage.
# NOTE(review): the previous comment also mentioned a "low piS" criterion, but
# only coverage is filtered in this statement -- confirm where piS is applied.
# NOTE: a set has no defined iteration order, so the ordering of
# desired_samples is arbitrary.
snp_sample_set = set(snp_samples)
covered_gene_sample_set = set(gene_samples[marker_coverages > min_coverage])
desired_samples = numpy.array(list(snp_sample_set & covered_gene_sample_set))

# Work out where each desired sample sits within snp_samples and gene_samples.
snp_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, snp_samples)
gene_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    desired_samples, gene_samples)

### Time-pair idxs where patients may have up to 3 time points.

# Find which pairs of desired samples belong to different time points.
# Per the original author's note, time_pair_idxs is made of 2 arrays: the
# first holds the indices for the 1st visno, the second holds the indices for
# the 2nd or 3rd visno.
(time_pair_idxs,
 visno_snps_genes,
 day_snps_genes) = parse_midas_data.calculate_time_pairs(
    subject_sample_time_map, desired_samples)

# time_pair_idxs are expressed in the desired_samples ordering, so convert
# them to the snp_samples ordering (used to pull the relevant fields from
# total_fixation_matrix) ...
time_pair_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, time_pair_idxs)
# ... and to the gene_samples ordering (used to pull the relevant fields from
# gene_hamming_matrix).
time_pair_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, time_pair_idxs)


#### Unique time-pair idxs.
# Original author's intent: restrict each patient to a single entry so that
# the plotted points are iid.
# NOTE(review): the exact selection rule lives in
# parse_midas_data.calculate_unique_time_pairs -- confirm there.
(time_pair_idxs_unique,
 visno_snps_genes_unique,
 day_snps_genes_unique) = parse_midas_data.calculate_unique_time_pairs(
    subject_sample_time_map, desired_samples)

# Convert the unique pairs from the desired_samples ordering to the
# snp_samples and gene_samples orderings.
time_pair_snp_idxs_unique = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, time_pair_idxs_unique)
time_pair_gene_idxs_unique = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, time_pair_idxs_unique)


### Different-patient idxs.
# To compare against the time-pair idxs we also want pairs of samples from
# different patients. This helps contextualize whether the events seen within
# patients resemble replacements or modifications.

# Calculate which pairs of idxs (in desired_samples order) belong to the same
# sample, which to the same subject, and which to different subjects.
(desired_same_sample_idxs,
 desired_same_subject_idxs,
 desired_diff_subject_idxs) = parse_midas_data.calculate_ordered_subject_pairs(
    sample_order_map, desired_samples)

# snp_sample_idx_map and gene_sample_idx_map were already computed from
# desired_samples near the top of this section, and desired_samples,
# snp_samples and gene_samples have not been reassigned since, so the maps are
# reused here instead of being recomputed with identical arguments.

# Convert each group of pairs to the snp_samples / gene_samples orderings.
same_sample_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_sample_idxs)
same_sample_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, desired_same_sample_idxs)

same_subject_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_same_subject_idxs)
same_subject_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, desired_same_subject_idxs)

diff_subject_snp_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    snp_sample_idx_map, desired_diff_subject_idxs)
diff_subject_gene_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    gene_sample_idx_map, desired_diff_subject_idxs)

# Starts empty; presumably filled by code beyond this excerpt -- not visible here.
same_subject_ploidy_changes = []
Ejemplo n.º 3
0
# Boolean mask of samples whose median coverage reaches the depth threshold.
# Assumes median_coverages and pis are numpy arrays aligned element-wise with
# samples -- TODO confirm against the code that builds them.
is_high_coverage = median_coverages >= min_coverage

# Only plot samples above a certain depth threshold.
high_coverage_samples = samples[is_high_coverage]

# Same depth threshold, additionally requiring pi of at most 1e-03.
high_coverage_low_pi_samples = samples[is_high_coverage & (pis <= 1e-03)]

# Among the high-coverage samples, calculate which pairs of idxs belong to the
# same sample, which to the same subject, and which to different subjects.
(high_coverage_same_sample_idxs,
 high_coverage_same_subject_idxs,
 high_coverage_diff_subject_idxs) = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, high_coverage_samples)

# Map from the high_coverage_samples ordering to the ordering of samples.
sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    high_coverage_samples, samples)

# Apply that map to each group of pairs.
same_sample_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    sample_idx_map, high_coverage_same_sample_idxs)

same_subject_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    sample_idx_map, high_coverage_same_subject_idxs)

diff_subject_idxs = parse_midas_data.apply_sample_index_map_to_indices(
    sample_idx_map, high_coverage_diff_subject_idxs)

# Repeat the pair classification for the high-coverage, low-pi samples:
# which pairs of idxs belong to the same sample, which to the same subject,
# and which to different subjects.
(high_coverage_low_pi_same_sample_idxs,
 high_coverage_low_pi_same_subject_idxs,
 high_coverage_low_pi_diff_subject_idxs) = parse_midas_data.calculate_subject_pairs(
    subject_sample_map, high_coverage_low_pi_samples)

# Map from the high_coverage_low_pi_samples ordering to the ordering of samples.
low_pi_sample_idx_map = parse_midas_data.calculate_sample_idx_map(
    high_coverage_low_pi_samples, samples)