コード例 #1
0
# Entries of marker_coverages that are exactly zero; used below to keep the
# denominators non-zero.
has_zero_marker_coverage = (marker_coverages == 0)

# Depth divided by marker coverage (zero-coverage entries divide by 1).
gene_copynum_matrix = (gene_depth_matrix * 1.0) / (
    marker_coverages + has_zero_marker_coverage)

# Same ratio, but with depths clipped to [0.1, 1e09] and zero-coverage
# entries dividing by 0.1 instead.
clipped_gene_copynum_matrix = (
    numpy.clip(gene_depth_matrix, 0.1, 1e09)
    / (marker_coverages + 0.1 * has_zero_marker_coverage))

# Copy-number masks: "low" is <= 3, "good" is within [0.5, 3].
# Original author's open question (NRG): why isn't the upper bound 2?
low_copynum_matrix = (gene_copynum_matrix <= 3)
good_copynum_matrix = (gene_copynum_matrix >= 0.5) * low_copynum_matrix

# Samples used for prevalences: flagged by calculate_unique_samples and
# with marker coverage of at least min_coverage.
is_unique_sample = parse_midas_data.calculate_unique_samples(
    subject_sample_map, gene_samples)
prevalence_idxs = is_unique_sample * (marker_coverages >= min_coverage)

prevalences = gene_diversity_utils.calculate_fractional_gene_prevalences(
    gene_depth_matrix[:, prevalence_idxs],
    marker_coverages[prevalence_idxs])

# Sorted copy; `prevalences` itself is left in its original order.
pangenome_prevalences = numpy.array(prevalences)
pangenome_prevalences.sort()

# Calculate matrix of number of genes that differ
sys.stderr.write("Calculating matrix of gene differences...\n")
(gene_gain_matrix, gene_loss_matrix, gene_opportunity_matrix) = (
    gene_diversity_utils.calculate_coverage_based_gene_hamming_matrix_gain_loss(
        gene_reads_matrix, gene_depth_matrix, marker_coverages))

# Total differences are gains plus losses.
gene_difference_matrix = gene_gain_matrix + gene_loss_matrix

# Now need to make the gene samples and snp samples match up
desired_samples = gene_samples
num_haploids = len(desired_samples)
コード例 #2
0
pangenome_genes = set(gene_names)

for marker_gene in marker_genes:
    print marker_gene, marker_gene in pangenome_genes
    
reference_gene_idxs = numpy.array([gene_name in reference_genes for gene_name in gene_names])
metaphlan2_gene_idxs = numpy.array([gene_name in metaphlan2_genes for gene_name in gene_names])
marker_gene_idxs = numpy.array([gene_name in marker_genes for gene_name in gene_names])

print marker_genes

print marker_gene_idxs.sum()

sample_idxs = (parse_midas_data.calculate_unique_samples(subject_sample_map, gene_samples))*(marker_coverages>=min_coverage)

prevalences = gene_diversity_utils.calculate_fractional_gene_prevalences(gene_depth_matrix[:,sample_idxs], marker_coverages[sample_idxs],min_copynum=0.3)

reference_prevalences = prevalences[reference_gene_idxs]
metaphlan2_prevalences = prevalences[metaphlan2_gene_idxs]
marker_prevalences = prevalences[marker_gene_idxs]

print marker_prevalences

pangenome_xs, pangenome_survivals = stats_utils.calculate_unnormalized_survival_from_vector(prevalences, min_x=0, max_x=1)

reference_xs, reference_survivals = stats_utils.calculate_unnormalized_survival_from_vector(reference_prevalences, min_x=0, max_x=1)

metaphlan2_xs, metaphlan2_survivals = stats_utils.calculate_unnormalized_survival_from_vector(metaphlan2_prevalences, min_x=0, max_x=1)

marker_xs, marker_survivals = stats_utils.calculate_unnormalized_survival_from_vector(marker_prevalences, min_x=0, max_x=1)