Beispiel #1
0
def bipartition(reads):
    """Phase ``reads`` as a single individual that is heterozygous everywhere.

    A one-individual pedigree is built over the positions reported by
    ``reads.get_positions()``, ``PedigreeDPTable`` is run on it, and a few
    summary numbers are reported through ``eprint``.

    Args:
        reads: Read set exposing ``get_positions()``; it is passed unchanged
            to ``PedigreeDPTable``. It must also support ``len()``
            (assumed to be the number of reads -- confirm against the
            read-set type in use).

    Returns:
        A ``(phasing, partition)`` tuple, where ``phasing`` is the first
        element returned by ``get_super_reads()`` and ``partition`` is the
        result of ``get_optimal_partitioning()``.
    """
    positions = reads.get_positions()
    n_variants = len(positions)

    # Genotypes over all variants: every one is heterozygous (index 1).
    genotypes = canonic_index_list_to_biallelic_gt_list([1] * n_variants)
    # No genotype likelihoods are supplied.
    genotype_likelihoods = [None] * n_variants

    # Pedigree containing exactly one individual.
    pedigree = Pedigree(NumericSampleIds())
    pedigree.add_individual('individual0', genotypes, genotype_likelihoods)

    # A recombination cost vector is required by the DP table but is
    # irrelevant when there is only one individual.
    recombcost = [1] * n_variants

    # Run the core phasing algorithm, creating a DP table.
    dp_table = PedigreeDPTable(reads,
                               recombcost,
                               pedigree,
                               distrust_genotypes=False)
    # The transmission vector is not needed for a single individual.
    phasing, _ = dp_table.get_super_reads()

    mec_score = dp_table.get_optimal_cost()
    eprint("MEC Score:", mec_score)

    # The original referenced an undefined name ``readset_length`` here;
    # the length of the read set itself is used instead. ``nan`` is
    # reported rather than raising when the read set is empty.
    n_reads = len(reads)
    mec_per_read = float(mec_score) / float(n_reads) if n_reads else float('nan')
    eprint("MEC Score / readset length:", mec_per_read)

    # Bi-partition of the reads, plus the fraction of entries summed
    # over its length (``nan`` when the partition is empty).
    partition = dp_table.get_optimal_partitioning()
    n_assigned = len(partition)
    fraction = sum(partition) / float(n_assigned) if n_assigned else float('nan')
    eprint("partition fraction:", fraction)

    return phasing, partition
Beispiel #2
0
def phase_pedigree(reads, recombcost, pedigree, distrust_genotypes=False, positions=None):
    """Run ``PedigreeDPTable`` on ``reads`` and print what it produced.

    ``reads`` is first converted with ``string_to_readset_pedigree``; the
    remaining arguments are forwarded positionally to ``PedigreeDPTable``.
    Every super read, the optimal cost, the transmission vector and the
    optimal partitioning are written to standard output.

    Returns:
        The tuple ``(superreads_list, transmission_vector, cost)``.
    """
    readset = string_to_readset_pedigree(reads)
    table = PedigreeDPTable(readset, recombcost, pedigree, distrust_genotypes, positions)

    superreads_list, transmission_vector = table.get_super_reads()
    cost = table.get_optimal_cost()

    # Print each super read of each group on its own line, in order.
    for super_read in (sr for group in superreads_list for sr in group):
        print(super_read)

    # The cost is queried from the table again for the report line,
    # exactly as the table is queried for the partitioning below.
    print('Cost:', table.get_optimal_cost())
    print('Transmission vector:', transmission_vector)
    print('Partition:', table.get_optimal_partitioning())

    return superreads_list, transmission_vector, cost