Beispiel #1
0
def calculate_euchromatin_ratios(kmer_generator):
    """Return the euchromatic fraction of each kmer's strand-combined abundance.

    For both counters, a kmer's abundance is its own count plus the count of
    its reverse complement in the same counter (0 when the reverse complement
    is absent).

    The combined abundances are computed from the raw counts. Updating the
    dict in place while reading from it (as an earlier version did) makes a
    kmer that is processed after its reverse complement add an
    already-combined total, so the result depended on iteration order.

    Args:
        kmer_generator: object whose ``kmer_counters`` attribute is indexed by
            ``False`` (counts used here as euchromatin) and ``True`` (counts
            used here as heterochromatin); each entry must provide
            ``.items()`` yielding ``(kmer, abundance)`` pairs.

    Returns:
        dict mapping every kmer of the ``False`` counter to
        ``euchromatin / (heterochromatin + euchromatin)`` as a float, or to
        ``1`` when the kmer is not a key of the ``True`` counter.
    """
    raw_euchromatin = dict(kmer_generator.kmer_counters[False].items())
    raw_heterochromatin = dict(kmer_generator.kmer_counters[True].items())

    # Read only from the raw snapshots so every kmer sees the original,
    # un-combined count of its reverse complement.
    euchromatin_abundance = {
        kmer: abundance + raw_euchromatin.get(reverse_complement(kmer), 0)
        for kmer, abundance in raw_euchromatin.items()
    }
    heterochromatin_abundance = {
        kmer: abundance + raw_heterochromatin.get(reverse_complement(kmer), 0)
        for kmer, abundance in raw_heterochromatin.items()
    }

    euchromatin_ratio = {}
    for kmer, euchromatic in euchromatin_abundance.items():
        # NOTE(review): a kmer missing from the heterochromatin counter gets 1
        # even if its reverse complement is present there -- confirm intended.
        if kmer not in heterochromatin_abundance:
            euchromatin_ratio[kmer] = 1
        else:
            total = heterochromatin_abundance[kmer] + euchromatic
            euchromatin_ratio[kmer] = float(euchromatic / total)
    return euchromatin_ratio
Beispiel #2
0
def _get_pcr_products(fwd_primers_locations, rev_primers_locations,
                      max_pcr_product_length):
    """Pair forward and reverse primer sites whose extended ranges overlap.

    Both location lists are converted with ``kmer_locations_to_ranges``
    (forward as-is, reverse with ``reverse=True``), each extended by
    ``max_pcr_product_length``. The two range sets are intersected on
    opposite strands, and every overlap row is mapped back to primers through
    the ``(chromosome, position)`` indexes returned by the conversion.

    Args:
        fwd_primers_locations: locations of the forward primers.
        rev_primers_locations: locations of the reverse primers.
        max_pcr_product_length: extension applied to every primer range.

    Returns:
        list of ``(forward_primer, reverse_primer)`` tuples.
    """
    forward_ranges, forward_index = kmer_locations_to_ranges(
        fwd_primers_locations,
        reverse=False,
        extend_range=max_pcr_product_length)
    reverse_ranges, reverse_index = kmer_locations_to_ranges(
        rev_primers_locations,
        reverse=True,
        extend_range=max_pcr_product_length)
    overlaps = forward_ranges.intersect(reverse_ranges,
                                        strandedness="opposite")

    products = []
    for chromosome in overlaps.chromosomes:
        frame = overlaps[chromosome].df
        for row_number in range(len(frame)):
            row = frame.iloc[row_number]
            try:
                forward = forward_index[(row.Chromosome.encode(), row.Start)]
                backward = reverse_index[(row.Chromosome.encode(), row.End)]
            except KeyError:
                # The overlap boundaries do not both fall on indexed primer
                # positions; such rows are skipped.
                continue
            if forward.strand == backward.strand:
                continue
            # Drop pairs where one primer is the reverse complement of the
            # other.
            if forward.seq == reverse_complement(backward.seq):
                continue
            products.append((forward, backward))
    return products
Beispiel #3
0
def _get_pcr_products_old(fwd_primers_locations, rev_primers_locations,
                          max_pcr_product_length):

    fwd_locs = [
        PrimerAndLocation(0, loc.seq, loc.chrom_location,
                          loc.is_heterochromatic)
        for loc in fwd_primers_locations
    ]
    locs = fwd_locs
    rev_locs = [
        PrimerAndLocation(1, loc.seq, loc.chrom_location,
                          loc.is_heterochromatic)
        for loc in rev_primers_locations
    ]
    locs.extend(rev_locs)
    locs = sorted(locs,
                  key=lambda PrimerAndLocation:
                  (PrimerAndLocation.chrom_location))
    # pcr_products = [(loc1, loc2) for loc1, loc2 in zip(locs[:-1], locs[1:]) if (loc1.strand == 0) and (loc1.strand != loc2.strand) and (loc1.chrom_location[0] == loc2.chrom_location[0]) and abs(loc1.chrom_location[1] - loc2.chrom_location[1]) < max_pcr_product_length]
    pcr_products = []

    for loc1, loc2 in zip(locs[:-1], locs[1:]):
        if ((loc1.strand == 0) and (loc1.strand != loc2.strand)
                and (loc1.chrom_location[0] == loc2.chrom_location[0])
                and (loc1.seq != reverse_complement(loc2.seq))
                and (abs(loc1.chrom_location[1] - loc2.chrom_location[1]) <
                     max_pcr_product_length)):

            pcr_products.append((loc1, loc2))
    return pcr_products
Beispiel #4
0
def get_total_nondimer_pcr_products(pcr_products):
    """Return the products whose primers are not reverse complements.

    Args:
        pcr_products: iterable of pairs; each element of a pair exposes the
            primer sequence as ``.seq``.

    Returns:
        list with the input pairs, in order, minus those where the first
        sequence equals the reverse complement of the second.
    """
    return [
        pair for pair in pcr_products
        if pair[0].seq != reverse_complement(pair[1].seq)
    ]
Beispiel #5
0
def filter_out_kmers_by_revcomp(combination, sorted_primer_locations):
    """Pick which primer of a reverse-complementary pair to discard.

    Args:
        combination: sequence whose first two items are the primers to check.
        sorted_primer_locations: mapping from primer to a sized collection of
            its locations.

    Returns:
        ``None`` when the first primer is not the reverse complement of the
        second. Otherwise the primer with fewer locations; on a tie the
        second primer is returned.
    """
    first = combination[0]
    second = combination[1]
    if first != reverse_complement(second):
        return None

    first_sites = len(sorted_primer_locations[first])
    second_sites = len(sorted_primer_locations[second])
    return first if first_sites < second_sites else second
Beispiel #6
0
def filter_kmers_by_revcomp(kmers, kmer_counters, count_heterochromatic=False):
    """Remove one kmer out of every reverse-complementary pair in ``kmers``.

    For each pair in which the first kmer is the reverse complement of the
    second, the kmer with the smaller count is discarded (the second one on a
    tie).

    Args:
        kmers: kmers to filter; each must support ``.decode()``.
        kmer_counters: mapping indexed by ``False`` and ``True``, each entry
            mapping kmer to its count.
        count_heterochromatic: when true, the ``True`` counter is added to
            the ``False`` counter before comparing.

    Returns:
        list of the kmers that were not discarded, in their original order.
    """
    discarded = []
    for first, second in combinations(kmers, 2):
        if first.decode() != reverse_complement(second).decode():
            continue
        first_count = kmer_counters[False][first]
        second_count = kmer_counters[False][second]
        if count_heterochromatic:
            first_count += kmer_counters[True][first]
            second_count += kmer_counters[True][second]
        discarded.append(first if first_count < second_count else second)
    return [kmer for kmer in kmers if kmer not in discarded]
Beispiel #7
0
def main():
    """Run the primer-selection pipeline from the command-line options.

    Steps, in order: read the options from ``get_args``, create the cache
    directory if it does not exist, obtain kmers and their locations with
    ``get_kmers``, build primer combinations, compute the PCR products for
    each set, pickle those products to the products file handle, and write
    the per-pair statistics with ``write_stats_in_excel``.

    When forced kmers are given, they (and their reverse complements) are
    passed to ``get_kmers`` as kmers to keep, and the forced kmers replace
    the kmers it returns.
    """
    options = get_args()
    genome_fhand = options['genome_fhand']
    regions_fhand = options['regions_fhand']
    kmer_size = options['kmer_size']
    cache_path = Path(options['cache_dir'])
    num_top_kmers = options['top_kmers']
    num_sets = options['num_sets']
    report_fhand = options['report_fhand']
    forced_kmers = options['forced_kmers']
    if not cache_path.exists():
        cache_path.mkdir(exist_ok=True)
    products_fhand = options['products_fhand']

    if forced_kmers:
        forced_kmers = [kmer.encode() for kmer in forced_kmers]
        required_kmers = list(forced_kmers)
        required_kmers.extend(
            reverse_complement(kmer) for kmer in forced_kmers)
    else:
        required_kmers = None

    if regions_fhand:
        regions_path = regions_fhand.name
    else:
        regions_path = None

    kmers, kmers_locations = get_kmers(genome_fhand.name,
                                       regions_path,
                                       kmer_size,
                                       cache_path,
                                       num_kmers_to_keep=num_top_kmers,
                                       kmers_to_keep=required_kmers)
    if forced_kmers:
        kmers = forced_kmers

    primer_sets = select_primers_combinations(
        kmers, num_compatible_groups=num_sets)
    products_by_set = get_pcr_products_in_sets(primer_sets,
                                               kmers,
                                               kmers_locations,
                                               max_pcr_product_length=10000)

    pickle.dump(products_by_set, products_fhand, pickle.HIGHEST_PROTOCOL)

    pair_stats = get_stats_by_pair_in_sets(products_by_set,
                                           kmers_locations=kmers_locations)
    write_stats_in_excel(report_fhand.name, pair_stats)
Beispiel #8
0
def get_pcr_products(kmer_locations, primers, max_pcr_product_length=5000):
    """Compute the PCR products for every pair of primers.

    For each 2-combination of ``primers``, the locations of both primers are
    used as forward sites and the locations of both reverse complements as
    reverse sites; primers missing from ``kmer_locations`` contribute no
    sites.

    Args:
        kmer_locations: mapping from kmer to an iterable of its locations.
        primers: primers to combine pairwise.
        max_pcr_product_length: forwarded to ``_get_pcr_products``.

    Returns:
        dict mapping each primer pair (a tuple) to the result of
        ``_get_pcr_products`` for that pair.
    """
    products_by_pair = {}
    for pair in combinations(primers, 2):
        revcomp_pair = [reverse_complement(primer) for primer in pair]
        forward_sites = [
            site for primer in pair
            for site in kmer_locations.get(primer, "")
        ]
        reverse_sites = [
            site for primer in revcomp_pair
            for site in kmer_locations.get(primer, "")
        ]
        products_by_pair[pair] = _get_pcr_products(
            forward_sites,
            reverse_sites,
            max_pcr_product_length=max_pcr_product_length)
    return products_by_pair
Beispiel #9
0
def kmer_locations_to_ranges(kmer_locations, reverse=False, extend_range=1):
    """Convert kmer locations into a stranded PyRanges and a primer index.

    Forward locations become ``[position, position + extend_range)`` on the
    ``+`` strand. Reverse locations become
    ``[max(position - extend_range, 0), position)`` on the ``-`` strand, and
    their ``seq`` column holds the reverse complement of the location's
    sequence.

    Args:
        kmer_locations: iterable of objects exposing ``chrom_location`` (a
            pair whose first item is the chromosome, supporting
            ``.decode()``, and whose second item is the position), ``seq``
            and ``is_heterochromatic``.
        reverse: build reverse-strand ranges instead of forward ones.
        extend_range: how far each range extends from the kmer position.

    Returns:
        tuple ``(ranges, indexed_primers)``: a ``pr.PyRanges`` built from the
        columns ``Chromosome``, ``Start``, ``End``, ``seq``, ``Strand`` and
        ``is_het``, and a dict mapping each ``chrom_location`` to a
        ``PrimerAndLocation`` (strand ``1`` when ``reverse`` else ``0``)
        carrying the location's original sequence.
    """
    strand = '-' if reverse else '+'
    primer_strand = 1 if reverse else 0
    chroms = []
    starts = []
    ends = []
    seqs = []
    is_hetero = []
    indexed_primers = {}
    for loc in kmer_locations:
        position = loc.chrom_location[1]
        chroms.append(loc.chrom_location[0].decode())

        if reverse:
            # Reverse ranges end at the kmer and reach upstream, clamped so
            # they never start before the beginning of the chromosome.
            starts.append(max(position - extend_range, 0))
            ends.append(position)
        else:
            starts.append(position)
            ends.append(position + extend_range)

        seqs.append(reverse_complement(loc.seq) if reverse else loc.seq)
        # Record the flag per row. Rebinding the list to the scalar here would
        # make pandas broadcast the last location's flag to every row.
        is_hetero.append(loc.is_heterochromatic)
        indexed_primers[loc.chrom_location] = PrimerAndLocation(
            primer_strand, loc.seq, loc.chrom_location, loc.is_heterochromatic)

    df = pd.DataFrame({
        'Chromosome': chroms,
        'Start': starts,
        'End': ends,
        'seq': seqs,
        'Strand': strand,
        'is_het': is_hetero
    })

    return pr.PyRanges(df), indexed_primers
Beispiel #10
0
def _get_union_sites_for_kmer(kmer, kmer_locations):
    """Count the locations of ``kmer`` plus those of its reverse complement.

    Args:
        kmer: kmer to look up; must be a key of ``kmer_locations``.
        kmer_locations: mapping from kmer to a sized collection of
            locations; the reverse complement must be a key as well.

    Returns:
        Sum of the sizes of both location collections.
    """
    n_forward = len(kmer_locations[kmer])
    n_reverse = len(kmer_locations[reverse_complement(kmer)])
    return n_forward + n_reverse
Beispiel #11
0
def get_revcomp_locs(kmer, kmer_generator, get_heterochromatic_locs=False):
    """Look up the reverse complement of ``kmer`` in the ``False`` counter.

    Args:
        kmer: kmer whose reverse complement is looked up.
        kmer_generator: object exposing ``kmer_counters``, indexed by
            ``False``/``True``.
        get_heterochromatic_locs: accepted but not used by this function;
            the ``False`` counter is always consulted.

    Returns:
        The value stored for the reverse complement in
        ``kmer_generator.kmer_counters[False]``.
    """
    counter = kmer_generator.kmer_counters[False]
    revcomp_kmer = reverse_complement(kmer)
    return counter[revcomp_kmer]