def call_on_real_families(paths, splitword):
    """Collect sgRNA candidates from per-gene site files and print the result.

    Every file in ``paths`` is read line by line.  Lines starting with ``>``
    are skipped; every other line contributes one candidate, taken as the
    line with its last four characters removed.  The candidates are indexed
    by sequence and passed to ``bottemsUpAlgorithm.call_it_all``; whatever
    that call returns is printed.

    :param paths: iterable of file paths, one file per gene.
    :param splitword: separator used to derive the gene name.  The path is
        cut at the first ``".txt"`` and the gene name is the second piece of
        that prefix after splitting on ``splitword``.
    :return: None (the result is printed, not returned).
    :raises IndexError: if ``splitword`` does not occur in the path prefix.
    """
    sg_genes_dict = {}  # candidate -> genes it was found in (repeats kept)
    sgNames = []
    sgList = []
    for p in paths:
        # ``with`` guarantees the handle is released even if reading fails.
        with open(p) as f:
            # line[:-4] drops the last four characters, newline included.
            # NOTE(review): presumably a fixed-width suffix of the site
            # files - confirm against the file format.
            candidates = [
                line[:-4] for line in f if not line.startswith(">")
            ]
        gene_name = p.split(".txt")[0].split(splitword)[1]
        for sg in candidates:
            if sg in sg_genes_dict:
                sg_genes_dict[sg].append(gene_name)
            else:
                # First sighting: the dict lookup above doubles as the
                # "already listed" test, avoiding a linear scan of sgNames.
                sg_genes_dict[sg] = [gene_name]
                sgNames.append(sg)
                sgList.append(sg)
    print(bottemsUpAlgorithm.call_it_all(sgList, sgNames, sg_genes_dict))
def call_it_all_wighted(genesList, genesNames, input_sg_genes_dict,
                        input_genes_sg_dict, Omega, protodist_outfile,
                        pylip_temps_path):
    """Run the weighted variant: UPGMA tree, bottom-up search, set cover.

    Steps, in order:
      1. ``return_UPGMA`` is called with the genes and the two path
         arguments; its two results are unpacked as a tree and a distance
         matrix.
      2. ``bottemsUpAlgorithm.fill_leaves_sets`` is applied to the tree
         (as opposed to the intermediate algorithm, the leaves here are
         genes).
      3. The two input dictionaries are handed to ``fill_sg_genes_dict`` and
         ``fill_genes_sg_dict``.
      4. ``bottemsUpAlgorithm.call_it_all`` is run over the keys of
         ``input_sg_genes_dict``.
      5. The result is sorted in place, longest fourth field first, and
         passed with the distance matrix to ``find_w_set_cover``.

    :return: whatever ``find_w_set_cover`` returns.
    """
    tree, dist_matrix = return_UPGMA(genesList, genesNames,
                                     protodist_outfile, pylip_temps_path)
    bottemsUpAlgorithm.fill_leaves_sets(tree)
    fill_sg_genes_dict(input_sg_genes_dict)
    fill_genes_sg_dict(input_genes_sg_dict)

    # The sg list for the bottom-up algorithm is the set of known sgRNAs;
    # the names list is an independent copy of it.
    sg_sequences = list(input_sg_genes_dict.keys())
    sg_labels = copy.deepcopy(sg_sequences)

    # NOTE(review): df_targets is not a parameter of this function, so it is
    # looked up as a module-level name at call time - confirm it is defined.
    candidates = bottemsUpAlgorithm.call_it_all(sg_sequences, sg_labels,
                                                input_sg_genes_dict, Omega,
                                                df_targets)

    def _fourth_field_length(item):
        return len(item[3])

    # Sorted for presentation: largest item[3] first.
    candidates.sort(key=_fourth_field_length, reverse=True)

    return find_w_set_cover(candidates, dist_matrix)
def bottem_up(node, current_sg_genes_dict, current_genes_sg_dict, sgList,
              sgNames, Omega):
    """Run the bottom-up algorithm for ``node``'s subtree, then climb upward.

    The genes under ``node`` (``node.leaves_DS``) are merged into the
    accumulated dictionaries and lists, a best candidate is computed for the
    accumulated gene set, the outcome is appended to the module-level
    ``best_permutations_DS`` and the node is coloured ``'b'``.  Unless
    ``stopping_condition`` accepts the best candidate, the same procedure is
    repeated on ``node.parent`` with the accumulated state.

    Reads the module-level ``genes_sg_dict`` and ``sg_genes_dict``.

    :param node: tree node exposing ``colour``, ``leaves_DS``, ``parent``
        and ``set_colour``.
    :param current_sg_genes_dict: accumulated sg -> genes mapping; any falsy
        value starts a fresh dict.
    :param current_genes_sg_dict: accumulated gene -> sgs mapping; any falsy
        value starts a fresh dict.
    :param sgList: accumulated list of sgs; any falsy value starts a fresh
        list.
    :param sgNames: accumulated list of sg names, extended in step with
        ``sgList``; any falsy value starts a fresh list.
    :param Omega: threshold forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :return: None.  Returns early, without colouring the node, when the node
        is already coloured ``'b'``, when no gene has been accumulated, or
        when the set-cover search yields nothing.
    """
    global best_permutations_DS
    if node.colour == 'b':
        return

    # Build the dictionaries for this subtree; falsy arguments mean "start
    # from scratch".
    current_sg_genes_dict = current_sg_genes_dict or {}
    current_genes_sg_dict = current_genes_sg_dict or {}
    sgList = sgList or []
    sgNames = sgNames or []

    seen_sgs = set(sgList)  # O(1) membership instead of scanning sgList
    for leaf in node.leaves_DS:  # a leaf here is a gene
        gene_sgs = genes_sg_dict[leaf.name]
        current_genes_sg_dict[leaf.name] = gene_sgs
        for sg in gene_sgs:
            # Unconditional overwrite: checking first would cost more than
            # re-assigning the same value.
            current_sg_genes_dict[sg] = sg_genes_dict[sg]
            if sg not in seen_sgs:
                seen_sgs.add(sg)
                sgList.append(sg)
                sgNames.append(sg)

    # Second, find the key sequence.
    current_res = None
    if len(current_genes_sg_dict) < 2:
        if not current_genes_sg_dict:
            # No gene at all: nothing to design for.  The original code hit
            # an unbound loop variable here.
            return
        # Exactly one gene: it is the only key, which is also the last leaf
        # visited whenever the loop above ran.
        gene_name = next(iter(current_genes_sg_dict))
        # The second returned value is not used by this function.
        current_best_perm, _ = bottemsUpAlgorithm.find_best_sg_for_single_gene(
            gene_name, sgList)
    else:
        # Get the set cover from the bottom-up algorithm.
        current_res = bottemsUpAlgorithm.call_it_all(sgList, sgNames,
                                                     current_sg_genes_dict,
                                                     Omega)
        # Guard BEFORE indexing: the original tested for None only after
        # current_res[0], so the test could never take effect.
        if not current_res:
            return
        current_best_perm = current_res[0]  # best sg of the current cover

    if current_res:
        # Drop unneeded candidates, then record the whole set cover.
        remove_unrelevant_candidates(current_res)
        best_permutations_DS += current_res
    else:
        best_permutations_DS += [current_best_perm]
    node.set_colour('b')

    # Continue upward, carrying the accumulated state along.
    if node.parent and not stopping_condition(current_best_perm):
        bottem_up(node.parent, current_sg_genes_dict, current_genes_sg_dict,
                  sgList, sgNames, Omega)
def call_using_CasSites(dirp,
                        Omega=0.11,
                        min_length=20,
                        max_length=20,
                        start_with_G=False,
                        on_redundant=False,
                        redundant_genes=None):
    """Find sgRNA candidates for every gene file in ``dirp`` and rank them.

    ``dirp`` holds one file per gene, each containing a sequence in FASTA
    format.  Only entries whose name contains ``"RNAfile.tx"`` are read; the
    gene name is the part of the file name before the first ``".txt"``.  The
    first line of each file is skipped, the remaining text is joined into a
    single sequence, and ``CasSites.get_sites`` extracts the candidate
    sites from it.

    :param dirp: directory containing the gene files.
    :param Omega: threshold forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param min_length: forwarded to ``CasSites.get_sites``.
    :param max_length: forwarded to ``CasSites.get_sites``.
    :param start_with_G: forwarded to ``CasSites.get_sites``.
    :param on_redundant: when true, only genes listed in
        ``redundant_genes`` are processed.
    :param redundant_genes: gene names to keep when ``on_redundant`` is set;
        ``None`` (the default) is treated as empty.
    :return: the value returned by ``bottemsUpAlgorithm.call_it_all``.
    """
    if redundant_genes is None:
        redundant_genes = ()

    sg_genes_dict = {}  # candidate -> genes it was found in (repeats kept)
    sgNames = []
    sgList = []
    for p in os.listdir(dirp):
        if "RNAfile.tx" not in p:
            continue
        gene_name = p.split(".txt")[0]
        if on_redundant and gene_name not in redundant_genes:
            continue

        # os.path.join instead of a hard-coded backslash, so the path is
        # valid on every platform; ``with`` closes the file on any exit.
        with open(os.path.join(dirp, p)) as f:
            next(f, None)  # skip the first line; tolerate an empty file
            # str.replace returns a new string, so the result must be kept;
            # the original discarded it and searched for '/n', not '\n'.
            gene = f.read().replace('\n', '')

        sites = CasSites.get_sites(gene, min_length, max_length, start_with_G)
        for sg in sites:
            if sg in sg_genes_dict:
                sg_genes_dict[sg].append(gene_name)
            else:
                # First sighting: the dict lookup above doubles as the
                # "already listed" test, avoiding a linear scan of sgNames.
                sg_genes_dict[sg] = [gene_name]
                sgNames.append(sg)
                sgList.append(sg)

    return bottemsUpAlgorithm.call_it_all(sgList, sgNames, sg_genes_dict,
                                          Omega)
def call_it_all(genesList,
                genesNames,
                input_sg_genes_dict,
                input_genes_sg_dict,
                Omega,
                protodist_outfile,
                pylip_temps_path,
                df_targets,
                cfd_dict=None,
                PS_number=12):
    """Run the bottom-up algorithm over all sgRNAs and rank the results.

    The two input dictionaries are first handed to ``fill_sg_genes_dict``
    and ``fill_genes_sg_dict``.  ``bottemsUpAlgorithm.call_it_all`` is then
    run over the keys of ``input_sg_genes_dict``, and the list it returns is
    sorted in place by ``cut_expectation``, highest first.

    ``genesList``, ``genesNames``, ``protodist_outfile`` and
    ``pylip_temps_path`` are accepted but not used in this function.

    :return: the sorted list returned by ``bottemsUpAlgorithm.call_it_all``.
    """
    fill_sg_genes_dict(input_sg_genes_dict)
    fill_genes_sg_dict(input_genes_sg_dict)

    # The names list is an independent copy of the sg list.
    sg_sequences = list(input_sg_genes_dict.keys())
    sg_labels = copy.deepcopy(sg_sequences)

    candidates = bottemsUpAlgorithm.call_it_all(sg_sequences, sg_labels,
                                                input_sg_genes_dict, Omega,
                                                df_targets, cfd_dict,
                                                PS_number)

    def _cut_expectation(candidate):
        return candidate.cut_expectation

    # Sorted for presentation: highest cut expectation first.
    candidates.sort(key=_cut_expectation, reverse=True)
    return candidates
Ejemplo n.º 6
0
def E_top_down(
    res,
    node,
    Omega,
    sg_genes_dict,
    df_targets,
    internal_node_candidates=10,
    cfd_dict=None,
    PS_number=12
):
    """Walk the tree top-down, collecting sgRNA candidates per subgroup.

    For ``node``, an sg -> genes mapping restricted to the node's own targets
    (``node.node_targets_DS``) is built from the module-level
    ``genes_sg_dict``.  When the node has between 2 and 10 targets,
    ``bottemsUpAlgorithm.call_it_all`` is run on that mapping, the result is
    sorted by ``cut_expectation`` (highest first) and a ``Subgroup_res``
    holding the top ``internal_node_candidates`` entries is appended to
    ``res``.  The function then recurses into every child in
    ``node.clades``.

    :param res: list that receives one ``Subgroup_res`` per qualifying node;
        mutated in place.
    :param node: tree node exposing ``node_targets_DS``, ``clades`` and
        ``name``.
    :param Omega: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param sg_genes_dict: only passed on to the recursive calls; the
        per-node mapping is rebuilt locally.
    :param df_targets: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param internal_node_candidates: how many top candidates to keep per
        node.
    :param cfd_dict: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :param PS_number: forwarded to ``bottemsUpAlgorithm.call_it_all``.
    :return: None.  Note that when a qualifying node yields no candidates
        the function returns without visiting that node's children.
    """
    # Build the sg -> genes mapping for this subtree only.  The gene lists
    # are rebuilt from the node's own targets rather than copied from the
    # global mapping, so they never mention genes outside the subtree.
    current_sg_genes_dict = {}
    sgList = []
    sgNames = []
    for leaf in node.node_targets_DS:  # a leaf here is a gene
        for sg in genes_sg_dict[leaf.name]:
            if sg in current_sg_genes_dict:
                current_sg_genes_dict[sg].append(leaf.name)
            else:
                # First sighting: the dict lookup above doubles as the
                # "already listed" test, avoiding a linear scan of sgList.
                current_sg_genes_dict[sg] = [leaf.name]
                sgList.append(sg)
                sgNames.append(sg)

    # Only groups of 2..10 targets are searched.
    if 1 < len(node.node_targets_DS) < 11:
        best_permutations_DS = bottemsUpAlgorithm.call_it_all(
            sgList, sgNames, current_sg_genes_dict, Omega, df_targets,
            cfd_dict, PS_number)
        if not best_permutations_DS:
            return

        best_permutations_DS.sort(key=lambda item: item.cut_expectation,
                                  reverse=True)
        # The best candidates of the current set cover.
        current_best_perm = best_permutations_DS[:internal_node_candidates]
        res.append(
            Subgroup_res(get_genes_list(best_permutations_DS),
                         current_best_perm, node.name))

    if not node.clades:
        return
    # Visit every child instead of assuming exactly two: a single-child node
    # no longer raises IndexError and additional children are not skipped.
    for child in node.clades:
        if child:
            E_top_down(res, child, Omega, sg_genes_dict, df_targets,
                       internal_node_candidates, cfd_dict, PS_number)