コード例 #1
ファイル: Voter.py プロジェクト: mbardoe/VotingSIm
def main():
    """Build two sample candidates, a region and one voter, then print the voter.

    Manual smoke test: takes no arguments and returns nothing.
    """
    # Race-keyed numbers passed as the third Candidate argument.
    trumpRace = {
        'white': .5,
        'African American': 0.01,
        'Hispanic': 0.01,
        'Asian': 0.1,
        'Other': 0.2,
    }
    HillRace = {
        'white': .2,
        'African American': 0.5,
        'Hispanic': 0.5,
        'Asian': 0.1,
        'Other': 0.2,
    }
    c = Candidate.Candidate("Trump", .7, trumpRace)
    d = Candidate.Candidate("Hilary", .4, HillRace)
    reg = Region.Region("Region 1", 30, [.3, .3, .2, .1, .1], 45.0, 8.0, .5,
                        .05, [c, d], [.3, .7], [.1, .1], [.4, .6], [.1, .1],
                        .2, [.8, .2])
    voter = Voter(reg, "white", 45, .8, .5, .6, [.3, .7], .8, False)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(voter)
コード例 #2
ファイル: LP.py プロジェクト: MayroseLab/MulticrisprScripts
def testCplexParliminaries():
    # Manual check: hands a hand-written genes list and candidates list to
    # Crispys_CplexModel together with cfd_funct and the constant 1.
    # NOTE(review): the fixture contents and the closing bracket of
    # candidates_list are missing from this listing, so the block does not
    # parse as shown - restore them from the original LP.py before running.
    genes_list = [[
    ]]  # one gene for now
    candidates_list = [
    Crispys_CplexModel(candidates_list, genes_list, cfd_funct, 1)
コード例 #3
    def parse_file(self):
        """Parse the election data in ``self.file`` line by line.

        Layout handled here:
          * line 0: the number of candidates,
          * the next ``num_candidates`` lines: ``number,name``,
          * the following line: ``voters,vote_sum,unique_orders`` (forwarded
            to ``self.election.init_meta_info``),
          * every remaining line: ``count,pref,pref,...``.
        """
        for idx, line in enumerate(self.file):
            if idx == 0:
                num_candidates = int(line)
            elif 0 < idx <= num_candidates:
                candidate_data = line.split(',')
                candidate_number = int(candidate_data[0])
                candidate_name = candidate_data[1].strip()
                candidate = Candidate.Candidate(candidate_number, candidate_name)
                # NOTE(review): nothing visible stores `candidate`; the call
                # that registers it with self.election appears to be missing.
            elif idx == num_candidates + 1:
                meta_election_info = line.split(',')
                number_of_voters = int(meta_election_info[0])
                sum_of_vote_count = int(meta_election_info[1])
                number_of_unique_orders = int(meta_election_info[2])
                self.election.init_meta_info(number_of_voters, sum_of_vote_count, number_of_unique_orders)
            else:
                # Vote lines come after the meta line. Without this branch the
                # meta line itself was parsed as a vote and real vote lines
                # were skipped entirely.
                vote_info = line.split(',', 1)
                count = int(vote_info[0])

                # clean the preferences
                raw_pref_string = vote_info[1].strip()
                if '{' in raw_pref_string and '}' in raw_pref_string:
                    raw_pref_string = self.remove_indifferent_votes(raw_pref_string)

                # convert preference string into ints
                preference_list = [int(pref) for pref in raw_pref_string.split(',')]
                vote = Vote.Vote(count, preference_list)
                # NOTE(review): as with `candidate`, nothing visible stores
                # `vote`; confirm how votes are handed to self.election.
コード例 #4
def pitch_estimate(audio_file, config):
    """Estimate the pitch of ``audio_file`` from an openSMILE feature CSV.

    The CSV is looked up at ``config.ops_output_path + audio_file.name + '.csv'``
    and generated through ``Smile.extract_ops_features`` when it is absent.

    Returns:
        The result of ``Candidate.find_pitch_candidate`` for the collected
        pitch values and ``config.min_freq``.
    """
    file_name = audio_file.name
    audio_path = audio_file.path
    ops_output_path = config.ops_output_path
    ops_config_path = config.ops_config_path
    feature_path = ops_output_path + file_name + '.csv'

    # openSmile pitch estimation
    if not os.path.isfile(feature_path):
        # NOTE(review): the last argument was cut off in the listing;
        # ops_output_path is assumed - confirm against the real signature.
        Smile.extract_ops_features(file_name, audio_path, ops_config_path,
                                   ops_output_path)

    pitch_values = []
    # read csv file created by openSmile
    # NOTE(review): 'rb' is what the Python 2 csv module expects; Python 3
    # needs text mode with newline='' instead.
    with open(feature_path, 'rb') as csvfile:
        reader = csv.reader(csvfile, delimiter=';', quotechar='|')

        first = True
        # for each pitch-value in time-window
        for row in reader:
            # ignore first row in reader
            if not first:
                # Column 2 is read as the pitch and rounded to the nearest int.
                # It must be collected here, otherwise the list stays empty.
                pitch_values.append(int(round(float(row[2]))))
            first = False
    pitch = Candidate.find_pitch_candidate(pitch_values, config.min_freq)
    return pitch
コード例 #5
def create_a_new_candidate_and_fill_fields(current_seq, genes_sg_dict, df,
    # Scores current_seq against every target of every gene in genes_sg_dict
    # (using the distance function df) and wraps the result in a Candidate.
    # NOTE(review): lines are missing from this listing - the rest of the
    # parameter list (the body reads an otherwise undefined Omega), whatever
    # fills list_of_targets, and the statement that the two_sequs_differeces
    # call below belongs to. The block does not parse as shown; restore the
    # missing lines from the original file.
    genes_score_dict = {}
    targets_dict = {}
    number_of_node_genes = len(genes_sg_dict)
    for gene_name, genes_targets_list in genes_sg_dict.items():
        prob_gene_will_not_cut = 1  ## easier to calculate
        list_of_targets = [
        ]  ## for later knowing where the perm might cut in each gene
        for target in genes_targets_list:  ## sg_lst_of_gene: list of the sg of the gene
            distance_candidate_target = df(current_seq, target)
            candidate_target_cut_prob = 1 - distance_candidate_target  ## the distance is between 0 and 1. 0 is usually a perfect match, 1 is far
            if not Stage2.stopping_condition(candidate_target_cut_prob, Omega):
                prob_gene_will_not_cut = prob_gene_will_not_cut * (
                    1 - candidate_target_cut_prob
                )  # lowering the not-cut prob with each sgRNA
                # for each candidate, find the probability to cut in the gene with the lowest probability to be cut
        prob_gene_cut = 1 - prob_gene_will_not_cut
        # Only genes whose cut probability exceeds Omega are recorded.
        if prob_gene_cut > Omega:
            genes_score_dict[gene_name] = prob_gene_cut
        # Recomputed on every iteration; only the value after the last gene is used.
        fraction_of_cut = len(genes_score_dict) / number_of_node_genes
        if not len(list_of_targets) == 0:
            match_sites_dict_value = []
            for target_site in list_of_targets:
                    two_sequs_differeces(current_seq, target_site)
            targets_dict[gene_name] = match_sites_dict_value
    # prob_gene_cut here is whatever the last loop iteration left behind.
    res = Candidate.Candidate(current_seq, fraction_of_cut, prob_gene_cut,
                              genes_score_dict, targets_dict)
    return res
コード例 #6
def find_best_sg_for_single_gene_naiveMC_returns_single(gene_name, sg_lst):
    '''Return a single Candidate built from the first sgRNA in ``sg_lst``.

    The older version, suitable for when naive didn't make set cover.

    :param gene_name: name of the gene; used as the only key of both dicts
    :param sg_lst: sgRNA sequences; only ``sg_lst[0]`` is used
    :return: ``Candidate.Candidate(sg_lst[0], 1, {gene_name: 1}, {gene_name: []})``
    '''
    return Candidate.Candidate(sg_lst[0], 1, {gene_name: 1}, {gene_name: []})
コード例 #7
def find_set_cover(best_permutations_DS,
    # Greedy cover: while genes remain uncovered, scan the remaining candidates
    # for the one covering the most uncovered genes with score >= thr.
    # NOTE(review): many lines are missing from this listing - the rest of the
    # signature (sg_genes_dict, genes_sg_dict and thr are read but never
    # defined), the bodies of several for/if statements, the arguments of two
    # len() calls and the tail of the prob_cover_genes_lst call. The block does
    # not parse as shown; restore the missing lines from the original file.
    '''for now, might won't work in a case when there is a gene that isn't covered by any of the permutations in the best_permutations_DS. not finished. can make it more readble'''
    # Shallow copy, so the caller's list object itself is not modified here.
    temp_best_perm_DS = copy.copy(best_permutations_DS)
    res = list()  #[temp_best_perm_DS[0]]
    if genes_sg_dict:
        for gene, targets in genes_sg_dict.items():
            if len(targets) == 0:
                print("no targets for gene " + gene)
            # NOTE(review): as listed this runs even when targets is empty, so
            # targets[0] would raise IndexError; `c` is never used afterwards.
            c = Candidate.Candidate(targets[0])

    uncovered_genes = set()
    for sg, genesLst in sg_genes_dict.items():
        for gene in genesLst:
    while (len(uncovered_genes)) > 0 and len(temp_best_perm_DS) > 0:
        #for gene in uncovered_genes:
        ## going over all the permutations, and return the permutation that covers the maximal number of genes not covered yet, with the highest probability among the maximal covering permutations
        #print('len uncovered genes', len(uncovered_genes))
        best_current_perm, best_num_of_coverd, best_prob_of_covered = None, 0, 0  # best_current_perm is the whole tuple
        i = 0
        while i < (len(temp_best_perm_DS)):
            new_genes_coverd = list()  #0
            for gene, score in temp_best_perm_DS[i].genes_score_dict.items():
                if gene in uncovered_genes and score >= thr:
            if len(new_genes_coverd) == 0:
                # NOTE(review): i is incremented again at the end of the loop
                # body, so as listed this branch advances by two - confirm
                # whether a line was dropped here.
                i += 1
                #del temp_best_perm_DS[i]
            elif len(
            ) >= best_num_of_coverd:  ## and temp_best_perm_DS[i][2] > best_prob_of_covered:  ##need to check if 2 is the right index, and not 1.
                if len(
                ) > best_num_of_coverd or prob_cover > best_prob_of_covered:  # cover more genes or cover the same amount with greater prob.
                    prob_cover = prob_cover_genes_lst(temp_best_perm_DS[i],
                    #if prob_cover > best_prob_of_covered:
                    best_num_of_coverd, best_prob_of_covered = len(
                        new_genes_coverd), prob_cover
                    best_current_perm = temp_best_perm_DS[i]
            i += 1
        if (best_current_perm):
            for gene, score in best_current_perm.genes_score_dict.items():
                if gene in uncovered_genes and score >= thr:  # this gene may already have been covered by a previous sgRNA
    # NOTE(review): nothing visible appends to res or removes entries from
    # uncovered_genes; those statements appear to be among the missing lines.
    return res
コード例 #8
def find_best_sg_for_single_gene(gene_name, sg_lst):
    '''Return a one-element list holding a Candidate for the first sgRNA.

    :param gene_name: name of the gene; used as the only key of both dicts
    :param sg_lst: sgRNA sequences; only ``sg_lst[0]`` is used
    :return: ``[Candidate]`` whose last argument maps ``gene_name`` to
        ``[[sg_lst[0], {}]]``
    '''
    first_sg = sg_lst[0]
    return [
        Candidate.Candidate(first_sg, 1, {gene_name: 1},
                            {gene_name: [[first_sg, {}]]})
    ]
def initializeCandidates():
    """Create one candidate object per name in the hard-coded table.

    Returns:
        list: the results of ``Candidate.makeCandidate(name, terms)``, in the
        dict's iteration order.
    """
    # Candidate name -> strings passed as the second makeCandidate argument.
    candict = {
        'Rick Santorum': ['Santorum', '#teamsantorum', '#santorum'],
        'Ron Paul': ['Ron Paul', '#ronpaul2012'],
        'Newt Gingrich': ['Gingrich', '#withnewt'],
        'Mitt Romney': ['Romney', '#mitt2012', '#mittromney'],
    }
    return [
        Candidate.makeCandidate(name, terms) for name, terms in candict.items()
    ]
コード例 #10
def main():
    """Build two sample candidates and a VotingProfile, then print a re-normalised list.

    Manual smoke test: takes no arguments and returns nothing.
    """
    # Race-keyed numbers passed as the third Candidate argument.
    trumpRace = {
        'white': .5,
        'African American': 0.01,
        'Hispanic': 0.01,
        'Asian': 0.1,
        'Other': 0.2,
    }
    HillRace = {
        'white': .2,
        'African American': 0.5,
        'Hispanic': 0.5,
        'Asian': 0.1,
        'Other': 0.2,
    }
    c = Candidate.Candidate("Trump", .7, trumpRace)
    d = Candidate.Candidate("Hilary", .4, HillRace)

    vp = VotingProfile([c, d], [.3, .7], [.1, .2], .4, .1, .2, [.2, .8])
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(vp.__reNorm__([1, 2, 3]))
コード例 #11
def call_CplexCovers(list_of_candidates,
    # Loads four pickled inputs from the paths given as arguments, caps the
    # candidate list, then builds a cover problem according to `method`.
    # NOTE(review): lines are missing from this listing - the rest of the
    # signature (genes_lst, genes_sg_dict, sg_genes_dict, method and thr are
    # read but not declared), the `try:` matching the `except` below, the final
    # `else:` before print("which method?") and the tail of the last call. The
    # block does not parse as shown.
    # NOTE(review): pickle.load runs arbitrary code from the file - only use on
    # trusted inputs. The files opened here are never explicitly closed.

    list_of_candidates, genes_lst = pickle.load(open(list_of_candidates,
                                                     'rb')), pickle.load(
                                                         open(genes_lst, 'rb'))
    genes_sg_dict, sg_genes_dict = pickle.load(open(
        genes_sg_dict, 'rb')), pickle.load(open(sg_genes_dict, 'rb'))
    #if len(list_of_candidates) < 100: #for debugging
    #	print("less than 20 sgRNAs")
    #	return
    # Keep len(list_of_candidates) + len(genes_lst) at or below 999.
    if len(list_of_candidates) > 999 - len(genes_lst):
        list_of_candidates = list_of_candidates[:999 - len(genes_lst)]
    # add a representative for each of the genes, to avoid the case of no answer from CPLEX
    if genes_sg_dict:
        for gene, targets in genes_sg_dict.items():
            if len(targets) == 0:
                print("no targets for gene " + gene)
            # NOTE(review): as listed this runs even when targets is empty
            # (IndexError), and `c` is never added to list_of_candidates.
            c = Candidate.Candidate(targets[0])
        if method == "SC":
            cplex_problem_object = CplexCovers.CplexSetCover(
                list_of_candidates, genes_lst, thr)
        elif method == "F_SC":
            cplex_problem_object = CplexCovers.Cplex_fuzzy_set_cover(
                list_of_candidates, genes_lst, thr)
        elif 'BC' in method:
            # The cover size is the integer after the first underscore in `method`.
            method = method.split('_')
            cover_size = int(method[1])
            best_score, group = bounded_cover(list_of_candidates, genes_lst,
                                              cover_size, thr)
            return (best_score, group)

            print("which method?")
    except CplexError as exc:
        print("no solution")

    return cover_from_cplex_promblem_obj(list_of_candidates,
コード例 #12
    def generateNewPopulation(self):
        # Breeds offspring from selected parents until N have been produced,
        # then re-sorts self.population by fitness.
        # NOTE(review): lines are missing from this listing - the tail of the
        # Candidate(...) call, the mutation step under the `if`, the append to
        # `offsprings` and the replacement of the weakest candidates. The block
        # does not parse as shown.
        # N_percent, Popln_Size, Selection and Mutation_percent are not defined
        # in this method; presumably module-level settings - verify.
        offsprings = []
        # NOTE(review): if N_percent is an int under Python 2, N_percent / 100
        # truncates before the multiplication - confirm the intended division.
        N = (N_percent / 100) * Popln_Size
        num_offsprings = 0

        # Repeat until we have N offsprings
        while num_offsprings < N:
            # Select parents via specified selection mech.
            if Selection == 'RW':
                parent1 = self.rouletteWheelSelection()
                parent2 = self.rouletteWheelSelection()
            else:  # 'TS'
                parent1, parent2 = self.tournamentSelection()

            # Create offspring via crossover
            offspring = Candidate(parent1.crossover(parent2),

            # Mutate if required
            if (random.random() < (Mutation_percent / 100)):

            # print("parent1", parent1)
            # print("parent2", parent2)
            # print("offspring", offspring)

            num_offsprings += 1

        # Replace N weakest candidates from the population with offsprings

        # Sort new population by fitness
        self.population = self.sortCandidatesByFitness(self.population)
コード例 #13
ファイル: Stage2.py プロジェクト: shiranab/CRISPys
def fill_leaves_sets(tree, sg_genes_dict):
    '''this version is not competable to genes tree.
	can be combine with fill_distance_from_leaves_function'''
    # Gives every leaf a Candidate keyed by the leaf's name, then walks from
    # the leaf up through node.parent towards the root.
    # NOTE(review): lines are missing from this listing - the statement that
    # the stray ")" below closes, and the body of the innermost `if`. The block
    # does not parse as shown.
    ## fill the first line of nodes
    for leaf in tree.leaves_DS:  ## node_targets_DS is a python array
        current_candidate = Candidate.Candidate(leaf.name)
        )  # sg_genes_dict[leaf.name] is a list of genes which this target site is on
        leaf.candidates_DS[leaf.name] = current_candidate
        node = leaf
        while (node.parent):
            # NOTE: this inner loop reuses the name `leaf` from the outer loop.
            for leaf in node.node_targets_DS:
                if leaf not in node.parent.node_targets_DS:
            node = node.parent
コード例 #14
def ExtractFeatureWords(filename):
    # Pipeline: read the file, split it into clusters, build and score
    # candidates, sort them, keep those passing CutByScore(…, 2.9) and return
    # the de-duplicated extraction result.
    # NOTE(review): the body of the nested i/j loop is missing from this
    # listing, so the block does not parse as shown.
    result = Reader.readfile(filename)
    result_split = Segmentation.SplitCluster(result)
    delete_result = Candidate.DeleteRepetition(result_split)
    candidate = Candidate.BuildClass(delete_result)
    # The three scoring calls return nothing that is used here; presumably
    # they update `candidate` in place - verify.
    Candidate.CalLenScore(candidate, 2, 15)
    Candidate.CalSupScore(candidate, result_split)
    Candidate.CalPosScore(candidate, result_split)
    for i in range(len(candidate)):
        for j in range(len(candidate[i])):
    candidate_list = Candidate.GenCandidateList(candidate)
    sorted_candidate_list = Candidate.CandidateListSort(candidate_list)

    # extracted_word = Candidate.CutByRank(sorted_candidate_list,0.5)
    # PrintExtractedWord(extracted_word)

    extracted_word = Candidate.CutByScore(sorted_candidate_list, 2.9)
    # PrintExtractedWord(extracted_word)

    # extracted_word = Candidate.CutByRankAndScore(sorted_candidate_list,0.05,2.9)
    # PrintExtractedWord(extracted_word)

    extracted_result = Candidate.ExtractedWordDeleteRepetition(extracted_word)
    # PrintExtractedResult(extracted_result)

    # print("")
    # print("")
    # print("")
    # print("")
    # extracted_word = Candidate.CutByScore(sorted_candidate_list,2.0)
    # print("the result of cut by score: ")
    # for i in range(len(extracted_word)):
    #     print("")
    #     for j in range(len(extracted_word[i])):
    #         print(extracted_word[i][j])
    return extracted_result
コード例 #15
ファイル: Stage2.py プロジェクト: shiranab/CRISPys
def call_it_all(sgList,
    # Returns best_permutations_DS, a list created here and passed to top_down.
    # NOTE(review): lines are missing from this listing - the rest of the
    # signature (input_sg_genes_dict, sgNames, df_targets, cfd_dict, Omega and
    # PS_number are read but not declared), whatever is done with `c` in the
    # single-sgRNA case, and presumably an `else:` before the tree-building
    # code. The block does not parse as shown.
    best_permutations_DS = []
    if len(sgList) == 1:
        print("only one sgRNA in the group")
        genes = input_sg_genes_dict[sgList[0]]
        c = Candidate.Candidate(sgList[0])
        upgmaTree = return_upgma(sgList, sgNames, df_targets, cfd_dict)
        fill_leaves_sets(upgmaTree, input_sg_genes_dict)
        top_down(best_permutations_DS, upgmaTree.root, Omega,
                 input_sg_genes_dict, df_targets, cfd_dict, PS_number)
    return best_permutations_DS
コード例 #16
def fill_leaves_sets_Genes_tree_as_well(tree, sg_genes_dict, genes_tree=False):
    '''can be combine with fill_distance_from_leaves_function'''
    # When genes_tree is falsy, gives every leaf a Candidate keyed by the
    # leaf's name; in all cases then walks from the leaf up through node.parent.
    # NOTE(review): lines are missing from this listing - the statement that
    # the stray ")" below closes, and the body of the innermost `if`. The block
    # does not parse as shown.
    ## fill the first line of nodes
    for leaf in tree.leaves_DS:  ## node_targets_DS is a python array
        if not (genes_tree):
            )  # sg_genes_dict[leaf.name] is a list of genes which this target site is on
            current_candidate = Candidate.Candidate(leaf.name)
            leaf.candidates_DS[leaf.name] = current_candidate
            # 'node_targets_DS' will be used to hold the genes; it is set to an empty list when the node is constructed. Maybe if this algorithm becomes really bottom-up, this will change.

        node = leaf
        while (node.parent):
            # NOTE: this inner loop reuses the name `leaf` from the outer loop.
            for leaf in node.node_targets_DS:
                if leaf not in node.parent.node_targets_DS:
            node = node.parent
コード例 #17
def create_a_new_candidate_and_fill_fields(current_seq, genes_sg_dict, df,
    # Scores current_seq against every target of every gene in genes_sg_dict
    # (using the distance function df) and wraps the result in a Candidate.
    # NOTE(review): lines are missing from this listing - the rest of the
    # parameter list (the body reads an otherwise undefined Omega), whatever
    # fills list_of_targets, and the statement that the two_sequs_differeces
    # call below belongs to. The block does not parse as shown; restore the
    # missing lines from the original file.
    #next stage of work: add lowest_cut_site_prob
    genes_score_dict = {}
    targets_dict = {}
    number_of_node_genes = len(genes_sg_dict)
    for gene_name, genes_targets_list in genes_sg_dict.items():
        prob_gene_will_not_cut = 1  ## easier to calculate
        list_of_targets = [
        ]  ## for later knowing where the perm might cut in each gene
        for target in genes_targets_list:  ## sg_lst_of_gene: list of the sg of the gene
            distance_candidate_target = df(current_seq, target)
            ##the old one: sg_cut_prob = 1/(distance_perm_sg+1) ##assuming distance of 1 is 100% cut. a lot of heuristics in this line: to be changed
            candidate_target_cut_prob = 1 - distance_candidate_target  ## the distance is between 0 and 1. 0 is usually a perfect match, 1 is far
            #real sg_prob : left for later
            # to change this line so it will be compatible with the bottom-up stopping condition
            #if candidate_target_cut_prob > Omega /5: #want to add only the significant sg to this list. might need to change the thr. should be a thr that is not higher than the one in the stopping condition
            if not BU.stopping_condition(candidate_target_cut_prob, Omega):
                prob_gene_will_not_cut = prob_gene_will_not_cut * (
                    1 - candidate_target_cut_prob
                )  # lowering the not-cut prob with each sgRNA
                # for each permutation, find the probability to cut in the gene with the lowest probability to be cut
        prob_gene_cut = 1 - prob_gene_will_not_cut
        # Only genes whose cut probability exceeds Omega are recorded.
        if prob_gene_cut > Omega:
            genes_score_dict[gene_name] = prob_gene_cut
        # Recomputed on every iteration; only the value after the last gene is used.
        fraction_of_cut = len(genes_score_dict) / number_of_node_genes
        #make the match_site_dict
        if not len(list_of_targets) == 0:
            match_sites_dict_value = []
            for target_site in list_of_targets:
                    two_sequs_differeces(current_seq, target_site)
            targets_dict[gene_name] = match_sites_dict_value
    # prob_gene_cut here is whatever the last loop iteration left behind.
    res = Candidate.Candidate(current_seq, fraction_of_cut, prob_gene_cut,
                              genes_score_dict, targets_dict)
    return res
コード例 #18
                    help='Enter a type of values to edit your CV (n/e/exp)')
                    help='Enter the link for your social account')
# Command-line dispatch: the first matching combination of parsed flags
# decides which Users / HR / Candidate action runs.
# NOTE(review): the bodies of `if (result.search):` and the final
# `elif (result.id):` are missing from this listing, so the script does not
# parse as shown.
result = parser.parse_args()

# Registration ('c' or 'hr') is checked before login.
if (result.username and result.password and (result.register) == 'c'):
    Users.RegistrationC(result.username, result.password)
elif (result.username and result.password and (result.register) == 'hr'):
    Users.RegistrationHR(result.username, result.password)
elif (result.username and result.password and result.type):
    Users.Login(result.username, result.password, result.type)

# HR actions, attempted when CheckIfLoginHR is truthy for the username.
elif (Users.CheckIfLoginHR(result.username)):
    if (result.search):
    elif (result.id and result.status):
        HR.UpdateStatus(result.id, result.status)
    elif (result.id and result.note):
        HR.AddNotes(result.id, result.note)

# Candidate actions, attempted when CheckIfLoginC is truthy for the username.
elif (Users.CheckIfLoginC(result.username)):
    # result.sLink is passed through but is not part of the condition.
    if (result.name and result.education and result.pic and result.id
            and result.exp):
        Candidate.CreateCV(result.name, result.education, result.pic,
                           result.id, result.exp, result.sLink)
    elif (result.id and result.value and result.option):
        Candidate.EditCV(result.id, result.value, result.option)
    elif (result.id):
コード例 #19
def main():
    """Create five sample candidates and four sample regions and pickle each one.

    Every candidate is written to ``<candidate.name>.cand`` and every region
    to ``<region.name>.rgn`` in the current working directory.
    """
    # Race-keyed numbers passed as the third Candidate argument.
    trumpRace = {
        'White': .3,
        'African American': 0.01,
        'Hispanic': 0.01,
        'Asian': 0.1,
        'Other': 0.2,
    }
    HillRace = {
        'White': .2,
        'African American': 0.35,
        'Hispanic': 0.35,
        'Asian': 0.1,
        'Other': 0.2,
    }
    CruzRace = {
        'White': .3,
        'African American': 0.1,
        'Hispanic': 0.3,
        'Asian': 0.1,
        'Other': 0.2,
    }
    KasichRace = {
        'White': .3,
        'African American': 0.1,
        'Hispanic': 0.1,
        'Asian': 0.1,
        'Other': 0.2,
    }
    BernieRace = {
        'White': .3,
        'African American': 0.2,
        'Hispanic': 0.3,
        'Asian': 0.3,
        'Other': 0.3,
    }
    c = Candidate.Candidate("Trump", .7, trumpRace)
    d = Candidate.Candidate("Hillary", .4, HillRace)
    e = Candidate.Candidate("Cruz", .8, CruzRace)
    f = Candidate.Candidate("Kasich", .55, KasichRace)
    g = Candidate.Candidate("Sanders", .2, BernieRace)
    candidates = [c, d, e, f, g]

    for candidate in candidates:
        # `with` closes the file even if pickling fails.
        with open(candidate.name + ".cand", "wb") as out_file:
            pickle.dump(candidate, out_file)

    # NOTE(review): region.name is read below, yet no name is passed to any
    # Region(...) call here - confirm the constructor arguments against
    # Region.Region.
    r1 = Region.Region(
        [.4, .2, .1, .1, .05],  ## race breakdown
        50,  ## avg Age
        10,  ## variation in age
        .6,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [1.8, 1.8, .4, .1, .3],  ## candidate pref
        [.4, .4, .2, .01, .07],  ## variation in candidate pref
        .6,  # spectrum
        .07,  # variation in spectrum
        .1,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref

    r2 = Region.Region(
        [.3, .3, .2, .1, .1],  ## race breakdown
        40,  ## avg Age
        7,  ## variation in age
        .5,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [.3, .4, .4, .2, .6],  ## candidate pref
        [.1, .1, .1, .1, .2],  ## variation in candidate pref
        .4,  # spectrum
        .15,  # variation in spectrum
        .2,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref
    r3 = Region.Region(
        [.4, .1, .3, .4, .1],  ## race breakdown
        45,  ## avg Age
        3,  ## variation in age
        .54,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [.2, .4, .5, .2, .4],  ## candidate pref
        [.05, .1, .1, .05, .1],  ## variation in candidate pref
        .5,  # spectrum
        .2,  # variation in spectrum
        .3,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref
    r4 = Region.Region(
        [.1, .6, .1, .2, .3],  ## race breakdown
        4,  ## avg Age  NOTE(review): far below r1-r3 (40-50) - confirm intended
        3,  ## variation in age
        .64,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [.1, .4, .3, .2, .4],  ## candidate pref
        [.03, .1, .1, .05, .1],  ## variation in candidate pref
        .5,  # spectrum
        .2,  # variation in spectrum
        .1,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref
    regions = [r1, r2, r3, r4]

    for region in regions:
        with open(region.name + ".rgn", "wb") as out_file:
            pickle.dump(region, out_file)
コード例 #20
def return_candidates(list_of_targets,
    # Enumerates candidate sequences over the positions where the targets
    # differ, scores each against every gene's targets, and returns perm_grades.
    # Returns None when more than 12 positions differ.
    # NOTE(review): many lines are missing from this listing - the rest of the
    # signature (initial_seq, genes_sg_dict, df, node and for_single_gene are
    # read but not declared), the tails of the all_perms / df / Candidate
    # calls, several `else:` lines, the body of the `distance == 1` check, the
    # targets_dict assignment head and the append to perm_grades. The block
    # does not parse as shown; restore the missing lines from the original.
    dict_of_different_places = wheres_the_differences_linear(
        list_of_targets, df == Metric.CRISTA
    )  ##node_targets_DS is a python array. where_the_differences.
    node.polymorphic_sites = dict_of_different_places
    #list_of_different_places = list(node.polymorphic_sites)
    if len(dict_of_different_places) > 12:
        return None
    list_of_different_places = list(dict_of_different_places.items())
    list_of_different_places.sort(key=lambda item: item[0])
    ##going over all the permutations
    list_of_perms_sequs = all_perms(initial_seq, None,
    perm_grades = [
    ]  #a list of tuples: (candidate_str,fraction_of_cut, cut_expectation, genes_list)
    #find for all permutation which genes it cut
    widest_perm_prob = 0  ## for the stopping condition. the permutation with the highest probability to cut all the sgRNA, without considering Omega.
    lowest_of_widest_perm = 0  ## the probability the widest candidate_str will cut in the lowest cut probability sgRNA for this widest candidate_str.
    for candidate_str in list_of_perms_sequs:
        targets_dict = {
        }  # a list of tuples: (gene name, list of target of this gene that might be cut by the candidate_str)
        wide_perm_prob = 1
        lowest_of_wide_perm = 1
        genes_covering = [
        ]  #a list of tuples: (gene name, probability to be cut).
        for gene, targets_lst_of_gene in genes_sg_dict.items(
        ):  ##find out if this gene is touched by the sgRNA seq
            prob_gene_will_not_cut = 1  ## easier to calculate
            # NOTE: rebinds list_of_targets, which is also the first parameter.
            list_of_targets = [
            ]  ##for later knowing where the candidate_str might cut in each gene
            num_of_cuts_per_gene = 0  #in use only in the single gene version
            for target in targets_lst_of_gene:  ##targets_lst_of_gene: list of the target of the gene
                if df == Metric.CRISTA:
                    #distance_candidate_target = df(candidate_str[3:-6], target)
                    distance_candidate_target = df(candidate_str[3:-6], target)

                    distance_candidate_target = df(candidate_str, target,
                ##the old one: candidate_cut_prob = 1/(distance_candidate_target+1) ##assuming distance of 1 is 100% cut. a lot of heuristics in this line: to be changed
                if distance_candidate_target == 1:  #this line was changed recently.. it used to be "if distance_candidate_target == 0:"
                candidate_cut_prob = 1.0 - distance_candidate_target  ##the distance is between 0 to 1. 0 is usually a perfect match, 1 is far
                #real sg_prob : left for later
                #change this line
                #if candidate_cut_prob > Omega /5: #want to add only the significant target to this list. might need to change the thr. should be a thr that is not higher than the one in the stopping condition
                #if not BU.stopping_condition(candidate_cut_prob, Omega):
                sg_site_differents = two_sequs_differeces(
                    candidate_str, target)
                #perm_grades[k].targets_dict[key][j] = [perm_grades[k].targets_dict[key][j], sg_site_differents]
                list_of_targets.append([target, sg_site_differents])
                prob_gene_will_not_cut = prob_gene_will_not_cut * (
                    1 - candidate_cut_prob
                )  #lowering the not cut prob in each sgRNA
                num_of_cuts_per_gene += candidate_cut_prob
                #for each permutation, find the probability to cut in the gene with the lowest probability to be cut
                #if lowest_of_wide_perm > candidate_cut_prob:
                #    lowest_of_wide_perm = candidate_cut_prob

            prob_gene_cut = 1 - prob_gene_will_not_cut
            if len(list_of_targets) > 0:
                    gene] = list_of_targets  #targets of this gene to be cleaved by the current candidate

            # NOTE(review): as listed both appends run; an `else:` between them
            # appears to be missing.
            if (for_single_gene):
                genes_covering.append((gene, num_of_cuts_per_gene))
                genes_covering.append((gene, prob_gene_cut))
            #wide_perm_prob *= prob_gene_cut  ##for the lowest of widest

        #num_of_cut = 0
        #wont_cut_prob = 1  #the probability the permutationed sequence will not cut all of the genes, that the probability each of them will be cut is greater then Omega
        # cut_expection is the sum of the per-gene values in genes_covering.
        cut_expection = 0.0  ##the probability the permutationed sequence will cut all of the genes, that the probability each of them will be cut is greater then Omega
        #genes_list = []  # a list of genes considered cut by this sequence
        genes_score_dict = {
        }  # a dict of genes: genes considered cut by this sequence, and cut prob
        # NOTE: the loop variable shadows the builtin `tuple` inside this function.
        for tuple in genes_covering:  #tuple : (gene name, probability to be cut)
            #if tuple[1] >= Omega:
            #num_of_cut += 1
            cut_expection += tuple[1]  ## the prob to cut all the genes
            genes_score_dict[tuple[0]] = tuple[1]
        #cut_expectation = 1 - wont_cut_prob
        #fraction_of_cut = num_of_cut/len(genes_sg_dict)  #len(genes_sg_dict) == num of genes

        ##updating the targets dict##
        #for key in perm_grades[k].targets_dict.keys():  #for each gene in the the targets_dict - a list of lists. each sub list: [gene, list_of_targets]
        #    for j in range(len(perm_grades[k].targets_dict[key])): #for any match site of this gene from this specific target. usually there is only one gene
        #       sg_site_differents = two_sequs_differeces(perm_grades[k].seq, perm_grades[k].targets_dict[key][j])
        #      perm_grades[k].targets_dict[key][j] = [perm_grades[k].targets_dict[key][j], sg_site_differents]

        if cut_expection >= 1:  #is this condition necessary?
            current_candidate = Candidate.Candidate(candidate_str,

    # Drop the reference to the full permutation list before returning.
    del list_of_perms_sequs
    #best_perms_DS = find_max(perm_grades, genes_sg_dict)  #after the set cover
    ##for finding where are the differences between the target and the DNA:

    #for k in range(len(perm_grades)): # a "best candidate_str" looks like this: (max_seq, max_fraction, max_cut_prob, genes_list, targets_dict])
    #    for key in perm_grades[k].targets_dict.keys():  #for each gene in the the targets_dict - a list of lists. each sub list: [gene, list_of_targets]
    #       for j in range(len(perm_grades[k].targets_dict[key])): #for any match site of this gene from this specific target. usually there is only one gene
    #          sg_site_differents = two_sequs_differeces(perm_grades[k].seq, perm_grades[k].targets_dict[key][j])
    #         perm_grades[k].targets_dict[key][j] = [perm_grades[k].targets_dict[key][j], sg_site_differents]
    #return perm_grades, lowest_of_widest_perm, set()
    return perm_grades
コード例 #21
    def process(self, directory, output, feature_type, candidate_type, verbose,
                meta, arff):
        Processes pulsar candidates of the type specified by 'candidate_type'.
        Writes the features of each candidate found to a single file, 'output'.
        directory          -    the directory containing the candidates to process.
        output             -    the file to write the features to.
        feature_type       -    the type of features to generate.
                                feature_type = 1 generates 12 features from Eatough et al., MNRAS, 407, 4, 2010.
                                feature_type = 2 generates 22 features from Bates et al., MNRAS, 427, 2, 2012.
                                feature_type = 3 generates 22 features from Thornton, PhD Thesis, Univ. Manchester, 2013.
                                feature_type = 4 generates 6 features from Lee et al., MNRAS, 333, 1, 2013.
                                feature_type = 5 generates 6 features from Morello et al., MNRAS, 433, 2, 2014.
                                feature_type = 6 generates 8 features from Lyon et al.,2015.
                                feature_type = 7 obtains raw integrated (folded) profile data.
                                feature_type = 8 obtains raw DM-SNR Curve data.
        candidate_type     -    the type of candidate file being processed.
                                candidate_type = 1 assumes PHCX candidates output by the pipeline described by
                                                 Morello et al., MNRAS 443, 2, 2014.
                                candidate_type = 2 assumes gnuzipped ('.gz') PHCX candidates produced by the
                                                 pipeline described by Thornton., PhD Thesis, Univ. Manchester, 2013.
                                candidate_type = 3 assumes PFD files output by the LOTAAS and similar surveys in the
                                                 presto PFD format.
                                candidate_type = 4 assumes PHCX candidates output by the SKA SA pipeline.
        verbose            -    debug logging flag, if true output statements will be verbose.
        meta               -    a flag that when set to true, indicates that meta information will be retained
                                in the output files produced by this code. So if meta is set to true, then each line
                                of features will have the full path to the candidate they belong to included. Otherwise
                                they will not, making it hard to find which features belong to which candidate.
        arff               -    a flag that when set to true, indicates that meta output data will be written in ARFF format.

        # Used to monitor feature creation statistics.
        candidatesProcessed = 0
        successes = 0
        failures = 0

        print "\n\t*************************"
        print "\t| Searching Recursively |"
        print "\t*************************"

        # Check the type of candidate file used.
        if (candidate_type == 1):
            print "\tSearching for candidates with file extension: ", self.phcxRegex
            fileTypeRegexes = [self.phcxRegex]
        elif (candidate_type == 2):
            print "\tSearching for candidates with file extension: ", self.gzPhcxRegex
            fileTypeRegexes = [self.gzPhcxRegex]
        elif (candidate_type == 3):
            print "\tSearching for candidates with file extension: ", self.pfdRegex
            fileTypeRegexes = [self.pfdRegex]
        elif (candidate_type == 4):
            print "\tSearching for candidates with file extension: ", self.phcxRegex
            fileTypeRegexes = [self.phcxRegex]
            print "\tNo candidate file type provided, exiting..."

        print "\tSearching: ", directory

        start = datetime.datetime.now(
        )  # Used to measure feature generation time.

        # For each type of file this program recognises
        for filetype in fileTypeRegexes:

            # Loop through the specified directory
            for root, subFolders, filenames in os.walk(directory):

                # If the file type matches one of those this program recognises
                for filename in fnmatch.filter(filenames, filetype):

                    cand = os.path.join(
                        root, filename)  # Gets full path to the candidate.

                    # If the file does not have the expected suffix (file extension), skip to the next.
                    if (cand.endswith(filetype.replace("*", "")) == False):

                    candidatesProcessed += 1

                    if (candidatesProcessed %
                            10000 == 0):  # Every 10,000 candidates

                        # This 'if' statement is used to provide useful feedback on feature
                        # generation. But it is also used to write the features collected so far,
                        # to the output file at set intervals. This helps a) reduce memory load, and
                        # b) reduce disc load (by writing out lots of features in one go, as opposed
                        # to one by one).

                        print "\tCandidates processed: ", candidatesProcessed

                        # Write out the features collected so far.
                        outputText = ""
                        for s in self.featureStore:
                            outputText += s + "\n"

                            output, outputText
                        )  # Write all 10,000 entries to the output file.
                        self.featureStore = [
                        ]  # Clear the feature store, freeing up memory.


                        # Create the candidate object.
                        c = Candidate.Candidate(cand, str(directory + cand))

                        # Get the features from the candidate.
                        features = c.getFeatures(feature_type, candidate_type,

                        # If the user would like the output to be in ARFF format, then each candidate
                        # has to be associated with a label. Since this code cannot know the true label
                        # of a candidate, here the unknown label '?' is appended as a additional feature.
                        if (arff and feature_type > 0 and feature_type < 7):

                        # Store the features so it can later be written to the specified output file.
                        if (meta):
                            # Store with meta information - basically this means including the candidate
                            #                               name (full path) with each feature set. This means that
                            #                               each set of features will be linked to a candidate,
                            #                               useful for certain investigations (i.e. why a specific
                            #                               candidate achieved particular feature values).
                            self.storeFeatureMeta(cand, features)
                                cand, features)  # Store only the feature data.

                    except Exception as e:  # Catch *all* exceptions.
                        print "\tError reading candidate data :\n\t", sys.exc_info(
                        print self.format_exception(e)
                        print "\t", cand, " did not have features generated."
                        failures += 1

                    successes += 1

        # Save any remaining features, since its possible that some features
        # were not written to the output file in the loop above.

        if (len(self.featureStore) > 0):

            outputText = ""

            for s in self.featureStore:

                outputText += s + "\n"

            self.appendToFile(output, outputText)
            self.featureStore = []

        # Finally get the time that the procedure finished.
        end = datetime.datetime.now()

        # Output feature generation statistics.
        print "\tCompleted candidate search."

        print "\n\t******************************"
        print "\t| Feature Generation Results |"
        print "\t******************************"
        print "\tCandidates processed:\t", candidatesProcessed
        print "\tSuccesses:\t", successes
        print "\tFailures:\t", failures
        print "\tExecution time: ", str(end - start)
コード例 #22
# Greedy weighted set cover over the entries of best_permutations_DS: while
# genes remain uncovered, scan the remaining entries, weigh each one by the
# pairwise distances between the genes it would newly cover, and keep the
# best entry of that pass.
# NOTE(review): this copy of the function is truncated -- the parameter list is
# cut off after the first parameter and several statements below have lost
# their continuation lines or bodies. Restore them from the upstream source
# before relying on this code.
def find_w_set_cover_heated(best_permutations_DS,
    '''for now, might won't work in a case when there is a gene that isn't covered by any of the permutations in the best_permutations_DS. not finished. can make it more readble'''
    # Shallow copy of the input list.
    temp_best_perm_DS = copy.copy(best_permutations_DS)
    res = list()  #[temp_best_perm_DS[0]]
    gene_names_lst = distance_matrix.names
    if genes_sg_dict:
        for gene, targets in genes_sg_dict.items():
            if len(targets) == 0:
                print("no targets for gene " + gene)
            # NOTE(review): as written this line also runs for an empty
            # `targets` list, where targets[0] raises IndexError; a `continue`
            # may have been lost above. `c` is not used in the visible code.
            c = Candidate.Candidate(targets[0])

    uncovered_genes = set()
    for sg, genesLst in sg_genes_dict.items():
        for gene in genesLst:
        # NOTE(review): loop body missing in this copy; presumably it adds
        # `gene` to uncovered_genes -- confirm against upstream.
    # Keep selecting while some gene is uncovered and candidates remain.
    while (len(uncovered_genes)) > 0 and len(temp_best_perm_DS) > 0:
        best_current_perm, best_num_of_coverd, best_prob_of_covered = None, 0, 0  #best_current_perm is the whole tuple
        best_w = len(uncovered_genes)
        i = 0
        while i < (len(temp_best_perm_DS)):
            # find utility of sgRNA
            new_genes_coverd = list()
            for gene, score in temp_best_perm_DS[i].genes_score_dict.items():
                if gene in uncovered_genes and score >= thr:
                        # NOTE(review): the head of this statement is missing;
                        # only its continuation line survives.
                        gene)  #new_genes_coverd.append((gene, score)) #
            if len(new_genes_coverd) == 0:
                i += 1
                # NOTE(review): without a `continue` here, execution falls
                # through and `price / len(new_genes_coverd)` below divides by
                # zero -- a `continue` was probably lost.
            # compute the weight
            price = 0  # the lower the price, the lighter the set
            deniminator = 0  # deniminator = len(new_genes_coveres)*(len(new_genes_coveres) - 1)
            for j in range(len(new_genes_coverd)):
                # NOTE(review): subscript expression truncated in this copy.
                score_j = temp_best_perm_DS[i].genes_score_dict[
                # NOTE(review): the inner range starts at `i` (the index into
                # temp_best_perm_DS), not at `j`; this looks unintended for a
                # pairwise loop over new_genes_coverd -- confirm.
                for k in range(i, len(new_genes_coverd)):
                    score_k = temp_best_perm_DS[i].genes_score_dict[
                    # Mean of the two complements (1 - score).
                    curr_avg = (1 - score_j + 1 - score_k) / 2
                    deniminator += curr_avg
                    index_j, index_k = gene_names_lst.index(
                        new_genes_coverd[j]), gene_names_lst.index(
                    dist = distance_matrix[index_j, index_k]
                    price += (dist**alfa) * curr_avg
            if deniminator == 0:
                price = 1  # sure?
                # NOTE(review): an `else:` appears to be missing before the
                # next line; as written it divides by a zero denominator.
                price = price / deniminator
            w = price / len(new_genes_coverd)

            #del temp_best_perm_DS[i]
            # keep the best sgRNA in current iteration
            if w <= best_w:  ## and temp_best_perm_DS[i][2] > best_prob_of_covered:  ##need to check if 2 is the right index, and not 1.
                #if len(new_genes_coverd) > best_num_of_coverd or prob_cover > best_prob_of_covered: # cover more gene or cover the same amount with greater prob.
                # NOTE(review): call arguments truncated in this copy.
                prob_cover = prob_cover_genes_lst(temp_best_perm_DS[i],
                if prob_cover > best_prob_of_covered:
                    best_w, best_prob_of_covered = w, prob_cover
                    best_current_perm = temp_best_perm_DS[i]
            i += 1
        # add the best sgRNA to res
        if (best_current_perm):
            for gene, score in best_current_perm.genes_score_dict.items():
                if gene in uncovered_genes and score >= thr:  # there is a probability that this gene had already been covered by a previous sgRNA
                # NOTE(review): body missing in this copy. Nothing visible
                # removes genes from uncovered_genes or appends to res, so the
                # outer loop's progress cannot be verified from here.
    return res
コード例 #23
	def addCandidate(self, filename):
		"""Load a candidate file and precompute its binary-orbit lookup tables.

		filename - name of the Candidate file.

		Candidate file has the following minimum content:

			Period  P fP

		Where P is the candidate period, and fP is the fractional error.

		Optional lines are:

		Phase  ph  d_ph
		Width log10_w dlog10_w
		Acceleration  a d_a
		CircBinary log10_bp dlog10_bp log10_ba log10d_ba
		Scattering log10_s dlog10_s
		DM dm d_dm

		In each case the parameter value x and its half-width dx are given,
		so that the parameter is searched over x +/- dx.

		Side effects: sets self.Cand and, depending on which binary model the
		candidate enables, the host-side tables (self.CPUCosOrbit,
		self.CPUSinOrbit, self.CPUTrueAnomaly) and their GPU copies
		(self.CosOrbit, self.SinOrbit, self.TrueAnomaly). For the eccentric,
		GR and PK models the tables are lists with one entry per
		interpolation step in log10(eccentricity).
		"""
		self.Cand = Candidate.Candidate(filename)

		if(self.Cand.FitCircBinary == True):
			# Circular orbit: one table pair evaluated at zero eccentricity.
			# to_gpu allocates the device array itself, so no separate
			# gpuarray.empty() pre-allocation is needed.
			self.CPUCosOrbit, self.CPUSinOrbit = self.KeplersOrbit(0)
			self.CosOrbit = gpuarray.to_gpu(np.float64(self.CPUCosOrbit))
			self.SinOrbit = gpuarray.to_gpu(np.float64(self.CPUSinOrbit))

		if(self.Cand.FitEccBinary == True):

			print("%s %s" % (self.Cand.pmin[7], self.Cand.pmax[7]))
			self.MinInterpEcc = self.Cand.pmin[7]
			self.MaxInterpEcc = self.Cand.pmax[7]
			# A single step unless the eccentricity range is non-degenerate.
			self.InterpEccStepSize = 1
			self.NumInterpEccSteps = 1
			if(self.MaxInterpEcc - self.MinInterpEcc > 10.0**-10):
				self.NumInterpEccSteps = 100
				self.InterpEccStepSize = (self.MaxInterpEcc - self.MinInterpEcc)/self.NumInterpEccSteps

			print("Interp details: %s %s %s %s %s %s" % (self.MinInterpEcc, self.MaxInterpEcc, 10.0**self.MinInterpEcc, 10.0**self.MaxInterpEcc, self.NumInterpEccSteps, self.InterpEccStepSize))

			self.CosOrbit = []
			self.SinOrbit = []

			self.CPUCosOrbit = []
			self.CPUSinOrbit = []

			for i in range(self.NumInterpEccSteps):

				# pmin/pmax are used as log10(eccentricity) bounds here.
				Ecc = 10.0**(self.MinInterpEcc + i*self.InterpEccStepSize)
				print("Computing Ecc:  %s %s %s" % (i, self.MinInterpEcc + i*self.InterpEccStepSize, Ecc))
				COrbit, SOrbit = self.KeplersOrbit(Ecc)

				# Store the host tables before uploading them; previously the
				# CPU lists stayed empty and indexing them raised IndexError.
				self.CPUCosOrbit.append(COrbit)
				self.CPUSinOrbit.append(SOrbit)

				self.CosOrbit.append(gpuarray.to_gpu(np.float64(COrbit)))
				self.SinOrbit.append(gpuarray.to_gpu(np.float64(SOrbit)))

		if(self.Cand.FitGRBinary == True or self.Cand.FitPKBinary == True):

			print("%s %s" % (self.Cand.pmin[7], self.Cand.pmax[7]))

			self.MinInterpEcc = self.Cand.pmin[7]
			self.MaxInterpEcc = self.Cand.pmax[7]

			# A single step unless the eccentricity range is non-degenerate.
			self.NumInterpEccSteps = 1
			self.InterpEccStepSize = 1
			if(self.MaxInterpEcc - self.MinInterpEcc > 10.0**-10):
				self.NumInterpEccSteps = 100
				self.InterpEccStepSize = (self.MaxInterpEcc - self.MinInterpEcc)/self.NumInterpEccSteps

			print("Interp details: %s %s %s %s %s %s" % (self.MinInterpEcc, self.MaxInterpEcc, 10.0**self.MinInterpEcc, 10.0**self.MaxInterpEcc, self.NumInterpEccSteps, self.InterpEccStepSize))

			self.CosOrbit = []
			self.SinOrbit = []
			self.TrueAnomaly = []

			self.CPUCosOrbit = []
			self.CPUSinOrbit = []
			self.CPUTrueAnomaly = []

			for i in range(self.NumInterpEccSteps):

				Ecc = 10.0**(self.MinInterpEcc + i*self.InterpEccStepSize)

				print("Computing Ecc:  %s %s %s" % (i, self.MinInterpEcc + i*self.InterpEccStepSize, Ecc))
				COrbit, SOrbit = self.KeplersOrbit(Ecc)

				# Store the host tables before uploading them; previously the
				# CPU lists stayed empty and indexing them raised IndexError.
				self.CPUCosOrbit.append(COrbit)
				self.CPUSinOrbit.append(SOrbit)

				self.CosOrbit.append(gpuarray.to_gpu(np.float64(COrbit)))
				self.SinOrbit.append(gpuarray.to_gpu(np.float64(SOrbit)))

				# True anomaly from the orbit tables, wrapped into [0, 2*pi):
				#   sin(nu) = sqrt(1 - e^2) * S / (1 - e*C)
				#   cos(nu) = (C - e) / (1 - e*C)
				# (presumably C and S are the cosine and sine of the eccentric
				# anomaly returned by KeplersOrbit -- confirm).
				sae = np.sqrt(1.0 - Ecc*Ecc)*SOrbit/(1.0 - Ecc*COrbit)
				cae = (COrbit - Ecc)/(1.0 - Ecc*COrbit)
				self.CPUTrueAnomaly.append(np.arctan2(sae, cae)%(2*np.pi))

				self.TrueAnomaly.append(gpuarray.to_gpu(np.float64(self.CPUTrueAnomaly[i])))
コード例 #24
def page_candidate():
    """Render the candidate page.

    Builds ``Candidate(serialize=False)`` and passes it to the
    'candidate.html' template as ``candidates``, with the 'Candidate'
    menu item marked as selected.
    """
    return render_template(
        'candidate.html',
        selected_menu_item='Candidate',
        candidates=Candidate(serialize=False),
    )
コード例 #25
 def initPopulation(self):
     """Return a fresh population as a list of Popln_Size new Candidates.

     Every Candidate is constructed with num_games=Num_Games and
     max_moves=Max_Moves.
     """
     return [
         Candidate(num_games=Num_Games, max_moves=Max_Moves)
         for _ in range(Popln_Size)
     ]
コード例 #26
# Enumerate candidate sequences produced by all_perms(...) and score each one
# against every gene's target list; returns perm_grades, or None when more
# than 12 differing positions are found.
# NOTE(review): this copy of the function is truncated -- the parameter list is
# cut off after the first parameter and several statements below have lost
# their continuation lines (and at least one `else:`). Restore them from the
# upstream source before relying on this code.
def return_candidates(list_of_targets,
    # NOTE(review): call arguments missing in this copy.
    dict_of_different_places = wheres_the_differences_linear(
    )  ## node_targets_DS is a python array. where_the_differences.
    node.polymorphic_sites = dict_of_different_places
    #list_of_different_places = list(node.polymorphic_sites)
    # Give up when there are too many differing positions to enumerate.
    if len(dict_of_different_places) > 12:  #change to 12
        return None
    # Sort the (key, value) pairs by key.
    list_of_different_places = list(dict_of_different_places.items())
    list_of_different_places.sort(key=lambda item: item[0])
    ## going over all the permutations
    # NOTE(review): call arguments truncated in this copy.
    list_of_perms_sequs = all_perms(initial_seq, None,
    perm_grades = [
    ]  # a list of tuples: (candidate_str, fraction_of_cut, cut_expectation, genes_list)
    for candidate_str in list_of_perms_sequs:
        targets_dict = {
        }  # a dict; presumably gene name -> list of targets of this gene that might be cut by candidate_str
        genes_covering = [
        ]  # a list of tuples: (gene name, probability to be cut).
        for gene, targets_lst_of_gene in genes_sg_dict.items(
        ):  ## find out whether this gene is touched by the sgRNA seq
            prob_gene_will_not_cut = 1  ## easier to calculate
            # NOTE(review): this rebinds the `list_of_targets` parameter, so
            # the caller-supplied value is no longer reachable from here on.
            list_of_targets = [
            ]  ## for later knowing where the candidate_str might cut in each gene
            num_of_cuts_per_gene = 0  # in use only in the single gene version
            for target in targets_lst_of_gene:  ## targets_lst_of_gene: list of the targets of the gene
                distance_candidate_target = df(candidate_str, target, cfd_dict)
                candidate_cut_prob = 1 - distance_candidate_target  ## the distance is between 0 and 1. 0 is usually a perfect match, 1 is far
                sg_site_differents = two_sequs_differeces(
                    candidate_str, target)
                list_of_targets.append([target, sg_site_differents])
                prob_gene_will_not_cut = prob_gene_will_not_cut * (
                    1 - candidate_cut_prob
                )  # lowering the not-cut prob with each target
                num_of_cuts_per_gene += candidate_cut_prob
            # Probability that at least one target of this gene is cut.
            prob_gene_cut = 1 - prob_gene_will_not_cut
            if len(list_of_targets) > 0:
                    # NOTE(review): the head of this assignment is missing;
                    # presumably `targets_dict[` -- confirm against upstream.
                    gene] = list_of_targets  # targets of this gene to be cleaved by the current candidate
            if (for_single_gene):
                genes_covering.append((gene, num_of_cuts_per_gene))
                # NOTE(review): an `else:` appears to be missing before the
                # next line; as written both tuples are appended when
                # for_single_gene is true.
                genes_covering.append((gene, prob_gene_cut))
        cut_expection = 0  ## accumulated below as the sum of the per-gene scores in genes_covering
        genes_score_dict = {
        }  # a dict of genes: genes considered cut by this sequence, and cut prob
        # NOTE(review): the loop variable shadows the builtin `tuple`.
        for tuple in genes_covering:  # tuple : (gene name, probability to be cut)
            cut_expection += tuple[1]  ## running sum of the per-gene scores
            genes_score_dict[tuple[0]] = tuple[1]
        if cut_expection >= 1:  # is this condition necessary?
            # NOTE(review): constructor arguments truncated, and the statement
            # that adds the candidate to perm_grades is not visible here.
            current_candidate = Candidate.Candidate(candidate_str,
    del list_of_perms_sequs
    return perm_grades
コード例 #27
# Driver for a consensus simulation: builds numNodes nodes, a random followee
# graph and an initial transaction distribution, runs numRounds rounds of
# proposal exchange, then prints each node's final transactions.
# NOTE(review): this copy of the function is truncated -- an `else:` and the
# bodies of several loops/conditionals are missing (marked below). Restore
# them from the upstream source before relying on this code.
def main(args):
##      There are four required command line arguments: p_graph (.1, .2, .3),
##      p_malicious (.15, .30, .45), p_txDistribution (.01, .05, .10),
##      and numRounds (10, 20). You should try to test your CompliantNode
##      code for all 3x3x3x2 = 54 combinations.
    # args[0] is not read; the four parameters come from args[1..4].
    numNodes = 100;
    p_graph = float(args[1]) # parameter for random graph:
                      # prob. that an edge will exist
    p_malicious = float(args[2]) # prob. that a node will be set to be malicious
    p_txDistribution = float(args[3])  # probability of assigning an initial transaction to each node
    # Parsed as float here and passed to the node constructors as a float;
    # only converted to int just before the round loop below.
    numRounds = float(args[4])  # number of simulation rounds your nodes will run for

 #     pick which nodes are malicious and which are compliant
    nodes = [None for i in range(numNodes)]
    mal = 0
    com = 0
    for i in range(numNodes):
         if(random.random() < p_malicious):
 #           When you are ready to try testing with malicious nodes, replace the
 #           instantiation below with an instantiation of a MaliciousNode
            nodes[i] = MaliciousNode(p_graph, p_malicious, p_txDistribution, numRounds)
            mal = mal + 1
            # NOTE(review): an `else:` appears to be missing before the next
            # two lines. As written, a node drawn as malicious is immediately
            # overwritten with a CompliantNode (both counters increment), and
            # nodes not drawn stay None.
            nodes[i] = CompliantNode(p_graph, p_malicious, p_txDistribution, numRounds)
            com = com + 1
    print("{} malicious nodes and {} compliant nodes".format(mal,com))

    # followees[j][i] is 1 with probability p_graph for i != j, else 0.
    followees = [[1 if random.random() < p_graph and i!= j else 0 for i in range(numNodes)] for j in range(numNodes)]
    for i in range(numNodes):
    # NOTE(review): loop body missing in this copy; presumably it hands
    # followees[i] to nodes[i] -- confirm against upstream.

##      initialize a set of 500 valid Transactions with random ids
    numTx = 500
    validTxIds = []
    for i in range(numTx):
    # NOTE(review): loop body missing in this copy.

##      distribute the 500 Transactions throughout the nodes, to initialize
##      the starting state of Transactions each node has heard. The distribution
##      is random with probability p_txDistribution for each Transaction-Node pair.

    for i in range(numNodes):
        pendingTransactions = []
        for txid in validTxIds:
          if (random.random() < p_txDistribution): # p_txDistribution is .01, .05, or .10.
          # NOTE(review): body missing in this copy; pendingTransactions is
          # never filled or handed to a node in the visible code.

    numRounds = int(numRounds)
    # NOTE(review): the loop variable shadows the builtin `round`.
    for round in range(numRounds):
##         gather all the proposals into a map. The key is the index of the node receiving
##         proposals. The value is a list containing pairs. The first
##         element is the id of the transaction being proposed and the second
##         element is the index # of the node proposing the transaction.
         allProposals = {}
         ## allProposals = { idx1: [cand1, cand2, ...], idx2: [...] }

         for i in range(numNodes):
            proposals = nodes[i].sendToFollowers()
            for tx in proposals:
               if (tx not in validTxIds):
                  # NOTE(review): `break` abandons all remaining proposals of
                  # node i at the first invalid tx; the comment describes
                  # per-tx filtering, which would be `continue` -- confirm.
                  break  # ensure that each tx is actually valid

               for j in range(numNodes):
                  if (not followees[j][i]):
                    # NOTE(review): `break` stops scanning j at the first
                    # non-follower, so later followers never receive tx; the
                    # comment suggests `continue` was intended -- confirm.
                    break ## tx only matters if j follows i

                  if (j not in allProposals):
                    allProposals[j] = [] ## key: j; value: [list]

                  # NOTE(review): the candidate is created but not appended to
                  # allProposals[j] in the visible code.
                  candidate = Candidate(tx, i)
##         Distribute the Proposals to their intended recipients as Candidates
         for i in range(numNodes):
            if (i in allProposals):
            # NOTE(review): body missing in this copy.

            transactions = nodes[i].sendToFollowers()

            # save to file
            filename = "results/"+"round_"+str(round)+"_node_"+str(i)+".txt"
            # NOTE(review): the handle is never closed in the visible code and
            # the "results/" directory must already exist; a `with` block
            # would be safer once the missing body is restored.
            f = open(filename,"w+")
            for tx in transactions:
            # NOTE(review): loop body missing in this copy.

 #     print results
    for i in range(numNodes):
      transactions = nodes[i].sendToFollowers()
      print("Transaction ids that Node " + str(i) + " believes consensus on: ", end="")
      for tx in transactions:
         print("{} ".format(tx.id), end='')
      print("{} transactions".format(len(transactions)))