def main():
    """Build two sample candidates, a region and a voter, then print the voter.

    Smoke-test driver: prints the voter object followed by the results of
    ``voter.vote()`` and ``voter.likelyVoter()``.
    """
    trumpRace = {
        'white': .5,
        'African American': 0.01,
        'Hispanic': 0.01,
        'Asian': 0.1,
        'Other': 0.2
    }
    HillRace = {
        'white': .2,
        'African American': 0.5,
        'Hispanic': 0.5,
        'Asian': 0.1,
        'Other': 0.2
    }
    c = Candidate.Candidate("Trump", .7, trumpRace)
    d = Candidate.Candidate("Hilary", .4, HillRace)
    reg = Region.Region("Region 1", 30, [.3, .3, .2, .1, .1], 45.0, 8.0, .5,
                        .05, [c, d], [.3, .7], [.1, .1], [.4, .6], [.1, .1],
                        .2, [.8, .2])
    voter = Voter(reg, "white", 45, .8, .5, .6, [.3, .7], .8, False)
    # Single-argument print(...) produces the same output as the Python 2
    # print statement and, unlike it, also parses on Python 3.
    print(voter)
    print(str(voter.vote()))
    print(str(voter.likelyVoter()))
def testCplexParliminaries():
    """Run ``Crispys_CplexModel`` on a minimal fixture: two candidates, one gene."""
    single_gene_targets = [
        "AAAAAAAAAAAAAAAAAAA",
        "AAAAAAAAAAAAAAAAAAAA",
        "AAAAAAAAAAAAAAAAAAAA",
    ]
    genes_list = [single_gene_targets]  # one gene for now
    candidate_seqs = ("AAAAAAAAAAAAAAAAAAAA", "AAAAAAAAAAAAAAAAAAAA")
    candidates_list = [Candidate(seq) for seq in candidate_seqs]
    Crispys_CplexModel(candidates_list, genes_list, cfd_funct, 1)
def parse_file(self):
    """Read ``self.file`` line by line and populate ``self.election``.

    Layout expected by the code below:
      * line 0: the number of candidates;
      * the next ``num_candidates`` lines: ``<number>,<name>``;
      * the following line: ``<voters>,<sum of vote counts>,<unique orders>``;
      * every remaining line: ``<count>,<comma separated preferences>``.
    """
    for idx, line in enumerate(self.file):
        if idx == 0:
            num_candidates = int(line)
            self.election.set_num_candidates(num_candidates)
            continue

        if idx <= num_candidates:
            # Candidate definition line.
            fields = line.split(',')
            number = int(fields[0])
            name = fields[1].strip()
            self.election.add_candidate(Candidate.Candidate(number, name))
            continue

        if idx == num_candidates + 1:
            # Election-wide summary line.
            meta = line.split(',')
            voters = int(meta[0])
            vote_sum = int(meta[1])
            unique_orders = int(meta[2])
            self.election.init_meta_info(voters, vote_sum, unique_orders)
            continue

        # Ballot line: split off the count, the rest is the preference order.
        parts = line.split(',', 1)
        count = int(parts[0])
        prefs = parts[1].strip()
        if '{' in prefs and '}' in prefs:
            # Braces are handed to remove_indifferent_votes for clean-up.
            prefs = self.remove_indifferent_votes(prefs)
        ranking = [int(token) for token in prefs.split(',')]
        self.election.add_vote(Vote.Vote(count, ranking))
def pitch_estimate(audio_file, config):
    """Return the pitch chosen by ``Candidate.find_pitch_candidate`` for a file.

    The per-window values are read from ``<ops_output_path><name>.csv``; when
    that file does not exist yet it is first produced through
    ``Smile.extract_ops_features``.  The first CSV row is skipped and the third
    column of every other row is rounded to an int.
    """
    name = audio_file.name
    source_path = audio_file.path
    output_dir = config.ops_output_path
    smile_config = config.ops_config_path
    feature_path = output_dir + name + '.csv'

    # Run the openSmile extraction only when its output is missing.
    if not os.path.isfile(feature_path):
        Smile.extract_ops_features(name, source_path, smile_config, output_dir)

    with open(feature_path, 'rb') as csvfile:
        rows = csv.reader(csvfile, delimiter=';', quotechar='|')
        next(rows, None)  # the first row is ignored
        pitch_values = [int(round(float(row[2]))) for row in rows]

    return Candidate.find_pitch_candidate(pitch_values, config.min_freq)
def create_a_new_candidate_and_fill_fields(current_seq, genes_sg_dict, df,
                                           Omega):
    """Score ``current_seq`` against every gene and wrap the result in a Candidate.

    For each gene, ``df(current_seq, target)`` is treated as a distance and
    ``1 - distance`` as the cut probability of that target.  A gene enters
    ``genes_score_dict`` when its combined cut probability exceeds ``Omega``;
    targets for which ``Stage2.stopping_condition`` is false are recorded in
    ``targets_dict`` together with ``two_sequs_differeces``.

    Note that the ``prob_gene_cut`` handed to the Candidate is the value of
    the last gene iterated, and that an empty ``genes_sg_dict`` leaves the
    loop variables unbound.
    """
    genes_score_dict = {}
    targets_dict = {}
    gene_count = len(genes_sg_dict)

    for gene_name, gene_targets in genes_sg_dict.items():
        miss_prob = 1  # probability that no target of this gene is cut
        kept_targets = []
        for target in gene_targets:
            cut_prob = 1 - df(current_seq, target)
            if not Stage2.stopping_condition(cut_prob, Omega):
                kept_targets.append(target)
            miss_prob = miss_prob * (1 - cut_prob)

        prob_gene_cut = 1 - miss_prob
        if prob_gene_cut > Omega:
            genes_score_dict[gene_name] = prob_gene_cut
        fraction_of_cut = len(genes_score_dict) / gene_count

        if kept_targets:
            targets_dict[gene_name] = [
                [site, two_sequs_differeces(current_seq, site)]
                for site in kept_targets
            ]

    return Candidate.Candidate(current_seq, fraction_of_cut, prob_gene_cut,
                               genes_score_dict, targets_dict)
def find_best_sg_for_single_gene_naiveMC_returns_single(gene_name, sg_lst):
    '''
    Older single-gene variant, from when the naive approach did not build a
    set cover.  Its only statement is commented out, so the function
    currently does nothing and returns None.

    :param gene_name: name of the single gene
    :param sg_lst: list of sgRNA sequences of that gene
    :return: None
    '''
    # return Candidate.Candidate(sg_lst[0], 1, {gene_name: 1}, {gene_name: []})
    return None
def find_set_cover(best_permutations_DS, sg_genes_dict, thr,
                   genes_sg_dict=None):
    """Greedily choose candidates until every gene is covered.

    In each round the candidate covering the most still-uncovered genes (with
    a score of at least ``thr``) is selected; ties on the count are broken by
    the larger ``prob_cover_genes_lst`` value.

    :param best_permutations_DS: candidates exposing ``genes_score_dict``;
        the list itself is not modified (a shallow copy is used).
    :param sg_genes_dict: maps each sgRNA to the genes it lies on; the union
        of those genes is the set that has to be covered.
    :param thr: minimal score for a gene to count as covered by a candidate.
    :param genes_sg_dict: optional gene -> targets mapping; when given, a
        default candidate built from the first target of every gene is added.
    :return: list of the selected candidates, in selection order.  Genes that
        no candidate can cover are left uncovered.
    """
    candidates = copy.copy(best_permutations_DS)
    res = []

    if genes_sg_dict:
        for gene, targets in genes_sg_dict.items():
            if len(targets) == 0:
                # The original tried to remove the gene from an undefined
                # list here (NameError); there is nothing to clean up in this
                # function, so the gene is only reported and skipped.
                print("no targets for gene " + gene)
                continue
            c = Candidate.Candidate(targets[0])
            c.fill_default_fildes(sg_genes_dict[targets[0]])
            candidates.append(c)

    uncovered_genes = set()
    for genes in sg_genes_dict.values():
        uncovered_genes.update(genes)

    while uncovered_genes and candidates:
        best_current_perm, best_num_of_coverd, best_prob_of_covered = None, 0, 0
        for cand in candidates:
            new_genes_coverd = [
                gene for gene, score in cand.genes_score_dict.items()
                if gene in uncovered_genes and score >= thr
            ]
            if not new_genes_coverd or len(new_genes_coverd) < best_num_of_coverd:
                continue
            # Compute the probability *before* comparing it; the previous
            # code compared a value left over from an earlier candidate.
            prob_cover = prob_cover_genes_lst(cand, new_genes_coverd)
            if (len(new_genes_coverd) > best_num_of_coverd
                    or prob_cover > best_prob_of_covered):
                best_num_of_coverd = len(new_genes_coverd)
                best_prob_of_covered = prob_cover
                best_current_perm = cand

        if best_current_perm is None:
            # No candidate covers any remaining gene: stop instead of
            # looping forever.
            break

        res.append(best_current_perm)
        for gene, score in best_current_perm.genes_score_dict.items():
            if score >= thr:
                uncovered_genes.discard(gene)
    return res
def find_best_sg_for_single_gene(gene_name, sg_lst):
    '''
    Build the trivial result for a single gene from its first sgRNA.

    :param gene_name: name of the gene
    :param sg_lst: sgRNA sequences of the gene; only ``sg_lst[0]`` is used
    :return: a one-element list holding a Candidate for ``sg_lst[0]`` with
        value 1, gene scores ``{gene_name: 1}`` and that sgRNA (with an empty
        differences dict) as the gene's only match site
    '''
    first_sg = sg_lst[0]
    genes_score_dict = {gene_name: 1}
    targets_dict = {gene_name: [[first_sg, {}]]}
    candidate = Candidate.Candidate(first_sg, 1, genes_score_dict,
                                    targets_dict)
    return [candidate]
def initializeCandidates():
    """Return one ``Candidate.makeCandidate`` result per hard-coded politician.

    Each entry maps a full name to the list of strings passed along with it.
    """
    candict = {
        'Rick Santorum': ['Santorum', '#teamsantorum', '#santorum'],
        'Ron Paul': ['Ron Paul', '#ronpaul2012'],
        'Newt Gingrich': ['Gingrich', '#withnewt'],
        'Mitt Romney': ['Romney', '#mitt2012', '#mittromney']
    }
    return [
        Candidate.makeCandidate(name, terms)
        for name, terms in candict.items()
    ]
def main():
    """Build a two-candidate ``VotingProfile`` and print ``__reNorm__([1, 2, 3])``."""
    trumpRace = {
        'white': .5,
        'African American': 0.01,
        'Hispanic': 0.01,
        'Asian': 0.1,
        'Other': 0.2
    }
    HillRace = {
        'white': .2,
        'African American': 0.5,
        'Hispanic': 0.5,
        'Asian': 0.1,
        'Other': 0.2
    }
    c = Candidate.Candidate("Trump", .7, trumpRace)
    d = Candidate.Candidate("Hilary", .4, HillRace)
    vp = VotingProfile([c, d], [.3, .7], [.1, .2], .4, .1, .2, [.2, .8])
    # Single-argument print(...) gives the same output as the Python 2 print
    # statement while also being valid Python 3.
    print(vp.__reNorm__([1, 2, 3]))
def call_CplexCovers(list_of_candidates,
                     genes_lst,
                     thr,
                     method,
                     genes_sg_dict=None,
                     sg_genes_dict=None):
    """Load pickled inputs and solve a cover problem with the chosen method.

    :param list_of_candidates: path of a pickle holding the candidate list.
    :param genes_lst: path of a pickle holding the gene list.
    :param thr: threshold forwarded to the cover solvers.
    :param method: ``"SC"``, ``"F_SC"`` or a string containing ``"BC"`` of the
        form ``BC_<cover size>``.
    :param genes_sg_dict: optional path of a pickled gene -> targets dict.
    :param sg_genes_dict: optional path of a pickled sgRNA -> genes dict.
    :return: ``(best_score, group)`` for the bounded cover, the result of
        ``cover_from_cplex_promblem_obj`` for the CPLEX methods, or ``None``
        when the method is unknown or CPLEX raises ``CplexError``.
    """

    def _load(path):
        # NOTE(security): pickle.load can execute arbitrary code; these files
        # must come from a trusted source.
        with open(path, 'rb') as handle:
            return pickle.load(handle)

    list_of_candidates = _load(list_of_candidates)
    genes_lst = _load(genes_lst)
    # The two dictionaries are optional; only load what was actually given.
    if genes_sg_dict is not None:
        genes_sg_dict = _load(genes_sg_dict)
    if sg_genes_dict is not None:
        sg_genes_dict = _load(sg_genes_dict)

    # Keep candidates + genes below 1000 (presumably a CPLEX size limit --
    # TODO confirm).  Clamp at zero so a long gene list cannot turn the
    # bound into a negative slice index.
    max_candidates = max(0, 999 - len(genes_lst))
    if len(list_of_candidates) > max_candidates:
        list_of_candidates = list_of_candidates[:max_candidates]

    # Add a representative for each gene, to avoid the case of no answer
    # from CPLEX.
    if genes_sg_dict and sg_genes_dict is not None:
        for gene, targets in genes_sg_dict.items():
            if len(targets) == 0:
                print("no targets for gene " + gene)
                # The original referenced an undefined ``genes_name_lst``;
                # ``genes_lst`` is the gene list in scope here.
                if gene in genes_lst:
                    genes_lst.remove(gene)
                continue
            c = Candidate.Candidate(targets[0])
            c.fill_default_fildes(sg_genes_dict[targets[0]])
            list_of_candidates.append(c)

    try:
        if method == "SC":
            cplex_problem_object = CplexCovers.CplexSetCover(
                list_of_candidates, genes_lst, thr)
        elif method == "F_SC":
            cplex_problem_object = CplexCovers.Cplex_fuzzy_set_cover(
                list_of_candidates, genes_lst, thr)
        elif 'BC' in method:
            cover_size = int(method.split('_')[1])
            best_score, group = bounded_cover(list_of_candidates, genes_lst,
                                              cover_size, thr)
            return (best_score, group)
        else:
            print("which method?")
            print(method)
            # Without a problem object there is nothing to extract.
            return None
    except CplexError:
        print("no solution")
        return None
    return cover_from_cplex_promblem_obj(list_of_candidates,
                                         cplex_problem_object)
def generateNewPopulation(self):
    """Breed ``N_percent`` percent of ``Popln_Size`` offspring and merge them in.

    Parents come from roulette-wheel selection when ``Selection == 'RW'`` and
    from tournament selection otherwise.  Each child may be mutated with
    probability ``Mutation_percent / 100``.  The offspring are handed to
    ``deleteNLastReplacement`` and the population is re-sorted by fitness.
    """
    wanted = (N_percent / 100) * Popln_Size
    offsprings = []

    # Keep breeding until the requested number of children exists.
    while len(offsprings) < wanted:
        if Selection == 'RW':
            parent1 = self.rouletteWheelSelection()
            parent2 = self.rouletteWheelSelection()
        else:  # 'TS'
            parent1, parent2 = self.tournamentSelection()

        child = Candidate(parent1.crossover(parent2),
                          num_games=Num_Games,
                          max_moves=Max_Moves)

        should_mutate = random.random() < (Mutation_percent / 100)
        if should_mutate:
            print("Mutation!")
            child.mutate()

        offsprings.append(child)

    # Swap the new children into the population, then restore the ordering.
    self.deleteNLastReplacement(offsprings)
    self.population = self.sortCandidatesByFitness(self.population)
def fill_leaves_sets(tree, sg_genes_dict):
    '''Initialise every leaf and push its targets up to all of its ancestors.

    Not compatible with genes trees.  Each leaf gets its own name as a node
    target and a default Candidate (filled from ``sg_genes_dict[leaf.name]``,
    the genes this target site is on) stored in ``leaf.candidates_DS``.
    '''
    for leaf in tree.leaves_DS:
        leaf.add_node_target(leaf.name)
        leaf_candidate = Candidate.Candidate(leaf.name)
        leaf_candidate.fill_default_fildes(sg_genes_dict[leaf.name])
        leaf.set_candidates_DS()
        leaf.candidates_DS[leaf.name] = leaf_candidate

        # Walk towards the root, adding any target the parent lacks.
        current = leaf
        while current.parent:
            parent = current.parent
            for target in current.node_targets_DS:
                if target not in parent.node_targets_DS:
                    parent.add_node_target(target)
            current = parent
def ExtractFeatureWords(filename):
    """Run the feature-word extraction pipeline on ``filename``.

    Steps, in order: read the file, split it into clusters, build the
    candidate classes from the de-duplicated clusters, apply the length
    (2, 15), support and position scoring, score every candidate, sort the
    flattened candidate list, keep what ``CutByScore`` returns for 2.9 and
    finally de-duplicate the extracted words.
    """
    clusters = Segmentation.SplitCluster(Reader.readfile(filename))
    candidate = Candidate.BuildClass(Candidate.DeleteRepetition(clusters))

    Candidate.CalLenScore(candidate, 2, 15)
    Candidate.CalSupScore(candidate, clusters)
    Candidate.CalPosScore(candidate, clusters)
    for group in candidate:
        for entry in group:
            entry.CalScore()

    ranked = Candidate.CandidateListSort(Candidate.GenCandidateList(candidate))
    extracted_word = Candidate.CutByScore(ranked, 2.9)
    return Candidate.ExtractedWordDeleteRepetition(extracted_word)
def call_it_all(sgList,
                sgNames,
                input_sg_genes_dict,
                Omega,
                df_targets,
                cfd_dict=None,
                PS_number=12):
    """Return the list of best candidates for a group of sgRNAs.

    A single sgRNA yields one default Candidate.  Otherwise a UPGMA tree is
    built, its leaves and polymorphic sites are filled, and ``top_down``
    appends its results to the returned list.
    """
    if len(sgList) == 1:
        print("only one sgRNA in the group")
        only_sg = sgList[0]
        genes = input_sg_genes_dict[only_sg]
        lone_candidate = Candidate.Candidate(only_sg)
        lone_candidate.fill_default_fildes(genes)
        return [lone_candidate]

    best_permutations_DS = []
    upgmaTree = return_upgma(sgList, sgNames, df_targets, cfd_dict)
    fill_leaves_sets(upgmaTree, input_sg_genes_dict)
    fill_PS(upgmaTree.root)
    top_down(best_permutations_DS, upgmaTree.root, Omega, input_sg_genes_dict,
             df_targets, cfd_dict, PS_number)
    return best_permutations_DS
def fill_leaves_sets_Genes_tree_as_well(tree, sg_genes_dict, genes_tree=False):
    '''Initialise every leaf and push its targets up to all of its ancestors.

    :param tree: object exposing ``leaves_DS``; nodes expose ``name``,
        ``parent``, ``node_targets_DS`` and ``add_node_target``.
    :param sg_genes_dict: maps a leaf name to the genes that target site is
        on; only read when ``genes_tree`` is false.
    :param genes_tree: when true the leaves stand for genes, so no Candidate
        is created and ``node_targets_DS`` simply holds the gene names.
    '''
    for leaf in tree.leaves_DS:
        leaf.add_node_target(leaf.name)
        if not genes_tree:
            leaf.set_candidates_DS()
            current_candidate = Candidate.Candidate(leaf.name)
            current_candidate.fill_default_fildes(sg_genes_dict[leaf.name])
            leaf.candidates_DS[leaf.name] = current_candidate
        # For a genes tree nothing else is needed: the name was already
        # registered above.  The former ``leaf.add_node_target[leaf.name]``
        # subscripted the method and raised TypeError.

        # Walk towards the root, adding any target the parent lacks.
        node = leaf
        while node.parent:
            for target in node.node_targets_DS:
                if target not in node.parent.node_targets_DS:
                    node.parent.add_node_target(target)
            node = node.parent
def create_a_new_candidate_and_fill_fields(current_seq, genes_sg_dict, df,
                                           Omega):
    """Score ``current_seq`` against every gene and wrap the result in a Candidate.

    ``df(current_seq, target)`` is used as a distance in [0, 1] (0 being a
    perfect match), so ``1 - distance`` is the cut probability of a target.
    A gene enters ``genes_score_dict`` when its combined cut probability
    exceeds ``Omega``; targets for which ``BU.stopping_condition`` is false
    are recorded in ``targets_dict`` with ``two_sequs_differeces``.

    Next stage of work (from the original author): add lowest_cut_site_prob.

    Note that the ``prob_gene_cut`` handed to the Candidate is the value of
    the last gene iterated, and that an empty ``genes_sg_dict`` leaves the
    loop variables unbound.
    """
    genes_score_dict = {}
    targets_dict = {}
    gene_count = len(genes_sg_dict)

    for gene_name, gene_targets in genes_sg_dict.items():
        miss_prob = 1  # probability that no target of this gene is cut
        kept_targets = []
        for target in gene_targets:
            cut_prob = 1 - df(current_seq, target)
            # Only targets that do not trigger the bottom-up stopping
            # condition are remembered as potential match sites.
            if not BU.stopping_condition(cut_prob, Omega):
                kept_targets.append(target)
            miss_prob = miss_prob * (1 - cut_prob)

        prob_gene_cut = 1 - miss_prob
        if prob_gene_cut > Omega:
            genes_score_dict[gene_name] = prob_gene_cut
        fraction_of_cut = len(genes_score_dict) / gene_count

        # Build the match-site entry of this gene.
        if kept_targets:
            targets_dict[gene_name] = [
                [site, two_sequs_differeces(current_seq, site)]
                for site in kept_targets
            ]

    return Candidate.Candidate(current_seq, fraction_of_cut, prob_gene_cut,
                               genes_score_dict, targets_dict)
dest='option', help='Enter a type of values to edit your CV (n/e/exp)') parser.add_argument('-sl', dest='sLink', help='Enter the link for your social account') result = parser.parse_args() if (result.username and result.password and (result.register) == 'c'): Users.RegistrationC(result.username, result.password) elif (result.username and result.password and (result.register) == 'hr'): Users.RegistrationHR(result.username, result.password) elif (result.username and result.password and result.type): Users.Login(result.username, result.password, result.type) elif (Users.CheckIfLoginHR(result.username)): if (result.search): HR.Serach(result.search) elif (result.id and result.status): HR.UpdateStatus(result.id, result.status) elif (result.id and result.note): HR.AddNotes(result.id, result.note) elif (Users.CheckIfLoginC(result.username)): if (result.name and result.education and result.pic and result.id and result.exp): Candidate.CreateCV(result.name, result.education, result.pic, result.id, result.exp, result.sLink) elif (result.id and result.value and result.option): Candidate.EditCV(result.id, result.value, result.option) elif (result.id): Candidate.CheckMyStatus(result.id)
def main():
    """Create five sample candidates and four regions and pickle each to disk.

    Candidates are written to ``<name>.cand`` and regions to ``<name>.rgn`` in
    the current working directory.
    """
    trumpRace = {
        'White': .3,
        'African American': 0.01,
        'Hispanic': 0.01,
        'Asian': 0.1,
        'Other': 0.2
    }
    HillRace = {
        'White': .2,
        'African American': 0.35,
        'Hispanic': 0.35,
        'Asian': 0.1,
        'Other': 0.2
    }
    CruzRace = {
        'White': .3,
        'African American': 0.1,
        'Hispanic': 0.3,
        'Asian': 0.1,
        'Other': 0.2
    }
    KasichRace = {
        'White': .3,
        'African American': 0.1,
        'Hispanic': 0.1,
        'Asian': 0.1,
        'Other': 0.2
    }
    BernieRace = {
        'White': .3,
        'African American': 0.2,
        'Hispanic': 0.3,
        'Asian': 0.3,
        'Other': 0.3
    }
    c = Candidate.Candidate("Trump", .7, trumpRace)
    d = Candidate.Candidate("Hillary", .4, HillRace)
    e = Candidate.Candidate("Cruz", .8, CruzRace)
    f = Candidate.Candidate("Kasich", .55, KasichRace)
    g = Candidate.Candidate("Sanders", .2, BernieRace)
    candidates = [c, d, e, f, g]
    for candidate in candidates:
        # ``with`` guarantees the file is flushed and closed after the dump.
        with open(candidate.name + ".cand", "wb") as handle:
            pickle.dump(candidate, handle)

    r1 = Region.Region(
        "Region_1",
        [.4, .2, .1, .1, .05],  ## race breakdown
        50,  ## avg Age
        10,  ## variation in age
        .6,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [1.8, 1.8, .4, .1, .3],  ## candidate pref
        [.4, .4, .2, .01, .07],  ## variation in candidate pref
        .6,  # spectrum
        .07,  # variation in spectrum
        .1,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref
    r2 = Region.Region(
        "Region_2",
        [.3, .3, .2, .1, .1],  ## race breakdown
        40,  ## avg Age
        7,  ## variation in age
        .5,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [.3, .4, .4, .2, .6],  ## candidate pref
        [.1, .1, .1, .1, .2],  ## variation in candidate pref
        .4,  # spectrum
        .15,  # variation in spectrum
        .2,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref
    r3 = Region.Region(
        "Region_3",
        [.4, .1, .3, .4, .1],  ## race breakdown
        45,  ## avg Age
        3,  ## variation in age
        .54,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [.2, .4, .5, .2, .4],  ## candidate pref
        [.05, .1, .1, .05, .1],  ## variation in candidate pref
        .5,  # spectrum
        .2,  # variation in spectrum
        .3,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref
    r4 = Region.Region(
        "Region_4",
        [.1, .6, .1, .2, .3],  ## race breakdown
        # NOTE(review): the other regions use 40-50 here; 4 may be a typo.
        4,  ## avg Age
        3,  ## variation in age
        .64,  ## prob voting
        .1,  ## variation in voting
        candidates,  ## candidates
        [.1, .4, .3, .2, .4],  ## candidate pref
        [.03, .1, .1, .05, .1],  ## variation in candidate pref
        .5,  # spectrum
        .2,  # variation in spectrum
        .1,  # immigrant prob
        [.05, .4, .4, .2, .2])  ## immigrant pref
    regions = [r1, r2, r3, r4]
    for region in regions:
        with open(region.name + ".rgn", "wb") as handle:
            pickle.dump(region, handle)
def return_candidates(list_of_targets,
                      initial_seq,
                      genes_sg_dict,
                      Omega,
                      df,
                      node,
                      for_single_gene=False,
                      cfd_dict=None,
                      PS_number=12):
    """Enumerate the permutations of a node's targets and score each one.

    :param list_of_targets: targets of the node; their differing positions
        are computed with ``wheres_the_differences_linear``.
    :param initial_seq: sequence the permutations are generated from.
    :param genes_sg_dict: maps each gene to the list of its targets.
    :param Omega: currently unused in this function.
    :param df: distance function; ``Metric.CRISTA`` is called as
        ``df(candidate[3:-6], target)``, any other function as
        ``df(candidate, target, cfd_dict)``.
    :param node: tree node; its ``polymorphic_sites`` attribute is set here.
    :param for_single_gene: when true a gene's score is the sum of the cut
        probabilities of its targets instead of ``1 - prod(1 - p)``.
    :param cfd_dict: forwarded to ``df`` for non-CRISTA metrics.
    :param PS_number: maximal number of polymorphic sites; nodes with more
        sites are skipped.  (The limit used to be hard-coded to 12, which is
        still the default.)
    :return: ``None`` when the node has more than ``PS_number`` polymorphic
        sites, otherwise a list of Candidates whose summed gene scores are at
        least 1.
    """
    dict_of_different_places = wheres_the_differences_linear(
        list_of_targets, df == Metric.CRISTA)
    node.polymorphic_sites = dict_of_different_places
    if len(dict_of_different_places) > PS_number:
        return None

    list_of_different_places = sorted(dict_of_different_places.items(),
                                      key=lambda item: item[0])
    list_of_perms_sequs = all_perms(initial_seq, None,
                                    list_of_different_places)

    perm_grades = []
    for candidate_str in list_of_perms_sequs:
        targets_dict = {}  # gene -> [[target, differences], ...]
        genes_score_dict = {}  # gene -> score of this candidate on the gene
        cut_expection = 0.0  # sum of the gene scores

        for gene, targets_lst_of_gene in genes_sg_dict.items():
            prob_gene_will_not_cut = 1
            num_of_cuts_per_gene = 0  # used only in the single gene version
            gene_hits = []
            for target in targets_lst_of_gene:
                if df == Metric.CRISTA:
                    distance_candidate_target = df(candidate_str[3:-6],
                                                   target)
                else:
                    distance_candidate_target = df(candidate_str, target,
                                                   cfd_dict)
                # A distance of exactly 1 means no chance of cutting.
                if distance_candidate_target == 1:
                    continue
                # The distance is between 0 and 1; 0 is usually a perfect
                # match, 1 is far.
                candidate_cut_prob = 1.0 - distance_candidate_target
                gene_hits.append(
                    [target, two_sequs_differeces(candidate_str, target)])
                prob_gene_will_not_cut = prob_gene_will_not_cut * (
                    1 - candidate_cut_prob)
                num_of_cuts_per_gene += candidate_cut_prob

            if len(gene_hits) > 0:
                targets_dict[gene] = gene_hits
                if for_single_gene:
                    gene_score = num_of_cuts_per_gene
                else:
                    gene_score = 1 - prob_gene_will_not_cut
                cut_expection += gene_score
                genes_score_dict[gene] = gene_score

        if cut_expection >= 1:  # is this condition necessary?
            perm_grades.append(
                Candidate.Candidate(candidate_str, cut_expection,
                                    genes_score_dict, targets_dict))
    return perm_grades
def process(self, directory, output, feature_type, candidate_type, verbose,
            meta, arff):
    """
    Processes pulsar candidates of the type specified by 'candidate_type'.
    Writes the features of each candidate found to a single file, 'output'.

    Parameters:

    directory      -    the directory containing the candidates to process.
    output         -    the file to write the features to.
    feature_type   -    the type of features to generate.

                        feature_type = 1 generates 12 features from Eatough et al., MNRAS, 407, 4, 2010.
                        feature_type = 2 generates 22 features from Bates et al., MNRAS, 427, 2, 2012.
                        feature_type = 3 generates 22 features from Thornton, PhD Thesis, Univ. Manchester, 2013.
                        feature_type = 4 generates 6 features from Lee et al., MNRAS, 333, 1, 2013.
                        feature_type = 5 generates 6 features from Morello et al., MNRAS, 433, 2, 2014.
                        feature_type = 6 generates 8 features from Lyon et al.,2015.
                        feature_type = 7 obtains raw integrated (folded) profile data.
                        feature_type = 8 obtains raw DM-SNR Curve data.

    candidate_type -    the type of candidate file being processed.

                        candidate_type = 1 assumes PHCX candidates output by the pipeline described by
                                         Morello et al., MNRAS 443, 2, 2014.
                        candidate_type = 2 assumes gnuzipped ('.gz') PHCX candidates produced by the
                                         pipeline described by Thornton., PhD Thesis, Univ. Manchester, 2013.
                        candidate_type = 3 assumes PFD files output by the LOTAAS and similar surveys in the
                                         presto PFD format.
                        candidate_type = 4 assumes PHCX candidates output by the SKA SA pipeline.

    verbose        -    debug logging flag, if true output statements will be verbose.
    meta           -    a flag that when set to true, indicates that meta information will be retained
                        in the output files produced by this code. So if meta is set to true, then each line
                        of features will have the full path to the candidate they belong to included.
                        Otherwise they will not, making it hard to find which features belong to which
                        candidate.
    arff           -    a flag that when set to true, indicates that meta output data will be written in
                        ARFF format.

    Return:

    N/A

    Any other candidate_type prints a message and exits via sys.exit().

    Every print below passes a single pre-formatted string, so the text is
    the same under the Python 2 print statement and the Python 3 function.
    """

    def flush_feature_store():
        # Write all buffered feature lines in one go, then free the buffer.
        self.appendToFile(output,
                          "".join(s + "\n" for s in self.featureStore))
        self.featureStore = []

    # Used to monitor feature creation statistics.
    candidatesProcessed = 0
    successes = 0
    failures = 0

    print("\n\t*************************")
    print("\t| Searching Recursively |")
    print("\t*************************")

    # Check the type of candidate file used.
    if candidate_type == 1 or candidate_type == 4:
        regex = self.phcxRegex
    elif candidate_type == 2:
        regex = self.gzPhcxRegex
    elif candidate_type == 3:
        regex = self.pfdRegex
    else:
        print("\tNo candidate file type provided, exiting...")
        sys.exit()
    print("\tSearching for candidates with file extension:  %s" % (regex,))
    fileTypeRegexes = [regex]

    print("\tSearching:  %s" % (directory,))

    start = datetime.datetime.now()  # Used to measure feature generation time.

    # For each type of file this program recognises
    for filetype in fileTypeRegexes:
        suffix = filetype.replace("*", "")

        # Loop through the specified directory
        for root, subFolders, filenames in os.walk(directory):

            # If the file type matches one of those this program recognises
            for filename in fnmatch.filter(filenames, filetype):

                cand = os.path.join(root, filename)  # Full path to the candidate.

                # If the file does not have the expected suffix (file
                # extension), skip to the next.
                if not cand.endswith(suffix):
                    continue

                candidatesProcessed += 1

                if candidatesProcessed % 10000 == 0:  # Every 10,000 candidates
                    # Besides giving feedback, the features collected so far
                    # are written out at set intervals.  This reduces the
                    # memory load and the disc load (lots of features in one
                    # go, as opposed to one by one).
                    print("\tCandidates processed:  %s" %
                          (candidatesProcessed,))
                    flush_feature_store()

                try:
                    # Create the candidate object.
                    c = Candidate.Candidate(cand, str(directory + cand))

                    # Get the features from the candidate.
                    features = c.getFeatures(feature_type, candidate_type,
                                             verbose)

                    # For ARFF output each candidate needs a label. The true
                    # label cannot be known here, so the unknown label '?' is
                    # appended as an additional feature.
                    if arff and feature_type > 0 and feature_type < 7:
                        features.append("?")

                    # Store the features so they can later be written to the
                    # specified output file.
                    if meta:
                        # Store with meta information, i.e. keep the candidate
                        # name (full path) with each feature set.
                        self.storeFeatureMeta(cand, features)
                    else:
                        # Store only the feature data.
                        self.storeFeatureNoMeta(cand, features)

                except Exception as e:  # Catch *all* exceptions (deliberate).
                    print("\tError reading candidate data :\n\t%s" %
                          (sys.exc_info()[0],))
                    print(self.format_exception(e))
                    print("\t%s  did not have features generated." % (cand,))
                    failures += 1
                    continue

                successes += 1

    # Save any remaining features, since it is possible that some features
    # were not written to the output file in the loop above.
    if len(self.featureStore) > 0:
        flush_feature_store()

    # Finally get the time that the procedure finished.
    end = datetime.datetime.now()

    # Output feature generation statistics.
    print("\tCompleted candidate search.")

    print("\n\t******************************")
    print("\t| Feature Generation Results |")
    print("\t******************************")
    print("\tCandidates processed:\t%s" % (candidatesProcessed,))
    print("\tSuccesses:\t%s" % (successes,))
    print("\tFailures:\t%s" % (failures,))
    print("\tExecution time:  %s" % (str(end - start),))
def find_w_set_cover_heated(best_permutations_DS,
                            sg_genes_dict,
                            thr,
                            distance_matrix,
                            alfa,
                            genes_sg_dict=None):
    '''Greedy weighted set cover: pick candidates until every gene is covered.

    Each pass scans all candidates. For a candidate, the "newly covered"
    genes are the still-uncovered genes whose entry in its
    ``genes_score_dict`` is at least ``thr``. Its weight is

        w = price / number_of_newly_covered_genes

    where ``price`` is the average of ``dist(j, k) ** alfa`` over gene pairs
    (j <= k) of the newly covered genes, each pair weighted by
    ``((1 - score_j) + (1 - score_k)) / 2``; ``price`` is 1 when the total
    pair weight is zero. A candidate replaces the current best when its
    weight is not larger than the best weight so far and
    ``prob_cover_genes_lst`` rates it strictly higher than the best so far.

    best_permutations_DS - list of candidate objects exposing
        ``genes_score_dict`` (gene name -> score). It is shallow-copied,
        the caller's list is not modified.
    sg_genes_dict - maps a target to the list of genes it belongs to; the
        union of those genes is the set that has to be covered.
    thr - minimal score for a gene to count as covered by a candidate.
    distance_matrix - object with a ``names`` list and ``[row, col]``
        indexing (presumably a Bio.Phylo DistanceMatrix - TODO confirm).
    alfa - exponent applied to the pairwise distance.
    genes_sg_dict - optional, gene -> list of its targets. For every gene
        with at least one target, a default candidate built from its first
        target is added to the pool.

    Returns the list of chosen candidates, in the order they were chosen.
    The cover may be partial: the search stops as soon as a full pass
    selects no candidate.
    '''
    candidates = copy.copy(best_permutations_DS)
    res = []
    gene_names_lst = distance_matrix.names

    if genes_sg_dict:
        for gene, targets in genes_sg_dict.items():
            if len(targets) == 0:
                print("no targets for gene " + gene)
                # The original tried to remove the gene from an undefined
                # name (NameError). The gene is simply skipped now: the
                # names list must stay aligned with the rows of
                # distance_matrix, so it is deliberately left untouched.
                continue
            c = Candidate.Candidate(targets[0])
            c.fill_default_fildes(sg_genes_dict[targets[0]])
            candidates.append(c)

    uncovered_genes = set()
    for genes_lst in sg_genes_dict.values():
        uncovered_genes.update(genes_lst)

    while len(uncovered_genes) > 0 and len(candidates) > 0:
        best_current_perm, best_prob_of_covered = None, 0
        best_w = len(uncovered_genes)

        for cand in candidates:
            scores = cand.genes_score_dict
            new_genes_coverd = [
                gene for gene, score in scores.items()
                if gene in uncovered_genes and score >= thr
            ]
            if not new_genes_coverd:
                continue

            # Row/column of every newly covered gene in the distance matrix,
            # looked up once instead of once per pair.
            positions = [gene_names_lst.index(g) for g in new_genes_coverd]

            # Compute the weight: the lower the price, the lighter the set.
            price = 0
            deniminator = 0
            for j, gene_j in enumerate(new_genes_coverd):
                score_j = scores[gene_j]
                # NOTE(review): the original started this range at ``i``
                # (the candidate index), which made the weight depend on
                # the candidate's position in the list. ``j`` gives every
                # unordered gene pair exactly once - confirm the diagonal
                # (j == k) is meant to be included.
                for k in range(j, len(new_genes_coverd)):
                    score_k = scores[new_genes_coverd[k]]
                    curr_avg = (1 - score_j + 1 - score_k) / 2
                    deniminator += curr_avg
                    dist = distance_matrix[positions[j], positions[k]]
                    price += (dist**alfa) * curr_avg
            if deniminator == 0:
                price = 1
            else:
                price = price / deniminator
            w = price / len(new_genes_coverd)

            # Keep the best sgRNA of the current pass.
            if w <= best_w:
                prob_cover = prob_cover_genes_lst(cand, new_genes_coverd)
                if prob_cover > best_prob_of_covered:
                    best_w, best_prob_of_covered = w, prob_cover
                    best_current_perm = cand

        if best_current_perm is None:
            # Nothing was selected, so the next pass would see exactly the
            # same state: stop instead of looping forever.
            break

        res.append(best_current_perm)
        for gene, score in best_current_perm.genes_score_dict.items():
            if score >= thr:
                # The gene may already have been covered by a previous pick.
                uncovered_genes.discard(gene)
    return res
def addCandidate(self, filename): ''' filename - name of the Candidate file. Candidate file has the following minimum content: Period P fP Where P is the candidate period, and fP is the fractional error. Optional lines are: Phase ph d_ph Width log10_w dlog10_w Acceleration a d_a CircBinary log10_bp dlog10_bp log10_ba log10d_ba Scattering log10_s dlog10_s DM dm d_dm In each case the parameter and desired perior is given, so that parameter is searched over x +/- dx ''' self.Cand = Candidate.Candidate(filename) if(self.Cand.FitCircBinary == True): self.CosOrbit = gpuarray.empty(self.InterpBinarySteps+1, np.float64) self.SinOrbit = gpuarray.empty(self.InterpBinarySteps+1, np.float64) self.CPUCosOrbit, self.CPUSinOrbit = self.KeplersOrbit(0) self.CosOrbit = gpuarray.to_gpu(np.float64(self.CPUCosOrbit)) self.SinOrbit = gpuarray.to_gpu(np.float64(self.CPUSinOrbit)) if(self.Cand.FitEccBinary == True): print self.Cand.pmin[7], self.Cand.pmax[7] self.MinInterpEcc = self.Cand.pmin[7] self.MaxInterpEcc = self.Cand.pmax[7] self.InterpEccStepSize = 1 self.NumInterpEccSteps = 1 if(self.MaxInterpEcc - self.MinInterpEcc > 10.0**-10): self.NumInterpEccSteps = 100 self.InterpEccStepSize = (self.MaxInterpEcc - self.MinInterpEcc)/self.NumInterpEccSteps print "Interp details:", self.MinInterpEcc, self.MaxInterpEcc, 10.0**self.MinInterpEcc, 10.0**self.MaxInterpEcc, self.NumInterpEccSteps, self.InterpEccStepSize self.CosOrbit = [] self.SinOrbit = [] self.CPUCosOrbit = [] self.CPUSinOrbit = [] for i in range(self.NumInterpEccSteps): Ecc = 10.0**(self.MinInterpEcc + i*self.InterpEccStepSize) print "Computing Ecc: ", i, self.MinInterpEcc + i*self.InterpEccStepSize, Ecc COrbit, SOrbit = self.KeplersOrbit(Ecc) self.CPUCosOrbit.append(COrbit) self.CPUSinOrbit.append(SOrbit) self.CosOrbit.append(gpuarray.empty(self.InterpBinarySteps+1, np.float64)) self.SinOrbit.append(gpuarray.empty(self.InterpBinarySteps+1, np.float64)) self.CosOrbit[i] = gpuarray.to_gpu(np.float64(self.CPUCosOrbit[i])) 
self.SinOrbit[i] = gpuarray.to_gpu(np.float64(self.CPUSinOrbit[i])) if(self.Cand.FitGRBinary == True or self.Cand.FitPKBinary == True): print self.Cand.pmin[7], self.Cand.pmax[7] self.MinInterpEcc = self.Cand.pmin[7] self.MaxInterpEcc = self.Cand.pmax[7] self.NumInterpEccSteps = 1 self.InterpEccStepSize = 1 if(self.MaxInterpEcc - self.MinInterpEcc > 10.0**-10): self.NumInterpEccSteps = 100 self.InterpEccStepSize = (self.MaxInterpEcc - self.MinInterpEcc)/self.NumInterpEccSteps print "Interp details:", self.MinInterpEcc, self.MaxInterpEcc, 10.0**self.MinInterpEcc, 10.0**self.MaxInterpEcc, self.NumInterpEccSteps, self.InterpEccStepSize self.CosOrbit = [] self.SinOrbit = [] self.TrueAnomaly = [] self.CPUCosOrbit = [] self.CPUSinOrbit = [] self.CPUTrueAnomaly = [] for i in range(self.NumInterpEccSteps): Ecc = 10.0**(self.MinInterpEcc + i*self.InterpEccStepSize) print "Computing Ecc: ", i, self.MinInterpEcc + i*self.InterpEccStepSize, Ecc COrbit, SOrbit = self.KeplersOrbit(Ecc) self.CPUCosOrbit.append(COrbit) self.CPUSinOrbit.append(SOrbit) self.CosOrbit.append(gpuarray.empty(self.InterpBinarySteps+1, np.float64)) self.SinOrbit.append(gpuarray.empty(self.InterpBinarySteps+1, np.float64)) self.CosOrbit[i] = gpuarray.to_gpu(np.float64(self.CPUCosOrbit[i])) self.SinOrbit[i] = gpuarray.to_gpu(np.float64(self.CPUSinOrbit[i])) #double sqr1me2 = sqrt(1-Ecc*Ecc); #double cume = CosBinarySignal-Ecc; #double onemecu = 1.0-Ecc*CosBinarySignal; #//double sae = sqr1me2*SinBinarySignal/onemecu; #//double cae = cume/onemecu; #double ae = TrueAnomaly; //atan2(sae, cae); #//ae = ae + 2*M_PI - trunc((ae+2*M_PI)/(2*M_PI))*2*M_PI; sae = np.sqrt(1.0 - Ecc*Ecc)*SOrbit/(1.0 - Ecc*COrbit) cae = (COrbit - Ecc)/(1.0 - Ecc*COrbit) self.CPUTrueAnomaly.append(np.arctan2(sae, cae)%(2*np.pi)) self.TrueAnomaly.append(gpuarray.empty(self.InterpBinarySteps+1, np.float64)) self.TrueAnomaly[i] = gpuarray.to_gpu(np.float64(self.CPUTrueAnomaly[i]))
def page_candidate():
    """Render ``candidate.html`` for the current candidates.

    The template receives ``Candidate(serialize=False)`` as ``candidates``
    and ``'Candidate'`` as the selected menu item.
    """
    return render_template(
        'candidate.html',
        selected_menu_item='Candidate',
        candidates=Candidate(serialize=False),
    )
def initPopulation(self):
    """Return a new list of ``Popln_Size`` freshly constructed candidates.

    Every candidate is created with ``num_games=Num_Games`` and
    ``max_moves=Max_Moves``.
    """
    return [
        Candidate(num_games=Num_Games, max_moves=Max_Moves)
        for _ in range(Popln_Size)
    ]
def return_candidates(list_of_targets,
                      initial_seq,
                      genes_sg_dict,
                      Omega,
                      df,
                      node,
                      for_single_gene=False,
                      cfd_dict=None,
                      PS_number=12):
    '''Enumerate candidate sequences for a node and score them per gene.

    The polymorphic sites of ``list_of_targets`` are computed and stored on
    ``node.polymorphic_sites``. Every sequence produced by ``all_perms``
    over those sites is scored against all targets of every gene, and a
    Candidate is created for each sequence whose summed gene score is at
    least 1.

    list_of_targets - target sequences of this node.
    initial_seq - sequence handed to ``all_perms`` as the starting point.
    genes_sg_dict - gene name -> list of that gene's targets.
    Omega - unused here; kept for interface compatibility.
    df - distance function called as ``df(candidate, target, cfd_dict)``;
        the cut probability is taken as ``1 - distance`` (the distance is
        assumed to lie in [0, 1], 0 being a perfect match).
    node - object that receives the ``polymorphic_sites`` attribute.
    for_single_gene - when true a gene's score is the sum of the cut
        probabilities of its targets; otherwise it is the probability that
        at least one of its targets is cut.
    cfd_dict - passed through to ``df``.
    PS_number - maximal number of polymorphic sites; with more sites the
        enumeration is skipped.

    Returns None when there are more than ``PS_number`` polymorphic sites,
    otherwise the list of Candidate objects (also printed to stdout).
    '''
    dict_of_different_places = wheres_the_differences_linear(list_of_targets)
    node.polymorphic_sites = dict_of_different_places
    # The limit used to be hard-coded to 12 and PS_number was ignored; the
    # default keeps the previous behaviour.
    if len(dict_of_different_places) > PS_number:
        return None

    list_of_different_places = sorted(dict_of_different_places.items(),
                                      key=lambda item: item[0])

    # Going over all the permutations.
    list_of_perms_sequs = all_perms(initial_seq, None,
                                    list_of_different_places)
    perm_grades = []
    for candidate_str in list_of_perms_sequs:
        # gene name -> [[target, differences to the candidate], ...]
        targets_dict = {}
        # gene name -> score of the gene for this candidate
        genes_score_dict = {}
        cut_expection = 0
        for gene, targets_lst_of_gene in genes_sg_dict.items():
            prob_gene_will_not_cut = 1  # easier to accumulate than "cut"
            num_of_cuts_per_gene = 0  # used only in the single gene version
            gene_targets = []
            for target in targets_lst_of_gene:
                candidate_cut_prob = 1 - df(candidate_str, target, cfd_dict)
                gene_targets.append(
                    [target, two_sequs_differeces(candidate_str, target)])
                prob_gene_will_not_cut *= 1 - candidate_cut_prob
                num_of_cuts_per_gene += candidate_cut_prob

            if not gene_targets:
                # A gene without targets contributes nothing.
                continue
            targets_dict[gene] = gene_targets
            if for_single_gene:
                gene_score = num_of_cuts_per_gene
            else:
                gene_score = 1 - prob_gene_will_not_cut
            genes_score_dict[gene] = gene_score
            cut_expection += gene_score

        if cut_expection >= 1:
            perm_grades.append(
                Candidate.Candidate(candidate_str, cut_expection,
                                    genes_score_dict, targets_dict))
    del list_of_perms_sequs
    print(perm_grades)
    return perm_grades
def main(args):
    """Run the consensus simulation and report what every node settles on.

    There are four required command line arguments:

        args[1]  p_graph           (.1, .2, .3)    prob. that an edge exists
        args[2]  p_malicious       (.15, .30, .45) prob. that a node is malicious
        args[3]  p_txDistribution  (.01, .05, .10) prob. of handing an initial
                                                   transaction to a node
        args[4]  numRounds         (10, 20)        number of simulation rounds

    args[0] is not read (presumably the program name).

    Side effects: prints a summary to stdout and, for every round and node,
    writes the ids returned by that node's sendToFollowers() to
    ``results/round_<round>_node_<node>.txt``. The ``results`` directory
    must already exist.
    """
    numNodes = 100
    p_graph = float(args[1])
    p_malicious = float(args[2])
    p_txDistribution = float(args[3])
    # NOTE(review): the nodes are still constructed with the float value,
    # as before; it is only truncated to an int for the round loop below.
    numRounds = float(args[4])

    # Pick which nodes are malicious and which are compliant.
    nodes = []
    mal = 0
    com = 0
    for _ in range(numNodes):
        if random.random() < p_malicious:
            nodes.append(MaliciousNode(p_graph, p_malicious,
                                       p_txDistribution, numRounds))
            mal += 1
        else:
            nodes.append(CompliantNode(p_graph, p_malicious,
                                       p_txDistribution, numRounds))
            com += 1
    print("{} malicious nodes and {} compliant nodes".format(mal, com))

    # followees[j][i] == 1 means that node j follows node i.
    followees = [[1 if random.random() < p_graph and i != j else 0
                  for i in range(numNodes)]
                 for j in range(numNodes)]
    for i in range(numNodes):
        nodes[i].setFollowees(followees[i])

    # Initialize a set of 500 valid Transactions with random ids.
    numTx = 500
    validTxIds = [random.randint(1000, 50000) for _ in range(numTx)]
    validTxIdSet = set(validTxIds)  # constant-time validity checks

    # Distribute the 500 Transactions throughout the nodes, to initialize
    # the starting state of Transactions each node has heard. The
    # distribution is random with probability p_txDistribution for each
    # Transaction-Node pair.
    for i in range(numNodes):
        pendingTransactions = [Transaction(txid) for txid in validTxIds
                               if random.random() < p_txDistribution]
        nodes[i].setPendingTransaction(pendingTransactions)

    for round_idx in range(int(numRounds)):
        # Gather all the proposals into a map. The key is the index of the
        # node receiving proposals, the value is the list of Candidates
        # (proposed transaction, index of the proposing node) it receives.
        allProposals = {}
        for i in range(numNodes):
            for tx in nodes[i].sendToFollowers():
                # Ensure that each tx is actually valid. The check is on
                # the transaction id, and an invalid tx is skipped without
                # discarding the remaining proposals of this node.
                if tx.id not in validTxIdSet:
                    continue
                for j in range(numNodes):
                    # tx only matters if j follows i; a non-follower must
                    # not end the scan over the remaining nodes.
                    if not followees[j][i]:
                        continue
                    allProposals.setdefault(j, []).append(Candidate(tx, i))

        # Distribute the Proposals to their intended recipients as Candidates.
        for i in range(numNodes):
            if i in allProposals:
                nodes[i].receiveFromFollowees(allProposals.get(i))
            transactions = nodes[i].sendToFollowers()
            # Save to file.
            filename = "results/"+"round_"+str(round_idx)+"_node_"+str(i)+".txt"
            with open(filename, "w+") as f:
                for tx in transactions:
                    f.write(str(tx.id)+"\n")

    # Print results.
    for i in range(numNodes):
        transactions = nodes[i].sendToFollowers()
        print("Transaction ids that Node " + str(i) + " believes consensus on: ", end="")
        print("{} transactions".format(len(transactions)))