def leaderboard(m_val, n_val, exp_spectrum):
    # masses = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]
    masses = list(map(int, convolution(m_val, exp_spectrum).split(' ')))
    candidates = list(masses)
    max_exp = max(exp_spectrum)
    leader = '0'
    while candidates:
        d = collections.defaultdict(list)
        candidates = branch(masses, candidates)
        copy_candidates = list(candidates)
        # Iterate over a snapshot so removing items below doesn't skip candidates.
        for item in list(copy_candidates):
            current_peptide = item
            cyclic_theo = cyclic_spectrum(current_peptide)
            max_theo = max(cyclic_theo)
            if max_theo > max_exp:
                # candidates.remove(current_peptide)  # This didn't work, threw 'x not in candidates' error.
                copy_candidates.remove(current_peptide)
            else:
                score_theo = score(cyclic_theo, exp_spectrum)  # score of cyclic theoretical spectrum
                leader_cyclic_theo = cyclic_spectrum(leader)  # cyclic theoretical spectrum of current leader peptide
                score_leader = score(leader_cyclic_theo, exp_spectrum)  # score of leader's cyclic theoretical spectrum
                if score_theo > score_leader:
                    leader = current_peptide
                d[score_theo].append(current_peptide)
        # bound pt2
        top_n_scores = get_top_n_scores(d, n_val)
        candidates = get_top_scorers(d, top_n_scores, n_val)
    return leader
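# The `score(theoretical, experimental)` helper above is not shown. A minimal
# sketch, assuming it implements the usual leaderboard scoring (the number of
# masses shared, with multiplicity, between the theoretical and experimental
# spectra); the project's real helper may differ:
import collections

def score(theoretical, experimental):
    theo_counts = collections.Counter(theoretical)
    exp_counts = collections.Counter(experimental)
    # For each mass, count the occurrences present in both spectra.
    return sum(min(count, exp_counts[mass]) for mass, count in theo_counts.items())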
def classbyUPGMA(self, obstimes, trainingtimes, obsnodes):
    self._classorder = ["C1"]
    self._classscore["C1"] = score()
    for u, v in itertools.combinations(obsnodes.keys(), 2):  # to predict new pairs of nodes and initialize the classes
        link = frozenset([u, v])
        self._classUnion.addPair(link)
        if link not in obstimes:
            self._classscore["C1"].addPair(link)
    learningperiods = dict()
    for link in obstimes:
        if link in trainingtimes:
            learningperiods[link] = obstimes[link] + trainingtimes[link]
        else:
            learningperiods[link] = obstimes[link]
    Y, self._label = classes.Makedistmatx(learningperiods, self._VandPparameter, 0, 0)
    self._linkage = hierarchy.average(Y)
    cutree = hierarchy.cut_tree(self._linkage, self._nbcluster)
    for i in range(self._nbcluster):
        self._classscore["C" + str(i + 2)] = score()
        self._classorder.append("C" + str(i + 2))
    for i in range(len(self._label)):
        u = self._label[i]
        self._classscore["C" + str(cutree[i][0] + 2)].addPair(u)
def test_score_delta0():
    assert 1.0 == score(0, np.array([[0, 0], [0, 0]]), np.array([[0, 0], [0, 0]]))
    assert 0.75 == score(0, np.array([[0, 0], [0, 0]]), np.array([[0, 1], [0, 0]]))
    assert 0.0 == score(0, np.array([[0, 0], [0, 0]]), np.array([[1, 1], [1, 1]]))
def test_score_delta1_toad():
    toad_1 = np.array([[0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0, 0],
                       [0, 0, 1, 1, 1, 0],
                       [0, 1, 1, 1, 0, 0],
                       [0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0, 0]])
    toad_2 = np.array([[0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 1, 0, 0],
                       [0, 1, 0, 0, 1, 0],
                       [0, 1, 0, 0, 1, 0],
                       [0, 0, 1, 0, 0, 0],
                       [0, 0, 0, 0, 0, 0]])
    assert 1.0 == score(1, toad_1, toad_2)
    assert 1.0 == score(3, toad_1, toad_2)
    assert 30 / 36 == score(3, np.zeros(toad_2.size).reshape((6, 6)), toad_2)
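# These tests (and the Game-of-Life snippets below) assume a `score(delta, start, stop)`
# helper whose implementation is not shown. A minimal sketch consistent with the
# assertions: evolve `start` forward `delta` generations and return the fraction of
# cells that agree with `stop`. A toroidal grid is assumed here; the project's real
# helper may handle boundaries differently.
import numpy as np

def life_step(grid):
    # Sum the eight neighbours of every cell, wrapping around the edges.
    neighbours = sum(np.roll(np.roll(grid, dy, axis=0), dx, axis=1)
                     for dy in (-1, 0, 1) for dx in (-1, 0, 1)
                     if (dy, dx) != (0, 0))
    return ((neighbours == 3) | ((grid == 1) & (neighbours == 2))).astype(int)

def score(delta, start, stop):
    grid = np.asarray(start)
    for _ in range(delta):
        grid = life_step(grid)
    return np.mean(grid == np.asarray(stop))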
def eval(predict):
    multi_step_errors = []
    one_step_errors = []
    for delta, stop in tqdm(bitmap.generate_test_set(set_size=args.test_size,
                                                     seed=args.test_seed)):
        start = predict(delta, stop)
        multi_step_errors.append(1 - score(delta, start, stop))
        one_step_start = start if delta == 1 else predict(1, stop)
        one_step_errors.append(1 - score(1, one_step_start, stop))
    return (np.mean(multi_step_errors), np.var(multi_step_errors),
            np.mean(one_step_errors), np.var(one_step_errors))
def main():
    rankings = getRankings()
    matches = []
    lines = [line.rstrip('\n') for line in open('scores/2014-15_round16Scores.txt')]
    for line in lines:
        r = line.split(", ")
        # print r
        m = classes.Match(r[0])
        m.setVisitingTeam(r[3])
        m.setHomeScore(r[1])
        m.setVisitorScore(r[2])
        matches.append(m)
    predictions = predictMatches(matches, rankings, verbose=False)
    scoring.score(matches, predictions)
def confirmChoices(choices, board, boardH, boardW, dictionaryFile, modDict):
    confirmed = dict()
    dictionary = fileToSet(dictionaryFile)
    for word, knowledge in choices.items():
        used, horizontal, vertical = knowledge
        for path in horizontal:
            location = path[0]
            ok, scoreSides = checkPath(word, used, path, "u", "d",
                                       board, boardH, boardW, dictionary, modDict)
            if ok is True:
                wordScore, usedWord = score(used, location, 0, board, modDict)
                scoreSides += wordScore
                value = confirmed.get(word, list())
                value.append((scoreSides, usedWord, location, "horizontal"))
                confirmed[word] = value
        for path in vertical:
            location = path[0]
            ok, scoreSides = checkPath(word, used, path, "l", "r",
                                       board, boardH, boardW, dictionary, modDict)
            if ok is True:
                wordScore, usedWord = score(used, location, 1, board, modDict)
                scoreSides += wordScore
                value = confirmed.get(word, list())
                value.append((scoreSides, usedWord, location, "vertical"))
                confirmed[word] = value
    return confirmed
def index():
    inv_map = score_scraper.ncaa_to_id()
    master = score_scraper.get_master_bracket()
    elim = score_scraper.get_elim()
    users = User.query.all()
    scoring.convert_brackets_to_id(inv_map, users, master)
    end_rounds = scoring.get_end_rounds(users)
    score = scoring.score(master, users)
    order = scoring.order(score)
    rank = scoring.rank(order, score)
    potential = scoring.potential(elim, users, master)
    game_scores = score_scraper.get_scoreticker_json()
    seo = score_scraper.convert_short_to_seo()
    return render_template('index.html',
                           users=users,
                           score=score,
                           potential=potential,
                           end_rounds=end_rounds,
                           elim=elim,
                           order=order,
                           rank=rank,
                           User=User,
                           master=master,
                           matches=game_scores,
                           seo=seo)
def entries():
    users = User.query.all()
    users.sort(key=lambda x: x.firstname, reverse=False)
    master = score_scraper.get_master_bracket()
    score = scoring.score(master, users)
    order = scoring.order(score)
    rank = scoring.rank(order, score)
    bracket_teams = score_scraper.get_bracket_teams()
    if current_user.is_authenticated:
        user_id = request.args.get('id', default=current_user.id, type=int)
    else:
        user_id = request.args.get('id', default=1, type=int)
    elim = score_scraper.get_elim()
    game_scores = score_scraper.get_scoreticker_json()
    if user_id == -1 or User.query.get(user_id).round1 is None:
        display = []
    else:
        display = User.query.get(user_id).round1.replace('"', '').replace(
            '[', '').replace(']', '').split(',')
    return render_template('entries.html',
                           users=users,
                           display=display,
                           master=master,
                           elim=elim,
                           user_id=user_id,
                           User=User,
                           order=order,
                           Users=users,
                           rank=rank,
                           matches=game_scores,
                           teams=bracket_teams)
def classbyUPGMASIZE(self, obstimes, trainingtimes, obsnodes):
    self._classorder = ["C1"]
    self._classscore["C1"] = score()
    for u, v in itertools.combinations(obsnodes.keys(), 2):  # to predict new pairs of nodes and initialize the classes
        link = frozenset([u, v])
        self._classUnion.addPair(link)
        if link not in obstimes:
            self._classscore["C1"].addPair(link)
    learningperiods = dict()
    for link in obstimes:
        if link in trainingtimes:
            learningperiods[link] = obstimes[link] + trainingtimes[link]
        else:
            learningperiods[link] = obstimes[link]
    Y, self._label = classes.Makedistmatx(learningperiods, self._VandPparameter, 0, 0)
    self._linkage = hierarchy.average(Y)
    n = len(self._label)
    # print(self._label)
    q = []
    q.append(self._linkage[-1, 0])
    q.append(self._linkage[-1, 1])
    q.sort()
    for i in range(self._nbcluster - 2):
        X = q.pop(q.index(max(q, key=lambda x: self._linkage[int(x - n), 3])))
        q.append(self._linkage[int(X - n), 0])
        q.append(self._linkage[int(X - n), 1])
        q.sort()
    q.sort(key=lambda x: self._linkage[int(x - n), 3], reverse=True)
    self._cuttree = [-1] * n
    for i in range(self._nbcluster):
        self.getLeafID(q[i], i)
    # cutree = hierarchy.cut_tree(self._linkage, self._nbcluster)
    # print(cutree)
    for i in range(self._nbcluster):
        self._classscore["C" + str(i + 2)] = score()
        self._classorder.append("C" + str(i + 2))
    for i in range(len(self._label)):
        u = self._label[i]
        self._classscore["C" + str(self._cuttree[i] + 2)].addPair(u)
def ensemble(predicts, deltas_batch, stops_batch):
    predictions = np.array([p(deltas_batch, stops_batch) for p in predicts])
    scores = np.array([[score(deltas_batch[j], predictions[i][j], stops_batch[j])
                        for j in range(len(predictions[i]))]
                       for i in range(len(predicts))])
    max_idxs = np.argmax(scores, axis=0)
    best = []
    for i, best_idx in enumerate(max_idxs):
        best.append(predictions[best_idx][i])
    return np.array(best)
def test_score_block():
    block = np.array([
        [0, 0, 0, 0],
        [0, 1, 1, 0],
        [0, 1, 1, 0],
        [0, 0, 0, 0],
    ])
    block2 = np.array([
        [1, 1, 0, 0],
        [1, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ])
    assert 1.0 == score(1, block, block)
    assert 1.0 == score(10, block, block)
    assert 10 / 16 == score(1, block2, block)
    assert 10 / 16 == score(10, block2, block)
def eval(predict):
    multi_step_errors = []
    one_step_errors = []
    for batch in tqdm(grouper(bitmap.generate_test_set(set_size=args.test_size,
                                                       seed=args.test_seed), 100)):
        deltas, stops = zip(*batch)
        delta_batch = np.array(deltas)
        stop_batch = np.array(stops)
        start_batch = predict(deltas, stops)
        for delta, start, stop in zip(delta_batch, start_batch, stop_batch):
            multi_step_errors.append(1 - score(delta, start, stop))
        one_deltas = np.ones_like(delta_batch)
        # Reuse the multi-step prediction where delta == 1; otherwise predict one step back.
        one_step_start = np.where((delta_batch == 1)[:, None, None],
                                  start_batch, predict(one_deltas, stops))
        for delta, start, stop in zip(one_deltas, one_step_start, stop_batch):
            one_step_errors.append(1 - score(delta, start, stop))
    return (np.mean(multi_step_errors), np.var(multi_step_errors),
            np.mean(one_step_errors), np.var(one_step_errors))
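# `grouper` is not defined in this excerpt. A simple variant that batches an
# iterable into tuples of up to `n` items (the classic itertools recipe instead
# pads the last batch with a fillvalue, which would then need filtering):
import itertools

def grouper(iterable, n):
    it = iter(iterable)
    while True:
        batch = tuple(itertools.islice(it, n))
        if not batch:
            return
        yield batch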
def test_step_back_all_3x3(alg_class):
    alg = alg_class(tile_graph)
    for Aorig in tqdm(generate_all(3, 3)):
        X = np.copy(Aorig)
        X = life_step(X)
        A = alg.step_back(X)
        sc = score(1, A, X)
        assert sc == 1.0
def __init__(self):
    self._classorder = list()
    self._classscore = dict()
    self._classUnion = score()
    self._threshold = None
    self._linkage = []
    self._label = []
    self._VandPparameter = None
    self._nbcluster = None
    self._cuttree = None
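# The `score` objects used by this link-prediction class are exercised in these
# excerpts only through their constructor and `addPair`. A minimal stand-in
# consistent with that usage (the real class almost certainly also accumulates
# per-class prediction metrics):
class score:
    def __init__(self):
        # Node pairs (frozensets) assigned to this class.
        self.pairs = set()

    def addPair(self, link):
        self.pairs.add(link)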
def getStartingWord(modDict, star, board, boardW, rack, dictionary):
    boardWidth = boardW
    if board[star] != (".", "."):
        return None
    possible = getAnagrams(rack, dictionary)
    overallHighestScore = 0
    play = list()
    for word, used in possible.items():
        highestScore = 0
        positions = list()
        usedlen = len(used)
        for i in range(usedlen):
            x0 = star[0] + 1 - usedlen + i
            xe = x0 + usedlen - 1
            y = star[1]
            if x0 < 1:
                continue
            if xe > boardWidth:
                break
            posscore = score(used, (x0, y), 0, board, modDict)[0]
            if posscore == highestScore:
                positions.append((x0, y))
            elif posscore > highestScore:
                highestScore = posscore
                positions = [(x0, y)]
        if highestScore == overallHighestScore:
            for position in positions:
                play.append((overallHighestScore, word, used, position, "horizontal", used))
        if highestScore > overallHighestScore:
            overallHighestScore = highestScore
            play = list()
            for position in positions:
                play.append((overallHighestScore, word, used, position, "horizontal", used))
    return play
def classbythreshold(self, obstimes, obsnodes):
    # Class making.
    # Three classes: C1 = new link, C2 = fewer than classthreshold observations,
    # C3 = at least classthreshold observations.
    self._classorder = ["C1", "C2", "C3"]
    self._classscore["C1"] = score()
    self._classscore["C2"] = score()
    self._classscore["C3"] = score()
    # print(self._classthreshold)
    for u, v in itertools.combinations(obsnodes.keys(), 2):  # to predict new pairs of nodes and initialize the classes
        link = frozenset([u, v])
        self._classUnion.addPair(link)
        if link not in obstimes:
            self._classscore["C1"].addPair(link)
        elif len(obstimes[link]) < self._classthreshold:
            self._classscore["C2"].addPair(link)
        else:
            self._classscore["C3"].addPair(link)
def __init__(self, filename, scoring_type, test_size=0.25, target=''):
    self.X = pd.read_csv(filename).drop(target, axis=1)
    self.Y = pd.read_csv(filename)[target]
    self.test_size = test_size
    self.target = target
    self.scoring = scoring.score(scoring_type)
    self.scores_dic = {}
    self.models_files = {}
    self.top_model = ''
    self.x_train, self.x_test, self.y_train, self.y_test = \
        train_test_split(self.X, self.Y, test_size=self.test_size)
    self.scaler = StandardScaler()
def subject_regularize(rfcs, X_int, X_other, Y, oob=False,
                       regularize=[0.75, 0.3, 0.65]):
    if len(regularize) == 1:
        regularize = regularize * 3
    observed_ = []
    predicted_ = []
    for subject in range(1, 50):
        observed = Y['subject'][subject]
        rfc = rfcs[1][subject]
        if oob:
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_other)
            predicted_int = rfc.predict(X_int)
            predicted[:, 0] = predicted_int[:, 0]
        observed_.append(observed)
        predicted_.append(predicted)
    predicted = np.dstack(predicted_)
    observed = np.ma.dstack(observed_)
    predicted_mean = np.mean(predicted, axis=2, keepdims=True)
    predicted_std = np.std(predicted, axis=2, keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean, predicted_std)).squeeze()
    predicted_int = regularize[0] * predicted_mean + (1 - regularize[0]) * predicted
    predicted_ple = regularize[1] * predicted_mean + (1 - regularize[1]) * predicted
    predicted_dec = regularize[2] * predicted_mean + (1 - regularize[2]) * predicted
    predicted = regularize[0] * predicted_mean + (1 - regularize[0]) * predicted
    r_int = scoring.r('int', predicted_int, observed)
    r_ple = scoring.r('ple', predicted_ple, observed)
    r_dec = scoring.r('dec', predicted_dec, observed)
    score1_ = scoring.score(predicted, observed, n_subjects=49)
    score1 = scoring.rs2score(r_int, r_ple, r_dec)
    # print(score1_, score1)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)" %
          (1, score1, r_int, r_ple, r_dec))
    score2 = scoring.score2(predicted_mean_std, Y['mean_std'])
    r_int_mean = scoring.r2('int', 'mean', predicted_mean_std, Y['mean_std'])
    r_ple_mean = scoring.r2('ple', 'mean', predicted_mean_std, Y['mean_std'])
    r_dec_mean = scoring.r2('dec', 'mean', predicted_mean_std, Y['mean_std'])
    r_int_sigma = scoring.r2('int', 'sigma', predicted_mean_std, Y['mean_std'])
    r_ple_sigma = scoring.r2('ple', 'sigma', predicted_mean_std, Y['mean_std'])
    r_dec_sigma = scoring.r2('dec', 'sigma', predicted_mean_std, Y['mean_std'])
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)" %
          (2, score2, r_int_mean, r_ple_mean, r_dec_mean,
           r_int_sigma, r_ple_sigma, r_dec_sigma))
    return (r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
            r_int_sigma, r_ple_sigma, r_dec_sigma)
def count_shuffle(rides: list, car_number: int, final_time: int, bonus: int,
                  iterations: int = 100) -> tuple:
    best_score = 0
    best_result = None
    for _ in range(iterations):
        car_assignments = chunk_shuffle(len(rides), car_number)
        local_score = score(car_assignments, rides, final_time, bonus)
        if local_score > best_score:
            best_result = car_assignments
            best_score = local_score
    return best_score, best_result
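# `chunk_shuffle` is not shown in this excerpt. One plausible reading, given how
# its result is consumed, is "randomly deal the ride indices into one group per
# car"; the sketch below is that assumption, not the project's actual helper.
import random

def chunk_shuffle(n_rides, car_number):
    indices = list(range(n_rides))
    random.shuffle(indices)
    # Deal the shuffled ride indices round-robin into `car_number` groups.
    return [indices[car::car_number] for car in range(car_number)]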
def main(input, output, truth=None):
    # Train the HMM.
    training_fname = "WSJ_POS_CORPUS_FOR_STUDENTS/WSJ_02-21.pos"
    lines, tag_idx, word_idx = read_data(training_fname)
    start_prob, A = get_transition_probs(lines, tag_idx)
    B = get_emission_probs(lines, tag_idx, word_idx)
    # Open the file containing the input sequence.
    with open(input, 'r') as f:
        lines = [line.strip() for line in f]
    # Split the list into lists of sentences.
    sentences = [list(v) for k, v in itertools.groupby(lines, key=bool) if k]
    # Clear out the output file.
    open(output, 'w').close()
    # Generate predictions for all sentences.
    for i, sentence in enumerate(sentences):
        best_path, bestpath_prob = run_viterbi(sentence, tag_idx, word_idx,
                                               start_prob, A, B)
        print_prediction(sentence, tag_idx, best_path, output)
    if truth:
        score(truth, output)  # prints the accuracy of the output predictions compared to truth
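# The `score(truth, output)` call above is described only by its comment. A
# minimal sketch under the assumption that both files hold word<TAB>tag lines
# with blank lines between sentences (the actual course scorer may differ):
def score(truth_file, output_file):
    with open(truth_file) as t, open(output_file) as o:
        pairs = [(a.rstrip('\n'), b.rstrip('\n')) for a, b in zip(t, o)]
    # Keep only lines that carry a token/tag pair in both files.
    tagged = [(a, b) for a, b in pairs if a.strip() and b.strip()]
    correct = sum(a.split('\t')[1] == b.split('\t')[1] for a, b in tagged)
    print("Accuracy: %.4f" % (correct / len(tagged)))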
def implies(hyp, results):
    scoredresults = []
    for text in results:
        pair = (text, score(text, hyp))
        scoredresults.append(pair)
    sortedresults = sorted(scoredresults, key=lambda h: h[1], reverse=True)
    try:
        text = sortedresults[1][0]
    except IndexError:
        text = hyp
    return text
def main():
    top = Tk()
    top.title("Scrabble Score")
    entry = Entry(top, bd=5)
    entry.pack()
    var = StringVar()
    message = Message(top, textvariable=var)
    button = Button(top, text="score",
                    command=lambda: var.set(score(entry.get())))
    message.pack()
    button.pack()
    top.mainloop()
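# The bound `score` above turns the entry text into a Scrabble score. A minimal
# sketch using the standard English letter values (the real helper may also
# validate input or handle blanks differently):
LETTER_VALUES = {
    **dict.fromkeys("AEIOULNRST", 1), **dict.fromkeys("DG", 2),
    **dict.fromkeys("BCMP", 3), **dict.fromkeys("FHVWY", 4),
    **dict.fromkeys("K", 5), **dict.fromkeys("JX", 8),
    **dict.fromkeys("QZ", 10),
}

def score(word):
    # Non-letter characters contribute nothing.
    return sum(LETTER_VALUES.get(ch, 0) for ch in word.upper())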
def search(request, SearchTerm_id):
    # Need to tidy up variable names.
    api = auth.GetTweepyAPI()
    term = get_object_or_404(SearchTerm, id=SearchTerm_id)
    recent = api.search(q=term.phrase, lang=term.lang, rpp=term.pagesize)
    # If, for whatever reason, I wanted to return yet more tweets:
    '''
    i = 2
    while i < 3:
        recent.extend(api.search(q=term.phrase, lang=term.lang, rpp=term.pagesize, page=i))
        i += 1
    '''
    # print len(recent)
    r = scoring.score(recent, SearchTerm_id)
    template = 'smj_app/results.html'
    return render_to_response(template,
                              {'results': r.values(), 'term': term.phrase},
                              context_instance=RequestContext(request))
def scoreAndWriteResult(self):
    scores = {}
    for d in self.labels:
        front_length = int(len(self.values[d]) * self.proportion)
        latter_length = len(self.values[d]) - front_length
        (TP, TN, FP, FN) = scoring.score(self.scoring_name, self.delay,
                                         self.labels[d], self.anomaly_results[d],
                                         front_length, latter_length)
        try:
            Precision = (TP + 0.0) / (TP + FP + 0.0)
            Recall = (TP + 0.0) / (TP + FN + 0.0)
            FScore = 2 * Precision * Recall / (Precision + Recall)
        except ZeroDivisionError:
            Precision, Recall, FScore = 0.0, 0.0, 0.0
        scores[d] = (TP, TN, FP, FN, Precision, Recall, FScore)
    json_path = './results/' + self.algorithm + '/score_' + self.scoring_name + '.json'
    with open(json_path, 'w') as f:
        json.dump(scores, f, indent=4)
def train_loop(model_name, learner, early_stop_window=100, rseed=9342184):
    errors = []
    latencies = []
    best_mean_err = 1.0
    best_i = -1
    for i, (delta, start, stop) in enumerate(generate_inf_cases(True, rseed)):
        tic = time.perf_counter()
        A = learner.predict(delta, stop)
        toc = time.perf_counter()
        err = 1 - score(delta, A, stop)
        errors.append(err)
        latency = toc - tic
        latencies.append(latency)
        mean_err = np.mean(errors)
        mean_latency = np.mean(latencies)
        print(f'Error: mean {mean_err}, cur {err}; '
              f'latency: mean {mean_latency:0.4f}s, cur {latency:0.4f}; '
              f'delta {delta}, density: {np.mean(stop)}')
        if mean_err < best_mean_err:
            best_mean_err = mean_err
            best_i = i
            file_path = f'{model_name}_{i:05}'
            print(f' Best model - saving {file_path}...')
            learner.save_model(file_path)
        elif i - best_i > early_stop_window:
            print(f"Haven't found a better model for more than {early_stop_window} "
                  f"iterations - terminating early.")
            print(f"Best iteration: {best_i}, mean error: {best_mean_err}")
            break
        learner.train(delta, start, stop)
def rfc_final(X, Y, max_features, min_samples_leaf, max_depth, et,
              Y_test=None, regularize=[0.7, 0.7, 0.7], n_estimators=100, seed=0):
    if Y_test is None:
        Y_test = Y

    def rfc_maker(n_estimators=n_estimators, max_features=max_features,
                  min_samples_leaf=min_samples_leaf, max_depth=max_depth, et=False):
        if not et:
            return RandomForestRegressor(n_estimators=n_estimators,
                                         max_features=max_features,
                                         min_samples_leaf=min_samples_leaf,
                                         max_depth=max_depth,
                                         oob_score=True,
                                         n_jobs=-1,
                                         random_state=seed)
        else:
            return ExtraTreesRegressor(n_estimators=n_estimators,
                                       max_features=max_features,
                                       min_samples_leaf=min_samples_leaf,
                                       max_depth=max_depth,
                                       n_jobs=-1,
                                       random_state=seed)

    kinds = ['int', 'ple', 'dec']
    rfcs = {}
    for kind in kinds:
        rfcs[kind] = {}
        for subject in range(1, 50):
            rfcs[kind][subject] = rfc_maker(n_estimators=n_estimators,
                                            max_features=max_features[kind],
                                            min_samples_leaf=min_samples_leaf[kind],
                                            max_depth=max_depth[kind],
                                            et=et[kind])
    for subject in range(1, 50):
        for kind in kinds:
            rfcs[kind][subject].fit(X, Y[subject])
    predictions = {}
    for kind in kinds:
        predictions[kind] = {}
        for subject in range(1, 50):
            if et[kind]:
                # Check in-sample fit because there isn't any alternative.
                predictions[kind][subject] = rfcs[kind][subject].predict(X)
            else:
                predictions[kind][subject] = rfcs[kind][subject].oob_prediction_
    predicted = predictions['int'].copy()
    for subject in range(1, 50):
        predicted[subject][:, 0] = predictions['int'][subject][:, 0]
        predicted[subject][:, 1] = predictions['ple'][subject][:, 1]
        predicted[subject][:, 2:] = predictions['dec'][subject][:, 2:]
    # Regularize:
    predicted_stack = np.zeros((predicted[1].shape[0], predicted[1].shape[1], 49))
    for subject in range(1, 50):
        predicted_stack[:, :, subject - 1] = predicted[subject]
    predicted_mean = predicted_stack.mean(axis=2, keepdims=True)
    predicted_reg = {kind: predicted.copy() for kind in kinds}
    for i, kind in enumerate(kinds):
        predicted_reg[kind] = (regularize[i] * predicted_mean +
                               (1 - regularize[i]) * predicted_stack)
    predicted_stack[:, 0, :] = predicted_reg['int'][:, 0, :]
    predicted_stack[:, 1, :] = predicted_reg['ple'][:, 1, :]
    predicted_stack[:, 2:, :] = predicted_reg['dec'][:, 2:, :]
    predicted = predicted_stack
    observed = predicted.copy()
    for subject in range(1, 50):
        observed[:, :, subject - 1] = Y_test[subject]
    score = scoring.score(predicted, observed)
    rs = {}
    predictions = {}
    for kind in ['int', 'ple', 'dec']:
        rs[kind] = scoring.r(kind, predicted, observed)
    print("For subchallenge 1:")
    print("\tScore = %.2f" % score)
    for kind in kinds:
        print("\t%s = %.3f" % (kind, rs[kind]))
    return (rfcs, score, rs)
def rfc_cv(X, Y, n_splits=5, n_estimators=15, max_features=1000,
           min_samples_leaf=1, max_depth=None, regularize=[0.7, 0.35, 0.7]):
    test_size = 0.2
    n_molecules = X.shape[0]
    shuffle_split = ShuffleSplit(n_molecules, n_splits, test_size=test_size)
    test_size *= n_molecules
    rfcs = {}
    n_subjects = 49
    for subject in range(1, n_subjects + 1):
        rfc = RandomForestRegressor(n_estimators=n_estimators,
                                    max_features=max_features,
                                    min_samples_leaf=min_samples_leaf,
                                    max_depth=max_depth,
                                    oob_score=False,
                                    n_jobs=-1,
                                    random_state=0)
        rfcs[subject] = rfc
    rs = {'int': [], 'ple': [], 'dec': []}
    scores = []
    for train_index, test_index in shuffle_split:
        predicted_list = []
        observed_list = []
        for subject in range(1, n_subjects + 1):
            rfc = rfcs[subject]
            X_train = X[train_index]
            Y_train = Y[subject][train_index]
            rfc.fit(X_train, Y_train)
            X_test = X[test_index]
            predicted = rfc.predict(X_test)
            observed = Y[subject][test_index]
            predicted_list.append(predicted)
            observed_list.append(observed)
        observed = np.ma.dstack(observed_list)
        predicted = np.dstack(predicted_list)
        predicted_mean = predicted.mean(axis=2, keepdims=True)
        predicted_int = regularize[0] * predicted_mean + (1 - regularize[0]) * predicted
        predicted_ple = regularize[1] * predicted_mean + (1 - regularize[1]) * predicted
        predicted = regularize[2] * predicted_mean + (1 - regularize[2]) * predicted
        predicted[:, 0, :] = predicted_int[:, 0, :]
        predicted[:, 1, :] = predicted_ple[:, 1, :]
        score = scoring.score(predicted, observed)
        scores.append(score)
        for kind in ['int', 'ple', 'dec']:
            rs[kind].append(scoring.r(kind, predicted, observed))
    for kind in ['int', 'ple', 'dec']:
        rs[kind] = {'mean': np.mean(rs[kind]),
                    'sem': np.std(rs[kind]) / np.sqrt(n_splits)}
    scores = {'mean': np.mean(scores),
              'sem': np.std(scores) / np.sqrt(n_splits)}
    print("For subchallenge 1, using cross-validation with at least %d samples_per_leaf:"
          % min_samples_leaf)
    print("\tscore = %.2f +/- %.2f" % (scores['mean'], scores['sem']))
    for kind in ['int', 'ple', 'dec']:
        print("\t%s = %.2f +/- %.2f" % (kind, rs[kind]['mean'], rs[kind]['sem']))
    return scores, rs
def genReport():
    scoring.score()
    report.gen()
def rfc_(X_train, Y_train, X_test_int, X_test_other, Y_test,
         max_features=1500, n_estimators=1000, max_depth=None, min_samples_leaf=1):
    print(max_features)

    def rfc_maker():
        return RandomForestRegressor(max_features=max_features,
                                     n_estimators=n_estimators,
                                     max_depth=max_depth,
                                     min_samples_leaf=min_samples_leaf,
                                     n_jobs=-1,
                                     oob_score=True,
                                     random_state=0)

    n_subjects = 49
    predicted_train = []
    observed_train = []
    predicted_test = []
    observed_test = []
    rfcs = {subject: rfc_maker() for subject in range(1, n_subjects + 1)}
    for subject in range(1, n_subjects + 1):
        print(subject)
        observed = Y_train[subject]
        rfc = rfcs[subject]
        rfc.fit(X_train, observed)
        # predicted = rfc.predict(X_train)
        predicted = rfc.oob_prediction_
        observed_train.append(observed)
        predicted_train.append(predicted)
        observed = Y_test[subject]
        rfc = rfcs[subject]
        if Y_train is Y_test:
            # OOB prediction
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_test_other)
            predicted_int = rfc.predict(X_test_int)
            predicted[:, 0] = predicted_int[:, 0]
        observed_test.append(observed)
        predicted_test.append(predicted)
    scores = {}
    for phase, predicted_, observed_ in [('train', predicted_train, observed_train),
                                         ('test', predicted_test, observed_test)]:
        predicted = np.dstack(predicted_)
        observed = np.ma.dstack(observed_)
        predicted_mean = np.mean(predicted, axis=2, keepdims=True)
        regularize = 0.7
        predicted = regularize * predicted_mean + (1 - regularize) * predicted
        score = scoring.score(predicted, observed, n_subjects=n_subjects)
        r_int = scoring.r('int', predicted, observed)
        r_ple = scoring.r('ple', predicted, observed)
        r_dec = scoring.r('dec', predicted, observed)
        print("For subchallenge 1, %s phase, score = %.2f (%.2f,%.2f,%.2f)" %
              (phase, score, r_int, r_ple, r_dec))
        scores[phase] = score
    return rfcs, scores['train'], scores['test']
def main():
    parser = argparse.ArgumentParser(description="Run QA-CLEF-System")
    parser.add_argument('--preprocess', action="store_true")
    parser.add_argument('--train', action="store_true")
    parser.add_argument('--answeronly', action='store_true')
    parser.add_argument('--selftest', action='store_true')
    parser.add_argument('--data', nargs='+', default=[2011], type=int)
    parser.add_argument('--test', nargs='+', default=[2012], type=int)
    parser.add_argument('--forcedownload', action='store_true')
    parser.add_argument('--preprocessonly', action='store_true')
    parser.add_argument('--ngram', type=int, default=3)
    parser.add_argument('--threshold', type=float, default=0.5)
    parser.add_argument('--report', action='store_true')
    args = parser.parse_args()
    process_args(args)

    data = []
    for edition in args.data + args.test:
        _data = qacache.find_data(edition)
        if args.preprocess or _data is None:
            input_check([edition], args.forcedownload)
            _data = input_parse([edition])
            print >> sys.stderr, 'preprocessing ' + str(edition) + '-data'
            _data = preprocessing.preprocess(_data)
            qacache.store_preprocessed_data(edition, _data[0])
        else:
            print >> sys.stderr, str(edition) + '-data is found on cache/' + str(edition) + '-prerocessed.txt'
        data.append(_data)

    if args.preprocessonly:
        print >> sys.stderr, 'Preprocess-only task is done.'
        sys.exit(0)

    # Build model.
    print >> sys.stderr, 'Building model...'
    training_model = model_builder.build_model(data[:len(args.data)])
    test_model = model_builder.build_model(data[-len(args.test):]) \
        if len(args.test) != 0 and not args.selftest else []

    # Scoring.
    print >> sys.stderr, 'Unweighted Feature Scoring...'
    training_model and scoring.score(training_model)
    test_model and scoring.score(test_model)

    # Training.
    weight = qacache.stored_weight()
    if args.train or weight is None:
        print >> sys.stderr, 'Training...'
        weight = train(training_model)
    else:
        print >> sys.stderr, 'Weight is found on cache/weight.txt'

    # Weighted scoring.
    print >> sys.stderr, 'Weighted Feature Scoring...'
    final = scoring.weighted_scoring(training_model if args.selftest else test_model, weight)

    # Answer selection.
    select_answer(final, args.threshold)

    # Evaluation.
    result = evaluate(final)
    qacache.write_json(final, 'final.txt', indent=True)

    if args.report:
        report(final, args.test if not args.selftest else args.data, weight)

    print "Result: %f" % result
### Pre process data ##
target = 'HPYLORI'
data1 = dp.modify_data(data, [], data.columns, target)
print(data1)
stats.gen_stats(data1, target)
n_features = data1.shape[1] - 1
# #################### RUNNING WITHOUT BOOSTING AND BAGGING for all ranking feature selections and CFS ###############
n_seed = 2
splits = 2
runs = stats.runSKFold(n_seed, splits, data=data1)
# score.score(rsr.normal_run(n_seed, splits, ['infogain_10'], ['elasticnet'], runs, n_features), n_seed, splits)
score.score(sfs_r.subset_run(n_seed, splits, ['elasticnet'], ['f1'], runs, n_features), n_seed, splits)
# sfs_r.subset_features(n_seed, splits, ['knn'], ['accuracy'], runs, n_features)
# score.score(bbr.boostbag_run(n_seed, splits, ['infogain_10'], ['elasticnet'], runs, 'boost', n_features), n_seed, splits)
# # score.score(nr.normal_run(data1, n_seed=1, splits=2, methods=['cfs_0'], estimators=['knn']), 1, 2)
# num = data1.shape[1]
# score.score(parallel.normal_run(1, 10, ['fcbf_0'], ['naive_bayes'], runs), 1, 10)
# parallel.normal_run(1, 10, ['mrmr_0'], ['naive_bayes'], runs)
# , 'infogain_20', 'reliefF_10', 'reliefF_20', 'infogain_'+str(num), 'cfs_0'
# , 'svm', 'naive_bayes', 'knn', 'xgboost'
# , 'infogain_20', 'reliefF_10', 'reliefF_20', 'infogain_'+str(num)
# score.score(parallel.boostbag_run(2, 3, ['infogain_10'], ['elasticnet'], runs, 'bag'), 2, 3)
# parallel.subset_run(5, 10, ['elasticnet'], ['f1'], runs)
# parallel.subset_run(1, 2, ['knn'], ['f1'], runs)
import scoring
import random
import sys

if len(sys.argv) != 2:
    raise Exception("Script needs the test set name")
test_set = sys.argv[1]

contributors, c2S, projects, p2D, p2W, p2B, p2S = io.read_input_file(
    "input/{}.txt".format(test_set))
p_order, p2C_order = io.read_output_init_file("output/{}.out".format(test_set),
                                              projects, contributors)
P = len(p_order)

score = scoring.score(contributors, c2S, projects, p2D, p2W, p2B, p2S,
                      p_order, p2C_order)

save_step = 100000
last_save = score
nb_save = 0
max_nb_test = 1000000
nb_test = 0
print(score)
while nb_test < max_nb_test:
    if score - last_save > save_step:
        print("Saving")
        io.write_output("output/{}-optim{}.out".format(test_set, nb_save),
                        projects, contributors, p_order, p2C_order)
        nb_save += 1
def main():
    args = cli.parse_args()
    if not args.no_det:
        np.random.seed(args.env_seed)
        random.seed(args.env_seed)
        torch.manual_seed(args.env_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    ### Logger setup ###
    logger = Logger(args.fileloc, args.load_loc)
    logger.init_config(args)
    logger.load_results()

    ### If we're making a visualisation, branch here ###
    if args.task == 'vis':
        if args.ranking not in logger.data['scores'][0]:
            print("\nNeed to compute ranking {} before doing visualisation!".format(args.ranking))
            exit()
        run_and_save(logger)
        exit()

    ### Get or update all results ###
    counts_do = {
        'redo': args.redo_all or not logger.is_done('counts'),
        'update': args.more_count is not None
    }
    scores_do = {
        'redo': counts_do['redo'] or counts_do['update'] or not logger.is_done('scores'),
        'update': args.more_scoretypes is not None
    }
    interpol_do = {
        'redo': scores_do['redo'] or not logger.is_done('interpol') or args.redo_interpol is not None,
        'update': scores_do['update']
    }

    ### Counts
    if counts_do['redo']:
        print("\n----- Counting -----\n")
        counts = count(logger)
        logger.update_counts(counts)
    if counts_do['update']:
        N = args.more_count
        print("\n----- Additional Counts ({} more runs) -----\n".format(N))
        # If we're adding more counts without having run before, then we need to reset the
        # env or we would be revisiting the same states because of the seed.
        if not counts_do['redo']:
            for _ in range(logger.config['n_runs']):
                logger.config['env'].reset()
        counts = count(logger, n_runs=N)
        logger.update_counts(counts, addn=N)
    if counts_do['redo'] or counts_do['update']:
        logger.dump_results()
        logger.dump_config()

    ### Scores
    if scores_do['redo']:
        print("\n----- Scoring -----\n")
        scores = score(logger)
        logger.update_scores(scores)
    if scores_do['update']:
        already_done = [st for st in args.more_scoretypes
                        if st in logger.config['score_types']]
        if len(already_done) != 0:
            raise Exception("Scoretypes", ",".join(already_done),
                            "already done! Remove them from --more_scoretypes")
        print("\n----- Additional Scores ({}) -----\n".format(args.more_scoretypes))
        scores = score(logger, score_types=args.more_scoretypes)
        logger.update_scores(scores)
    if scores_do['redo'] or scores_do['update']:
        logger.dump_results()
        logger.dump_config()

    ### Interpolation
    if interpol_do['redo']:
        print("\n----- Interpolating -----\n")
        if args.redo_interpol is not None:
            i, t = args.redo_interpol
            logger.config['n_inc'] = i if i >= 0 else logger.config['n_inc']
            logger.config['n_test'] = t if t >= 0 else logger.config['n_test']
        elif logger.config['n_inc'] == -1:
            logger.config['n_inc'] = int(logger.data['logs'][0]['counting_abs_states'] / 10)
        interpol = interpolate(logger)
        logger.update_interpolation(interpol)
    if interpol_do['update']:
        print("\n----- Additional Interpolations ({}) -----\n".format(args.more_scoretypes))
        interpol = interpolate(logger, score_types=args.more_scoretypes)
        logger.update_interpolation(interpol)
    if interpol_do['redo'] or interpol_do['update']:
        logger.dump_results()
        logger.dump_config()

    ### Display results ###
    draw_interpol_results(logger, logger.config['score_types'], 0, [1],
                          x_fracs=True, y_fracs=True, smooth=False,
                          x_name='States Restored (%)',
                          y_names=['Original Reward (%)'],
                          combine_sbfl=True)
    draw_interpol_results(logger, logger.config['score_types'], 4, [1],
                          y_fracs=True, trans_x=lambda x: 1 - x,
                          x_name="Policy's Action Taken (% of Steps)",
                          y_names=['Original Reward (%)'],
                          smooth=False, combine_sbfl=True)