def leaderboard(m_val, n_val, exp_spectrum):
    #masses = [57, 71, 87, 97, 99, 101, 103, 113, 114, 115, 128, 129, 131, 137, 147, 156, 163, 186]
    masses = list(map(int, convolution(m_val, exp_spectrum).split(' ')))
    candidates = list(masses)
    max_exp = max(exp_spectrum)
    leader = '0'

    while candidates:
        d = collections.defaultdict(list)
        candidates = branch(masses, candidates)
        for current_peptide in candidates:
            cyclic_theo = cyclic_spectrum(current_peptide)
            if max(cyclic_theo) > max_exp:
                # Peptide mass already exceeds the experimental parent mass; discard it.
                continue
            score_theo = score(cyclic_theo, exp_spectrum)  # score of the cyclic theoretical spectrum
            leader_cyclic_theo = cyclic_spectrum(leader)  # cyclic theoretical spectrum of the current leader peptide
            score_leader = score(leader_cyclic_theo, exp_spectrum)  # score of the leader's spectrum
            if score_theo > score_leader:
                leader = current_peptide
            d[score_theo].append(current_peptide)
        # Bound: once every candidate in this round has been scored, keep only the
        # peptides whose scores fall in the top n_val (ties included).
        top_n_scores = get_top_n_scores(d, n_val)
        candidates = get_top_scorers(d, top_n_scores, n_val)
    return leader
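# The score() helper called above is not shown in this excerpt; in the usual
# leaderboard cyclopeptide sequencing setup it is the multiset overlap between a
# theoretical spectrum and the experimental spectrum. A minimal sketch, assuming
# both spectra are plain lists of integer masses (spectrum_score is an
# illustrative name, not the project's own function):
import collections

def spectrum_score(theo_spectrum, exp_spectrum):
    exp_counts = collections.Counter(exp_spectrum)
    # Each shared mass counts as many times as it appears in both spectra.
    return sum(min(count, exp_counts[mass])
               for mass, count in collections.Counter(theo_spectrum).items())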
    def classbyUPGMA(self,obstimes,trainingtimes,obsnodes):
        self._classorder=["C1"]
        self._classscore["C1"] = score()

        for u, v in itertools.combinations(obsnodes.keys(), 2):  # to predict new pairs of nodes and initialize the classes
            link = frozenset([u, v])
            self._classUnion.addPair(link)
            if link not in obstimes:
                self._classscore["C1"].addPair(link)
        learningperiods=dict()
        for link in obstimes:
            if link in trainingtimes:
                learningperiods[link] = obstimes[link]+trainingtimes[link]
            else:
                learningperiods[link] = obstimes[link]
        Y, self._label = classes.Makedistmatx(learningperiods, self._VandPparameter, 0, 0)
        self._linkage = hierarchy.average(Y)
        cutree = hierarchy.cut_tree(self._linkage, self._nbcluster)


        for i in range(self._nbcluster):
            self._classscore["C"+str(i+2)] = score()
            self._classorder.append("C"+str(i+2))

        for i in range(len(self._label)):
            u=self._label[i]
            self._classscore["C"+str(cutree[i][0]+2)].addPair(u)
Example #3
def test_score_delta0():
    assert 1.0 == score(0, np.array([[0, 0], [0, 0]]),
                        np.array([[0, 0], [0, 0]]))
    assert 0.75 == score(0, np.array([[0, 0], [0, 0]]),
                         np.array([[0, 1], [0, 0]]))
    assert 0.0 == score(0, np.array([[0, 0], [0, 0]]),
                        np.array([[1, 1], [1, 1]]))
Example #4
def test_score_delta1_toad():
    toad_1 = np.array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0],
                       [0, 0, 1, 1, 1, 0], [0, 1, 1, 1, 0, 0],
                       [0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0]])
    toad_2 = np.array([[0, 0, 0, 0, 0, 0], [0, 0, 0, 1, 0, 0],
                       [0, 1, 0, 0, 1, 0], [0, 1, 0, 0, 1, 0],
                       [0, 0, 1, 0, 0, 0], [0, 0, 0, 0, 0, 0]])
    assert 1.0 == score(1, toad_1, toad_2)
    assert 1.0 == score(3, toad_1, toad_2)
    assert 30 / 36 == score(3, np.zeros(toad_2.size).reshape((6, 6)), toad_2)
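# The score(delta, start, stop) helper exercised by these tests is assumed to
# advance `start` by `delta` Game of Life generations and return the fraction of
# cells that then match `stop`. A minimal sketch consistent with the assertions
# above (life_score and the toroidal wrapping are assumptions, not the project's code):
import numpy as np

def _life_step(grid):
    # One generation: count the eight neighbours of every cell by summing rolled copies.
    neighbours = sum(np.roll(np.roll(grid, dy, axis=0), dx, axis=1)
                     for dy in (-1, 0, 1) for dx in (-1, 0, 1) if (dy, dx) != (0, 0))
    return ((neighbours == 3) | ((grid == 1) & (neighbours == 2))).astype(int)

def life_score(delta, start, stop):
    grid = np.asarray(start)
    for _ in range(delta):
        grid = _life_step(grid)
    # Fraction of cells that agree with the target grid.
    return np.mean(grid == np.asarray(stop))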
Example #5
    def eval(predict):
        multi_step_errors = []
        one_step_errors = []
        for delta, stop in tqdm(
                bitmap.generate_test_set(set_size=args.test_size,
                                         seed=args.test_seed)):
            start = predict(delta, stop)
            multi_step_errors.append(1 - score(delta, start, stop))

            one_step_start = start if delta == 1 else predict(1, stop)
            one_step_errors.append(1 - score(1, one_step_start, stop))
        return np.mean(multi_step_errors), np.var(multi_step_errors), np.mean(
            one_step_errors), np.var(one_step_errors)
Example #6
def main():
	rankings = getRankings()
	matches = []
	lines = [line.rstrip('\n') for line in open('scores/2014-15_round16Scores.txt')]
	for line in lines:
		r = line.split(", ")
		# print r
		m = classes.Match(r[0])
		m.setVisitingTeam(r[3])
		m.setHomeScore(r[1])
		m.setVisitorScore(r[2])
		matches.append(m)

	predictions = predictMatches(matches, rankings, verbose=False)
	scoring.score(matches, predictions)
Example #7
def confirmChoices(choices, board, boardH, boardW, dictionaryFile, modDict):
    confirmed = dict()
    dictionary = fileToSet(dictionaryFile)
    for word, knowledge in choices.items():
        used, horizontal, vertical = knowledge
        for path in horizontal:
            location = path[0]
            ok, scoreSides = checkPath(
                word,
                used,
                path,
                "u",
                "d",
                board,
                boardH,
                boardW,
                dictionary,
                modDict,
            )
            if ok is True:
                wordScore, usedWord = score(used, location, 0, board, modDict)
                scoreSides += wordScore
                value = confirmed.get(word, list())
                value.append(
                    (scoreSides, usedWord, location, "horizontal"))
                confirmed[word] = value
        for path in vertical:
            location = path[0]
            ok, scoreSides = checkPath(
                word,
                used,
                path,
                "l",
                "r",
                board,
                boardH,
                boardW,
                dictionary,
                modDict,
            )
            if ok is True:
                wordScore, usedWord = score(used, location, 1, board, modDict)
                scoreSides += wordScore
                value = confirmed.get(word, list())
                value.append((scoreSides, usedWord, location, "vertical"))
                confirmed[word] = value

    return confirmed
Example #8
def index():
    inv_map = score_scraper.ncaa_to_id()
    master = score_scraper.get_master_bracket()
    elim = score_scraper.get_elim()
    users = User.query.all()
    scoring.convert_brackets_to_id(inv_map, users, master)
    end_rounds = scoring.get_end_rounds(users)
    score = scoring.score(master, users)
    order = scoring.order(score)
    rank = scoring.rank(order, score)
    potential = scoring.potential(elim, users, master)
    game_scores = score_scraper.get_scoreticker_json()
    seo = score_scraper.convert_short_to_seo()
    return render_template('index.html',
                           users=users,
                           score=score,
                           potential=potential,
                           end_rounds=end_rounds,
                           elim=elim,
                           order=order,
                           rank=rank,
                           User=User,
                           master=master,
                           matches=game_scores,
                           seo=seo)
Example #9
def entries():
    users = User.query.all()
    users.sort(key=lambda x: x.firstname, reverse=False)
    master = score_scraper.get_master_bracket()
    score = scoring.score(master, users)
    order = scoring.order(score)
    rank = scoring.rank(order, score)
    bracket_teams = score_scraper.get_bracket_teams()
    if current_user.is_authenticated:
        user_id = request.args.get('id', default=current_user.id, type=int)
    else:
        user_id = request.args.get('id', default=1, type=int)
    elim = score_scraper.get_elim()
    game_scores = score_scraper.get_scoreticker_json()
    if user_id == -1 or User.query.get(user_id).round1 is None:
        display = []
    else:
        display = User.query.get(user_id).round1.replace('"', '').replace(
            '[', '').replace(']', '').split(',')
    return render_template('entries.html',
                           users=users,
                           display=display,
                           master=master,
                           elim=elim,
                           user_id=user_id,
                           User=User,
                           order=order,
                           Users=users,
                           rank=rank,
                           matches=game_scores,
                           teams=bracket_teams)
    def classbyUPGMASIZE(self,obstimes,trainingtimes,obsnodes):
        self._classorder=["C1"]
        self._classscore["C1"] = score()

        for u, v in itertools.combinations(obsnodes.keys(), 2):  # to predict new pairs of nodes and initialize the classes
            link = frozenset([u, v])
            self._classUnion.addPair(link)
            if link not in obstimes:
                self._classscore["C1"].addPair(link)
        learningperiods=dict()
        for link in obstimes:
            if link in trainingtimes:
                learningperiods[link] = obstimes[link]+trainingtimes[link]
            else:
                learningperiods[link] = obstimes[link]
        Y,self._label=classes.Makedistmatx(learningperiods,self._VandPparameter,0,0)
        self._linkage=hierarchy.average(Y)


        n = len(self._label)

        # print(self._label)
        q = []
        q.append(self._linkage[-1, 0])
        q.append(self._linkage[-1, 1])
        q.sort()
        for i in range(self._nbcluster - 2):
            X = q.pop(q.index(max(q, key=lambda x: self._linkage[int(x - n), 3])))
            q.append(self._linkage[int(X - n), 0])
            q.append(self._linkage[int(X - n), 1])
            q.sort()

        q.sort(key=lambda x: self._linkage[int(x - n), 3], reverse=True)
        self._cuttree = [-1] * n
        for i in range(self._nbcluster):
            self.getLeafID(q[i],i)

        # cutree = hierarchy.cut_tree(self._linkage,self._nbcluster)
        # print(cutree)

        for i in range(self._nbcluster):
            self._classscore["C"+str(i+2)] = score()
            self._classorder.append("C"+str(i+2))

        for i in range(len(self._label)):
            u=self._label[i]
            self._classscore["C"+str(self._cuttree[i]+2)].addPair(u)
Example #11
def ensemble(predicts, deltas_batch, stops_batch):
    predictions = np.array([p(deltas_batch, stops_batch) for p in predicts])
    scores = np.array([
        [score(deltas_batch[j], predictions[i][j], stops_batch[j])
         for j in range(len(predictions[i]))]
        for i in range(len(predicts))
    ])
    max_idxs = np.argmax(scores, axis=0)
    best = []
    for i,best_idx in enumerate(max_idxs):
        best.append(predictions[best_idx][i])
    return np.array(best)
Example #12
def test_score_block():
    block = np.array([
        [0, 0, 0, 0],
        [0, 1, 1, 0],
        [0, 1, 1, 0],
        [0, 0, 0, 0],
    ])
    block2 = np.array([
        [1, 1, 0, 0],
        [1, 1, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ])
    assert 1.0 == score(1, block, block)
    assert 1.0 == score(10, block, block)
    assert 10 / 16 == score(1, block2, block)
    assert 10 / 16 == score(10, block2, block)
Example #13
    def eval(predict):
        multi_step_errors = []
        one_step_errors = []
        for batch in tqdm(grouper(bitmap.generate_test_set(set_size=args.test_size, seed=args.test_seed), 100)):
            deltas, stops = zip(*batch)

            delta_batch = np.array(deltas)
            stop_batch = np.array(stops)
            start_batch = predict(deltas, stops)

            for delta, start, stop in zip(delta_batch, start_batch, stop_batch):
                multi_step_errors.append(1 - score(delta, start, stop))

            one_deltas = np.ones_like(delta_batch)
            # Reuse the multi-step prediction where delta == 1; otherwise run a single-step prediction.
            one_step_start = np.where((delta_batch == 1)[:, None, None],
                                      start_batch, predict(one_deltas, stops))
            for delta, start, stop in zip(one_deltas, one_step_start, stop_batch):
                one_step_errors.append(1 - score(delta, start, stop))
        return np.mean(multi_step_errors), np.var(multi_step_errors), np.mean(one_step_errors), np.var(one_step_errors)
Example #14
def test_step_back_all_3x3(alg_class):
    alg = alg_class(tile_graph)

    for Aorig in tqdm(generate_all(3, 3)):
        X = np.copy(Aorig)
        X = life_step(X)
        A = alg.step_back(X)

        sc = score(1, A, X)
        assert sc == 1.0
    def __init__(self):
        self._classorder = list()
        self._classscore = dict()
        self._classUnion = score()
        self._threshold = None
        self._linkage = []
        self._label = []
        self._VandPparameter = None
        self._nbcluster = None
        self._cuttree = None
def getStartingWord(modDict, star, board, boardW, rack, dictionary):
    boardWidth = boardW
    if board[star] != (".", "."):
        return None
    possible = getAnagrams(rack, dictionary)
    overallHighestScore = 0
    play = list()
    for word, used in possible.items():
        highestScore = 0
        positions = list()
        usedlen = len(used)
        for i in range(usedlen):
            x0 = star[0] + 1 - usedlen + i
            xe = x0 + usedlen - 1
            y = star[1]
            if x0 < 1:
                continue
            if xe > boardWidth:
                break
            posscore = score(used, (x0, y), 0, board, modDict)[0]
            if posscore == highestScore:
                positions.append((x0, y))
            elif posscore > highestScore:
                highestScore = posscore
                positions = [(x0, y)]
        if highestScore == overallHighestScore:
            for position in positions:
                play.append(
                    (
                        overallHighestScore,
                        word,
                        used,
                        position,
                        "horizontal",
                        used,
                    )
                )
        if highestScore > overallHighestScore:
            overallHighestScore = highestScore
            play = list()
            for position in positions:
                play.append(
                    (
                        overallHighestScore,
                        word,
                        used,
                        position,
                        "horizontal",
                        used,
                    )
                )

    return play
    def classbythreshold(self,obstimes,obsnodes):

        # Class making
        # 3 classes: C1 = new link, C2 = seen fewer than classthreshold times, C3 = seen classthreshold times or more
        self._classorder = ["C1","C2","C3"]

        self._classscore["C1"] = score()
        self._classscore["C2"] = score()
        self._classscore["C3"] = score()

        # print(self._classthreshold)
        for u, v in itertools.combinations(obsnodes.keys(), 2):  # to predict new pairs of nodes and initialize the classes
            link = frozenset([u, v])
            self._classUnion.addPair(link)
            if link not in obstimes:
                self._classscore["C1"].addPair(link)
            elif len(obstimes[link]) < self._classthreshold:
                self._classscore["C2"].addPair(link)
            else:
                self._classscore["C3"].addPair(link)
Example #18
    def __init__(self, filename, scoring_type, test_size=0.25, target=''):
        data = pd.read_csv(filename)
        self.X = data.drop(target, axis=1)
        self.Y = data[target]
        self.test_size = test_size
        self.target = target
        self.scoring = scoring.score(scoring_type)
        self.scores_dic = {}
        self.models_files = {}
        self.top_model = ''
        self.x_train, self.x_test, self.y_train, self.y_test = \
            train_test_split(self.X, self.Y, test_size=self.test_size)
        self.scaler = StandardScaler()
Example #19
def subject_regularize(rfcs,
                       X_int,
                       X_other,
                       Y,
                       oob=False,
                       regularize=[0.75, 0.3, 0.65]):
    if len(regularize) == 1:
        regularize = regularize * 3
    observed_ = []
    predicted_ = []
    for subject in range(1, 50):
        observed = Y['subject'][subject]
        rfc = rfcs[1][subject]
        if oob:
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_other)
            predicted_int = rfc.predict(X_int)
            predicted[:, 0] = predicted_int[:, 0]
        observed_.append(observed)
        predicted_.append(predicted)
    predicted = np.dstack(predicted_)
    observed = np.ma.dstack(observed_)
    predicted_mean = np.mean(predicted, axis=2, keepdims=True)
    predicted_std = np.std(predicted, axis=2, keepdims=True)
    predicted_mean_std = np.hstack((predicted_mean, predicted_std)).squeeze()
    predicted_int = regularize[0] * (predicted_mean) + (
        1 - regularize[0]) * predicted
    predicted_ple = regularize[1] * (predicted_mean) + (
        1 - regularize[1]) * predicted
    predicted_dec = regularize[2] * (predicted_mean) + (
        1 - regularize[2]) * predicted
    predicted = regularize[0] * (predicted_mean) + (1 -
                                                    regularize[0]) * predicted
    r_int = scoring.r('int', predicted_int, observed)
    r_ple = scoring.r('ple', predicted_ple, observed)
    r_dec = scoring.r('dec', predicted_dec, observed)
    score1_ = scoring.score(predicted, observed, n_subjects=49)
    score1 = scoring.rs2score(r_int, r_ple, r_dec)
    #print(score1_,score1)
    print("For subchallenge %d, score = %.3f (%.3f,%.3f,%.3f)" %
          (1, score1, r_int, r_ple, r_dec))
    score2 = scoring.score2(predicted_mean_std, Y['mean_std'])
    r_int_mean = scoring.r2('int', 'mean', predicted_mean_std, Y['mean_std'])
    r_ple_mean = scoring.r2('ple', 'mean', predicted_mean_std, Y['mean_std'])
    r_dec_mean = scoring.r2('dec', 'mean', predicted_mean_std, Y['mean_std'])
    r_int_sigma = scoring.r2('int', 'sigma', predicted_mean_std, Y['mean_std'])
    r_ple_sigma = scoring.r2('ple', 'sigma', predicted_mean_std, Y['mean_std'])
    r_dec_sigma = scoring.r2('dec', 'sigma', predicted_mean_std, Y['mean_std'])
    print("For subchallenge %d, score = %.2f (%.2f,%.2f,%.2f,%.2f,%.2f,%.2f)" % \
         (2,score2,r_int_mean,r_ple_mean,r_dec_mean,r_int_sigma,r_ple_sigma,r_dec_sigma))
    return (r_int, r_ple, r_dec, r_int_mean, r_ple_mean, r_dec_mean,
            r_int_sigma, r_ple_sigma, r_dec_sigma)
Example #20
def count_shuffle(rides: list,
                  car_number: int,
                  final_time: int,
                  bonus: int,
                  iterations: int = 100) -> tuple:
    best_score = 0
    best_result = None
    for i in range(iterations):
        car_assignments = chunk_shuffle(len(rides), car_number)
        local_score = score(car_assignments, rides, final_time, bonus)
        if local_score > best_score:
            best_result = car_assignments
            best_score = local_score
    return best_score, best_result
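# chunk_shuffle() is assumed to produce one random assignment of ride indices to
# cars in whatever shape score() expects; a minimal sketch under that assumption
# (round-robin deal of a shuffled permutation, illustrative layout, not the project's code):
import random

def chunk_shuffle(ride_count: int, car_number: int) -> list:
    indices = list(range(ride_count))
    random.shuffle(indices)
    # Deal the shuffled ride indices round-robin so every car gets a similar load.
    return [indices[car::car_number] for car in range(car_number)]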
Example #21
def main(input, output, truth=None):
    # train HMM
    training_fname = "WSJ_POS_CORPUS_FOR_STUDENTS/WSJ_02-21.pos"
    lines, tag_idx, word_idx = read_data(training_fname)
    start_prob, A = get_transition_probs(lines, tag_idx)
    B = get_emission_probs(lines, tag_idx, word_idx)

    # open file containing input sequence
    with open(input, 'r') as f:
        lines = [line.strip() for line in f]

    # split the list into lists of sentences
    sentences = [list(v) for k, v in itertools.groupby(lines, key=bool) if k]

    # clear out output file
    open(output, 'w').close()

    # generate predictions for all sentences
    for i, sentence in enumerate(sentences):
        best_path, bestpath_prob = run_viterbi(sentence, tag_idx, word_idx, start_prob, A, B)
        print_prediction(sentence, tag_idx, best_path, output)

    if truth:
        score(truth, output) # prints out accuracy of output predictions compared to truth
Example #22
def implies(hyp, results):

    scoredresults = []

    for text in results:
        pair = (text, score(text, hyp))
        scoredresults.append(pair)

    sortedresults = sorted(scoredresults, key=lambda h: h[1], reverse=True)

    try:
        text = sortedresults[1][0]
    except IndexError:
        text = hyp

    return text
Example #24
def main():
    top = Tk()

    top.title("Scrabble Score")

    entry = Entry(top, bd=5)
    entry.pack()

    var = StringVar()
    message = Message(top, textvariable=var)
    button = Button(top,
                    text="score",
                    command=lambda: var.set(score(entry.get())))

    message.pack()
    button.pack()
    top.mainloop()
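# The score() wired to the button above is not included in this excerpt; for a
# Scrabble scorer it would map a word to its total letter value. A minimal sketch
# with standard English tile values (illustrative name, no board multipliers):
def scrabble_word_score(word):
    letter_values = {
        **dict.fromkeys("aeioulnstr", 1), **dict.fromkeys("dg", 2),
        **dict.fromkeys("bcmp", 3), **dict.fromkeys("fhvwy", 4),
        **dict.fromkeys("k", 5), **dict.fromkeys("jx", 8),
        **dict.fromkeys("qz", 10),
    }
    # Ignore anything that is not a letter so stray input scores zero instead of raising.
    return sum(letter_values.get(ch, 0) for ch in word.lower())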
Example #25
def search(request, SearchTerm_id):
    #need to tidy up variable names
    api = auth.GetTweepyAPI()
    term = get_object_or_404(SearchTerm, id=SearchTerm_id)
    
    recent = api.search(q=term.phrase,lang=term.lang,rpp=term.pagesize)
    
    #if, for whatever reason, I wanted to return yet more tweets
    '''
    i = 2
    while i < 3:
        recent.extend(api.search(q=term.phrase,lang=term.lang,rpp=term.pagesize,page=i))
        i += 1  
    '''
    #print len(recent)    
    r = scoring.score(recent, SearchTerm_id)
    template = 'smj_app/results.html'
    return render_to_response(template, {'results': r.values(), 'term': term.phrase},
                              context_instance=RequestContext(request))
Example #26
    def scoreAndWriteResult(self):
        scores = {}
        for d in self.labels:
            front_length = int(len(self.values[d]) * self.proportion)
            latter_length = len(self.values[d]) - front_length
            (TP, TN, FP, FN) = scoring.score(self.scoring_name, self.delay,
                                             self.labels[d],
                                             self.anomaly_results[d],
                                             front_length, latter_length)
            try:
                Precision = (TP + 0.0) / (TP + FP + 0.0)
                Recall = (TP + 0.0) / (TP + FN + 0.0)
                FScore = 2 * Precision * Recall / (Precision + Recall)
            except ZeroDivisionError:
                Precision, Recall, FScore = 0.0, 0.0, 0.0

            scores[d] = (TP, TN, FP, FN, Precision, Recall, FScore)

        json_path = './results/' + self.algorithm + '/score_' + self.scoring_name + '.json'
        with open(json_path, 'w') as f:
            json.dump(scores, f, indent=4)
Example #27
def train_loop(model_name, learner, early_stop_window=100, rseed=9342184):
    errors = []
    latencies = []
    best_mean_err = 1.0
    best_i = -1
    for i, (delta, start, stop) in enumerate(generate_inf_cases(True, rseed)):
        tic = time.perf_counter()
        A = learner.predict(delta, stop)
        toc = time.perf_counter()

        err = 1 - score(delta, A, stop)
        errors.append(err)

        latency = toc - tic
        latencies.append(latency)

        mean_err = np.mean(errors)
        mean_latency = np.mean(latencies)

        print(
            f'Error: mean {mean_err}, cur {err}; latency: mean {mean_latency:0.4f}s, cur {latency:0.4f}; delta {delta}, density: {np.mean(stop)}'
        )

        if mean_err < best_mean_err:
            best_mean_err = mean_err
            best_i = i
            file_path = f'{model_name}_{i:05}'
            print(f'    Best model - saving {file_path}...')
            learner.save_model(file_path)
        elif i - best_i > early_stop_window:
            print(
                f"Haven't found a better model for more than {early_stop_window} iterations - terminating early."
            )
            print(f"Best iteration: {best_i}, mean error: {best_mean_err}")
            break

        learner.train(delta, start, stop)
Example #28
def rfc_final(X,
              Y,
              max_features,
              min_samples_leaf,
              max_depth,
              et,
              Y_test=None,
              regularize=[0.7, 0.7, 0.7],
              n_estimators=100,
              seed=0):

    if Y_test is None:
        Y_test = Y

    def rfc_maker(n_estimators=n_estimators,
                  max_features=max_features,
                  min_samples_leaf=min_samples_leaf,
                  max_depth=max_depth,
                  et=False):
        if not et:
            return RandomForestRegressor(n_estimators=n_estimators,
                                         max_features=max_features,
                                         min_samples_leaf=min_samples_leaf,
                                         max_depth=max_depth,
                                         oob_score=True,
                                         n_jobs=-1,
                                         random_state=seed)
        else:
            return ExtraTreesRegressor(n_estimators=n_estimators,
                                       max_features=max_features,
                                       min_samples_leaf=min_samples_leaf,
                                       max_depth=max_depth,
                                       n_jobs=-1,
                                       random_state=seed)

    kinds = ['int', 'ple', 'dec']
    rfcs = {}
    for kind in kinds:
        rfcs[kind] = {}
        for subject in range(1, 50):
            rfcs[kind][subject] = rfc_maker(
                n_estimators=n_estimators,
                max_features=max_features[kind],
                min_samples_leaf=min_samples_leaf[kind],
                max_depth=max_depth[kind],
                et=et[kind])

    for subject in range(1, 50):
        for kind in kinds:
            rfcs[kind][subject].fit(X, Y[subject])

    predictions = {}
    for kind in kinds:
        predictions[kind] = {}
        for subject in range(1, 50):
            if et[kind]:
                # Check in-sample fit because there isn't any alternative.
                predictions[kind][subject] = rfcs[kind][subject].predict(X)
            else:
                predictions[kind][subject] = rfcs[kind][
                    subject].oob_prediction_

    predicted = predictions['int'].copy()
    for subject in range(1, 50):
        predicted[subject][:, 0] = predictions['int'][subject][:, 0]
        predicted[subject][:, 1] = predictions['ple'][subject][:, 1]
        predicted[subject][:, 2:] = predictions['dec'][subject][:, 2:]

    # Regularize:
    predicted_stack = np.zeros(
        (predicted[1].shape[0], predicted[1].shape[1], 49))
    for subject in range(1, 50):
        predicted_stack[:, :, subject - 1] = predicted[subject]
    predicted_mean = predicted_stack.mean(axis=2, keepdims=True)
    predicted_reg = {kind: predicted.copy() for kind in kinds}
    for i, kind in enumerate(kinds):
        predicted_reg[kind] = regularize[i] * predicted_mean + (
            1 - regularize[i]) * predicted_stack
    predicted_stack[:, 0, :] = predicted_reg['int'][:, 0, :]
    predicted_stack[:, 1, :] = predicted_reg['ple'][:, 1, :]
    predicted_stack[:, 2:, :] = predicted_reg['dec'][:, 2:, :]
    predicted = predicted_stack

    observed = predicted.copy()
    for subject in range(1, 50):
        observed[:, :, subject - 1] = Y_test[subject]
    score = scoring.score(predicted, observed)
    rs = {}
    predictions = {}
    for kind in ['int', 'ple', 'dec']:
        rs[kind] = scoring.r(kind, predicted, observed)

    print("For subchallenge 1:")
    print("\tScore = %.2f" % score)
    for kind in kinds:
        print("\t%s = %.3f" % (kind, rs[kind]))

    return (rfcs, score, rs)
Example #29
def rfc_cv(X,
           Y,
           n_splits=5,
           n_estimators=15,
           max_features=1000,
           min_samples_leaf=1,
           max_depth=None,
           regularize=[0.7, 0.35, 0.7]):
    test_size = 0.2
    n_molecules = X.shape[0]
    shuffle_split = ShuffleSplit(n_molecules, n_splits, test_size=test_size)
    test_size *= n_molecules
    rfcs = {}
    n_subjects = 49
    for subject in range(1, n_subjects + 1):
        rfc = RandomForestRegressor(n_estimators=n_estimators,
                                    max_features=max_features,
                                    min_samples_leaf=min_samples_leaf,
                                    max_depth=max_depth,
                                    oob_score=False,
                                    n_jobs=-1,
                                    random_state=0)
        rfcs[subject] = rfc
    rs = {'int': [], 'ple': [], 'dec': []}
    scores = []
    for train_index, test_index in shuffle_split:
        predicted_list = []
        observed_list = []
        for subject in range(1, n_subjects + 1):
            rfc = rfcs[subject]
            X_train = X[train_index]
            Y_train = Y[subject][train_index]
            rfc.fit(X_train, Y_train)
            X_test = X[test_index]
            predicted = rfc.predict(X_test)
            observed = Y[subject][test_index]
            predicted_list.append(predicted)
            observed_list.append(observed)
        observed = np.ma.dstack(observed_list)
        predicted = np.dstack(predicted_list)
        predicted_mean = predicted.mean(axis=2, keepdims=True)
        predicted_int = regularize[0] * (predicted_mean) + (
            1 - regularize[0]) * predicted
        predicted_ple = regularize[1] * (predicted_mean) + (
            1 - regularize[1]) * predicted
        predicted = regularize[2] * (predicted_mean) + (
            1 - regularize[2]) * predicted
        predicted[:, 0, :] = predicted_int[:, 0, :]
        predicted[:, 1, :] = predicted_ple[:, 1, :]
        score = scoring.score(predicted, observed)
        scores.append(score)
        for kind in ['int', 'ple', 'dec']:
            rs[kind].append(scoring.r(kind, predicted, observed))
    for kind in ['int', 'ple', 'dec']:
        rs[kind] = {
            'mean': np.mean(rs[kind]),
            'sem': np.std(rs[kind]) / np.sqrt(n_splits)
        }
    scores = {
        'mean': np.mean(scores),
        'sem': np.std(scores) / np.sqrt(n_splits)
    }
    print(
        "For subchallenge 1, using cross-validation with at least %d samples_per_leaf:"
        % min_samples_leaf)
    print("\tscore = %.2f+/- %.2f" % (scores['mean'], scores['sem']))
    for kind in ['int', 'ple', 'dec']:
        print("\t%s = %.2f+/- %.2f" %
              (kind, rs[kind]['mean'], rs[kind]['sem']))

    return scores, rs
Example #30
def genReport():
    scoring.score()
    report.gen()
Example #31
def rfc_(X_train,
         Y_train,
         X_test_int,
         X_test_other,
         Y_test,
         max_features=1500,
         n_estimators=1000,
         max_depth=None,
         min_samples_leaf=1):
    print(max_features)

    def rfc_maker():
        return RandomForestRegressor(max_features=max_features,
                                     n_estimators=n_estimators,
                                     max_depth=max_depth,
                                     min_samples_leaf=min_samples_leaf,
                                     n_jobs=-1,
                                     oob_score=True,
                                     random_state=0)

    n_subjects = 49
    predicted_train = []
    observed_train = []
    predicted_test = []
    observed_test = []
    rfcs = {subject: rfc_maker() for subject in range(1, n_subjects + 1)}
    for subject in range(1, n_subjects + 1):
        print(subject)
        observed = Y_train[subject]
        rfc = rfcs[subject]
        rfc.fit(X_train, observed)
        #predicted = rfc.predict(X_train)
        predicted = rfc.oob_prediction_
        observed_train.append(observed)
        predicted_train.append(predicted)

        observed = Y_test[subject]
        rfc = rfcs[subject]
        if Y_train is Y_test:  # OOB prediction
            predicted = rfc.oob_prediction_
        else:
            predicted = rfc.predict(X_test_other)
            predicted_int = rfc.predict(X_test_int)
            predicted[:, 0] = predicted_int[:, 0]
        observed_test.append(observed)
        predicted_test.append(predicted)
    scores = {}
    for phase, predicted_, observed_ in [
        ('train', predicted_train, observed_train),
        ('test', predicted_test, observed_test)
    ]:
        predicted = np.dstack(predicted_)
        observed = np.ma.dstack(observed_)
        predicted_mean = np.mean(predicted, axis=2, keepdims=True)
        regularize = 0.7
        predicted = regularize * (predicted_mean) + (1 -
                                                     regularize) * predicted
        score = scoring.score(predicted, observed, n_subjects=n_subjects)
        r_int = scoring.r('int', predicted, observed)
        r_ple = scoring.r('ple', predicted, observed)
        r_dec = scoring.r('dec', predicted, observed)
        print("For subchallenge 1, %s phase, score = %.2f (%.2f,%.2f,%.2f)" %
              (phase, score, r_int, r_ple, r_dec))
        scores[phase] = score
    return rfcs, scores['train'], scores['test']
Example #32
def main():	
	parser = argparse.ArgumentParser(description="Run QA-CLEF-System")
	parser.add_argument('--preprocess',action="store_true")
	parser.add_argument('--train',action="store_true")
	parser.add_argument('--answeronly',action='store_true')
	parser.add_argument('--selftest',action='store_true')
	parser.add_argument('--data',nargs = '+',default=[2011],type=int)
	parser.add_argument('--test',nargs = '+',default=[2012],type=int)
	parser.add_argument('--forcedownload',action='store_true')
	parser.add_argument('--preprocessonly',action='store_true')
	parser.add_argument('--ngram', type=int, default=3)
	parser.add_argument('--threshold', type=float, default=0.5)
	parser.add_argument('--report',action='store_true')
	args = parser.parse_args()
	process_args(args)

	data = []
	for edition in args.data + args.test:
		_data = qacache.find_data(edition)

		if args.preprocess or _data is None:
			input_check([edition],args.forcedownload)

			_data = input_parse([edition])

			print >> sys.stderr, 'preprocessing ' + str(edition) + '-data'
			_data = preprocessing.preprocess(_data)

			qacache.store_preprocessed_data(edition,_data[0])
		else:
			print >> sys.stderr, str(edition) + '-data is found on cache/' + str(edition) + '-prerocessed.txt'
		data.append(_data)

	if args.preprocessonly:
		print >> sys.stderr, 'Preprocess-only task is done.'
		sys.exit(0)

	# build-model
	print >> sys.stderr, 'Building model...'
	training_model = model_builder.build_model(data[:len(args.data)])
	test_model = model_builder.build_model(data[-len(args.test):]) if len(args.test) != 0 and not args.selftest else []

	# scoring
	print >> sys.stderr, 'Unweighted Feature Scoring...'
	training_model and scoring.score(training_model)
	test_model and scoring.score(test_model)

	# training
	weight = qacache.stored_weight()
	if args.train or weight is None:
		print >> sys.stderr, 'Training...'
		weight = train(training_model)
	else:
		print >> sys.stderr, 'Weight is found on cache/weight.txt'

	# weighted_scoring
	print >> sys.stderr, 'Weighted Feature Scoring...'
	final = scoring.weighted_scoring(training_model if args.selftest else test_model, weight)

	# answer selection
	select_answer(final,args.threshold)

	# evaluation
	result = evaluate(final)

	qacache.write_json(final,'final.txt',indent=True)

	if args.report:
		report(final, args.test if not args.selftest else args.data,weight)

	print "Result: %f" % result
Example #33

### Pre-process data ###
target = 'HPYLORI'
data1 = dp.modify_data(data,[],data.columns,target)
print(data1)

stats.gen_stats(data1, target)
n_features = data1.shape[1]-1

# # #################### RUNNING WITHOUT BOOSTING AND BAGGING for all ranking feature selections and CFS###############
n_seed = 2
splits =2
runs = stats.runSKFold(n_seed,splits,data=data1)
# score.score(rsr.normal_run( n_seed, splits, ['infogain_10'], ['elasticnet'], runs, n_features),n_seed,splits)
score.score(sfs_r.subset_run(n_seed, splits,['elasticnet'],['f1'],runs,n_features),n_seed,splits)
# sfs_r.subset_features(n_seed,splits, ['knn'],['accuracy'],runs, n_features)
# score.score(bbr.boostbag_run(n_seed,splits,['infogain_10'],['elasticnet'],runs,'boost',n_features), n_seed,splits)
    
# # score.score(nr.normal_run(data1,n_seed=1,splits=2,methods=['cfs_0'],estimators=['knn']),1,2)
# num = data1.shape[1]
# score.score(parallel.normal_run( 1, 10, ['fcbf_0'], ['naive_bayes'], runs),1,10)

# parallel.normal_run( 1, 10, ['mrmr_0'], ['naive_bayes'], runs)
#, 'infogain_20','reliefF_10','reliefF_20','infogain_'+str(num),'cfs_0'
# # , 'svm','naive_bayes','knn','xgboost'
# # , 'infogain_20', 'reliefF_10','reliefF_20','infogain_'+str(num)
# score.score(parallel.boostbag_run( 2,3, ['infogain_10'], ['elasticnet'], runs,'bag'),2,3)
# parallel.subset_run(5,10,['elasticnet'],['f1'],runs)
# parallel.subset_run(1,2,['knn'],['f1'],runs)
Example #34
import scoring
import random
import sys

if len(sys.argv) != 2:
    raise Exception("Script needs the test set name")

test_set = sys.argv[1]

contributors, c2S, projects, p2D, p2W, p2B, p2S = io.read_input_file(
    "input/{}.txt".format(test_set))
p_order, p2C_order = io.read_output_init_file("output/{}.out".format(test_set),
                                              projects, contributors)

P = len(p_order)
score = scoring.score(contributors, c2S, projects, p2D, p2W, p2B, p2S, p_order,
                      p2C_order)

save_step = 100000
last_save = score
nb_save = 0

max_nb_test = 1000000
nb_test = 0

print(score)
while nb_test < max_nb_test:
    if score - last_save > save_step:
        print("Saving")
        io.write_output("output/{}-optim{}.out".format(test_set, nb_save),
                        projects, contributors, p_order, p2C_order)
        nb_save += 1
def main():
    args = cli.parse_args()

    if not args.no_det:
        np.random.seed(args.env_seed)
        random.seed(args.env_seed)
        torch.manual_seed(args.env_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

    ### Logger setup ###
    logger = Logger(args.fileloc, args.load_loc)
    logger.init_config(args)
    logger.load_results()

    ### If we're making a visualisation, branch here ###
    if args.task == 'vis':
        if args.ranking not in logger.data['scores'][0]:
            print("\nNeed to compute ranking {} before doing visualisation!".
                  format(args.ranking))
            exit()
        run_and_save(logger)
        exit()

    ### Get or update all results ###
    counts_do = {
        'redo': args.redo_all or not logger.is_done('counts'),
        'update': args.more_count is not None
    }
    scores_do = {
        'redo':
        counts_do['redo'] or counts_do['update']
        or not logger.is_done('scores'),
        'update':
        args.more_scoretypes is not None
    }
    interpol_do = {
        'redo':
        scores_do['redo'] or not logger.is_done('interpol')
        or args.redo_interpol is not None,
        'update':
        scores_do['update']
    }

    ### Counts
    if counts_do['redo']:
        print("\n----- Counting -----\n")
        counts = count(logger)
        logger.update_counts(counts)

    if counts_do['update']:
        N = args.more_count
        print("\n----- Additional Counts ({} more runs) -----\n".format(N))

        # If we're adding more counts without having run before, then we need to reset the
        # env or we would be revisiting the same states because of the seed.
        if not counts_do['redo']:
            for _ in range(logger.config['n_runs']):
                logger.config['env'].reset()

        counts = count(logger, n_runs=N)
        logger.update_counts(counts, addn=N)

    if counts_do['redo'] or counts_do['update']:
        logger.dump_results()
        logger.dump_config()

    ### Scores
    if scores_do['redo']:
        print("\n----- Scoring -----\n")
        scores = score(logger)
        logger.update_scores(scores)

    if scores_do['update']:
        already_done = [
            st for st in args.more_scoretypes
            if st in logger.config['score_types']
        ]
        if len(already_done) != 0:
            raise Exception(
                "Scoretypes", ",".join(already_done),
                "already done! Remove them from --more_scoretypes")
        print("\n----- Additional Scores ({}) -----\n".format(
            args.more_scoretypes))
        scores = score(logger, score_types=args.more_scoretypes)
        logger.update_scores(scores)

    if scores_do['redo'] or scores_do['update']:
        logger.dump_results()
        logger.dump_config()

    ### Interpolation
    if interpol_do['redo']:
        print("\n----- Interpolating -----\n")
        if args.redo_interpol is not None:
            i, t = args.redo_interpol
            logger.config['n_inc'] = i if i >= 0 else logger.config['n_inc']
            logger.config['n_test'] = t if t >= 0 else logger.config['n_test']
        elif logger.config['n_inc'] == -1:
            logger.config['n_inc'] = int(
                logger.data['logs'][0]['counting_abs_states'] / 10)
        interpol = interpolate(logger)
        logger.update_interpolation(interpol)

    if interpol_do['update']:
        print("\n----- Additional Interpolations ({}) -----\n".format(
            args.more_scoretypes))
        interpol = interpolate(logger, score_types=args.more_scoretypes)
        logger.update_interpolation(interpol)

    if interpol_do['redo'] or interpol_do['update']:
        logger.dump_results()
        logger.dump_config()

    ### Display results ###
    draw_interpol_results(logger,
                          logger.config['score_types'],
                          0, [1],
                          x_fracs=True,
                          y_fracs=True,
                          smooth=False,
                          x_name='States Restored (%)',
                          y_names=['Original Reward (%)'],
                          combine_sbfl=True)
    draw_interpol_results(logger,
                          logger.config['score_types'],
                          4, [1],
                          y_fracs=True,
                          trans_x=lambda x: 1 - x,
                          x_name="Policy's Action Taken (% of Steps)",
                          y_names=['Original Reward (%)'],
                          smooth=False,
                          combine_sbfl=True)