Example #1
def prefloptable():
    # iterate through each hand
    prefloptable = {}
    counter = 0
    for currhand in totalhands:
        counter += 1
        # one loop of this will calculate EHS for 1 hand, 1325 to go
        wintally = 0
        totaltally = 0
        handsdone = []
        handsdone.append(currhand)
        prunedhands = pruner(currhand, totalhands, totalflops, totalturns,
                             totalrivers)[0]
        # hand to remove, keep track of hands that have been removed before
        for opphand in [x for x in prunedhands if x not in handsdone]:
            # list comprehension
            # need to then remove this hand
            handsdone.append(opphand)
            totaltally += 1
            myrank = rank.ranking(list(currhand), [])
            opprank = rank.ranking(list(opphand), [])
            # how to deal with ties? figure it out
            if myrank > opprank:
                wintally += 1
            else:
                pass
        # need to consider ties
        totalwinrate = (wintally / totaltally) * 100
        prefloptable[currhand] = totalwinrate

    # Store data (serialize)
    with open('prefloptable.pickle', 'wb') as handle:
        pickle.dump(prefloptable, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return prefloptable
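The comments above leave tie handling open. A minimal sketch, assuming rank.ranking returns comparable scores where the higher score wins, that credits a tie as half a win so the win rate stays between 0 and 100:

def matchup_equity(myrank, opprank):
    """Share of one matchup credited to the hero hand.

    Assumes, as in the loop above, that a higher rank wins; a tie is
    worth half a win.
    """
    if myrank > opprank:
        return 1.0
    if myrank == opprank:
        return 0.5
    return 0.0

Inside the loop, wintally += matchup_equity(myrank, opprank) would replace the if/else and resolve the "need to consider ties" caveat.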
Example #2
    def get_rewards(self):
        player = self.get_active_player_index()
        opponent = 1 - player

        if self.history[-1] == 'f':
            return self.current_bets[opponent]

        # NOTE: the original passed the literal strings 'player.hand' and
        # 'opponent.hand'; the players' actual hole cards are presumably meant
        # (the attribute name below is an assumption about the surrounding class).
        player_rank = rank.ranking(self.hands[player], self.board)
        opponent_rank = rank.ranking(self.hands[opponent], self.board)
        if player_rank < opponent_rank:
            return self.current_bets[opponent]
        elif player_rank > opponent_rank:
            return -self.current_bets[player]
        else:
            return 0
Example #3
 def onSearchBtnClick(self, event):
     if self.selectedHero is None:
         print("Heroi não selecionado")
         return
     features = [
         self.dataset.columns[i + 1]
         for i in range(self.ckboxList.GetCount())
         if self.ckboxList.IsChecked(i)
     ]
     if len(features) == 0:
         print("Features não selecionadas")
         return
     #index = self.list_ctrl.GetFirstSelected()
     self.list_ctrl.DeleteAllItems()
     #hero_name = self.dataset.copy().iloc[index]['hero_names']
     #features = ["Agility", "Accelerated Healing", "Lantern Power Ring", "Dimensional Awareness"]
     print("Buscando por " + "'" + str(self.selectedHero) + "'")
     print("Features: ")
     print(features)
     result = ranking(self.dataset.copy(),
                      features,
                      str(self.selectedHero),
                      metodo=self.combo.GetValue())
     result = result[0:10]
     result = result.iloc[:, ::-1]
     #print result
     self.list_ctrl.DeleteAllColumns()
     self.list_ctrl.InsertColumn(0, 'Score', width=80)
     self.list_ctrl.InsertColumn(1, 'Super-herói', width=80)
     self.__addItems(result[0:10])  #top 10
     self.heroForm.SetValue('')
Example #4
def search(kwords_lst):
    conn = happybase.Connection(host = settings.HBASE_HOST,
                                port = settings.HBASE_PORT,
                                compat = '0.90')
    ksegs = []
    for kwords in kwords_lst:
        segs = jieba.cut(kwords, cut_all = True)
        unicode_segs = []
        for seg in segs:
            unicode_segs.append(seg.encode('utf-8'))

        ksegs += unicode_segs
    ksegs = set(ksegs)

    # 500 is not the correct parameter
    # should pass the number of html documents in table 'WebData'
    result_urls = ranking.ranking(conn, ksegs, 500)
    url_table = conn.table('WebData')
    results = []
    for url in result_urls:
        row = url_table.row(url)
        title = row['content:title']
        results.append([url, title])

    return results
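The comment above notes that 500 is a placeholder for the number of documents in 'WebData'. A rough sketch, assuming the table is small enough for a keys-only scan with happybase, of deriving that count instead of hard-coding it:

def count_documents(conn, table_name='WebData'):
    # Count rows with a keys-only scan; HBase has no cheap row count,
    # so this is only reasonable for modest table sizes.
    table = conn.table(table_name)
    scanner = table.scan(filter="FirstKeyOnlyFilter() AND KeyOnlyFilter()")
    return sum(1 for _ in scanner)

result_urls = ranking.ranking(conn, ksegs, count_documents(conn)) would then replace the hard-coded 500.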
Example #5
def averageRankingSingleShot(descr_probe,
                             descr_gallery,
                             maxrank=50,
                             iterations=100):
    ranks = np.zeros(maxrank)
    for i in xrange(iterations):
        descr_probe_i = get_random_elements(descr_probe)
        descr_gallery_i = get_random_elements(descr_gallery)

        descrs_query = []
        query_labels = []

        for p in descr_probe_i.keys():
            query_labels.append(p)
            descrs_query.append(descr_probe_i[p])

        descrs_gallery = []
        gallery_labels = []

        for p in descr_gallery_i.keys():
            gallery_labels.append(p)
            descrs_gallery.append(descr_gallery_i[p])

        r = rank.ranking(descrs_query,
                         query_labels,
                         descrs_gallery,
                         gallery_labels,
                         maxrank=maxrank)
        ranks += r

    return ranks * 1. / iterations
Example #6
 def test_post_ranking(self):
     form = ''
     handler = ranking()
     handler.request = Request({
         'REQUEST_METHOD': 'POST',
         'PATH_INFO': '/ranking',
         })
     handler.response = Response()
     handler.post()
Example #7
def floptable():
    # iterate through each hand
    floptable = {}
    counter = 0
    for currhand in totalhands:
        counter += 1
        # one loop of this will calculate EHS for 1 hand, 1325 to go
        wintally = 0
        totaltally = 0
        handsdone = []
        handsdone.append(currhand)
        prunedhands = pruner(currhand, totalhands, totalflops, totalturns,
                             totalrivers)[0]
        # hand to remove, keep track of hands that have been removed before
        for opphand in [x for x in prunedhands if x not in handsdone]:
            # need to then remove this hand
            handsdone.append(opphand)
            # removes illegal hands, flops, turns, rivers
            current = pruner(currhand, totalhands, totalflops, totalturns,
                             totalrivers)
            final = pruner(opphand, current[0], current[1], current[2],
                           current[3])
            # checking every flop for each hand
            for flop in final[1]:
                totaltally += 1
                myrank = rank.ranking(list(currhand), list(flop))
                opprank = rank.ranking(list(opphand), list(flop))
                # how to deal with ties? figure it out
                if myrank > opprank:
                    wintally += 1
                else:
                    pass

        # need to consider ties
        totalwinrate = (wintally / totaltally) * 100
        floptable[currhand] = totalwinrate
        print(totalwinrate)
        print(counter)

    # Store data (serialize)
    with open('floptable.pickle', 'wb') as handle:
        pickle.dump(floptable, handle, protocol=pickle.HIGHEST_PROTOCOL)

    return floptable
Example #8
def userdata_receive_cbandit(request, userid):
    global LoudBandit
    global ModeBandit
    global TempoBandit
    global CB_CKPTSTATE
    if request.method == 'GET':
        pulse = request.GET.get('heartrate')
        timevalue = (((datetime.datetime.now().hour) * 60) + datetime.datetime.now().minute)
        upc, created = UserPlayCounter.objects.get_or_create(userid=userid)
        # Only used since rating is a required value in our serializer
        rating = 1.0
        if (CB_CKPTSTATE != TempoBandit.get_checkpoint_state()):
            TempoBandit = CBandit.CBandit(CB_NUMBER_OF_STATES, CB_TEMPO_ACTIONS, CB_TEMPO_CKPT_PATH, cb_outputtempo)
            LoudBandit = CBandit.CBandit(CB_NUMBER_OF_STATES, CB_LOUD_ACTIONS, CB_LOUD_CKPT_PATH, cb_outputloud)
            ModeBandit = CBandit.CBandit(CB_NUMBER_OF_STATES, CB_MODE_ACTIONS, CB_MODE_CKPT_PATH, cb_outputmode)
            CB_CKPTSTATE = TempoBandit.get_checkpoint_state()
        if ((userid in cb_recommendation_cache) and cb_recommendation_cache.get(userid)):
            song = cb_recommendation_cache.get(userid).pop()
            # All songs that have been cached from one recommendation request will use the same ranking id
            rid = cb_rid_cache.get(userid)
        else:
            usernumber = upc.userindex
            bucketedpulse = Bucketizer.bucketize_pulse(int(pulse))
            bucketedtime = Bucketizer.bucketize_time(timevalue)
            state = usernumber*CB_NUMBER_OF_STATES + bucketedpulse*CB_TIME_BUCKETS + bucketedtime
            # We get all ranking ids here but they will all be the same (since they are updated at the same time)
            # Might change this to just get one, since it's all we need.
            temporid, tempo = TempoBandit.predict(state)
            moderid, mode = ModeBandit.predict(state)
            loudrid, loudness = LoudBandit.predict(state)
            # Cache new songs based on bandit suggestions
            cb_recommendation_cache[userid] = ranking.ranking(Bucketizer.bucketize_tempo(tempo), Bucketizer.bucketize_loudness(loudness), mode, userid)
            # all ranking ids should be identical, so it doesn't matter which one we choose
            cb_rid_cache[userid] = loudrid
            rid = loudrid
            song = cb_recommendation_cache.get(userid).pop()
        sc, created = SongCounter.objects.get_or_create(userid=userid, songid=song)
        delta = upc.playCounter - sc.lastPlayed
        data = Userdata.create(userid, song, pulse, rating, delta)
        data.ratingid = rid
        serializer = UserdataSerializer(data)
        return JsonResponse(serializer.data, status=200)

    # Not used at the moment
    elif request.method == 'PUT':
        data = JSONParser().parse(request)
        serializer = UserdataSerializer(userdata, data=data)
        if serializer.is_valid():
            serializer.save()
            return JsonResponse(serializer.data)
        return JsonResponse(serializer.errors, status=400)

    # Not used at the moment
    elif request.method == 'DELETE':
        return HttpResponse(status=403)
Example #9
    def test_get_ranking(self):
        # Renamed from test_post_ranking: this test exercises GET. The original
        # also asserted on an undefined `response` and called get() twice.
        handler = ranking()
        handler.request = Request({
            'REQUEST_METHOD': 'GET',
            'PATH_INFO': '/ranking',
            })
        handler.response = Response()
        result = handler.get()
        self.failUnless(len(result) <= 10)
        self.assertEqual('200 OK', handler.response.status)
Example #10
def userdata_receive_dnn(request, userid):
    global LoudDNN
    global ModeDNN
    global TempoDNN
    global DNN_CKPTSTATE
    if request.method == 'GET':
        timevalue = (((datetime.datetime.now().hour) * 60) + datetime.datetime.now().minute)
        pulse = request.GET.get('heartrate')
        upc, created = UserPlayCounter.objects.get_or_create(userid=userid)
        # Rating is set to 1 since we want a song with high rating
        rating = 1.0
        if (DNN_CKPTSTATE != TempoDNN.get_checkpoint_state()):
            LoudDNN = DNNModel.DNNModel(DNN_LOUD_CKPT_PATH, dnn_outputloud)
            ModeDNN = DNNModel.DNNModel(DNN_MODE_CKPT_PATH, dnn_outputmode)
            TempoDNN = DNNModel.DNNModel(DNN_TEMPO_CKPT_PATH, dnn_outputtempo)
            DNN_CKPTSTATE = TempoDNN.get_checkpoint_state()
        if ((userid in dnn_recommendation_cache) and dnn_recommendation_cache.get(userid)):
            song = dnn_recommendation_cache.get(userid).pop()
        else:
            data = {'user_id':[userid],'time':[timevalue],'heart_rate':[int(pulse)],'rating':[rating]}
            tempo = Bucketizer.bucketize_tempo(int(TempoDNN.get_predict_class_id(data_matrix=data)))
            mode = Bucketizer.bucketize_mode(int(ModeDNN.get_predict_class_id(data_matrix=data)))
            loudness = Bucketizer.bucketize_loudness(int(LoudDNN.get_predict_class_id(data_matrix=data)))
            # Cache new songs based on DNN suggestions
            dnn_recommendation_cache[userid] = ranking.ranking(tempo, loudness, mode, userid)
            song = dnn_recommendation_cache.get(userid).pop()
        sc, created = SongCounter.objects.get_or_create(userid=userid, songid=song)
        delta = upc.playCounter - sc.lastPlayed
        data = Userdata.create(userid, song, pulse, rating, delta)
        serializer = UserdataSerializer(data)
        return JsonResponse(serializer.data, status=200)

    # Not used at the moment
    elif request.method == 'PUT':
        data = JSONParser().parse(request)
        serializer = UserdataSerializer(userdata, data=data)
        if serializer.is_valid():
            serializer.save()
            return JsonResponse(serializer.data)
        return JsonResponse(serializer.errors, status=400)

    # Not used at the moment
    elif request.method == 'DELETE':
        return HttpResponse(status=403)
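Examples #8 and #10 share the same per-user cache pattern: pop a cached song if one is left, otherwise fetch a fresh ranked list from ranking.ranking. A generic sketch of that pattern (the names here are illustrative, not from the project):

def next_recommendation(cache, userid, refill):
    """Pop the next cached item for userid, refilling when the cache is empty.

    cache maps userid -> list of song ids; refill is a zero-argument callable
    returning a fresh ranked list (e.g. a ranking.ranking(...) call).
    """
    songs = cache.get(userid)
    if not songs:
        songs = list(refill())
        cache[userid] = songs
    return songs.pop()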
Example #11
    def process(self, data, election, user):

        if self.choice:
            c = choice.choice()
            c.ans = data["ans"]
            user.ballot.add_vote(c)
            self.choice = False

        if self.rank:
            r = ranking.ranking()
            r.rankings = data["ans"]
            user.ballot.add_vote(r)
            self.rank = False

        if self.writeIn:
            w = writeIn.writeIn()
            w.ans = data["ans"]
            user.ballot.add_vote(w)
            self.writeIn = False

        if self.voteOrWriteIn:
            self.voteOrWriteIn = False
            c = int(data["ans"])
            index = len(user.ballot.votes)
            if c == len(election.voteActions[index].options):
                self.writeIn = True
                write = {
                    "Instructions": "Please write in your choice: ",
                    "type": "char25",
                }
                instance.send(write)
                return election, user
            else:
                c = choice.choice()
                c.ans = data["ans"]
                user.ballot.add_vote(c)

        if len(user.ballot.votes) < len(election.voteActions):
            q = self.getVote(election, len(user.ballot.votes))
            instance.send(q)
        else:
            self.ballotComplete = True

        return election, user
Example #12
def search(query1, i_i):
    logging.basicConfig(level=logging.DEBUG, format='%(filename)s %(levelname)s: %(asctime)s: %(message)s')
    logger = logging.getLogger('main')
    logger.info('Executing indexing module')
    logger.info('Reading file')
    # GIVEN QUERY FROM FRONT-END, FIND RELEVANT RESULTS
    query = query1  # user input
    print('input:', query)
    matcher = match()
    q = preprocessing().the_works(query)
    CR = i_i.lookup_query(q)
    CR = matcher.boolean(CR)
    # added in case not every token matches
    doctoken_matchnums = [len(i) for i in CR.values()]
    if len(doctoken_matchnums) == 0: return ''
    scaler = max(doctoken_matchnums)
    CR = matcher.scale(CR,scaler)

    # RANK RELEVANT RESULTS
    r_ranking = ranking()
    resources = list(CR.keys())
    max_freq = r_ranking.get_max_frequencies(index=CR) # , num_docs=len(i_i.storage.index)
    # Now save this into the persisted memory object within the index
    i_i.storage.max_frequency_terms_per_doc = max_freq
    res = r_ranking.relevance_ranking(query=query,
                                      num_results=5,
                                      index=i_i.index,
                                      resources=resources,
                                      max_freq=i_i.storage.max_frequency_terms_per_doc,
                                      N=len(i_i.storage.index),
                                      term_doc_matrix=i_i.doc_term_matrix_all)

    # GENERATE RANKED JSON SNIPPETS FOR FRONT-END
    snipper = snip(r_ranking)
    json_snippets = snipper.get_snippets(res, resources=resources, query=query, i_i=i_i)

    # print('output:', json_snippets)
    return json_snippets
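relevance_ranking's internals are not shown here, but the inputs it receives (per-document maximum term frequency and the collection size N) are the ingredients of the classic augmented TF-IDF weight. As an assumption about what such a ranker typically computes, a sketch of that weighting:

import math

def augmented_tfidf(tf, max_tf, df, N):
    # Augmented term frequency (damped by the document's most frequent term)
    # multiplied by inverse document frequency.
    return (0.5 + 0.5 * tf / max_tf) * math.log(N / df)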
Example #13
    def SortPlayers(self, break_ties=0):
        self.player_rankings = []
        ranking_list = []
        for player_1 in self.player_handler_list:
            inserted_flag = 0
            for index, player_2 in enumerate(ranking_list):
                if player_1.GetScore() > player_2.GetScore():
                    ranking_list.insert(index, player_1)
                    inserted_flag = 1
                    break
            if not inserted_flag:
                ranking_list.append(player_1)

        for index, player in enumerate(ranking_list):
            inserted_flag = 0
            for rank in self.player_rankings:
                if abs(rank.GetScore() - player.GetScore()) < .2:
                    rank.AddPlayer(player)
                    inserted_flag = 1
                    break
            if not inserted_flag:
                temp = ranking.ranking(index + 1, [player])
                self.player_rankings.append(temp)
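The insertion sort plus the 0.2-point tie grouping above can be expressed more compactly with sorted(). A sketch assuming GetScore() returns a number; it yields plain lists rather than the project's ranking objects:

def group_by_score(players, tie_threshold=0.2):
    # Sort descending by score, then start a new group whenever the gap to
    # the current group's leading player reaches the tie threshold.
    groups = []
    for player in sorted(players, key=lambda p: p.GetScore(), reverse=True):
        if groups and abs(groups[-1][0].GetScore() - player.GetScore()) < tie_threshold:
            groups[-1].append(player)
        else:
            groups.append([player])
    return groups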
Example #14
def rankUI(root, funcName, databaseHandle: mysql.connector.connect):
    def closeThisWindow():
        top.destroy()

    top = Toplevel(root)
    top.title("统计信息")
    titleFrame = Frame(top)
    titleLabel = Label(titleFrame, text="成 绩 排 名", font=('圆体-简', '30'))
    titleFrame.pack(side='top', fill=X, padx=10)
    titleLabel.pack(side='top', fill=X, padx=10)

    color = ("#ffffff", "#ececec")

    deptInfo = queryDept(databaseHandle)
    for dept in deptInfo:
        deptFrame = Frame(top)
        deptLabelFrame = Frame(deptFrame)
        deptLabel = Label(deptLabelFrame, text=dept[0], font=('圆体-简', '20'))
        leftLabel = Label(deptLabelFrame,
                          text="~  ~  ~  ~  ~  ~  ~  ~  ~  ~  ~  ",
                          font=('圆体-简', '20'))
        rightLabel = Label(deptLabelFrame,
                           text="~  ~  ~  ~  ~  ~  ~  ~  ~  ~  ~  ",
                           font=('圆体-简', '20'))
        deptFrame.pack(side='top', padx=10, pady=20)
        deptLabelFrame.pack(side='top', fill=X, padx=10)
        leftLabel.pack(side='left', padx=20, pady=20)
        deptLabel.pack(side='left', fill=X, padx=20)
        rightLabel.pack(side='left', padx=20, pady=20)
        infoTableFrame = Frame(deptFrame)
        infoTableFrame.pack(side='top', fill=X, padx=10)
        headFrame = Frame(infoTableFrame)
        headFrame.pack(side='top', padx=10)
        SnoHeadLabel = Label(headFrame, text='学号', width=10)
        SnoHeadLabel.pack(side='left', padx=10)
        SnameHeadLabel = Label(headFrame, text='姓名', width=10)
        SnameHeadLabel.pack(side='left', padx=10)
        avgGradeHeadLabel = Label(headFrame, text='平均分', width=10)
        avgGradeHeadLabel.pack(side='left', padx=10)
        rankHeadLabel = Label(headFrame, text='名次', width=10)
        rankHeadLabel.pack(side='left', padx=10)

        courseInfo = queryCourseByDept(databaseHandle, dept)
        for course in courseInfo:
            courseHeadLabel = Label(headFrame, text=course[0], width=10)
            courseHeadLabel.pack(side='left', padx=10)

        infoByDept = ranking(databaseHandle, dept)
        count = 1

        for course in infoByDept:
            infoRowFrame = Frame(deptFrame, bg=color[count % 2])
            infoRowFrame.pack(side='top', fill=X, padx=10)
            SnoLabel = Label(infoRowFrame,
                             text=course[0],
                             width=10,
                             bg=color[count % 2])
            SnoLabel.pack(side='left', padx=10)
            SnameLabel = Label(infoRowFrame,
                               text=course[1],
                               width=10,
                               bg=color[count % 2])
            SnameLabel.pack(side='left', padx=10)
            avgGradeLabel = Label(infoRowFrame,
                                  text=course[2],
                                  width=10,
                                  bg=color[count % 2])
            avgGradeLabel.pack(side='left', padx=10)
            rankLabel = Label(infoRowFrame,
                              text=count,
                              width=10,
                              bg=color[count % 2])
            rankLabel.pack(side='left', padx=10)
            for i in range(len(courseInfo)):
                scInfo = querySCBySnoCno(course[0], courseInfo[i][1],
                                         databaseHandle)
                print(scInfo)
                gradeLabel = Label(infoRowFrame, bg=color[count % 2], width=10)
                gradeLabel.pack(side='left', padx=10)
                try:
                    gradeLabel.config(text=scInfo[4])
                except:
                    gradeLabel.config(text='无成绩')
            count += 1
    exitButtonFrame = Frame(top)
    exitButton = Button(exitButtonFrame, text='关闭', command=closeThisWindow)
    exitButtonFrame.pack(side='top', fill=X, padx=10)
    exitButton.pack(side='top', fill=X, padx=10)
    top.mainloop()
Example #15
def index():
    matches = db.session.query(Match).order_by(Match.created_asof.desc())
    rankings = ranking(matches)
    return render_template('index.html', matches=matches, rankings=rankings)
Example #16
            print "\trecall macro:", recallmacro
            recallmicro = recall_score(y_test, y_pred, average='micro')
            print "\trecall micro:", recallmicro
            f1macro = f1_score(y_test, y_pred, average='macro')
            print "\tf1 macro:", f1macro
            f1micro = f1_score(y_test, y_pred, average='micro')
            print "\tf1 micro:", f1micro

            rank_options = [False]
            if test['method'] == 'dummy':
                rank_options = [True, False]

            for preshuffle in rank_options:
                df_with_ranking = rk.ranking(data_test,
                                             y_pred,
                                             y_prob,
                                             preshuffle=preshuffle,
                                             target=True)

                search_ids = df_with_ranking['srch_id']
                diff_search_ids = search_ids.drop_duplicates()

                k = 0
                ndcg_list = []

                for id in diff_search_ids:
                    mask = (df_with_ranking['srch_id'] == id)
                    result_df = df_with_ranking.loc[mask]
                    ndcg_result = ndcg.ndcg(result_df)
                    ndcg_list.append(ndcg_result)
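The loop above averages one NDCG value per srch_id. The project's ndcg.ndcg is not shown; a minimal sketch of what such a helper usually computes, with standard log2 discounting (an assumption about this codebase):

import numpy as np

def dcg_at_k(relevances, k):
    # DCG: sum of rel_i / log2(i + 1) over the first k positions.
    rel = np.asarray(relevances, dtype=float)[:k]
    return float(np.sum(rel / np.log2(np.arange(2, rel.size + 2))))

def ndcg_at_k(relevances, k):
    # Normalise by the DCG of the ideal (descending) ordering.
    ideal = dcg_at_k(sorted(relevances, reverse=True), k)
    return dcg_at_k(relevances, k) / ideal if ideal > 0 else 0.0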
Example #17
import menu as menu
import jogo as jogo
import ranking as ranking
import tutorial as tutorial
import gameover as gameover
from PPlay.sound import *

janela = Window(1000, 600)
janela.set_title("Mimi")
game_state = 0
dificuldade = 1

tema = Sound("sons/jogo.ogg")
tema.set_volume(50)
tema.set_repeat(True)
tema.play()

while True:
    if game_state == 0:
        game_state = menu.menu(janela)
    if game_state == 1:
        game_state = jogo.jogo(janela)
    if game_state == 2:
        game_state = tutorial.tutorial(janela)
    if game_state == 3:
        game_state = ranking.ranking(janela)
    if game_state == 4:
        game_state = gameover.gameover(janela)

    janela.update()
select_cols = [
    'prop_starrating', 'prop_review_score', 'prop_location_score2',
    'price_usd', 'promotion_flag', 'no_bookings_prop', 'no_found_prop'
]
rank_options = [False]

slices_to_do = range(17, 25)

for i in slices_to_do:
    data_file = "data/test_set_added_variables_%i.csv" % (i)
    data_test_slice = dp.DataAggregator(data_file)
    data_test_slice.read_data()
    data_test_df = data_test_slice.df

    X_test = make_X(data_test_df, select_cols)

    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)

    for preshuffle in rank_options:
        df_with_ranking = rk.ranking(data_test_df,
                                     y_pred,
                                     y_prob,
                                     preshuffle=preshuffle,
                                     target=False)

    final_df = df_with_ranking[['srch_id', 'prop_id']]

    final_df.to_csv('prediction_file%d.csv' % (i), index=False)
    print "slice %d done" % (i)
print "start classfieing"

model = pkl.load(open(
    'Classifiers_final/gradient_boosting_Boosting-False_max_leaf_nodes-4-learning_rate-0.1-n_estimators-100-subsample-0.5-random_state-2-min_samples_split-5-max_depth-None.pkl',
    'rb'))  # pickles must be opened in binary mode; forward slash avoids the stray '\g' escape

select_cols = ['prop_starrating', 'prop_review_score', 'prop_location_score2', 'price_usd',
               'promotion_flag', 'no_bookings_prop', 'no_found_prop']
rank_options = [False]

for i in slices_to_do:
    data_file = "data/test_set_added_variables_%i.csv" % (i)
    data_test_slice = dp.DataAggregator(data_file)
    data_test_slice.read_data()
    data_test_df = data_test_slice.df

    X_test = make_X(data_test_df, select_cols)

    y_pred = model.predict(X_test)
    y_prob = model.predict_proba(X_test)

    for preshuffle in rank_options:
        df_with_ranking = rk.ranking(data_test_df, y_pred, y_prob, preshuffle=preshuffle, target=False)

    final_df = df_with_ranking[['srch_id', 'prop_id']]

    final_df.to_csv('prediction_file%d.csv' % (i), index=False)
    print "slice %d done" % (i)

            print "\taccuracy:", accuracy
            recallmacro = recall_score(y_test, y_pred, average='macro')
            print "\trecall macro:", recallmacro
            recallmicro = recall_score(y_test, y_pred, average='micro')
            print "\trecall micro:", recallmicro
            f1macro = f1_score(y_test, y_pred, average='macro')
            print "\tf1 macro:", f1macro
            f1micro = f1_score(y_test, y_pred, average='micro')
            print "\tf1 micro:", f1micro

            rank_options = [False]
            if test['method'] == 'dummy':
                rank_options = [True, False]

            for preshuffle in rank_options:
                df_with_ranking = rk.ranking(data_test, y_pred, y_prob, preshuffle=preshuffle, target = True)

                search_ids = df_with_ranking['srch_id']
                diff_search_ids = search_ids.drop_duplicates()

                k = 0
                ndcg_list = []

                for id in diff_search_ids:
                    mask = (df_with_ranking['srch_id'] == id)
                    result_df = df_with_ranking.loc[mask]
                    ndcg_result = ndcg.ndcg(result_df)
                    ndcg_list.append(ndcg_result)

                meanndcg = sum(ndcg_list) / float(len(ndcg_list))
                f.write('%s; %s; %s; %s; %s; %f; %f; %f; %f; %f; %f\n' % (
    clf = ensemble.RandomForestClassifier(**params)
    start_time = datetime.now()
    clf.fit(X_train, y_train)
    print clf.classes_
    print "trained in", datetime.now() - start_time

    y_pred = clf.predict(X_train)
    y_prob = clf.predict_proba(X_train)
    print "class probs", y_prob
    print "classes found", np.unique(y_pred)
    print "accuracy:", clf.score(X_train, y_train)
    print "recall macro:", recall_score(y_train, y_pred, average='macro')
    print "recall micro:", recall_score(y_train, y_pred, average='micro')
    print "f1 macro:", f1_score(y_train, y_pred, average='macro')
    print "f1 micro:", f1_score(y_train, y_pred, average='micro')

    df_with_ranking = rk.ranking(traindf, y_pred, y_prob)

    search_ids = df_with_ranking['srch_id']
    diff_search_ids = search_ids.drop_duplicates()

    k = 0
    ndcg_list = []

    for id in diff_search_ids:
        mask = (df_with_ranking['srch_id'] == id)
        result_df = df_with_ranking.loc[mask]
        # keep the result in its own name so the ndcg module isn't rebound
        ndcg_result = ndcg.ndcg(result_df, k)
        ndcg_list.append(ndcg_result)

Example #22
import ranking

if __name__ == "__main__":
    a = ranking.ranking([
        ['a', 'b', 5, 5],
        ['a', 'c', 5, 3],
        ['b', 'c', 4, 3],
        ['a', 'c', 5, 3],
        ['a', 'd', 5, 1],
        ['b', 'c', 5, 3],
        ['b', 'd', 5, 1],
        ['c', 'd', 3, 1],
        ['a', 'd', 4, 3],
        ['d', 'a', 4, 1],
    ])
    print(a.massey())
    print(a.colley())
    a.find_dup()
    print(a.massey())
    print(a.colley())

    print(a.borda([
        [['A',3],['B',1],['D',2]],
        [['A',2],['B',1],['D',4],['C',3]],
        [['E',1]]
        ]))
Example #23
        wh_q_lst.append(binary_form)
    processed_q_lst.append(binary_form)
    labels.append(wh_word)

# get the best guess sentence
fuzzy_ans = []
predictions = ranking.fuzzyCompare(sentences, fuzzy_lst)
threshold = 89
for (best_sentence, score) in predictions:
    if score < threshold:
        fuzzy_ans.append("No.")
    else:
        fuzzy_ans.append("Yes.")

wh_guess = []
ind = ranking.ranking(sentences, wh_q_lst)
for i in ind:
    wh_guess.append(sentences[i])
######################

#combine all candidates
ind_wh = 0
ind_binary = 0
candidates = []
for lab in labels:
    if lab != "BINARY" and lab != "OTHERS":
        candidates.append(wh_guess[ind_wh])
        ind_wh += 1
    else:
        candidates.append(fuzzy_ans[ind_binary])
        ind_binary += 1
Example #24
def get_documents(query,score_dict,pos_dict):
    global words_list
    global phrase_list
    global negphrase_list
    global negwords_list
    global index

    common_docid_set = set()

    pre_process_query(query)

    all_negative_phrase_docs = defaultdict() ## stores the docids of all the phrases that are negated i.e it stores docids which contain the negated phrase
    all_negative_word_docs = defaultdict()   ## stores the docids of all words that are negated i.e it stores docids which contain the negated word
    
    ## processing phrases in the query that are not negated -- put their information in the 2 result dictionaries
    process_phrase(phrase_list,score_dict,pos_dict)

    ## processing phrases in the query that are negated -- put their info in a separate dict 'all_negative_phrase_docs'
    ## functionality is the same as the processing for a normal phrase, but stored in a separate dict
    for phrase in negphrase_list:
        
        phrase_words = phrase.strip().split()               ## split phrase into phrase-words
        for i in range(0,len(phrase_words)):                ## stemming the phrase words as index is stemmed
            phrase_words[i] = porter.stem(phrase_words[i])
        
        if len(phrase_words) == 1:                          ## if only one word in the phrase 
            words_list.append(phrase_words[0])                          ## add it to the word list to process as a normal word in the query
        else:                                               ## else
            if phrase_words[0] in index.keys():                         ## if the first phrase-word is in index
                common_docid_set = set(index[phrase_words[0]].keys())   ## add it's docids to the set containing the common docids

            for i in range(1,len(phrase_words)):
                if phrase_words[i] in index.keys():                     ## if the next phrase-word is in the index
                    common_docid_set = set(index[phrase_words[i]].keys()) & common_docid_set    ## find the docids that are common with the previous words in the phrase
                else:                                                   ## if one of the phrase words is not in index, it implies the phrase does not occur in any document
                    common_docid_set.clear()                            ## hence empty the common doc set and break
                    break

            for docid in common_docid_set:                          ## for each doc containing the phrase-words, find if they occur together as a phrase
                prev_pos_set = set(index[phrase_words[0]][docid])       ## stores the positions of the first phrase-word
                for j in range(1,len(phrase_words)):
                    curr_pos_set = set()                                 ## reset per phrase-word so stale shifted positions don't carry over
                    for pos in index[phrase_words[j]][docid]:            ## for each position of the current phrase-word in current doc (was indexed with the stale variable i)
                        curr_pos_set.add(pos-j)                             ## decrement the positions by j to match the start position of the phrase
                    correct_pos_set = curr_pos_set & prev_pos_set       ## stores only the start positions of those instances where the words appear next to each other 
                    if len(correct_pos_set) == 0:                       ## if there are no such positions in this doc where the words appear as a phrase
                        break                                               ## break and move to the next doc
                    else:
                        prev_pos_set = correct_pos_set
                        
                if len(correct_pos_set) == 0:               ## if pos set is empty, it implies that no phrase match in current document
                    continue                                    ## hence continue to next document
                else:                                       ## else make the entry in the doc dict for positive phrase
                    if docid not in all_negative_phrase_docs:           ## if new docid entry in dict
                        score = len(correct_pos_set)*len(phrase_words)      ## score = no of occurrences * no of phrase words
                        all_negative_phrase_docs[docid] = score             ## storing docids with no of occurrences
                    else:                                               ## if existing docid, add occurrences and new positions
                        score = all_negative_phrase_docs[docid] + (len(correct_pos_set)*len(phrase_words))      ## add the scores for the next phrase in the same doc
                        all_negative_phrase_docs[docid] = score         ## storing the scores just to maintain consistency, scores will not be used for document retrieval

    
    ## processing individual words in the query that are not negated -- put their info in the 2 result dictionaries
    process_words(words_list,score_dict,pos_dict)

    ## processing individual words in the query that are negated -- put their info in a separate dict 'all_negative_word_docs'
    ## functionality is the same as processing a normal word, but stored in a separate dict
    for q_negword in negwords_list:
        q_negword = q_negword.strip()
        if q_negword in index:
            for docid in index[q_negword].keys():
                score = 0
                if docid not in all_negative_word_docs:
                    all_negative_word_docs[docid] = len(index[q_negword][docid])            ## stores the docids in a separate dict for negated words     
                else:
                    score = all_negative_word_docs[docid] + len(index[q_negword][docid])    ## storing the scores just to maintain consistency in code, scores are not used in document retrieval
                    all_negative_word_docs[docid] = score                                                                   

    ## get the set of documents that contains negated phrases or negated words
    negative_phrase_set = set()
    negative_word_set = set()
    if len(all_negative_phrase_docs.keys()) != 0:                                           ## if there is a negated phrase in the query, the all_negative_phrase_docs will contain docids containing the phrase
        negative_phrase_set = set(score_dict.keys()) - set(all_negative_phrase_docs.keys())     ## thus taking the difference from the whole set of documents will give the set not containing that phrase
    if len(all_negative_word_docs.keys()) != 0:                                             ## if there is a negated word in the query, the all_negative_word_docs will contain docids containing the word
        negative_word_set = set(score_dict.keys()) - set(all_negative_word_docs.keys())         ## thus taking the difference from the whole set of documents will give the set not containing that word

    total_negative_set = negative_phrase_set | negative_word_set                            ## union of these 2 sets gives the set of documents not containing the phrase/words that are negated in the query

    ## to get the final set of documents, we need to do a fuzzy or between the normal(score_dict) and negated set (total_negative_set) of documents
    ## As per the following logic, if there is a docid common to both the normal set and the negated set, it will retain it and show phrase/word instances from the normal set.
    ## else the negated docid set is identified by assigning them a negative score
    for docid in total_negative_set:                                                        ## for each docid in the negated set of documents
        if score_dict[docid] == 0:                                                       
            score_dict[docid] = -1

    ## pickle the final dictionaries for ranking
##    pickle.dump(score_dict,open("score_dict","wb"))
##    pickle.dump(pos_dict,open("pos_dict","wb"))

    ## call the ranking function to rank and display snippets
    ranking.ranking(query,score_dict,pos_dict)
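The phrase handling above intersects position sets after shifting each word's positions back by its offset from the phrase start. A self-contained sketch of the same trick on a toy positional index (word -> {docid: [positions]}), separate from the module's own data structures:

def phrase_docs(index, phrase_words):
    # Documents containing all the words, then a per-document intersection of
    # shifted position sets to keep only consecutive occurrences.
    if any(w not in index for w in phrase_words):
        return {}
    docs = set(index[phrase_words[0]])
    for w in phrase_words[1:]:
        docs &= set(index[w])
    matches = {}
    for docid in docs:
        starts = set(index[phrase_words[0]][docid])
        for offset, w in enumerate(phrase_words[1:], start=1):
            starts &= {p - offset for p in index[w][docid]}
            if not starts:
                break
        if starts:
            matches[docid] = sorted(starts)
    return matches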
Example #25
def start():

    print("Opening dblp index")

    pubIx = open_dir("index/dblp_index/Pubblication_Index")
    venIx = open_dir("index/dblp_index/Venue_Index")
    query_immesse = 10
    while query_immesse >= 0:

        searcher = pubIx.searcher(weighting=scoring.Frequency)
        searcher2 = venIx.searcher(weighting=scoring.Frequency)

        phrase = input("Insert query:\n>>")
        phrase_no_rank, choice, topk = choice_ranking(phrase)
        queries = divqueries(phrase_no_rank)
        print(queries)

        q1, q2 = setqueries(queries)
        print(q1 + '\t' + q2)
        print('\n')

        schema = create_scheme()[0]
        parser = whoosh.qparser.MultifieldParser(
            ['author', 'title', 'year'],
            schema=pubIx.schema)  #default is title
        query = parser.parse(q1)
        results = searcher.search(query, limit=None)

        schema = create_scheme()[1]
        parser = whoosh.qparser.MultifieldParser(
            ['author', 'title', 'year'],
            schema=venIx.schema)  #default is title
        query = parser.parse(q2)
        results2 = searcher2.search(query, limit=None)

        t, g = getquerywords(queries)

        rank = ranking(query=t,
                       result=results,
                       choice=choice,
                       ix=pubIx.doc_count(),
                       searcher=searcher,
                       pub=True)
        sorted_result = rank.rank()
        #print_sorted_result(sorted_result,choice)

        rank = ranking(query=g,
                       result=results2,
                       choice=choice,
                       searcher=searcher2,
                       ix=venIx.doc_count(),
                       pub=False)
        sorted_result2 = rank.rank()
        #print_sorted_result(sorted_result2,choice)

        result = merge_results(pub_result=sorted_result,
                               choice=choice,
                               venue_result=sorted_result2)

        Ta_result = Threshold(result, topk).run()

        f = open('Result.txt', 'a', encoding='utf-8')

        for i in Ta_result[0:topk]:

            if i[0][0][0] is None:
                final = i[0][1][0]

            elif i[0][1][0] is None:
                if return_fuzzy_choice(choice):
                    final = i[0][0][0][0]
                else:
                    final = i[0][0][0]

            else:
                if return_fuzzy_choice(choice):
                    final = list(set(
                        i[0][0][0][0] +
                        i[0][1][0]))  #list(set().union(i[0][0][0],i[0][1][0]))
                else:
                    final = list(set(i[0][0][0] + i[0][1][0]))

            print_result_TA(final, i[1], f)

        f.close()

        import subprocess
        # capture the real exit status instead of building a CompletedProcess
        # by hand, which always reported returncode 0
        status_cmd = subprocess.run(['more', str(os.path.abspath('Result.txt'))],
                                    shell=True).returncode

        if status_cmd == 0:
            os.remove(os.path.abspath('Result.txt'))

        query_immesse -= 1
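Threshold(result, topk).run() above plays the role of a top-k merge over the two sorted result lists. Its implementation isn't shown; a textbook sketch of Fagin's Threshold Algorithm, assuming equally long descending-sorted (item, score) lists and hashable, orderable item ids:

import heapq

def threshold_topk(lists, k, agg=sum):
    # lists: one descending-sorted [(item, score), ...] per source.
    lookup = [dict(lst) for lst in lists]        # random access per source
    seen, top = set(), []                        # top is a min-heap of (score, item)
    for row in zip(*lists):                      # round-robin sorted access
        for item, _ in row:
            if item in seen:
                continue
            seen.add(item)
            total = agg(d.get(item, 0.0) for d in lookup)
            heapq.heappush(top, (total, item))
            if len(top) > k:
                heapq.heappop(top)
        threshold = agg(score for _, score in row)
        if len(top) == k and top[0][0] >= threshold:
            break                                # no unseen item can beat the current top-k
    return sorted(top, reverse=True)

# e.g. threshold_topk([[('a', .9), ('b', .8)], [('b', .7), ('a', .1)]], k=1) -> [(1.5, 'b')]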
Example #26
    def contentquery(self):
        ST = time.time()
        urllist = []
        totalword = len(self.wordlist)
        for x in self.wordlist:
            print self.worddic[x]
        print "total word:", totalword
        for dbname in self.wordlist:
            if self.tempdb.has_key(dbname):
                urllist = urllist + self.Stringload(zlib.decompress(self.tempdb[dbname]))
        if totalword > 1:
            ranklist = ranking.ranking(urllist)
            urlcomp = ranklist.dicuniques(totalword)
        else:
            urlcomp = urllist
        totalsize = len(urlcomp)
        ralist = []
        pagestartat = self.page * self.pagesize
        if (pagestartat + self.pagesize) > totalsize:
            pageendat = totalsize
        else:
            pageendat = pagestartat + self.pagesize

        print time.time() - ST

        if totalword != 1:
            rangestsart = 0
            rangeend = totalsize
            if totalsize > 500 or pagestartat >= 500:
                rangestsart = (pagestartat // 500) * 500
                rangeend = rangestsart + 500
                pagestartat = pagestartat - rangestsart
                pageendat = pagestartat + self.pagesize
            #for i in xrange(0,totalsize):
            count = 0
            searchtime = 0.0
            linktime = 0.0
            for i in xrange(rangestsart, rangeend):
                bastscore = 0
                mirs = 0
                spliturl = urlcomp[i]
                if totalword >= 3:
                    sword = 3
                else:
                    sword = totalword

                if len(spliturl) == 2 and spliturl[0] == totalword:
                    at = time.time()
                    self.pct, title = self.purei.queryPurecontent(spliturl[1])
                    bt = time.time() - at
                    matchstart = 0
                    scorelist = []

                    searchtime = searchtime + bt
                    for match in re.finditer(self.uni.decode("utf-8"), self.pct):
                        matchstart = match.start()
                    if matchstart:
                        bastscore = 60
                        if (matchstart + 150) > len(self.pct):
                            mirs = len(self.pct) - matchstart
                        scorelist.append((match.start(), match.end()))
                        startat = self.findpunctuation(matchstart)
                        scorelist = self.wordmarkup(scorelist, startat - mirs)
                        abstract = startat - mirs
                        destcontent = self.pct[abstract:abstract + 150]
                        url = urllib.quote(self.urldb[self.serialdb[spliturl[1]]])
                        ralist.append((destcontent, bastscore, str(spliturl[0]), url, title, scorelist))

                if len(spliturl) == 2 and spliturl[0] >= sword and bastscore == 0:
                    at = time.time()
                    # self.pct,title=self.purei.queryPurecontent(spliturl[1])
                    r = []
                    for dbname in self.wordlist:
                        if self.tempdb.has_key(dbname):
                            picklelist = []
                            for match in re.finditer(self.worddic[dbname], self.pct):
                                picklelist.append((match.start(), match.end()))
                            r = r + picklelist
                    r = sorted(r, key=operator.itemgetter(0))
                    r = ranklist.wordlinker(r)
                    bastscore, scorelist = ranklist.counttheimportantpart(r)
                    #print scorelist
                    if len(scorelist) > 0:
                        startat = scorelist[0][0]
                        startat = self.findpunctuation(startat)
                        if (startat + 150) > len(self.pct):
                            mirs = len(self.pct) - startat
                        scorelist = self.wordmarkup(scorelist, startat - mirs)
                        abstract = startat - mirs
                        destcontent = self.pct[abstract:abstract + 150]
                        url = urllib.quote(self.urldb[self.serialdb[spliturl[1]]])
                        ralist.append((destcontent, bastscore, str(spliturl[0]), url, title, scorelist))
                    bt = time.time() - at
                    linktime = linktime + bt

        print 'totalword2:',
        print time.time() - ST

        if totalword == 1:
            for i in xrange(pagestartat, pageendat):
                bastscore = 0
                mirs = 0
                spliturl = urlcomp[i]
                self.pct, title = self.purei.queryPurecontent(spliturl)
                matchstart = 0
                scorelist = []
                picklelist = []
                for match in re.finditer(self.uni, self.pct):
                    matchstart = match.start()
                    picklelist.append((match.start(), match.end()))
                if (matchstart + 100) > len(self.pct):
                    mirs = len(self.pct) - matchstart
                scorelist = picklelist
                startat = scorelist[0][0]
                startat = self.findpunctuation(startat)
                scorelist = self.wordmarkup(scorelist, startat - mirs)
                abstract = startat - mirs
                destcontent = self.pct[abstract:abstract + 150]
                url = urllib.quote(self.urldb[self.serialdb[spliturl[0:4]]])
                #print destcontent,str(1),url,title,scorelist
                ralist.append((destcontent, 100, str(1), url, title, scorelist))
            print 'totalword1:',
            print time.time() - ST
            return (totalsize, sorted(ralist, key=operator.itemgetter(1), reverse=True))
        print "search:", str(searchtime)
        print "Link:", str(linktime)
        return (totalsize, sorted(ralist, key=operator.itemgetter(1), reverse=True)[pagestartat:pageendat])