Ejemplo n.º 1
0
 def select_can_univ(self, uni_list_path):
     """Write the list of candidate universities taken from the yiben list.

     Reads ``university_merged_list.csv`` and ``university_yiben.csv`` from
     ``uni_list_path`` and a category table from a fixed location, prints
     diagnostics to stdout, and writes ``candidate_universites.csv`` (one
     name per line) into ``uni_list_path``. A yiben university is written
     unless its category equals ``arts_chi_str``; universities without a
     known category are written as well and reported on stdout.

     Args:
         uni_list_path: Directory that holds the input lists and receives
             the output file.
     """
     merged_list_path = os.path.join(uni_list_path, 'university_merged_list.csv')
     merged_rank_list = np.genfromtxt(merged_list_path, dtype=str, delimiter=',')
     print('read a matrix with shape: %s' % (merged_rank_list.shape,))
     merged_rank_dict = read_ranking_list(merged_list_path, dtype=int)

     # genfromtxt returns a 0-d array for a single-entry file, which set()
     # cannot iterate; atleast_1d keeps that case working.
     yiben_set = set(np.atleast_1d(
         np.genfromtxt(os.path.join(uni_list_path, 'university_yiben.csv'), dtype=str)))
     print('#yiben universities: %d' % len(yiben_set))

     # Report the yiben universities that are absent from the merged ranking.
     for uni in yiben_set:
         if uni not in merged_rank_dict:
             print(uni)

     # NOTE(review): hard-coded absolute path; consider making it configurable.
     category_path = '/home/ffl/nus/MM/complementary/chinese_university_ranking/data/features/univ_category.csv'
     data = np.genfromtxt(category_path, delimiter=',', dtype=str)
     # The two spaces before %s reproduce the original output exactly.
     print('read a matrix with shape:  %s' % (data.shape,))
     # Column 0 is the (cleaned) university name, column 1 its category.
     univ_types = {clean_uni_name(d[0]): d[1] for d in data}
     arts_chi_str = '艺术'

     with open(os.path.join(uni_list_path, 'candidate_universites.csv'), 'w') as fout:
         for uni in yiben_set:
             if uni not in univ_types:
                 # Unknown category: keep the university but flag it.
                 print('without category: %s' % (uni,))
             elif univ_types[uni] == arts_chi_str:
                 continue
             fout.write('%s\n' % (uni,))
Ejemplo n.º 2
0
 def ensemble(self, rsvm_tran_out, rnet_tran_out, ratio):
     """Blend two score lists with a linear weight and evaluate the result.

     Both inputs are loaded with ``read_ranking_list(..., float)``. For each
     key the merged score is ``rsvm * ratio + rnet * (1 - ratio)``. The
     merged scores, sorted in descending order, are saved to
     ``ens_<ratio>.csv`` in the current working directory, and the unsorted
     mapping is handed to ``self.evaluator.evaluate``.

     Args:
         rsvm_tran_out: Source of the first score list (weight ``ratio``).
         rnet_tran_out: Source of the second score list (weight
             ``1 - ratio``).
         ratio: Blend weight, strictly between 0 and 1.

     Returns:
         The ``(acc, cors)`` pair returned by ``self.evaluator.evaluate``.

     Raises:
         AssertionError: If ``ratio`` is outside (0, 1) or the two lists
             differ in length.
         SystemExit: If a key of the first list is missing from the second.
     """
     assert 0 < ratio < 1, 'ratio: %f unexpected' % ratio
     rsvm = read_ranking_list(rsvm_tran_out, float)
     rnet = read_ranking_list(rnet_tran_out, float)
     assert len(rsvm) == len(rnet), 'length mismatch'

     rmerged = {}
     for uni, svm_score in rsvm.items():
         # Test membership on the mapping itself; ``.keys()`` builds a list
         # in Python 2 and made this loop quadratic.
         if uni not in rnet:
             print('%s not found in result of rank_net' % (uni,))
             # Non-zero status so callers can tell this apart from success;
             # also avoids the ``site``-provided ``exit`` helper.
             raise SystemExit(1)
         rmerged[uni] = svm_score * ratio + rnet[uni] * (1 - ratio)

     sor_gen_rl = sorted(rmerged.items(),
                         key=operator.itemgetter(1),
                         reverse=True)
     np.savetxt('ens_' + str(ratio) + '.csv',
                sor_gen_rl,
                fmt='%s',
                delimiter=',')

     # evaluate
     acc, cors = self.evaluator.evaluate(rmerged)
     return acc, cors
Ejemplo n.º 3
0
    def post_merged_list(self, uni_list_path):
        """Filter the merged ranking and rewrite it with consecutive ranks.

        Loads the merged ranking and the yiben (first-level) list from
        ``uni_list_path`` plus a category table from a fixed location, then:

        1. drops universities that are not in the yiben list;
        2. drops universities whose category equals ``arts_chi_str``
           (universities without a known category are kept and reported);
        3. sorts the survivors by their original rank and writes
           ``university-selected_merged_list.csv`` to the current working
           directory as ``name,rank`` lines, where entries that share an
           original rank share the new rank and the next distinct rank
           takes its 1-based position in the sorted list.

        NOTE(review): the progress message mentions military universities,
        but no explicit military filter is implemented here; presumably the
        yiben list already excludes them -- confirm.

        Args:
            uni_list_path: Directory holding ``university_merged_list.csv``
                and ``university_yiben.csv``.
        """
        merged_list_path = os.path.join(uni_list_path, 'university_merged_list.csv')
        merged_rank_list = np.genfromtxt(merged_list_path, dtype=str, delimiter=',')
        print('read a matrix with shape: %s' % (merged_rank_list.shape,))
        merged_rank_dict = read_ranking_list(merged_list_path, dtype=int)

        # genfromtxt returns a 0-d array for a single-entry file, which set()
        # cannot iterate; atleast_1d keeps that case working.
        yiben_set = set(np.atleast_1d(
            np.genfromtxt(os.path.join(uni_list_path, 'university_yiben.csv'), dtype=str)))
        print('#yiben universities: %d' % len(yiben_set))

        # NOTE(review): hard-coded absolute path; consider making it configurable.
        category_path = '/home/ffl/nus/MM/complementary/chinese_university_ranking/data/features/univ_category.csv'
        data = np.genfromtxt(category_path, delimiter=',', dtype=str)
        # The two spaces before %s reproduce the original output exactly.
        print('read a matrix with shape:  %s' % (data.shape,))
        # Column 0 is the (cleaned) university name, column 1 its category.
        univ_types = {clean_uni_name(d[0]): d[1] for d in data}
        arts_chi_str = '艺术'

        print('#initial universities in the merged list: %d' % len(merged_rank_dict))
        # Iterate over a snapshot of the keys: entries are deleted inside the
        # loop, which is only safe when the key view is not live.
        for uni in list(merged_rank_dict.keys()):
            if uni not in yiben_set:
                del merged_rank_dict[uni]
        print('#yiben universities and not military in the merged list: %d' % len(merged_rank_dict))

        for uni in list(merged_rank_dict.keys()):
            if uni not in univ_types:
                print('category missing: %s' % (uni,))
                continue
            if univ_types[uni] == arts_chi_str:
                print(uni)
                del merged_rank_dict[uni]
        print('#yiben unviersities and not art universities in the merged list: %d' % len(merged_rank_dict))

        # Post-process ranks: close the gaps left by the removed entries
        # while keeping ties tied.
        sorted_merged_rank = sorted(merged_rank_dict.items(), key=operator.itemgetter(1))
        prev_rank = 1      # rank most recently assigned in the output
        ori_pre_rank = 1   # original rank of the previous entry
        with open('university-selected_merged_list.csv', 'w') as fout:
            for position, (uni, ori_rank) in enumerate(sorted_merged_rank, 1):
                if ori_rank != ori_pre_rank:
                    # New original rank: it takes its position in the list.
                    prev_rank = position
                    ori_pre_rank = ori_rank
                fout.write('%s,%d\n' % (uni, prev_rank))
Ejemplo n.º 4
0
    def gen_pw_gt(self):
        """Generate the pairwise ground-truth table, route stubs and pages.

        Reads the selected university list and the ground-truth pairs in
        ``self.gt_fname`` (rows of ``a,b,label`` with label ``1`` or ``-1``),
        then for every university, in list order, with index ``ind``:

        * appends an HTML table row to ``gt.table`` holding the number of
          pairs in which it is the second element ("before"), the number in
          which it is the first element ("behind"), and
          ``len(self.sel_unis)`` minus both;
        * appends a Flask route function ``u<ind>`` to ``gt_func.py``;
        * writes ``u<ind>.html`` into the templates directory by wrapping
          the output of ``self.gen_pw_gt_sin`` in the two halves of
          ``uxxx.html``, split after the first line containing ``<!---->``.

        ``gt.table`` and ``gt_func.py`` are created in the current working
        directory. Input and template locations are hard-coded.
        """
        prepared_list_path = os.path.join(
            '/home/ffl/nus/MM/cur_trans/data/prepared/',
            'university-selected_merged_list.csv')
        unis = np.genfromtxt(prepared_list_path, dtype=str, delimiter=',')
        print('data shape: %s' % (unis.shape,))
        # NOTE(review): the result was never used in this method; the call is
        # kept only in case read_ranking_list has side effects -- confirm and
        # remove.
        read_ranking_list(prepared_list_path, int)

        # Read the ground-truth pairs and index them in a single pass.
        # A row (a, b, '1') is stored as the ordered pair (a, b); a row
        # (a, b, '-1') as (b, a).
        behind_of = {}   # first element  -> second elements, in file order
        before_of = {}   # second element -> first elements, in file order
        data = np.genfromtxt(self.gt_fname, delimiter=',', dtype=str)
        print('ground truth pair read in with shape: %s' % (data.shape,))
        for pair in data:
            if pair[2] == '1':
                first, second = pair[0], pair[1]
            elif pair[2] == '-1':
                first, second = pair[1], pair[0]
            else:
                # Reading stops at the first malformed row; later rows are
                # ignored (behaviour kept as-is).
                print('unexpected pair: %s' % (pair,))
                break
            behind_of.setdefault(first, []).append(second)
            before_of.setdefault(second, []).append(first)

        # Read the page template and split it after the marker line.
        template_dir = '/home/ffl/nus/MM/cur_trans/code/tcurweb/flaskr/flaskr/templates'
        tags_before = []
        tags_after = []
        seen_marker = False
        with open(os.path.join(template_dir, 'uxxx.html')) as fin:
            for line in fin:
                (tags_after if seen_marker else tags_before).append(line)
                if '<!---->' in line:
                    seen_marker = True

        with open('gt_func.py', 'w') as fuout, open('gt.table', 'w') as fout:
            # currently, no unseen universities in traditional ranking systems
            for ind, u_kv in enumerate(unis):
                name = u_kv[0]
                print(name)
                # Fresh copies, so the callee may mutate them freely.
                uni_bef = list(before_of.get(name, ()))
                uni_beh = list(behind_of.get(name, ()))
                bef = len(uni_bef)
                beh = len(uni_beh)
                unk = len(self.sel_unis) - bef - beh

                # write html table (the anchor is now closed with </a>)
                fout.write(
                    '<tr>\n'
                    '<td><a href="{{url_for(\'u%d\')}}">%s</a></td>\n'
                    '<td>%d</td>\n'
                    '<td>%d</td>\n'
                    '<td>%d</td>\n'
                    '</tr>\n' % (ind, name, bef, beh, unk))

                # write handle function
                fuout.write(
                    '@app.route(\'/u%d/\',methods = [\'GET\'])\ndef u%d():\n    return render_template(\'u%d.html\')\n\n'
                    % (ind, ind, ind))

                # write university page
                page_path = os.path.join(template_dir, 'u%d.html' % ind)
                with open(page_path, 'w') as fpout:
                    fpout.writelines(tags_before)
                    fpout.writelines(self.gen_pw_gt_sin(ind, uni_bef, uni_beh))
                    fpout.writelines(tags_after)