def calculate(self):
    """Collect the leading field of every result entry, cluster by cluster.

    Each cluster in the instance data is handled independently:

    * a cluster with at most two elements is used as its own result,
      without any computation;
    * a larger cluster gets a new ``vsm.VSM`` built from it (stored on the
      instance) and is then passed to ``__calculate_detail``.

    Returns:
        list: one inner list per cluster, holding ``entry[0]`` for every
        entry of that cluster's result, in order.
    """
    collected = []
    for cluster in self.__data:
        # Only clusters with more than two elements need the VSM computation.
        if len(cluster) > 2:
            self.__vsm = vsm.VSM(cluster)
            ranked = self.__calculate_detail(cluster)
        else:
            ranked = cluster
        collected.append([entry[0] for entry in ranked])
    return collected
def set_property(self, category, appId):
    """Point the instance at a new category / app id and rebuild its state.

    Args:
        category: value stored as the instance's category.
        appId: value stored as the instance's app id.

    After the two fields are stored, the data is re-read through
    ``__get_data`` and a new ``vsm.VSM`` is built from that data.
    """
    self.__category, self.__appId = category, appId
    # Fetched only after the fields above are set; presumably __get_data
    # reads them -- confirm against its definition.
    self.__data = self.__get_data()
    # Initialise a VSM object from the freshly loaded data.
    self.__vsm = vsm.VSM(self.__data)
correct = 0 precision = [] print(lt_gen) for i in lt_gen: a += 1 if i in lt_ground_truth: correct += 1 print('c', correct, 'a', a) precision.append(correct / a) MAP = sum(precision) / len(lt_ground_truth) return MAP # In[12]: v = vsm.VSM(model_dir=corpus) # ## Stage 1 # In[13]: df_whole = pd.read_csv('create_corpus/task2_trainset.csv') df_whole = df_whole.set_index('Id') target = df_whole.loc[target_id] target_whole_query = target['Abstract'] target_whole_query = re.sub(r'[^\w\s]', ' ', target_whole_query).lower() #.split(' ') target_whole_query = [target_whole_query] # In[14]:
help= 'target document id to generate query to calculate mAP, ranges from 0 to 4' ) parser.add_argument('--model_dir', type=str, default='./model/', help='directory of inverted file') args = parser.parse_args() # dim = 5000 print('---------------') print('dim=', args.dim) print('qlen=', args.qlen) print('target doc id', args.tid) print('---------------') v = vsm.VSM(model_dir=args.model_dir, dim=args.dim) # with open('test.json', 'w') as f: # json.dump(v.TF, f) # with open('vocab.json', 'w') as f: # json.dump(v.vocab, f) # print(v.vocab.shape) # print(v.vocab[1000]) # print(v.filelist[1349]) import numpy as np def calc_AP(ret, ans): AP = 0 hit = 0 for i, v in enumerate(ret):
import vsm

# Usage notes
# -----------
# queries
#   - a list; it may contain several queries
#   - words inside a query are separated by spaces; punctuation is handled
# scores_follow_filelist
#   - the score of every document, ordered like the current file list
# descending_ranking_by_id
#   - the ranking; each number is the position of a file in the current
#     file list
#   - descending order: the first entry is the highest-scoring document

v = vsm.VSM(model_dir='mini_corpus_1')

queries = [
    "1st query",
    "2nd query",
]
# for example:
# queries = ["Adversarial Examples that Fool Detectors"]

scores_follow_filelist, descending_ranking_by_id = v.retrieval(queries)
return map_scores, rs if __name__ == '__main__': args = _parse_args() if not args.query_paths: if not os.path.exists(args.results_path): print("Neither query nor folder found!") exit(1) args.query_paths = glob.glob(args.results_path + "*.txt") print(args.query_paths) for doc_p in args.doc_path: print("Init VSM...") v = vsm.VSM(model_dir=doc_p) print("Calculate ranking...") query_lst = [] query_docid_lst = [] query_name_lst = [] queries = [] for filename in args.query_paths: with open(filename, 'r') as f: this_query_lst = f.readlines() for q in this_query_lst: q = q.strip().split(' ') # [docID, query_text] docID, query = int(q[0]), ' '.join(q[1:]) queries.append(query) #print(docID, query) if docID == -1 or args.topk: