def process(options, collection):
    """Rank each query's images by their calParzen score and evaluate DCG@25.

    For every query id read from
    ``<rootpath>/<collection>/Annotations/qid.text.txt`` the images returned
    by ``readAnnotationsFrom`` are scored with ``calParzen`` against the
    features of all images of that query, sorted by descending score, and
    the resulting label order is handed to the ``DCG@25`` scorer.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``feature``,
            ``method`` and ``sigma`` attributes.
        collection: collection name used to build the input/output paths.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns true for either
        output path. Writes rankings and per-query scores through
        ``writeRankingResult`` / ``writeDCGResult``, prints the mean score
        and appends the ranking path to
        ``result/individual_result_pathes.txt``.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    feature = options.feature
    method = options.method
    sigma = options.sigma

    # result paths
    ranking_result_path = os.path.join(
        rootpath, collection, 'SimilarityIndex', collection, 'MetaData',
        method, feature)
    DCG_result_path = os.path.join(rootpath, collection, 'DCG', method, feature)

    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries
    qid_query_file = os.path.join(
        rootpath, collection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)
    qid2query = dict(zip(qid_list, query_list))

    # input: image features
    img_feat_path = os.path.join(rootpath, collection, 'FeatureData', feature)
    img_feats = BigFile(img_feat_path)

    # the model to calculate DCG@25
    scorer = getScorer("DCG@25")

    done = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for qid in qid_list:
        iid_list, label_list = readAnnotationsFrom(
            collection, 'concepts%s.txt' % collection, qid, False, rootpath)
        # Only the feature matrix is needed; the id list returned alongside
        # it was unused in the active code path.
        _, test_X = img_feats.read(iid_list)

        # NOTE(review): every image is re-read with read_one(); a batched
        # variant (calParzen_fast) was left commented out in the original.
        parzen_list = [
            calParzen(img_feats.read_one(imidx), test_X, sigma)
            for imidx in iid_list
        ]

        sorted_tuple = sorted(
            zip(iid_list, label_list, parzen_list),
            key=lambda v: v[2], reverse=True)
        qid2iid_label_score[qid] = sorted_tuple

        # calculate DCG@25 on the labels in ranked order
        qid2dcg[qid] = scorer.score([x[1] for x in sorted_tuple])
        printMessage("Done", qid, qid2query[qid])

        done += 1
        # Hand the accumulated rankings over every 20 queries and start a
        # fresh batch.
        if done % 20 == 0:
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeDCGResult(DCG_result_path, qid2dcg)
    writeRankingResult(ranking_result_path, qid2iid_label_score)

    # Guard the mean: an empty query list used to raise ZeroDivisionError.
    dcg_values = list(qid2dcg.values())
    mean_dcg = 1.0 * sum(dcg_values) / len(dcg_values) if dcg_values else 0.0
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if os.path.exists(result_path_file):
        mode = 'a'
    else:
        makedirsforfile(result_path_file)
        mode = 'w'
    # The context manager closes the handle even if the write fails.
    with open(result_path_file, mode) as fout:
        fout.write(ranking_result_path + '\n')
def process(options, collection):
    """Score, rank and evaluate the images of every query in ``collection``.

    Each query id from ``Annotations/qid.text.txt`` is processed in turn:
    its annotated images are scored by ``calParzen`` (one image feature
    against the feature matrix of all images of the query, with bandwidth
    argument ``options.sigma``), ranked by descending score, and evaluated
    with the ``DCG@25`` scorer.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``feature``,
            ``method`` and ``sigma`` attributes.
        collection: collection name used to build the input/output paths.

    Side effects:
        Exits via ``sys.exit(0)`` if ``checkToSkip`` returns true for the
        ranking or the DCG output path. Otherwise writes results with
        ``writeRankingResult`` / ``writeDCGResult``, prints the mean score
        and appends the ranking path to
        ``result/individual_result_pathes.txt``.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    feature = options.feature
    method = options.method
    sigma = options.sigma

    # result paths
    ranking_result_path = os.path.join(
        rootpath, collection, 'SimilarityIndex', collection, 'MetaData',
        method, feature)
    DCG_result_path = os.path.join(rootpath, collection, 'DCG', method, feature)

    for result_path in (ranking_result_path, DCG_result_path):
        if checkToSkip(result_path, overwrite):
            sys.exit(0)

    # input: queries
    qid_query_file = os.path.join(
        rootpath, collection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)
    qid2query = dict(zip(qid_list, query_list))

    # input: image features
    img_feats = BigFile(
        os.path.join(rootpath, collection, 'FeatureData', feature))

    # the model to calculate DCG@25
    scorer = getScorer("DCG@25")

    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    # enumerate from 1 so `done` is the number of queries finished so far
    for done, qid in enumerate(qid_list, 1):
        iid_list, label_list = readAnnotationsFrom(
            collection, 'concepts%s.txt' % collection, qid, False, rootpath)
        # The id list returned next to the feature matrix was never used.
        _, test_X = img_feats.read(iid_list)

        parzen_list = [
            calParzen(img_feats.read_one(imidx), test_X, sigma)
            for imidx in iid_list
        ]

        ranking = sorted(
            zip(iid_list, label_list, parzen_list),
            key=lambda v: v[2], reverse=True)
        qid2iid_label_score[qid] = ranking

        # calculate DCG@25 on the labels in ranked order
        qid2dcg[qid] = scorer.score([entry[1] for entry in ranking])
        printMessage("Done", qid, qid2query[qid])

        # Pass rankings on in batches of 20 queries, then reset the batch.
        if done % 20 == 0:
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeDCGResult(DCG_result_path, qid2dcg)
    writeRankingResult(ranking_result_path, qid2iid_label_score)

    # An empty query list previously crashed here with ZeroDivisionError.
    if qid2dcg:
        mean_dcg = 1.0 * sum(qid2dcg.values()) / len(qid2dcg)
    else:
        mean_dcg = 0.0
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if os.path.exists(result_path_file):
        mode = 'a'
    else:
        makedirsforfile(result_path_file)
        mode = 'w'
    # `with` guarantees the file is closed even when the write raises.
    with open(result_path_file, mode) as fout:
        fout.write(ranking_result_path + '\n')
def process(options, trainCollection, devCollection):
    """Rank the images of every dev query with the selected method and score them.

    Supported ``options.method`` values are ``'conse'``, ``'t2i'``, ``'ta'``,
    ``'i2t'`` and ``'ia'``; any other value prints a message and exits.
    For each query id in ``<devCollection>/Annotations/qid.text.txt`` the
    chosen searcher produces one score per annotated image. Images are
    ranked by descending score and the ranked labels are evaluated with
    ``getScorer(options.metric)``. If the searcher returns an empty score
    list the query is counted as failed and its images are put in random
    order instead.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``method``,
            ``metric``, ``qrysim``, ``qrythres``, ``ntopimg``, ``ntopqry``,
            ``mincc``, ``feature``, ``k``, ``corpus``, ``word2vec`` and
            ``label_source``, plus the method-specific file names
            (``nnqueryfile``/``queryclickfile`` or
            ``nnimagefile``/``imageclickfile``).
        trainCollection: collection providing click data and train features.
        devCollection: collection whose queries are evaluated.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns true for either
        output path or when the method is unsupported. Writes results via
        ``writeRankingResult`` / ``writeDCGResult``, prints summary lines
        and appends the ranking path to
        ``result/individual_result_pathes.txt``.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    method = options.method
    metric = options.metric

    qrysim = options.qrysim
    qrythres = options.qrythres
    ntopimg = options.ntopimg
    ntopqry = options.ntopqry
    mincc = options.mincc
    feature = options.feature

    # semantic embedding
    k = options.k
    corpus = options.corpus
    word2vec_model = options.word2vec
    label_source = options.label_source

    # result paths
    ranking_result_path = os.path.join(
        rootpath, devCollection, 'SimilarityIndex', devCollection,
        'MetaData', method, feature)
    DCG_result_path = os.path.join(
        rootpath, devCollection, metric, method, feature)

    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries, passed through the query parser
    qp = SimpleQueryParser()
    qid_query_file = os.path.join(
        rootpath, devCollection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)  # (qid query)
    qid2query = dict(
        zip(qid_list, [qp.process(query) for query in query_list]))

    # paths of image features
    train_feat_path = os.path.join(
        rootpath, trainCollection, 'FeatureData', feature)
    dev_feat_path = os.path.join(
        rootpath, devCollection, 'FeatureData', feature)

    # Method selection. The per-query scoring call is bound once here so the
    # main loop does not repeat the method dispatch for every query.
    if method == 'conse':
        se_searcher = ConSE(
            label_source, corpus, word2vec_model, dev_feat_path, rootpath)

        def score_query(query_id, iid_list):
            return se_searcher.do_search(qid2query[query_id], iid_list, k)
    elif method == 't2i' or method == 'ta':
        nnquery_file = os.path.join(
            rootpath, devCollection, 'TextData', 'querynn',
            options.nnqueryfile)
        qryClick_file = os.path.join(
            rootpath, trainCollection, 'TextData', options.queryclickfile)
        t2i_searcher = Text2Image(
            nnquery_file, qryClick_file, dev_feat_path, train_feat_path,
            ntopqry)

        if method == 't2i':
            def score_query(query_id, iid_list):
                return t2i_searcher.text2image(
                    query_id, iid_list, qrythres, mincc)
        else:
            def score_query(query_id, iid_list):
                return t2i_searcher.textAnnotation(
                    query_id, iid_list, ntopimg, qrythres, mincc)
    elif method == 'i2t' or method == 'ia':
        nnimage_file = os.path.join(
            rootpath, devCollection, 'TextData', 'imagenn', feature,
            options.nnimagefile)
        imgClick_file = os.path.join(
            rootpath, trainCollection, 'TextData', options.imageclickfile)
        i2t_searcher = Image2Text(
            nnimage_file, imgClick_file, qrysim, ntopimg, ntopqry)

        if method == 'i2t':
            def score_query(query_id, iid_list):
                return i2t_searcher.image2text(
                    qid2query[query_id], iid_list, mincc)
        else:
            def score_query(query_id, iid_list):
                return i2t_searcher.imageAnnotation(
                    qid2query[query_id], iid_list, mincc)
    else:
        print("this model is not supported with %s" % method)
        # NOTE(review): exits with status 0 although this is an error path;
        # left unchanged so existing callers keep seeing the same status.
        sys.exit(0)

    scorer = getScorer(metric)

    done = 0
    failed_count = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for query_id in qid_list:
        iid_list, label_list = readAnnotationsFrom(
            devCollection, 'concepts%s.txt' % devCollection, query_id,
            False, rootpath)

        scorelist = score_query(query_id, iid_list)

        # len() rather than truthiness: the searchers' return type is not
        # visible here and may not support a boolean test.
        if len(scorelist) == 0:
            failed_count += 1
            # No scores: give every image score 0 and a random order.
            # list() keeps shuffle working where zip() returns an iterator.
            ranked = list(zip(iid_list, label_list, [0] * len(iid_list)))
            random.shuffle(ranked)
        else:
            ranked = sorted(
                zip(iid_list, label_list, scorelist),
                key=lambda v: v[2], reverse=True)
        qid2iid_label_score[query_id] = ranked

        # score the labels in ranked order
        qid2dcg[query_id] = scorer.score([x[1] for x in ranked])
        printMessage("Done", query_id, qid2query[query_id])

        done += 1
        # Hand the accumulated rankings over every 20 queries and start a
        # fresh batch.
        if done % 20 == 0:
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeRankingResult(ranking_result_path, qid2iid_label_score)
    writeDCGResult(DCG_result_path, qid2dcg)

    print("number of failed query: %d" % failed_count)
    # Guard the mean: an empty query list used to raise ZeroDivisionError.
    dcg_values = list(qid2dcg.values())
    mean_dcg = 1.0 * sum(dcg_values) / len(dcg_values) if dcg_values else 0.0
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if os.path.exists(result_path_file):
        mode = 'a'
    else:
        makedirsforfile(result_path_file)
        mode = 'w'
    # The context manager closes the handle even if the write fails.
    with open(result_path_file, mode) as fout:
        fout.write(ranking_result_path + '\n')
def process(options, trainCollection, devCollection):
    """Rank dev-collection images per query with 'se', 't2i' or 'i2t' and score them.

    ``options.method`` selects the searcher: ``'se'`` (SemanticEmbedding),
    ``'t2i'`` (Text2Image) or ``'i2t'`` (Image2Text); any other value prints
    a message and exits. For each query id in
    ``<devCollection>/Annotations/qid.text.txt`` the searcher returns one
    score per annotated image. Images are ranked by descending score and
    the ranked labels are evaluated with ``getScorer(options.metric)``. An
    empty score list marks the query as failed and its images are put in
    random order.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``method``,
            ``metric``, ``qrysim``, ``qrythres``, ``ntopimg``, ``ntopqry``,
            ``mincc``, ``feature``, ``k``, ``corpus``, ``word2vec`` and
            ``label_source``, plus the method-specific file names
            (``nnqueryfile``/``queryclickfile`` or
            ``nnimagefile``/``imageclickfile``).
        trainCollection: collection providing click data and train features.
        devCollection: collection whose queries are evaluated.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns true for either
        output path or when the method is unsupported. Writes results via
        ``writeRankingResult`` / ``writeDCGResult``, prints summary lines
        and appends the ranking path to
        ``result/individual_result_pathes.txt``.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    method = options.method
    metric = options.metric

    qrysim = options.qrysim
    qrythres = options.qrythres
    ntopimg = options.ntopimg
    ntopqry = options.ntopqry
    mincc = options.mincc
    feature = options.feature

    # semantic embedding
    k = options.k
    corpus = options.corpus
    word2vec_model = options.word2vec
    label_source = options.label_source

    # result paths
    ranking_result_path = os.path.join(
        rootpath, devCollection, 'SimilarityIndex', devCollection,
        'MetaData', method, feature)
    # NOTE(review): the score directory is always 'DCG' here, although the
    # scorer below is chosen by options.metric - confirm this is intended.
    DCG_result_path = os.path.join(
        rootpath, devCollection, 'DCG', method, feature)

    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries, passed through the query parser
    qp = SimpleQueryParser()
    qid_query_file = os.path.join(
        rootpath, devCollection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)  # (qid query)
    qid2query = dict(
        zip(qid_list, [qp.process(query) for query in query_list]))

    # paths of image features
    train_feat_path = os.path.join(
        rootpath, trainCollection, 'FeatureData', feature)
    dev_feat_path = os.path.join(
        rootpath, devCollection, 'FeatureData', feature)

    # Method selection. Bind the per-query scoring call once so the loop
    # below does not re-test `method` for every query.
    if method == 'se':
        se_searcher = SemanticEmbedding(
            label_source, corpus, word2vec_model, dev_feat_path, rootpath)

        def score_query(query_id, iid_list):
            return se_searcher.do_search(qid2query[query_id], iid_list, k)
    elif method == 't2i':
        nnquery_file = os.path.join(
            rootpath, devCollection, 'TextData', 'querynn',
            options.nnqueryfile)
        qryClick_file = os.path.join(
            rootpath, trainCollection, 'TextData', options.queryclickfile)
        t2i_searcher = Text2Image(
            nnquery_file, qryClick_file, dev_feat_path, train_feat_path,
            ntopqry)

        def score_query(query_id, iid_list):
            return t2i_searcher.text2image(
                query_id, iid_list, qrythres, mincc)
    elif method == 'i2t':
        nnimage_file = os.path.join(
            rootpath, devCollection, 'TextData', 'imagenn', feature,
            options.nnimagefile)
        imgClick_file = os.path.join(
            rootpath, trainCollection, 'TextData', options.imageclickfile)
        i2t_searcher = Image2Text(
            nnimage_file, imgClick_file, qrysim, ntopimg, ntopqry)

        def score_query(query_id, iid_list):
            return i2t_searcher.image2text(
                qid2query[query_id], iid_list, mincc)
    else:
        print("this model is not supported with %s" % method)
        # NOTE(review): status 0 on an error path; kept for compatibility.
        sys.exit(0)

    scorer = getScorer(metric)

    done = 0
    failed_count = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for query_id in qid_list:
        iid_list, label_list = readAnnotationsFrom(
            devCollection, 'concepts%s.txt' % devCollection, query_id,
            False, rootpath)

        scorelist = score_query(query_id, iid_list)

        # len() rather than truthiness: the searchers' return type is not
        # visible here and may not support a boolean test.
        if len(scorelist) == 0:
            failed_count += 1
            # No scores: give every image score 0 and a random order.
            # list() keeps shuffle working where zip() returns an iterator.
            ranked = list(zip(iid_list, label_list, [0] * len(iid_list)))
            random.shuffle(ranked)
        else:
            ranked = sorted(
                zip(iid_list, label_list, scorelist),
                key=lambda v: v[2], reverse=True)
        qid2iid_label_score[query_id] = ranked

        # score the labels in ranked order
        qid2dcg[query_id] = scorer.score([x[1] for x in ranked])
        printMessage("Done", query_id, qid2query[query_id])

        done += 1
        # Pass rankings on in batches of 20 queries, then reset the batch.
        if done % 20 == 0:
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeRankingResult(ranking_result_path, qid2iid_label_score)
    writeDCGResult(DCG_result_path, qid2dcg)

    print("number of failed query: %d" % failed_count)
    # An empty query list previously crashed here with ZeroDivisionError.
    if qid2dcg:
        mean_dcg = 1.0 * sum(qid2dcg.values()) / len(qid2dcg)
    else:
        mean_dcg = 0.0
    print("average DCG@25: %f" % mean_dcg)

    result_path_file = "result/individual_result_pathes.txt"
    if os.path.exists(result_path_file):
        mode = 'a'
    else:
        makedirsforfile(result_path_file)
        mode = 'w'
    # `with` guarantees the file is closed even when the write raises.
    with open(result_path_file, mode) as fout:
        fout.write(ranking_result_path + '\n')
def process(options, trainCollection, devCollection):
    """Rank dev-collection images per query with Text2Image and score them.

    For each query id in ``<devCollection>/Annotations/qid.text.txt`` the
    ``Text2Image`` searcher's ``doSearch`` returns one score per annotated
    image. Images are ranked by descending score and the ranked labels are
    evaluated with ``getScorer(options.metric)``. When ``doSearch`` returns
    an empty score list, no ranking is stored for that query and its score
    is taken from the per-query random-performance file instead.

    Args:
        options: object exposing ``rootpath``, ``overwrite``, ``metric``,
            ``qrythres``, ``ntopimg``, ``ntopqry``, ``mincc``, ``feature``,
            ``nnqueryfile`` and ``queryclickfile``.
        trainCollection: collection providing click data and train features.
        devCollection: collection whose queries are evaluated.

    Raises:
        KeyError: if a failed query has no entry in the random-performance
            file.

    Side effects:
        Calls ``sys.exit(0)`` when ``checkToSkip`` returns true for either
        output path. Writes results via ``writeRankingResult`` /
        ``writeDCGResult`` and prints summary lines.
    """
    rootpath = options.rootpath
    overwrite = options.overwrite
    metric = options.metric

    qrythres = options.qrythres
    ntopimg = options.ntopimg
    ntopqry = options.ntopqry
    mincc = options.mincc
    feature = options.feature

    # result paths
    ranking_result_path = os.path.join(
        rootpath, devCollection, 'SimilarityIndex', devCollection,
        'MetaData', 'text2image', feature)
    DCG_result_path = os.path.join(
        rootpath, devCollection, metric, 'text2image', feature)

    if checkToSkip(ranking_result_path, overwrite):
        sys.exit(0)
    if checkToSkip(DCG_result_path, overwrite):
        sys.exit(0)

    # input: queries, passed through the query parser
    qp = SimpleQueryParser()
    qid_query_file = os.path.join(
        rootpath, devCollection, 'Annotations', 'qid.text.txt')
    qid_list, query_list = readQidQuery(qid_query_file)  # (qid query)
    qid2query = dict(
        zip(qid_list, [qp.process(query) for query in query_list]))

    # random performance for specific queries
    # NOTE(review): this file name looks masked/redacted in the source -
    # confirm the intended name before relying on it.
    qid_randomperf_file = os.path.join(
        rootpath, devCollection, 'Annotations', '*****@*****.**')
    qid2randomperf = {}
    # `with` closes the file; the original iterated an unclosed handle.
    with open(qid_randomperf_file) as perf_file:
        for line in perf_file:
            fields = line.strip().split()
            if not fields:
                # tolerate blank lines instead of failing on the unpack
                continue
            qid, random_perf = fields
            qid2randomperf[qid] = float(random_perf)

    # paths of image features
    train_feat_path = os.path.join(
        rootpath, trainCollection, 'FeatureData', feature)
    dev_feat_path = os.path.join(
        rootpath, devCollection, 'FeatureData', feature)

    nnquery_file = os.path.join(
        rootpath, devCollection, 'TextData', 'querynn', options.nnqueryfile)
    qryClick_file = os.path.join(
        rootpath, trainCollection, 'TextData', options.queryclickfile)
    t2i_searcher = Text2Image(
        nnquery_file, qryClick_file, dev_feat_path, train_feat_path, ntopqry)

    scorer = getScorer(metric)

    done = 0
    failed_count = 0
    qid2dcg = collections.OrderedDict()
    qid2iid_label_score = {}

    for query_id in qid_list:
        iid_list, label_list = readAnnotationsFrom(
            devCollection, 'concepts%s.txt' % devCollection, query_id,
            False, rootpath)

        scorelist = t2i_searcher.doSearch(
            query_id, iid_list, ntopimg, qrythres, mincc)

        # len() rather than truthiness: doSearch's return type is not
        # visible here and may not support a boolean test.
        if len(scorelist) == 0:
            failed_count += 1
            # No ranking is stored for a failed query; its score falls back
            # to the value read from the random-performance file.
            qid2dcg[query_id] = qid2randomperf[query_id]
        else:
            ranked = sorted(
                zip(iid_list, label_list, scorelist),
                key=lambda v: v[2], reverse=True)
            qid2iid_label_score[query_id] = ranked
            # score the labels in ranked order
            qid2dcg[query_id] = scorer.score([x[1] for x in ranked])

        printMessage("Done", query_id, qid2query[query_id])

        done += 1
        # Hand the accumulated rankings over every 20 queries and start a
        # fresh batch.
        if done % 20 == 0:
            writeRankingResult(ranking_result_path, qid2iid_label_score)
            qid2iid_label_score = {}

    writeRankingResult(ranking_result_path, qid2iid_label_score)
    writeDCGResult(DCG_result_path, qid2dcg)

    print("number of failed query: %d" % failed_count)
    # Guard the mean: an empty query list used to raise ZeroDivisionError.
    dcg_values = list(qid2dcg.values())
    mean_dcg = 1.0 * sum(dcg_values) / len(dcg_values) if dcg_values else 0.0
    print("average DCG@25: %f" % mean_dcg)