Example 1
0
def irsolver(data_file, index):
    """Answer multiple-choice questions by Lucene retrieval scoring.

    For every question in *data_file*, each candidate answer is scored
    against the Lucene index at *index* via ``query``; the letter of the
    highest-scoring candidate ('A'-'D') is recorded as the prediction.

    Parameters
    ----------
    data_file : path to the question file understood by ``get_input_data``
    index : path to the Lucene index directory

    Returns
    -------
    (idx, pred) : the question ids and the predicted letter per question
    """
    from questions import get_input_data
    lucene.initVM()
    # Build the stopword-aware analyzer from the module-level `stopwords`.
    stops = CharArraySet(Version.LUCENE_4_10_1, 0, True)
    for s in stopwords:
        stops.add(s)
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1, stops)
    reader = IndexReader.open(SimpleFSDirectory(File(index)))
    searcher = IndexSearcher(reader)

    pred = []
    option_letters = {1: 'A', 2: 'B', 3: 'C', 4: 'D'}

    idx, ques, ans = get_input_data(data_file)
    for acm, (_idq, q, a) in enumerate(zip(idx, ques, ans)):
        # float('-inf') instead of an arbitrary -1000000 sentinel so that
        # even strongly negative scores are compared correctly.
        max_score = float('-inf')
        best_ans = 'A'
        for i, ai in enumerate(a):
            sc = query(q, ai, analyzer, searcher)
            print(acm, i, sc)
            if sc > max_score:
                max_score = sc
                best_ans = option_letters[i + 1]
        pred.append(best_ans)

    return idx, pred
Example 2
0
def irsolver(data_file, index):
    """Pick the best multiple-choice answer for each question via IR scoring.

    Every candidate answer of every question in *data_file* is scored
    against the Lucene index rooted at *index*; the letter of the
    top-scoring candidate is kept (ties resolve to the earliest option,
    'A' if no candidate beats the initial threshold).

    Returns the question ids together with the list of predicted letters.
    """
    from questions import get_input_data

    lucene.initVM()
    # Analyzer configured with the module-level stopword list.
    stop_set = CharArraySet(Version.LUCENE_4_10_1, 0, True)
    for word in stopwords:
        stop_set.add(word)
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1, stop_set)
    searcher = IndexSearcher(IndexReader.open(SimpleFSDirectory(File(index))))

    letters = {1: 'A', 2: 'B', 3: 'C', 4: 'D'}
    predictions = []

    idx, ques, ans = get_input_data(data_file)
    for q_no, (idq, question, options) in enumerate(zip(idx, ques, ans)):
        best_letter, best_score = 'A', -1000000
        for opt_no, option in enumerate(options):
            score = query(question, option, analyzer, searcher)
            print(q_no, opt_no, score)
            if score > best_score:
                best_score = score
                best_letter = letters[opt_no + 1]
        predictions.append(best_letter)

    return idx, predictions
Example 3
0
def get_wiki_docids(data_file, wikipedia_index):
    """Fetch Wikipedia document ids for the questions in *data_file*.

    Sets up a Lucene analyzer/searcher over the index at
    *wikipedia_index*, loads the question data, then delegates the
    actual id generation to ``generate_docids``.
    """
    from questions import get_input_data

    lucene.initVM()
    # Stopword set built from the module-level `stopwords` list.
    stop_set = CharArraySet(Version.LUCENE_4_10_1, 0, True)
    for word in stopwords:
        stop_set.add(word)
    analyzer = StandardAnalyzer(Version.LUCENE_4_10_1, stop_set)
    searcher = IndexSearcher(IndexReader.open(SimpleFSDirectory(File(wikipedia_index))))

    data = get_input_data(data_file)
    generate_docids(data, data_file, analyzer, searcher)
Example 4
0
def get_wiki_docids(data_file, wikipedia_index):
    """Generate Wikipedia doc ids for every question in *data_file*.

    Loads the question data, builds a stopword-aware Lucene searcher
    over *wikipedia_index*, and hands both to ``generate_docids``.
    """
    from questions import get_input_data

    questions = get_input_data(data_file)

    lucene.initVM()
    custom_stops = CharArraySet(Version.LUCENE_4_10_1, 0, True)
    for stopword in stopwords:
        custom_stops.add(stopword)
    std_analyzer = StandardAnalyzer(Version.LUCENE_4_10_1, custom_stops)
    index_reader = IndexReader.open(SimpleFSDirectory(File(wikipedia_index)))
    index_searcher = IndexSearcher(index_reader)

    generate_docids(questions, data_file, std_analyzer, index_searcher)