Esempio n. 1
0
 def __init__(self):
     """Create the analyzer table, the index searcher and the helper clients.

     The Lucene index is opened from ``self.STORE_DIR``; ``self.pgconn`` and
     ``self.sw`` are obtained from ``mypass.getConn()`` and
     ``sinaweibooauth.SinaWeiboOauth()`` respectively.
     """
     self.analyzers = {
         "smartcn": lucene.SmartChineseAnalyzer(lucene.Version.LUCENE_33),
     }
     index_location = lucene.File(self.STORE_DIR)
     index_directory = lucene.SimpleFSDirectory(index_location)
     # The second argument is the ``readOnly`` flag of Lucene 3.x's
     # ``IndexSearcher(Directory, boolean)`` constructor.
     self.searcher = lucene.IndexSearcher(index_directory, True)
     self.pgconn = mypass.getConn()
     self.sw = sinaweibooauth.SinaWeiboOauth()
def main1():
    """Open the index at ``INDEX_DIR`` and run ``search`` and ``search2`` on it.

    The searcher is closed once both calls have finished (or one of them has
    raised), so the index reader it opened is not leaked.
    """
    print("retrieve and display files......")
    direc = lucene.SimpleFSDirectory(lucene.File(INDEX_DIR))
    analyzer = lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT)
    searcher = lucene.IndexSearcher(direc)
    try:
        search(searcher, analyzer)
        search2(searcher, analyzer)
    finally:
        searcher.close()
Esempio n. 3
0
    def getRecentConversations(self, username):
        """Return the conversations of ``username`` that have recent messages.

        Queries the user's Lucene index (``self.indexdir + username``) for
        messages whose ``timestamp`` field lies between
        "now - ``SECONDS_IN_20_MINUTES``" and ``MAX_TIMESTAMP``, sorted by
        protocol, friend/chat and timestamp, and groups the hits into
        ``LogConversation`` objects, one per (protocol, friend_chat) pair.

        Returns:
            A list of ``LogConversation`` objects in first-seen order, or
            ``False`` when no index directory exists for ``username``.
        """
        #Determine the index path
        index_dir = self.indexdir + username

        if not os.path.isdir(index_dir):
            #Index does not exist
            return False

        #Load the index
        luc_index = lucene.FSDirectory.getDirectory(index_dir)

        #Get the current time in UTC seconds and convert to a search range
        curtime = int(time.time())
        searchstart = self.__padTimestamp(curtime - SECONDS_IN_20_MINUTES)
        searchend = self.__padTimestamp(MAX_TIMESTAMP)

        #Build and perform the query
        qtext = "timestamp:[" + searchstart + " TO " + searchend + "]"
        searcher = lucene.IndexSearcher(luc_index)
        try:
            qparser = lucene.QueryParser("text", lucene.StandardAnalyzer())
            query = qparser.parse(qtext)
            sortmethod = lucene.Sort(["protocol", "friend_chat", "timestamp"])
            qresults = searcher.search(query, sortmethod)

            #Fetch the results
            conversationlist = []
            #Maps (protocol, friend_chat) to its conversation so each hit is
            #matched in O(1) instead of rescanning conversationlist.
            #NOTE(review): assumes LogConversation.getProtocol() and
            #getFriendChat() return the constructor arguments unchanged.
            conversations_by_key = {}
            for i in range(qresults.length()):
                doc = qresults.doc(i)
                mprotocol = doc.get("protocol")
                mfriend_chat = doc.get("friend_chat")
                mtimestamp = int(doc.get("timestamp"))
                mwho_sent = doc.get("who_sent")
                mfileoffset = int(doc.get("file_offset"))
                mrank = qresults.score(i)

                messagetext = self.__getMessageFromFile(
                    username, mfriend_chat, mprotocol, mfileoffset)
                message = LogMessage(messagetext, mtimestamp, mwho_sent)
                message.setRank(mrank)

                key = (mprotocol, mfriend_chat)
                conversation = conversations_by_key.get(key)
                if conversation is None:
                    conversation = LogConversation(mprotocol, mfriend_chat)
                    conversations_by_key[key] = conversation
                    conversationlist.append(conversation)
                conversation.addMessage(message)
        finally:
            #The hits are read through the searcher, so it can only be
            #closed once every document has been fetched.
            searcher.close()

        return conversationlist
Esempio n. 4
0
    def search(self, restrictions, destination):
        """ 
        @see: L{NullPrincipalSearcher<datafinder.persistence.search.searcher.NullSearcher>} 
        
        Maps C{restrictions} to a Lucene query string and, when the configured
        index URI uses the C{file:///} scheme, searches the C{content} field of
        that index. C{destination} is not used by this implementation.
        
        @return: Unquoted file paths of the matching documents, each prefixed
                 with C{/}; an empty list for any other URI scheme.
        @raise PersistenceError: If anything fails while searching.
        
        E1101: Pylint cannot detect the internals of the modules solr and lucene. 
        """
        # pylint: disable=E1101

        results = list()
        queryString = search_restriction_mapping.mapSearchRestriction(
            restrictions)
        if self._configuration.luceneIndexUri.startswith("file:///"):
            try:
                self._configuration.env.attachCurrentThread()
                indexDir = lucene.SimpleFSDirectory(
                    lucene.File(
                        self._configuration.luceneIndexUri.replace(
                            "file:///", "")))
                analyzer = lucene.StandardAnalyzer(
                    lucene.Version.LUCENE_CURRENT)
                searcher = lucene.IndexSearcher(indexDir)
                try:
                    query = lucene.QueryParser(lucene.Version.LUCENE_CURRENT,
                                               "content",
                                               analyzer).parse(queryString)
                    hits = searcher.search(query, constants.MAX_RESULTS)
                    for hit in hits.scoreDocs:
                        doc = searcher.doc(hit.doc)
                        results.append("/%s" % urllib.unquote(
                            doc.get(constants.FILEPATH_FIELD).encode("utf-8")))
                finally:
                    # Close the searcher even when parsing or searching
                    # fails; previously it leaked on every error.
                    searcher.close()
            except Exception, error:
                errorMessage = "Cannot search items. Reason: '%s'" % error
                raise PersistenceError(errorMessage)
        # The collected paths were previously built but never handed back.
        return results
Esempio n. 5
0
    def __init__(self, session, config, parent):
        """Initialise the base store and open its Lucene index for searching.

        The index location comes from the ``defaultPath`` setting, resolved
        through ``self.get_path``.
        """
        IndexStore.__init__(self, session, config, parent)
        index_path = self.get_path(session, 'defaultPath')
        self.analyzer = NullC3Analyzer()
        # NOTE(review): the second argument is presumably Lucene 2.x's
        # ``create`` flag (False = open existing index) -- confirm.
        self.dir = lucene.FSDirectory.getDirectory(index_path, False)
        parser_analyzer = lucene.StandardAnalyzer()
        self.parser = lucene.QueryParser("", parser_analyzer)
        self.searcher = lucene.IndexSearcher(self.dir)

        # Start with no writer, current document or current record.
        self.writer = self.currDoc = self.currRec = None
Esempio n. 6
0
 def __init__(self, forumname):
     """Open the Lucene index of the forum named ``forumname``.

     Exits the process via ``sys.exit()`` when ``forumname`` is not listed
     in ``self.supported_forums``. Otherwise the index directory is
     ``self.STORE_BASE_DIR + forumname``.
     """
     if forumname not in self.supported_forums:
         sys.exit()
     self.forum = forumname
     self.STORE_DIR = self.STORE_BASE_DIR + forumname
     self.analyzers = {
         "smartcn": lucene.SmartChineseAnalyzer(lucene.Version.LUCENE_33),
     }
     index_directory = lucene.SimpleFSDirectory(lucene.File(self.STORE_DIR))
     # The second argument is the ``readOnly`` flag of Lucene 3.x's
     # ``IndexSearcher(Directory, boolean)`` constructor.
     self.searcher = lucene.IndexSearcher(index_directory, True)
     self.pgconn = mypass.getConn()
Esempio n. 7
0
	def SearchExactContents(self, keyword):
		"""Perform exact matching of ``keyword`` against the blog contents.

		``keyword`` is expected as a CP949-encoded byte string; it is
		re-encoded as UTF-8 and parsed with ``self.analyzer`` into a query on
		the ``contents`` field.

		Returns whatever ``__MakeResultFormat`` builds from the hits. The
		searcher is handed to that helper and is not closed here.
		"""
		searcher = lucene.IndexSearcher(self.store)

		# Format the message into one string: under Python 2,
		# print("...", keyword) prints the repr of a tuple.
		print("Searching for %s" % keyword)
		k = keyword.decode('cp949').encode('utf-8')
		query = lucene.QueryParser('contents', self.analyzer).parse(k)

		hits = searcher.search(query)
		print ("%s matching documents" % hits.length())

		return self.__MakeResultFormat(hits, searcher)
Esempio n. 8
0
 def getHitCount(self, fieldName, searchString):
     """Return how many documents contain the term ``fieldName:searchString``.

     Also prints the total number of documents in the index and the number
     of matches. The count is the query's full ``totalHits``, not the size
     of one result page, so it is no longer capped at 50.
     """
     reader = lucene.IndexReader.open(self.dir, True)  # readOnly = True
     try:
         print('%s total docs in index' % reader.numDocs())
     finally:
         reader.close()

     searcher = lucene.IndexSearcher(self.dir, True)  # readOnly = True
     try:
         query = lucene.TermQuery(lucene.Term(fieldName, searchString))
         # Only the count is needed, so request a single hit and read the
         # overall total from the TopDocs.
         hitCount = searcher.search(query, 1).totalHits
     finally:
         searcher.close()
     print("%s total matching documents for %s\n---------------"
           % (hitCount, searchString))
     return hitCount
Esempio n. 9
0
	def SearchPrefixContents(self, keyword):
		"""Perform prefix matching of ``keyword`` against the blog contents.

		``keyword`` is expected as a CP949-encoded byte string; it is
		re-encoded as UTF-8 and used as the prefix of a ``PrefixQuery`` on
		the ``contents`` field.

		Returns whatever ``__MakeResultFormat`` builds from the hits. The
		searcher is handed to that helper and is not closed here.
		"""
		searcher = lucene.IndexSearcher(self.store)

		# Format the message into one string: under Python 2,
		# print("...", keyword) prints the repr of a tuple.
		print("Searching for %s" % keyword)

		k = keyword.decode('cp949').encode('utf-8')
		query = lucene.PrefixQuery(lucene.Term("contents", k))

		hits = searcher.search(query)
		print ("%s matching documents" % hits.length())

		return self.__MakeResultFormat(hits, searcher)
Esempio n. 10
0
def func_pic(command):
    """Run ``command`` against the ``graphIndex`` index and return the results.

    Attaches the calling thread to the Lucene JVM (storing the environment
    in the module-level ``vm_env``), opens the index, and delegates the
    actual query to ``run``.

    Returns:
        The 5-tuple ``(resultInfo, title, url, imgurl, score)`` unpacked
        from ``run``'s return value.
    """
    global vm_env
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()
    STORE_DIR = "graphIndex"
    directory = lucene.SimpleFSDirectory(lucene.File(STORE_DIR))
    searcher = lucene.IndexSearcher(directory, True)
    try:
        analyzer = lucene.SimpleAnalyzer(lucene.Version.LUCENE_CURRENT)
        resultInfo, title, url, imgurl, score = run(command, searcher, analyzer)
    finally:
        # Release the index reader even when run() raises.
        searcher.close()
    return resultInfo, title, url, imgurl, score
Esempio n. 11
0
def search(request,
           template_name='reviews/search.html',
           local_site_name=None):
    """
    Searches review requests on Review Board based on a query string.

    The query is read from the ``q`` GET parameter. An empty query redirects
    to the ``root`` URL; an all-digit query that matches a ReviewRequest
    primary key redirects straight to that review request. Raises Http404
    when the ``search_enable`` site setting is off.

    NOTE(review): the code visible here ends once the Lucene IndexSearcher
    has been opened; ``template_name`` and ``local_site_name`` are not used
    in the visible part.
    """
    query = request.GET.get('q', '')
    siteconfig = SiteConfiguration.objects.get_current()

    if not siteconfig.get("search_enable"):
        # FIXME: show something useful
        raise Http404

    if not query:
        # FIXME: I'm not super thrilled with this
        return HttpResponseRedirect(reverse("root"))

    # A purely numeric query is first tried as a review request ID.
    if query.isdigit():
        query_review_request = get_object_or_none(ReviewRequest, pk=query)
        if query_review_request:
            return HttpResponseRedirect(query_review_request.get_absolute_url())

    # Imported lazily, inside the function, rather than at module level.
    import lucene
    # Split the PyLucene version string into integer components so the
    # matching directory-opening API can be picked below.
    lv = [int(x) for x in lucene.VERSION.split('.')]
    # "2.x" here means 2.0 - 2.8; version 2.9 is grouped with 3.x.
    lucene_is_2x = lv[0] == 2 and lv[1] < 9
    lucene_is_3x = lv[0] == 3 or (lv[0] == 2 and lv[1] == 9)

    # We may have already initialized lucene
    try:
        lucene.initVM(lucene.CLASSPATH)
    except ValueError:
        pass

    index_file = siteconfig.get("search_index_file")
    if lucene_is_2x:
        store = lucene.FSDirectory.getDirectory(index_file, False)
    elif lucene_is_3x:
        store = lucene.FSDirectory.open(lucene.File(index_file))
    else:
        # Any other major version is unsupported by this code path.
        assert False

    try:
        searcher = lucene.IndexSearcher(store)
    except lucene.JavaError, e:
        # FIXME: show a useful error
        raise e
Esempio n. 12
0
def SearchKeyword(indexDir, keyword):
	"""Search the index at ``indexDir`` for ``keyword`` and print the matches.

	``keyword`` is expected as a CP949-encoded byte string; it is re-encoded
	as UTF-8 and parsed into a query on the ``content`` field. The path and
	name of every matching document are printed. Returns nothing.
	"""
	directory = lucene.FSDirectory.getDirectory(indexDir)
	searcher = lucene.IndexSearcher(directory)		# index search object
	try:
		analyzer = lucene.StandardAnalyzer()

		print ("Searching for %s" % keyword)
		keyword = keyword.decode('cp949').encode('utf-8')
		queryParser = lucene.QueryParser('content', analyzer)	# build the query
		query = queryParser.parse(keyword)

		hits = searcher.search(query)					# run the search
		print ("%s matching documents" % hits.length())	# number of results

		for h in hits:									# print the results
			doc = lucene.Hit.cast_(h).getDocument()
			print("Path: %s, name: %s" % (doc.get("path"), doc.get("name")))
	finally:
		# Close the searcher even when parsing or searching raises.
		searcher.close()
Esempio n. 13
0
	def SearchExactAll(self, keyword):
		"""Perform exact matching of ``keyword`` against blog contents and ID.

		``keyword`` is expected as a CP949-encoded byte string; it is
		re-encoded as UTF-8 and looked up as an exact term in both the
		``bloger`` and ``contents`` fields. A document matches when either
		field contains the term (two SHOULD clauses).

		Returns whatever ``__MakeResultFormat`` builds from the hits. The
		searcher is handed to that helper and is not closed here.
		"""
		searcher = lucene.IndexSearcher(self.store)

		# Format the message into one string: under Python 2,
		# print("...", keyword) prints the repr of a tuple.
		print("Searching for %s" % keyword)
		k = keyword.decode('cp949').encode('utf-8')

		tqBloger = lucene.TermQuery(lucene.Term("bloger", k))
		tqContents = lucene.TermQuery(lucene.Term("contents", k))

		qBoolean = lucene.BooleanQuery()
		qBoolean.add(tqBloger, lucene.BooleanClause.Occur.SHOULD)
		qBoolean.add(tqContents, lucene.BooleanClause.Occur.SHOULD)

		hits = searcher.search(qBoolean)
		print ("%s matching documents" % hits.length())

		return self.__MakeResultFormat(hits, searcher)
def search(input_q, web_data):
    """Return the URLs of the top five documents matching ``input_q``.

    The query is parsed against the ``word`` field with OR as the default
    operator, using the module-level ``directory`` and ``analyzer``. Each
    hit's ``page_num`` field is used as the key into ``web_data``, and the
    value found there is prefixed with ``http://``.

    Returns:
        A list of URL strings, best-scoring hit first.
    """
    numberOfHits = 5
    collector = lucene.TopScoreDocCollector.create(numberOfHits, True)
    searcher = lucene.IndexSearcher(directory, True)
    try:
        qp = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, 'word', analyzer)
        qp.setDefaultOperator(lucene.QueryParser.Operator.OR)
        query = qp.parse(input_q)

        searcher.search(query, collector)
        score_docs = collector.topDocs().scoreDocs

        return ['http://' + web_data[searcher.doc(hit.doc)['page_num']]
                for hit in score_docs]
    finally:
        # The searcher is opened per call, so it must be released per call.
        searcher.close()
Esempio n. 15
0
    def search(self, query, field="content", limit=None):
        '''
        Searches the index based on the query supplied.
        '''
        directory = lucene.SimpleFSDirectory(lucene.File(self.index_dir))
        searcher = lucene.IndexSearcher(directory, True)

        query = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, field,
                                   self.analyser).parse(query)
        try:
            #if there's no limit then use a collector to retrieve them all
            if limit is None:
                collector = DocumentHitCollector(searcher)
                scoreDocs = searcher.search(query, collector)
                results = collector.get_collected_documents()
            else:
                scoreDocs = searcher.search(query, limit).scoreDocs
                results = []
                for scoreDoc in scoreDocs:
                    results.append(searcher.doc(scoreDoc.doc))
        except lucene.JavaError, e:
            print e
Esempio n. 16
0
def search_location(word):
    """Return the ``eng`` field of up to 20 documents matching ``word``.

    Attaches the calling thread to the Lucene JVM, parses ``word`` against
    the ``eng`` field with the module-level ``analyzer1`` and searches the
    module-level ``directory1``.

    Returns:
        A list with the ``eng`` value of each hit, best-scoring hit first.
    """
    print("searching ")

    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()

    searcher = lucene.IndexSearcher(directory1, True)
    try:
        query = lucene.QueryParser(lucene.Version.LUCENE_CURRENT, 'eng',
                                   analyzer1).parse(word)
        results = searcher.search(query, None, 20)
        return [searcher.doc(score_doc.doc)['eng']
                for score_doc in results.scoreDocs]
    finally:
        # Release the index reader even when parsing or searching raises.
        searcher.close()
Esempio n. 17
0
    def searchMessages(self, username, querytext):
        """Search the messages of ``username`` and return them as conversations.

        ``querytext`` is parsed against the ``text`` field of the user's
        Lucene index (``self.indexdir + username``). Every hit that is not
        already part of a previously built conversation becomes a new
        ``LogConversation`` holding the messages returned by
        ``__getSurroundingMessages`` for "before", the hit itself, and those
        returned for "after". A hit that is already present in an earlier
        conversation only has its rank updated there.

        Returns:
            A list of ``LogConversation`` objects, or ``False`` when no index
            directory exists for ``username``.
        """
        #Determine the index path
        index_dir = self.indexdir + username

        if not os.path.isdir(index_dir):
            #Index not found
            return False

        #Load the index
        luc_index = lucene.FSDirectory.getDirectory(index_dir)

        #Build and perform the query
        searcher = lucene.IndexSearcher(luc_index)
        try:
            qparser = lucene.QueryParser("text", lucene.StandardAnalyzer())
            query = qparser.parse(querytext)
            qresults = searcher.search(query)

            #Fetch the results
            conversationlist = []
            for i in range(qresults.length()):
                mid = int(qresults.id(i))
                doc = qresults.doc(i)
                mprotocol = doc.get("protocol")
                mfriend_chat = doc.get("friend_chat")
                mtimestamp = int(doc.get("timestamp"))
                mwho_sent = doc.get("who_sent")
                mfileoffset = int(doc.get("file_offset"))
                mrank = qresults.score(i)

                #First check if it exists in one of the previously matched
                #conversations; every message with this ID gets the new rank.
                found = False
                for existing_conversation in conversationlist:
                    for existing_message in existing_conversation.messages:
                        if existing_message.getID() == mid:
                            existing_message.setRank(mrank)
                            found = True
                if found:
                    continue

                #No match was found, so create a conversation for this result
                conversation = LogConversation(mprotocol, mfriend_chat)

                messagetext = self.__getMessageFromFile(
                    username, mfriend_chat, mprotocol, mfileoffset)
                before_msgs = self.__getSurroundingMessages(
                    "before", searcher, username, mprotocol, mfriend_chat,
                    mtimestamp, mid)
                for before_msg in before_msgs:
                    conversation.addMessage(before_msg)
                message = LogMessage(messagetext, mtimestamp, mwho_sent)
                message.setRank(mrank)
                message.setID(mid)
                conversation.addMessage(message)
                after_msgs = self.__getSurroundingMessages(
                    "after", searcher, username, mprotocol, mfriend_chat,
                    mtimestamp, mid)
                for after_msg in after_msgs:
                    conversation.addMessage(after_msg)

                conversationlist.append(conversation)
            #End of fetching each result
        finally:
            #The hits and the surrounding-message lookups both go through
            #the searcher, so it is closed only after the loop.
            searcher.close()

        return conversationlist
Esempio n. 18
0
 def __init__(self, dir_file_path):
     """Start the Lucene VM and open the index stored at ``dir_file_path``.

     Sets ``self.directory``, ``self.analyzer`` and ``self.search``; the
     last of these holds the ``IndexSearcher`` for that directory.
     """
     lucene.initVM()
     index_location = lucene.File(dir_file_path)
     self.directory = lucene.SimpleFSDirectory(index_location)
     lucene_version = lucene.Version.LUCENE_30
     self.analyzer = lucene.StandardAnalyzer(lucene_version)
     self.search = lucene.IndexSearcher(self.directory)