Example #1
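The listing omits its imports and JVM setup. Below is a minimal sketch of what the functions appear to assume: PyLucene 4.x (the snippets use pre-5.0 APIs such as IndexReader.open and Field.Index.ANALYZED) plus python-snappy. The exact module paths are an assumption, since the original listing does not show them.

import json

import snappy  # python-snappy; used only when compression is enabled

import lucene
from java.io import File
from org.apache.lucene.analysis.standard import StandardAnalyzer
from org.apache.lucene.document import Document, Field
from org.apache.lucene.index import IndexReader, IndexWriter, IndexWriterConfig
from org.apache.lucene.queryparser.classic import QueryParser
from org.apache.lucene.search import BooleanClause, BooleanQuery, IndexSearcher
from org.apache.lucene.store import SimpleFSDirectory
from org.apache.lucene.util import Version

lucene.initVM()  # PyLucene requires a running JVM before any Lucene call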
def rollback(collection_name):
    # Resolve the index directory: a named collection, or the default index.
    INDEX_DIR_DEFAULT = "IndexFiles.index"
    if collection_name != "DEFAULT":
        INDEX_DIR = collection_name
    else:
        INDEX_DIR = INDEX_DIR_DEFAULT

    direc = SimpleFSDirectory(File(INDEX_DIR))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)

    # set up the writer configuration
    config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
    writer = IndexWriter(direc, config)

    # discard all uncommitted changes, then close the writer
    writer.rollback()
    writer.close()
def update(primary_keys_map,
           to_be_compressed_input,
           collection_name,
           tofind,
           update,
           commit=False,
           add_field_if_not_exists=True,
           MAX_RESULTS=1000):
    INDEX_DIR_DEFAULT = "IndexFiles.index"
    # For now the update is implemented as: search, modify the JSON data,
    # delete the old document, and re-write the modified one.
    if collection_name != "DEFAULT":
        INDEX_DIR = collection_name
    else:
        INDEX_DIR = INDEX_DIR_DEFAULT
    try:
        tofind_keyvalue_pairs = json.loads(tofind)
    except (TypeError, ValueError):
        return 100
    direc = SimpleFSDirectory(File(INDEX_DIR))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    try:
        ireader = IndexReader.open(direc)
        searcher = IndexSearcher(ireader)
        # set up the writer configuration
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
        writer = IndexWriter(direc, config)
    except Exception:
        return 105
    no_of_documents_modified = 0

    # find the documents to update
    # (scope for making this more efficient)
    def rewrite(data_string):
        data = json.loads(data_string)
        toupdate = json.loads(update)

        # query that matches the document to be deleted, by its current
        # primary keys
        query = BooleanQuery()
        for key in primary_keys_map:
            temp = QueryParser(Version.LUCENE_CURRENT, key,
                               analyzer).parse(data[key])
            query.add(BooleanClause(temp, BooleanClause.Occur.MUST))

        # modify the values
        for key, value in toupdate.items():
            # If the key is not already present, we either add it to data or
            # ignore the update, depending on add_field_if_not_exists
            # (True by default).
            if not add_field_if_not_exists:
                if key in data.keys():
                    data[key] = value
            else:
                data[key] = value

        # The update may only proceed if the modified primary keys do not
        # already belong to another document, so check for a collision
        # before deleting the old document. The check is skipped when no
        # primary key is being changed, since the document itself would
        # otherwise match.
        primary_key_update = False
        for key in toupdate.keys():
            if key in primary_keys_map:
                primary_key_update = True
                break
        if primary_key_update:
            query_search = BooleanQuery()
            for key in primary_keys_map:
                temp = QueryParser(Version.LUCENE_CURRENT, key,
                                   analyzer).parse(data[key])
                query_search.add(BooleanClause(temp, BooleanClause.Occur.MUST))
            hits = searcher.search(query_search, MAX_RESULTS).scoreDocs
            if len(hits) > 0:
                return 106
        writer.deleteDocuments(query)

        # add the newly modified document
        doc = Document()
        # index the document by its primary keys
        for primary_key in primary_keys_map:
            try:
                field = Field(primary_key, data[primary_key], Field.Store.NO,
                              Field.Index.ANALYZED)
                doc.add(field)
            except Exception:
                return 101
        # compress the data using snappy if compression is on
        if to_be_compressed_input:
            data_string = snappy.compress(json.dumps(data))
        else:
            data_string = json.dumps(data)
        field = Field("$DATA$", data_string, Field.Store.YES,
                      Field.Index.ANALYZED)
        doc.add(field)
        writer.addDocument(doc)

    tofind_primary_keyvalue_pairs = {}
    tofind_nonprimary_keyvalue_pairs = {}

    # separate the primary keys from the non-primary keys
    for key in tofind_keyvalue_pairs.keys():
        if key in primary_keys_map:
            tofind_primary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]
        else:
            tofind_nonprimary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]

    # filtering documents
    if len(tofind_primary_keyvalue_pairs) > 0:
        query = BooleanQuery()
        for key in tofind_primary_keyvalue_pairs.keys():
            temp = QueryParser(Version.LUCENE_CURRENT, key, analyzer).parse(
                tofind_primary_keyvalue_pairs[key])
            query.add(BooleanClause(temp, BooleanClause.Occur.MUST))
        hits = searcher.search(query, MAX_RESULTS).scoreDocs

        for hit in hits:
            doc = searcher.doc(hit.doc)
            if to_be_compressed_input:
                data = snappy.uncompress(doc.get("$DATA$"))
            else:
                data = doc.get("$DATA$")
            # non-primary-key filtering (without loading all the
            # primary-key-filtered values into main memory)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied:
                    # rewrite() returns 106 on a primary-key collision; any
                    # other outcome counts as a modification
                    if rewrite(data) != 106:
                        no_of_documents_modified += 1
                    else:
                        writer.rollback()
                        return 106
            else:
                if rewrite(data) != 106:
                    no_of_documents_modified += 1
                else:
                    writer.rollback()
                    return 106

    else:
        for i in range(0, ireader.numDocs()):
            doc = searcher.doc(i)
            if to_be_compressed_input:
                data = snappy.uncompress(doc.get("$DATA$"))
            else:
                data = doc.get("$DATA$")
            # non-primary-key filtering (without loading all the
            # primary-key-filtered values into main memory)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied:
                    if rewrite(data) != 106:
                        no_of_documents_modified += 1
                    else:
                        writer.rollback()
                        return 106
            else:
                if rewrite(data) != 106:
                    no_of_documents_modified += 1
                else:
                    writer.rollback()
                    return 106

    ireader.close()
    if commit:
        writer.commit()
    writer.close()
    return str(no_of_documents_modified) + " documents have been modified"
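A hedged usage sketch for the pair above; the collection name, key names, and record values are hypothetical, and the call assumes a document with id "42" already exists in the index:

# Hypothetical call: in the collection "my_collection", find the record whose
# primary key "id" is "42", set its "status" field, and commit on success.
result = update(primary_keys_map=["id"],
                to_be_compressed_input=False,
                collection_name="my_collection",
                tofind='{"id": "42"}',
                update='{"status": "archived"}',
                commit=True)
print(result)  # e.g. "1 documents have been modified", or a numeric error code

# Discard any uncommitted changes to the same collection.
rollback("my_collection")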
Example #4
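This variant stores the $DATA$ field base64-encoded, with snappy compression applied first when compression is on, so the stored value is always printable text rather than raw compressed bytes. On top of the imports sketched under Example #1, it also needs:

import base64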
def update(primary_keys_map,
           to_be_compressed_input,
           collection_name,
           tofind,
           update,
           commit=False,
           add_field_if_not_exists=True,
           MAX_RESULTS=1000):
    INDEX_DIR_DEFAULT = "IndexFiles.index"
    #As of now the update will be implemented as search,modify data in json file,delete and re-write
    if collection_name != "DEFAULT":
        INDEX_DIR = collection_name
    else:
        INDEX_DIR = INDEX_DIR_DEFAULT
    try:
        tofind_keyvalue_pairs = json.loads(tofind)
    except (TypeError, ValueError):
        return 100
    direc = SimpleFSDirectory(File(INDEX_DIR))
    analyzer = StandardAnalyzer(Version.LUCENE_CURRENT)
    try:
        ireader = IndexReader.open(direc)
        searcher = IndexSearcher(ireader)
        # set up the writer configuration
        config = IndexWriterConfig(Version.LUCENE_CURRENT, analyzer)
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND)
        writer = IndexWriter(direc, config)
    except Exception:
        return 105
    no_of_documents_modified = 0

    # find the documents to update
    # (scope for making this more efficient)
    def rewrite(data_string):
        data = json.loads(data_string)
        toupdate = json.loads(update)

        # query that matches the document to be deleted, by its current
        # primary keys
        query = BooleanQuery()
        for key in primary_keys_map:
            temp = QueryParser(Version.LUCENE_CURRENT, key,
                               analyzer).parse(data[key])
            query.add(BooleanClause(temp, BooleanClause.Occur.MUST))

        # modify the values
        for key, value in toupdate.items():
            # If the key is not already present, we either add it to data or
            # ignore the update, depending on add_field_if_not_exists
            # (True by default).
            if not add_field_if_not_exists:
                if key in data.keys():
                    data[key] = value
            else:
                data[key] = value

        # The update may only proceed if the modified primary keys do not
        # already belong to another document, so check for a collision
        # before deleting the old document. The check is skipped when no
        # primary key is being changed, since the document itself would
        # otherwise match.
        primary_key_update = False
        for key in toupdate.keys():
            if key in primary_keys_map:
                primary_key_update = True
                break
        if primary_key_update:
            query_search = BooleanQuery()
            for key in primary_keys_map:
                temp = QueryParser(Version.LUCENE_CURRENT, key,
                                   analyzer).parse(data[key])
                query_search.add(BooleanClause(temp, BooleanClause.Occur.MUST))
            hits = searcher.search(query_search, MAX_RESULTS).scoreDocs
            if len(hits) > 0:
                return 106
        writer.deleteDocuments(query)

        # add the newly modified document
        doc = Document()
        # index the document by its primary keys
        for primary_key in primary_keys_map:
            try:
                field = Field(primary_key, data[primary_key], Field.Store.NO,
                              Field.Index.ANALYZED)
                doc.add(field)
            except Exception:
                return 101
        # compress the data using snappy if compression is on; base64-encode
        # either way so that the stored field is plain text
        if to_be_compressed_input:
            temp = json.dumps(data)
            data_string = base64.b64encode(snappy.compress(temp))
        else:
            temp = json.dumps(data)
            data_string = base64.b64encode(temp)

        field = Field("$DATA$", data_string, Field.Store.YES,
                      Field.Index.ANALYZED)
        doc.add(field)
        writer.addDocument(doc)

    tofind_primary_keyvalue_pairs = {}
    tofind_nonprimary_keyvalue_pairs = {}

    # separate the primary keys from the non-primary keys
    for key in tofind_keyvalue_pairs.keys():
        if key in primary_keys_map:
            tofind_primary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]
        else:
            tofind_nonprimary_keyvalue_pairs[key] = tofind_keyvalue_pairs[key]

    #filtering documents
    if len(tofind_primary_keyvalue_pairs) > 0:
        query = BooleanQuery()
        for key in tofind_primary_keyvalue_pairs.keys():
            temp = QueryParser(Version.LUCENE_CURRENT, key, analyzer).parse(
                tofind_primary_keyvalue_pairs[key])
            query.add(BooleanClause(temp, BooleanClause.Occur.MUST))
        hits = searcher.search(query, MAX_RESULTS).scoreDocs

        for hit in hits:
            doc = searcher.doc(hit.doc)
            if to_be_compressed_input:
                temp = doc.get("$DATA$")
                data = snappy.uncompress(base64.b64decode(temp))
            else:
                temp = doc.get("$DATA$")
                data = base64.b64decode(temp)
            # non-primary-key filtering (without loading all the
            # primary-key-filtered values into main memory)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied:
                    # rewrite() returns 106 on a primary-key collision; any
                    # other outcome counts as a modification
                    if rewrite(data) != 106:
                        no_of_documents_modified += 1
                    else:
                        writer.rollback()
                        return 106
            else:
                if rewrite(data) != 106:
                    no_of_documents_modified += 1
                else:
                    writer.rollback()
                    return 106

    else:
        for i in range(0, ireader.numDocs()):
            doc = searcher.doc(i)
            if to_be_compressed_input:
                temp = doc.get("$DATA$")
                data = snappy.uncompress(base64.b64decode(temp))
            else:
                temp = doc.get("$DATA$")
                data = base64.b64decode(temp)
            # non-primary-key filtering (without loading all the
            # primary-key-filtered values into main memory)
            if len(tofind_nonprimary_keyvalue_pairs) > 0:
                entry = json.loads(data)
                satisfied = True
                for key in tofind_nonprimary_keyvalue_pairs.keys():
                    if entry.get(key) != tofind_nonprimary_keyvalue_pairs[key]:
                        satisfied = False
                        break
                if satisfied:
                    if rewrite(data) != 106:
                        no_of_documents_modified += 1
                    else:
                        writer.rollback()
                        return 106
            else:
                if rewrite(data) != 106:
                    no_of_documents_modified += 1
                else:
                    writer.rollback()
                    return 106

    ireader.close()
    if commit:
        writer.commit()
    writer.close()
    return str(no_of_documents_modified) + " documents have been modified"
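A usage sketch for this variant, reusing the hypothetical names from Example #1, with compression turned on so the stored field round-trips through snappy and base64:

# Hypothetical call with compression on: $DATA$ is stored as
# base64(snappy(json.dumps(record))) and decoded the same way on the read path.
result = update(primary_keys_map=["id"],
                to_be_compressed_input=True,
                collection_name="my_collection",
                tofind='{"id": "42"}',
                update='{"status": "archived"}',
                commit=True,
                MAX_RESULTS=500)
print(result)

Base64-encoding the stored value avoids placing raw snappy output, which is arbitrary bytes, into an analyzed Lucene string field, at the cost of roughly a third more storage.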