예제 #1
0
def build_iceberg_lattice(filename, lattice, threshold):
    """Keep the concepts that pass the size threshold and cannot be rebuilt
    from smaller ones, then save them as a new context in CSV form.

    Each item of ``lattice`` is unpacked as a pair ``(intent, extent)``.
    A pair is considered only when ``len(intent) >= threshold``.  It is
    dropped when every element of its first component also appears in some
    strictly smaller first component that is a subset of it and itself
    passes the threshold (i.e. it is the union of such smaller ones).

    The surviving pairs become the objects of a fresh ``Definition``: the
    object name is the first component joined with ``';'`` and its
    properties are the second component.  The resulting ``Context`` is
    written to ``'iceberg.' + filename``.

    NOTE(review): the names ``intent``/``extent`` are taken from the
    unpacking order only; which one actually holds objects and which holds
    attributes depends on what ``lattice`` yields -- confirm with the caller.

    :param filename: suffix of the output file name.
    :param lattice: re-iterable collection of two-element items; it is
        traversed once per candidate, so a one-shot iterator will not work.
    :param threshold: minimal size of the first component.
    """
    kept = []
    for position, (intent, extent) in enumerate(lattice):
        # Elements of this concept not yet explained by a smaller one.
        uncovered = list(intent)
        if len(intent) < threshold:
            continue

        reducible = False
        for other_position, (candidate, _unused) in enumerate(lattice):
            if other_position == position:
                continue
            if len(candidate) < threshold:
                continue
            # Only strictly smaller concepts may contribute to the cover.
            if len(candidate) >= len(intent):
                continue
            if not all(member in intent for member in candidate):
                continue

            for member in candidate:
                if member in uncovered:
                    uncovered.remove(member)
            if not uncovered:
                reducible = True
                break

        if not reducible:
            kept.append((intent, extent))

    definition = Definition()
    for intent, extent in kept:
        definition.add_object(';'.join(intent), list(extent))

    context = Context(*definition)
    context.tofile(filename='iceberg.' + filename, frmat='csv')
예제 #2
0
	print res
	return res
		

if __name__ == '__main__':
    # Small "animals x properties" context used to exercise the export
    # helpers when this module is run as a script.
    animaux = ["Bat","Eagle","Monkey","Parrot fish","Penguin","Shark","Lantern fish"]
    proprietes = ["breathes in water","can fly","has beak","has hands","has skeleton","has wings","lives in water","is viviparous","produces light"]

    # Properties that hold for each animal; every other cell of the cross
    # table is False.
    traits = {
        "Bat": ("can fly", "has skeleton", "has wings", "is viviparous"),
        "Eagle": ("can fly", "has beak", "has skeleton", "has wings"),
        "Monkey": ("has hands", "has skeleton", "is viviparous"),
        "Parrot fish": ("breathes in water", "has beak", "has skeleton",
                        "lives in water"),
        "Penguin": ("has beak", "has skeleton", "has wings", "lives in water"),
        "Shark": ("breathes in water", "has skeleton", "lives in water"),
        "Lantern fish": ("breathes in water", "has skeleton", "lives in water",
                         "produces light"),
    }
    # One tuple of booleans per animal, ordered like `proprietes`.
    matrix = [tuple(prop in traits[animal] for prop in proprietes)
              for animal in animaux]

    exportContext(animaux, proprietes, matrix)
    c = Context(animaux, proprietes, matrix)

    # Alternative toy data set (customers x purchased articles), kept for
    # reference only:
    #   clients  = ['Anne', 'Basile', 'Carole']
    #   articles = ['fromage', 'vin', 'lait', 'lessive']
    #   Anne: fromage, lait / Basile: fromage, vin, lessive /
    #   Carole: fromage, lait, lessive
    # Debugging aid: c.lattice.graphviz(view=True) renders the lattice, and
    # iterating c.lattice lists its concepts.

    c.tofile('animaux.txt', frmat='cxt', encoding='utf-8')
    writeConcepts(c.lattice)
예제 #3
0
def _readFlattened(path):
    """Return the text of *path* as a single line and close the file.

    Trailing whitespace of every line is stripped and the lines are joined
    with one space, which removes the line breaks left by PDF copy/paste.
    """
    with open(path, 'r') as handle:
        return ' '.join([line.rstrip() for line in handle])


def _normalizeMatch(rawText):
    """Apply the clean-up chain shared by both passes to one regex match."""
    cleaned = regex.removeAccent(rawText)
    cleaned = correctSyntaxe(cleaned)
    return regex.supprNumero(cleaned)


def buildLattice(pattern = True, inputFiles = "dec", inputAttributes = "arts"):
    """Build a formal context from the corpus files, export it and draw its lattice.

    The files are read twice: a first pass collects every attribute matched
    by the selected regular expression, a second pass builds one boolean row
    per file over the final attribute list.

    :param pattern: when equal to ``True`` the collected attributes are
        expanded with ``buildAttributes`` / ``developAttribute`` and the
        outputs are tagged ``"WithPS"``; otherwise they are tagged
        ``"WithoutPS"``.
    :param inputFiles: ``"dec"``, ``"avis"`` or ``"all"``; any other value
        prints a warning and falls back to the decisions.
    :param inputAttributes: ``"arts"`` or ``"artsdocs"``; any other value
        prints a warning and falls back to the article expression.
    :returns: ``None``.  Results are produced as side effects: progress
        messages on stdout, ``writeAttributes``, ``exportContext``,
        ``writeConcepts``, a graphviz view of the lattice and a ``cxt`` file
        under ``latticeEtContext/``.
    :raises ValueError: if a match of the second pass is missing from the
        attribute list (raised by ``list.index``).
    """
    # Kept as an equality test rather than plain truthiness so that exactly
    # the same values as before enable the pattern mode.
    usePattern = (pattern == True)
    name = "WithPS" if usePattern else "WithoutPS"
    print("%s %s %s" % (inputFiles, inputAttributes, name))

    # Files read to build the context.
    if inputFiles == "dec":
        listAllFiles = fg.getAllDecisions()
    elif inputFiles == "avis":
        listAllFiles = fg.getAllAvis()
    elif inputFiles == "all":
        listAllFiles = fg.getAllFiles()
    else:
        print("choix non reconnu. Choix possibles : 'dec' 'avis' 'all'")
        listAllFiles = fg.getAllDecisions()
    lengthAllFiles = len(listAllFiles)

    # Regular expression describing the attributes looked for in the texts.
    # NOTE(review): the warning below advertises a 'docs' choice, but no
    # branch handles it, so 'docs' falls back to the article expression --
    # confirm whether a dedicated branch is wanted.
    if inputAttributes == "arts":
        expre = expreAttribute()
    elif inputAttributes == "artsdocs":
        expre = expreAttribute()+'|'+regex.exprReguliereDecision()
    else:
        print("choix non reconnu. Choix possibles : 'arts' 'docs' 'artsdocs'")
        expre = expreAttribute()

    # Every file is read twice, so each pass advances the counter by half a
    # file; it reaches the number of files at the end of the second pass.
    filesDone = 0

    # First pass: collect the normalised attributes found in the corpus.
    setOfAttributes = set()
    for dfile in listAllFiles:
        data = _readFlattened(dfile)
        for m in re.finditer(expre, data):
            setOfAttributes.add(_normalizeMatch(m.group(0)))
        filesDone = filesDone + 0.5
        if filesDone % 100 == 0:
            print(str(int(filesDone))+' fichiers lus sur '+str(lengthAllFiles))

    # Reformat the attributes so that variants of one article collapse.
    setFormated = set()
    for item in setOfAttributes:
        setFormated.add(regex.formatArticle(item))
    if usePattern:
        setFormated = list(buildAttributes(setFormated))
    else:
        setFormated = list(setFormated)

    # Number of attributes (columns) of the context.
    lenset = len(setFormated)
    print(str(lenset))
    writeAttributes(setFormated, name)

    # Second pass: one object (row) per file.
    listFiles = []
    matrixAttribute = []
    for dfile in listAllFiles:
        data = _readFlattened(dfile)
        listFiles.append(regex.nomDocument(dfile))
        row = [False] * lenset
        for m in re.finditer(expre, data):
            attribut = regex.formatArticle(_normalizeMatch(m.group(0)))
            if usePattern:
                # In pattern mode one match may stand for several attributes.
                for item in developAttribute(attribut):
                    row[setFormated.index(item)] = True
            else:
                row[setFormated.index(attribut)] = True
        filesDone = filesDone + 0.5
        if filesDone % 100 == 0:
            print(str(int(filesDone))+' fichiers lus sur '+str(lengthAllFiles))
        matrixAttribute.append(tuple(row))
    print(str(int(filesDone))+' fichiers lus sur '+str(lengthAllFiles))

    # Export the raw context, then build it and display its lattice.
    exportContext(listFiles, setFormated, matrixAttribute, name)
    c = Context(listFiles, setFormated, matrixAttribute)
    print("construction de la lattice. Cela peut prendre quelques instants")
    c.lattice.graphviz(view=True)
    writeConcepts(c.lattice, name)
    # The file name follows `name` so that the two modes do not overwrite
    # each other's saved context.
    c.tofile('latticeEtContext/saveLattice' + name + '.txt',frmat='cxt',encoding='utf-8')
예제 #4
0
# Build the context used for implication rules and save it to disk.
# The class label is attached as a column, expanded into `Class_*` indicator
# columns, and the raw label column is dropped before de-duplicating rows.
X_train_one_hot['Class'] = y_train
class_indicators = pd.get_dummies(X_train_one_hot['Class'], prefix='Class')
X_train_Class_split = pd.concat([X_train_one_hot, class_indicators], axis=1)
X_train_Class_split = X_train_Class_split.drop(["Class"], axis=1)
X_train_Class_split = X_train_Class_split.drop_duplicates()

# Objects are the row labels (as strings), properties the column names and
# the incidence relation the rows cast to booleans.
objects = [str(label) for label in X_train_Class_split.index.values]
properties = X_train_Class_split.columns.values
incidence_frame = X_train_Class_split.astype(bool)
bools = list(incidence_frame.itertuples(index=False, name=None))
cxt = Context(objects, properties, bools)
cxt.tofile('diabetes_context.cxt', frmat='cxt', encoding='utf-8')

# Preparation for the per-class concept lattices: two containers
# (presumably filled per class by the code that follows -- confirm), the
# number of classes, and a labelled training frame without duplicate rows.
c = {}
l = {}
no_of_classes = 2
X_train_one_hot['Class'] = y_train
X_train_one_hot = X_train_one_hot.drop_duplicates()

for i in range(0, no_of_classes):
    X_temp = X_train_one_hot.copy(deep=True)
    X_temp = X_temp[X_temp['Class'] == i].drop(["Class"], axis=1)
    objects = X_temp.index.values
    objects = [str(oi) for oi in objects]
    properties = X_temp.columns.values
    bools = list(X_temp.astype(bool).itertuples(index=False, name=None))