Example #1
def build_iceberg_lattice(filename, lattice, threshold):
    irreducible = []
    for i, (intent, extent) in enumerate(lattice):
        coverage = list(intent)
        if (len(intent) < threshold):
            continue
        is_irreducible = True
        for j, (intent1, extent1) in enumerate(lattice):
            if (j == i or len(intent1) < threshold
                    or len(intent) <= len(intent1)):
                continue
            is_subset = True
            for obj in intent1:
                if (not (obj in intent)):
                    is_subset = False
                    break
            if is_subset:
                for obj in intent1:
                    if obj in coverage:
                        coverage.remove(obj)
                if (len(coverage) == 0):
                    is_irreducible = False
                    break
        if is_irreducible:
            irreducible.append((intent, extent))
            #print intent, extent
            #print '\n'
    df = Definition()
    for intent, extent in irreducible:
        obj_name = ';'.join(intent)
        df.add_object(obj_name, list(extent))
    conc = Context(*df)
    conc.tofile(filename='iceberg.' + filename, frmat='csv')
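
A minimal, hypothetical usage sketch for the helper above (the filename and threshold are illustrative; as in main() further below, the result is written to 'iceberg.' + filename as CSV):

from concepts import Context

filename = 'data.csv'  # hypothetical input: a formal context in CSV format
context = Context.fromfile(filename, frmat='csv')
# keep only concepts supported by at least 2 objects (the iceberg threshold)
build_iceberg_lattice(filename, context.lattice, 2)
iceberg = Context.fromfile('iceberg.' + filename, frmat='csv')
for extent, intent in iceberg.lattice:
    print(extent, intent)
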
Example #2
def test_fromstring_serialized(tmp_path, source, filename, includes_lattice):
    if filename is None:
        context = Context.fromstring(source, frmat='python-literal')
    else:
        target = tmp_path / filename
        kwargs = {'encoding': 'utf-8'}
        target.write_text(source, **kwargs)
        context = Context.fromfile(str(target),
                                   frmat='python-literal',
                                   **kwargs)

    assert context.objects == SERIALIZED['objects']
    assert context.properties == SERIALIZED['properties']
    assert context.bools == [
        (True, False, False, True, False, True, True, False, False, True),
        (True, False, False, True, False, True, False, True, True, False),
        (False, True, True, False, False, True, True, False, False, True),
        (False, True, True, False, False, True, False, True, True, False),
        (False, True, False, True, True, False, True, False, False, True),
        (False, True, False, True, True, False, False, True, True, False)
    ]

    if includes_lattice:
        assert 'lattice' in context.__dict__
    else:
        assert 'lattice' not in context.__dict__
def test_dict_roundtrip(context, ignore_lattice):
    context = Context(context.objects, context.properties, context.bools)
    assert 'lattice' not in context.__dict__

    d = context.todict(ignore_lattice=ignore_lattice)

    assert isinstance(d, dict) and d
    assert all(d[k] for k in ('objects', 'properties', 'context'))
    if ignore_lattice or ignore_lattice is None:
        assert 'lattice' not in context.__dict__
        assert 'lattice' not in d
    else:
        assert 'lattice' in context.__dict__
        assert d['lattice']

    result = Context.fromdict(d)

    assert isinstance(result, Context)
    assert result == context

    if ignore_lattice or ignore_lattice is None:
        assert 'lattice' not in result.__dict__
    else:
        assert 'lattice' in result.__dict__
        assert result.lattice._eq(context.lattice)
Example #4
    def __init__(self, csv_location):
        # the Frame-capability lattice
        self.context = Context.fromfile(csv_location, frmat='csv')
        self.lattice = self.context.lattice
        # the Frame-uncapability lattice
        self.context_inv = Context(*self.context.definition().inverted())
        self.lattice_inv = self.context_inv.lattice
        # the list of all capabilities and frames
        self.capabilities = self.context.properties
        self.frames = self.context.objects
def test_todict(context, d):
    assert 'lattice' not in context.__dict__
    if 'lattice' in d:
        context = Context(context.objects, context.properties, context.bools)
        assert 'lattice' not in context.__dict__

        for ignore_lattice in (False, None):
            assert context.todict(ignore_lattice=ignore_lattice) == d
        assert 'lattice' in context.__dict__
    else:
        for ignore_lattice in (True, None):
            assert context.todict(ignore_lattice=ignore_lattice) == d
        assert 'lattice' not in context.__dict__
Example #6
def main():
    #print sys.argv
    filename = str(sys.argv[1])
    iceberg_threshold = int(sys.argv[2])
    draw_iceberg = False
    cols_to_use = []
    i = 3
    cols_started = False
    while (i < len(sys.argv)):
        if (sys.argv[i][0] == '-'):
            cols_started = False
            if (sys.argv[i] == '-draw'):
                draw_iceberg = True
            elif (sys.argv[i] == '-columns'):
                cols_started = True
        elif (cols_started):
            cols_to_use.append(sys.argv[i])
        i += 1
    #print cols_to_use

    dataframe = pd.read_csv(filename)
    if (len(cols_to_use) > 0):
        dataframe = dataframe[[dataframe.columns[0]] + cols_to_use]
    col_info = pd.read_csv('cols.' + filename)
    transform_columns(dataframe, col_info)
    dataframe = dataframe.drop_duplicates(subset=list(dataframe.columns[0:1]),
                                          keep='first')
    dataframe.to_csv('transformed.' + filename, index_label=False, index=False)

    context = Context.fromfile('transformed.' + filename, frmat='csv')
    lattice_str = str(context.lattice.graphviz())
    f = open('lattice.dot', 'w')
    f.write(lattice_str)
    f.close()
    #context.lattice.graphviz()

    build_iceberg_lattice(filename, context.lattice, iceberg_threshold)
    iceberg_context = Context.fromfile(filename='iceberg.' + filename,
                                       frmat='csv')
    if draw_iceberg:
        iceberg_context.lattice.graphviz(view=True)

    lattice_str = str(iceberg_context.lattice.graphviz())
    f = open('iceberg.dot', 'w')
    f.write(lattice_str)
    f.close()

    implication_basis = find_implication_basis(iceberg_context)
    print "Implication basis: "
    for i, e in implication_basis:
        print str(i) + " => " + str(e)
def test_json_roundtrip(context, path_or_fileobj, encoding):
    context = Context(context.objects, context.properties, context.bools)
    assert 'lattice' not in context.__dict__

    is_fileobj = hasattr(path_or_fileobj, 'seek')
    kwargs = {'encoding': encoding} if encoding is not None else {}

    context.tojson(path_or_fileobj, ignore_lattice=True, **kwargs)
    if is_fileobj:
        path_or_fileobj.seek(0)
    assert 'lattice' not in context.__dict__

    deserialized = Context.fromjson(path_or_fileobj, **kwargs)
    if is_fileobj:
        path_or_fileobj.seek(0)
    assert 'lattice' not in deserialized.__dict__

    assert deserialized == context

    assert isinstance(context.lattice, Lattice)
    assert 'lattice' in context.__dict__

    context.tojson(path_or_fileobj, ignore_lattice=None, **kwargs)
    if is_fileobj:
        path_or_fileobj.seek(0)

    deserialized = Context.fromjson(path_or_fileobj, **kwargs)
    assert 'lattice' in deserialized.__dict__

    assert deserialized == context
    assert deserialized.lattice._eq(context.lattice)
Example #8
def extract_concepts_in_order(objs, order:Order) -> [(set, set)]:
    var_idx = list(set(itertools.chain.from_iterable(objs.values())))
    context_def = [
        [var_idx.index(variable) for variable in variables]
        for (obj, variables) in objs.items()
    ]
    ctx = Context.fromdict({'objects': list(objs), 'properties': var_idx, 'context': context_def})

    def linksof(c) -> set:
        "edges covered by the given concept"
        return set(itertools.product(c.extent, c.intent))

    concepts_cover = {c: linksof(c) for c in iter(ctx.lattice)}
    treated = set()  # set of edges already described

    def def_Random(concepts):
        return random.choice(tuple(concepts))
    def def_LargestCoverFirst(concepts):
        return max(concepts.keys(), key=lambda c: len(linksof(c) - treated))
    def def_LargestExtentFirst(concepts):
        return max(concepts.keys(), key=lambda c: len(c.extent))
    def def_LargestIntentFirst(concepts):
        return max(concepts.keys(), key=lambda c: len(c.intent))
    def def_LargestExtentOrIntentFirst(concepts):
        return max(concepts.keys(), key=lambda c: max(len(c.extent), len(c.intent)))

    while concepts_cover:
        best = locals()['def_' + order.name](concepts_cover)
        simplified_best = simplify_concept(best, treated)
        treated |= linksof(best)
        concepts_cover = {c: linksof(c) - treated for c in concepts_cover}
        concepts_cover = {c: links for c, links in concepts_cover.items() if len(links) > 0}
        if not simplified_best[0] or not simplified_best[1]: continue  # ignore the extrema
        yield simplified_best
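
For reference, a minimal sketch of the dict layout that Context.fromdict consumes in the snippet above; 'context' lists, for each object, the indices of its properties (all values here are made up):

from concepts import Context

d = {'objects': ['o1', 'o2'],
     'properties': ['p0', 'p1', 'p2'],
     'context': [[0, 1],    # o1 has p0 and p1
                 [1, 2]]}   # o2 has p1 and p2
ctx = Context.fromdict(d)
for extent, intent in ctx.lattice:
    print(extent, intent)
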
Example #9
def render_all(filepattern='*.cxt',
               frmat=None,
               encoding=None,
               directory=None,
               out_format=None):
    from concepts import Context

    if directory is not None:
        get_name = lambda filename: os.path.basename(filename)
    else:
        get_name = lambda filename: filename

    if frmat is None:
        from concepts.formats import Format
        get_frmat = Format.by_extension.get
    else:
        get_frmat = lambda filename: frmat

    for cxtfile in glob.glob(filepattern):
        name, ext = os.path.splitext(cxtfile)
        filename = '%s.gv' % get_name(name)

        c = Context.fromfile(cxtfile, get_frmat(ext), encoding=encoding)
        l = c.lattice
        dot = l.graphviz(filename, directory)

        if out_format is not None:
            dot.format = out_format
        dot.render()
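
A hypothetical call of render_all, assuming the module-level glob/os imports of the original script (the pattern and output format are illustrative):

render_all('contexts/*.cxt', out_format='svg')
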
Example #11
    def local_fca(self, molecules):
        props = self.get_properties(molecules)
        props = list(set(props))
        sub = list(molecules[0].subjects())[0]

        molecule_properties = [
            str(prop.encode('utf-8')) + '->' + str(value.encode('utf-8'))
            for prop, value in props
        ]
        molecule_names = [
            "{}_{}".format(str(sub.encode('utf-8')), y)
            for y in [2014, 2015, 2016]
        ]

        mat = []
        for molecule in molecules:
            row = [False] * len(props)
            for idx, (prop, val) in enumerate(props):
                if (sub, prop, val) in molecule:
                    row[idx] = True
            mat.append(row)

        c = Context(molecule_names, molecule_properties, mat)
        res = c.lattice
        result = []
        for (extent, intent) in res:
            result.append((list(extent), list(intent)))

        return result
def test_fromdict_raw(context, lattice, d, raw):
    def shuffled(items):
        result = list(items)
        random.shuffle(result)
        return result

    _lattice = d.get('lattice')
    d = {
        'objects': d['objects'],
        'properties': d['properties'],
        'context': [shuffled(intent) for intent in d['context']]
    }

    if _lattice is not None:
        pairs = shuffled(enumerate(_lattice))
        index_map = {old: new for new, (old, _) in enumerate(pairs)}
        d['lattice'] = [(shuffled(ex), shuffled(in_),
                         shuffled(index_map[i] for i in up),
                         shuffled(index_map[i] for i in lo))
                        for _, (ex, in_, up, lo) in pairs]

    result = Context.fromdict(d, raw=raw)

    assert isinstance(result, Context)
    assert result == context
    if _lattice is not None:
        if raw:
            assert result.lattice._eq(lattice)
        else:
            # instance broken by shuffled(d['lattice'])
            assert not result.lattice._eq(lattice)
Example #13
def dictToConcept(data_matrix):
    """ From dictionnary to concepts """

    definition = concepts.Definition()
    for (current_obj, current_values) in data_matrix.items():
        definition.add_object(current_obj, current_values)
    context = Context(*definition)
    lattice = context.lattice

    return context, lattice
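
A hypothetical call of dictToConcept with a toy data_matrix mapping each object name to its list of properties:

toy = {'cat': ['four-legged', 'furry'],
       'sparrow': ['winged', 'feathered'],
       'bat': ['winged', 'furry']}
ctx, lat = dictToConcept(toy)
for extent, intent in lat:
    print(extent, intent)
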
def nonascii_context(abba=(u'Agneta F\xe4ltskog', u'Anni-Frid Lyngstat',
                           u'Benny Andersson', u'Bj\xf6rn Ulvaeus')):
    d = Definition()
    for o in abba:
        d.add_object(o, [u'human', u'singer'])
    d.add_property(u'female', abba[:2])
    d.add_property(u'male', abba[2:])
    d.add_property(u'keyboarder', [abba[2]])
    d.add_property(u'guitarrist', [abba[3]])
    d.add_property(u'sch\xf6n', abba[::2])

    return Context(*d)
Example #15
def generate_concept_matrix(filename, skill_list=None, render=False):
    # applying fca
    c = Context.fromfile(filename, frmat="csv")
    if render:
        # strip the ".csv" suffix (str.rstrip removes characters, not a suffix); assumes filename ends in ".csv"
        c.lattice.graphviz(filename=filename[:-len(".csv")], view=True)

    # reading csv headers
    csvfile = open(filename)
    csvreader = csv.reader(csvfile)

    # reading skills
    if skill_list is None:
        skill_list = next(csvreader)
        skill_list.pop(0)
    else:
        next(csvreader)

    # reading abstract names
    row_header = list()
    for row in csvreader:
        row_header.append(row[0])

    csvfile.close()

    # matrix to return
    mat = list()
    for i, concept in enumerate(c.lattice):
        extent, intent = concept

        # skip for non-significant concept
        if len(extent) == 0 or len(intent) == 0:
            continue

        print("c{} = {} > {}".format(i, extent, intent))
        row = list()
        for skill in skill_list:
            if skill in intent:
                row.append(1)
            else:
                row.append(0)
        for header in row_header:
            if header in extent:
                row.append(1)
            else:
                row.append(0)

        mat.append(row)

    return mat, row_header, skill_list
def test_fromdict(context, lattice, d, require_lattice, ignore_lattice, raw):
    if require_lattice and 'lattice' not in d:
        return

    result = Context.fromdict(d,
                              require_lattice=require_lattice,
                              ignore_lattice=ignore_lattice,
                              raw=raw)

    assert result == context

    if ignore_lattice or 'lattice' not in d:
        assert 'lattice' not in result.__dict__
    else:
        assert 'lattice' in result.__dict__
        assert result.lattice._eq(lattice)
Example #17
def predict_fca(s):
    properties = s.index.values
    objects = [str(s.name)]
    bools = tuple(s.astype(bool))
    s_lattice = Context(objects, properties, [bools])
    s_intents = set()
    for extent_s, intent_s in s_lattice.lattice:
        s_intents.add(intent_s)
    sets = set(list(s_intents)[1])
    probs = []
    for i in range(0, no_of_classes):
        for intent_c in class_intents_sets[i]:
            setc = set(intent_c)
            if sets.issubset(setc):
                probs.append(i)
    if len(probs) == 0:
        return -1
    return max(probs, key=probs.count)
Example #18
def test_json_roundtrip_nonascii_context(nonascii_context, encoding):
    assert isinstance(nonascii_context.lattice, Lattice)
    assert 'lattice' in nonascii_context.__dict__
    kwargs = {'encoding': encoding} if encoding is not None else {}

    with io.StringIO() as f:
        nonascii_context.tojson(f, **kwargs)
        serialized = f.getvalue()
        f.seek(0)

        deserialized = Context.fromjson(f, **kwargs)

    assert 'lattice' in deserialized.__dict__
    assert deserialized == nonascii_context
    assert deserialized.lattice._eq(nonascii_context.lattice)

    assert '"Agneta F\\u00e4ltskog"' in serialized
    assert '"Bj\\u00f6rn Ulvaeus"' in serialized
    assert '"sch\\u00f6n"' in serialized
def generate_concept_matrix(filename, skill_list=None):

    # applying fca
    c = Context.fromfile(filename, frmat="csv")

    # reading csv headers
    csvfile = open(filename)
    csvreader = csv.reader(csvfile)

    # reading skills
    if skill_list is None:
        skill_list = next(csvreader)
        skill_list.pop(0)
    else:
        next(csvreader)

    # reading abstract names
    row_header = list()
    for row in csvreader:
        row_header.append(row[0])

    csvfile.close()

    # matrix to return
    mat = list()
    for extent, intent in c.lattice:
        print("{} > {}".format(extent, intent))
        row = list()
        for skill in skill_list:
            if skill in intent:
                row.append(1)
            else:
                row.append(0)
        for header in row_header:
            if header in extent:
                row.append(1)
            else:
                row.append(0)

        mat.append(row)

    return mat, row_header, skill_list
Example #20
# Create and save the context for implication rules
X_train_one_hot['Class'] = y_train
X_train_Class_split = pd.concat([
    X_train_one_hot,
    pd.get_dummies(X_train_one_hot['Class'], prefix='Class')
],
                                axis=1)
X_train_Class_split = X_train_Class_split.drop(["Class"],
                                               axis=1).drop_duplicates()
objects = X_train_Class_split.index.values
objects = [str(oi) for oi in objects]
properties = X_train_Class_split.columns.values
bools = list(
    X_train_Class_split.astype(bool).itertuples(index=False, name=None))
cxt = Context(objects, properties, bools)
cxt.tofile('diabetes_context.cxt', frmat='cxt', encoding='utf-8')

## Create concept lattices for each class
c = {}
l = {}
no_of_classes = 2
X_train_one_hot['Class'] = y_train
X_train_one_hot = X_train_one_hot.drop_duplicates()

for i in range(0, no_of_classes):
    X_temp = X_train_one_hot.copy(deep=True)
    X_temp = X_temp[X_temp['Class'] == i].drop(["Class"], axis=1)
    objects = X_temp.index.values
    objects = [str(oi) for oi in objects]
    properties = X_temp.columns.values
def test_fromdict_empty_lattice(d_invalid):
    d_invalid['lattice'] = []
    with pytest.raises(ValueError, match='empty lattice'):
        Context.fromdict(d_invalid)
def test_fromdict_context_invalid_index(d_invalid):
    first = d_invalid['context'][0]
    d_invalid['context'][0] = (42, ) + first[1:]
    with pytest.raises(ValueError, match='invalid index'):
        Context.fromdict(d_invalid)
def test_fromdict_context_duplicates(d_invalid):
    first = d_invalid['context'][0]
    d_invalid['context'][0] = (first[0], first[0]) + first[2:]
    with pytest.raises(ValueError, match='duplicate'):
        Context.fromdict(d_invalid)
def test_fromdict_mismatch(d_invalid, short):
    d_invalid[short] = d_invalid[short][1:]
    lens = (5, 6) if short == 'objects' else (6, 5)
    match = r'mismatch: %d objects with %d context' % lens
    with pytest.raises(ValueError, match=match):
        Context.fromdict(d_invalid)
def test_fromdict_nonstring(d_invalid, nonstring):
    d_invalid[nonstring] = (42, ) + d_invalid[nonstring][1:]
    with pytest.raises(ValueError, match=r'non-string %s' % nonstring):
        Context.fromdict(d_invalid)
Example #26
def buildLattice(pattern = True, inputFiles = "dec", inputAttributes = "arts"):
	if pattern == True:
		name = "WithPS"
	else:
		name = "WithoutPS"
	print inputFiles, inputAttributes, name
	#The context to build
	matrixAttribute = []
	#
	listFiles = []
	#List of the files read to build the context
	if(inputFiles == "dec"):
		listAllFiles = fg.getAllDecisions()
	elif(inputFiles == "avis"):
		listAllFiles = fg.getAllAvis()
	elif(inputFiles == "all"):
		listAllFiles = fg.getAllFiles()
	else:
		print "choix non reconnu. Choix possibles : 'dec' 'avis' 'all'"
		listAllFiles = fg.getAllDecisions()
	#Number of files read
	lengthAllFiles = len(listAllFiles)
	#The set of attributes of the context
	setOfAttributes = set()
	#The set of reformatted attributes of the context
	setFormated = set()
	#The regular expression of the possible attributes of the various texts
	if (inputAttributes == "arts"):
		expre = expreAttribute()
	elif(inputAttributes == "artsdocs"):
		expre = expreAttribute()+'|'+regex.exprReguliereDecision()
	else:
		print "choix non reconnu. Choix possibles : 'arts' 'docs' 'artsdocs'"
		expre = expreAttribute()
	#Counter of files read
	i = 0
	#Read the files to list the attributes
	for dfile in listAllFiles:
		f = open(dfile,'r')
		#Remove the line breaks caused by copy/pasting from the pdf
		data = ' '.join([line.rstrip() for line in f])
		#For each expression found in the text
		for m in re.finditer(expre, data):
			#Regular expression match
			attributFormated = m.group(0)
			#Clean up the expression:
			#Remove the accents
			attributFormated = regex.removeAccent(attributFormated)
			#Fix potential errors
			attributFormated = correctSyntaxe(attributFormated)
			attributFormated = regex.supprNumero(attributFormated)
			setOfAttributes.add(attributFormated)
		i = i + 0.5
		if i%100==0:
			print str(int(i))+' files read out of '+str(lengthAllFiles)
	#Reformat the attributes to avoid duplicates
	setOfAttributes = list(setOfAttributes)

	for item in setOfAttributes:
		setFormated.add(regex.formatArticle(item))
	if pattern == True:
		developAttributes = buildAttributes(setFormated)
		setFormated =  list(developAttributes)
	else:
		setFormated = list(setFormated)

	#Number of attributes in the context
	lenset = len(setFormated)
	print str(lenset)
	writeAttributes(setFormated,name)
	#Build the context
	for dfile in listAllFiles:
		f = open(dfile, 'r')
		data = ' '.join([line.rstrip() for line in f])
		#List the documents for building the context
		listFiles.append(regex.nomDocument(dfile))
		#Build one row of the context
		nuplet = (False,)*lenset
		listuple = list(nuplet)
		#For each expression
		for m in re.finditer(expre, data):
			attributFormated = m.group(0)
			#Format the regular expression match
			attributFormated = regex.removeAccent(attributFormated)
			attributFormated = correctSyntaxe(attributFormated)
			attributFormated = regex.supprNumero(attributFormated)
			attributFormated = regex.formatArticle(attributFormated)
			#If pattern, split each attribute
			if pattern == True:
				listAtt = developAttribute(attributFormated)
				for item in listAtt:
					#Find the index of the attribute
					index = setFormated.index(item)
					#Update the value
					listuple[index] = True
			#Otherwise just look up the attributes
			else:
				index = setFormated.index(attributFormated)
				listuple[index] = True

		i = i + 0.5
		if i%100==0:
			print str(int(i))+' files read out of '+str(lengthAllFiles)
		nuplet = tuple(listuple)
		#Add the new object to the context
		matrixAttribute.append(nuplet)
	print str(int(i))+' files read out of '+str(lengthAllFiles)
	#Save the attributes to a txt
	#Save the context to a json
	exportContext(listFiles,setFormated,matrixAttribute,name)
	c = Context(listFiles,setFormated,matrixAttribute)
	print "construction de la lattice. Cela peut prendre quelques instants"
	c.lattice.graphviz(view=True)
	#Save the context to a txt
	writeConcepts(c.lattice,name)
	c.tofile('latticeEtContext/saveLatticeWithPS.txt',frmat='cxt',encoding='utf-8')
        if concept.properties:
            # dot.edge(name, name, taillabel=' '.join(concept.properties), labelangle='90', color='transparent')
            print("properties >", ' | '.join(concept.properties))
        # dot.edges((name, node_name(c)) for c in sorted(concept.lower_neighbors, key=sortkey))
        print("edges :")
        for i in sorted(concept.lower_neighbors, key=sortkey):
            print(name, "->", node_name(i))
            edgecount += 1

        print()
        print("nodes:", nodecount, "edges:", edgecount)

    # if render or view:
    #     dot.render(view=view)  # pragma: no cover
    # return dot


# c = Context.fromfile("test_files/tech_formal_context.csv",frmat="csv")
c = Context.fromfile("test_files/student_formal_context.csv", frmat="csv")
# max_e_len = 0
# for e,i in c.lattice:
#     if len(e) > max_e_len:
#         max_e_len = len(e)
for i, exin in enumerate(c.lattice):
    extent, intent = exin
    print("c" + str(i), ">", extent, "\t->", intent)
#
# c.lattice.graphviz(view=True,filename="temp_show.pdf")

# show_graph(c.lattice,filename="temp_show.pdf",directory="output_trees",view=True)
Example #28
    outputCSVFile = open('train_output.csv', 'w+')
    wtr = csv.writer(outputCSVFile, delimiter=',', lineterminator='\n')
    for i in range(number_of_objects + 1):
        for j in range(number_of_columns + 1):
            if i == 0 and j == 0:
                output_matrix[i][j] = ''
            elif i == 0 and j > 0:
                output_matrix[i][j] = 'c' + str(j - 1)
            elif i > 0 and j == 0:
                output_matrix[i][j] = str(i - 1)
            else:
                output_matrix[i][j] = str(context_matrix[i - 1][j - 1])
        wtr.writerow(output_matrix[i])
    outputCSVFile.close()
    train_dict = {}
    c = Context.fromfile('train_output.csv', 'csv')

    # sys.stdout = open('output1.txt', 'w+')
    for extent, intent in c.lattice:
        #print('%r %r' % (extent, intent))
        # attribute_combinations = np.asarray(intent)
        if intent not in train_dict:
            count = 0
            extent_array = np.asarray(extent)
            for row in extent_array:
                if count == 0:
                    train_dict[intent] = [
                        int(float(tableCells[int(row)][number_of_columns]))
                    ]
                    count = count + 1
                else:
Example #29
    def __init__(self,
                 dataframe,
                 leaves,
                 annotate=None,
                 dummy_formatter=None,
                 keep_names=True,
                 comp_prefix=None,
                 col_formatter=None,
                 na_value=None,
                 AOC=False,
                 collections=False,
                 verbose=True):
        """
        Arguments:
            dataframe (:class:`pandas:pandas.DataFrame`): A dataframe
            leaves (dict): Dictionary of microclasses
            annotate (dict): Extra annotations to add on lattice.
                Of the form: {<object label>:<annotation>}
            dummy_formatter (func): Function to make dummies from the table. (defaults to pandas)
            keep_names (bool): whether to keep original column names when dropping duplicate dummy columns.
            comp_prefix (str): If there are two sets of properties, the prefix used to distinguish column names.
            AOC (bool): Whether to limit ourselves to Attribute or Object Concepts.
            col_formatter (func): Function to format columns in the context table.
            na_value : A value to use as "NA". Defaults to `None`
            collections (bool): Whether the table contains :class:`representations.patterns.PatternCollection` objects.
        """

        self.comp = comp_prefix  # whether there are two sets of properties.
        if na_value is not None:
            dataframe = dataframe.applymap(lambda x: None
                                           if x == na_value else x)
        if collections:
            dummies = to_dummies(dataframe, keep_names=keep_names)
        elif dummy_formatter:
            dummies = dummy_formatter(dataframe)
        else:
            dummies = pd.get_dummies(dataframe, prefix_sep="=")
            dummies = dummies.applymap(lambda x: "X" if x == 1 else "")
        if col_formatter:
            dummies.columns = col_formatter(dummies.columns)
        if verbose:
            print("Reading the context and building the lattice...")
        context_str = dummies.to_csv()
        c1 = Context.fromstring(context_str, frmat='csv')
        self.context = c1
        self.lattice = c1.lattice
        if annotate:
            for label in annotate:
                if label in self.lattice.supremum.extent:
                    self.lattice[[label]]._extra_qumin_annotation = annotate[label]

        self.leaves = leaves
        if verbose:
            print("Converting to qumin node...")
        if AOC:
            self.nodes = self._lattice_to_nodeAOC()
        else:
            self.nodes = self._lattice_to_node()
        font = {'family': 'DejaVu Sans', 'weight': 'normal', 'size': 9}
        matplotlib.rc('font', **font)
Example #30
def main():
    import os
    import sys
    import csv

    if not os.path.isdir(esbm_benchmark_path):
        print 'The esbm benchmark directory is required.'
        sys.exit(1)

    given_entities = esbm_benchmark_path + 'elist.txt'
    target_entities = set([])
    for row in open(given_entities):
        target_entities.add('<' + row.strip().split('\t')[2] + '>')

    for entity_idx in range(1, 141):

        if entity_idx > 100:
            targetKB = 'lmdb'
        else:
            targetKB = 'dbpedia'

        # One given entity description file
        entity_descriptions = esbm_benchmark_path + targetKB + '/' + str(
            entity_idx) + '/' + str(entity_idx) + '_desc.nt'

        # Creating a grid of formal concepts and save it as a CSV file
        if not os.path.isdir(fca_lattice_path):
            os.mkdir(fca_lattice_path)
        fcs_lattice_filename = fca_lattice_path + 'FCA_' + str(
            entity_idx) + '.csv'
        fcs_lattice_file = open(fcs_lattice_filename, 'w')

        sep = ':-:'

        property_set = set([])
        target_facts = set([])
        for row in open(entity_descriptions, 'r'):
            s = row.strip().split()[0]
            p = row.strip().split()[1]
            o = ' '.join(row.strip().split()[2:])[:-2]

            if s not in target_entities and o in target_entities:
                _s = s
                s = o + '[FALSE]'
                o = _s

            property_set.add(p)
            target_facts.add(s + sep + p + sep + o)
        property_list = list(property_set)
        property_list.insert(0, '')

        fca_csv = [property_list]

        final_rank = {}

        attribute_map = {}
        for spo in target_facts:
            default_score = 1
            s, p, o = spo.split(sep)
            s = s.replace('[FALSE]', '')

            # If little information is available from the surface form, the score is lowered.
            for uninform_str in uninformative_values:
                if uninform_str in o:
                    default_score = 0

            if default_score > 0:

                # building attribute-token dict
                try:
                    attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
                except KeyError:
                    attribute_map[p] = extract_key_tokens(o)

            final_rank[s + sep + p + sep + o] = default_score

        for spo, v in sorted(final_rank.items(),
                             key=lambda x: x[1],
                             reverse=True):
            tmp_fca_list = [''] * len(property_list)

            s, p, o = spo.split(sep)
            tmp_fca_list[0] = p + sep + o
            tmp_fca_list[property_list.index(p)] = 'X'

            for prop, tokens in attribute_map.items():
                for token in tokens:
                    if token in o.lower():
                        tmp_fca_list[property_list.index(prop)] = 'X'

            # print tmp_fca_list
            fca_csv.append(tmp_fca_list)

        with fcs_lattice_file:
            writer = csv.writer(fcs_lattice_file)
            writer.writerows(fca_csv)

        # Formal concept analysis
        from concepts import Context
        c = Context.fromfile(fcs_lattice_filename, frmat='csv')
        hierarchical_layer = 0
        for extents, intents in c.lattice:
            # print extents, intents
            for extent in extents:

                if final_rank[s + sep + extent] == 1:
                    final_rank[s + sep +
                               extent] = len(target_facts) - hierarchical_layer

            hierarchical_layer += 1

        # Generating result file
        if not os.path.isdir(kafca_final_result_path):
            os.mkdir(kafca_final_result_path)

        if not os.path.isdir(kafca_final_result_path + targetKB):
            os.mkdir(kafca_final_result_path + targetKB)

        output_filepath = kafca_final_result_path + targetKB + '/' + str(
            entity_idx) + '/'
        if not os.path.isdir(output_filepath):
            os.mkdir(output_filepath)

        fo_top5 = open(output_filepath + str(entity_idx) + '_top5.nt', 'wb')
        fo_top10 = open(output_filepath + str(entity_idx) + '_top10.nt', 'wb')
        fo_rank = open(output_filepath + str(entity_idx) + '_rank.nt', 'wb')

        chkcount = 0
        for spo, score in sorted(final_rank.items(),
                                 key=lambda x: x[1],
                                 reverse=True):
            s, p, o = spo.split(sep)

            if spo not in target_facts:
                _s = s
                s = o
                o = _s
            chkcount += 1

            try:
                fo_rank.write("%s %s %s .\n" % (s, p, o))
                fo_top10.write("%s %s %s .\n" % (s, p, o))
                fo_top5.write("%s %s %s .\n" % (s, p, o))
            except ValueError:
                pass

            if chkcount == 5:
                fo_top5.close()

            if chkcount == 10:
                fo_top10.close()

        fo_rank.close()
def test_fromdict_missing(d_invalid, missing):
    del d_invalid[missing]
    with pytest.raises(ValueError, match=r'missing .*%s' % missing):
        Context.fromdict(d_invalid, require_lattice=(missing == 'lattice'))
Example #32
def extract_sumz():
	import os
	import csv
	import copy
	print 'summary'
	input_json = request.get_json(force=True)
	#print input_json
	print type(input_json)
	input_entity = input_json['entity']
	input_KB = input_json['KB']

	target_entity = set([])
	target_entity.add(input_entity)

	if not os.path.isdir(fca_lattice_path):
		os.mkdir(fca_lattice_path)
	fcs_lattice_filename = fca_lattice_path + 'FCA_' + input_entity + '.csv'
	fcs_lattice_file = open(fcs_lattice_filename, 'w')

	sep = ':-:'

	property_set = set([])
	target_facts = set([])
	for row in input_KB:
		s = row.strip().split()[0]
		p = row.strip().split()[1]
		o = ' '.join(row.strip().split()[2:])[:-2]

		if s not in target_entity and o in target_entity:
			_s = s
			s = o + '[FALSE]'
			o = _s

		property_set.add(p)
		target_facts.add(s + sep + p + sep + o)
	property_list = list(property_set)
	property_list.insert(0, '')

	fca_csv = [property_list]

	final_rank = {}

	attribute_map = {}
	for spo in target_facts:
		default_score = 1
		s, p, o = spo.split(sep)
		s = s.replace('[FALSE]', '')

		# If little information is available from the surface form, the score is lowered.
		for uninform_str in uninformative_values:
			if uninform_str in o:
				default_score = 0

		if default_score > 0:

			# building attribute-token dict
			try:
				attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
			except KeyError:
				attribute_map[p] = extract_key_tokens(o)

		final_rank[s + sep + p + sep + o] = default_score

	for spo, v in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
		tmp_fca_list = [''] * len(property_list)

		s, p, o = spo.split(sep)
		tmp_fca_list[0] = p + sep + o
		tmp_fca_list[property_list.index(p)] = 'X'

		for prop, tokens in attribute_map.items():
			for token in tokens:
				if token in o.lower():
					tmp_fca_list[property_list.index(prop)] = 'X'

		# print tmp_fca_list
		fca_csv.append(tmp_fca_list)

	tmp_list = copy.deepcopy(fca_csv)
	with fcs_lattice_file:
		writer = csv.writer(fcs_lattice_file)
		for index, row in enumerate(fca_csv):
			for index_se, ele in enumerate(row):
				#print ele
				tmp_list[index][index_se] = ele.encode('utf-8')
		writer.writerows(tmp_list)

	# Formal concept analysis
	from concepts import Context
	#fcs_lattice_filename = './KAFCA_lattice/FCA_141.csv'
	c = Context.fromfile(fcs_lattice_filename, frmat='csv')
	hierarchical_layer = 0
	for extents, intents in c.lattice:
		#print extents, intents
		#f = open('text2.json', 'w')
		#json.dump(final_rank, f, ensure_ascii=False, indent=4)
		for extent in extents:
			try:
				extent_de = extent.decode('utf-8')
				if final_rank[s+sep+extent_de] == 1:
					final_rank[s+sep+extent_de] = len(target_facts) - hierarchical_layer
			except KeyError:
				print s+sep+extent_de
				continue

		hierarchical_layer += 1
	#print '-'*10
	#print final_rank.keys()

	os.remove(fcs_lattice_filename)
	result_top5 = []
	chkcount = 0
	for spo, score in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
		s, p, o = spo.split(sep)

		if spo not in target_facts:
			_s = s
			s = o
			o = _s
		chkcount += 1

		result_top5.append(s+'\t'+p+'\t'+o)

		if chkcount == 5:
			break

	result = {}
	result['top5'] = result_top5

	return jsonify(result)
Example #33
	print res
	return res
		

if __name__ == '__main__':
	animaux = ["Bat","Eagle","Monkey","Parrot fish","Penguin","Shark","Lantern fish"]
	proprietes = ["breathes in water","can fly","has beak","has hands","has skeleton","has wings","lives in water","is viviparous","produces light"]
	matrix = [
		(False, True, False, False, True, True, False, True, False), # Bat
		(False, True, True, False, True, True, False, False, False), # Eagle
		(False, False, False, True, True, False, False, True, False), # Monkey
		(True, False, True, False, True, False, True, False, False), # Parrot Fish
		(False, False, True, False, True, True, True, False, False), # Penguin
		(True, False, False, False, True, False, True, False, False), # Shark
		(True, False, False, False, True, False, True, False, True)] # Lantern Fish
	exportContext(animaux,proprietes,matrix)
	c = Context(animaux, proprietes, matrix)  # doctest: +ELLIPSIS
	'''clients = ['Anne','Basile','Carole']
	articles = ['fromage','vin','lait','lessive']
	matrix = [
		(True, False, True, False), #A
		(True, True, False, True), #B
		(True,False,True,True)] #C
	c = Context(clients, articles, matrix)'''
	#print c
	#c.lattice.graphviz(view=True)
	#for intent, extent in c.lattice:
		#print intent, extent
	c.tofile('animaux.txt',frmat='cxt',encoding='utf-8')
	writeConcepts(c.lattice)
Example #34
from concepts import Context

c = Context()
#c = Context.fromfile('examples/digits.cxt')
c.additem('cat',['a','b','eats fish'])

#c.additem('cat',['eats fish'])
print c.tostring()
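
Note that the zero-argument Context() and the additem call above do not match the Context(objects, properties, bools) constructor used in the other examples and appear to target a different Context implementation; a sketch of the equivalent incremental construction with a Definition, as in Examples #1 and #13, might look like this (the property names are illustrative):

from concepts import Context, Definition

d = Definition()
d.add_object('cat', ['a', 'b', 'eats fish'])
c = Context(*d)
for extent, intent in c.lattice:
    print(extent, intent)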