from concepts import Context, Definition


def build_iceberg_lattice(filename, lattice, threshold):
    """Keep concepts whose first component has at least `threshold` members,
    drop those fully covered by other qualifying concepts, and write the
    result as a CSV context."""
    irreducible = []
    for i, (intent, extent) in enumerate(lattice):
        coverage = list(intent)
        if len(intent) < threshold:
            continue
        is_irreducible = True
        for j, (intent1, extent1) in enumerate(lattice):
            if j == i or len(intent1) < threshold or len(intent) <= len(intent1):
                continue
            is_subset = True
            for obj in intent1:
                if obj not in intent:
                    is_subset = False
                    break
            if is_subset:
                for obj in intent1:
                    if obj in coverage:
                        coverage.remove(obj)
                if len(coverage) == 0:
                    is_irreducible = False
                    break
        if is_irreducible:
            irreducible.append((intent, extent))

    df = Definition()
    for intent, extent in irreducible:
        obj_name = ';'.join(intent)
        df.add_object(obj_name, list(extent))
    conc = Context(*df)
    conc.tofile(filename='iceberg.' + filename, frmat='csv')
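# Usage sketch for build_iceberg_lattice: a tiny inline context (toy data,
# illustrative only, not from the original code). The lattice is iterated as
# two-element pairs exactly as the function above expects.
if __name__ == '__main__':
    demo = Definition()
    demo.add_object('sparrow', ['bird', 'flies'])
    demo.add_object('penguin', ['bird', 'swims'])
    demo.add_object('duck', ['bird', 'flies', 'swims'])
    demo_context = Context(*demo)
    # Writes the reduced context to 'iceberg.demo.csv'.
    build_iceberg_lattice('demo.csv', demo_context.lattice, threshold=2)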
def test_fromstring_serialized(tmp_path, source, filename, includes_lattice):
    if filename is None:
        context = Context.fromstring(source, frmat='python-literal')
    else:
        target = tmp_path / filename
        kwargs = {'encoding': 'utf-8'}
        target.write_text(source, **kwargs)
        context = Context.fromfile(str(target), frmat='python-literal', **kwargs)

    assert context.objects == SERIALIZED['objects']
    assert context.properties == SERIALIZED['properties']
    assert context.bools == [
        (True, False, False, True, False, True, True, False, False, True),
        (True, False, False, True, False, True, False, True, True, False),
        (False, True, True, False, False, True, True, False, False, True),
        (False, True, True, False, False, True, False, True, True, False),
        (False, True, False, True, True, False, True, False, False, True),
        (False, True, False, True, True, False, False, True, True, False),
    ]

    if includes_lattice:
        assert 'lattice' in context.__dict__
    else:
        assert 'lattice' not in context.__dict__
def test_dict_roundtrip(context, ignore_lattice):
    context = Context(context.objects, context.properties, context.bools)
    assert 'lattice' not in context.__dict__

    d = context.todict(ignore_lattice=ignore_lattice)

    assert isinstance(d, dict) and d
    assert all(d[k] for k in ('objects', 'properties', 'context'))
    if ignore_lattice or ignore_lattice is None:
        assert 'lattice' not in context.__dict__
        assert 'lattice' not in d
    else:
        assert 'lattice' in context.__dict__
        assert d['lattice']

    result = Context.fromdict(d)

    assert isinstance(result, Context)
    assert result == context
    if ignore_lattice or ignore_lattice is None:
        assert 'lattice' not in result.__dict__
    else:
        assert 'lattice' in result.__dict__
        assert result.lattice._eq(context.lattice)
def __init__(self, csv_location):
    # the Frame-capability lattice
    self.context = Context.fromfile(csv_location, frmat='csv')
    self.lattice = self.context.lattice
    # the Frame-uncapability lattice
    self.context_inv = Context(*self.context.definition().inverted())
    self.lattice_inv = self.context_inv.lattice
    # the list of all capabilities and frames
    self.capabilities = self.context.properties
    self.frames = self.context.objects
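# Usage sketch (the enclosing class is not shown in this excerpt;
# 'CapabilityLattice' and the file name are hypothetical stand-ins):
#
#     lattices = CapabilityLattice('frames.csv')
#     print(lattices.capabilities)
#     print(lattices.frames)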
def test_todict(context, d):
    assert 'lattice' not in context.__dict__
    if 'lattice' in d:
        context = Context(context.objects, context.properties, context.bools)
        assert 'lattice' not in context.__dict__
        for ignore_lattice in (False, None):
            assert context.todict(ignore_lattice=ignore_lattice) == d
            assert 'lattice' in context.__dict__
    else:
        for ignore_lattice in (True, None):
            assert context.todict(ignore_lattice=ignore_lattice) == d
            assert 'lattice' not in context.__dict__
import sys

import pandas as pd

from concepts import Context


def main():
    filename = str(sys.argv[1])
    iceberg_threshold = int(sys.argv[2])
    draw_iceberg = False
    cols_to_use = []
    i = 3
    cols_started = False
    while i < len(sys.argv):
        if sys.argv[i][0] == '-':
            cols_started = False
        if sys.argv[i] == '-draw':
            draw_iceberg = True
        elif sys.argv[i] == '-columns':
            cols_started = True
        elif cols_started:
            cols_to_use.append(sys.argv[i])
        i += 1

    dataframe = pd.read_csv(filename)
    if len(cols_to_use) > 0:
        dataframe = dataframe[[dataframe.columns[0]] + cols_to_use]
    col_info = pd.read_csv('cols.' + filename)
    transform_columns(dataframe, col_info)
    dataframe = dataframe.drop_duplicates(subset=list(dataframe.columns[0:1]),
                                          keep='first')
    dataframe.to_csv('transformed.' + filename, index_label=False, index=False)

    context = Context.fromfile('transformed.' + filename, frmat='csv')
    with open('lattice.dot', 'w') as f:
        f.write(str(context.lattice.graphviz()))

    build_iceberg_lattice(filename, context.lattice, iceberg_threshold)
    iceberg_context = Context.fromfile(filename='iceberg.' + filename, frmat='csv')
    if draw_iceberg:
        iceberg_context.lattice.graphviz(view=True)
    with open('iceberg.dot', 'w') as f:
        f.write(str(iceberg_context.lattice.graphviz()))

    implication_basis = find_implication_basis(iceberg_context)
    print("Implication basis:")
    for i, e in implication_basis:
        print(str(i) + " => " + str(e))
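# Invocation sketch for the script above (script, file, and column names are
# illustrative): expects a data CSV plus a matching 'cols.<name>' CSV with
# the column metadata consumed by transform_columns.
#
#     python iceberg.py data.csv 2 -columns age income -draw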
def test_json_roundtrip(context, path_or_fileobj, encoding):
    context = Context(context.objects, context.properties, context.bools)
    assert 'lattice' not in context.__dict__
    is_fileobj = hasattr(path_or_fileobj, 'seek')
    kwargs = {'encoding': encoding} if encoding is not None else {}

    context.tojson(path_or_fileobj, ignore_lattice=True, **kwargs)
    if is_fileobj:
        path_or_fileobj.seek(0)
    assert 'lattice' not in context.__dict__

    deserialized = Context.fromjson(path_or_fileobj, **kwargs)
    if is_fileobj:
        path_or_fileobj.seek(0)

    assert 'lattice' not in deserialized.__dict__
    assert deserialized == context

    assert isinstance(context.lattice, Lattice)
    assert 'lattice' in context.__dict__

    context.tojson(path_or_fileobj, ignore_lattice=None, **kwargs)
    if is_fileobj:
        path_or_fileobj.seek(0)

    deserialized = Context.fromjson(path_or_fileobj, **kwargs)

    assert 'lattice' in deserialized.__dict__
    assert deserialized == context
    assert deserialized.lattice._eq(context.lattice)
import itertools
import random
from typing import Iterator, Tuple

from concepts import Context


def extract_concepts_in_order(objs, order: Order) -> Iterator[Tuple[set, set]]:
    var_idx = list(set(itertools.chain.from_iterable(
        variables for variables in objs.values())))
    context_def = [[var_idx.index(variable) for variable in variables]
                   for (obj, variables) in objs.items()]
    ctx = Context.fromdict({'objects': list(objs),
                            'properties': var_idx,
                            'context': context_def})

    def linksof(c) -> set:
        "Edges covered by the given concept."
        return set(itertools.product(c.extent, c.intent))

    concepts_cover = {c: linksof(c) for c in iter(ctx.lattice)}
    treated = set()  # set of edges already described

    def def_Random(concepts):
        return random.choice(tuple(concepts))

    def def_LargestCoverFirst(concepts):
        return max(concepts.keys(), key=lambda c: len(linksof(c) - treated))

    def def_LargestExtentFirst(concepts):
        return max(concepts.keys(), key=lambda c: len(c.extent))

    def def_LargestIntentFirst(concepts):
        return max(concepts.keys(), key=lambda c: len(c.intent))

    def def_LargestExtentOrIntentFirst(concepts):
        return max(concepts.keys(), key=lambda c: max(len(c.extent), len(c.intent)))

    while concepts_cover:
        # Pick the selection strategy whose name matches the Order member.
        best = locals()['def_' + order.name](concepts_cover)
        simplified_best = simplify_concept(best, treated)
        treated |= linksof(best)
        concepts_cover = {c: linksof(c) - treated for c in concepts_cover}
        concepts_cover = {c: links for c, links in concepts_cover.items()
                          if len(links) > 0}
        if not simplified_best[0] or not simplified_best[1]:
            continue  # ignore the extremums
        yield simplified_best
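# Usage sketch (Order and simplify_concept come from elsewhere in this
# project, so the call is shown commented out; toy_objs is illustrative):
#
#     toy_objs = {'o1': ('a', 'b'), 'o2': ('b', 'c'), 'o3': ('a', 'c')}
#     for extent, intent in extract_concepts_in_order(toy_objs,
#                                                     Order.LargestCoverFirst):
#         print(extent, intent)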
import glob
import os


def render_all(filepattern='*.cxt', frmat=None, encoding=None,
               directory=None, out_format=None):
    from concepts import Context

    if directory is not None:
        get_name = lambda filename: os.path.basename(filename)
    else:
        get_name = lambda filename: filename

    if frmat is None:
        from concepts.formats import Format
        get_frmat = Format.by_extension.get
    else:
        get_frmat = lambda filename: frmat

    for cxtfile in glob.glob(filepattern):
        name, ext = os.path.splitext(cxtfile)
        filename = '%s.gv' % get_name(name)
        c = Context.fromfile(cxtfile, get_frmat(ext), encoding=encoding)
        l = c.lattice
        dot = l.graphviz(filename, directory)
        if out_format is not None:
            dot.format = out_format
        dot.render()
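# Usage sketch: render every .cxt context file in the current directory to
# PDF (a no-op when no files match; 'pdf' is an illustrative choice).
render_all('*.cxt', out_format='pdf')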
def local_fca(self, molecules):
    props = self.get_properties(molecules)
    props = list(set(props))
    sub = list(molecules[0].subjects())[0]
    molecule_properties = [
        str(prop.encode('utf-8')) + '->' + str(value.encode('utf-8'))
        for prop, value in props
    ]
    molecule_names = [
        "{}_{}".format(str(sub.encode('utf-8')), y)
        for y in [2014, 2015, 2016]
    ]
    mat = []
    for molecule in molecules:
        row = [False] * len(props)
        for idx, (prop, val) in enumerate(props):
            if (sub, prop, val) in molecule:
                row[idx] = True
        mat.append(row)
    c = Context(molecule_names, molecule_properties, mat)
    res = c.lattice
    result = []
    for (extent, intent) in res:
        result.append((list(extent), list(intent)))
    return result
def test_fromdict_raw(context, lattice, d, raw):
    def shuffled(items):
        result = list(items)
        random.shuffle(result)
        return result

    _lattice = d.get('lattice')
    d = {'objects': d['objects'],
         'properties': d['properties'],
         'context': [shuffled(intent) for intent in d['context']]}

    if _lattice is not None:
        pairs = shuffled(enumerate(_lattice))
        index_map = {old: new for new, (old, _) in enumerate(pairs)}
        d['lattice'] = [(shuffled(ex), shuffled(in_),
                         shuffled(index_map[i] for i in up),
                         shuffled(index_map[i] for i in lo))
                        for _, (ex, in_, up, lo) in pairs]

    result = Context.fromdict(d, raw=raw)

    assert isinstance(result, Context)
    assert result == context

    if _lattice is not None:
        if raw:
            assert result.lattice._eq(lattice)
        else:  # instance broken by shuffled(d['lattice'])
            assert not result.lattice._eq(lattice)
import concepts
from concepts import Context


def dictToConcept(data_matrix):
    """Build a formal context and its lattice from a dictionary
    mapping each object to its list of properties."""
    definition = concepts.Definition()
    for (current_obj, current_values) in data_matrix.items():
        definition.add_object(current_obj, current_values)
    context = Context(*definition)
    lattice = context.lattice
    return context, lattice
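# Usage sketch with a toy object -> properties mapping (illustrative data):
sample_context, sample_lattice = dictToConcept({
    'sparrow': ['bird', 'flies'],
    'penguin': ['bird', 'swims'],
})
for extent, intent in sample_lattice:
    print(extent, intent)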
def nonascii_context(abba=(u'Agneta F\xe4ltskog', u'Anni-Frid Lyngstat',
                           u'Benny Andersson', u'Bj\xf6rn Ulvaeus')):
    d = Definition()
    for o in abba:
        d.add_object(o, [u'human', u'singer'])
    d.add_property(u'female', abba[:2])
    d.add_property(u'male', abba[2:])
    d.add_property(u'keyboarder', [abba[2]])
    d.add_property(u'guitarrist', [abba[3]])
    d.add_property(u'sch\xf6n', abba[::2])
    return Context(*d)
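# Usage sketch (assumes the surrounding module's imports of Definition and
# Context): the fixture's lattice concepts unpack to (extent, intent) pairs.
ctx = nonascii_context()
for extent, intent in ctx.lattice:
    print(extent, intent)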
import csv

from concepts import Context


def generate_concept_matrix(filename, skill_list=None, render=False):
    # applying FCA
    c = Context.fromfile(filename, frmat="csv")
    if render:
        # strip the '.csv' suffix (rstrip would strip characters, not the suffix)
        base = filename[:-len(".csv")] if filename.endswith(".csv") else filename
        c.lattice.graphviz(filename=base, view=True)

    # reading csv headers
    with open(filename) as csvfile:
        csvreader = csv.reader(csvfile)
        # reading skills
        if skill_list is None:
            skill_list = next(csvreader)
            skill_list.pop(0)
        else:
            next(csvreader)
        # reading abstract names
        row_header = [row[0] for row in csvreader]

    # matrix to return
    mat = list()
    for i, concept in enumerate(c.lattice):
        extent, intent = concept
        # skip non-significant concepts
        if len(extent) == 0 or len(intent) == 0:
            continue
        print("c{} = {} > {}".format(i, extent, intent))
        row = list()
        for skill in skill_list:
            row.append(1 if skill in intent else 0)
        for header in row_header:
            row.append(1 if header in extent else 0)
        mat.append(row)
    return mat, row_header, skill_list
def test_fromdict(context, lattice, d, require_lattice, ignore_lattice, raw):
    if require_lattice and 'lattice' not in d:
        return

    result = Context.fromdict(d, require_lattice=require_lattice,
                              ignore_lattice=ignore_lattice, raw=raw)

    assert result == context

    if ignore_lattice or 'lattice' not in d:
        assert 'lattice' not in result.__dict__
    else:
        assert 'lattice' in result.__dict__
        assert result.lattice._eq(lattice)
def predict_fca(s):
    properties = s.index.values
    objects = [str(s.name)]
    bools = tuple(s.astype(bool))
    s_lattice = Context(objects, properties, [bools])
    s_intents = set()
    for extent_s, intent_s in s_lattice.lattice:
        s_intents.add(intent_s)
    sets = set(list(s_intents)[1])
    probs = []
    for i in range(0, no_of_classes):
        for intent_c in class_intents_sets[i]:
            setc = set(intent_c)
            if sets.issubset(setc):
                probs.append(i)
    if len(probs) == 0:
        return -1
    return max(probs, key=probs.count)
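# Usage sketch (relies on the module-level globals no_of_classes and
# class_intents_sets built elsewhere in this script, so the call is shown
# commented out; the one-hot Series is illustrative):
#
#     import pandas as pd
#     sample = pd.Series([1, 0, 1], index=['f1=a', 'f2=b', 'f3=c'], name=0)
#     print(predict_fca(sample))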
def test_json_roundtrip_nonascii_context(nonascii_context, encoding):
    assert isinstance(nonascii_context.lattice, Lattice)
    assert 'lattice' in nonascii_context.__dict__
    kwargs = {'encoding': encoding} if encoding is not None else {}

    with io.StringIO() as f:
        nonascii_context.tojson(f, **kwargs)
        serialized = f.getvalue()
        f.seek(0)
        deserialized = Context.fromjson(f, **kwargs)

    assert 'lattice' in deserialized.__dict__
    assert deserialized == nonascii_context
    assert deserialized.lattice._eq(nonascii_context.lattice)

    assert '"Agneta F\\u00e4ltskog"' in serialized
    assert '"Bj\\u00f6rn Ulvaeus"' in serialized
    assert '"sch\\u00f6n"' in serialized
import csv

from concepts import Context


def generate_concept_matrix(filename, skill_list=None):
    # applying FCA
    c = Context.fromfile(filename, frmat="csv")

    # reading csv headers
    with open(filename) as csvfile:
        csvreader = csv.reader(csvfile)
        # reading skills
        if skill_list is None:
            skill_list = next(csvreader)
            skill_list.pop(0)
        else:
            next(csvreader)
        # reading abstract names
        row_header = [row[0] for row in csvreader]

    # matrix to return
    mat = list()
    for extent, intent in c.lattice:
        print("{} > {}".format(extent, intent))
        row = list()
        for skill in skill_list:
            row.append(1 if skill in intent else 0)
        for header in row_header:
            row.append(1 if header in extent else 0)
        mat.append(row)
    return mat, row_header, skill_list
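# Usage sketch: write a tiny context CSV (toy data, illustrative) in the
# 'X'-marked format the concepts csv reader expects, then extract the matrix.
with open('demo_skills.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['', 'reads', 'writes'])
    writer.writerow(['alice', 'X', 'X'])
    writer.writerow(['bob', 'X', ''])

mat, row_header, skills = generate_concept_matrix('demo_skills.csv')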
# Creating and saving the context for implication rules
X_train_one_hot['Class'] = y_train
X_train_Class_split = pd.concat([
    X_train_one_hot,
    pd.get_dummies(X_train_one_hot['Class'], prefix='Class'),
], axis=1)
X_train_Class_split = X_train_Class_split.drop(["Class"], axis=1).drop_duplicates()

objects = X_train_Class_split.index.values
objects = [str(oi) for oi in objects]
properties = X_train_Class_split.columns.values
bools = list(X_train_Class_split.astype(bool).itertuples(index=False, name=None))

cxt = Context(objects, properties, bools)
cxt.tofile('diabetes_context.cxt', frmat='cxt', encoding='utf-8')

## Create concept lattices for each class
c = {}
l = {}
no_of_classes = 2
X_train_one_hot['Class'] = y_train
X_train_one_hot = X_train_one_hot.drop_duplicates()
for i in range(0, no_of_classes):
    X_temp = X_train_one_hot.copy(deep=True)
    X_temp = X_temp[X_temp['Class'] == i].drop(["Class"], axis=1)
    objects = X_temp.index.values
    objects = [str(oi) for oi in objects]
    properties = X_temp.columns.values
def test_fromdict_empty_lattice(d_invalid):
    d_invalid['lattice'] = []

    with pytest.raises(ValueError, match='empty lattice'):
        Context.fromdict(d_invalid)


def test_fromdict_context_invalid_index(d_invalid):
    first = d_invalid['context'][0]
    d_invalid['context'][0] = (42,) + first[1:]

    with pytest.raises(ValueError, match='invalid index'):
        Context.fromdict(d_invalid)


def test_fromdict_context_duplicates(d_invalid):
    first = d_invalid['context'][0]
    d_invalid['context'][0] = (first[0], first[0]) + first[2:]

    with pytest.raises(ValueError, match='duplicate'):
        Context.fromdict(d_invalid)


def test_fromdict_mismatch(d_invalid, short):
    d_invalid[short] = d_invalid[short][1:]
    lens = (5, 6) if short == 'objects' else (6, 5)
    match = r'mismatch: %d objects with %d context' % lens

    with pytest.raises(ValueError, match=match):
        Context.fromdict(d_invalid)


def test_fromdict_nonstring(d_invalid, nonstring):
    d_invalid[nonstring] = (42,) + d_invalid[nonstring][1:]

    with pytest.raises(ValueError, match=r'non-string %s' % nonstring):
        Context.fromdict(d_invalid)
def buildLattice(pattern=True, inputFiles="dec", inputAttributes="arts"):
    if pattern == True:
        name = "WithPS"
    else:
        name = "WithoutPS"
    print(inputFiles, inputAttributes, name)

    # The context to build
    matrixAttribute = []
    # List of the files read to build the context
    listFiles = []

    if inputFiles == "dec":
        listAllFiles = fg.getAllDecisions()
    elif inputFiles == "avis":
        listAllFiles = fg.getAllAvis()
    elif inputFiles == "all":
        listAllFiles = fg.getAllFiles()
    else:
        print("unrecognized choice. Possible choices: 'dec' 'avis' 'all'")
        listAllFiles = fg.getAllDecisions()

    # Number of files read
    lengthAllFiles = len(listAllFiles)
    # The set of attributes of the context
    setOfAttributes = set()
    # The set of reworked attributes of the context
    setFormated = set()

    # The regular expression of the possible attributes of the various texts
    if inputAttributes == "arts":
        expre = expreAttribute()
    elif inputAttributes == "artsdocs":
        expre = expreAttribute() + '|' + regex.exprReguliereDecision()
    else:
        print("unrecognized choice. Possible choices: 'arts' 'docs' 'artsdocs'")
        expre = expreAttribute()

    # Counter of files read
    i = 0
    # Read the files to list the attributes
    for dfile in listAllFiles:
        f = open(dfile, 'r')
        # Remove the line breaks caused by copy/pasting from the PDF
        data = ' '.join([line.rstrip() for line in f])
        # For each expression found in the text
        for m in re.finditer(expre, data):
            attributFormated = m.group(0)
            # Normalize the expression:
            # remove accents
            attributFormated = regex.removeAccent(attributFormated)
            # fix potential errors
            attributFormated = correctSyntaxe(attributFormated)
            attributFormated = regex.supprNumero(attributFormated)
            setOfAttributes.add(attributFormated)
        i = i + 0.5
        if i % 100 == 0:
            print(str(int(i)) + ' files read out of ' + str(lengthAllFiles))

    # Rework the attributes to avoid duplicates
    setOfAttributes = list(setOfAttributes)
    for item in setOfAttributes:
        setFormated.add(regex.formatArticle(item))
    if pattern == True:
        developAttributes = buildAttributes(setFormated)
        setFormated = list(developAttributes)
    else:
        setFormated = list(setFormated)

    # Number of attributes in the context
    lenset = len(setFormated)
    print(str(lenset))
    writeAttributes(setFormated, name)

    # Build the context
    for dfile in listAllFiles:
        f = open(dfile, 'r')
        data = ' '.join([line.rstrip() for line in f])
        # List the documents for the construction of the context
        listFiles.append(regex.nomDocument(dfile))
        # Build one row of the context
        nuplet = (False,) * lenset
        listuple = list(nuplet)
        # For each expression
        for m in re.finditer(expre, data):
            attributFormated = m.group(0)
            # Format the matched expression
            attributFormated = regex.removeAccent(attributFormated)
            attributFormated = correctSyntaxe(attributFormated)
            attributFormated = regex.supprNumero(attributFormated)
            attributFormated = regex.formatArticle(attributFormated)
            # With pattern structures, split each attribute
            if pattern == True:
                listAtt = developAttribute(attributFormated)
                for item in listAtt:
                    # Find the index of the attribute
                    index = setFormated.index(item)
                    # Update the value
                    listuple[index] = True
            # Otherwise just look up the attribute
            else:
                index = setFormated.index(attributFormated)
                listuple[index] = True
        i = i + 0.5
        if i % 100 == 0:
            print(str(int(i)) + ' files read out of ' + str(lengthAllFiles))
        nuplet = tuple(listuple)
        # Add the new object to the context
        matrixAttribute.append(nuplet)
    print(str(int(i)) + ' files read out of ' + str(lengthAllFiles))

    # Save the attributes to a txt file, the context as JSON
    exportContext(listFiles, setFormated, matrixAttribute, name)
    c = Context(listFiles, setFormated, matrixAttribute)
    print("building the lattice. This may take a moment")
    c.lattice.graphviz(view=True)
    # Save the concepts to a txt file
    writeConcepts(c.lattice, name)
    c.tofile('latticeEtContext/saveLatticeWithPS.txt', frmat='cxt', encoding='utf-8')
    if concept.properties:
        # dot.edge(name, name, taillabel=' '.join(concept.properties),
        #          labelangle='90', color='transparent')
        print("properties >", ' | '.join(concept.properties))

    # dot.edges((name, node_name(c))
    #           for c in sorted(concept.lower_neighbors, key=sortkey))
    print("edges :")
    for i in sorted(concept.lower_neighbors, key=sortkey):
        print(name, "->", node_name(i))
        edgecount += 1
    print()

print("nodes:", nodecount, "edges:", edgecount)

# if render or view:
#     dot.render(view=view)  # pragma: no cover
# return dot

# c = Context.fromfile("test_files/tech_formal_context.csv", frmat="csv")
c = Context.fromfile("test_files/student_formal_context.csv", frmat="csv")

# max_e_len = 0
# for e, i in c.lattice:
#     if len(e) > max_e_len:
#         max_e_len = len(e)

for i, exin in enumerate(c.lattice):
    extent, intent = exin
    print("c" + str(i), ">", extent, "\t->", intent)

# c.lattice.graphviz(view=True, filename="temp_show.pdf")
# show_graph(c.lattice, filename="temp_show.pdf", directory="output_trees",
#            view=True)
outputCSVFile = open('train_output.csv', 'w+')
wtr = csv.writer(outputCSVFile, delimiter=',', lineterminator='\n')
for i in range(number_of_objects + 1):
    for j in range(number_of_columns + 1):
        if i == 0 and j == 0:
            output_matrix[i][j] = ''
        elif i == 0 and j > 0:
            output_matrix[i][j] = 'c' + str(j - 1)
        elif i > 0 and j == 0:
            output_matrix[i][j] = str(i - 1)
        else:
            output_matrix[i][j] = str(context_matrix[i - 1][j - 1])
    wtr.writerow(output_matrix[i])
outputCSVFile.close()

train_dict = {}
c = Context.fromfile('train_output.csv', 'csv')
# sys.stdout = open('output1.txt', 'w+')
for extent, intent in c.lattice:
    # attribute_combinations = np.asarray(intent)
    if intent not in train_dict:
        count = 0
        extent_array = np.asarray(extent)
        for row in extent_array:
            if count == 0:
                train_dict[intent] = [
                    int(float(tableCells[int(row)][number_of_columns]))
                ]
                count = count + 1
            else:
def __init__(self, dataframe, leaves, annotate=None, dummy_formatter=None,
             keep_names=True, comp_prefix=None, col_formatter=None,
             na_value=None, AOC=False, collections=False, verbose=True):
    """
    Arguments:
        dataframe (:class:`pandas:pandas.DataFrame`): A dataframe.
        leaves (dict): Dictionary of microclasses.
        annotate (dict): Extra annotations to add on the lattice, of the
            form {<object label>: <annotation>}.
        dummy_formatter (func): Function to make dummies from the table
            (defaults to pandas').
        keep_names (bool): Whether to keep original column names when
            dropping duplicate dummy columns.
        comp_prefix (str): If there are two sets of properties, the prefix
            used to distinguish column names.
        AOC (bool): Whether to limit ourselves to Attribute or Object Concepts.
        col_formatter (func): Function to format columns in the context table.
        na_value: A value to use as "NA". Defaults to `None`.
        collections (bool): Whether the table contains
            :class:`representations.patterns.PatternCollection` objects.
    """
    self.comp = comp_prefix  # whether there are two sets of properties

    if na_value is not None:
        dataframe = dataframe.applymap(lambda x: None if x == na_value else x)

    if collections:
        dummies = to_dummies(dataframe, keep_names=keep_names)
    elif dummy_formatter:
        dummies = dummy_formatter(dataframe)
    else:
        dummies = pd.get_dummies(dataframe, prefix_sep="=")
        dummies = dummies.applymap(lambda x: "X" if x == 1 else "")

    if col_formatter:
        dummies.columns = col_formatter(dummies.columns)

    if verbose:
        print("Reading the context and building the lattice...")

    context_str = dummies.to_csv()
    c1 = Context.fromstring(context_str, frmat='csv')
    self.context = c1
    self.lattice = c1.lattice

    if annotate:
        for label in annotate:
            if label in self.lattice.supremum.extent:
                self.lattice[[label]]._extra_qumin_annotation = annotate[label]

    self.leaves = leaves

    if verbose:
        print("Converting to qumin node...")

    if AOC:
        self.nodes = self._lattice_to_nodeAOC()
    else:
        self.nodes = self._lattice_to_node()

    font = {'family': 'DejaVu Sans', 'weight': 'normal', 'size': 9}
    matplotlib.rc('font', **font)
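# Usage sketch (the enclosing class is not shown in this excerpt;
# 'FeatureLattice' is a hypothetical stand-in, and the toy dataframe and
# empty leaves dict are illustrative):
#
#     import pandas as pd
#     df = pd.DataFrame({'vowel': ['a', 'a', 'i']}, index=['x', 'y', 'z'])
#     nav = FeatureLattice(df, leaves={}, verbose=False)
#     print(nav.nodes)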
def main():
    import os
    import sys
    import csv

    if not os.path.isdir(esbm_benchmark_path):
        print('The esbm benchmark directory is required.')
        sys.exit(1)

    given_entities = esbm_benchmark_path + 'elist.txt'
    target_entities = set([])
    for row in open(given_entities):
        target_entities.add('<' + row.strip().split('\t')[2] + '>')

    for entity_idx in range(1, 141):
        if entity_idx > 100:
            targetKB = 'lmdb'
        else:
            targetKB = 'dbpedia'
        # One given entity description file
        entity_descriptions = (esbm_benchmark_path + targetKB + '/'
                               + str(entity_idx) + '/' + str(entity_idx) + '_desc.nt')

        # Creating a grid of formal concepts and saving it as a CSV file
        if not os.path.isdir(fca_lattice_path):
            os.mkdir(fca_lattice_path)
        fcs_lattice_filename = fca_lattice_path + 'FCA_' + str(entity_idx) + '.csv'
        fcs_lattice_file = open(fcs_lattice_filename, 'w')
        sep = ':-:'
        property_set = set([])
        target_facts = set([])
        for row in open(entity_descriptions, 'r'):
            s = row.strip().split()[0]
            p = row.strip().split()[1]
            o = ' '.join(row.strip().split()[2:])[:-2]
            if s not in target_entities and o in target_entities:
                _s = s
                s = o + '[FALSE]'
                o = _s
            property_set.add(p)
            target_facts.add(s + sep + p + sep + o)
        property_list = list(property_set)
        property_list.insert(0, '')
        fca_csv = [property_list]

        final_rank = {}
        attribute_map = {}
        for spo in target_facts:
            default_score = 1
            s, p, o = spo.split(sep)
            s = s.replace('[FALSE]', '')
            # If less information is available from the surface form,
            # the score will be lower.
            for uninform_str in uninformative_values:
                if uninform_str in o:
                    default_score = 0
            if default_score > 0:
                # building the attribute-token dict
                try:
                    attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
                except KeyError:
                    attribute_map[p] = extract_key_tokens(o)
            final_rank[s + sep + p + sep + o] = default_score

        for spo, v in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
            tmp_fca_list = [''] * len(property_list)
            s, p, o = spo.split(sep)
            tmp_fca_list[0] = p + sep + o
            tmp_fca_list[property_list.index(p)] = 'X'
            for prop, tokens in attribute_map.items():
                for token in tokens:
                    if token in o.lower():
                        tmp_fca_list[property_list.index(prop)] = 'X'
            fca_csv.append(tmp_fca_list)

        with fcs_lattice_file:
            writer = csv.writer(fcs_lattice_file)
            writer.writerows(fca_csv)

        # Formal concept analysis
        from concepts import Context
        c = Context.fromfile(fcs_lattice_filename, frmat='csv')
        hierarchical_layer = 0
        for extents, intents in c.lattice:
            for extent in extents:
                if final_rank[s + sep + extent] == 1:
                    final_rank[s + sep + extent] = len(target_facts) - hierarchical_layer
            hierarchical_layer += 1

        # Generating the result files
        if not os.path.isdir(kafca_final_result_path):
            os.mkdir(kafca_final_result_path)
        if not os.path.isdir(kafca_final_result_path + targetKB):
            os.mkdir(kafca_final_result_path + targetKB)
        output_filepath = (kafca_final_result_path + targetKB + '/'
                           + str(entity_idx) + '/')
        if not os.path.isdir(output_filepath):
            os.mkdir(output_filepath)
        fo_top5 = open(output_filepath + str(entity_idx) + '_top5.nt', 'w')
        fo_top10 = open(output_filepath + str(entity_idx) + '_top10.nt', 'w')
        fo_rank = open(output_filepath + str(entity_idx) + '_rank.nt', 'w')
        chkcount = 0
        for spo, score in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
            s, p, o = spo.split(sep)
            if spo not in target_facts:
                _s = s
                s = o
                o = _s
            chkcount += 1
            try:
                fo_rank.write("%s %s %s .\n" % (s, p, o))
                fo_top10.write("%s %s %s .\n" % (s, p, o))
                fo_top5.write("%s %s %s .\n" % (s, p, o))
            except ValueError:
                pass
            if chkcount == 5:
                fo_top5.close()
            if chkcount == 10:
                fo_top10.close()
        fo_rank.close()
def test_fromdict_missing(d_invalid, missing):
    del d_invalid[missing]

    with pytest.raises(ValueError, match=r'missing .*%s' % missing):
        Context.fromdict(d_invalid, require_lattice=(missing == 'lattice'))
def extract_sumz():
    import os
    import csv
    import copy

    print('summary')
    input_json = request.get_json(force=True)
    print(type(input_json))
    input_entity = input_json['entity']
    input_KB = input_json['KB']

    target_entity = set([])
    target_entity.add(input_entity)

    if not os.path.isdir(fca_lattice_path):
        os.mkdir(fca_lattice_path)
    fcs_lattice_filename = fca_lattice_path + 'FCA_' + input_entity + '.csv'
    fcs_lattice_file = open(fcs_lattice_filename, 'w')
    sep = ':-:'
    property_set = set([])
    target_facts = set([])
    for row in input_KB:
        s = row.strip().split()[0]
        p = row.strip().split()[1]
        o = ' '.join(row.strip().split()[2:])[:-2]
        if s not in target_entity and o in target_entity:
            _s = s
            s = o + '[FALSE]'
            o = _s
        property_set.add(p)
        target_facts.add(s + sep + p + sep + o)
    property_list = list(property_set)
    property_list.insert(0, '')
    fca_csv = [property_list]

    final_rank = {}
    attribute_map = {}
    for spo in target_facts:
        default_score = 1
        s, p, o = spo.split(sep)
        s = s.replace('[FALSE]', '')
        # If less information is available from the surface form,
        # the score will be lower.
        for uninform_str in uninformative_values:
            if uninform_str in o:
                default_score = 0
        if default_score > 0:
            # building the attribute-token dict
            try:
                attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
            except KeyError:
                attribute_map[p] = extract_key_tokens(o)
        final_rank[s + sep + p + sep + o] = default_score

    for spo, v in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
        tmp_fca_list = [''] * len(property_list)
        s, p, o = spo.split(sep)
        tmp_fca_list[0] = p + sep + o
        tmp_fca_list[property_list.index(p)] = 'X'
        for prop, tokens in attribute_map.items():
            for token in tokens:
                if token in o.lower():
                    tmp_fca_list[property_list.index(prop)] = 'X'
        fca_csv.append(tmp_fca_list)

    tmp_list = copy.deepcopy(fca_csv)
    with fcs_lattice_file:
        writer = csv.writer(fcs_lattice_file)
        for index, row in enumerate(fca_csv):
            for index_se, ele in enumerate(row):
                tmp_list[index][index_se] = ele.encode('utf-8')
        writer.writerows(tmp_list)

    # Formal concept analysis
    from concepts import Context
    c = Context.fromfile(fcs_lattice_filename, frmat='csv')
    hierarchical_layer = 0
    for extents, intents in c.lattice:
        for extent in extents:
            try:
                extent_de = extent.decode('utf-8')
                if final_rank[s + sep + extent_de] == 1:
                    final_rank[s + sep + extent_de] = len(target_facts) - hierarchical_layer
            except KeyError:
                print(s + sep + extent_de)
                continue
        hierarchical_layer += 1

    os.remove(fcs_lattice_filename)

    result_top5 = []
    chkcount = 0
    for spo, score in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
        s, p, o = spo.split(sep)
        if spo not in target_facts:
            _s = s
            s = o
            o = _s
        chkcount += 1
        result_top5.append(s + '\t' + p + '\t' + o)
        if chkcount == 5:
            break

    result = {}
    result['top5'] = result_top5
    return jsonify(result)
    print(res)
    return res


if __name__ == '__main__':
    animaux = ["Bat", "Eagle", "Monkey", "Parrot fish", "Penguin", "Shark",
               "Lantern fish"]
    proprietes = ["breathes in water", "can fly", "has beak", "has hands",
                  "has skeleton", "has wings", "lives in water",
                  "is viviparous", "produces light"]
    matrix = [
        (False, True, False, False, True, True, False, True, False),   # Bat
        (False, True, True, False, True, True, False, False, False),   # Eagle
        (False, False, False, True, True, False, False, True, False),  # Monkey
        (True, False, True, False, True, False, True, False, False),   # Parrot fish
        (False, False, True, False, True, True, True, False, False),   # Penguin
        (True, False, False, False, True, False, True, False, False),  # Shark
        (True, False, False, False, True, False, True, False, True),   # Lantern fish
    ]
    exportContext(animaux, proprietes, matrix)
    c = Context(animaux, proprietes, matrix)

    # clients = ['Anne', 'Basile', 'Carole']
    # articles = ['fromage', 'vin', 'lait', 'lessive']
    # matrix = [
    #     (True, False, True, False),  # A
    #     (True, True, False, True),   # B
    #     (True, False, True, True),   # C
    # ]
    # c = Context(clients, articles, matrix)
    # c.lattice.graphviz(view=True)
    # for intent, extent in c.lattice:
    #     print(intent, extent)

    c.tofile('animaux.txt', frmat='cxt', encoding='utf-8')
    writeConcepts(c.lattice)
from concepts import Context

c = Context()
# c = Context.fromfile('examples/digits.cxt')
c.additem('cat', ['a', 'b', 'eats fish'])
# c.additem('cat', ['eats fish'])
print(c.tostring())