def main():
    #print sys.argv
    filename = str(sys.argv[1])
    iceberg_threshold = int(sys.argv[2])
    draw_iceberg = False
    cols_to_use = []
    i = 3
    cols_started = False
    while (i < len(sys.argv)):
        if (sys.argv[i][0] == '-'):
            cols_started = False
        if (sys.argv[i] == '-draw'):
            draw_iceberg = True
        elif (sys.argv[i] == '-columns'):
            cols_started = True
        elif (cols_started):
            cols_to_use.append(sys.argv[i])
        i += 1
    #print cols_to_use

    dataframe = pd.read_csv(filename)
    if (len(cols_to_use) > 0):
        dataframe = dataframe[[dataframe.columns[0]] + cols_to_use]
    col_info = pd.read_csv('cols.' + filename)
    transform_columns(dataframe, col_info)
    dataframe = dataframe.drop_duplicates(subset=list(dataframe.columns[0:1]), keep='first')
    dataframe.to_csv('transformed.' + filename, index_label=False, index=False)

    context = Context.fromfile('transformed.' + filename, frmat='csv')
    lattice_str = str(context.lattice.graphviz())
    f = open('lattice.dot', 'w')
    f.write(lattice_str)
    f.close()
    #context.lattice.graphviz()

    build_iceberg_lattice(filename, context.lattice, iceberg_threshold)
    iceberg_context = Context.fromfile(filename='iceberg.' + filename, frmat='csv')
    if draw_iceberg:
        iceberg_context.lattice.graphviz(view=True)
    lattice_str = str(iceberg_context.lattice.graphviz())
    f = open('iceberg.dot', 'w')
    f.write(lattice_str)
    f.close()

    implication_basis = find_implication_basis(iceberg_context)
    print "Implication basis: "
    for i, e in implication_basis:
        print str(i) + " => " + str(e)
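# Hypothetical invocation of main() above (the script and file names are
# assumptions, not from the source): the positional arguments are the CSV
# file and the iceberg support threshold, followed by optional flags.
#
#     python iceberg_lattice.py animals.csv 3 -draw -columns legs tail fur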
def render_all(filepattern='*.cxt', frmat=None, encoding=None,
               directory=None, out_format=None):
    from concepts import Context

    if directory is not None:
        get_name = lambda filename: os.path.basename(filename)
    else:
        get_name = lambda filename: filename

    if frmat is None:
        from concepts.formats import Format
        get_frmat = Format.by_extension.get
    else:
        get_frmat = lambda filename: frmat

    for cxtfile in glob.glob(filepattern):
        name, ext = os.path.splitext(cxtfile)
        filename = '%s.gv' % get_name(name)
        c = Context.fromfile(cxtfile, get_frmat(ext), encoding=encoding)
        l = c.lattice
        dot = l.graphviz(filename, directory)
        if out_format is not None:
            dot.format = out_format
        dot.render()
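# Usage sketch for render_all() above, assuming Graphviz is installed and
# some *.cxt context files exist in the working directory (hypothetical):
# render each context's lattice to a PNG next to its source file.
render_all('*.cxt', out_format='png')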
def test_fromstring_serialized(tmp_path, source, filename, includes_lattice):
    if filename is None:
        context = Context.fromstring(source, frmat='python-literal')
    else:
        target = tmp_path / filename
        kwargs = {'encoding': 'utf-8'}
        target.write_text(source, **kwargs)
        context = Context.fromfile(str(target), frmat='python-literal', **kwargs)

    assert context.objects == SERIALIZED['objects']
    assert context.properties == SERIALIZED['properties']
    assert context.bools == [
        (True, False, False, True, False, True, True, False, False, True),
        (True, False, False, True, False, True, False, True, True, False),
        (False, True, True, False, False, True, True, False, False, True),
        (False, True, True, False, False, True, False, True, True, False),
        (False, True, False, True, True, False, True, False, False, True),
        (False, True, False, True, True, False, False, True, True, False)
    ]

    if includes_lattice:
        assert 'lattice' in context.__dict__
    else:
        assert 'lattice' not in context.__dict__
def __init__(self, csv_location):
    # the Frame-capability lattice
    self.context = Context.fromfile(csv_location, frmat='csv')
    self.lattice = self.context.lattice
    # the Frame-uncapability lattice
    self.context_inv = Context(*self.context.definition().inverted())
    self.lattice_inv = self.context_inv.lattice
    # the list of all capabilities and frames
    self.capabilities = self.context.properties
    self.frames = self.context.objects
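# Hedged usage sketch for the constructor above (the enclosing class name
# `FrameLattice` and the CSV path are assumptions, not from the source):
#
#     fl = FrameLattice('frame_capabilities.csv')
#     print(fl.frames)        # the objects of the context
#     print(fl.capabilities)  # the properties of the context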
def generate_concept_matrix(filename, skill_list=None, render=False):
    # applying fca
    c = Context.fromfile(filename, frmat="csv")
    if render:
        c.lattice.graphviz(filename=filename.rstrip(".csv"), view=True)

    # reading csv headers
    csvfile = open(filename)
    csvreader = csv.reader(csvfile)

    # reading skills
    if skill_list is None:
        skill_list = csvreader.__next__()
        skill_list.pop(0)
    else:
        csvreader.__next__()

    # reading abstract names
    row_header = list()
    for row in csvreader:
        row_header.append(row[0])
    csvfile.close()

    # matrix to return
    mat = list()
    for i, concept in enumerate(c.lattice):
        extent, intent = concept
        # skip for non-significant concept
        if len(extent) == 0 or len(intent) == 0:
            continue
        print("c{} = {} > {}".format(i, extent, intent))
        row = list()
        for skill in skill_list:
            if skill in intent:
                row.append(1)
            else:
                row.append(0)
        for header in row_header:
            if header in extent:
                row.append(1)
            else:
                row.append(0)
        mat.append(row)
    return mat, row_header, skill_list
def generate_concept_matrix(filename, skill_list=None):
    # applying fca
    c = Context.fromfile(filename, frmat="csv")

    # reading csv headers
    csvfile = open(filename)
    csvreader = csv.reader(csvfile)

    # reading skills
    if skill_list is None:
        skill_list = csvreader.__next__()
        skill_list.pop(0)
    else:
        csvreader.__next__()

    # reading abstract names
    row_header = list()
    for row in csvreader:
        row_header.append(row[0])
    csvfile.close()

    # matrix to return
    mat = list()
    for extent, intent in c.lattice:
        print("{} > {}".format(extent, intent))
        row = list()
        for skill in skill_list:
            if skill in intent:
                row.append(1)
            else:
                row.append(0)
        for header in row_header:
            if header in extent:
                row.append(1)
            else:
                row.append(0)
        mat.append(row)
    return mat, row_header, skill_list
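# Hedged usage sketch for generate_concept_matrix() above (the file name
# 'skills.csv' is an assumption): each returned row marks the skills
# (intent) and then the objects (extent) belonging to one concept.
mat, row_header, skill_list = generate_concept_matrix('skills.csv')
for row in mat:
    print(row)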
        if concept.properties:
            # dot.edge(name, name, taillabel=' '.join(concept.properties), labelangle='90', color='transparent')
            print("properties >", ' | '.join(concept.properties))

        # dot.edges((name, node_name(c)) for c in sorted(concept.lower_neighbors, key=sortkey))
        print("edges :")
        for i in sorted(concept.lower_neighbors, key=sortkey):
            print(name, "->", node_name(i))
            edgecount += 1
        print()

    print("nodes:", nodecount, "edges:", edgecount)
    # if render or view:
    #     dot.render(view=view)  # pragma: no cover
    # return dot


# c = Context.fromfile("test_files/tech_formal_context.csv", frmat="csv")
c = Context.fromfile("test_files/student_formal_context.csv", frmat="csv")

# max_e_len = 0
# for e, i in c.lattice:
#     if len(e) > max_e_len:
#         max_e_len = len(e)

for i, exin in enumerate(c.lattice):
    extent, intent = exin
    print("c" + str(i), ">", extent, "\t->", intent)

# c.lattice.graphviz(view=True, filename="temp_show.pdf")
# show_graph(c.lattice, filename="temp_show.pdf", directory="output_trees", view=True)
def main():
    import os
    import sys
    import csv

    if not os.path.isdir(esbm_benchmark_path):
        print 'The esbm benchmark directory is required.'
        sys.exit(1)

    given_entities = esbm_benchmark_path + 'elist.txt'
    target_entities = set([])
    for row in open(given_entities):
        target_entities.add('<' + row.strip().split('\t')[2] + '>')

    for entity_idx in range(1, 141):
        if entity_idx > 100:
            targetKB = 'lmdb'
        else:
            targetKB = 'dbpedia'

        # One given entity description file
        entity_decriptions = esbm_benchmark_path + targetKB + '/' + str(entity_idx) + '/' + str(entity_idx) + '_desc.nt'

        # Creating a grid of formal concepts and save it as a CSV file
        if not os.path.isdir(fca_lattice_path):
            os.mkdir(fca_lattice_path)
        fcs_lattice_filename = fca_lattice_path + 'FCA_' + str(entity_idx) + '.csv'
        fcs_lattice_file = open(fcs_lattice_filename, 'w')

        sep = ':-:'
        property_set = set([])
        target_facts = set([])
        for row in open(entity_decriptions, 'r'):
            s = row.strip().split()[0]
            p = row.strip().split()[1]
            o = ' '.join(row.strip().split()[2:])[:-2]
            if s not in target_entities and o in target_entities:
                _s = s
                s = o + '[FALSE]'
                o = _s
            property_set.add(p)
            target_facts.add(s + sep + p + sep + o)

        property_list = list(property_set)
        property_list.insert(0, '')
        fca_csv = [property_list]

        final_rank = {}
        attribute_map = {}
        for spo in target_facts:
            default_score = 1
            s, p, o = spo.split(sep)
            s = s.replace('[FALSE]', '')
            # If there is less information available from the surface information, the score will be lower.
            for uninform_str in uninformative_values:
                if uninform_str in o:
                    default_score = 0
            if default_score > 0:
                # building attribute-token dict
                try:
                    attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
                except KeyError:
                    attribute_map[p] = extract_key_tokens(o)
            final_rank[s + sep + p + sep + o] = default_score

        for spo, v in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
            tmp_fca_list = [''] * len(property_list)
            s, p, o = spo.split(sep)
            tmp_fca_list[0] = p + sep + o
            tmp_fca_list[property_list.index(p)] = 'X'
            for prop, tokens in attribute_map.items():
                for token in tokens:
                    if token in o.lower():
                        tmp_fca_list[property_list.index(prop)] = 'X'
            # print tmp_fca_list
            fca_csv.append(tmp_fca_list)

        with fcs_lattice_file:
            writer = csv.writer(fcs_lattice_file)
            writer.writerows(fca_csv)

        # Formal concept analysis
        from concepts import Context
        c = Context.fromfile(fcs_lattice_filename, frmat='csv')
        hierarchical_layer = 0
        for extents, intents in c.lattice:
            # print extents, intents
            for extent in extents:
                if final_rank[s + sep + extent] == 1:
                    final_rank[s + sep + extent] = len(target_facts) - hierarchical_layer
            hierarchical_layer += 1

        # Generating result file
        if not os.path.isdir(kafca_final_result_path):
            os.mkdir(kafca_final_result_path)
        if not os.path.isdir(kafca_final_result_path + targetKB):
            os.mkdir(kafca_final_result_path + targetKB)
        output_filepath = kafca_final_result_path + targetKB + '/' + str(entity_idx) + '/'
        if not os.path.isdir(output_filepath):
            os.mkdir(output_filepath)

        fo_top5 = open(output_filepath + str(entity_idx) + '_top5.nt', 'wb')
        fo_top10 = open(output_filepath + str(entity_idx) + '_top10.nt', 'wb')
        fo_rank = open(output_filepath + str(entity_idx) + '_rank.nt', 'wb')

        chkcount = 0
        for spo, score in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
            s, p, o = spo.split(sep)
            if spo not in target_facts:
                _s = s
                s = o
                o = _s
            chkcount += 1
            try:
                fo_rank.write("%s %s %s .\n" % (s, p, o))
                fo_top10.write("%s %s %s .\n" % (s, p, o))
                fo_top5.write("%s %s %s .\n" % (s, p, o))
            except ValueError:
                pass
            if chkcount == 5:
                fo_top5.close()
            if chkcount == 10:
                fo_top10.close()
        fo_rank.close()
def extract_sumz():
    import os
    import csv
    import copy

    print 'summary'
    input_json = request.get_json(force=True)
    #print input_json
    print type(input_json)
    input_entity = input_json['entity']
    input_KB = input_json['KB']

    target_entity = set([])
    target_entity.add(input_entity)

    if not os.path.isdir(fca_lattice_path):
        os.mkdir(fca_lattice_path)
    fcs_lattice_filename = fca_lattice_path + 'FCA_' + input_entity + '.csv'
    fcs_lattice_file = open(fcs_lattice_filename, 'w')

    sep = ':-:'
    property_set = set([])
    target_facts = set([])
    for row in input_KB:
        s = row.strip().split()[0]
        p = row.strip().split()[1]
        o = ' '.join(row.strip().split()[2:])[:-2]
        if s not in target_entity and o in target_entity:
            _s = s
            s = o + '[FALSE]'
            o = _s
        property_set.add(p)
        target_facts.add(s + sep + p + sep + o)

    property_list = list(property_set)
    property_list.insert(0, '')
    fca_csv = [property_list]

    final_rank = {}
    attribute_map = {}
    for spo in target_facts:
        default_score = 1
        s, p, o = spo.split(sep)
        s = s.replace('[FALSE]', '')
        # If there is less information available from the surface information, the score will be lower.
        for uninform_str in uninformative_values:
            if uninform_str in o:
                default_score = 0
        if default_score > 0:
            # building attribute-token dict
            try:
                attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
            except KeyError:
                attribute_map[p] = extract_key_tokens(o)
        final_rank[s + sep + p + sep + o] = default_score

    for spo, v in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
        tmp_fca_list = [''] * len(property_list)
        s, p, o = spo.split(sep)
        tmp_fca_list[0] = p + sep + o
        tmp_fca_list[property_list.index(p)] = 'X'
        for prop, tokens in attribute_map.items():
            for token in tokens:
                if token in o.lower():
                    tmp_fca_list[property_list.index(prop)] = 'X'
        # print tmp_fca_list
        fca_csv.append(tmp_fca_list)

    tmp_list = copy.deepcopy(fca_csv)
    with fcs_lattice_file:
        writer = csv.writer(fcs_lattice_file)
        for index, row in enumerate(fca_csv):
            for index_se, ele in enumerate(row):
                #print ele
                tmp_list[index][index_se] = ele.encode('utf-8')
        writer.writerows(tmp_list)

    # Formal concept analysis
    from concepts import Context
    #fcs_lattice_filename = './KAFCA_lattice/FCA_141.csv'
    c = Context.fromfile(fcs_lattice_filename, frmat='csv')
    hierarchical_layer = 0
    for extents, intents in c.lattice:
        #print extents, intents
        #f = open('text2.json', 'w')
        #json.dump(final_rank, f, ensure_ascii=False, indent=4)
        for extent in extents:
            try:
                extent_de = extent.decode('utf-8')
                if final_rank[s + sep + extent_de] == 1:
                    final_rank[s + sep + extent_de] = len(target_facts) - hierarchical_layer
            except KeyError:
                print s + sep + extent_de
                continue
        hierarchical_layer += 1
    #print '-'*10
    #print final_rank.keys()

    os.remove(fcs_lattice_filename)

    result_top5 = []
    chkcount = 0
    for spo, score in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
        s, p, o = spo.split(sep)
        if spo not in target_facts:
            _s = s
            s = o
            o = _s
        chkcount += 1
        result_top5.append(s + '\t' + p + '\t' + o)
        if chkcount == 5:
            break

    result = {}
    result['top5'] = result_top5
    return jsonify(result)
outputCSVFile = open('train_output.csv', 'w+')
wtr = csv.writer(outputCSVFile, delimiter=',', lineterminator='\n')
for i in range(number_of_objects + 1):
    for j in range(number_of_columns + 1):
        if i == 0 and j == 0:
            output_matrix[i][j] = ''
        elif i == 0 and j > 0:
            output_matrix[i][j] = 'c' + str(j - 1)
        elif i > 0 and j == 0:
            output_matrix[i][j] = str(i - 1)
        else:
            output_matrix[i][j] = str(context_matrix[i - 1][j - 1])
    wtr.writerow(output_matrix[i])
outputCSVFile.close()

train_dict = {}
c = Context.fromfile('train_output.csv', 'csv')
# sys.stdout = open('output1.txt', 'w+')
for extent, intent in c.lattice:
    #print('%r %r' % (extent, intent))
    # attribute_combinations = np.asarray(intent)
    if intent not in train_dict:
        count = 0
        extent_array = np.asarray(extent)
        for row in extent_array:
            if count == 0:
                train_dict[intent] = [
                    int(float(tableCells[int(row)][number_of_columns]))
                ]
                count = count + 1
            else:
#!/bin/env python3
from concepts import Context

c = Context.fromfile('nba_teams.csv', frmat='csv')
c.lattice.graphviz(view=True)
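# Variant sketch for the script above, assuming the same 'nba_teams.csv':
# save the Graphviz source to disk instead of opening an interactive viewer.
dot = c.lattice.graphviz()
with open('nba_teams_lattice.dot', 'w') as f:
    f.write(str(dot))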