Example #1
import sys

import pandas as pd

from concepts import Context


def main():
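    """Build, export and prune a concept lattice from a CSV context.

    Usage: <script> FILE THRESHOLD [-draw] [-columns COL ...]

    transform_columns, build_iceberg_lattice and find_implication_basis are
    helper functions defined elsewhere in the source project.
    """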
    filename = sys.argv[1]
    iceberg_threshold = int(sys.argv[2])
    draw_iceberg = False
    cols_to_use = []
    i = 3
    cols_started = False
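    # scan argv by hand: '-draw' toggles rendering and '-columns' starts a
    # list of column names that runs until the next flag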
    while (i < len(sys.argv)):
        if (sys.argv[i][0] == '-'):
            cols_started = False
            if (sys.argv[i] == '-draw'):
                draw_iceberg = True
            elif (sys.argv[i] == '-columns'):
                cols_started = True
        elif (cols_started):
            cols_to_use.append(sys.argv[i])
        i += 1

    dataframe = pd.read_csv(filename)
    if cols_to_use:
        dataframe = dataframe[[dataframe.columns[0]] + cols_to_use]
    col_info = pd.read_csv('cols.' + filename)
    transform_columns(dataframe, col_info)
    dataframe = dataframe.drop_duplicates(subset=list(dataframe.columns[0:1]),
                                          keep='first')
    dataframe.to_csv('transformed.' + filename, index_label=False, index=False)

    context = Context.fromfile('transformed.' + filename, frmat='csv')
    lattice_str = str(context.lattice.graphviz())
    with open('lattice.dot', 'w') as f:
        f.write(lattice_str)

    build_iceberg_lattice(filename, context.lattice, iceberg_threshold)
    iceberg_context = Context.fromfile(filename='iceberg.' + filename,
                                       frmat='csv')
    if draw_iceberg:
        iceberg_context.lattice.graphviz(view=True)

    lattice_str = str(iceberg_context.lattice.graphviz())
    with open('iceberg.dot', 'w') as f:
        f.write(lattice_str)

    implication_basis = find_implication_basis(iceberg_context)
    print "Implication basis: "
    for i, e in implication_basis:
        print str(i) + " => " + str(e)
Example #2
def render_all(filepattern='*.cxt',
               frmat=None,
               encoding=None,
               directory=None,
               out_format=None):
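    """Render the lattice of every context file matching `filepattern`.

    When `frmat` is None it is looked up from each file's extension via
    Format.by_extension; `out_format` overrides the Graphviz output format.
    """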
    import glob
    import os

    from concepts import Context

    if directory is not None:
        get_name = lambda filename: os.path.basename(filename)
    else:
        get_name = lambda filename: filename

    if frmat is None:
        from concepts.formats import Format
        get_frmat = Format.by_extension.get
    else:
        get_frmat = lambda filename: frmat

    for cxtfile in glob.glob(filepattern):
        name, ext = os.path.splitext(cxtfile)
        filename = '%s.gv' % get_name(name)

        c = Context.fromfile(cxtfile, get_frmat(ext), encoding=encoding)
        l = c.lattice
        dot = l.graphviz(filename, directory)

        if out_format is not None:
            dot.format = out_format
        dot.render()
Example #4
def test_fromstring_serialized(tmp_path, source, filename, includes_lattice):
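    # `source`, `filename` and `includes_lattice` are presumably supplied via
    # pytest.mark.parametrize, and SERIALIZED by the test module; `source` is
    # parsed directly when no filename is given, otherwise it is round-tripped
    # through a file under tmp_path first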
    if filename is None:
        context = Context.fromstring(source, frmat='python-literal')
    else:
        target = tmp_path / filename
        kwargs = {'encoding': 'utf-8'}
        target.write_text(source, **kwargs)
        context = Context.fromfile(str(target),
                                   frmat='python-literal',
                                   **kwargs)

    assert context.objects == SERIALIZED['objects']
    assert context.properties == SERIALIZED['properties']
    assert context.bools == [
        (True, False, False, True, False, True, True, False, False, True),
        (True, False, False, True, False, True, False, True, True, False),
        (False, True, True, False, False, True, True, False, False, True),
        (False, True, True, False, False, True, False, True, True, False),
        (False, True, False, True, True, False, True, False, False, True),
        (False, True, False, True, True, False, False, True, True, False)
    ]

    if includes_lattice:
        assert 'lattice' in context.__dict__
    else:
        assert 'lattice' not in context.__dict__
Example #5
    def __init__(self, csv_location):
        # the Frame-capability lattice
        self.context = Context.fromfile(csv_location, frmat='csv')
        self.lattice = self.context.lattice
        # the Frame-uncapability lattice
        self.context_inv = Context(*self.context.definition().inverted())
        self.lattice_inv = self.context_inv.lattice
        # the list of all capabilities and frames
        self.capabilities = self.context.properties
        self.frames = self.context.objects
Example #6
import csv

from concepts import Context


def generate_concept_matrix(filename, skill_list=None, render=False):
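    """Build a binary matrix from the concept lattice of a CSV context.

    Each non-trivial concept becomes a row: 1 where a skill belongs to the
    concept's intent or a row header to its extent, 0 otherwise.
    """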
    # applying fca
    c = Context.fromfile(filename, frmat="csv")
    if render:
        c.lattice.graphviz(filename=filename.rstrip(".csv"), view=True)

    # reading csv headers
    csvfile = open(filename)
    csvreader = csv.reader(csvfile)

    # reading skills
    if skill_list is None:
        skill_list = next(csvreader)
        skill_list.pop(0)
    else:
        next(csvreader)

    # reading abstract names
    row_header = list()
    for row in csvreader:
        row_header.append(row[0])

    csvfile.close()

    # matrix to return
    mat = list()
    for i, concept in enumerate(c.lattice):
        extent, intent = concept

        # skip for non-significant concept
        if len(extent) == 0 or len(intent) == 0:
            continue

        print("c{} = {} > {}".format(i, extent, intent))
        row = list()
        for skill in skill_list:
            if skill in intent:
                row.append(1)
            else:
                row.append(0)
        for header in row_header:
            if header in extent:
                row.append(1)
            else:
                row.append(0)

        mat.append(row)

    return mat, row_header, skill_list
Example #7
import csv

from concepts import Context


def generate_concept_matrix(filename, skill_list=None):
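    """Variant of the function above: no rendering option, and every lattice
    concept (including the trivial ones) becomes a matrix row."""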

    # applying fca
    c = Context.fromfile(filename, frmat="csv")

    # reading csv headers
    csvfile = open(filename)
    csvreader = csv.reader(csvfile)

    # reading skills
    if skill_list is None:
        skill_list = next(csvreader)
        skill_list.pop(0)
    else:
        next(csvreader)

    # reading abstract names
    row_header = list()
    for row in csvreader:
        row_header.append(row[0])

    csvfile.close()

    # matrix to return
    mat = list()
    for extent, intent in c.lattice:
        print("{} > {}".format(extent, intent))
        row = list()
        for skill in skill_list:
            if skill in intent:
                row.append(1)
            else:
                row.append(0)
        for header in row_header:
            if header in extent:
                row.append(1)
            else:
                row.append(0)

        mat.append(row)

    return mat, row_header, skill_list
Example #8
        if concept.properties:
            # dot.edge(name, name, taillabel=' '.join(concept.properties), labelangle='90', color='transparent')
            print("properties >", ' | '.join(concept.properties))
        # dot.edges((name, node_name(c)) for c in sorted(concept.lower_neighbors, key=sortkey))
        print("edges :")
        for i in sorted(concept.lower_neighbors, key=sortkey):
            print(name, "->", node_name(i))
            edgecount += 1

        print()
        print("nodes:", nodecount, "edges:", edgecount)

    # if render or view:
    #     dot.render(view=view)  # pragma: no cover
    # return dot


# c = Context.fromfile("test_files/tech_formal_context.csv",frmat="csv")
c = Context.fromfile("test_files/student_formal_context.csv", frmat="csv")
# max_e_len = 0
# for e,i in c.lattice:
#     if len(e) > max_e_len:
#         max_e_len = len(e)
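# print every concept as "cN > extent -> intent"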
for i, exin in enumerate(c.lattice):
    extent, intent = exin
    print("c" + str(i), ">", extent, "\t->", intent)
#
# c.lattice.graphviz(view=True,filename="temp_show.pdf")

# show_graph(c.lattice,filename="temp_show.pdf",directory="output_trees",view=True)
Example #9
def main():
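    """Rank facts from the ESBM benchmark and emit top-5/top-10 summaries.

    esbm_benchmark_path, fca_lattice_path, kafca_final_result_path,
    uninformative_values and extract_key_tokens are module-level names
    defined elsewhere in the source project.
    """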
    import os
    import sys
    import csv

    if not os.path.isdir(esbm_benchmark_path):
        print('The esbm benchmark directory is required.')
        sys.exit(1)

    given_entities = esbm_benchmark_path + 'elist.txt'
    target_entities = set()
    for row in open(given_entities):
        target_entities.add('<' + row.strip().split('\t')[2] + '>')

    for entity_idx in range(1, 141):

        if entity_idx > 100:
            targetKB = 'lmdb'
        else:
            targetKB = 'dbpedia'

        # One given entity description file
        entity_decriptions = esbm_benchmark_path + targetKB + '/' + str(
            entity_idx) + '/' + str(entity_idx) + '_desc.nt'

        # Creating a grid of formal concepts and save it as a CSV file
        if not os.path.isdir(fca_lattice_path):
            os.mkdir(fca_lattice_path)
        fcs_lattice_filename = fca_lattice_path + 'FCA_' + str(
            entity_idx) + '.csv'
        fcs_lattice_file = open(fcs_lattice_filename, 'w')

        sep = ':-:'

        property_set = set()
        target_facts = set()
        for row in open(entity_decriptions, 'r'):
            s = row.strip().split()[0]
            p = row.strip().split()[1]
            o = ' '.join(row.strip().split()[2:])[:-2]

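            # facts pointing *to* the target entity are flipped so the target
            # stays the subject; '[FALSE]' marks the swap and is removed later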
            if s not in target_entities and o in target_entities:
                _s = s
                s = o + '[FALSE]'
                o = _s

            property_set.add(p)
            target_facts.add(s + sep + p + sep + o)
        property_list = list(property_set)
        property_list.insert(0, '')

        fca_csv = [property_list]

        final_rank = {}

        attribute_map = {}
        for spo in target_facts:
            default_score = 1
            s, p, o = spo.split(sep)
            s = s.replace('[FALSE]', '')

            # If there is less information available from the surface information, the score will be lower.
            for uninform_str in uninformative_values:
                if uninform_str in o:
                    default_score = 0

            if default_score > 0:

                # building attribute-token dict
                try:
                    attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
                except KeyError:
                    attribute_map[p] = extract_key_tokens(o)

            final_rank[s + sep + p + sep + o] = default_score

        for spo, v in sorted(final_rank.items(),
                             key=lambda x: x[1],
                             reverse=True):
            tmp_fca_list = [''] * len(property_list)

            s, p, o = spo.split(sep)
            tmp_fca_list[0] = p + sep + o
            tmp_fca_list[property_list.index(p)] = 'X'

            for prop, tokens in attribute_map.items():
                for token in tokens:
                    if token in o.lower():
                        tmp_fca_list[property_list.index(prop)] = 'X'

            # print tmp_fca_list
            fca_csv.append(tmp_fca_list)

        with fcs_lattice_file:
            writer = csv.writer(fcs_lattice_file)
            writer.writerows(fca_csv)

        # Formal concept analysis
        from concepts import Context
        c = Context.fromfile(fcs_lattice_filename, frmat='csv')
        hierarchical_layer = 0
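        # NOTE: `s` below still holds the subject left over from the ranking
        # loop above; all facts in one description file share that subject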
        for extents, intents in c.lattice:
            # print extents, intents
            for extent in extents:

                if final_rank[s + sep + extent] == 1:
                    final_rank[s + sep + extent] = (
                        len(target_facts) - hierarchical_layer)

            hierarchical_layer += 1

        # Generating result file
        if not os.path.isdir(kafca_final_result_path):
            os.mkdir(kafca_final_result_path)

        if not os.path.isdir(kafca_final_result_path + targetKB):
            os.mkdir(kafca_final_result_path + targetKB)

        output_filepath = kafca_final_result_path + targetKB + '/' + str(
            entity_idx) + '/'
        if not os.path.isdir(output_filepath):
            os.mkdir(output_filepath)

        fo_top5 = open(output_filepath + str(entity_idx) + '_top5.nt', 'w')
        fo_top10 = open(output_filepath + str(entity_idx) + '_top10.nt', 'w')
        fo_rank = open(output_filepath + str(entity_idx) + '_rank.nt', 'w')

        chkcount = 0
        for spo, score in sorted(final_rank.items(),
                                 key=lambda x: x[1],
                                 reverse=True):
            s, p, o = spo.split(sep)

            if spo not in target_facts:
                _s = s
                s = o
                o = _s
            chkcount += 1

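            # once fo_top5/fo_top10 have been closed (after 5/10 facts),
            # further writes raise ValueError, which is deliberately swallowed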
            try:
                fo_rank.write("%s %s %s .\n" % (s, p, o))
                fo_top10.write("%s %s %s .\n" % (s, p, o))
                fo_top5.write("%s %s %s .\n" % (s, p, o))
            except ValueError:
                pass

            if chkcount == 5:
                fo_top5.close()

            if chkcount == 10:
                fo_top10.close()

        fo_rank.close()
Example #10
def extract_sumz():
	"""Flask endpoint: rank an entity's facts via FCA and return the top 5.

	Expects JSON with keys 'entity' and 'KB' (an iterable of N-Triples
	lines); fca_lattice_path, uninformative_values and extract_key_tokens
	are module-level names defined elsewhere in the source.
	"""
	import os
	import csv

	print('summary')
	input_json = request.get_json(force=True)
	print(type(input_json))
	input_entity = input_json['entity']
	input_KB = input_json['KB']

	target_entity = {input_entity}

	if not os.path.isdir(fca_lattice_path):
		os.mkdir(fca_lattice_path)
	fcs_lattice_filename = fca_lattice_path + 'FCA_' + input_entity + '.csv'
	fcs_lattice_file = open(fcs_lattice_filename, 'w')

	sep = ':-:'

	property_set = set()
	target_facts = set()
	for row in input_KB:
		s = row.strip().split()[0]
		p = row.strip().split()[1]
		o = ' '.join(row.strip().split()[2:])[:-2]

		if s not in target_entity and o in target_entity:
			_s = s
			s = o + '[FALSE]'
			o = _s

		property_set.add(p)
		target_facts.add(s + sep + p + sep + o)
	property_list = list(property_set)
	property_list.insert(0, '')

	fca_csv = [property_list]

	final_rank = {}

	attribute_map = {}
	for spo in target_facts:
		default_score = 1
		s, p, o = spo.split(sep)
		s = s.replace('[FALSE]', '')

		# If there is less information available from the surface information, the score will be lower.
		for uninform_str in uninformative_values:
			if uninform_str in o:
				default_score = 0

		if default_score > 0:

			# building attribute-token dict
			try:
				attribute_map[p] = attribute_map[p] | extract_key_tokens(o)
			except KeyError:
				attribute_map[p] = extract_key_tokens(o)

		final_rank[s + sep + p + sep + o] = default_score

	for spo, v in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
		tmp_fca_list = [''] * len(property_list)

		s, p, o = spo.split(sep)
		tmp_fca_list[0] = p + sep + o
		tmp_fca_list[property_list.index(p)] = 'X'

		for prop, tokens in attribute_map.items():
			for token in tokens:
				if token in o.lower():
					tmp_fca_list[property_list.index(prop)] = 'X'

		# print tmp_fca_list
		fca_csv.append(tmp_fca_list)

	with fcs_lattice_file:
		writer = csv.writer(fcs_lattice_file)
		# Python 3's csv module writes str rows directly; the original
		# per-cell utf-8 encoding pass was a Python 2 workaround
		writer.writerows(fca_csv)

	# Formal concept analysis
	from concepts import Context
	#fcs_lattice_filename = './KAFCA_lattice/FCA_141.csv'
	c = Context.fromfile(fcs_lattice_filename, frmat='csv')
	hierarchical_layer = 0
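	# concepts nearer the top of the lattice (smaller hierarchical_layer)
	# give the facts in their extents a higher final score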
	for extents, intents in c.lattice:
		for extent in extents:
			try:
				if final_rank[s + sep + extent] == 1:
					final_rank[s + sep + extent] = len(target_facts) - hierarchical_layer
			except KeyError:
				print(s + sep + extent)
				continue

		hierarchical_layer += 1

	os.remove(fcs_lattice_filename)
	result_top5 = []
	chkcount = 0
	for spo, score in sorted(final_rank.items(), key=lambda x: x[1], reverse=True):
		s, p, o = spo.split(sep)

		if spo not in target_facts:
			_s = s
			s = o
			o = _s
		chkcount += 1

		result_top5.append(s+'\t'+p+'\t'+o)

		if chkcount == 5:
			break

	result = {}
	result['top5'] = result_top5

	return jsonify(result)
Example #11
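    # fragment: writes a context matrix to train_output.csv, then maps each
    # lattice intent to the class labels of the objects in its extent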
    outputCSVFile = open('train_output.csv', 'w+')
    wtr = csv.writer(outputCSVFile, delimiter=',', lineterminator='\n')
    for i in range(number_of_objects + 1):
        for j in range(number_of_columns + 1):
            if i == 0 and j == 0:
                output_matrix[i][j] = ''
            elif i == 0 and j > 0:
                output_matrix[i][j] = 'c' + str(j - 1)
            elif i > 0 and j == 0:
                output_matrix[i][j] = str(i - 1)
            else:
                output_matrix[i][j] = str(context_matrix[i - 1][j - 1])
        wtr.writerow(output_matrix[i])
    outputCSVFile.close()
    train_dict = {}
    c = Context.fromfile('train_output.csv', 'csv')

    # sys.stdout = open('output1.txt', 'w+')
    for extent, intent in c.lattice:
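        # collect, per intent (attribute combination), the class labels of
        # the objects in its extent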
        #print('%r %r' % (extent, intent))
        # attribute_combinations = np.asarray(intent)
        if intent not in train_dict:
            count = 0
            extent_array = np.asarray(extent)
            for row in extent_array:
                if count == 0:
                    train_dict[intent] = [
                        int(float(tableCells[int(row)][number_of_columns]))
                    ]
                    count = count + 1
                else:
                    # the snippet is truncated here in the source; presumably
                    # the remaining rows extend the existing label list
                    train_dict[intent].append(
                        int(float(tableCells[int(row)][number_of_columns])))
Example #12
#!/usr/bin/env python3

from concepts import Context
c = Context.fromfile('nba_teams.csv', frmat='csv')
c.lattice.graphviz(view=True)