def exec_ex13(filepath, max_parts):
    """
    Example 13 - Canonical base with partition pattern structures as extents

    Loads ``filepath`` into a transposed, tab-style PatternStructureModel,
    builds a PSCanonicalBase over PartitionPattern descriptions and prints
    each implication (antecedent => consequent) followed by its support.
    As the example's own notes put it, this amounts to calculating
    functional dependencies.

    filepath: path of the input file handed to PatternStructureModel
    max_parts: inclusive upper bound on ``len(pattern)`` used by the single
        condition passed to PSCanonicalBase
    """
    # Pattern classes hold singletons; they have to be reset whenever
    # pattern structures are calculated more than once, otherwise results
    # may be inconsistent.
    # NOTE(review): the reset targets TrimmedPartitionPattern while the base
    # below is built with PartitionPattern - confirm this is intended.
    TrimmedPartitionPattern.reset()

    def within_max_parts(pattern):
        # Accept only patterns with at most max_parts elements.
        return len(pattern) <= max_parts

    model = PatternStructureModel(
        filepath=filepath,
        transformer=List2PartitionsTransformer(int),
        transposed=True,
        file_manager_params={'style': 'tab'})

    base = PSCanonicalBase(
        model,
        pattern=PartitionPattern,
        conditions=[within_max_parts],
        lazy=False,
        silent=True)

    for (ant, con), support in base.get_implications():
        line = '{:>10s} => {:10s}'.format(lst2str(ant), lst2str(con))
        print(line, support)
def exec_ex12(filepath):
    """
    Example 12 - Partition Pattern Structure Mining

    Loads ``filepath`` into a transposed, tab-style PatternStructureModel,
    enumerates its closures with PSLecEnumClosures using PartitionPattern
    descriptions, and prints the resulting poset through dict_printer.

    NOTE(review): the example's original title refers to the PreviousClosure
    algorithm and to a max_parts threshold; the code here uses
    PSLecEnumClosures and applies no threshold.

    filepath: path of the input file handed to PatternStructureModel
    """
    # Pattern classes hold singletons that need to be reset whenever pattern
    # structures are calculated more than once; not doing so may lead to
    # inconsistencies.
    # NOTE(review): no reset is actually performed in this example - confirm
    # whether one is required before reusing PartitionPattern.

    is_transposed = True

    model = PatternStructureModel(
        filepath=filepath,
        transformer=List2PartitionsTransformer(is_transposed),
        transposed=is_transposed,
        file_manager_params={'style': 'tab'})

    miner = PSLecEnumClosures(
        model,
        pattern=PartitionPattern,
        lazy=False,
        silent=True)
    dict_printer(miner.poset, transposed=is_transposed)
Esempio n. 3
0
def exec_ex21(filepath, output_fname=None):
    """
    Example 21: Duquenne Guigues Base using StrippedPartitions, streaming
    patterns to disk

    Builds a PSCanonicalBase over StrippedPartitions with on-disk storage
    under '/tmp', closes the on-disk poset, then prints every implication
    (numbered from 1) with its support and finally the path returned by the
    poset's close().

    filepath: path of the input file handed to PatternStructureModel
    output_fname: forwarded as the 'output_fname' on-disk option
        (defaults to None)
    """
    is_transposed = True
    StrippedPartitions.reset()

    model = PatternStructureModel(
        filepath=filepath,
        transformer=List2PartitionsTransformer(is_transposed),
        sorter=PartitionSorter(),
        transposed=is_transposed,
        file_manager_params={'style': 'tab'})

    # Options for the on-disk poset: supports are written, extents are not.
    disk_options = {
        'output_path': '/tmp',
        'output_fname': output_fname,
        'write_support': True,
        'write_extent': False
    }
    base = PSCanonicalBase(
        model,
        pattern=StrippedPartitions,
        lazy=False,
        silent=True,
        ondisk=True,
        ondisk_kwargs=disk_options)
    stored_at = base.poset.close()

    # Rebuild the transformer's attribute index from the sorter's processing
    # order: position in that order -> entry of that order.
    model.transformer.attribute_index = dict(
        enumerate(model.sorter.processing_order))

    for number, (rule, support) in enumerate(base.get_implications(), 1):
        ant, con = rule
        line = '{}: {:10s} => {:10s}'.format(
            number, lst2str(ant), lst2str(con))
        print(line, support)

    print("\t=> Pseudo closures stored in {}".format(stored_at))
Esempio n. 4
0
def exec_ex19(filepath, output_fname=None):
    """
    Example 19: StrippedPartitions mined on disk - streaming patterns to disk

    Enumerates closures with PSLecEnumClosures over StrippedPartitions,
    storing the poset on disk under '/tmp' (extents included), then closes
    the poset and prints the path returned by close().

    NOTE(review): the example's original title names PreviousClosure, but
    the code calls PSLecEnumClosures.

    filepath: path of the input file handed to PatternStructureModel
    output_fname: forwarded as the 'output_fname' on-disk option
        (defaults to None)
    """
    is_transposed = True
    StrippedPartitions.reset()

    model = PatternStructureModel(
        filepath=filepath,
        transformer=List2PartitionsTransformer(is_transposed),
        transposed=is_transposed,
        file_manager_params={'style': 'tab'})

    disk_options = {
        'output_path': '/tmp',
        'output_fname': output_fname,
        'write_extent': True
    }
    miner = PSLecEnumClosures(
        model,
        pattern=StrippedPartitions,
        ondisk=True,
        ondisk_kwargs=disk_options,
        silent=True)
    stored_at = miner.poset.close()
    print("\t=> Results stored in {}".format(stored_at))
Esempio n. 5
0
from fca.algorithms.previous_closure import PSPreviousClosure
from lib.minimum_hitting_set import berges_mhs as my_mhs
from fca.io import read_representations
from fca.io.input_models import PatternStructureModel
from fca.io.sorters import PartitionSorter
import csv

from uis_miner_naive import find_uis, print_premises

if __name__ == "__main__":
    filepath = sys.argv[1]
    transposed = True
    fctx = PatternStructureModel(
        filepath=filepath,
        #        sorter=PartitionSorter(),
        transformer=List2PartitionsTransformer(transposed),
        transposed=transposed,
        file_manager_params={'style': 'tab'})

    ondisk_poset = PSPreviousClosure(fctx,
                                     pattern=PartitionPattern,
                                     ondisk=True,
                                     ondisk_kwargs={
                                         'output_path': '/tmp',
                                         'output_fname': None,
                                         'write_extent': False
                                     },
                                     silent=False).poset
    ctx = []
    fout_name = ondisk_poset.close()
    with open(fout_name, 'r') as fin:
Esempio n. 6
0
def d_prime(delta, ctx):
    """
    Return the set of positions in ``ctx`` whose entry satisfies
    ``PartitionPattern.leq(delta, entry)``.

    delta: description compared against every entry of ctx
    ctx: iterable of descriptions; positions are taken from enumerate(ctx)
    """
    return {
        position
        for position, description in enumerate(ctx)
        if PartitionPattern.leq(delta, description)
    }


if __name__ == "__main__":
    fmgr = FileModelFactory(sys.argv[1], style='tab').file_manager
    ctx = map(
        PartitionPattern.fix_desc,
        map(
            List2PartitionsTransformer(transposed=False).transform,
            fmgr.entries_transposed()))

    M = set(range(len(ctx)))
    all_elements = set([])
    map(all_elements.update, chain(*ctx))
    PartitionPattern.top([all_elements])
    # print PartitionPattern.top()
    premises = {}
    for i, m in enumerate(M):
        print '\n PRocessing', i, m
        res = d_darrow_m(i, ctx, start=0, current_delta=PartitionPattern.top())
        res = list(res)
        print 'RESULT:', len(res)
        # res2 = [d_prime(i, ctx) for i in res]