Ejemplo n.º 1
0
 def _get_all_freq_term_sets(self, T, D, minsup):
     """Return the frequent term sets that can be formed from the terms in T.

     The support of a term set is measured as ``len(cov(term_set, D))``; a
     term set is frequent when that value is at least ``minsup * len(D)``.
     Candidate sets are built only from terms that meet the threshold on
     their own.

     Args:
         T: Iterable of candidate terms.
         D: Document collection handed to ``cov``; its length scales the
             support threshold.
         minsup: Minimum support, expressed as a fraction of ``len(D)``.

     Returns:
         A list of ``set`` objects, ordered by increasing size.

     Note:
         Every non-empty combination of the frequent single terms is
         examined, so the running time grows exponentially with their
         number.
     """
     threshold = minsup * len(D)
     frequent_terms = {t for t in T if len(cov({t}, D)) >= threshold}

     # Filter while generating instead of materialising every combination and
     # then deleting rejected entries with list.remove(), which costs a linear
     # search and shift per rejection.
     frequent_sets = []
     for size in range(1, len(frequent_terms) + 1):
         for combo in combinations(frequent_terms, size):
             term_set = set(combo)
             if len(cov(term_set, D)) >= threshold:
                 frequent_sets.append(term_set)
     return frequent_sets
Ejemplo n.º 2
0
 def _get_all_freq_term_sets(self, T, D, minsup):
     """Return the frequent term sets that can be formed from the terms in T.

     The support of a term set is measured as ``len(cov(term_set, D))``; a
     term set is frequent when that value is at least ``minsup * len(D)``.
     Candidate sets are built only from terms that meet the threshold on
     their own.

     Args:
         T: Iterable of candidate terms.
         D: Document collection handed to ``cov``; its length scales the
             support threshold.
         minsup: Minimum support, expressed as a fraction of ``len(D)``.

     Returns:
         A list of ``set`` objects, ordered by increasing size.

     Note:
         Every non-empty combination of the frequent single terms is
         examined, so the running time grows exponentially with their
         number.
     """
     threshold = minsup * len(D)
     frequent_terms = {t for t in T if len(cov({t}, D)) >= threshold}

     # Filter while generating instead of materialising every combination and
     # then deleting rejected entries with list.remove(), which costs a linear
     # search and shift per rejection.
     frequent_sets = []
     for size in range(1, len(frequent_terms) + 1):
         for combo in combinations(frequent_terms, size):
             term_set = set(combo)
             if len(cov(term_set, D)) >= threshold:
                 frequent_sets.append(term_set)
     return frequent_sets
Ejemplo n.º 3
0
def main(*args):
    """Run the clustering demo on a small built-in list of document titles.

    Builds the vocabulary and the per-document term collections with the
    module's helpers, constructs ``FTC`` with a minimum support of 0.2 and
    prints progress messages, the vocabulary size and the resulting
    ``rem_term_sets`` attribute.

    Args:
        *args: Unused.
    """
    minsup = .2
    docs = [
        "Human machine interface for ABC computer applications",
        "A survey of user opinion of computer system response time",
        "The EPS user interface management system",
        "System an human system engineering testing for EPS",
        "Relation of user perceived response time to error measurement",
        "The generation of random, binary, ordered trees",
        "The intersection graph of paths in trees",
        "Graph minors IV: Widths of trees and well-quasi-ordering",
        "Graph minors: A survey",
    ]
    # Alternative corpus (disabled):
    #     docs = [title for title in open("/home/netzsooc/Documents/infotec/"\
    #                                         "Investigacion/justTitles.txt")]

    print("getting vocabulary")
    T = get_vocabulary(docs)
    print("building encoding and decoding dictionaries")
    # The encoding is built but not consumed further in this function.
    enc = index_words(T)
    #     dec = dict([(v,k) for k,v in enc.items()])
    D = {}
    print("Building D")
    # Documents are keyed "D0", "D1", ... in corpus order.
    for i, doc in enumerate(docs):
        D["D" + str(i)] = get_terms(doc)
    print("Done")
    print(len(T))
    print("getting FTC")
    out = FTC(D, minsup, T)
    print(out.rem_term_sets)
    print("done")
Ejemplo n.º 4
0
def main(*args):
    """Run the clustering demo on a small built-in list of document titles.

    Builds the vocabulary and the per-document term collections with the
    module's helpers, constructs ``FTC`` with a minimum support of 0.2 and
    prints progress messages, the vocabulary size and the resulting
    ``rem_term_sets`` attribute.

    Args:
        *args: Unused.
    """
    minsup = .2
    docs = [
        "Human machine interface for ABC computer applications",
        "A survey of user opinion of computer system response time",
        "The EPS user interface management system",
        "System an human system engineering testing for EPS",
        "Relation of user perceived response time to error measurement",
        "The generation of random, binary, ordered trees",
        "The intersection graph of paths in trees",
        "Graph minors IV: Widths of trees and well-quasi-ordering",
        "Graph minors: A survey",
    ]
    # Alternative corpus (disabled):
    #     docs = [title for title in open("/home/netzsooc/Documents/infotec/"\
    #                                         "Investigacion/justTitles.txt")]

    print("getting vocabulary")
    T = get_vocabulary(docs)
    print("building encoding and decoding dictionaries")
    # The encoding is built but not consumed further in this function.
    enc = index_words(T)
    #     dec = dict([(v,k) for k,v in enc.items()])
    D = {}
    print("Building D")
    # Documents are keyed "D0", "D1", ... in corpus order.
    for i, doc in enumerate(docs):
        D["D" + str(i)] = get_terms(doc)
    print("Done")
    print(len(T))
    print("getting FTC")
    out = FTC(D, minsup, T)
    print(out.rem_term_sets)
    print("done")
Ejemplo n.º 5
0
 def __init__(self, D, minsup, enc, T):
     """Greedily select frequent term sets until every document is covered.

     On each round the remaining frequent term set with the lowest ``EO``
     score is selected, recorded together with the documents it covers among
     those still unassigned, and those documents are dropped from further
     consideration.  The loop stops when no document is left or when no
     remaining term set covers any unassigned document.

     NOTE(review): this appears to follow the greedy FTC selection scheme
     (Beil, Ester & Xu, 2002) -- confirm against the intended algorithm.

     Args:
         D: Mapping of document id to that document's terms.  It is not
             modified; selection works on a shallow copy.
         minsup: Minimum support, forwarded to ``get_all_freq_term_sets``.
         enc: Forwarded unchanged to ``get_all_freq_term_sets``.
         T: Vocabulary, forwarded to ``get_all_freq_term_sets``.

     Attributes set on the instance:
         sel_terms: List of ``(term_set, covered_doc_ids)`` tuples in
             selection order.
         rem_term_sets: The frequent term sets that were not selected.
     """
     rem_term_sets = get_all_freq_term_sets(T, D, enc, minsup)
     # Work on a copy so the caller's document mapping is left intact.
     unassigned = dict(D)
     sel_terms = []

     while unassigned:
         scored = []
         for t in rem_term_sets:
             c = cov(t, unassigned)
             # A set that covers no unassigned document cannot form a cluster.
             if len(c) == 0:
                 continue
             scored.append((EO(c, rem_term_sets, unassigned), t, c))
         if not scored:
             # Nothing left that covers an unassigned document; stop instead
             # of calling min() on an empty list.
             break

         # Compare on the score only; on ties the first candidate wins.
         _, best, best_cover = min(scored, key=lambda item: item[0])
         doc_ids = list(best_cover)
         sel_terms.append((best, doc_ids))
         rem_term_sets.remove(best)
         for d in doc_ids:
             unassigned.pop(d, None)

     # __init__ must return None, so the results are exposed as attributes.
     self.sel_terms = sel_terms
     self.rem_term_sets = rem_term_sets
Ejemplo n.º 6
0
    def __init__(self, D, minsup, enc, T):
        """Greedily select frequent term sets until every document is covered.

        On each round the remaining frequent term set with the lowest ``EO``
        score is selected, recorded together with the documents it covers
        among those still unassigned, and those documents are dropped from
        further consideration.  The loop stops when no document is left or
        when no remaining term set covers any unassigned document.

        NOTE(review): this appears to follow the greedy FTC selection scheme
        (Beil, Ester & Xu, 2002) -- confirm against the intended algorithm.

        Args:
            D: Mapping of document id to that document's terms.  It is not
                modified; selection works on a shallow copy.
            minsup: Minimum support, forwarded to
                ``get_all_freq_term_sets``.
            enc: Forwarded unchanged to ``get_all_freq_term_sets``.
            T: Vocabulary, forwarded to ``get_all_freq_term_sets``.

        Attributes set on the instance:
            sel_terms: List of ``(term_set, covered_doc_ids)`` tuples in
                selection order.
            rem_term_sets: The frequent term sets that were not selected.
        """
        rem_term_sets = get_all_freq_term_sets(T, D, enc, minsup)
        # Work on a copy so the caller's document mapping is left intact.
        unassigned = dict(D)
        sel_terms = []

        while unassigned:
            scored = []
            for t in rem_term_sets:
                c = cov(t, unassigned)
                # A set that covers no unassigned document cannot form a
                # cluster.
                if len(c) == 0:
                    continue
                scored.append((EO(c, rem_term_sets, unassigned), t, c))
            if not scored:
                # Nothing left that covers an unassigned document; stop
                # instead of calling min() on an empty list.
                break

            # Compare on the score only; on ties the first candidate wins.
            _, best, best_cover = min(scored, key=lambda item: item[0])
            doc_ids = list(best_cover)
            sel_terms.append((best, doc_ids))
            rem_term_sets.remove(best)
            for d in doc_ids:
                unassigned.pop(d, None)

        # __init__ must return None, so the results are exposed as attributes.
        self.sel_terms = sel_terms
        self.rem_term_sets = rem_term_sets
Ejemplo n.º 7
0
        return F


def EO(C, R, D):
    """Sum ``-(1/f) * log(1/f)`` over the documents in C.

    For each document id in C, ``f`` is the number of term sets in R that
    are subsets of that document's entry in D.

    Args:
        C: Iterable of document ids (keys of D).
        R: Iterable of term sets (objects providing ``issubset``).
        D: Mapping from document id to the document's terms.

    Returns:
        The accumulated value; ``0`` when C is empty.

    Raises:
        ZeroDivisionError: If no term set in R is contained in one of the
            documents in C.
    """
    total = 0

    for doc_id in C:
        support = sum(1 for term_set in R if term_set.issubset(D[doc_id]))
        weight = 1 / support
        total += -weight * log(weight)

    return total


# NOTE(review): bare expression at module scope -- its result is discarded and
# it reads T, D, minsup and cov as module-level names.  Looks like leftover
# scratch code; confirm before relying on it or removing it.
[x for x in T if len(cov(x, D)) >= (minsup * len(D))]


def main(*args):

    minsup = .2
    docs = [
        "Human machine interface for ABC computer applications",
        "A survey of user opinion of computer system response time",
        "The EPS user interface management system",
        "System an human system engineering testing for EPS",
        "Relation of user perceived response time to error measurement",
        "The generation of random, binary, ordered trees",
        "The intersection graph of paths in trees",
        "Graph minors IV: Widths of trees and well-quasi-ordering",
        "Graph minors: A survey"
Ejemplo n.º 8
0
                F.remove(term)
        return F    


def EO(C, R, D):
    """Sum ``-(1/f) * log(1/f)`` over the documents in C.

    For each document id in C, ``f`` is the number of term sets in R that
    are subsets of that document's entry in D.

    Args:
        C: Iterable of document ids (keys of D).
        R: Iterable of term sets (objects providing ``issubset``).
        D: Mapping from document id to the document's terms.

    Returns:
        The accumulated value; ``0`` when C is empty.

    Raises:
        ZeroDivisionError: If no term set in R is contained in one of the
            documents in C.
    """
    total = 0

    for doc_id in C:
        support = sum(1 for term_set in R if term_set.issubset(D[doc_id]))
        weight = 1 / support
        total += -weight * log(weight)

    return total

# NOTE(review): bare expression at module scope -- its result is discarded and
# it reads T, D, minsup and cov as module-level names.  Looks like leftover
# scratch code; confirm before relying on it or removing it.
[x for x in T if len(cov(x, D)) >= (minsup * len(D))]
def main(*args):
    
    minsup = .2
    docs = [
            "Human machine interface for ABC computer applications",
            "A survey of user opinion of computer system response time",
            "The EPS user interface management system",
            "System an human system engineering testing for EPS",
            "Relation of user perceived response time to error measurement",
            "The generation of random, binary, ordered trees",
            "The intersection graph of paths in trees",
            "Graph minors IV: Widths of trees and well-quasi-ordering",
            "Graph minors: A survey"
            ]
#     docs = [title for title in open("/home/netzsooc/Documents/infotec/"\