Example #1
0
def _selected_seq_ids(idp_output, offset=19):
    """Collect the integer ids found on the 'selected_seq' lines of IDP output.

    The id is read as the full run of digits starting at column ``offset`` of
    each line that contains ``selected_seq``, so multi-digit ids are parsed
    completely instead of being truncated to their first digit.

    Raises:
        ValueError: if a 'selected_seq' line has no digit at ``offset``.
    """
    ids = set()
    for line in idp_output.split("\n"):
        if "selected_seq" not in line:
            continue
        end = offset
        while end < len(line) and line[end].isdigit():
            end += 1
        # int("") raises ValueError, so a malformed line still fails loudly.
        ids.add(int(line[offset:end]))
    return ids


def sequence_idp(params, patterns):
    """Return the ids of ``patterns`` that IDP does not report as non-closed.

    Patterns are grouped by ``make_grouping_by_support``. For every group with
    more than one pattern, an IDP program is generated and run over the
    candidates returned by ``get_attribute_intersection``; every id that shows
    up on a 'selected_seq' line of the IDP output is treated as non-closed and
    removed from the result.

    Args:
        params: mapping read for the keys "data", "dominance" and "type";
            it is also handed to ``IDPGenerator``.
        patterns: iterable of pattern objects exposing an ``id`` attribute.

    Returns:
        set: ids of the patterns that were not reported as non-closed.

    Raises:
        ValueError: if ``params["dominance"]`` is not "closed" (no support
            grouping is built for other values), or if a 'selected_seq' line
            of the IDP output cannot be parsed.
    """
    # Diagnostic dump of the input, kept from the original implementation.
    for p in patterns:
        print(p)
    indices = set(seq.id for seq in patterns)
    nonclosed_indices = set()

    # closed pattern mining by generated IDP code
    idp_gen = IDPGenerator(params)
    filename = os.path.split(params["data"])[1]
    idp_program_name = "{0}_{1}_{2}".format(params["dominance"], params["type"], filename.split(".")[0])

    if params["dominance"] != "closed":
        # The group-wise check below needs the support grouping, which only
        # exists for closed patterns; fail with a clear message instead of an
        # AttributeError on None.
        raise ValueError(
            "sequence_idp only supports params['dominance'] == 'closed', got {0!r}".format(
                params["dominance"]
            )
        )
    support_mapping = make_grouping_by_support(patterns)
    attribute_mapping = make_attribute_mapping(patterns)

    for group in support_mapping.values():
        if len(group) == 1:
            # A pattern alone in its support group is never sent to IDP.
            print(group)
            continue

        check_mapping = defaultdict(set)
        for seq in group:
            patterns_to_check = get_attribute_intersection(seq, attribute_mapping, support_mapping)
            if patterns_to_check:
                check_mapping[seq] = patterns_to_check

        if not check_mapping:
            # Nothing to test for this group: do not run IDP and, crucially,
            # do not parse output left over from a previous group.
            continue

        idp_gen.gen_IDP_code_group(check_mapping, idp_program_name)
        idp_output = idp_gen.run_IDP(idp_program_name)
        nonclosed_indices |= _selected_seq_ids(idp_output)

    indices = indices - nonclosed_indices
    print(indices)

    return indices
Example #2
0
def itemset_idp_new(params, patterns):
    """Set up the IDP generator and pattern mappings; currently returns an empty list.

    The generator, program name and mappings are built but not used yet, so
    the function behaves as a stub. ``params`` is read for the keys "data",
    "dominance" and "type".
    """
    # closed pattern mining by generated IDP code
    generator = IDPGenerator(params)
    data_file = os.path.split(params["data"])[1]
    program_name = "{0}_{1}_{2}".format(
        params["dominance"], params["type"], data_file.split(".")[0]
    )

    # The support grouping is only built for closed patterns.
    grouped_by_support = None
    if params["dominance"] == "closed":
        grouped_by_support = make_grouping_by_support(patterns)

    by_attribute = make_attribute_mapping(patterns)

    return []
Example #3
0
def sequence_idp_multiple(params, patterns):
    """Return the ids of ``patterns`` left after removing those from ``async_mapping``.

    Patterns are grouped by ``make_grouping_by_support``. For every group with
    more than one pattern, a mapping from each pattern to the candidates
    returned by ``get_attribute_intersection`` is built (only when there is
    more than one candidate). All non-empty mappings are handed to
    ``async_mapping`` in one call, and the ids it returns are subtracted from
    the full id set.

    Args:
        params: mapping read for the keys "data", "dominance" and "type";
            it is also handed to ``IDPGenerator``.
        patterns: iterable of pattern objects exposing an ``id`` attribute.

    Returns:
        set: ids of the patterns not returned by ``async_mapping``.

    Raises:
        ValueError: if ``params["dominance"]`` is not "closed" (no support
            grouping is built for other values).
    """
    indices = set(seq.id for seq in patterns)

    # closed pattern mining by generated IDP code
    idp_gen = IDPGenerator(params)
    filename = os.path.split(params["data"])[1]
    idp_program_name = "{0}_{1}_{2}".format(params["dominance"], params["type"], filename.split(".")[0])

    if params["dominance"] != "closed":
        # The group-wise check below needs the support grouping, which only
        # exists for closed patterns; fail with a clear message instead of an
        # AttributeError on None.
        raise ValueError(
            "sequence_idp_multiple only supports params['dominance'] == 'closed', got {0!r}".format(
                params["dominance"]
            )
        )
    support_mapping = make_grouping_by_support(patterns)
    attribute_mapping = make_attribute_mapping(patterns)

    # group testing
    mapping_groups = []
    for group in support_mapping.values():
        if len(group) == 1:
            # A pattern alone in its support group is never sent to IDP.
            print(group)
            continue
        check_mapping = defaultdict(set)
        for seq in group:
            patterns_to_check = get_attribute_intersection(seq, attribute_mapping, support_mapping)
            if len(patterns_to_check) > 1:
                check_mapping[seq] = patterns_to_check
        if check_mapping:
            mapping_groups.append(check_mapping)

    # NOTE(review): async_mapping must return a set (or frozenset) of ids for
    # the set difference below to work -- confirm against its definition.
    nonclosed_indices = async_mapping(mapping_groups, idp_gen, idp_program_name)

    indices = indices - nonclosed_indices
    print(indices)

    return indices