예제 #1
0
def estimate_pattern_by_misamp(pattern: Pattern,
                               mallows: Mallows,
                               threshold=0.01,
                               single_core_workload=50,
                               num_cores=None,
                               verbose=False):
    """Estimate ``pattern`` under ``mallows`` by decomposing it first.

    ``pattern.calculate_tc()`` is invoked on the caller's object, the pattern
    is split with ``decompose_pattern``, and the resulting seeds are handed
    to ``estimate_union_of_prefs`` together with the sampler settings.

    Args:
        pattern: Pattern to estimate.
        mallows: Model the estimate is taken over.
        threshold: Forwarded unchanged to ``estimate_union_of_prefs``.
        single_core_workload: Forwarded unchanged.
        num_cores: Forwarded unchanged.
        verbose: When true, print the pattern, the model and the number of
            seeds before estimating; also forwarded unchanged.

    Returns:
        Whatever ``estimate_union_of_prefs`` returns.
    """
    pattern.calculate_tc()
    seeds = decompose_pattern(pattern)

    if verbose:
        print(f'\npattern: {pattern}\n{mallows}\n#seeds={len(seeds)}\n')

    # Everything after the model is passed through positionally, in order.
    sampler_settings = (threshold, single_core_workload, num_cores, verbose)
    return estimate_union_of_prefs(seeds, mallows, *sampler_settings)
예제 #2
0
def test_a_single_case():
    """Print the LTM result and the sampling estimate for one hand-built pattern."""
    from inference.ltm.ltm_wrapper import calculate_marginal_prob_over_mallows_by_ltm

    # The synthetic case is still loaded, but none of its three return values
    # is used below: the model and pattern are built by hand instead.
    _patterns, _mallows, _p_exact = get_test_case_of_patterns_from_synthetic_4_labels(2)

    mallows = Mallows(list(range(10)), 0.03)
    children_of = {'a': {'b'}, 'b': {'c'}}
    items_of = {'a': {4, 8}, 'b': {1, 3}, 'c': {4}}
    pattern = Pattern(label_to_children=children_of, label_to_items=items_of)

    res_exact = calculate_marginal_prob_over_mallows_by_ltm(mallows=mallows,
                                                            pattern=pattern)
    print(res_exact)

    # NOTE(review): threshold is 0 here. If the sampler stops only once
    # `prob_max < threshold * prob_sum`, that test can never pass with a zero
    # threshold - confirm this call is meant to run until interrupted.
    res_samp = estimate_pattern_by_misamp1(pattern=pattern,
                                           mallows=mallows,
                                           threshold=0,
                                           verbose=True)
    print(res_samp)
def test_2_label():
    """Run ``isramp_over_patterns`` on the first 2-label case and print the result.

    Only the first ``rid`` listed in the exact-results file is processed; the
    loop exits after one iteration.
    """
    import pandas as pd
    from core.patterns import PATTERN_SEP

    inputs = pd.read_csv('../../data/input_movielens_ramp-vs-amp_2labels.csv')
    exact = pd.read_csv(
        '../../data/output_movielens_ramp-vs-amp_2labels_exact.csv')

    for rid in exact['rid']:
        # The rid value is used as an index label into both tables.
        p_exact = exact.loc[rid, 'p_exact']
        case = inputs.loc[rid]

        # NOTE(review): eval() executes whatever text is in the 'ranking'
        # column - only safe while the CSV is a trusted project file.
        model = Mallows(center=eval(case['ranking']), phi=case['phi'])

        pattern_list = []
        for pattern_str in case['patterns'].split(PATTERN_SEP):
            pattern_list.append(Pattern.from_string(pattern_str))

        res = isramp_over_patterns(pattern_list, model)

        print(res, f'p_exact = {p_exact}')
        break
예제 #4
0
def calculate_marginal_prob_over_mallows_by_ltm(pattern: Pattern,
                                                mallows: Mallows,
                                                num_cores=None,
                                                timeout=None):
    """Compute the marginal probability of ``pattern`` by running ``ltm.jar``.

    The pattern and the model are written to ``temp.json`` in this module's
    directory, ``java -jar ltm.jar temp.json <num_cores>`` is run with its
    output appended to ``ltm_verbose.txt``, and the result is read back from
    the same ``temp.json``.

    Args:
        pattern: Pattern whose labels, items and direct children are exported.
        mallows: Model supplying ``center`` and ``phi``.
        num_cores: Thread count handed to ``ltm.jar``; defaults to
            ``os.cpu_count()``.
        timeout: Passed to ``subprocess.run``; ``None`` waits indefinitely.

    Returns:
        ``(True, probability, runtime_ms)`` when the jar finished, or
        ``(False, 0, 0)`` when it timed out.

    Raises:
        KeyError: If ``temp.json`` has no ``prob_list`` / ``runtime(ms)``
            entry after the run, i.e. the jar did not write a result.
    """
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    jar_file = os.path.join(cur_dir, 'ltm.jar')
    json_file = os.path.join(cur_dir, 'temp.json')
    ltm_verbose_file = os.path.join(cur_dir, 'ltm_verbose.txt')

    # num_cores is for ltm.jar multi-threading
    num_cores = num_cores or os.cpu_count()

    # Represent the pattern as a list of nodes, each with the three
    # properties {name, items, children}.
    nodes = [
        {
            'name': f'L-{node_name}',
            'items': [int(item) for item in items],
            'children': [
                f'L-{child}'
                for child in pattern.iter_direct_children_of_label(node_name)
            ],
        }
        for node_name, items in pattern.label_to_items.items()
    ]

    # save pattern and Mallows core info in a local JSON file
    print(nodes)
    with open(json_file, 'w') as outfile:
        json.dump(
            {
                'pattern': nodes,
                'center': mallows.center,
                'phi_list': [mallows.phi]
            },
            outfile,
            indent=4)

    # Terminal equivalent:
    #   java -Xmx500g -Xms4g -jar ltm.jar temp.json 48 >> out.txt 2>&1
    # The argv is built as a list rather than by splitting a formatted
    # string, so a module path containing whitespace stays one argument.
    execute_jar = [
        'java',
        f'-Xmx{calculate_jvm_xmx()}g',
        '-Xms4g',
        '-jar',
        jar_file,
        json_file,
        str(num_cores),
    ]

    with open(ltm_verbose_file, 'a') as outfile:
        try:
            subprocess.run(execute_jar,
                           stdout=outfile,
                           stderr=outfile,
                           timeout=timeout)
            outfile.write('\n')
        except subprocess.TimeoutExpired:
            return False, 0, 0

    # temp.json is deliberately left on disk after the run.
    with open(json_file, 'r') as file:
        res = json.load(file)

    return True, res['prob_list'][0], res['runtime(ms)']
예제 #5
0
def get_test_case_of_patterns_from_movielens_linear(rid=0):
    """Load one test case from ``data/input_movielens_ramp-vs-amp.csv``.

    Args:
        rid: Index label of the row to load.

    Returns:
        ``(patterns, mallows)``: the parsed pattern list and the model built
        from the row's ``ranking`` and ``phi`` columns.
    """
    table = pd.read_csv('data/input_movielens_ramp-vs-amp.csv')
    case = table.loc[rid]

    # NOTE(review): eval() executes whatever text is in the 'ranking' column -
    # only safe while the CSV is a trusted project file.
    center = eval(case['ranking'])
    mallows = Mallows(center=center, phi=case['phi'])

    pattern_strings = case['patterns'].split(PATTERN_SEP)
    patterns = list(map(Pattern.from_string, pattern_strings))

    return patterns, mallows
예제 #6
0
    def __init__(self, sb):
        """Set up the "push_reg" pattern, then deobfuscate and patch immediately.

        ``sb`` is handed to ``Deobfuscation.__init__``; this method then reads
        ``self.sb.fname`` (presumably set by that base initializer - not
        visible here).
        """
        Deobfuscation.__init__(self, sb)
        self.cfg = cfg.PATTERNS["push_reg"]

        source_path = self.sb.fname
        self.pattern = Pattern(self.cfg, fpath=source_path)

        # Construction does all the work: deobfuscate first, patch second.
        self.deobfuscation()
        self.patch()
예제 #7
0
def calculate_upper_bound_bipartite_pattern(pattern: Pattern):
    """Build a ``BipartitePattern`` from the edges of ``pattern.tc``.

    After ``pattern.calculate_tc()``, every edge ``(l, r)`` in
    ``pattern.tc.edges`` contributes a left label ``L-<l>`` with child
    ``R-<r>``. Each new label gets the items of the original label it was
    derived from.

    Args:
        pattern: Source pattern; ``calculate_tc()`` is called on it.

    Returns:
        ``BipartitePattern(label_to_children, label_to_items)``.
    """
    pattern.calculate_tc()

    label_to_children = {}
    label_to_items = {}
    for parent, child in pattern.tc.edges:
        left, right = f'L-{parent}', f'R-{child}'

        # "L-" and "R-" names can never collide, so the keys of
        # label_to_items already record which side-labels have been seen.
        if left not in label_to_items:
            label_to_items[left] = pattern.get_items_in_label(parent)
        if right not in label_to_items:
            label_to_items[right] = pattern.get_items_in_label(child)

        label_to_children.setdefault(left, set()).add(right)

    return BipartitePattern(label_to_children, label_to_items)
예제 #8
0
def get_test_case_of_patterns_from_movielens_5_labels(rid=0):
    """Load one test case from ``data/input_movielens_ramp-vs-amp_5_labels.csv``.

    Hard cases for rAMP are 36, 52, 68, 84, 100, 116, 132, 148.

    Args:
        rid: Index label of the row to load.

    Returns:
        ``(patterns, mallows)`` built from the row's ``patterns``,
        ``ranking`` and ``phi`` columns.
    """
    table = pd.read_csv('data/input_movielens_ramp-vs-amp_5_labels.csv')
    case = table.loc[rid]

    # NOTE(review): eval() executes whatever text is in the 'ranking' column -
    # only safe while the CSV is a trusted project file.
    mallows = Mallows(center=eval(case['ranking']), phi=case['phi'])

    # This file separates patterns with a literal ' <> '.
    patterns = []
    for pattern_str in case['patterns'].split(' <> '):
        patterns.append(Pattern.from_string(pattern_str))

    return patterns, mallows
예제 #9
0
def estimate_pattern_by_misamp1(pattern: Pattern,
                                mallows: Mallows,
                                k=100,
                                threshold=0.01,
                                single_core_workload=50,
                                num_cores=None,
                                verbose=False):
    """Estimate the probability of ``pattern`` under ``mallows`` by sampling in rounds.

    Each round maps ``worker`` over ``num_cores`` processes. Every worker
    receives ``(pattern, mallows, single_core_workload, k)`` and returns a
    ``(prob_sum, prob_max)`` pair, which is folded into the running totals.
    Sampling stops once ``prob_max < threshold * prob_sum``.

    The function works on a deep copy of ``pattern``, so the caller's object
    is not touched by ``calculate_tc()``.

    Args:
        pattern: Pattern to estimate.
        mallows: Model to sample from.
        k: Passed through to ``worker``.
        threshold: Stop once the largest single contribution is below this
            fraction of the accumulated sum. ``prob_max`` never drops below
            0, so a threshold of 0 can never satisfy the stop test.
        single_core_workload: Passed through to ``worker``; also used as the
            per-process sample count when computing ``#samples``.
        num_cores: Number of worker processes; defaults to ``cpu_count()``.
        verbose: Print the inputs once and a progress line per round.

    Returns:
        ``(probability, num_samples, runtime_ms)``.
    """
    num_cores = num_cores or cpu_count()
    round_size = single_core_workload * num_cores

    pattern = deepcopy(pattern)
    pattern.calculate_tc()

    if verbose:
        print(f'\nPattern: {pattern}\n{mallows}\n')

    para_tuple = (pattern, mallows, single_core_workload, k)
    prob_max, prob_sum, round_i, start_time = 0, 0, 0, time()
    while True:
        round_i += 1
        num_samples = round_i * round_size

        with Pool(processes=num_cores) as pool:
            res_list = pool.map(worker, [para_tuple] * num_cores)

        for prob_sum_i, prob_max_i in res_list:
            prob_sum += prob_sum_i
            prob_max = max(prob_max, prob_max_i)

        prob_now = prob_sum / num_samples

        if verbose:
            # Until some probability mass has accumulated the ratio is
            # undefined; report nan rather than dividing by zero.
            convergence = prob_max / prob_sum if prob_sum else float('nan')
            print(
                f"prob={prob_now}, #samples={num_samples}, convergence={convergence}"
            )

        if prob_max < threshold * prob_sum:
            runtime = int((time() - start_time) * 1000)
            return prob_now, num_samples, runtime
예제 #10
0
def get_test_case_of_patterns_from_movielens_2_labels(rid=0):
    """Load one 2-label MovieLens test case together with its exact answer.

    Args:
        rid: Index label of the row to load from both CSV files.

    Returns:
        ``(patterns, mallows, p_exact)``. ``p_exact`` comes from
        ``data/output_movielens_ramp-vs-amp_2labels_exact.csv``; the patterns
        and model come from ``data/input_movielens_ramp-vs-amp_2labels.csv``.
    """
    exact_table = pd.read_csv(
        'data/output_movielens_ramp-vs-amp_2labels_exact.csv')
    p_exact = exact_table.loc[rid, 'p_exact']

    input_table = pd.read_csv('data/input_movielens_ramp-vs-amp_2labels.csv')
    case = input_table.loc[rid]

    # NOTE(review): eval() executes whatever text is in the 'ranking' column -
    # only safe while the CSV is a trusted project file.
    center = eval(case['ranking'])
    mallows = Mallows(center=center, phi=case['phi'])

    pattern_strings = case['patterns'].split(PATTERN_SEP)
    patterns = list(map(Pattern.from_string, pattern_strings))

    return patterns, mallows, p_exact
예제 #11
0
def get_test_case_of_patterns_from_synthetic_4_labels(pid=0):
    """Load one synthetic 4-label test case together with its exact answer.

    Args:
        pid: Index label of the case. It selects the row in the input file
            and the ``rid`` group in the answers file.

    Returns:
        ``(patterns, mallows, p_exact)``. The model's center is
        ``list(range(m))`` with the row's ``m`` and ``phi``.
    """
    answers = pd.read_csv(
        'data/test_cases_4_labels_sharing_BD_3_subs_convergence_by_ramp_3.csv')
    # Keep only the first answer row for each rid.
    first_per_rid = answers.groupby('rid').first()
    p_exact = first_per_rid.loc[pid, 'p_exact']

    cases = pd.read_csv('data/test_cases_4_labels_sharing_BD_3_subs.csv')
    case = cases.loc[pid]

    # The sub-patterns are stored one per line in a single cell.
    patterns = []
    for pattern_str in case['pref(A>C|A>D|B>D)'].split('\n'):
        patterns.append(Pattern.from_string(pattern_str))

    mallows = Mallows(list(range(case['m'])), case['phi'])
    return patterns, mallows, p_exact
예제 #12
0
def generate_a_large_pattern_from_sub_patterns(patterns: Iterable[Pattern]):
    """Merge several patterns into one, prefixing labels with the pattern's index.

    Label ``x`` of the ``i``-th pattern becomes ``"<i>-x"`` in both the
    children map and the items map, so labels from different sub-patterns
    stay distinct.

    Args:
        patterns: Sub-patterns to merge, taken in iteration order.

    Returns:
        ``Pattern(label_to_children, label_to_items)`` over the renamed
        labels. Item collections are stored by reference, not copied.
    """
    label_to_children = {}
    label_to_items = {}

    for idx, pattern in enumerate(patterns):
        label_to_children.update({
            f'{idx}-{parent}': {f'{idx}-{child}' for child in children}
            for parent, children in pattern.label_to_children.items()
        })
        label_to_items.update({
            f'{idx}-{label}': items
            for label, items in pattern.label_to_items.items()
        })

    return Pattern(label_to_children, label_to_items)
def test_linear():
    """Run ``isramp_over_patterns`` on one row from the tail of the linear input file.

    Only the first row returned by ``.tail()`` is processed; the loop exits
    after one iteration.
    """
    import pandas as pd
    from core.patterns import PATTERN_SEP

    tail_rows = pd.read_csv('../../data/input_movielens_ramp-vs-amp.csv').tail()

    for _rid, case in tail_rows.iterrows():
        # NOTE(review): eval() executes whatever text is in the 'ranking'
        # column - only safe while the CSV is a trusted project file.
        center_ranking = eval(case['ranking'])
        model = Mallows(center=center_ranking, phi=case['phi'])

        print(f'center ranking  = {center_ranking}')

        pattern_list = []
        for pattern_str in case['patterns'].split(PATTERN_SEP):
            pattern_list.append(Pattern.from_string(pattern_str))

        res = isramp_over_patterns(pattern_list, model)

        # No exact probability is available for this input file.
        print(res, f'p_exact = unknown')
        break
예제 #14
0
def get_test_case_of_pattern(pid=0):
    """Load one labelled-pattern test case from ``data/test_cases_label_patterns.csv``.

    Args:
        pid: Zero-based row position (looked up with ``.iloc``).

    Returns:
        ``(pattern, mallows, p_exact)``. The model's center is
        ``list(range(m))``, and ``p_exact`` is ``e`` raised to the row's
        ``log_p``.
    """
    cases = pd.read_csv('data/test_cases_label_patterns.csv')
    case = cases.iloc[pid]

    pattern = Pattern.from_string(case['pattern'])
    center = list(range(case['m']))
    mallows = Mallows(center, case['phi'])

    # The file stores the log-probability; convert it back.
    p_exact = e ** case['log_p']
    return pattern, mallows, p_exact