def merge_small_near_uniform_vandermonde_generator(
        left_zone_elements,
        right_zone_elements,
        choice_number,
        max_space_size=2000000000):
    """Yield slices of a two-zone split of the C(n, choice_number) space.

    For every way of dividing ``choice_number`` picks between the left and
    the right zone, the size of that sub-space is
    C(len(left), left picks) * C(len(right), right picks).

    * If that size divided by ``max_space_size`` is below 100, one slice
      ``(False, left_choice_number, left_zone_length, right_choice_number)``
      is yielded for the whole sub-space.
    * Otherwise one slice
      ``(True, left_choice, left_zone_length, right_choice_number)`` is
      yielded per concrete combination ``left_choice`` (a list) of
      left-zone elements.

    :param left_zone_elements: sized iterable of left-zone elements
    :param right_zone_elements: sized collection of right-zone elements
    :param choice_number: total number of elements to pick
    :param max_space_size: reference size used in the "small" test above
    :raises RuntimeError: when ``choice_number`` exceeds the number of
        available elements (raised on first iteration, as this is a
        generator)
    """
    left_total = len(left_zone_elements)
    right_total = len(right_zone_elements)

    if choice_number > left_total + right_total:
        raise RuntimeError(
            "choice_number > left_zone_length + right_zone_length")

    for left_pick in range(choice_number + 1):
        right_pick = choice_number - left_pick
        # Skip distributions that ask more of a zone than it contains.
        if left_pick > left_total or right_pick > right_total:
            continue

        left_ways = CombinaryCounter.compute_comb(left_total, left_pick)
        right_ways = CombinaryCounter.compute_comb(right_total, right_pick)
        sub_space_size = left_ways * right_ways

        if sub_space_size / max_space_size < 100:
            # Small enough: hand out the whole sub-space as one slice.
            yield (False, left_pick, left_total, right_pick)
            continue

        # Too large: fix the left-zone choice and emit one slice per choice.
        for combo in itertools.combinations(left_zone_elements, left_pick):
            yield (True, list(copy.deepcopy(combo)), left_total, right_pick)
    return True
def yanghui_split_checker(max_elements_size=10):
    """Randomised self-check of ``CombinationSearchingSpaceSplitter.yanghui_split``.

    Builds the element set ``{0, ..., max_elements_size - 1}`` and a random
    set of "split" elements drawn from ``[-1, max_elements_size]`` (so it may
    contain values outside the element set). For every choice number
    ``0..max_elements_size`` the slices returned by ``yanghui_split`` are
    sized with ``compute_comb(len(slice[1]), slice[2])`` and their sum is
    compared with ``C(max_elements_size, choice_number)``.

    :param max_elements_size: number of elements in the test universe
    :raises RuntimeError: when the slice sizes do not add up to the full space
    """
    all_elements = set(range(max_elements_size))
    split_size = random.randint(0, max_elements_size + 1)
    choice_elements = set()
    for _ in range(split_size):
        choice_elements.add(random.randint(-1, max_elements_size))
    print("split elements: ", choice_elements)

    report_format = "C(%d, %d), search slices number %d: real = %d, slices sum = %d, is same %s, is split %s"

    for choice_number in range(max_elements_size + 1):
        expected_size = CombinaryCounter.compute_comb(
            max_elements_size, choice_number)
        is_split, searching_slices = CombinationSearchingSpaceSplitter.yanghui_split(
            all_elements, choice_number, choice_elements)
        covered_size = sum(
            CombinaryCounter.compute_comb(len(ts[1]), ts[2])
            for ts in searching_slices)

        sizes_match = expected_size == covered_size
        msg_text = report_format % (
            max_elements_size, choice_number, len(searching_slices),
            expected_size, covered_size, str(sizes_match), str(is_split))
        print(msg_text)

        if not sizes_match:
            raise RuntimeError(msg_text)
def near_uniform_vandermonde_generator_checker(max_elements_size=10):
    """Self-check of ``near_uniform_vandermonde_generator``.

    Splits ``[0, ..., max_elements_size - 1]`` into a left zone holding the
    first ``min(max_elements_size // 3, 12)`` elements and a right zone
    holding the rest. For every choice number, it runs the generator with a
    maximum space size of 20, prints each slice with its size, and checks
    that the slice sizes sum to ``C(max_elements_size, choice_number)``.

    :param max_elements_size: number of elements in the test universe
    :raises RuntimeError: when the slice sizes do not add up to the full space
    """
    elements = list(range(max_elements_size))
    left_length = min(max_elements_size // 3, 12)
    left_zone = elements[:left_length]
    right_zone = elements[left_length:]

    report_format = "C(%d, %d), search slices number %d: real = %d, slices sum = %d, is same %s"

    for choice_number in range(max_elements_size + 1):
        expected_size = CombinaryCounter.compute_comb(
            max_elements_size, choice_number)
        searching_slices = CombinationSearchingSpaceSplitter.near_uniform_vandermonde_generator(
            left_zone, right_zone, choice_number, 20)

        slice_sizes = []
        for ts in searching_slices:
            ts_size = CombinaryCounter.compute_comb(len(ts[1]), ts[2])
            print(ts, "size: ", ts_size)
            slice_sizes.append(ts_size)
        covered_size = sum(slice_sizes)

        sizes_match = expected_size == covered_size
        msg_text = report_format % (
            max_elements_size, choice_number, len(slice_sizes),
            expected_size, covered_size, str(sizes_match))
        print(msg_text)

        if not sizes_match:
            raise RuntimeError(msg_text)
    def near_uniform_vandermonde_generator(left_zone_elements,
                                           right_zone_elements,
                                           choice_number,
                                           max_space_size=2000000000):
        """Yield search-space slices no larger than ``max_space_size``.

        Starts from the single space (left zone, right zone, choice number)
        and splits it with ``vandermonde_generator``. A resulting slice
        ``(fixed, zone, pick)`` whose size ``C(len(zone), pick)`` fits within
        ``max_space_size`` is yielded; a larger one is split again in the
        next round, using the first ``pick`` elements of its zone as the new
        left zone and the remainder as the new right zone. The elements
        fixed by earlier rounds are appended to each slice's ``fixed`` part.

        :param left_zone_elements: initial left zone (must be sliceable)
        :param right_zone_elements: initial right zone
        :param choice_number: number of elements to pick
        :param max_space_size: largest slice size that is yielded unsplit
        """
        pending = [([], left_zone_elements, right_zone_elements,
                    choice_number)]

        while pending:
            next_round = []

            for prefix, left_zone, right_zone, pick in pending:
                pieces = CombinationSearchingSpaceSplitter.vandermonde_generator(
                    left_zone, right_zone, pick)
                for piece in pieces:
                    piece_size = CombinaryCounter.compute_comb(
                        len(piece[1]), piece[2])
                    if piece_size > max_space_size:
                        # Still too big: queue it for another split round.
                        zone = piece[1]
                        cut = piece[2]
                        inner_left = zone[0:cut]
                        inner_right = zone[cut:]
                        piece[0].extend(prefix)
                        next_round.append(
                            (piece[0], inner_left, inner_right, cut))
                    else:
                        piece[0].extend(prefix)
                        yield piece

            pending = next_round
def yanghui_triangle_number_sets(nse_isets, original_left_isets, all_isets, pick_number):
    """Split one task slice so that picks containing ``nse_isets`` are skipped.

    A task slice means: ``original_left_isets`` are already picked and
    ``pick_number`` more elements are to be picked from ``all_isets``.

    * If every element of ``nse_isets`` is already in
      ``original_left_isets``, the whole slice is skipped.
    * If the remaining elements are all in ``all_isets`` and there are at
      most ``pick_number`` of them, the slice is split: the i-th new slice
      fixes the first ``i`` remaining elements as picked and excludes the
      (i+1)-th from the zone. The case where all remaining elements are
      picked is counted as skipped instead of being emitted.
    * Otherwise the slice is returned unchanged.

    :param nse_isets: set of elements that must not all be picked together
    :param original_left_isets: set of elements already picked
    :param all_isets: set of elements still available for picking
    :param pick_number: how many elements remain to be picked
    :return: ``(skip_number, task_slices)`` where ``skip_number`` is the
        number of combinations eliminated and each task slice is a tuple
        ``(left_isets, right_zone_isets, right_pick_number)``
    """
    task_slices = list()
    remain_nse_isets = nse_isets.difference(original_left_isets)

    if len(remain_nse_isets) == 0:
        # Everything in the slice already contains the whole NSE set.
        skip_number = CombinaryCounter.compute_comb(len(all_isets), pick_number)
        return skip_number, task_slices

    can_split = remain_nse_isets.issubset(all_isets) and len(remain_nse_isets) <= pick_number
    if not can_split:
        # The NSE set cannot be completed inside this slice: keep it as is.
        task_slices.append((original_left_isets, all_isets, pick_number))
        return 0, task_slices

    nse_size = len(remain_nse_isets)
    eliminate_atoms = set()
    right_zone_isets = copy.deepcopy(all_isets)
    for atom in list(remain_nse_isets):
        # Atoms handled so far are forced in; the current atom is forced out.
        left_isets = copy.deepcopy(eliminate_atoms)
        eliminate_atoms.add(atom)
        right_zone_isets.remove(atom)
        right_isets_number = pick_number - len(left_isets)
        left_isets = left_isets.union(original_left_isets)
        task_slices.append(
            (left_isets, copy.deepcopy(right_zone_isets), right_isets_number))

    # All NSE atoms picked: these combinations are skipped, not searched.
    skip_number = CombinaryCounter.compute_comb(len(right_zone_isets), pick_number - nse_size)
    return skip_number, task_slices
Exemple #6
0
def compute_search_space_size(search_isets_size, choice_number):
    """Print the sizes of the sub-spaces of a two-zone split of C(n, k).

    The search set is divided into a left zone of 12 elements and a right
    zone holding the rest. For every number ``i`` of left-zone picks, the
    sub-space size C(12, i) * C(right, k - i) is printed together with its
    ratio to a reference size of 100000000000; when that ratio exceeds 100
    the right-zone factor is reported as well. Finally "wrong case!" is
    printed if the sub-space sizes do not sum to C(n, k).

    :param search_isets_size: total number of candidate elements (n)
    :param choice_number: number of elements to pick (k)
    """
    max_sub_space_size = 100000000000
    left_zone_size = 12
    right_zone_size = search_isets_size - left_zone_size
    space_size = CombinaryCounter.compute_comb(search_isets_size, choice_number)

    sub_spaces_size_sum = 0
    for left_pick in range(choice_number + 1):
        left_ways = CombinaryCounter.compute_comb(left_zone_size, left_pick)
        right_ways = CombinaryCounter.compute_comb(right_zone_size, choice_number - left_pick)
        subspace_size = left_ways * right_ways
        sub_spaces_size_sum += subspace_size

        is_bigger = subspace_size > max_sub_space_size
        ratio = subspace_size / max_sub_space_size
        print("i = %d, subspace size = %d, is bigger than max size: %s, %.3f" % (left_pick, subspace_size, str(is_bigger), ratio))

        if ratio <= 100:
            continue
        right_is_bigger = right_ways > max_sub_space_size
        right_ratio = right_ways / max_sub_space_size
        print("\t right choice size %d, is bigger than max size: %s, %.3f" % (right_ways, right_is_bigger, right_ratio))

    if sub_spaces_size_sum != space_size:
        print("wrong case!")
 def meta_data_checker(k_size, m_size, n_size):
     """Print a per-size summary of the I4 meta data for one (k, m, n) key.

     Looks up ``I4RawSearchMaster.i4_meta`` under the key built from the
     three sizes. For every pick size ``i`` from 1 to ``len(meta) - 1`` it
     prints C(len(meta) - 1, i), the stored count ``meta[i]`` (reported as
     non-semi-valid tuples) and their difference.

     :param k_size: first component of the meta key
     :param m_size: second component of the meta key
     :param n_size: third component of the meta key
     """
     meta_key = I4RawSearchMaster.get_kmn_meta_key(k_size, m_size, n_size)
     tuple_counts = I4RawSearchMaster.i4_meta[meta_key]
     candidate_count = len(tuple_counts) - 1
     report_format = "choose %d elements, has %d tuples, %d non-semi-valid tuples, remain %d tuples"
     for pick in range(1, candidate_count + 1):
         all_tuples = CombinaryCounter.compute_comb(candidate_count, pick)
         non_semi_valid = tuple_counts[pick]
         print(report_format
               % (pick, all_tuples, non_semi_valid, all_tuples - non_semi_valid))
    def process_one_nse_subpart_task_slice(cls, nse_isets, task_slice):
        """
        Remove from one task slice the picks that contain all of nse_isets.

        :param cls: the enclosing class (unused)
        :param nse_isets: set of ids that must not all be picked together
        :param task_slice: (left_iset_ids, right_zone_iset_ids, right_zone_choice_number)
        :return: (skip_number, task_slices); skip_number counts the
                 eliminated combinations, task_slices holds the slices that
                 still have to be searched
        """
        right_zone = task_slice[1]
        right_pick = task_slice[2]
        original_left_isets = set(task_slice[0])
        pending_nse = nse_isets.difference(original_left_isets)

        if not pending_nse:
            # The already-picked part contains the whole NSE set.
            skipped = CombinaryCounter.compute_comb(len(right_zone), right_pick)
            return skipped, []

        if not pending_nse.issubset(right_zone):
            # The NSE set cannot be completed from this zone: keep the slice.
            return 0, [task_slice]

        nse_count = len(pending_nse)
        outside_zone = right_zone.difference(pending_nse)
        pieces = CombinationSearchingSpaceSplitter.vandermonde_generator(
            pending_nse, outside_zone, right_pick)

        skip_number = 0
        yang_task_slices = []
        for piece in pieces:
            if len(piece[0]) == nse_count:
                # Every pending NSE id is picked here: count and drop.
                skip_number += CombinaryCounter.compute_comb(
                    len(piece[1]), piece[2])
            else:
                piece[0].extend(original_left_isets)
                yang_task_slices.append(
                    (set(piece[0]), set(piece[1]), piece[2]))

        return skip_number, yang_task_slices
def yanghui_triangle_number_sets_2(minmal_i4_isets_tuples, left_iset_ids, right_zone_isets, right_iset_number):
    """Apply ``yanghui_triangle_number_sets`` for a sequence of NSE sets.

    Starts from the single task slice
    ``(left_iset_ids, right_zone_isets, right_iset_number)`` and, for each
    entry of ``minmal_i4_isets_tuples`` in turn, replaces every current
    slice by the slices ``yanghui_triangle_number_sets`` produces for it,
    accumulating the skipped counts. Progress is printed after each entry.
    At the end it checks that the skipped count plus the sizes of the
    remaining slices equals C(len(right_zone_isets), right_iset_number).

    :param minmal_i4_isets_tuples: iterable of NSE sets (each must support
        ``difference`` / ``issubset``, as required by the helper)
    :param left_iset_ids: ids already picked
    :param right_zone_isets: ids still available for picking
    :param right_iset_number: number of ids to pick from the right zone
    :return: ``(skip_task_number, task_slices)``
    :raises RuntimeError: when the skipped and remaining sizes do not add up
        to the size of the original slice
    """
    left_iset_ids = set(left_iset_ids)
    right_zone_isets = set(right_zone_isets)

    task_slices = [(left_iset_ids, right_zone_isets, right_iset_number)]
    skip_task_number = 0
    for cnt, nse in enumerate(minmal_i4_isets_tuples, start=1):
        nse_new_task_slices = list()
        for ts in task_slices:
            ts_skip_task_number, new_task_slices = yanghui_triangle_number_sets(nse, *ts)
            skip_task_number += ts_skip_task_number
            nse_new_task_slices.extend(new_task_slices)
        task_slices = nse_new_task_slices

        print("nse %d: " % cnt, nse)
        for ts in task_slices:
            print("\t", ts)
        print("\n")

    real = CombinaryCounter.compute_comb(len(right_zone_isets), right_iset_number)
    compute = skip_task_number
    for ti in task_slices:
        compute += CombinaryCounter.compute_comb(len(ti[1]), ti[2])

    print(real, compute, compute == real)

    print("skip number ", skip_task_number)
    print("compute tasks: ")
    for ti in task_slices:
        print(ti)

    if compute != real:
        # The original referenced an undefined name here, so a failed check
        # surfaced as NameError instead of this RuntimeError.
        raise RuntimeError("wrong case: ", right_iset_number)

    return skip_task_number, task_slices
    def eliminate_one_nse_condition(cls, nse_isets, original_left_isets,
                                    all_isets, pick_number):
        """Split one task slice so that picks containing nse_isets are skipped.

        A task slice means: ``original_left_isets`` are already picked and
        ``pick_number`` more elements are to be picked from ``all_isets``.

        * If no element of ``nse_isets`` is missing from
          ``original_left_isets``, the whole slice is skipped.
        * If the missing elements are all in ``all_isets`` and there are at
          most ``pick_number`` of them, the i-th new slice fixes the first
          ``i`` missing elements as picked and removes the (i+1)-th from the
          zone; the case where all of them are picked is counted as skipped.
        * Otherwise the slice is returned unchanged.

        :param cls: the enclosing class (unused)
        :param nse_isets: set of elements that must not all be picked
        :param original_left_isets: set of elements already picked
        :param all_isets: set of elements still available
        :param pick_number: number of elements still to pick
        :return: ``(skip_number, task_slices)``; each task slice is
            ``(left_isets, right_zone_isets, right_pick_number)``
        """
        pending_atoms = nse_isets.difference(original_left_isets)

        if not pending_atoms:
            skipped = CombinaryCounter.compute_comb(
                len(all_isets), pick_number)
            return skipped, []

        splittable = (pending_atoms.issubset(all_isets)
                      and len(pending_atoms) <= pick_number)
        if not splittable:
            return 0, [(original_left_isets, all_isets, pick_number)]

        task_slices = []
        forced_in = set()
        remaining_zone = copy.deepcopy(all_isets)
        for atom in list(pending_atoms):
            # Atoms seen so far are forced in; the current one is forced out.
            fixed_part = copy.deepcopy(forced_in)
            forced_in.add(atom)
            remaining_zone.remove(atom)
            task_slices.append((fixed_part.union(original_left_isets),
                                copy.deepcopy(remaining_zone),
                                pick_number - len(fixed_part)))

        # Combinations that pick every pending atom are eliminated.
        skip_number = CombinaryCounter.compute_comb(
            len(remaining_zone), pick_number - len(pending_atoms))
        return skip_number, task_slices
    def process_semi_valid_task_slices(cls, itask_id, itask, task_slice):
        """Drop the parts of a task slice whose fixed ids contain a semi-valid rule.

        The right zone of ``task_slice`` is separated into the ids that
        appear in ``itask.meta_data.search_i4_composed_iset_ids`` and the
        rest. If there are any such ids, the slice is split with
        ``vandermonde_generator`` over those two parts; otherwise it is
        processed as is. Each resulting piece is checked with
        ``iscm.check_contain_rules_without_i_n_iset``: pieces that pass the
        check are counted as skipped, the others are kept.

        :param cls: the enclosing class (unused)
        :param itask_id: id of the task, copied into the skip result
        :param itask: task object providing ``meta_data``, ``rule_number``
            and ``is_use_extended_rules``
        :param task_slice: (left_isets, right_zone_isets, right_zone_choice_number);
            the first two entries must be sets
        :return: ``(new_task_slices, valid_skip_result)`` where
            ``valid_skip_result`` is ``None`` when nothing was skipped, else
            ``(itask_id, ne_iset_number, 0, skip_number, skip_number)``
        """
        fixed_ids = task_slice[0]
        zone_ids = task_slice[1]
        zone_pick = task_slice[2]
        ne_iset_number = len(fixed_ids) + zone_pick
        search_i4_isets = set(itask.meta_data.search_i4_composed_iset_ids)

        zone_i4_ids = zone_ids.intersection(search_i4_isets)
        if zone_i4_ids:
            pieces = CombinationSearchingSpaceSplitter.vandermonde_generator(
                zone_i4_ids, zone_ids.difference(zone_i4_ids), zone_pick)
        else:
            pieces = [task_slice]

        skip_number = 0
        new_task_slices = []
        for piece in pieces:
            new_left_ids = fixed_ids.union(set(piece[0]))
            has_semi_valid_rule = iscm.check_contain_rules_without_i_n_iset(
                4, new_left_ids, itask.rule_number,
                itask.is_use_extended_rules)
            if has_semi_valid_rule:
                skip_number += CombinaryCounter.compute_comb(
                    len(piece[1]), piece[2])
            else:
                new_task_slices.append(
                    (new_left_ids, set(piece[1]), piece[2]))

        if skip_number > 0:
            valid_skip_result = (itask_id, ne_iset_number, 0, skip_number,
                                 skip_number)
        else:
            valid_skip_result = None

        return new_task_slices, valid_skip_result
Exemple #12
0
 def init_task_numbers(self):
     """Fill in the per-size and total task counts of this task.

     For every size ``i`` from ``self.min_ne`` to ``self.max_ne`` inclusive,
     C(number of search-space iset ids, i) is added to
     ``self.task_total_number`` and stored in
     ``self.hierarchical_task_number[i]``.
     """
     candidate_total = len(self.meta_data.search_space_iset_ids)
     for ne_count in range(self.min_ne, self.max_ne + 1):
         size_task_number = CombinaryCounter.compute_comb(
             candidate_total, ne_count)
         self.task_total_number += size_task_number
         self.hierarchical_task_number[ne_count] = size_task_number
    def itask_slices_generator(cls, isc_config_file="isets-tasks.json"):
        """Dispatch task slices for every task in the config to the task queue.

        Connects to the queue manager at ``config.task_host`` /
        ``config.task_host_port``. For each task and each size between its
        ``min_ne`` and ``max_ne``, it enumerates every combination of ids
        from ``search_i4_composed_iset_ids`` (the left zone). A combination
        for which ``iscm.check_contain_rules_without_i_n_iset`` returns true
        is reported on the result queue as skipped, with a count of
        C(right zone length, right pick number). Every other combination is
        put on the task queue. Finally 1000 kill signals (5 * 200) are put
        on the task queue.

        :param cls: the enclosing class (only used in the log message)
        :param isc_config_file: path of the task configuration file
        """
        msg_text = "%s init task slices generator ..." % str(cls)
        logging.info(msg_text)
        msg.send_message(msg_text)

        for queue_getter in ("get_task_queue", "get_result_queue"):
            SearchWorkerQueueManger.register(queue_getter)
        manager = SearchWorkerQueueManger(
            address=(config.task_host, config.task_host_port),
            authkey=bytes(config.task_host_key, encoding="utf-8"))
        manager.connect()
        task_queue = manager.get_task_queue()
        result_queue = manager.get_result_queue()

        isc_tasks = ITaskConfig(isc_config_file).isc_tasks

        for tid, it in enumerate(isc_tasks):
            min_ne = it.min_ne
            max_ne = it.max_ne
            unknown_iset_number = len(it.meta_data.search_space_iset_ids)
            rule_number = it.rule_number
            left_zone_iset_ids = it.meta_data.search_i4_composed_iset_ids
            left_zone_length = len(left_zone_iset_ids)
            right_zone_length = unknown_iset_number - left_zone_length
            is_use_extended_rules = it.is_use_extended_rules

            for ne_iset_number in range(min_ne, max_ne + 1):
                for left_iset_number in range(ne_iset_number + 1):
                    right_iset_number = ne_iset_number - left_iset_number
                    # Skip distributions a zone cannot satisfy.
                    if left_iset_number > left_zone_length or right_iset_number > right_zone_length:
                        continue

                    for left_ti in itertools.combinations(
                            left_zone_iset_ids, left_iset_number):
                        left_iset_ids = list(left_ti)
                        has_semi_valid_rule = iscm.check_contain_rules_without_i_n_iset(
                            4, left_iset_ids, rule_number,
                            is_use_extended_rules)

                        if not has_semi_valid_rule:
                            task_queue.put(
                                (tid, (ne_iset_number,
                                       set(left_zone_iset_ids),
                                       left_iset_ids)))
                            continue

                        # The whole C(right_zone_length, right_iset_number)
                        # block is skipped; report it as a statistic.
                        task_number = CombinaryCounter.compute_comb(
                            right_zone_length, right_iset_number)
                        result_queue.put(
                            (ITaskSignal.stat_signal, tid, ne_iset_number, 0,
                             task_number, task_number, None))

        working_hosts_number = 5
        for _ in range(working_hosts_number * 200):
            task_queue.put((ITaskSignal.kill_signal, -1))
        logging.info("all itasks has been dispatched")
    def itask_slice_generator_by_i4_meta(ne_iset_number, itask_id, itask,
                                         max_space_size, manager_tuple):
        """Dispatch the task slices of one size of one task, using the I4 meta data.

        The search ids are divided into a left zone
        (``search_i4_composed_iset_ids``) and a right zone (the rest).

        * Combinations that pick nothing from the left zone are not
          dispatched; when such combinations exist, their count is reported
          on the result queue as skipped.
        * For every left-zone pick count ``left_choice`` >= 1,
          ``i4_meta[left_choice]`` left-zone tuples are cut into index ranges.
          One task ``(itask_id, (left_choice, begin, end, right_choice))`` is
          put on the task queue per range. The remaining
          C(left zone, left_choice) - i4_meta[left_choice] tuples are
          reported as skipped, multiplied by the right-zone combination
          count.

        :param ne_iset_number: total number of ids to pick
        :param itask_id: id of the task, copied into queue items
        :param itask: task object providing ``k_m_n`` and ``meta_data``
        :param max_space_size: target size used to choose the range width
        :param manager_tuple: indexable; item 1 is the task queue and item 3
            the result queue
        """
        task_queue = manager_tuple[1]
        result_queue = manager_tuple[3]

        kmn_key = I4RawSearchMaster.get_kmn_meta_key(*itask.k_m_n)
        i4_meta = I4RawSearchMaster.i4_meta[kmn_key]

        left_zone_length = len(itask.meta_data.search_i4_composed_iset_ids)
        search_isets_length = len(itask.meta_data.search_space_iset_ids)
        right_zone_length = search_isets_length - left_zone_length

        task_slice_cnt = 0
        if ne_iset_number <= right_zone_length:
            # Left pick count 0: reported as skipped, never dispatched.
            valid_skip_number = CombinaryCounter.compute_comb(
                right_zone_length, ne_iset_number)
            result_tuple = (ITaskSignal.stat_signal, itask_id, ne_iset_number,
                            0, valid_skip_number, valid_skip_number, None)
            result_queue.put(result_tuple)

        for left_choice in range(1, left_zone_length + 1):
            right_choice = ne_iset_number - left_choice
            if right_choice > right_zone_length or left_choice > ne_iset_number:
                continue

            single_slice_right_task_number = CombinaryCounter.compute_comb(
                right_zone_length, right_choice)
            task_i4_slice_number = max_space_size // single_slice_right_task_number + 1
            non_semi_valid_i4_slices_size = i4_meta[left_choice]

            # itask_sizes is the number of ranges to cut; values outside
            # [92, 1200] are replaced by 1200, capped at the tuple count.
            itask_sizes = non_semi_valid_i4_slices_size // task_i4_slice_number
            if itask_sizes < 92 or itask_sizes > 1200:
                itask_sizes = min(1200, non_semi_valid_i4_slices_size)

            if itask_sizes > 0:
                # Width of each range; the last range absorbs the remainder.
                task_i4_slice_number = non_semi_valid_i4_slices_size // itask_sizes
                itask_splitting_points = [
                    i * task_i4_slice_number for i in range(itask_sizes)
                ]
            else:
                # No tuples to dispatch for this left pick count. Dividing
                # by itask_sizes here used to raise ZeroDivisionError.
                itask_splitting_points = [0]

            if itask_splitting_points[-1] < non_semi_valid_i4_slices_size:
                itask_splitting_points.append(non_semi_valid_i4_slices_size)

            for i in range(1, len(itask_splitting_points)):
                itask_slice_tuple = (left_choice,
                                     itask_splitting_points[i - 1],
                                     itask_splitting_points[i], right_choice)
                task_queue.put((itask_id, itask_slice_tuple))
                task_slice_cnt += 1

            total_i4_silces_size = CombinaryCounter.compute_comb(
                left_zone_length, left_choice)
            semi_valid_i4_slices_size = total_i4_silces_size - non_semi_valid_i4_slices_size
            if semi_valid_i4_slices_size > 0:
                valid_skip_number = semi_valid_i4_slices_size * single_slice_right_task_number
                result_tuple = (ITaskSignal.stat_signal, itask_id,
                                ne_iset_number, 0, valid_skip_number,
                                valid_skip_number, None)
                result_queue.put(result_tuple)

        msg_text = "itask %d-%d-%d ne iset number %d, put %d task slices" % (
            *itask.k_m_n, ne_iset_number, task_slice_cnt)
        logging.info(msg_text)
        msg.send_message(msg_text)