def derive_depths(marker_list, additional_constraints=None):
    """Use constraint programming to derive the paragraph depths associated
    with a list of paragraph markers.

    Three solver variables are created per marker: "type<i>" (drawn from
    markers.types), "idx<i>" (0 through 50) and "depth<i>" (0 through 9, an
    arbitrary limit).

    marker_list -- sized sequence of paragraph markers. A falsy value (e.g.
        an empty list) yields [] without building a problem.
    additional_constraints -- optional iterable of extra constraints (e.g.
        expected marker types, etc.). Such constraints are functions of two
        parameters, the constraint function (problem.addConstraint) and a
        list of all variables. None (the default) means no extra
        constraints.

    Returns a list holding one Solution per assignment reported by
    problem.getSolutions()."""
    if not marker_list:
        return []
    # None stands in for "no extras" so that no mutable object is shared
    # between calls through the default argument.
    if additional_constraints is None:
        additional_constraints = ()
    problem = Problem()

    positions = range(len(marker_list))
    type_vars = ["type" + str(i) for i in positions]
    idx_vars = ["idx" + str(i) for i in positions]
    depth_vars = ["depth" + str(i) for i in positions]

    # Marker type per marker
    problem.addVariables(type_vars, markers.types)
    # Index within the marker list
    # NOTE(review): presumably the marker's position inside its type's
    # sequence -- confirm against rules.type_match
    problem.addVariables(idx_vars, range(51))
    # Depth in the tree, with an arbitrary limit of 10
    problem.addVariables(depth_vars, range(10))

    # Flat list laid out as type0, idx0, depth0, type1, idx1, depth1, ...
    all_vars = []
    for triple in zip(type_vars, idx_vars, depth_vars):
        all_vars.extend(triple)

    # Always start at depth 0
    problem.addConstraint(rules.must_be(0), ("depth0",))

    for idx, marker in enumerate(marker_list):
        problem.addConstraint(rules.type_match(marker),
                              (type_vars[idx], idx_vars[idx]))

        # The current marker's three variables come first, followed by the
        # variables of every earlier marker in their original order.
        start = 3 * idx
        prior_params = all_vars[start:start + 3] + all_vars[:start]

        problem.addConstraint(rules.same_type, prior_params)
        problem.addConstraint(rules.diff_type, prior_params)

    # @todo: There's probably efficiency gains to making these rules over
    # prefixes (see above) rather than over the whole collection at once
    problem.addConstraint(rules.same_depth_same_type, all_vars)
    problem.addConstraint(rules.stars_occupy_space, all_vars)

    for constraint in additional_constraints:
        constraint(problem.addConstraint, all_vars)

    return [Solution(solution) for solution in problem.getSolutions()]
Example #2
0
def derive_depths(marker_list, additional_constraints=None):
    """Use constraint programming to derive the paragraph depths associated
    with a list of paragraph markers.

    Every marker at position i contributes the solver variables "type<i>"
    (domain markers.types), "idx<i>" (domain 0-50) and "depth<i>" (domain
    0-9, an arbitrary limit).

    marker_list -- sized sequence of paragraph markers; when it is falsy
        (e.g. empty) the function returns [] immediately.
    additional_constraints -- optional iterable of extra constraints (e.g.
        expected marker types, etc.). Each one is a function of two
        parameters, the constraint function (problem.addConstraint) and a
        list of all variables. Defaults to None, meaning none are applied.

    Returns a list of Solution objects, one for each assignment returned by
    problem.getSolutions()."""
    if not marker_list:
        return []
    problem = Problem()
    count = len(marker_list)

    def names(prefix):
        """Return the variable names prefix0 .. prefix<count - 1>."""
        return [prefix + str(i) for i in range(count)]

    # Marker type per marker
    problem.addVariables(names("type"), markers.types)
    # Index within the marker list
    problem.addVariables(names("idx"), range(51))
    # Depth in the tree, with an arbitrary limit of 10
    problem.addVariables(names("depth"), range(10))

    # One [type, idx, depth] name triple per marker, plus the flattened form
    # that the collection-wide constraints receive.
    triples = [["type" + str(i), "idx" + str(i), "depth" + str(i)]
               for i in range(count)]
    all_vars = [name for triple in triples for name in triple]

    # Always start at depth 0
    problem.addConstraint(rules.must_be(0), ("depth0",))

    earlier = []    # variables of the markers already visited, in order
    for marker, triple in zip(marker_list, triples):
        type_var, idx_var, _ = triple
        problem.addConstraint(rules.type_match(marker), (type_var, idx_var))

        # Current marker's variables first, then all earlier ones. This
        # builds a new list, so extending `earlier` below does not alter it.
        prior_params = triple + earlier
        problem.addConstraint(rules.same_type, prior_params)
        problem.addConstraint(rules.diff_type, prior_params)
        earlier += triple

    # @todo: There's probably efficiency gains to making these rules over
    # prefixes (see above) rather than over the whole collection at once
    problem.addConstraint(rules.same_depth_same_type, all_vars)
    problem.addConstraint(rules.stars_occupy_space, all_vars)

    # `or ()` covers the None default without a shared mutable default list.
    for constraint in additional_constraints or ():
        constraint(problem.addConstraint, all_vars)

    return [Solution(solution) for solution in problem.getSolutions()]
Example #3
0
def derive_depths(original_markers, additional_constraints=None):
    """Use constraint programming to derive the paragraph depths associated
    with a list of paragraph markers.

    The solver runs over the list returned by
    _compress_markerless(original_markers). Each assignment it produces is
    passed through _decompress_markerless (together with the original
    markers) before being wrapped in a Solution.

    original_markers -- paragraph markers; a falsy value (e.g. an empty
        list) yields [] without building a problem.
    additional_constraints -- optional iterable of extra constraints (e.g.
        expected marker types, etc.). Such constraints are functions of two
        parameters, the constraint function (problem.addConstraint) and a
        list of all variables. None (the default) means no extra
        constraints.

    Returns a list of Solution objects, one per solver assignment."""
    if not original_markers:
        return []
    # None stands in for "no extras" so that no mutable object is shared
    # between calls through the default argument.
    if additional_constraints is None:
        additional_constraints = ()
    problem = Problem()
    marker_list = _compress_markerless(original_markers)

    # Depth in the tree, with an arbitrary limit of 10
    problem.addVariables(
        ["depth{}".format(i) for i in range(len(marker_list))], range(10))

    # Always start at depth 0
    problem.addConstraint(rules.must_be(0), ("depth0",))

    # Flat list laid out as type0, idx0, depth0, type1, idx1, depth1, ...
    all_vars = []
    for idx, marker in enumerate(marker_list):
        type_var = "type{}".format(idx)
        depth_var = "depth{}".format(idx)
        # Index within the marker list. Though this variable is redundant, it
        # makes the code easier to understand and doesn't have a significant
        # performance penalty
        idx_var = "idx{}".format(idx)

        # Only the types containing this marker, and only the positions at
        # which it occurs within them, are offered to the solver.
        typ_opts = [t for t in markers.types if marker in t]
        idx_opts = [pos for t in typ_opts
                    for pos, entry in enumerate(t) if entry == marker]
        problem.addVariable(type_var, typ_opts)
        problem.addVariable(idx_var, idx_opts)

        problem.addConstraint(rules.type_match(marker), [type_var, idx_var])
        all_vars.extend([type_var, idx_var, depth_var])

        if idx > 0:
            # Variables of the previous and the current marker
            pairs = all_vars[3 * (idx - 1):]
            problem.addConstraint(rules.depth_check, pairs)

        if idx > 1:
            # Variables of the two previous markers and the current one
            triplets = all_vars[3 * (idx - 2):]
            problem.addConstraint(rules.markerless_sandwich, triplets)
            problem.addConstraint(rules.star_sandwich, triplets)

    # separate loop so that the simpler checks run first
    for idx in range(1, len(marker_list)):
        split = 3 * idx
        # start with the current idx, then add on all previous
        params = all_vars[split:split + 3] + all_vars[:split]
        problem.addConstraint(rules.sequence, params)

    # @todo: There's probably efficiency gains to making these rules over
    # prefixes (see above) rather than over the whole collection at once
    problem.addConstraint(rules.same_parent_same_type, all_vars)
    problem.addConstraint(rules.stars_occupy_space, all_vars)

    for constraint in additional_constraints:
        constraint(problem.addConstraint, all_vars)

    return [
        Solution(_decompress_markerless(assignment, original_markers))
        for assignment in problem.getSolutionIter()
    ]
Example #4
0
def derive_depths(original_markers, additional_constraints=None):
    """Use constraint programming to derive the paragraph depths associated
    with a list of paragraph markers.

    The markers are first run through _compress_markerless and the solver
    works on that result. Every assignment found is handed to
    _decompress_markerless along with the original markers and then wrapped
    in a Solution.

    original_markers -- paragraph markers; when falsy (e.g. an empty list)
        the function returns [] immediately.
    additional_constraints -- optional iterable of extra constraints (e.g.
        expected marker types, etc.). Each one is a function of two
        parameters, the constraint function (problem.addConstraint) and a
        list of all variables. Defaults to None, meaning none are applied.

    Returns a list of Solution objects, one per solver assignment."""
    if not original_markers:
        return []
    problem = Problem()
    marker_list = _compress_markerless(original_markers)
    total = len(marker_list)

    # Depth in the tree, with an arbitrary limit of 10
    problem.addVariables(["depth" + str(i) for i in range(total)],
                         range(10))

    # Always start at depth 0
    problem.addConstraint(rules.must_be(0), ("depth0", ))

    # Flat list laid out as type0, idx0, depth0, type1, idx1, depth1, ...
    all_vars = []
    for idx, marker in enumerate(marker_list):
        # Index within the marker list. Though the idx variable is redundant,
        # it makes the code easier to understand and doesn't have a
        # significant performance penalty
        type_var, idx_var, depth_var = (
            "{}{}".format(prefix, idx) for prefix in ("type", "idx", "depth")
        )

        # Restrict the domains up front: only types that contain the marker
        # and only the positions where it appears in those types.
        typ_opts = [t for t in markers.types if marker in t]
        idx_opts = [
            pos for t in typ_opts
            for pos, entry in enumerate(t) if entry == marker
        ]
        problem.addVariable(type_var, typ_opts)
        problem.addVariable(idx_var, idx_opts)

        problem.addConstraint(rules.type_match(marker), [type_var, idx_var])
        all_vars.extend([type_var, idx_var, depth_var])

        if idx > 0:
            # Previous marker's variables followed by the current marker's.
            # pair_rules is referenced unqualified, so it must be defined or
            # imported at module level.
            pairs = all_vars[3 * (idx - 1):]
            problem.addConstraint(pair_rules, pairs)

        if idx > 1:
            # Variables of the two previous markers and the current one
            triplets = all_vars[3 * (idx - 2):]
            problem.addConstraint(rules.triplet_tests, triplets)

    # separate loop so that the simpler checks run first
    for idx in range(1, total):
        split = 3 * idx
        # start with the current idx, then add on all previous
        params = all_vars[split:split + 3] + all_vars[:split]
        problem.addConstraint(rules.continue_previous_seq, params)

    # @todo: There's probably efficiency gains to making these rules over
    # prefixes (see above) rather than over the whole collection at once
    problem.addConstraint(rules.same_parent_same_type, all_vars)
    problem.addConstraint(rules.stars_occupy_space, all_vars)

    # `or ()` covers the None default without a shared mutable default list.
    for constraint in additional_constraints or ():
        constraint(problem.addConstraint, all_vars)

    return [
        Solution(_decompress_markerless(assignment, original_markers))
        for assignment in problem.getSolutionIter()
    ]