Exemple #1
0
def raw_cleanup(raw):
    """
    Normalise a raw value so that two raw values can be compared.

    The value is lowercased, then passed through the formatter built from
    ``cleanup`` and ``strip``.

    :param raw: raw value to normalise; must provide ``lower()``
    :type raw: presumably str -- confirm against callers
    :return: output of the ``cleanup``/``strip`` formatter for the lowercased value
    :rtype: whatever the formatter returns (presumably str)
    """
    normalise = formatters(cleanup, strip)
    lowered = raw.lower()
    return normalise(lowered)
Exemple #2
0
def raw_cleanup(raw):
    """
    Prepare a raw value for raw comparison.

    :param raw: value to prepare; it is lowercased with ``raw.lower()`` first
    :type raw: presumably str -- confirm against callers
    :return: the lowercased value after the ``cleanup`` and ``strip`` formatters
    :rtype: whatever the formatter chain returns (presumably str)
    """
    # Build the formatter chain first, then feed it the lowercased input.
    comparison_formatter = formatters(cleanup, strip)
    return comparison_formatter(raw.lower())
Exemple #3
0
def title():
    """
    Build the Rebulk object used for titles.

    The object is created with the TitleFromPosition and PreferTitleWithYear
    rules and receives one functional pattern named 'title', tagged
    'expected' and 'title'.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)

    def prefer_other(match, other):  # pylint:disable=unused-argument
        # Conflict solver: always answers with the other match.
        return other

    def no_expected_title(context):
        # The pattern is disabled while context.get('expected_title') is falsy.
        return not context.get('expected_title')

    find_expected_title = build_expected_function('expected_title')
    title_formatter = formatters(cleanup, reorder_title)

    rebulk.functional(
        find_expected_title,
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=title_formatter,
        conflict_solver=prefer_other,
        disabled=no_expected_title,
    )

    return rebulk
Exemple #4
0
def title():
    """
    Create the Rebulk builder for titles.

    The builder carries the TitleFromPosition and PreferTitleWithYear rules and
    one functional pattern, named 'title', driven by the function returned
    from ``build_expected_function('expected_title')``.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    builder = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)

    expected_finder = build_expected_function('expected_title')

    # Keyword arguments are gathered first so the registration call stays short.
    pattern_options = {
        'name': 'title',
        'tags': ['expected', 'title'],
        'validator': seps_surround,
        'formatter': formatters(cleanup, reorder_title),
        # The conflict solver always answers with `other`.
        'conflict_solver': lambda match, other: other,
        # Pattern is turned off while context.get('expected_title') is falsy.
        'disabled': lambda context: not context.get('expected_title'),
    }
    builder.functional(expected_finder, **pattern_options)

    return builder
Exemple #5
0
def title(config):  # pylint:disable=unused-argument
    """
    Build the Rebulk object used for titles.

    The whole object is disabled when ``is_disabled(context, 'title')`` is
    true; its single functional pattern is additionally disabled while
    ``context.get('expected_title')`` is falsy.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def title_disabled(context):
        return is_disabled(context, 'title')

    def expected_title_missing(context):
        return not context.get('expected_title')

    def keep_other(match, other):  # pylint:disable=unused-argument
        # Conflict solver: always answers with the other match.
        return other

    rebulk = Rebulk(disabled=title_disabled)
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    rebulk.functional(
        build_expected_function('expected_title'),
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=formatters(cleanup, reorder_title),
        conflict_solver=keep_other,
        disabled=expected_title_missing,
    )

    return rebulk
Exemple #6
0
def title(config):  # pylint:disable=unused-argument
    """
    Create and configure the Rebulk builder for titles.

    :param config: rule configuration (accepted but not used here)
    :type config: dict
    :return: Created Rebulk object, holding the TitleFromPosition and
        PreferTitleWithYear rules plus the 'title' functional pattern
    :rtype: Rebulk
    """
    # Builder-level switch: evaluated through is_disabled(context, 'title').
    builder = Rebulk(disabled=lambda context: is_disabled(context, 'title'))
    builder.rules(TitleFromPosition, PreferTitleWithYear)

    expected_finder = build_expected_function('expected_title')
    title_formatter = formatters(cleanup, reorder_title)

    functional_options = dict(
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=title_formatter,
        conflict_solver=lambda match, other: other,
        # Pattern-level switch: off while 'expected_title' is absent or falsy.
        disabled=lambda context: not context.get('expected_title'),
    )
    builder.functional(expected_finder, **functional_options)

    return builder
Exemple #7
0
    def check_titles_in_filepart(self, filepart, matches, context):
        """
        Find title in filepart (ignoring language)

        Holes are requested from ``matches.holes`` over the filepart span, with
        ``self.is_ignored`` as the ``ignore`` argument, then post-processed by
        ``self.holes_process``. For each hole that passes ``self.hole_filter``,
        the ignored matches inside it are inspected: those found through
        ``matches.chain_before(hole.end, ...)`` or
        ``matches.chain_after(hole.start, ...)`` are submitted to
        ``self.should_keep``, which may ask to keep the match and/or to crop
        the hole so it stops at that match.

        :param filepart: object whose ``span`` gives the (start, end) range to scan
        :param matches: matches container queried for holes and ignored matches
        :param context: forwarded unchanged to ``self.should_remove``
        :return: ``(titles, to_remove)`` for the first hole that is not skipped
            and still has a value after cropping, ``None`` when no hole qualifies.
            ``titles`` holds the hole itself, or its split parts when
            ``self.alternative_match_name`` is set; ``to_remove`` holds the
            ignored matches flagged by ``self.should_remove`` and not kept.
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span

        holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda hole: hole.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            # pylint:disable=cell-var-from-loop
            if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
                continue

            to_remove = []
            to_keep = []

            ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

            if ignored_matches:
                # First pass, last ignored match first: look from the hole end.
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable
                    trailing = matches.chain_before(hole.end, seps, predicate=lambda match: match == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                        if should_keep:
                            # should_keep is either an (append, crop) pair or a
                            # single non-iterable truthy value standing for both.
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.end = ignored_match.start

                # Second pass, in order: look from the (possibly cropped) hole
                # start, skipping matches already kept by the first pass.
                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(hole.start, seps,
                                                       predicate=lambda match: match == ignored_match)
                        if starting:
                            should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    hole.start = ignored_match.end

            for match in ignored_matches:
                if self.should_remove(match, matches, filepart, hole, context):
                    to_remove.append(match)
            for keep_match in to_keep:
                # A kept match is not necessarily flagged by should_remove, and
                # list.remove raises ValueError when the item is missing.
                if keep_match in to_remove:
                    to_remove.remove(keep_match)

            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda match: match.value)
                    # Iterate over a copy: parts may be removed from `titles` below.
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.end:title_match.start]
                        if separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            # A lone '-' with no separator character on either
                            # side: merge this part into the previous one.
                            previous_title.end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name

                else:
                    titles = [hole]
                return titles, to_remove

        # No hole produced a title.
        return None
Exemple #8
0
    def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
        """
        Find title in filepart (ignoring language)

        Holes are requested from ``matches.holes`` over the filepart span, with
        ``self.is_ignored`` as the ``ignore`` argument, then post-processed by
        ``self.holes_process``. Each hole is examined in turn and the first one
        that still has a value ends the search.

        :param filepart: object whose ``span`` gives the (start, end) range to scan
        :param matches: matches container queried for holes and ignored matches
        :param context: forwarded unchanged to ``self.should_remove``
        :return: ``(titles, to_remove)`` for the first hole that is not skipped
            and still has a value; implicitly ``None`` when no hole qualifies.
            ``titles`` is ``[hole]``, or the split parts of the hole when
            ``self.alternative_match_name`` is set.
        """
        # pylint:disable=too-many-locals,too-many-branches,too-many-statements
        start, end = filepart.span

        holes = matches.holes(start,
                              end + 1,
                              formatter=formatters(cleanup, reorder_title),
                              ignore=self.is_ignored,
                              predicate=lambda m: m.value)

        holes = self.holes_process(holes, matches)

        for hole in holes:
            # Skip falsy holes and holes rejected by the optional hole_filter.
            if not hole or (self.hole_filter
                            and not self.hole_filter(hole, matches)):
                continue

            to_remove = []
            to_keep = []

            ignored_matches = matches.range(hole.start, hole.end,
                                            self.is_ignored)

            if ignored_matches:
                # First pass, last ignored match first: look from the hole end.
                for ignored_match in reversed(ignored_matches):
                    # pylint:disable=undefined-loop-variable, cell-var-from-loop
                    trailing = matches.chain_before(
                        hole.end, seps, predicate=lambda m: m == ignored_match)
                    if trailing:
                        should_keep = self.should_keep(ignored_match, to_keep,
                                                       matches, filepart, hole,
                                                       False)
                        if should_keep:
                            # should_keep is either an (append, crop) pair or a
                            # single non-iterable truthy value standing for both.
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                # The hole now stops where the ignored match begins.
                                hole.end = ignored_match.start

                # Second pass, in order: look from the (possibly cropped) hole
                # start, skipping matches already kept by the first pass.
                for ignored_match in ignored_matches:
                    if ignored_match not in to_keep:
                        starting = matches.chain_after(
                            hole.start,
                            seps,
                            predicate=lambda m: m == ignored_match)
                        if starting:
                            should_keep = self.should_keep(
                                ignored_match, to_keep, matches, filepart,
                                hole, True)
                            if should_keep:
                                # pylint:disable=unpacking-non-sequence
                                try:
                                    append, crop = should_keep
                                except TypeError:
                                    append, crop = should_keep, should_keep
                                if append:
                                    to_keep.append(ignored_match)
                                if crop:
                                    # The hole now begins where the ignored match stops.
                                    hole.start = ignored_match.end

            # Collect the ignored matches flagged for removal, then spare the kept ones.
            for match in ignored_matches:
                if self.should_remove(match, matches, filepart, hole, context):
                    to_remove.append(match)
            for keep_match in to_keep:
                # Membership guard: a kept match may never have been flagged,
                # and list.remove raises ValueError on a missing item.
                if keep_match in to_remove:
                    to_remove.remove(keep_match)

            if hole and hole.value:
                hole.name = self.match_name
                hole.tags = self.match_tags
                if self.alternative_match_name:
                    # Split and keep values that can be a title
                    titles = hole.split(title_seps, lambda m: m.value)
                    # Iterate over a copy: parts may be removed from `titles` below.
                    for title_match in list(titles[1:]):
                        previous_title = titles[titles.index(title_match) - 1]
                        separator = matches.input_string[previous_title.
                                                         end:title_match.start]
                        # A lone '-' with no separator character on either side:
                        # extend the previous part over this one and drop it.
                        if len(separator) == 1 and separator == '-' \
                                and previous_title.raw[-1] not in seps \
                                and title_match.raw[0] not in seps:
                            titles[titles.index(title_match) -
                                   1].end = title_match.end
                            titles.remove(title_match)
                        else:
                            title_match.name = self.alternative_match_name

                else:
                    titles = [hole]
                # Only the first hole with a value is reported.
                return titles, to_remove