def raw_cleanup(raw):
    """
    Normalize a raw value so it can be used in raw comparisons.

    The value is lower-cased, then handed to the formatter composed from
    ``cleanup`` and ``strip``.

    :param raw: value to normalize; must expose ``lower()`` (presumably a string)
    :type raw: str
    :return: whatever the composed formatter returns for the lower-cased value
    :rtype: str
    """
    # Build the formatter first, exactly as the one-liner did, then feed it.
    normalize = formatters(cleanup, strip)
    return normalize(raw.lower())
def title():
    """
    Builder for rebulk object.

    Registers the ``TitleFromPosition`` and ``PreferTitleWithYear`` rules, plus a
    functional pattern named ``title`` that is disabled whenever the context has
    no truthy ``expected_title`` entry.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    rebulk = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)

    find_expected_title = build_expected_function('expected_title')

    # Keyword options of the functional pattern, gathered in one place.
    pattern_options = dict(
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=formatters(cleanup, reorder_title),
        # On conflict, always yield the other match.
        conflict_solver=lambda match, other: other,
        disabled=lambda context: not context.get('expected_title'),
    )
    rebulk.functional(find_expected_title, **pattern_options)

    return rebulk
def title():
    """
    Builder for rebulk object.

    The returned object carries the ``TitleFromPosition`` and
    ``PreferTitleWithYear`` rules and one functional pattern built from the
    ``expected_title`` option.

    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def keep_other(match, other):  # pylint:disable=unused-argument
        """Resolve a conflict by returning the other match."""
        return other

    def without_expected_title(context):
        """Tell whether the context lacks a truthy ``expected_title`` entry."""
        return not context.get('expected_title')

    rebulk = Rebulk().rules(TitleFromPosition, PreferTitleWithYear)

    expected_title = build_expected_function('expected_title')

    rebulk.functional(
        expected_title,
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=formatters(cleanup, reorder_title),
        conflict_solver=keep_other,
        disabled=without_expected_title,
    )

    return rebulk
def title(config):  # pylint:disable=unused-argument
    """
    Builder for rebulk object.

    The whole object is disabled when ``is_disabled(context, 'title')`` is truthy;
    its functional pattern is additionally disabled when the context has no
    truthy ``expected_title`` entry.

    :param config: rule configuration (not read by this builder)
    :type config: dict
    :return: Created Rebulk object
    :rtype: Rebulk
    """
    def title_is_disabled(context):
        """Tell whether the ``title`` property is disabled for this context."""
        return is_disabled(context, 'title')

    def expected_title_is_missing(context):
        """Tell whether the context lacks a truthy ``expected_title`` entry."""
        return not context.get('expected_title')

    def pick_other(match, other):  # pylint:disable=unused-argument
        """Resolve a conflict by returning the other match."""
        return other

    rebulk = Rebulk(disabled=title_is_disabled)
    rebulk.rules(TitleFromPosition, PreferTitleWithYear)

    rebulk.functional(
        build_expected_function('expected_title'),
        name='title',
        tags=['expected', 'title'],
        validator=seps_surround,
        formatter=formatters(cleanup, reorder_title),
        conflict_solver=pick_other,
        disabled=expected_title_is_missing,
    )

    return rebulk
def check_titles_in_filepart(self, filepart, matches, context):
    """
    Find title in filepart (ignoring language)

    The holes reported by ``matches.holes`` for the filepart span are post-processed
    by ``self.holes_process`` and inspected in order. For each usable hole, ignored
    matches sitting at its trailing or leading edge may be kept (which can crop the
    hole), and the first hole that still carries a value is turned into the title.

    :param filepart: match whose ``span`` delimits the searched area
    :param matches: matches container queried for holes, ranges and chains
    :param context: context forwarded to ``self.should_remove``
    :return: ``(titles, to_remove)`` for the first hole that still has a value, where
        ``titles`` is the list of title matches and ``to_remove`` the ignored matches
        selected by ``self.should_remove`` and not kept; ``None`` when no hole qualifies
    :rtype: tuple or None
    """
    # pylint:disable=too-many-locals,too-many-branches,too-many-statements
    start, end = filepart.span

    holes = matches.holes(start, end + 1, formatter=formatters(cleanup, reorder_title),
                          ignore=self.is_ignored,
                          predicate=lambda hole: hole.value)

    holes = self.holes_process(holes, matches)

    for hole in holes:
        # pylint:disable=cell-var-from-loop
        if not hole or (self.hole_filter and not self.hole_filter(hole, matches)):
            continue

        to_remove = []
        to_keep = []

        ignored_matches = matches.range(hole.start, hole.end, self.is_ignored)

        if ignored_matches:
            # First pass, from the end of the hole backwards: ignored matches found
            # by chain_before(hole.end) may be kept and may crop the hole end.
            for ignored_match in reversed(ignored_matches):
                # pylint:disable=undefined-loop-variable
                trailing = matches.chain_before(hole.end, seps, predicate=lambda match: match == ignored_match)
                if trailing:
                    should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, False)
                    if should_keep:
                        # should_keep may answer with an (append, crop) pair or with a
                        # single value standing for both.
                        # pylint:disable=unpacking-non-sequence
                        try:
                            append, crop = should_keep
                        except TypeError:
                            append, crop = should_keep, should_keep
                        if append:
                            to_keep.append(ignored_match)
                        if crop:
                            hole.end = ignored_match.start

            # Second pass, from the start of the hole forwards, for the ignored
            # matches that were not kept by the first pass.
            for ignored_match in ignored_matches:
                if ignored_match not in to_keep:
                    starting = matches.chain_after(hole.start, seps,
                                                   predicate=lambda match: match == ignored_match)
                    if starting:
                        should_keep = self.should_keep(ignored_match, to_keep, matches, filepart, hole, True)
                        if should_keep:
                            # pylint:disable=unpacking-non-sequence
                            try:
                                append, crop = should_keep
                            except TypeError:
                                append, crop = should_keep, should_keep
                            if append:
                                to_keep.append(ignored_match)
                            if crop:
                                hole.start = ignored_match.end

        for match in ignored_matches:
            if self.should_remove(match, matches, filepart, hole, context):
                to_remove.append(match)
        for keep_match in to_keep:
            # A kept match is not necessarily flagged by should_remove; an unguarded
            # list.remove() would raise ValueError in that case.
            if keep_match in to_remove:
                to_remove.remove(keep_match)

        if hole and hole.value:
            hole.name = self.match_name
            hole.tags = self.match_tags
            if self.alternative_match_name:
                # Split and keep values that can be a title
                titles = hole.split(title_seps, lambda match: match.value)
                for title_match in list(titles[1:]):
                    previous_title = titles[titles.index(title_match) - 1]
                    separator = matches.input_string[previous_title.end:title_match.start]
                    # A lone '-' with no separator on either side glues both parts
                    # together: merge them back into the previous title.
                    if len(separator) == 1 and separator == '-' \
                            and previous_title.raw[-1] not in seps \
                            and title_match.raw[0] not in seps:
                        titles[titles.index(title_match) - 1].end = title_match.end
                        titles.remove(title_match)
                    else:
                        title_match.name = self.alternative_match_name

            else:
                titles = [hole]
            return titles, to_remove

    # No hole produced a title.
    return None
def check_titles_in_filepart(self, filepart, matches, context):  # pylint:disable=inconsistent-return-statements
    """
    Find title in filepart (ignoring language)

    Holes of the filepart span are processed in order. Ignored matches at the
    edges of a hole may be kept and may crop it; the first hole left with a value
    is named as the title and returned together with the ignored matches to drop.
    Nothing is returned when no hole qualifies.

    :param filepart: match whose ``span`` delimits the searched area
    :param matches: matches container queried for holes, ranges and chains
    :param context: context forwarded to ``self.should_remove``
    :return: ``(titles, to_remove)`` or ``None``
    """
    # pylint:disable=too-many-locals,too-many-branches,too-many-statements
    span_start, span_end = filepart.span

    candidate_holes = matches.holes(
        span_start, span_end + 1,
        formatter=formatters(cleanup, reorder_title),
        ignore=self.is_ignored,
        predicate=lambda m: m.value)
    candidate_holes = self.holes_process(candidate_holes, matches)

    for hole in candidate_holes:
        if not hole:
            continue
        if self.hole_filter and not self.hole_filter(hole, matches):
            continue

        kept = []
        ignored = matches.range(hole.start, hole.end, self.is_ignored)

        if ignored:
            # Trailing side: walk the ignored matches from last to first.
            for ignored_match in reversed(ignored):
                # pylint:disable=undefined-loop-variable, cell-var-from-loop
                if not matches.chain_before(hole.end, seps, predicate=lambda m: m == ignored_match):
                    continue
                verdict = self.should_keep(ignored_match, kept, matches, filepart, hole, False)
                if not verdict:
                    continue
                # The verdict is either an (append, crop) pair or one value for both.
                # pylint:disable=unpacking-non-sequence
                try:
                    append, crop = verdict
                except TypeError:
                    append = crop = verdict
                if append:
                    kept.append(ignored_match)
                if crop:
                    hole.end = ignored_match.start

            # Leading side: walk forward over the matches not kept so far.
            for ignored_match in ignored:
                # pylint:disable=cell-var-from-loop
                if ignored_match in kept:
                    continue
                if not matches.chain_after(hole.start, seps, predicate=lambda m: m == ignored_match):
                    continue
                verdict = self.should_keep(ignored_match, kept, matches, filepart, hole, True)
                if not verdict:
                    continue
                # pylint:disable=unpacking-non-sequence
                try:
                    append, crop = verdict
                except TypeError:
                    append = crop = verdict
                if append:
                    kept.append(ignored_match)
                if crop:
                    hole.start = ignored_match.end

        to_remove = [candidate for candidate in ignored
                     if self.should_remove(candidate, matches, filepart, hole, context)]
        for kept_match in kept:
            if kept_match in to_remove:
                to_remove.remove(kept_match)

        if not (hole and hole.value):
            continue

        hole.name = self.match_name
        hole.tags = self.match_tags

        if not self.alternative_match_name:
            return [hole], to_remove

        # Split and keep values that can be a title
        titles = hole.split(title_seps, lambda m: m.value)
        for title_match in list(titles[1:]):
            previous_title = titles[titles.index(title_match) - 1]
            separator = matches.input_string[previous_title.end:title_match.start]
            joined_by_dash = (len(separator) == 1 and separator == '-'
                              and previous_title.raw[-1] not in seps
                              and title_match.raw[0] not in seps)
            if joined_by_dash:
                # Fold this part back into the one that precedes it.
                previous_title.end = title_match.end
                titles.remove(title_match)
            else:
                title_match.name = self.alternative_match_name
        return titles, to_remove