def get_int_from_locus(self, x):
    """Return an integer derived from the locus string ``x``.

    The leading run of digits in ``x`` is converted with
    ``utils.get_int(..., 0)`` and doubled; 1 is added when the last
    character of ``x`` is 'v'.

    E.g. '2v' => 2 * 2 + 1.

    According to the original note, the numeric part is 0 when ``x`` is not
    a number (e.g. an unnumbered locus); this relies on ``utils.get_int``
    returning its second argument for non-numeric input.
    """
    # Plain raw strings instead of the ur'' prefix: ur'' is a syntax error
    # on Python 3, and for these ASCII-only patterns r'' matches identically.
    leading_digits = re.sub(r'^(\d+).*$', r'\1', x)
    n = utils.get_int(leading_digits, 0) * 2
    # guard against the empty string before looking at the last character
    if x and 'v' in x[-1]:
        n += 1
    return n
def segment_units(self):
    """Segment every in-range fief unit and keep those that match.

    Reads ``self.options`` and updates these attributes on ``self``:

    * ``options``: normalised in place ('hilite' becomes a list,
      'hilite_groups' is derived from it, 'ignore', 'exclude', 'ulimit'
      and 'urange' get defaults);
    * ``segunits``: the units whose ``match_conditions`` is truthy after
      ``segment_unit()``, cut down to the first ``ulimit`` entries when
      ``ulimit`` > 0;
    * ``variants``: converted from a mapping text -> hits into a list of
      ``{'text': ..., 'hits': ...}`` dicts sorted by text;
    * ``stats``: 'range_size', 'result_size', 'result_size_pc' (an int, or
      'N/A' when the range is empty), 'duration_segmentation' in seconds,
      plus 'groups' (one zeroed counter per pattern group) and an initially
      empty 'patterns' dict.
    """
    self.segunits = []
    self.init_variant_patterns()

    t0 = datetime.now()

    # TODO: derive the info from the faceted_search settings.py or from a new
    # settings variable.

    # arguments
    # NOTE: args IS self.options (not a copy), so everything below is
    # written back into the options.
    args = self.options

    # unpack hilite
    # Because the list is stored back into the options, a second call would
    # see a list here; only split when the value has not been unpacked yet.
    hilite = args.get('hilite', '')
    if not isinstance(hilite, list):
        hilite = hilite.split(',')
    args['hilite'] = hilite

    # Look each pattern up once (assumes get_pattern_from_id() is a plain
    # lookup); empty ids and unknown patterns are skipped.
    hilite_groups = []
    for pid in hilite:
        if not pid:
            continue
        hilited_pattern = self.get_pattern_from_id(pid)
        if hilited_pattern:
            hilite_groups.append(
                self.get_group_key_from_pattern_key(hilited_pattern['key'])
            )
    args['hilite_groups'] = hilite_groups

    args['ignore'] = args.get('ignore', '')
    args['exclude'] = args.get('exclude', '')
    args['ulimit'] = dputils.get_int(args.get('ulimit', 10), 10)
    args['urange'] = args.get('urange', '')

    # Get the text units
    hand_filters.chrono('units:')

    self.stats = stats = {
        'duration_segmentation': 0,
        'range_size': 0,
        'patterns': {},
        'groups': {},
    }

    # One counter per pattern group; the group name is the pattern key
    # without its trailing '-<digits>' suffix.
    # (r'' rather than ur'': the latter is a syntax error on Python 3.)
    for pattern in self.get_patterns():
        group = re.sub(r'-\d+$', '', pattern['key'])
        stats['groups'][group] = 0

    # NOTE(review): content_xml id 4 is hard-coded - confirm which text
    # this refers to.
    units = self.get_unit_model().objects.filter(content_xml__id=4)
    for unit in units.iterator():
        # only fief
        types = unit.get_entry_type()
        if not types or 'F' not in types:
            continue

        # only selected range
        if not dputils.is_unit_in_range(unit.unitid, args['urange']):
            continue

        stats['range_size'] += 1

        # segment the unit
        self.segment_unit(unit, args)

        if unit.match_conditions:
            self.segunits.append(unit)

    hand_filters.chrono(':units')

    self.variants = [
        {'text': variant, 'hits': self.variants[variant]}
        for variant in sorted(self.variants)
    ]

    # stats
    # The percentage is computed before the result is truncated below.
    stats['result_size'] = len(self.segunits)
    if stats['range_size']:
        stats['result_size_pc'] = int(
            100.0 * stats['result_size'] / stats['range_size']
        )
    else:
        stats['result_size_pc'] = 'N/A'

    # limit size of returned result
    if args['ulimit'] > 0:
        self.segunits = self.segunits[:args['ulimit']]

    stats['duration_segmentation'] = (datetime.now() - t0).total_seconds()
def replace(match):
    """Return the locus markup that replaces one regex match.

    The last non-empty double-quoted value found in ``match.group(1)`` is
    used as the locus number. When no quoted value is found, the running
    counter ``self.rep_option`` is used instead. The counter is then set
    to one more than the number just emitted (or one more than its
    previous value when that number is not an integer, going by the
    ``default`` passed to ``get_int``).

    ``self`` and ``get_int`` come from the enclosing scope.
    """
    # !!! ASSUME pb is not in <p> or anything else
    captured = match.group(1)
    # r'' rather than ur'': the latter is a syntax error on Python 3 and
    # both forms match identically for these ASCII-only patterns.
    number = re.sub(r'^.*"([^"]+)".*$', r'\1', captured)
    # A successful substitution always drops at least the two quotes, so
    # an unchanged length means the pattern did not match.
    if len(number) == len(captured):
        number = self.rep_option
    ret = u'<p><span data-dpt="location" data-dpt-loctype="locus">%s</span></p>' % number
    # next number to fall back on for an element without a quoted value
    self.rep_option = get_int(number, default=self.rep_option) + 1
    return ret
def segment_units(self):
    """Segment every in-range fief unit and keep those that match.

    Reads ``self.options`` and updates these attributes on ``self``:

    * ``options``: normalised in place ('hilite' becomes a list,
      'hilite_groups' is derived from it, 'ignore', 'exclude', 'ulimit'
      and 'urange' get defaults);
    * ``segunits``: the units whose ``match_conditions`` is truthy after
      ``segment_unit()``, cut down to the first ``ulimit`` entries when
      ``ulimit`` > 0;
    * ``variants``: converted from a mapping text -> hits into a list of
      ``{'text': ..., 'hits': ...}`` dicts sorted by text;
    * ``stats``: 'range_size', 'result_size', 'result_size_pc' (an int, or
      'N/A' when the range is empty), 'duration_segmentation' in seconds,
      plus 'groups' (one zeroed counter per pattern group) and an initially
      empty 'patterns' dict.
    """
    self.segunits = []
    self.init_variant_patterns()

    t0 = datetime.now()

    # TODO: derive the info from the faceted_search settings.py or from a new
    # settings variable.

    # arguments
    # NOTE: args IS self.options (not a copy), so everything below is
    # written back into the options.
    args = self.options

    # unpack hilite
    # Because the list is stored back into the options, a second call would
    # see a list here; only split when the value has not been unpacked yet.
    hilite = args.get('hilite', '')
    if not isinstance(hilite, list):
        hilite = hilite.split(',')
    args['hilite'] = hilite

    # Look each pattern up once (assumes get_pattern_from_id() is a plain
    # lookup); empty ids and unknown patterns are skipped.
    hilite_groups = []
    for pid in hilite:
        if not pid:
            continue
        hilited_pattern = self.get_pattern_from_id(pid)
        if hilited_pattern:
            hilite_groups.append(
                self.get_group_key_from_pattern_key(hilited_pattern['key'])
            )
    args['hilite_groups'] = hilite_groups

    args['ignore'] = args.get('ignore', '')
    args['exclude'] = args.get('exclude', '')
    args['ulimit'] = dputils.get_int(args.get('ulimit', 10), 10)
    args['urange'] = args.get('urange', '')

    # Get the text units
    hand_filters.chrono('units:')

    self.stats = stats = {
        'duration_segmentation': 0,
        'range_size': 0,
        'patterns': {},
        'groups': {},
    }

    # One counter per pattern group; the group name is the pattern key
    # without its trailing '-<digits>' suffix.
    # (r'' rather than ur'': the latter is a syntax error on Python 3.)
    for pattern in self.get_patterns():
        group = re.sub(r'-\d+$', '', pattern['key'])
        stats['groups'][group] = 0

    # NOTE(review): content_xml id 4 is hard-coded - confirm which text
    # this refers to.
    units = self.get_unit_model().objects.filter(content_xml__id=4)
    for unit in units.iterator():
        # only fief
        types = unit.get_entry_type()
        if not types or 'F' not in types:
            continue

        # only selected range
        if not dputils.is_unit_in_range(unit.unitid, args['urange']):
            continue

        stats['range_size'] += 1

        # segment the unit
        self.segment_unit(unit, args)

        if unit.match_conditions:
            self.segunits.append(unit)

    hand_filters.chrono(':units')

    self.variants = [
        {'text': variant, 'hits': self.variants[variant]}
        for variant in sorted(self.variants)
    ]

    # stats
    # The percentage is computed before the result is truncated below.
    stats['result_size'] = len(self.segunits)
    if stats['range_size']:
        stats['result_size_pc'] = int(
            100.0 * stats['result_size'] / stats['range_size']
        )
    else:
        stats['result_size_pc'] = 'N/A'

    # limit size of returned result
    if args['ulimit'] > 0:
        self.segunits = self.segunits[:args['ulimit']]

    stats['duration_segmentation'] = (datetime.now() - t0).total_seconds()