예제 #1
0
파일: overview.py 프로젝트: kcl-ddh/digipal
 def get_int_from_locus(self, x):
     """Convert a locus string into a sortable integer.

     The leading folio number is doubled and 1 is added when the locus
     ends with 'v', e.g. '2r' => 4 and '2v' => 2 * 2 + 1 = 5.

     Returns 0 when the locus is empty, None, or does not start with a
     number (e.g. 'unnumbered'), even if that text happens to end in 'v'.
     """
     if not x:
         return 0

     # Only the leading run of digits is the folio number.
     found = re.match(r'\d+', x)
     if found is None:
         # Not a numbered locus: never apply the 'v' offset.
         return 0

     n = int(found.group(0)) * 2
     if x.endswith('v'):
         n += 1
     return n
예제 #2
0
    def segment_units(self):
        self.segunits = []
        
        self.init_variant_patterns()

        t0 = datetime.now()

        # TODO: derive the info from the faceted_search settings.py or from a new
        # settings variable.

        # arguments
        args = self.options
        # unpack hilite
        args['hilite'] = args.get('hilite', '').split(',')
        args['hilite_groups'] = [self.get_group_key_from_pattern_key(self.get_pattern_from_id(pid)['key']) for pid in args['hilite'] if pid and self.get_pattern_from_id(pid)]
        args['ignore'] = args.get('ignore', '')
        args['exclude'] = args.get('exclude', '')
        
        args['ulimit'] = dputils.get_int(args.get('ulimit', 10), 10)
        args['urange'] = args.get('urange', '')

        # Get the text units
        hand_filters.chrono('units:')
        self.stats = stats = {'duration_segmentation': 0, 'range_size': 0, 'patterns': {}, 'groups': {}}
        for pattern in self.get_patterns():
            group = re.sub(ur'-\d+$', '', pattern['key'])
            self.stats['groups'][group] = 0

        for unit in self.get_unit_model().objects.filter(content_xml__id=4).iterator():
            # only fief
            types = unit.get_entry_type()
            if not types or 'F' not in types: continue

            # only selected range
            if not dputils.is_unit_in_range(unit.unitid, args['urange']): continue

            stats['range_size'] += 1

            # segment the unit
            self.segment_unit(unit, args)

            if unit.match_conditions:
                self.segunits.append(unit)

        hand_filters.chrono(':units')

        self.variants = [{'text': variant, 'hits': self.variants[variant]} for variant in sorted(self.variants.keys())]

        # stats
        stats['result_size'] = len(self.segunits)
        stats['result_size_pc'] = int(100.0 * stats['result_size'] / stats['range_size']) if stats['range_size'] else 'N/A'

        # limit size of returned result
        if args['ulimit'] > 0:
            self.segunits = self.segunits[0:args['ulimit']]

        stats['duration_segmentation'] = (datetime.now() - t0).total_seconds()
예제 #3
0
        def replace(match):
            # !!! ASSUME pb is not in <p> or anything else

            number = re.sub(ur'^.*"([^"]+)".*$', ur'\1', match.group(1))
            if len(number) == len(match.group(1)):
                number = self.rep_option

            ret = u'<p><span data-dpt="location" data-dpt-loctype="locus">%s</span></p>' % number

            self.rep_option = get_int(number, default=self.rep_option) + 1

            return ret
예제 #4
0
파일: dptext.py 프로젝트: suzypiat/digipal
        def replace(match):
            # !!! ASSUME pb is not in <p> or anything else

            number = re.sub(ur'^.*"([^"]+)".*$', ur'\1', match.group(1))
            if len(number) == len(match.group(1)):
                number = self.rep_option

            ret = u'<p><span data-dpt="location" data-dpt-loctype="locus">%s</span></p>' % number

            self.rep_option = get_int(number, default=self.rep_option) + 1

            return ret
예제 #5
0
    def segment_units(self):
        self.segunits = []

        self.init_variant_patterns()

        t0 = datetime.now()

        # TODO: derive the info from the faceted_search settings.py or from a new
        # settings variable.

        # arguments
        args = self.options
        # unpack hilite
        args['hilite'] = args.get('hilite', '').split(',')
        args['hilite_groups'] = [
            self.get_group_key_from_pattern_key(
                self.get_pattern_from_id(pid)['key']) for pid in args['hilite']
            if pid and self.get_pattern_from_id(pid)
        ]
        args['ignore'] = args.get('ignore', '')
        args['exclude'] = args.get('exclude', '')

        args['ulimit'] = dputils.get_int(args.get('ulimit', 10), 10)
        args['urange'] = args.get('urange', '')

        # Get the text units
        hand_filters.chrono('units:')
        self.stats = stats = {
            'duration_segmentation': 0,
            'range_size': 0,
            'patterns': {},
            'groups': {}
        }
        for pattern in self.get_patterns():
            group = re.sub(ur'-\d+$', '', pattern['key'])
            self.stats['groups'][group] = 0

        for unit in self.get_unit_model().objects.filter(
                content_xml__id=4).iterator():
            # only fief
            types = unit.get_entry_type()
            if not types or 'F' not in types:
                continue

            # only selected range
            if not dputils.is_unit_in_range(unit.unitid, args['urange']):
                continue

            stats['range_size'] += 1

            # segment the unit
            self.segment_unit(unit, args)

            if unit.match_conditions:
                self.segunits.append(unit)

        hand_filters.chrono(':units')

        self.variants = [{
            'text': variant,
            'hits': self.variants[variant]
        } for variant in sorted(self.variants.keys())]

        # stats
        stats['result_size'] = len(self.segunits)
        stats['result_size_pc'] = int(
            100.0 * stats['result_size'] /
            stats['range_size']) if stats['range_size'] else 'N/A'

        # limit size of returned result
        if args['ulimit'] > 0:
            self.segunits = self.segunits[0:args['ulimit']]

        stats['duration_segmentation'] = (datetime.now() - t0).total_seconds()