コード例 #1
0
    def command_unit(self):
        """Print the units found in one TextContentXML record.

        Command arguments, read with self.get_arg():
          1: id of the TextContentXML record; when it is empty the first
             record of the unfiltered queryset is used instead
          2: location type handed to get_all_units() (default 'locus')
          3: optional location; when provided, only the units accepted by
             dputils.is_unit_in_range() are listed

        For each listed unit a summary line is printed (unit id, content
        length, repr of the first 10 characters of the content). When a
        non-empty location was given the repr of the full content follows.
        A final line reports the number of listed units. Nothing is printed
        if no record is found.
        """
        from digipal_text.models import TextContentXML
        from digipal_text.views.viewer import get_fragment_extent, get_all_units

        record_id = self.get_arg(1)
        criteria = {'id': record_id} if record_id else {}
        text_xml = TextContentXML.objects.filter(**criteria).first()

        if not text_xml:
            return

        print(text_xml)
        location_type = self.get_arg(2, 'locus')
        location = self.get_arg(3, None)

        listed = 0
        for unit in get_all_units(text_xml.content, location_type):
            # a missing location means "list every unit"
            if location is not None and \
                    not dputils.is_unit_in_range(unit['unitid'], location):
                continue

            listed += 1
            summary = (
                unit['unitid'],
                len(unit['content']),
                repr(unit['content'][:10]),
            )
            print('%-10s %-5s %-10s' % summary)
            if location:
                print(repr(unit['content']))

        print('%s units' % listed)
コード例 #2
0
ファイル: dptext.py プロジェクト: suzypiat/digipal
    def command_unit(self):
        """List the units contained in a TextContentXML record.

        Arguments are read with self.get_arg():
          1: id of the TextContentXML record to inspect; if empty, the
             first record returned by the unfiltered query is taken
          2: location type passed on to get_all_units(), 'locus' by default
          3: optional location; if set, a unit is listed only when
             dputils.is_unit_in_range() accepts its unit id

        Output: the record itself, then one line per listed unit (unit id,
        length of the content, repr of its first 10 characters), followed
        by the repr of the whole content when a non-empty location was
        supplied, and finally the count of listed units. If no record
        matches, the method prints nothing.
        """
        from digipal_text.models import TextContentXML
        from digipal_text.views.viewer import get_fragment_extent, get_all_units

        record_id = self.get_arg(1)
        lookup = {'id': record_id} if record_id else {}
        record = TextContentXML.objects.filter(**lookup).first()

        if not record:
            return

        print(record)
        location_type = self.get_arg(2, 'locus')
        location = self.get_arg(3, None)

        total = 0
        for unit in get_all_units(record.content, location_type):
            in_scope = (
                location is None or
                dputils.is_unit_in_range(unit['unitid'], location)
            )
            if not in_scope:
                continue

            total += 1
            row = (
                unit['unitid'],
                len(unit['content']),
                repr(unit['content'][:10]),
            )
            print('%-10s %-5s %-10s' % row)
            if location:
                # a location was requested: also dump the whole unit
                print(repr(unit['content']))

        print('%s units' % total)
コード例 #3
0
    def segment_units(self):
        self.segunits = []
        
        self.init_variant_patterns()

        t0 = datetime.now()

        # TODO: derive the info from the faceted_search settings.py or from a new
        # settings variable.

        # arguments
        args = self.options
        # unpack hilite
        args['hilite'] = args.get('hilite', '').split(',')
        args['hilite_groups'] = [self.get_group_key_from_pattern_key(self.get_pattern_from_id(pid)['key']) for pid in args['hilite'] if pid and self.get_pattern_from_id(pid)]
        args['ignore'] = args.get('ignore', '')
        args['exclude'] = args.get('exclude', '')
        
        args['ulimit'] = dputils.get_int(args.get('ulimit', 10), 10)
        args['urange'] = args.get('urange', '')

        # Get the text units
        hand_filters.chrono('units:')
        self.stats = stats = {'duration_segmentation': 0, 'range_size': 0, 'patterns': {}, 'groups': {}}
        for pattern in self.get_patterns():
            group = re.sub(ur'-\d+$', '', pattern['key'])
            self.stats['groups'][group] = 0

        for unit in self.get_unit_model().objects.filter(content_xml__id=4).iterator():
            # only fief
            types = unit.get_entry_type()
            if not types or 'F' not in types: continue

            # only selected range
            if not dputils.is_unit_in_range(unit.unitid, args['urange']): continue

            stats['range_size'] += 1

            # segment the unit
            self.segment_unit(unit, args)

            if unit.match_conditions:
                self.segunits.append(unit)

        hand_filters.chrono(':units')

        self.variants = [{'text': variant, 'hits': self.variants[variant]} for variant in sorted(self.variants.keys())]

        # stats
        stats['result_size'] = len(self.segunits)
        stats['result_size_pc'] = int(100.0 * stats['result_size'] / stats['range_size']) if stats['range_size'] else 'N/A'

        # limit size of returned result
        if args['ulimit'] > 0:
            self.segunits = self.segunits[0:args['ulimit']]

        stats['duration_segmentation'] = (datetime.now() - t0).total_seconds()
コード例 #4
0
ファイル: patterns.py プロジェクト: suzypiat/digipal
    def segment_units(self):
        self.segunits = []

        self.init_variant_patterns()

        t0 = datetime.now()

        # TODO: derive the info from the faceted_search settings.py or from a new
        # settings variable.

        # arguments
        args = self.options
        # unpack hilite
        args['hilite'] = args.get('hilite', '').split(',')
        args['hilite_groups'] = [
            self.get_group_key_from_pattern_key(
                self.get_pattern_from_id(pid)['key']) for pid in args['hilite']
            if pid and self.get_pattern_from_id(pid)
        ]
        args['ignore'] = args.get('ignore', '')
        args['exclude'] = args.get('exclude', '')

        args['ulimit'] = dputils.get_int(args.get('ulimit', 10), 10)
        args['urange'] = args.get('urange', '')

        # Get the text units
        hand_filters.chrono('units:')
        self.stats = stats = {
            'duration_segmentation': 0,
            'range_size': 0,
            'patterns': {},
            'groups': {}
        }
        for pattern in self.get_patterns():
            group = re.sub(ur'-\d+$', '', pattern['key'])
            self.stats['groups'][group] = 0

        for unit in self.get_unit_model().objects.filter(
                content_xml__id=4).iterator():
            # only fief
            types = unit.get_entry_type()
            if not types or 'F' not in types:
                continue

            # only selected range
            if not dputils.is_unit_in_range(unit.unitid, args['urange']):
                continue

            stats['range_size'] += 1

            # segment the unit
            self.segment_unit(unit, args)

            if unit.match_conditions:
                self.segunits.append(unit)

        hand_filters.chrono(':units')

        self.variants = [{
            'text': variant,
            'hits': self.variants[variant]
        } for variant in sorted(self.variants.keys())]

        # stats
        stats['result_size'] = len(self.segunits)
        stats['result_size_pc'] = int(
            100.0 * stats['result_size'] /
            stats['range_size']) if stats['range_size'] else 'N/A'

        # limit size of returned result
        if args['ulimit'] > 0:
            self.segunits = self.segunits[0:args['ulimit']]

        stats['duration_segmentation'] = (datetime.now() - t0).total_seconds()