def get_detail_context(corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40, attrs=None,
                       structs='', detail_ctx_incr=60):
    """
    Collect the data needed to show one hit with a widened context.

    Three regions are read via manatee.CorpRegion: the positions before
    the hit, the hit itself and the positions after it. The resulting
    segments are decoded with import_string() and returned together with
    argument sets usable for requesting an even wider context.

    arguments:
    corp -- corpus object; get_conf() is called on it and it is passed
            to manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- requested number of positions before the hit;
                       limited by the configured maximum and by pos
    detail_right_ctx -- requested number of positions after the hit;
                        limited by the configured maximum
    attrs -- iterable of attribute names joined by ','; None means 'word'
    structs -- comma-separated structure names for manatee.CorpRegion
    detail_ctx_incr -- amount added to the left/right context in the
                       'expand' arguments

    returns:
    a dict with keys 'wrapdetail', optionally 'deletewrap', 'content',
    'expand_left_args', 'expand_right_args', 'righttoleft', 'pos'
    and 'maxdetail'
    """
    data = {}
    corpus_encoding = corp.get_conf('ENCODING')

    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        # the caller did not ask for the wrapping structure itself
        if wrapdetail not in structs.split(','):
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
        if maxdetail == 0:
            # sys.maxint exists only on Python 2; fall back to sys.maxsize
            # so that "no limit" does not end up in the error branch below
            maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # best effort: an unreadable or non-numeric setting disables the
        # limit (KeyboardInterrupt/SystemExit are no longer swallowed)
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # never start the left region before position 0
    detail_left_ctx = min(detail_left_ctx, pos)

    query_attrs = 'word' if attrs is None else ','.join(attrs)
    cr = manatee.CorpRegion(corp, query_attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    # segments are updated in place, so the list can be built just once
    content = region_left + region_kwic + region_right
    for seg in content:
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''),
                                   from_encoding=corpus_encoding)
    for seg in region_kwic:
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    refbase = [('pos', pos)]
    if hitlen != 1:
        refbase.append(('hitlen', hitlen))
    data['expand_left_args'] = dict(
        refbase + [('detail_left_ctx', detail_left_ctx + detail_ctx_incr),
                   ('detail_right_ctx', detail_right_ctx)])
    data['expand_right_args'] = dict(
        refbase + [('detail_left_ctx', detail_left_ctx),
                   ('detail_right_ctx', detail_right_ctx + detail_ctx_incr)])
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data
def get_detail_context(corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40,
                       addattrs=None, structs='', detail_ctx_incr=60):
    """
    Collect the data needed to show one hit with a widened context.

    Three regions are read via manatee.CorpRegion: the positions before
    the hit, the hit itself and the positions after it. The resulting
    segments are decoded with import_string() and returned together with
    query-string fragments usable for requesting an even wider context.

    arguments:
    corp -- corpus object; get_conf() is called on it and it is passed
            to manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- requested number of positions before the hit;
                       limited by the configured maximum and by pos
    detail_right_ctx -- requested number of positions after the hit;
                        limited by the configured maximum
    addattrs -- list of attribute names fetched in addition to 'word'
    structs -- comma-separated structure names for manatee.CorpRegion
    detail_ctx_incr -- amount added to the left/right context in the
                       generated links

    returns:
    a dict with keys 'wrapdetail', optionally 'deletewrap', 'content',
    'leftlink', 'rightlink', 'righttoleft', 'pos' and 'maxdetail'
    """
    data = {}
    if addattrs is None:
        addattrs = []
    corpus_encoding = corp.get_conf('ENCODING')

    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        # the caller did not ask for the wrapping structure itself
        if wrapdetail not in structs.split(','):
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
        if maxdetail == 0:
            # sys.maxint exists only on Python 2; fall back to sys.maxsize
            # so that "no limit" does not end up in the error branch below
            maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # best effort: an unreadable or non-numeric setting disables the
        # limit (KeyboardInterrupt/SystemExit are no longer swallowed)
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # never start the left region before position 0
    detail_left_ctx = min(detail_left_ctx, pos)

    attrs = ','.join(['word'] + addattrs)
    cr = manatee.CorpRegion(corp, attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    # segments are updated in place, so the list can be built just once
    content = region_left + region_kwic + region_right
    for seg in content:
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''),
                                   from_encoding=corpus_encoding)
    for seg in region_kwic:
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    refbase = 'pos=%i&' % pos
    if hitlen != 1:
        refbase += 'hitlen=%i&' % hitlen
    data['leftlink'] = refbase + ('detail_left_ctx=%i&detail_right_ctx=%i'
                                  % (detail_left_ctx + detail_ctx_incr, detail_right_ctx))
    data['rightlink'] = refbase + ('detail_left_ctx=%i&detail_right_ctx=%i'
                                   % (detail_left_ctx, detail_right_ctx + detail_ctx_incr))
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data
def get_detail_context(
    corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40, addattrs=None, structs="", detail_ctx_incr=60
):
    """
    Collect the data needed to show one hit with a widened context.

    Three regions are read via manatee.CorpRegion: the positions before
    the hit, the hit itself and the positions after it. The resulting
    segments are decoded with import_string() and returned together with
    query-string fragments usable for requesting an even wider context.

    arguments:
    corp -- corpus object; get_conf() is called on it and it is passed
            to manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- requested number of positions before the hit;
                       limited by the configured maximum and by pos
    detail_right_ctx -- requested number of positions after the hit;
                        limited by the configured maximum
    addattrs -- list of attribute names fetched in addition to "word"
    structs -- comma-separated structure names for manatee.CorpRegion
    detail_ctx_incr -- amount added to the left/right context in the
                       generated links

    returns:
    a dict with keys "wrapdetail", optionally "deletewrap", "content",
    "leftlink", "rightlink", "righttoleft", "pos" and "maxdetail"
    """
    data = {}
    if addattrs is None:
        addattrs = []
    corpus_encoding = corp.get_conf("ENCODING")

    wrapdetail = corp.get_conf("WRAPDETAIL")
    if wrapdetail:
        data["wrapdetail"] = "<%s>" % wrapdetail
        # the caller did not ask for the wrapping structure itself
        if wrapdetail not in structs.split(","):
            data["deletewrap"] = True
        structs = wrapdetail + "," + structs
    else:
        data["wrapdetail"] = ""

    try:
        maxdetail = int(corp.get_conf("MAXDETAIL"))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf("MAXCONTEXT"))
        if maxdetail == 0:
            # sys.maxint exists only on Python 2; fall back to sys.maxsize
            # so that "no limit" does not end up in the error branch below
            maxdetail = getattr(sys, "maxint", sys.maxsize)
    except Exception:
        # best effort: an unreadable or non-numeric setting disables the
        # limit (KeyboardInterrupt/SystemExit are no longer swallowed)
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # never start the left region before position 0
    detail_left_ctx = min(detail_left_ctx, pos)

    attrs = ",".join(["word"] + addattrs)
    cr = manatee.CorpRegion(corp, attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    # segments are updated in place, so the list can be built just once
    content = region_left + region_kwic + region_right
    for seg in content:
        seg["str"] = import_string(seg["str"].replace("===NONE===", ""), from_encoding=corpus_encoding)
    for seg in region_kwic:
        if not seg["class"]:
            seg["class"] = "coll"
    data["content"] = content

    refbase = "pos=%i&" % pos
    if hitlen != 1:
        refbase += "hitlen=%i&" % hitlen
    data["leftlink"] = refbase + (
        "detail_left_ctx=%i&detail_right_ctx=%i" % (detail_left_ctx + detail_ctx_incr, detail_right_ctx)
    )
    data["rightlink"] = refbase + (
        "detail_left_ctx=%i&detail_right_ctx=%i" % (detail_left_ctx, detail_right_ctx + detail_ctx_incr)
    )
    data["righttoleft"] = corp.get_conf("RIGHTTOLEFT")
    data["pos"] = pos
    data["maxdetail"] = maxdetail
    return data
def get_detail_context(corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40, attrs=None,
                       structs='', detail_ctx_incr=60):
    """
    Collect the data needed to show one hit with a widened context.

    Three regions are read via manatee.CorpRegion: the positions before
    the hit, the hit itself and the positions after it. The resulting
    segments are decoded with import_string() and returned together with
    argument sets usable for requesting an even wider context.

    arguments:
    corp -- corpus object; get_conf() is called on it and it is passed
            to manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- requested number of positions before the hit;
                       limited by the configured maximum and by pos
    detail_right_ctx -- requested number of positions after the hit;
                        limited by the configured maximum
    attrs -- iterable of attribute names joined by ','; None means 'word'
    structs -- comma-separated structure names for manatee.CorpRegion
    detail_ctx_incr -- amount added to the left/right context in the
                       'expand' arguments

    returns:
    a dict with keys 'wrapdetail', optionally 'deletewrap', 'content',
    'expand_left_args', 'expand_right_args', 'righttoleft', 'pos'
    and 'maxdetail'
    """
    data = {}
    corpus_encoding = corp.get_conf('ENCODING')

    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        # the caller did not ask for the wrapping structure itself
        if wrapdetail not in structs.split(','):
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
        if maxdetail == 0:
            # sys.maxint exists only on Python 2; fall back to sys.maxsize
            # so that "no limit" does not end up in the error branch below
            maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # best effort: an unreadable or non-numeric setting disables the
        # limit (KeyboardInterrupt/SystemExit are no longer swallowed)
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # never start the left region before position 0
    detail_left_ctx = min(detail_left_ctx, pos)

    query_attrs = 'word' if attrs is None else ','.join(attrs)
    cr = manatee.CorpRegion(corp, query_attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    # segments are updated in place, so the list can be built just once
    content = region_left + region_kwic + region_right
    for seg in content:
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''),
                                   from_encoding=corpus_encoding)
    for seg in region_kwic:
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    refbase = [('pos', pos)]
    if hitlen != 1:
        refbase.append(('hitlen', hitlen))
    data['expand_left_args'] = dict(
        refbase + [('detail_left_ctx', detail_left_ctx + detail_ctx_incr),
                   ('detail_right_ctx', detail_right_ctx)])
    data['expand_right_args'] = dict(
        refbase + [('detail_left_ctx', detail_left_ctx),
                   ('detail_right_ctx', detail_right_ctx + detail_ctx_incr)])
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data
def test_tokens2strclass(self):
    """
    Check that kwiclib.tokens2strclass() turns a flat sequence of
    alternating token strings and '{...}' class strings into three
    items exposing matching 'str' and 'class' values.
    """
    # The fixture contains a multi-word class value and a token with
    # trailing whitespace ('bar ').
    # NOTE(review): the original note mentions two whitespaces between
    # class1 and class2 - confirm the literals below match the intended fixture.
    tokens = ('foo', '{class1 class2}', 'bar ', '{class3 class4}', 'last one', '{class5}')
    expected = (
        ('foo', 'class1 class2'),
        ('bar ', 'class3 class4'),
        ('last one', 'class5'),
    )

    result = kwiclib.tokens2strclass(tokens)

    self.assertEqual(len(result), 3)
    for item, (exp_str, exp_class) in zip(result, expected):
        self.assertEqual(item.get('str'), exp_str)
        self.assertEqual(item.get('class'), exp_class)
def get_detail_context(corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40,
                       addattrs=None, structs='', detail_ctx_incr=60):
    """
    Collect the data needed to show one hit with a widened context.

    Three regions are read via manatee.CorpRegion: the positions before
    the hit, the hit itself and the positions after it. The resulting
    segments are decoded with import_string() and returned together with
    query-string fragments usable for requesting an even wider context.

    arguments:
    corp -- corpus object; get_conf() is called on it and it is passed
            to manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- requested number of positions before the hit;
                       limited by the configured maximum and by pos
    detail_right_ctx -- requested number of positions after the hit;
                        limited by the configured maximum
    addattrs -- list of attribute names fetched in addition to 'word'
    structs -- comma-separated structure names for manatee.CorpRegion
    detail_ctx_incr -- amount added to the left/right context in the
                       generated links

    returns:
    a dict with keys 'wrapdetail', optionally 'deletewrap', 'content',
    'leftlink', 'rightlink', 'righttoleft', 'pos' and 'maxdetail'
    """
    data = {}
    if addattrs is None:
        addattrs = []
    corpus_encoding = corp.get_conf('ENCODING')

    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        # the caller did not ask for the wrapping structure itself
        if wrapdetail not in structs.split(','):
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
        if maxdetail == 0:
            # sys.maxint exists only on Python 2; fall back to sys.maxsize
            # so that "no limit" does not end up in the error branch below
            maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # best effort: an unreadable or non-numeric setting disables the
        # limit (KeyboardInterrupt/SystemExit are no longer swallowed)
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # never start the left region before position 0
    detail_left_ctx = min(detail_left_ctx, pos)

    attrs = ','.join(['word'] + addattrs)
    cr = manatee.CorpRegion(corp, attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    # segments are updated in place, so the list can be built just once
    content = region_left + region_kwic + region_right
    for seg in content:
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''),
                                   from_encoding=corpus_encoding)
    for seg in region_kwic:
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    refbase = 'pos=%i&' % pos
    if hitlen != 1:
        refbase += 'hitlen=%i&' % hitlen
    data['leftlink'] = refbase + ('detail_left_ctx=%i&detail_right_ctx=%i'
                                  % (detail_left_ctx + detail_ctx_incr, detail_right_ctx))
    data['rightlink'] = refbase + ('detail_left_ctx=%i&detail_right_ctx=%i'
                                   % (detail_left_ctx, detail_right_ctx + detail_ctx_incr))
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data