예제 #1
0
def get_detail_context(corp,
                       pos,
                       hitlen=1,
                       detail_left_ctx=40,
                       detail_right_ctx=40,
                       attrs=None,
                       structs='',
                       detail_ctx_incr=60):
    """
    Collect the text around a hit so it can be shown as a detail view.

    arguments:
    corp -- corpus object; queried via get_conf() and passed to
            manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- number of positions to fetch before the hit
    detail_right_ctx -- number of positions to fetch after the hit
    attrs -- iterable of attribute names (joined with ','); None means
             just 'word'
    structs -- comma-separated structure names passed to manatee.CorpRegion
    detail_ctx_incr -- how much the "expand" arguments widen the context

    returns:
    a dict with keys 'wrapdetail', 'content', 'expand_left_args',
    'expand_right_args', 'righttoleft', 'pos', 'maxdetail' and, when the
    corpus WRAPDETAIL structure was not among the requested structs,
    'deletewrap'.
    """
    data = {}
    corpus_encoding = corp.get_conf('ENCODING')
    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        if wrapdetail not in structs.split(','):
            # The wrapping structure is being added here, not by the caller.
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
            if maxdetail == 0:
                # "No limit"; sys.maxint is gone on Python 3, so fall back
                # to sys.maxsize there.
                maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # Best effort: an unreadable/non-numeric setting disables the limit,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # Never reach before the beginning of the corpus.
    detail_left_ctx = min(detail_left_ctx, pos)

    query_attrs = 'word' if attrs is None else ','.join(attrs)
    cr = manatee.CorpRegion(corp, query_attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(
        cr.region(hit_end, hit_end + detail_right_ctx))

    content = region_left + region_kwic + region_right
    for seg in content:
        # Drop the '===NONE===' placeholder before decoding the text.
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''),
                                   from_encoding=corpus_encoding)
    for seg in region_kwic:
        # Hit segments without a class of their own are marked as 'coll'.
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    refbase = [('pos', pos)]
    if hitlen != 1:
        refbase.append(('hitlen', hitlen))
    data['expand_left_args'] = dict(
        refbase + [('detail_left_ctx', detail_left_ctx + detail_ctx_incr),
                   ('detail_right_ctx', detail_right_ctx)])
    data['expand_right_args'] = dict(
        refbase + [('detail_left_ctx', detail_left_ctx),
                   ('detail_right_ctx', detail_right_ctx + detail_ctx_incr)])
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data
예제 #2
0
def get_detail_context(corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40,
                       addattrs=None, structs='', detail_ctx_incr=60):
    """
    Collect the text around a hit so it can be shown as a detail view.

    arguments:
    corp -- corpus object; queried via get_conf() and passed to
            manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- number of positions to fetch before the hit
    detail_right_ctx -- number of positions to fetch after the hit
    addattrs -- list of attribute names fetched in addition to 'word'
    structs -- comma-separated structure names passed to manatee.CorpRegion
    detail_ctx_incr -- how much the expand links widen the context

    returns:
    a dict with keys 'wrapdetail', 'content', 'leftlink', 'rightlink',
    'righttoleft', 'pos', 'maxdetail' and, when the corpus WRAPDETAIL
    structure was not among the requested structs, 'deletewrap'.
    """
    data = {}
    if addattrs is None:
        addattrs = []
    corpus_encoding = corp.get_conf('ENCODING')
    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        if wrapdetail not in structs.split(','):
            # The wrapping structure is being added here, not by the caller.
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
            if maxdetail == 0:
                # "No limit"; sys.maxint is gone on Python 3, so fall back
                # to sys.maxsize there.
                maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # Best effort: an unreadable/non-numeric setting disables the limit,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # Never reach before the beginning of the corpus.
    detail_left_ctx = min(detail_left_ctx, pos)

    attrs = ','.join(['word'] + addattrs)
    cr = manatee.CorpRegion(corp, attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    content = region_left + region_kwic + region_right
    for seg in content:
        # Drop the '===NONE===' placeholder before decoding the text.
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''), from_encoding=corpus_encoding)
    for seg in region_kwic:
        # Hit segments without a class of their own are marked as 'coll'.
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    # Query-string fragments for re-requesting the detail with a wider context.
    refbase = 'pos=%i&' % pos
    if hitlen != 1:
        refbase += 'hitlen=%i&' % hitlen
    data['leftlink'] = refbase + ('detail_left_ctx=%i&detail_right_ctx=%i'
                                  % (detail_left_ctx + detail_ctx_incr,
                                     detail_right_ctx))
    data['rightlink'] = refbase + ('detail_left_ctx=%i&detail_right_ctx=%i'
                                   % (detail_left_ctx,
                                      detail_right_ctx + detail_ctx_incr))
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data
예제 #3
0
파일: conclib.py 프로젝트: simar0at/kontext
def get_detail_context(
    corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40, addattrs=None, structs="", detail_ctx_incr=60
):
    """
    Collect the text around a hit so it can be shown as a detail view.

    arguments:
    corp -- corpus object; queried via get_conf() and passed to
            manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- number of positions to fetch before the hit
    detail_right_ctx -- number of positions to fetch after the hit
    addattrs -- list of attribute names fetched in addition to "word"
    structs -- comma-separated structure names passed to manatee.CorpRegion
    detail_ctx_incr -- how much the expand links widen the context

    returns:
    a dict with keys "wrapdetail", "content", "leftlink", "rightlink",
    "righttoleft", "pos", "maxdetail" and, when the corpus WRAPDETAIL
    structure was not among the requested structs, "deletewrap".
    """
    data = {}
    if addattrs is None:
        addattrs = []
    corpus_encoding = corp.get_conf("ENCODING")
    wrapdetail = corp.get_conf("WRAPDETAIL")
    if wrapdetail:
        data["wrapdetail"] = "<%s>" % wrapdetail
        if wrapdetail not in structs.split(","):
            # The wrapping structure is being added here, not by the caller.
            data["deletewrap"] = True
        structs = wrapdetail + "," + structs
    else:
        data["wrapdetail"] = ""

    try:
        maxdetail = int(corp.get_conf("MAXDETAIL"))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf("MAXCONTEXT"))
            if maxdetail == 0:
                # "No limit"; sys.maxint is gone on Python 3, so fall back
                # to sys.maxsize there.
                maxdetail = getattr(sys, "maxint", sys.maxsize)
    except Exception:
        # Best effort: an unreadable/non-numeric setting disables the limit,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # Never reach before the beginning of the corpus.
    detail_left_ctx = min(detail_left_ctx, pos)

    attrs = ",".join(["word"] + addattrs)
    cr = manatee.CorpRegion(corp, attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    content = region_left + region_kwic + region_right
    for seg in content:
        # Drop the "===NONE===" placeholder before decoding the text.
        seg["str"] = import_string(seg["str"].replace("===NONE===", ""), from_encoding=corpus_encoding)
    for seg in region_kwic:
        # Hit segments without a class of their own are marked as "coll".
        if not seg["class"]:
            seg["class"] = "coll"
    data["content"] = content

    # Query-string fragments for re-requesting the detail with a wider context.
    refbase = "pos=%i&" % pos
    if hitlen != 1:
        refbase += "hitlen=%i&" % hitlen
    data["leftlink"] = refbase + (
        "detail_left_ctx=%i&detail_right_ctx=%i" % (detail_left_ctx + detail_ctx_incr, detail_right_ctx)
    )
    data["rightlink"] = refbase + (
        "detail_left_ctx=%i&detail_right_ctx=%i" % (detail_left_ctx, detail_right_ctx + detail_ctx_incr)
    )
    data["righttoleft"] = corp.get_conf("RIGHTTOLEFT")
    data["pos"] = pos
    data["maxdetail"] = maxdetail
    return data
예제 #4
0
def get_detail_context(corp, pos, hitlen=1, detail_left_ctx=40, detail_right_ctx=40,
                       attrs=None, structs='', detail_ctx_incr=60):
    """
    Collect the text around a hit so it can be shown as a detail view.

    arguments:
    corp -- corpus object; queried via get_conf() and passed to
            manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- number of positions to fetch before the hit
    detail_right_ctx -- number of positions to fetch after the hit
    attrs -- iterable of attribute names (joined with ','); None means
             just 'word'
    structs -- comma-separated structure names passed to manatee.CorpRegion
    detail_ctx_incr -- how much the "expand" arguments widen the context

    returns:
    a dict with keys 'wrapdetail', 'content', 'expand_left_args',
    'expand_right_args', 'righttoleft', 'pos', 'maxdetail' and, when the
    corpus WRAPDETAIL structure was not among the requested structs,
    'deletewrap'.
    """
    data = {}
    corpus_encoding = corp.get_conf('ENCODING')
    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        if wrapdetail not in structs.split(','):
            # The wrapping structure is being added here, not by the caller.
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
            if maxdetail == 0:
                # "No limit"; sys.maxint is gone on Python 3, so fall back
                # to sys.maxsize there.
                maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # Best effort: an unreadable/non-numeric setting disables the limit,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # Never reach before the beginning of the corpus.
    detail_left_ctx = min(detail_left_ctx, pos)

    query_attrs = 'word' if attrs is None else ','.join(attrs)
    cr = manatee.CorpRegion(corp, query_attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(cr.region(hit_end, hit_end + detail_right_ctx))

    content = region_left + region_kwic + region_right
    for seg in content:
        # Drop the '===NONE===' placeholder before decoding the text.
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''), from_encoding=corpus_encoding)
    for seg in region_kwic:
        # Hit segments without a class of their own are marked as 'coll'.
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    refbase = [('pos', pos)]
    if hitlen != 1:
        refbase.append(('hitlen', hitlen))
    data['expand_left_args'] = dict(refbase + [('detail_left_ctx', detail_left_ctx + detail_ctx_incr),
                                               ('detail_right_ctx', detail_right_ctx)])
    data['expand_right_args'] = dict(refbase + [('detail_left_ctx', detail_left_ctx),
                                                ('detail_right_ctx', detail_right_ctx + detail_ctx_incr)])
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data
예제 #5
0
    def test_tokens2strclass(self):
        """
        kwiclib.tokens2strclass should turn alternating text / '{class}'
        items into three segments, keeping whitespace untouched and
        dropping the curly braces from the class value.
        """
        # The input intentionally contains a doubled space between class1 and
        # class2 and trailing whitespace after 'bar'.
        tokens = ('foo', '{class1  class2}', 'bar  ', '{class3 class4}', 'last one', '{class5}')
        expected = (
            ('foo', 'class1  class2'),
            ('bar  ', 'class3 class4'),
            ('last one', 'class5'),
        )

        segments = kwiclib.tokens2strclass(tokens)

        self.assertEqual(len(segments), 3)
        for segment, (expected_str, expected_class) in zip(segments, expected):
            self.assertEqual(segment.get('str'), expected_str)
            self.assertEqual(segment.get('class'), expected_class)
예제 #6
0
def get_detail_context(corp,
                       pos,
                       hitlen=1,
                       detail_left_ctx=40,
                       detail_right_ctx=40,
                       addattrs=None,
                       structs='',
                       detail_ctx_incr=60):
    """
    Collect the text around a hit so it can be shown as a detail view.

    arguments:
    corp -- corpus object; queried via get_conf() and passed to
            manatee.CorpRegion
    pos -- position where the hit starts
    hitlen -- number of positions covered by the hit
    detail_left_ctx -- number of positions to fetch before the hit
    detail_right_ctx -- number of positions to fetch after the hit
    addattrs -- list of attribute names fetched in addition to 'word'
    structs -- comma-separated structure names passed to manatee.CorpRegion
    detail_ctx_incr -- how much the expand links widen the context

    returns:
    a dict with keys 'wrapdetail', 'content', 'leftlink', 'rightlink',
    'righttoleft', 'pos', 'maxdetail' and, when the corpus WRAPDETAIL
    structure was not among the requested structs, 'deletewrap'.
    """
    data = {}
    if addattrs is None:
        addattrs = []
    corpus_encoding = corp.get_conf('ENCODING')
    wrapdetail = corp.get_conf('WRAPDETAIL')
    if wrapdetail:
        data['wrapdetail'] = '<%s>' % wrapdetail
        if wrapdetail not in structs.split(','):
            # The wrapping structure is being added here, not by the caller.
            data['deletewrap'] = True
        structs = wrapdetail + ',' + structs
    else:
        data['wrapdetail'] = ''

    try:
        maxdetail = int(corp.get_conf('MAXDETAIL'))
        if maxdetail == 0:
            maxdetail = int(corp.get_conf('MAXCONTEXT'))
            if maxdetail == 0:
                # "No limit"; sys.maxint is gone on Python 3, so fall back
                # to sys.maxsize there.
                maxdetail = getattr(sys, 'maxint', sys.maxsize)
    except Exception:
        # Best effort: an unreadable/non-numeric setting disables the limit,
        # but KeyboardInterrupt/SystemExit are no longer swallowed.
        maxdetail = 0

    if maxdetail:
        detail_left_ctx = min(detail_left_ctx, maxdetail)
        detail_right_ctx = min(detail_right_ctx, maxdetail)
    # Never reach before the beginning of the corpus.
    detail_left_ctx = min(detail_left_ctx, pos)

    attrs = ','.join(['word'] + addattrs)
    cr = manatee.CorpRegion(corp, attrs, structs)
    hit_end = pos + hitlen
    region_left = tokens2strclass(cr.region(pos - detail_left_ctx, pos))
    region_kwic = tokens2strclass(cr.region(pos, hit_end))
    region_right = tokens2strclass(
        cr.region(hit_end, hit_end + detail_right_ctx))

    content = region_left + region_kwic + region_right
    for seg in content:
        # Drop the '===NONE===' placeholder before decoding the text.
        seg['str'] = import_string(seg['str'].replace('===NONE===', ''),
                                   from_encoding=corpus_encoding)
    for seg in region_kwic:
        # Hit segments without a class of their own are marked as 'coll'.
        if not seg['class']:
            seg['class'] = 'coll'
    data['content'] = content

    # Query-string fragments for re-requesting the detail with a wider context.
    refbase = 'pos=%i&' % pos
    if hitlen != 1:
        refbase += 'hitlen=%i&' % hitlen
    data['leftlink'] = refbase + (
        'detail_left_ctx=%i&detail_right_ctx=%i' %
        (detail_left_ctx + detail_ctx_incr, detail_right_ctx))
    data['rightlink'] = refbase + (
        'detail_left_ctx=%i&detail_right_ctx=%i' %
        (detail_left_ctx, detail_right_ctx + detail_ctx_incr))
    data['righttoleft'] = corp.get_conf('RIGHTTOLEFT')
    data['pos'] = pos
    data['maxdetail'] = maxdetail
    return data