Esempio n. 1
0
def tokenize(data):
    """Append lowercase word tokens from the accepted items to ``words``.

    Iterates ``data['items']`` and processes only the items for which
    ``reader.in_2008`` returns a truthy value (helper defined elsewhere;
    presumably a year filter -- confirm against its definition).  For each
    such item the ``'textContent'`` string is cleaned and split into tokens,
    which are lowercased and added to the module-level ``words`` list.

    Returns nothing; the result is the mutation of ``words``.
    """
    for item in data['items']:
        if reader.in_2008(item):
            # Drop anything shaped like a markup tag: '<' ... '>'.
            without_tags = re.sub(r'<[^>]*>', '', item['textContent'])
            # Remove "hyphen + newline" pairs so the two halves are glued
            # together (presumably words hyphenated across a line break).
            joined = re.sub(r'-\n', '', without_tags)
            # A token is a run of ASCII or Polish letters between word
            # boundaries.
            tokens = re.findall(r'\b[a-zA-ZąęłćżźóńśŚĄĘŁĆŻŹÓŃ]+\b', joined)
            words.extend(token.lower() for token in tokens)
Esempio n. 2
0
def tag_judgment(data):
    """POST the text of each accepted item to ``url`` and log the reply.

    Iterates ``data['items']`` and skips every item for which
    ``reader.in_2008`` is falsy (helper defined elsewhere).  For each
    remaining item the module-level counter ``count`` is incremented, the
    item's ``'textContent'`` is sent UTF-8 encoded as the body of a POST
    request to the module-level ``url``, and the response text is appended
    to ``tagged.txt`` in the current working directory.

    Returns nothing.  Errors raised by ``requests.post`` or by the file
    write propagate to the caller.
    """
    global count
    for text in data['items']:
        if not reader.in_2008(text):
            continue
        count += 1
        # NOTE(review): no timeout is passed, so a stalled service blocks
        # this call indefinitely; the HTTP status is not checked either.
        response = requests.post(url, text['textContent'].encode('utf-8'))
        # Write with an explicit encoding: the request body is UTF-8 and the
        # reply may contain non-ASCII text, so relying on the locale default
        # can fail or yield an inconsistently encoded file.
        with open('tagged.txt', 'a', encoding='utf-8') as f:
            # write(), not writelines(): the payload is a single string, and
            # writelines() would iterate it one character at a time.
            f.write(response.text)
Esempio n. 3
0
def find_references(data):
    """Collect references to art. 445 of the 1964 Civil Code act.

    Iterates ``data['items']`` and skips every item for which
    ``reader.in_2008`` is falsy (helper defined elsewhere).  For each
    remaining item, the ``'text'`` of every entry in
    ``item['referencedRegulations']`` is searched for the act title
    "Ustawa z dnia 23 kwietnia 1964 r. - Kodeks cywilny" followed, anywhere
    later in the same text, by "art. 445".

    Every match is a 2-tuple ``(act_title, article)`` (the two capture
    groups) and is appended to the module-level list ``f``.  Returns nothing.
    """
    # Compiled once: the pattern does not depend on the item being processed.
    # The dot after "art" is escaped so that it matches only a literal
    # period; unescaped it would accept any character (e.g. "arty 445").
    civil_code_art_445 = re.compile(
        r'(\bUstawa\b\s+\bz\b\s+\bdnia\b\s+\b23\b\s+\bkwietnia\b\s+\b1964\b\s+r\.\s+-\s+\bKodeks\b\s+\bcywilny\b)'
        r'[\S\s]+(\bart\.\s+\b445\b)')
    for item in data['items']:
        if not reader.in_2008(item):
            continue
        for reference in item['referencedRegulations']:
            f.extend(civil_code_art_445.findall(reference['text']))
Esempio n. 4
0
def find_szkoda(data):
    """Record, per accepted item, the first "szkoda"-like word that is proper.

    Iterates ``data['items']`` and processes only the items for which
    ``reader.in_2008`` is truthy (helper defined elsewhere).  The item's
    ``'textContent'`` is scanned for words starting with "szkod"/"szkód"
    (either case of the initial letter) followed by up to three word
    characters.  The first such word accepted by ``proper`` (defined
    elsewhere) is appended to the module-level ``words`` list; at most one
    word is added per item.

    Returns nothing.
    """
    for item in data['items']:
        if reader.in_2008(item):
            candidates = re.findall(r'\b[Ss]zk[oó]d\w{0,3}\b', item['textContent'])
            # next() stops at the first accepted candidate, so ``proper`` is
            # not called on the ones after it.  Matches are never empty or
            # None, which makes None a safe "nothing found" marker.
            first_proper = next((word for word in candidates if proper(word)), None)
            if first_proper is not None:
                words.append(first_proper)
Esempio n. 5
0
def find_values(data):
    """Extract monetary amounts (in zloty) from the accepted items.

    Iterates ``data['items']`` and processes only the items for which
    ``reader.in_2008`` is truthy (helper defined elsewhere).  Every regex
    match found in the item's ``'textContent'`` is passed to ``parse_value``
    (defined elsewhere) and the result is appended to the module-level
    ``money`` list.

    Because the pattern contains capture groups, each match handed to
    ``parse_value`` is the tuple of all group values produced by
    ``re.findall``.  Returns nothing.
    """
    # The pattern has three consecutive parts:
    #   1. a number such as '1203120', '123,1541.123,11321' or
    #      '123 2100 3210 3210';
    #   2. up to three repetitions of: a unit ('mld' | 'mln' | 'tys'), the
    #      amount spelled out in parentheses '(amount in words)', or the word
    #      for "old" ('starych' / 'stare'), optionally parenthesised;
    #   3. the currency word ('złotych', 'złote', ...) or its abbreviation
    #      'zł' at the very end.
    # NOTE(review): '\d{2}?' is a lazy "exactly two digits", not an optional
    # two-digit group -- confirm that this is what was intended.
    amount_pattern = re.compile(
        r'((\b\d+[\d,]*\b)|(\b\d+\s?[\d.]*,?\d{2}?\b)|(\b\d+[\d\s]+,?\d{2}?\b))\s?'
        r'((\bmld\b\s?)|(\bmln\b\s?)|(\btys\b\s?)|(\b\(?star\w{1,3}\)?\b\s?)|(\([\w\s]+\)\s?)){0,3}'
        r'((\bzłot\w{1,3}\b)|(\bzł\.?\b))')
    for item in data['items']:
        if reader.in_2008(item):
            for groups in amount_pattern.findall(item['textContent']):
                money.append(parse_value(groups))
Esempio n. 6
0
def save_in_es(data):
    """Index each accepted item as a document through the ``es`` client.

    Iterates ``data['items']`` and processes only the items for which
    ``reader.in_2008`` is truthy (helper defined elsewhere).  For each one a
    document is built from the item's text, its judgment date (converted by
    ``parse``, defined elsewhere), the case number of its first court case
    and the names of its judges.  The document is stored in index ``'lab'``
    with doc type ``'judgment'`` under the current value of the module-level
    counter ``id``, which is then incremented.

    The judge names and the indexing response are printed for every item.
    Returns nothing.
    """
    # NOTE: ``id`` is a module-level counter that shadows the builtin of the
    # same name; it is kept because other code may rely on it.
    global id
    for item in data['items']:
        if reader.in_2008(item):
            judge_names = [judge['name'] for judge in item['judges']]
            document = {
                'text': item['textContent'],
                'judgmentDate': parse(item['judgmentDate']),
                # Only the first court case is used; an item with an empty
                # 'courtCases' list raises IndexError here.
                'caseNumber': item['courtCases'][0]['caseNumber'],
                'judges.name': judge_names,
            }
            print(judge_names)
            result = es.index(
                index='lab',
                doc_type='judgment',
                body=document,
                id=id,
            )
            print(result)
            id += 1
Esempio n. 7
0
def load_judgments(data):
    """Append every accepted item of ``data['items']`` to ``all_jugdments``.

    An item is accepted when ``reader.in_2008`` (helper defined elsewhere)
    returns a truthy value for it.  Items are added to the module-level list
    ``all_jugdments`` in their original order.  Returns nothing.
    """
    all_jugdments.extend(
        candidate for candidate in data['items'] if reader.in_2008(candidate)
    )