Python parse Beispiele, pattern.nl.parse Python Beispiele

Beispiel #1

0

Datei anzeigen

 def test_parse(self):
     """Check the slash-formatted parser output and the Dutch tagger accuracy.

     The accuracy check reads "corpora/tagged-nl-twnc.txt" under PATH,
     one sentence per line made of space-separated word/tag tokens, and
     requires more than 90% of the predicted tags to equal the gold tags.
     """
     # Assert parsed output with Penn Treebank II tags (slash-formatted).
     # 1) "de zwarte kat" is a noun phrase, "op de mat" is a prepositional
     # noun phrase.
     v = nl.parser.parse("De zwarte kat zat op de mat.")
     self.assertEqual(
         v, "De/DT/B-NP/O zwarte/JJ/I-NP/O kat/NN/I-NP/O " +
         "zat/VBD/B-VP/O " +
         "op/IN/B-PP/B-PNP de/DT/B-NP/I-PNP mat/NN/I-NP/I-PNP ././O/O")
     # 2) "jaagt" and "vogels" lemmata are "jagen" and "vogel".
     v = nl.parser.parse("De zwarte kat jaagt op vogels.", lemmata=True)
     self.assertEqual(
         v, "De/DT/B-NP/O/de zwarte/JJ/I-NP/O/zwart kat/NN/I-NP/O/kat " +
         "jaagt/VBZ/B-VP/O/jagen " +
         "op/IN/B-PP/B-PNP/op vogels/NNS/B-NP/I-PNP/vogel ././O/O/.")
     # Assert the accuracy of the Dutch tagger.
     i, n = 0, 0
     corpus = os.path.join(PATH, "corpora", "tagged-nl-twnc.txt")
     # A context manager closes the corpus even when parsing or an
     # assertion fails halfway through (the file used to be left open).
     with open(corpus) as f:
         for sentence in f:
             sentence = sentence.strip()
             s1 = [w.split("/") for w in sentence.split(" ")]
             s1 = [nl.wotan2penntreebank(w, tag) for w, tag in s1]
             s2 = [[w for w, pos in s1]]
             s2 = nl.parse(s2, tokenize=False)
             s2 = [w.split("/") for w in s2.split(" ")]
             # Index s2 explicitly so a length mismatch between gold and
             # predicted tokens still surfaces as an error.
             i += sum(1 for j, gold in enumerate(s1) if gold[1] == s2[j][1])
             n += len(s1)
     self.assertTrue(float(i) / n > 0.90)
     print("pattern.nl.parser.parse()")

Beispiel #2

0

Datei anzeigen

Datei: return_dim_sent.py Projekt: Aagje-codes/Dutch-Diminutives

def return_dim_sent(sent):
    """Return the sentence with its nouns turned into diminutives.

    The input is tagged with ``parse`` and only the first sentence of the
    parsed result is processed. A singular noun (tag ``NN``) that does not
    already end in "je" is replaced by ``dg.generate_diminutive(word)``, and
    a directly preceding "de" is rewritten to "het". A plural noun (tag
    ``NNS``) that does not already end in "jes" is singularized, turned into
    a diminutive and given an "s" suffix. All other tokens are kept as-is.

    :param sent: a string containing a sentence.
    :return: a string containing the sentence with the nouns turned into
        diminutives.
    :rtype: str
    """
    parsed = parse(sent, tokenize=True, tags=True, chunks=False)

    new_sent = []
    for word, pos in parsed.split()[0]:
        if pos == 'NN' and not word.endswith('je'):  # singular noun
            dim = dg.generate_diminutive(word)

            # Correct the article; not perfect though. The emptiness check
            # matters when the noun is the very first token: there is no
            # previous word to inspect, and new_sent[-1] would raise.
            if new_sent and new_sent[-1] == 'de':
                new_sent[-1] = 'het'

            new_sent.append(dim)
        elif pos == 'NNS' and not word.endswith('jes'):  # plural noun
            root = singularize(word)
            dim = dg.generate_diminutive(root)
            new_sent.append(dim + "s")
        else:
            new_sent.append(word)

    return " ".join(new_sent)

Beispiel #3

0

Datei anzeigen

Datei: test_nl.py Projekt: DataBranner/pattern

 def test_parse(self):
     """Check the slash-formatted parser output and the Dutch tagger accuracy.

     The accuracy check reads "corpora/tagged-nl-twnc.txt" under PATH,
     one UTF-8 encoded sentence per line made of space-separated word/tag
     tokens, and requires more than 90% of the predicted tags to equal
     the gold tags.
     """
     # Assert parsed output with Penn Treebank II tags (slash-formatted).
     # 1) "de zwarte kat" is a noun phrase, "op de mat" is a prepositional
     # noun phrase.
     v = nl.parser.parse("De zwarte kat zat op de mat.")
     self.assertEqual(v,
                      "De/DT/B-NP/O zwarte/JJ/I-NP/O kat/NN/I-NP/O " +
                      "zat/VBD/B-VP/O " +
                      "op/IN/B-PP/B-PNP de/DT/B-NP/I-PNP mat/NN/I-NP/I-PNP ././O/O"
                      )
     # 2) "jaagt" and "vogels" lemmata are "jagen" and "vogel".
     v = nl.parser.parse("De zwarte kat jaagt op vogels.", lemmata=True)
     self.assertEqual(v,
                      "De/DT/B-NP/O/de zwarte/JJ/I-NP/O/zwart kat/NN/I-NP/O/kat " +
                      "jaagt/VBZ/B-VP/O/jagen " +
                      "op/IN/B-PP/B-PNP/op vogels/NNS/B-NP/I-PNP/vogel ././O/O/."
                      )
     # Assert the accuracy of the Dutch tagger.
     i, n = 0, 0
     corpus = os.path.join(PATH, "corpora", "tagged-nl-twnc.txt")
     # A context manager closes the corpus even when parsing or an
     # assertion fails halfway through (the file used to be left open).
     with open(corpus) as f:
         for sentence in f:
             sentence = sentence.decode("utf-8").strip()
             s1 = [w.split("/") for w in sentence.split(" ")]
             s1 = [nl.wotan2penntreebank(w, tag) for w, tag in s1]
             s2 = [[w for w, pos in s1]]
             s2 = nl.parse(s2, tokenize=False)
             s2 = [w.split("/") for w in s2.split(" ")]
             # Index s2 explicitly so a length mismatch between gold and
             # predicted tokens still surfaces as an error.
             i += sum(1 for j, gold in enumerate(s1) if gold[1] == s2[j][1])
             n += len(s1)
     self.assertTrue(float(i) / n > 0.90)
     print("pattern.nl.parser.parse()")

Beispiel #4

0

Datei anzeigen

Datei: return_dim_sent.py Projekt: Aagje-codes/Dutch-Diminutives

def return_dim_sent(sent):
    """Return the sentence with its nouns turned into diminutives.

    The input is tagged with ``parse`` and only the first sentence of the
    parsed result is processed. A singular noun (tag ``NN``) that does not
    already end in "je" is replaced by ``dg.generate_diminutive(word)``, and
    a directly preceding "de" is rewritten to "het". A plural noun (tag
    ``NNS``) that does not already end in "jes" is singularized, turned into
    a diminutive and given an "s" suffix. All other tokens are kept as-is.

    :param sent: a string containing a sentence.
    :return: a string containing the sentence with the nouns turned into
        diminutives.
    :rtype: str
    """
    parsed = parse(sent, tokenize=True, tags=True, chunks=False)

    new_sent = []
    for word, pos in parsed.split()[0]:
        if pos == 'NN' and not word.endswith('je'):  # singular noun
            dim = dg.generate_diminutive(word)

            # Correct the article; not perfect though. The emptiness check
            # matters when the noun is the very first token: there is no
            # previous word to inspect, and new_sent[-1] would raise.
            if new_sent and new_sent[-1] == 'de':
                new_sent[-1] = 'het'

            new_sent.append(dim)
        elif pos == 'NNS' and not word.endswith('jes'):  # plural noun
            root = singularize(word)
            dim = dg.generate_diminutive(root)
            new_sent.append(dim + "s")
        else:
            new_sent.append(word)

    return " ".join(new_sent)

Beispiel #5

0

Datei anzeigen

def fakenewsify(title, trends):
    """Replace one proper noun of a title with a randomly chosen trend.

    ``title`` is parsed and only its first sentence is used. One token
    tagged ``NNP`` is picked at random and replaced by a random entry of
    ``trends``. The chosen entry is removed from ``trends``, so the
    caller's list is mutated.

    :param title: text to parse.
    :param trends: mutable list of replacement words.
    :return: a list with one dict per token of the first sentence, of the
        form ``{'woord': <word>, 'class': 'special' | 'normal'}`` where
        'special' marks the replaced position; ``None`` when the sentence
        has no ``NNP`` token or when ``trends`` is empty.
    """
    first_sentence = parse(title).split()[0]

    nnp_indices = [
        i for i, word_data in enumerate(first_sentence)
        if word_data[1] == 'NNP'
    ]
    # Nothing to replace, or nothing to replace it with: randint(0, -1)
    # would raise ValueError on an empty trends list.
    if not nnp_indices or not trends:
        return None

    index_to_replace = choice(nnp_indices)
    # pop() both selects the trend and removes it from the caller's list.
    chosen_trend = trends.pop(randint(0, len(trends) - 1))

    words = [word_data[0] for word_data in first_sentence]
    words[index_to_replace] = chosen_trend

    return [
        {'woord': word,
         'class': "special" if i == index_to_replace else "normal"}
        for i, word in enumerate(words)
    ]

Beispiel #6

0

Datei anzeigen

Datei: basic_text_processing.py Projekt: uvacw/inca

 def process(self, document_field):
     """Keep only the tokens whose tag starts with "N", "J" or "R".

     Per the original note these are the nouns, adjectives and adverbs;
     everything else is removed. Each kept word is prefixed with a single
     space, so a non-empty result starts with a space.
     """
     wanted = ("N", "J", "R")
     kept = []
     for sentence in parse(document_field).split():
         kept.extend(
             " " + token[0] for token in sentence
             if token[1].startswith(wanted))
     return "".join(kept)

Beispiel #7

0

Datei anzeigen

Datei: basic_text_processing.py Projekt: noellelebe/inca

 def process(self, document_field):
     '''Return the words of document_field whose tag starts with N, J or R.

     Per the original note: everything except nouns, adjectives, and
     adverbs is removed. Every kept word is preceded by one space.
     '''
     sentences = parse(document_field).split()
     return "".join(
         " " + token[0]
         for sentence in sentences
         for token in sentence
         if token[1].startswith(('N', 'J', 'R')))

Beispiel #8

0

Datei anzeigen

def parseResponse():
    """Return one "string<TAB>lemma<TAB>type" line per word of the request text.

    Reads the ``text`` query argument from ``request``; returns an empty
    string when it is absent. Otherwise the text is parsed with lemmata,
    split into sentences, and every word contributes one tab-separated
    line; lines are joined with newlines.
    """
    text = request.args.get('text')
    if text is None:
        return ""

    lines = []
    for sen in split(parse(text, lemmata=True)):
        lines.extend(
            u'\t'.join([word.string, word.lemma, word.type])
            for word in sen.words)
    return u'\n'.join(lines)

Beispiel #9

0

Datei anzeigen

Datei: 01-preprocessing.py Projekt: SPraet/issue_communication

def clean(doc):
    """Normalize a raw document string into a space-separated token string.

    Steps, in order:
      1. whitespace-split tokens starting with 'http' become 'zzzurl';
      2. tokens starting with '@' become 'zzzmention';
      3. the text is passed through ``nlp`` and ``redact_NE``
         (presumably named-entity redaction; see those helpers);
      4. the result is lower-cased and characters in ``exclude`` dropped;
      5. words contained in ``stop`` are dropped;
      6. every digit character is replaced by 'zzznumber';
      7. each remaining word is replaced by field 4 of the first token
         that ``parse(word, lemmata=True)`` yields (presumably its lemma).
    """
    def _mask(text, prefix, placeholder):
        # Swap every whitespace-separated token starting with `prefix`.
        return " ".join(
            placeholder if tok.startswith(prefix) else tok
            for tok in text.split())

    masked = _mask(_mask(doc, 'http', 'zzzurl'), '@', 'zzzmention')
    redacted = redact_NE(nlp(masked))

    without_punct = ''.join(
        ch for ch in redacted.lower() if ch not in exclude)
    without_stops = " ".join(
        w for w in without_punct.split() if w not in stop)
    # Works per character: each digit becomes its own placeholder.
    without_digits = ''.join(
        'zzznumber' if ch.isdigit() else ch for ch in without_stops)

    lemmas = []
    for word in without_digits.split():
        first_token = parse(word, lemmata=True).split()[0][0]
        lemmas.append(first_token[4])
    return " ".join(lemmas)

Beispiel #10

0

Datei anzeigen

Datei: classes.py Projekt: openstate/program-tracker

    def split(self, paragraph):
        '''
        Tokenize and stem the words of ``paragraph.text`` with pattern.nl.

        The stripped, lower-cased text is parsed with lemmata; the fifth
        slash-separated field of every token is taken as its stem. The
        stems are turned into 1-grams and those whose word is listed in
        ``self.common_words`` are dropped.

        Returns a list of the remaining words ([] for empty input).
        '''
        parsed_words = parse(lower(paragraph.text.strip()), lemmata=True).split(' ')
        if parsed_words == ['']:
            return []

        stemmed_words = []
        for token in parsed_words:
            fields = token.split('/')
            # A token without a lemma field used to raise inside a
            # swallowed try/except, leaving the result unbound (NameError
            # further down). Fall back to the surface form instead.
            stemmed_words.append(fields[4] if len(fields) > 4 else fields[0])

        stemmed_paragraph = ' '.join(stemmed_words)
        onegrams = ngrams(stemmed_paragraph, n=1)

        return [word[0] for word in onegrams if word[0] not in self.common_words]

Beispiel #11

0

Datei anzeigen

Datei: story.py Projekt: fbkarsdorp/storypy

 def add_sentiments(self, smoothed=False, window_size=12, binary=False, window_type='flat'):
     """Compute a sentiment score per scene and store it on each scene.

     The story text is read from ``self.filepath + '.txt'`` (UTF-8). For
     every scene, the slice ``text[scene.start:scene.end]`` is parsed
     and scored with ``extract_sentiment``; the first element of its
     result is used. With ``smoothed`` the scores are passed through
     ``smooth`` after ``window_size`` has been reduced in steps of 2
     until it no longer exceeds the number of scenes. With ``binary``
     the stored value is -1 for negative scores and 1 otherwise.
     """
     with codecs.open(self.filepath + '.txt', encoding='utf-8') as infile:
         text = infile.read()

     scores = [
         extract_sentiment(
             Sentence(parse(text[scene.start: scene.end], lemmata=True)))[0]
         for scene in self
     ]

     if smoothed:
         # The smoothing window may not be larger than the story itself.
         while window_size > len(self):
             window_size -= 2
         scores = smooth(
             np.array(scores), window_len=window_size, window=window_type)

     for i, score in enumerate(scores):
         if binary:
             score = -1 if score < 0 else 1
         self[i].sentiment = score

Beispiel #12

0

Datei anzeigen

Datei: tokenizer.py Projekt: NLeSC/AVResearcherXL

def tokenize(text, min_lemma=3, max_lemma=30, allowed_pos_tags=POS_TAGS,
             allowed_lemma_chars=LEMMA_CHARS):
    """
    Lazily yield the lemmas of ``text`` that pass the length, POS-tag and
    character filters.

    :param text: the text to process.
    :param min_lemma: the minimal number of characters a lemma should contain.
    :param max_lemma: the maximal number of characters a lemma should contain.
    :param allowed_pos_tags: a compiled regex; a lemma is kept only when
                             the regex matches its POS tag.
    :param allowed_lemma_chars: a compiled regex; a lemma is kept only
                             when the regex matches the lemma itself.
    """
    for sentence in parse(text, lemmata=True, collapse=False):
        for _token, tag, _chunk, _pnp, lemma in sentence:
            # Checks run cheapest-first, in the same order as before.
            if not min_lemma <= len(lemma) <= max_lemma:
                continue
            if not allowed_pos_tags.match(tag):
                continue
            if allowed_lemma_chars.match(lemma):
                yield lemma

Beispiel #13

0

Datei anzeigen

def tokenize(text,
             min_lemma=3,
             max_lemma=30,
             allowed_pos_tags=POS_TAGS,
             allowed_lemma_chars=LEMMA_CHARS):
    """
    Parse ``text`` with lemmata and return a generator over the lemmas
    that satisfy all filters.

    :param text: the text to process.
    :param min_lemma: the minimal number of characters a lemma should contain.
    :param max_lemma: the maximal number of characters a lemma should contain.
    :param allowed_pos_tags: a compiled regex that must match the POS tag
                             of a token for its lemma to be yielded.
    :param allowed_lemma_chars: a compiled regex that must match the
                             lemma for it to be yielded.
    """
    def _accept(tag, lemma):
        # Length first, then tag, then lemma characters (short-circuit).
        return (min_lemma <= len(lemma) <= max_lemma
                and allowed_pos_tags.match(tag)
                and allowed_lemma_chars.match(lemma))

    for sentence in parse(text, lemmata=True, collapse=False):
        for fields in sentence:
            _word, tag, _chunk, _pnp, lemma = fields
            if _accept(tag, lemma):
                yield lemma

Beispiel #14

0

Datei anzeigen

def prepare_text_nl(row):
    """ Prepares dutch text by doing the following:
    * Lemmatize modal verbs and verbs (tag 'MD' or containing 'VB')
    * Singularize plural nouns (tag 'NNS')
    * Put adjectives (tag containing 'JJ') in predicative form
    * Keep every other word unchanged

    Parameters:
    -----------
    row : object with a ``Message_Only_Text`` attribute
        Presumably a row of a pandas dataframe.

    Returns:
    --------
    new_message : str
        The processed words, each followed by a single space. An empty
        string when the text could not be parsed.

    """
    try:
        message = split(parse(row.Message_Only_Text))
    except Exception:
        # Report the offending text and give up on this row. Previously
        # execution continued and failed on the unbound `message`.
        print(row.Message_Only_Text)
        return ''

    pieces = []
    for sentence in message:
        for word, tag in sentence.tagged:
            if tag == 'MD' or 'VB' in tag:
                pieces.append(lemma(word))
            elif tag == 'NNS':
                pieces.append(singularize(word))
            elif 'JJ' in tag:
                pieces.append(predicative(word))
            else:
                pieces.append(word)

    # Every word keeps its trailing space, as before.
    return ''.join(piece + ' ' for piece in pieces)

Beispiel #15

0

Datei anzeigen

Datei: datamanager.py Projekt: bobvdvelde/inca

def clean_database_njr():
    """Rebuild the cleaned NJR collection from every article in ``collection``.

    The target collection ``collectionnamecleanedNJR`` is dropped first.
    Then, for each article:
      1. newlines in ``art["text"]`` are replaced by spaces and only the
         tokens whose tag starts with "N", "J" or "R" are kept;
      2. the three replacement lists are applied (always / last names
         once list 1 matched / only if a trigger expression occurs);
      3. the text is lower-cased, passed through ``remove_punctuation``,
         and stop words and pure-digit tokens are removed;
      4. the article, with ``text`` replaced, is inserted into
         ``collectioncleanedNJR``.

    Relies on module-level configuration (file names, collections) that is
    defined elsewhere. Progress is printed to stdout.
    """
    # TODO: currently a copy-paste of the regular function; do this
    # properly, perhaps by integrating the two.
    from pattern.nl import parse

    # initialize new database for cleaned collection
    c = Connection()
    c[databasename].drop_collection(collectionnamecleanedNJR)

    # load replacement lists
    # replacement list 1: always replace
    with open(replacementlistfile, mode="r", encoding="utf-8") as fi:
        repldict = json.load(fi, object_pairs_hook=OrderedDict)
    # Keep (pattern, replacement) pairs in file order. A set() discarded
    # the order the OrderedDict was loaded to preserve, and forced the
    # replacement to be looked up again by slicing the pattern source.
    replpatterns = [(re.compile("\\b" + k + "\\b"), v)
                    for k, v in repldict.items()]
    # replacement list 2: replace if already replaced according to list 1
    with open(replacementlistlastnamesfile, mode="r", encoding="utf-8") as fi:
        repldictpersons = json.load(fi, object_pairs_hook=OrderedDict)
    # replacement list 3: replace only if another expression is mentioned
    with open(replacementlistsindienfile, mode="r", encoding="utf-8") as fi:
        repldictindien = json.load(fi, object_pairs_hook=OrderedDict)

    # Stop words are the same for every article: read them once (the file
    # used to be re-opened, and never closed, per article) into a set for
    # constant-time membership tests.
    with open(stopwordsfile, mode="r", encoding="utf-8") as fi:
        stops = {line.strip().lower() for line in fi}

    allarticles = collection.find()
    aantal = collection.count()
    i = 0
    for art in allarticles:
        i += 1
        print("\r", i, "/", aantal, " or ", int(i / aantal * 100), "%", end=' ')
        sys.stdout.flush()
        thisartorig = art["text"].replace("\n", " ")

        # this is the interesting part: keep only tokens tagged N*, J*, R*
        thisart = "".join(
            " " + token[0]
            for zin in parse(thisartorig).split()
            for token in zin
            if token[1].startswith(('N', 'J', 'R')))

        numbsub = 0
        for pat, replacement in replpatterns:
            thisart, count = pat.subn(replacement, thisart)
            numbsub += count
        # only if sth has been substituted at all, check if it's a last name that has to be substituted as well
        # function 1b: once someone has been mentioned by their full name,
        # also replace later mentions that use only their last name
        if numbsub > 0:
            for k, v in repldictpersons.items():
                for vv in v:
                    if vv in thisart:
                        thisart = re.sub("\\b" + k + "\\b", vv, thisart)
        for k, v in repldictindien.items():
            if re.findall("\\b" + k + "\\b", thisart):
                # k is mentioned, so apply every (old, new) pair listed for it
                for vv in v:
                    thisart = re.sub("\\b" + vv[0] + "\\b", vv[1], thisart)

        thisart = remove_punctuation(thisart.lower())
        # Every kept word is prefixed with a space, as before.
        thisart = "".join(
            " " + woord for woord in thisart.split()
            if woord not in stops and not woord.isdigit())
        # replace original text with modified text and put the whole item in the cleaned collection
        art["text"] = thisart
        collectioncleanedNJR.insert(art)

Beispiel #16

0

Datei anzeigen

def clean_database_njr():

    # load replacement lists
    # replacement list 1: always replace
    with open(replacementlistfile, mode="r", encoding="utf-8") as fi:
        repldict = json.load(fi, object_pairs_hook=OrderedDict)
    replpatterns = set(re.compile("\\b" + k + "\\b") for k in repldict)
    # replacement list 2: replace if already replaced according to list 1
    with open(replacementlistlastnamesfile, mode="r", encoding="utf-8") as fi:
        repldictpersons = json.load(fi, object_pairs_hook=OrderedDict)
    # replacement list 3: replace only if another expression is mentioned
    with open(replacementlistsindienfile, mode="r", encoding="utf-8") as fi:
        repldictindien = json.load(fi, object_pairs_hook=OrderedDict)

    allarticles = collection.find(subset).batch_size(
        30
    )  # batch size = no of articles that we are sure can be processed in 10 minutes. defaults to 100, that can be problematic
    aantal = collection.find(subset).count()
    i = 0
    for art in allarticles:
        i += 1
        print "\r", i, "/", aantal, " or ", int(i / aantal * 100), "%",
        sys.stdout.flush()
        try:
            thisartorig = art["text"].replace("\n", " ")
        except:
            # do nothing with this article if it does not have any text
            print
            print 'Weird, this article did not have any text body, skipping it'
            print
            continue

        numbsub = 0
        for pat in replpatterns:
            subst = pat.subn(repldict[pat.pattern[2:-2]],
                             thisartorig)  #[2:-2] to strip the \b
            thisartorig = subst[0]
            numbsub += subst[1]
        # only if sth has been substituted at all, check if it's a last name that has to be substituted as well
        # functie 1b: als iemand een keer met z'n volledige naam genoemd wordt, ook de volgende keren dat alleen z'n achternaam wordt genoemd deze vervangen
        if numbsub > 0:
            for k, v in repldictpersons.items():
                #print "For",k,", there are",len(v),"rules."
                for vv in v:
                    if vv in thisartorig:
                        thisartorig = re.sub("\\b" + k + "\\b", vv,
                                             thisartorig)
                        #print "Replaced",k,"by",vv
        for k, v in repldictindien.items():
            if re.findall("\\b" + k + "\\b", thisartorig):
                for vv in v:
                    #print "checking vv",vv,"and k",k
                    thisartorig = re.sub("\\b" + vv[0] + "\\b", vv[1],
                                         thisartorig)
                #print "Replaced", vv[0], "by", vv[1], "because", k, "was mentioned"

        from pattern.nl import parse
        thisart = ""
        for zin in parse(thisartorig).split():
            for token in zin:
                if token[1].startswith('N') or token[1].startswith(
                        'J') or token[1].startswith('R'):
                    #print token[0],token[1]
                    thisart += (" " + token[0])

        thisart = remove_punctuation(thisart.lower())
        stops = [
            line.strip().lower()
            for line in open(stopwordsfile, mode="r", encoding="utf-8")
        ]
        tas = thisart.split()
        thisart = ""
        for woord in tas:
            if (woord not in stops) and (not woord.isdigit()):
                thisart = " ".join([thisart, woord])

        r = collection.update({'_id': art['_id']},
                              {"$set": {
                                  'textclean_njr': thisart
                              }})
        print r

Beispiel #17

0

Datei anzeigen

def ocr_correct(sentences=False, lang="nl"):
"""

"""
dictionary = enchant.Dict(lang)
ngram_model = load_ngram(lang)

if not sentences:
sentences = unescape(u"""
In verband met den NIEUWJAARSDAG zal dit blad a.s. Vrijdag 1 Januari
niet verschijnen. Het eerstvolgend No. van „De Graafschap-Bode"
verschijnt derhalve A.S. MAANDAG 4 JANUARI 1932. H.H. Adverteerders en
Correspondenten verdoeken wij beleefd, daarmede rekening te willen
houden. DE UITGEEFSTER. [ Lijders aan Maagpijn! I | Maagkramp, Zuur,
Hartwater en siechte spijs- ; * vertering zullen baat vinden bij het
gebruik • ■ van Maagpoeder van Apotheker BOOM. ; ■ Verkrijgbaar In de
meeste Apotheken en • « Drogistwinkel i ƒ 1.25 per verzegelde doos. •
|V■ ' • mmmwm as Rente over het jaar 1931 kan vanaf jjj p 2 Januari
1932 op de Spaarbank- $ boekjes, worden bijgeschreven. %
GEMEENTE-SPAARBANK I fS . DOETINCHEM. $ m 'w. PORTRET-ATELIERS Fa.
BRINCKER-v. GALEN, is het adres voor goede en mooie PORTRETTEN. 24818 •
Hamburgcrstr. 19A, Tel. 164 DOETINCHEM. : ■ - j" viiSTO^G"'"'"] * De
Laxeerpillen van Apotheker BOOM ver- J * drijven overtollige gal en
slijm, zuiveren het ; ; bloed en bevorderen een goeden stoelgang. ! !
Verkrijgbaar in de meeste apotheken en dro- ; ; gistwinkels è 30 en 55
ct. per verzegelde doos. ! i Gratis en franco wordt op aanvraag een ! S
proefdoosje toegezonden door firma A. M. S _ A ra tiem. ï

Berichten Lijmers. BERGH.— Vergadering Raad gemeente Woensdag
December, voormiddags uur. Afwezig heer Spekking. Punt Ingekomen
stukken. Secretaris leest diverse ingekomen stukken voor, waarvan
enkele voor kennisgeving worden aangenomen andere terzijde gelegd voor
latere behandeling. Punt Benoeming Alg. Burgerlijk Armbestuur.
Periodiek aftredend heer Winters. Voorgedragen worden benoeming:
WIIUCJs; LSUCIIUWUVUI. Benoemd wordt heer Winters. Punt Eenige
voorgestelde wijzigingen begrooting gemeente over 1931 worden
goedgekeurd. Punt Rekening 1930 Gasthuis 's-Heerenberg. rekening
Gasthuis 's-Heerenberg over 1930 wordt goedkeuring aangeboden. heer
gedacht, Gasthuis wijziging pachten gebracht hebben. sprekers spijt
heeft daar niets vernomen. Spr. bepleit noodzakelijkheid
pachtverlaging. Spr. heeft gehoord, gratis kunstmest gegeven doch acht
spr. niet zoo'n belang; grond jaartje buiten desnoods. heer Berntsen
acht opmerkingen heer Wilmes zeer terecht. Spr. gelooft ook, menschen
meer geholpen worden pachtverlaging gratis kunstmest. heer voelt voor
pachtverlaging. Besloten wordt bovenstaande opmerkingen kennis
Gasthuisbestuur brengen. Besloten wordt rekening 1930 Gasthuis goed
keuren. Eindcijfers Algemeene rekening ontvangst 74939.14, uitgaaf
63988.1 S'/J. Voordeelig slot 10.950.98>/2- Eindcijfers bijzondere
rekening ontvangst 4274.5814, uitgaaf 10760.66. Nadeelig slot 6486.07
Punt Begrooting 1932 Gasthuis. Besloten wordt ingediende begrooting
voor 1932 Gasthuis 's-Heerenberg goed keuren. eindcijfers algemeene
begrooting luiden: ontvangst uitgaaf 21952 bijzondere begrooting
ontvangst 3761.—, uitgaaf 9836.75. Nadeelig slot 6075.75. Punt
Rekening 1930 Instelling voorkoming Armoede. ue?e rekening wordt
aangeboden goedgekeurd. eindcijfers luiden: ontvangst 6469.80, uitgaaf
5522.32. Voordeeiig ;aido 947.68. Punt Begrooting 1932 vorengenoemde
Instelling. Aangeboden goedgekeurd wordt begrooting over 1932.
eindcijfer luidt uitgaaf ontvangst 7017,61. Punt aanvulling
leerlingenlijst onder punt genoemde instelling wordt goedgekeurd. Punt
Leening 30.000.—. Voorgesteld wordt 30.000 leenen .egen rente hoogstens
koers Mij.1 voor Jemeente*crediet Amsterdam. leening noodig voor
aanbrengen archief Gemeentehuis, wegenverbetering, aankoop materieel
voor wegenverbetering werkverschaffing. licht toe, mogelijk Januari
a.s. Rijkssubsidie voor werkloOzepzorg anders minder wordt. heer stelt
voor werkverschaffing Januari a.s. geval voort zetten risico verminderd
subsidie voor rekening gemeente nemen. zegt, bedoeling heer Brar.ts:
Wordt wegenwals voorgestelde leening bestreden? Voorz.: Neen. heer
bepleit aanschaffen wals maar stellen. heer Kupors achf
aanschaffing wals zeer belang gemeente; -vals behoeft niet alleen
nieuwe wegen, doch oude wegen gebruikt worden. heer Ditzhuyzen
voelt voor spoedige uitvoering door commissie voor werkverruiming
geadviseerde werken. Besloten wordt voorgestelde geldleening gaan.
Punt Afstand strooken grond voor wegenverbetering. Voorgesteld
wordt rtrooken grond aanvaarden voor verbet:ring wegen
's-Heerenberg, Beek toerbeek. heer Ditzhuyzen zegt, niet gronden
Braamt Kilder voorstel genoemd worden; waar afstand dezer grondvn
over eenige dagen feit zijn, ctelt spr. voor deze strooken grond
thans nemen Raadsbesluit noemen. voorstel wordt aangenomen,
aangevuld voorstel heer Ditzhuyzen. Punt Voorstel I.z. aankoop
grond voor bouw cadaverhuisje woningen voor sociaal achterlijken
verworpen. stellen voor perceel grond koopen Lengel, groot ruim
H.A. Gasthuis 's-Heerenberg, prijs 2100; pacht jaar. bedoeling
dezen grond cadaverhuisje woningen voor sociaal-achterlijken
bouwen. heer Bern aeht verkeerde methode Gasthuis steeds gronden
tracht verkoopen, gelegen buiten 'a-Heerenberg, bedoeling armen
's-Heerenberg kwijt raken.De heer begrijpt :ilet, meente niet
publiceert courant, grond nnnkoopen. zijn particulieren
instellingen genoeg, grond willen verkoopen. lijkt net, alsof
alleen Gasthuis! grond verkoopen. Spr. tegen voorstel, omdat geen
publiciteit gegeven voorgenomen grondaankoop omdat prijs hoog heer
Ditzhuyzen vorige sprekers eens. Bovendien lijkt spr, niet
gewenscht noodslachtplaats perceel grond zetten, waar woningen voor
sociaal achterlijken komen staap. Weth. Thuis zegt, misverstand
aanwezig Gasthuis biedt geen grond aan, doch gevraagd bedoelden
grond verkoopen. Gasthuis houdt grond liever zelf. liook vorri Minf
urnn noUlotl Itwt lIVI lllVk UVtttbll) Gasthuis sociaal
achterlijken naar andere parochie tracht schuiven, omdat deze
menschen, wanneer 's-Heerenberg zouden blijven wonen, laste
Gasthuis zouden komen. Voorz. licht toe, grond hard noodig wonen
menschen woningen wijze, welke onmenschelijk noemen Daarvoor
woningbouw hard noodig. Spr. wijst buitengewoon moeilijk grond
koopen; voor woningen voor sociaal-achterlijken heeft niemand grond
koop! Daarom bleef niets anders over beroep doen Gasthuis, gemeente
grond helpen. Spr. wijst grond Zeddam veel duurder was; grond
Lengel, aangewezen door 'icer Roording, klein. Spr. hoopt, Raad
grondaankoop besluiten, daar hard noodig beklagenswaardige menschen
helpen menschwaardiger woningen. heer Berntsen zegt, alleen grond
's-Heerenberg Gasthuis koopen, omdat armen, komen wonen straks
laste vallen Gasthuis. Voorz. zegt, naar zijn overtuiging Gasthuis
's-Heerenberg geen grond heeft liggen tegen denzelfden prijs. heer
Berntsen hiermede niet eens. Voorz. zegt, combinatie
cadaverhuisje-noodslachtplaats woningen voor sociaalachterlijken
geen bezwaar oplevert, daar terrein groot genoeg heer Roording
wijst stukken grond Klaassen, welke liggen. Gasthuis heeft veel
meer grond. Spr. tegen voorgestelden grondaankoop. heer Berntsen
vreest, geheel Lengel dupe wordt voorstel; menschen bouwen woningen
vernielen maar vruchten veld. heer Elshof raadt voorstel houden
volgende vergadering alsnog oproep voor rondaankoop doen. Voorz.
betoogt, grondaankoop niet langer uitgesteld worden. voorstel
grondaankoop wordt hierna verworpen 10—3 stemmen. Vóór stemmen
beide wethouders heer Kupers; blanco heer Diecker. Punt Vergoeding
leermiddelen geweigerd. Schoolbestuur Kilder verzoekt vergoeding
voor aanschaffing leermiddelen, bedrag circa 241.—. stellen voor
aanvrage niet willigen. heer Elshof bepleit inwilliging verzoek;
heer Gerretsen steunt het. heer betoogt, schoolbesturen zuinig
:mogelijk moeten zijn aanvragen. De-aangevraagde, leermiddelen pcht
spr. luxe. heer Elshof zegt, schoolbestuur toch weigerende
beslissing beroep gaan. Waarom moet gemeente eerst gedwongen
worden? Spr. acht aangevraagde leer» middeien geen weeide. Weth.
Thuis zegt, schoolbesturen reeds aangeschreven zijn vooral zuinig
zijn aanvragen. gaat voorts onderhavige aanvrage niet niet luxe
aanvrage valt echter r.iet onder „eerste volgens 0.-wet, doch moet
bestreden worden gewone jaarlijkscke vergoeding voor instandhouding
school, welke ƒ„B.— leerling jaar bedraagt. heer Berntsen vreest
leelijke jezichten belastingbetalers, Raad nakkeKjk aanvragen
schoolbesturen inwilligt. $pr. onderwijs gaarne terwille, doch moet
belastingdruk denken. heer Elshof zegt, Inspecteur mondeling
Pastoor verklaard heeft, bedoelde leermiddelen noodig waren. heer
Ditzhuyzen heeft eens deskundige gehoord, schoolbesturen vergoeding
leerling ruimschoots kunnen; fpr. daarom tegen aangevraagde
subsidie. Voorz. brengt hierna voorstel stemming aanvrage niet
willigen. Hiertoe wordt besloten stemmen. Tegen heeren Elshof,
Wilmes Gerretsen. Punt Vastgesteld wordt verordening, regelende
beroep geweigerde bouwvergunningen, bedoeld art. Woningwet. Punt
Reglement Spaar- Voorschotbank. Voorgesteld wordt vast stellen Raad
overgelegd reglement voor Spaar- Voorschotbank. heer Berntsen stelt
voor commissie benoemen voor onderzoek voorgestelde reglement. heer
Ditzhuyzen acht beter reglement nemen, omdat spoed kunnen afschrilt
krijgen noodig volgende zitting eens wijzigingen voorstellen.
Hiertoe wordt besloten. Punt Onbewoonbaarverklaring. Besloten wordt
onbewoonbaarverklaring woning 's-Heerenberg 428, toebehoorend N.V.
Huis Bergh. Punt Voorgesteld wordt WijnantU, bewoner vorenbedoelds
onbewoonbaarverklaards woning, bijdrage hoogste fjoo verstrekken
voor bouw 'van nieuwe woning. Dhr. Ditzhuyjen acht slaapkamer
nieuwe wwjing kjein; voorts vraagt spr. voorwaarde «tellen,
materialen gemeente gekocht moeten worden. Conform voorstel
aangevuld hetgeen dhr. JPitjhuyzen opmerkt, woedt besloten. Punt
Winkel»Tuitln?. wnkïls mogen Zondags 8-*9'/j. 11—.12 14—18 geopend
ïljn. Voorgesteld wordt verordening, welke volgende regelen stelt.
Kermis geld: tlui'.'n'T winkels niet. Voor wtnkels grens wordt
bepaald, deze Zondags li—! 14—18. open mogen zijn.Dhr. stelt voor
winket» open laten zijn B—9l/1 11—13 Zondagmorgen, toog
kerkganger», inkoopen willen doen. Besloten wordt bepalen, winkels
heele gemeente e-~9l/s> 14—i& Zondag open mogen zijn. Bergplaats
voor vuilnis Zeddam. Gi'.debcstuur Zeddam vraagt voor terrein,
waarop liet vui'lms gestort worden. achten dezen prijs hoog. Dhr.
Berntsen hiermede eens; Gildebestuur heeft zelf veei belang
voorgestelde regeling. vuil wordt overal Gildegrond geworpen. Spr.
stelt voor voorloopig vergoeding goed keuren; spr. trachten
Giloebestuur bewegen billijker houding. voorstel-B erntsen woidt
aangenomen. Telefooninstaila'je Gemeentehui», etc. Volgens
telefoondienst huistelefoon* installatie voor gemeente, aangesloten
stadsnet, jaar 355.— kosten, ongeveer meer jaar thans telefoon
kost. Deze installatie wordt zeer noodig geacht; wordt besloten.
Subsidie verleend Streekcommissle bsvordering Vreemdelingenverkeer.
Ingekomen adres „Streekcommissie bevordering Vreemdelingen* verkeer
gemeentel» Eergh, Doetinchem, Gendringen, Hummelo Keppel Wisch
~'s-Heerenberg", adressanten verzoe j.cent inwoner jaar subsidie,
waarvoor goede streekpropaganda zullen opzetten, zulks
vermeerdering inkom* sten middenstand. Voorz. stelt voor adres
financieel moeilijke tijden voorloopig terzijde leggen. Dhr.
Berntsen hiermede eens. Spr. betoogt, b.v. ambtenaren, wier inkomen
vrijwel niet gedaald aanmerking genomen mindere kosten
levensonderhoud;, vacantie zullen gaan. gaat deze menschen naar
deze streek trekken. Juist dezen slechten tijd vindt spr. alles
voor vreemdelingenverkeer bevorderen. Bergh heeft prachtig
natuurschoon. Dhr. Ditzhuyzen vorige-n spreker eens. Weth. Thuis
wijst vreemdelingenverkeer middenstandsbelang Raaa gelegenheid
winkeliers, hotelhou* ders eens helpen. Dhr. Berntsen merkt
Gendrmgen'een voorbeeld zijn; ofschoon daar heelemaal geen
vreemdelingenverkeer besloot gemeente toch subsidie voor
vreemdelingen* verkeerpropaganda verkenen. Dhr. Brants staat niet
enthousiast tegenover, maar niet tegenhouden- Weth. Reyers zegt,
heele gemeente voeren propaganda profiteeren ;al. Voorz. legt niet
voor afstel, doch slechts voor uitstel voelen. Zonder hoofdelijke
stemming wordt hierna gevraagde subsidie besloten. Adres-Wehl i.ï.
stichting Landbouwmlnlsterle. gemeentebestuur,"van Wehl verzoekt,
act-, haesie adres regeenng, waarin instelling af?onderiijk
ministerie landbouw verzocht wordt Dhr. Berntsen tegen. Spr./ ziet
a<cft' adres groep ontevredenen, buiten boerenorganisafies actie
voeren.. Deze Prganisaties hebben altijd'bun phcht gedaan ,bij
regeering gedaan voor landbouwers, maar konden. Spr. acht
organisatorisch, adres Wefcl steunen. dhr- Brants verklaart zich
tegen. Dhr. Ditahuyten meent, toch niet verkeerd naast bestaande
organisaties anderen voor eenzelfde doel ijveren; spr. ziet zoo,
actie niet tegenover, doch naast organisaties gevoerd wordt. Dhr.
Berntsen nogmaals uiteen, waar* onjuist acht adres steunen; dhr.
Ditzhuyzen vindt hierin aanleiding aanhouding beslissing verzoeken
voor nader onderzoek. Hiertoe wordt besloten.•" Rondvraag. I>hr.
Elsliof wijst aanwexigheid van1 grint naar Siiulerdijk, tlat
nutteloos hoopen ligt. Voort, ander, door heer Elshof berd;
gebracht, aandacht schenken. Dhr. Gerretsen vraagt geen vergoeding
uitgekeerd' wordt personen, Kilder xand gebracht hebben. heer geeft
eenige inlichtingen omtrent deze kwestie. Voorz. »egt, rekeningen
aannemers niet zijn ingekomen. beer Berntson vraagt, staat
aanbrengen electrische lamp Gasthui» Zed&am. Voorz. zegt,
vooroerciding tUCCI tIIU **•**** langs Probat Heerenberg. Voorz.
aegt onderzoek toe. heer Ditzhuyzen vraagt redmi. ging goten
bovendorp Zeddam. Voorz. zegt toe. beer Dieeker vraagt verbetering
Wijnbergscbe naar Sinderdijk. Voorz. belooft zullen nagaan.
Besloten wordt Abbenhuiis Braamt bomvpremie kennen. Hierna
sluiting.
ïj a.s. Feestdagen offreeren wij: alle soorten binnen- buitenlandsch
GEDISTILLEERD. LIKEUREN. WIJNEN, BOERENJONGENS, ADVOCAAT, CHAMPAGNE, COGNAC,
PUNCH. Perlstein DOETINCHBN. Boterolie Slaolie tegen verlaagde prijzen. PAUL
WESTHOFF HUMMELO. twentsche BANK N.V.I gevestigd Amsterdam. Kapitaal Reserve
5t.500.000.— Ikantoor winterswijk.! Belast ziet) uitvoering ALLE Bankierszaken.
GRATIS apparaat indien proef wilt nemen onze „SIGI" mesjetT „siGi"Man itelt
introductie deze prima mesjes proefpakket mesjes beschikbaar tegen kleine
vergoeding slechts 0.90 PARKET (werkelijke winkelwaarde 12.—), terwijl daarbij
GRATIS (raai ver* tilverd, roestvrij scheerapparaat ontvangt
beschikbaarstelling deze voorwaarden geschiedt slechts tjjdelyk alleen
ontvangst onderstaande coupon, binnen dagen bezit moet zijn. toezending
onder rembours 0.90 plus rembourskosten geschiedt naar volgorde
ingekomen coupons. voorkoming abuizen verzoeken geen geld acnden.
COUPON Verkoopkantoor „SiGi"scheerapparaten ————— Firma MEYER RIJSWIJK
Z..H. Zend geld Lindelaan Postbus oom* prMlpikkM (101 maajae ratia
varzltvard apparaat »««•» snor ramboura »0.00 plua rembourakoitan
Naam: stfHt r.w-r.fT.< <S.v.p. duidelijk schrijven; diukwerk
verzenden: cent frankeeren)
——BB—— FABER Hamburgerstraat Do^ichem.
""".encode("utf-8"))

misspelled_words = set()
sentences = parse(sentences)

bad_sentences_count = 0
good_sentence_count = 0
correctable_sentence_count = 0

correctspelled_word = 0
misspelled_word_count = 0
total_sentences_count = 0
total_word_count = 0

for sentence in split(sentences):
total_sentences_count += 1
bad_sentence = False
correctable_sentence = False
new_sentence = u""

for chunk in sentence.chunks:
for word in chunk.words:
bad_sentence, \
correctable_sentence, \
new_sentence = \
analyze_word(word,
bad_sentence,
correctable_sentence,
new_sentence,
dictionary,
ngram_model)

if bad_sentence:
print u"Bad: " + u" ".join([w.string for w in sentence.words])
bad_sentences_count += 1
elif correctable_sentence:
print u"Correctable (old): " + \
u" ".join([w.string for w in sentence.words])
print u"Correctable (new): " + new_sentence
correctable_sentence_count += 1
else:
print u"Good: " + \
u" ".join([w.string for w in sentence.words])
good_sentence_count += 1

print("Good_sentence_count: %i" % good_sentence_count)
print("Bad_sentences_count: %i" % bad_sentences_count)

Beispiel #18

0

Datei anzeigen

Datei: postag.py Projekt: NetwerkOorlogsBronnen/pilot-geocoderen

# Sentence splitter: takes HTML-escaped text as the first command-line
# argument, unescapes it, parses it with pattern.nl and prints the string form
# of every sentence to stdout as one JSON array.
import json
import sys

from pattern.nl import parse, split

try:
    # Python 3.4+: html.unescape() replaces HTMLParser.HTMLParser().unescape().
    from html import unescape
except ImportError:
    # Python 2 fallback.
    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape

if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: %s TEXT" % sys.argv[0])

text = unescape(sys.argv[1])

# The loop variable is local to the comprehension on Python 3 and no longer
# overwrites the variable that holds the raw input text.
sentences = [str(sentence) for sentence in split(parse(text))]

# print(...) with a single argument behaves identically on Python 2 and 3.
print(json.dumps(sentences))

Beispiel #19

0

Datei anzeigen

# Sentence splitter: takes HTML-escaped text as the first command-line
# argument, unescapes it, parses it with pattern.nl and prints the string form
# of every sentence to stdout as one JSON array.
import json
import sys

from pattern.nl import parse, split

try:
    # Python 3.4+: html.unescape() replaces HTMLParser.HTMLParser().unescape().
    from html import unescape
except ImportError:
    # Python 2 fallback.
    from HTMLParser import HTMLParser
    unescape = HTMLParser().unescape

if len(sys.argv) < 2:
    # Fail with a readable message instead of an IndexError traceback.
    sys.exit("usage: %s TEXT" % sys.argv[0])

text = unescape(sys.argv[1])

# The loop variable is local to the comprehension on Python 3 and no longer
# overwrites the variable that holds the raw input text.
sentences = [str(sentence) for sentence in split(parse(text))]

# print(...) with a single argument behaves identically on Python 2 and 3.
print(json.dumps(sentences))

Beispiel #20

0

Datei anzeigen

Datei: datamanager.py Projekt: uvacw/inca

def clean_database_njr():
    """Rebuild the cleaned NJR collection from every article in ``collection``.

    For each article the text is parsed with ``pattern.nl`` and only the
    tokens whose tag (``token[1]``) starts with "N", "J" or "R" are kept
    (presumably nouns, adjectives and adverbs -- confirm against the tagset).
    The three replacement lists are then applied, the text is lower-cased,
    punctuation is removed, stop words and pure-digit tokens are dropped, and
    the article is inserted into ``collectioncleanedNJR`` with its ``"text"``
    field replaced by the cleaned text.

    The target collection is dropped first.  All file names, collections and
    helpers (``remove_punctuation``, ``Connection``, ...) are module-level
    names defined outside this function.
    """
    # TODO: this is currently a copy-paste of the regular function; do it
    # better, maybe integrate the two?

    # Imported inside the function, as before, so that pattern is only needed
    # when this function actually runs.
    from pattern.nl import parse

    # initialize new database for cleaned collection
    c = Connection()
    c[databasename].drop_collection(collectionnamecleanedNJR)

    # load replacement lists
    # replacement list 1: always replace
    with open(replacementlistfile, mode="r", encoding="utf-8") as fi:
        repldict = json.load(fi, object_pairs_hook=OrderedDict)
    # Keep (pattern, replacement) pairs in a list: a set would throw away the
    # file order that OrderedDict preserves and make the order in which the
    # substitutions are applied arbitrary.  Keys are used as regex source
    # unescaped, exactly as before.
    replpatterns = [
        (re.compile("\\b" + k + "\\b"), v) for k, v in repldict.items()
    ]
    # replacement list 2: replace if already replaced according to list 1
    with open(replacementlistlastnamesfile, mode="r", encoding="utf-8") as fi:
        repldictpersons = json.load(fi, object_pairs_hook=OrderedDict)
    # replacement list 3: replace only if another expression is mentioned
    with open(replacementlistsindienfile, mode="r", encoding="utf-8") as fi:
        repldictindien = json.load(fi, object_pairs_hook=OrderedDict)

    # The stop-word list does not depend on the article: read it once, close
    # the file, and use a set for O(1) membership tests.
    with open(stopwordsfile, mode="r", encoding="utf-8") as fi:
        stops = {line.strip().lower() for line in fi}

    allarticles = collection.find()
    aantal = collection.count()
    for i, art in enumerate(allarticles, 1):
        # Integer arithmetic avoids float rounding (e.g. 29/100 showing 28 %).
        print("\r",
              i,
              "/",
              aantal,
              " or ",
              i * 100 // aantal,
              "%",
              end=" ")
        sys.stdout.flush()
        thisartorig = art["text"].replace("\n", " ")

        # This is where it gets interesting: keep only the N*/J*/R* tokens.
        # Every kept word is prefixed with a space, as in the original.
        thisart = "".join(
            " " + token[0]
            for zin in parse(thisartorig).split()
            for token in zin
            if token[1].startswith(("N", "J", "R"))
        )

        numbsub = 0
        for pat, replacement in replpatterns:
            thisart, count = pat.subn(replacement, thisart)
            numbsub += count
        # Only if something has been substituted at all, check whether a last
        # name has to be substituted as well.
        # Function 1b: once someone has been mentioned by their full name,
        # also replace later mentions that use only the last name.
        if numbsub > 0:
            for k, v in repldictpersons.items():
                for vv in v:
                    if vv in thisart:
                        thisart = re.sub("\\b" + k + "\\b", vv, thisart)
        # List 3: the (vv[0] -> vv[1]) rules of an entry are applied only
        # when the entry's key k occurs in the text.
        for k, v in repldictindien.items():
            if re.search("\\b" + k + "\\b", thisart):
                for vv in v:
                    thisart = re.sub("\\b" + vv[0] + "\\b", vv[1], thisart)

        thisart = remove_punctuation(thisart.lower())
        # Drop stop words and pure numbers; the leading space before every
        # word reproduces the format the original code stored.
        thisart = "".join(
            " " + woord
            for woord in thisart.split()
            if woord not in stops and not woord.isdigit()
        )
        # replace original text with modified text and put the whole item in
        # the cleaned collection
        art["text"] = thisart
        collectioncleanedNJR.insert(art)

Beispiel #21

0

Datei anzeigen

Datei: datamanager_addcleantext.py Projekt: uvacw/inca

def clean_database_njr():

    # load replacement lists
    # replacement list 1: always replace
    with open(replacementlistfile, mode="r", encoding="utf-8") as fi:
        repldict = json.load(fi, object_pairs_hook=OrderedDict)
    replpatterns = set(re.compile("\\b" + k + "\\b") for k in repldict)
    # replacement list 2: replace if already replaced according to list 1
    with open(replacementlistlastnamesfile, mode="r", encoding="utf-8") as fi:
        repldictpersons = json.load(fi, object_pairs_hook=OrderedDict)
    # replacement list 3: replace only if another expression is mentioned
    with open(replacementlistsindienfile, mode="r", encoding="utf-8") as fi:
        repldictindien = json.load(fi, object_pairs_hook=OrderedDict)

    allarticles = collection.find(subset).batch_size(
        30
    )  # batch size = no of articles that we are sure can be processed in 10 minutes. defaults to 100, that can be problematic
    aantal = collection.find(subset).count()
    i = 0
    for art in allarticles:
        i += 1
        print "\r", i, "/", aantal, " or ", int(i / aantal * 100), "%",
        sys.stdout.flush()
        try:
            thisartorig = art["text"].replace("\n", " ")
        except:
            # do nothing with this article if it does not have any text
            print
            print "Weird, this article did not have any text body, skipping it"
            print
            continue

        numbsub = 0
        for pat in replpatterns:
            subst = pat.subn(repldict[pat.pattern[2:-2]], thisartorig)  # [2:-2] to strip the \b
            thisartorig = subst[0]
            numbsub += subst[1]
        # only if sth has been substituted at all, check if it's a last name that has to be substituted as well
        # functie 1b: als iemand een keer met z'n volledige naam genoemd wordt, ook de volgende keren dat alleen z'n achternaam wordt genoemd deze vervangen
        if numbsub > 0:
            for k, v in repldictpersons.items():
                # print "For",k,", there are",len(v),"rules."
                for vv in v:
                    if vv in thisartorig:
                        thisartorig = re.sub("\\b" + k + "\\b", vv, thisartorig)
                        # print "Replaced",k,"by",vv
        for k, v in repldictindien.items():
            if re.findall("\\b" + k + "\\b", thisartorig):
                for vv in v:
                    # print "checking vv",vv,"and k",k
                    thisartorig = re.sub("\\b" + vv[0] + "\\b", vv[1], thisartorig)
                # print "Replaced", vv[0], "by", vv[1], "because", k, "was mentioned"

        from pattern.nl import parse

        thisart = ""
        for zin in parse(thisartorig).split():
            for token in zin:
                if token[1].startswith("N") or token[1].startswith("J") or token[1].startswith("R"):
                    # print token[0],token[1]
                    thisart += " " + token[0]

        thisart = remove_punctuation(thisart.lower())
        stops = [line.strip().lower() for line in open(stopwordsfile, mode="r", encoding="utf-8")]
        tas = thisart.split()
        thisart = ""
        for woord in tas:
            if (woord not in stops) and (not woord.isdigit()):
                thisart = " ".join([thisart, woord])

        r = collection.update({"_id": art["_id"]}, {"$set": {"textclean_njr": thisart}})
        print r