def ngram(wordex, period, ddk=None):
    """Look up the n-gram result for a one- or two-word search expression.

    Args:
        wordex: Search expression. When it contains a space, its first two
            whitespace-separated tokens are looked up as a bigram; otherwise
            the whole string is looked up as a unigram.
        period: Forwarded unchanged as the ``period`` argument of the
            ``nb`` lookup.
        ddk: Optional value forwarded unchanged as the ``ddk`` argument
            (presumably a Dewey classification filter -- confirm against
            the ``nb`` API).

    Returns:
        Whatever ``nb.bigram`` or ``nb.unigram`` returns for the query.

    Raises:
        IndexError: If ``wordex`` contains a space but splits into fewer
            than two tokens (e.g. a trailing space after a single word).
    """
    # The search expression is the ``wordex`` parameter; every lookup below
    # must read it rather than some module-level name.
    if " " in wordex:
        tokens = wordex.split()[:2]
        return nb.bigram(first=tokens[0], second=tokens[1],
                         ddk=ddk, period=period)
    return nb.unigram(wordex, period=period, ddk=ddk)
def sumword(words, period, media='bok'):
    """Sum the unigram results of a comma-separated list of words.

    Args:
        words: Comma-separated words. Whitespace around each entry is
            stripped. An empty entry (trailing comma, or two commas in
            succession) means the comma character itself is counted: the
            empty entries are dropped and ``','`` is added to the list.
        period: Forwarded unchanged as ``period`` to ``nb.unigram``.
        media: Forwarded unchanged as ``media`` to ``nb.unigram``.

    Returns:
        The row-wise sum of the per-word results concatenated side by
        side, as a Series named ``"tot"``.
    """
    wordlist = [entry.strip() for entry in words.split(',')]
    # Check for a trailing comma or commas in succession; if present,
    # count the comma itself as one of the words.
    if '' in wordlist:
        wordlist = [','] + [entry for entry in wordlist if entry != '']

    per_word = [nb.unigram(w, media=media, period=period) for w in wordlist]
    total = pd.concat(per_word, axis=1).sum(axis=1)
    # sum(axis=1) produces a Series, which is labelled through ``name``;
    # assigning ``columns`` on a Series does not label the data.
    total.name = "tot"
    return total
def ngram(word, ddk, subject, period):
    """Look up the n-gram result for a one- or two-word query.

    Args:
        word: Query string. Without a space it is looked up as a unigram;
            with a space, its first two whitespace-separated tokens are
            looked up as a bigram.
        ddk: Forwarded unchanged as the ``ddk`` argument of the lookup.
        subject: Forwarded as the ``topic`` argument of the lookup.
        period: Forwarded unchanged as the ``period`` argument.

    Returns:
        Whatever ``nb.unigram`` or ``nb.bigram`` returns for the query.
    """
    # Filters shared by both kinds of lookup.
    filters = dict(ddk=ddk, topic=subject, period=period)

    if " " not in word:
        return nb.unigram(word, **filters)

    # Only the first two tokens take part in a bigram lookup.
    tokens = word.split()[:2]
    return nb.bigram(first=tokens[0], second=tokens[1], **filters)
def sumword(words, period, media='bok', lang='nob'):
    """Sum the unigram results of a comma-separated list of words.

    Args:
        words: Comma-separated words; whitespace around each entry is
            stripped. If any entry is empty (trailing comma or commas in
            succession), the empty entries are dropped and the comma
            character itself is added to the list of words.
        period: Forwarded unchanged as ``period`` to ``nb.unigram``.
        media: Forwarded unchanged as ``media`` to ``nb.unigram``.
        lang: Forwarded unchanged as ``lang`` to ``nb.unigram``.

    Returns:
        The row-wise sum of the per-word results, with its index parsed
        by ``pd.to_datetime`` using the ``'%Y'`` format.
    """
    terms = [piece.strip() for piece in words.split(',')]
    if '' in terms:
        # An empty piece signals that the comma itself should be counted.
        terms = [','] + [piece for piece in terms if piece != '']

    lookups = []
    for term in terms:
        lookups.append(
            nb.unigram(term, media=media, period=period, lang=lang))

    side_by_side = pd.concat(lookups, axis=1)
    total = side_by_side.sum(axis=1)
    total.index = pd.to_datetime(total.index, format='%Y')
    return total
def ngavis(word, period):
    """Fetch the n-gram frame for ``word`` with ``media='avis'``.

    Args:
        word: Query string. Without a space it is looked up as a unigram;
            with a space, its first two whitespace-separated tokens are
            looked up as a bigram.
        period: Forwarded unchanged as the ``period`` argument.

    Returns:
        The result of ``nb.frame(<lookup result>, word)``, or an empty
        ``pd.DataFrame`` if the lookup or framing raises.
    """
    try:
        if " " in word:
            tokens = word.split()[:2]
            counts = nb.bigram(first=tokens[0], second=tokens[1],
                               period=period, media='avis')
        else:
            counts = nb.unigram(word, period=period, media='avis')
        return nb.frame(counts, word)
    except Exception:
        # Best-effort lookup: any failure yields an empty frame. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return pd.DataFrame()
def ngbok(word, period, ddk=None, lang='nob'):
    """Fetch the n-gram frame for ``word`` with ``media='bok'``.

    Args:
        word: Query string. Without a space it is looked up as a unigram;
            with a space, its first two whitespace-separated tokens are
            looked up as a bigram.
        period: Forwarded unchanged as the ``period`` argument.
        ddk: Optional value forwarded unchanged as the ``ddk`` argument.
        lang: Forwarded unchanged as the ``lang`` argument.

    Returns:
        The result of ``nb.frame(<lookup result>, word)``, or an empty
        ``pd.DataFrame`` if the lookup or framing raises.
    """
    try:
        if " " in word:
            tokens = word.split()[:2]
            counts = nb.bigram(first=tokens[0], second=tokens[1], ddk=ddk,
                               period=period, media='bok', lang=lang)
        else:
            counts = nb.unigram(word, period=period, ddk=ddk,
                                media='bok', lang=lang)
        return nb.frame(counts, word)
    except Exception:
        # Best-effort lookup: any failure yields an empty frame. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return pd.DataFrame()
def ngavis(x, period):
    """Fetch the unigram frame for ``x`` with ``media='avis'``.

    Args:
        x: Word to look up; also passed as the second argument to
            ``nb.frame``.
        period: Passed positionally as the second argument of
            ``nb.unigram``.

    Returns:
        The result of ``nb.frame(nb.unigram(...), x)``, or an empty
        ``pd.DataFrame`` if the lookup or framing raises.
    """
    try:
        return nb.frame(nb.unigram(x, period, media='avis'), x)
    except Exception:
        # Best-effort lookup: any failure yields an empty frame. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return pd.DataFrame()
def ngbok(x, period, ddk=None):
    """Fetch the unigram frame for ``x`` with ``media='bok'``.

    Args:
        x: Word to look up; also passed as the second argument to
            ``nb.frame``.
        period: Passed positionally as the second argument of
            ``nb.unigram``.
        ddk: Optional value forwarded unchanged as the ``ddk`` argument.

    Returns:
        The result of ``nb.frame(nb.unigram(...), x)``, or an empty
        ``pd.DataFrame`` if the lookup or framing raises.
    """
    try:
        return nb.frame(nb.unigram(x, period, media='bok', ddk=ddk), x)
    except Exception:
        # Best-effort lookup: any failure yields an empty frame. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate instead of being silently swallowed.
        return pd.DataFrame()