def count(self):
    """Run either the word count or the line count on the selected file.

    The parsed options expose ``words`` and ``lines``; each is compared
    against the sentinel string "non". Exactly one of the two has to differ
    from the sentinel, otherwise ``parser.error`` is called.
    """
    parser, opc = self.optionParser()

    # Fetch the option values.
    filel = opc.lines
    filew = opc.words
    want_words = filew != "non"
    want_lines = filel != "non"

    # Wrong number of arguments (neither or both), else pick the file name.
    if want_words == want_lines:
        parser.error("niepoprawna liczba argumentow")
    elif want_words:
        file = filew
        print("wybrales liczenie SLOW\n")
    else:
        file = filel
        print("wybrales liczenie LINII\n")

    # File name.
    print("plik do odczytu: %s\n" % file)

    if want_lines:
        line_count.line_count(file)
    if want_words:
        word_count.word_count(file)
def search():
    """Handle the search form: run the word count and render the result page.

    Reads ``question-id`` and ``max-num`` from the submitted form, calls
    ``word_count.word_count`` on the answer text file, then turns each
    per-part-of-speech result file into an HTML table and renders
    ``entry.html`` with those tables.

    Raises:
        ValueError: if ``max-num`` is not an integer string.
    """
    from html import escape

    question_id = request.form['question-id']
    max_num = int(request.form['max-num'])
    record_txt_path = ''
    result_dir_path = './word_count_results/'

    # ------ database code goes below; input: question ID ------
    # ------ output: 1. path of the txt file to process;
    # ------         2. fill `questions` with the question ID and text ------
    # The row is now closed with </tr>; the original ended in a second <tr>.
    questions = '<tr><td>1</td><td>你认为世界上最厉害的人是谁?</td></tr>'
    if question_id == '1':
        # Write the answers for the matching question ID to this path.
        record_txt_path = './answer-content.txt'
    # NOTE(review): for any other question ID the path stays '' and is still
    # passed to word_count below -- confirm that is intended.
    # ------ database code goes above ------

    word_count.word_count(input_file_path=record_txt_path,
                          output_dir_path=result_dir_path,
                          max_num=max_num)

    def _txt_to_html(txt_path, title):
        """Render every line of *txt_path* as a row of a one-column table."""
        # `with` closes the file even if reading fails part-way through.
        with open(txt_path) as txt_in:
            # The file text is derived from answer content, so escape it
            # before embedding it in markup.
            rows = ''.join(
                '<tr><td>' + escape(line.strip()) + '</td></tr>\n'
                for line in txt_in
            )
        return ('<table class="result-table" border="1"><tr><th>'
                + title + '</th></tr>\n' + rows + '</table>')

    adjective_html = _txt_to_html(result_dir_path + 'adjective.txt', '形容词')
    adverb_html = _txt_to_html(result_dir_path + 'adverb.txt', '副词')
    noun_html = _txt_to_html(result_dir_path + 'noun.txt', '名词')
    postposition_html = _txt_to_html(result_dir_path + 'postposition.txt', '介词')
    verb_html = _txt_to_html(result_dir_path + 'verb.txt', '动词')

    results_html = '''
    <div class="row">
        <div class="col">%s</div>
        <div class="col">%s</div>
        <div class="col">%s</div>
        <div class="col">%s</div>
        <div class="col">%s</div>
    </div>
    ''' % (adjective_html, adverb_html, noun_html, postposition_html, verb_html)

    return render_template('entry.html', title='知乎问题答案分析',
                           results=results_html, questions=questions)
# total_assets(arr): derive a total-assets figure from the text rows in `arr`.
#
# What the statements below do, in order:
#   * Scan the rows for the first one holding more than one space-separated
#     token that matches the fnmatch pattern '20??'; `cntstr` keeps that
#     row's match count (or the last row's count when no row qualifies).
#   * For every row, strip the substrings "|", "_", "=", "—-", "$" and "W",
#     and remove the first space-separated token when it is not alphabetic.
#   * Compare word_count() of the lower-cased row with word_count() of each
#     entry of the module-level assets['Assets'], and look for that entry's
#     text inside the row; what remains of the row is split into tokens and
#     trimmed from the front down to `cntstr` tokens.
#   * A numeric first token is added to the running sum, "-" adds 0.0, and a
#     "total assets" row overwrites the sum with its own value.
#   * Returns the resulting float (0 when nothing matched).
#
# NOTE(review): this view has the whole function collapsed onto one line, so
# the nesting of the if/for/break statements cannot be recovered here;
# restore it from the original file before changing any logic.
# NOTE(review): `str_ng.isdigit()==True or str_ng.isdigit()==True` tests the
# same condition twice. `str_ng` has "." removed before isdigit(), while
# float() receives the token with dots kept, so a token such as "1.2.3" would
# pass the check and then raise ValueError.
# NOTE(review): the local name `total_assets` shadows the function name.
def total_assets(arr): text3=arr total_assets = 0 for i in text3: istring=i.split(' ') cntstr=len(fnmatch.filter(istring,'20??')) if(cntstr>1):break for k in text3: k=k.replace("|","") k=k.replace("_","") k=k.replace("=","") k=k.replace("—-","") k=k.replace("$","") k=k.replace("W","") if(k.split(" ")[0].isalpha()==False): k=k.replace(k.split(" ")[0],"") for l in range(0,len(assets['Assets'])): if(word_count(k.lower())==word_count(assets['Assets'][l].lower())): print("k identifier"+" "+k) print("assets"+" "+assets['Assets'][l]) if(k.lower().find(assets['Assets'][l].lower())!=-1): bal_str=k.lower().replace(assets['Assets'][l].lower(),"") bal_str=bal_str.replace("|","") bal_lst=bal_str.split(" ") print(bal_lst) bal_lst1=[] for j in range(0,len(bal_lst)): if(bal_lst[j] !=''):bal_lst1.append(bal_lst[j]) bal_lst=bal_lst1 if(len(bal_lst)>cntstr): for m in range(0,((len(bal_lst))-cntstr)): bal_lst.pop(0) if(len(bal_lst)!=0): str_ng=bal_lst[0].replace(",","") str_ng=str_ng.replace(".","") if((str_ng.isdigit()==True or str_ng.isdigit()==True) and (assets['Assets'][l].lower()=="total assets") and word_count(k.lower())==word_count("total assets")): total_assets=float(bal_lst[0].replace(",","")) print ("hh %s" %total_assets) break if(str_ng.isdigit()==True or str_ng.isdigit()==True): total_assets=total_assets+ float(bal_lst[0].replace(",","")) print ("gg %s" %total_assets) elif(bal_lst[0]=="-"): total_assets=total_assets+ float(bal_lst[0].replace("-","0")) print ("kk %s" %total_assets) if(k.lower().find("total assets")!=-1 and word_count(k.lower())==word_count("total assets")): break return total_assets
def main():
    """Poll the monitored folder forever and store word counts of new files.

    Each pass lists the regular files in the folder, logs how many there are,
    and either processes the first one (read, count, push every count to the
    database helper) or sleeps for 60 seconds when the folder is empty.
    """
    counter = word_count()
    filereader = file_reader()
    db_access = database_access()

    # Folder holding the word documents; MONITOREDFOLDER overrides the default.
    monitoredfolder = os.environ.get('MONITOREDFOLDER', './words/')
    log_dir = monitoredfolder + 'logs/'

    while True:
        # Regular files sitting directly in the monitored folder.
        onlyfiles = [
            entry for entry in listdir(monitoredfolder)
            if isfile(join(monitoredfolder, entry))
        ]

        # Make sure the log folder exists.
        if not exists(log_dir):
            makedirs(log_dir)

        # Configure logging and report how many files were found.
        logging.basicConfig(filename=log_dir + 'document_import.log',
                            level=logging.INFO)
        logging.info(str(len(onlyfiles)) + " New Files")

        if not onlyfiles:
            time.sleep(60)
            continue

        # Process a single new file.
        # NOTE(review): only the bare file name is handed to the reader, and
        # nothing here removes the file afterwards -- confirm the reader
        # handles both.
        words = filereader.read(onlyfiles[0])
        result = counter.count(words)
        for key in result:
            # Add this file's count for the word to the stored total.
            db_access.add_value_to_word_count(key, result[key])
def test_tabs(self):
    """Tab characters are expected to separate words."""
    expected = {
        'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
        'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
    }
    lyrics = ('rah rah ah ah ah\troma roma ma\tga ga oh la la\t'
              'want your bad romance')
    self.assertEqual(expected, word_count(lyrics))
def words_of_diaries(dir='diaries'):
    """Print the most frequent word(s) of every file in a directory.

    Args:
        dir: Directory name, resolved against the current working directory.

    For each file the words tied for the highest count are printed as
    ``<file name> : <list of words>``.
    """
    base = os.path.join(os.getcwd(), dir)
    for name in os.listdir(base):
        counter = word_count(os.path.join(base, name))
        # Words ordered by descending count; the leading run of equal counts
        # is the set of most frequent words.
        ranked = sorted(counter, key=lambda k: counter[k], reverse=True)
        maxes = list(
            takewhile(lambda w: counter[w] == counter[ranked[0]], ranked))
        # %-formatting prints the same text as the former Python 2 print
        # statement while also being valid Python 3.
        print('%s : %s' % (name, maxes))
def test_newlines(self):
    """Newline characters are expected to separate words."""
    expected = {
        'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1,
        'la': 2, 'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
    }
    lyrics = ('rah rah ah ah ah\nroma roma ma\n'
              'ga ga oh la la\nwant your bad romance')
    self.assertEqual(expected, word_count(lyrics))
def test_ignores_punctuation(self):
    """Punctuation characters are expected to be dropped from the counts."""
    counts = word_count('car : carpet as java : javascript!!&@$%^&')
    expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
    assert counts == expected
def test_non_alphanumeric(self):
    """Commas, underscores and periods are expected to split words."""
    counts = word_count('hey,my_spacebar_is_broken.')
    expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
    assert counts == expected
def test_count_multiple_occurences(self):
    """A repeated word is expected to be counted once per occurrence."""
    expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
    phrase = 'one fish two fish red fish blue fish'
    self.assertEqual(expected, word_count(phrase))
def test_apostrophes(self):
    """An apostrophe inside a word is expected to stay part of that word."""
    counts = word_count("First: don't laugh. Then: don't cry.")
    expected = {'first': 1, "don't": 2, 'laugh': 1, 'then': 1, 'cry': 1}
    assert counts == expected
def test_count(self):
    """Check selected counts produced for the file "toy.txt".

    The expectations treat keys as case-sensitive and as keeping attached
    punctuation ("Caesar" and "Caesar," are counted separately).
    """
    count = word_count("toy.txt")
    # assertNotIn names the offending key on failure, unlike assertTrue.
    self.assertNotIn("asdf", count)
    expected_counts = (
        ("coffers", 1),
        ("And", 5),
        ("honourable", 5),
        ("Caesar", 4),
        ("Caesar,", 2),
    )
    for word, occurrences in expected_counts:
        self.assertEqual(count.get(word, 0), occurrences)
def test_non_alphanumeric(self):
    """Commas, underscores and periods are expected to split words."""
    counts = word_count('hey,my_spacebar_is_broken.')
    expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
    self.assertEqual(counts, expected)
def test_count_multiple_occurrences_of_a_word(self):
    """A repeated word is expected to be counted once per occurrence."""
    counts = word_count('one fish two fish red fish blue fish')
    expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
    self.assertEqual(counts, expected)
def test_apostrophes(self):
    """An apostrophe inside a word is expected to stay part of that word."""
    counts = word_count("First: don't laugh. Then: don't cry.")
    expected = {'first': 1, "don't": 2, 'laugh': 1, 'then': 1, 'cry': 1}
    self.assertEqual(counts, expected)
def test_quotations(self):
    """Quotes around a word are expected to be stripped, apostrophes kept."""
    counts = word_count("Joe can't tell between 'large' and large.")
    expected = {
        'joe': 1, "can't": 1, 'tell': 1, 'between': 1, 'large': 2, 'and': 1,
    }
    assert counts == expected
def word_count_article(dir):
    """Map every file in a directory to its most frequent word.

    Args:
        dir: Directory whose entries are each passed to ``wc.word_count``.

    Returns:
        dict: ``{file name: word with the highest count}``.

    Raises:
        ValueError: if a file yields an empty count mapping (``max`` of an
            empty sequence), as before.
    """
    result = {}
    for article in os.listdir(dir):
        # os.path.join also works when `dir` has no trailing separator;
        # plain concatenation produced a wrong path in that case.
        dic = wc.word_count(os.path.join(dir, article))
        print(dic)
        # First key with the highest count, matching the former max() over
        # the (key, count) pairs.
        result[str(article)] = max(dic, key=lambda word: dic[word])
    return result
def test_ignores_punctuation(self):
    """Punctuation characters are expected to be dropped from the counts."""
    counts = word_count('car : carpet as java : javascript!!&@$%^&')
    expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
    self.assertEqual(counts, expected)
def test_word_count(self):
    """Exercise word_count on sentences, pure punctuation and mixed whitespace.

    assertEqual is used instead of assertTrue(x == ...), so a failure shows
    the differing dictionaries rather than "False is not true".
    """
    # NOTE(review): these two cases were already commented out; they stay
    # disabled here -- confirm whether they should be re-enabled.
    # x = word_count("")
    # self.assertTrue(x == {})
    # x = word_count("Hello hello")
    # self.assertTrue(x == {"hello": 2})

    x = word_count('Hello, my cat. And my cat doesn\'t say "hello" back.')
    self.assertEqual(x, {'hello': 2, 'my': 2, 'cat': 2, 'and': 1,
                         "doesn't": 1, 'say': 1, 'back': 1})

    x = word_count('This is a test of the Emergency Broadcast Network. '
                   'This is only a test.')
    self.assertEqual(x, {'this': 2, 'is': 2, 'a': 2, 'test': 2, 'of': 1,
                         'the': 1, 'emergency': 1, 'broadcast': 1,
                         'network': 1, 'only': 1})

    # Input made of punctuation only yields no words.
    x = word_count('":;,.-+=/\\|[]{}()*^&')
    self.assertEqual(x, {})

    # Carriage return, newline, tab and space all separate words.
    x = word_count('a a\ra\na\ta \t\r\n')
    self.assertEqual(x, {"a": 5})
# net_worth(arr): derive a net-worth figure from the text rows in `arr`.
#
# What the statements below do, in order:
#   * Scan the rows for the first one holding more than one space-separated
#     token that matches the fnmatch pattern '20??'; `cntstr` keeps that
#     row's match count (or the last row's count when no row qualifies).
#   * For every row, strip the characters "|", "_" and "=", and remove the
#     first space-separated token when it is not alphabetic.
#   * Compare word_count() of the lower-cased row with word_count() of each
#     entry of the module-level netWorth['Networth'], and look for that
#     entry's text inside the row; what remains of the row is split into
#     tokens and trimmed from the front down to `cntstr` tokens.
#   * A numeric first token is added to the running sum, "-" adds 0.0, and a
#     "total equity" row overwrites the sum with its own value.
#   * Returns the resulting float (0 when nothing matched).
#
# NOTE(review): this view has the whole function collapsed onto one line, so
# the nesting of the if/for/break statements cannot be recovered here;
# restore it from the original file before changing any logic.
# NOTE(review): `str_ng.isdigit()==True or str_ng.isdigit()==True` tests the
# same condition twice. `str_ng` has "." removed before isdigit(), while
# float() receives the token with dots kept, so a token such as "1.2.3" would
# pass the check and then raise ValueError.
# NOTE(review): the local name `net_worth` shadows the function name.
def net_worth(arr): text3=arr net_worth = 0 for i in text3: istring=i.split(' ') cntstr=len(fnmatch.filter(istring,'20??')) if(cntstr>1):break for k in text3: k=k.replace("|","") k=k.replace("_","") k=k.replace("=","") if(k.split(" ")[0].isalpha()==False): k=k.replace(k.split(" ")[0],"") for l in range(0,len(netWorth['Networth'])): if(word_count(k.lower())==word_count(netWorth['Networth'][l].lower())): print("k identifier"+" "+k) print("networt"+" "+netWorth['Networth'][l]) if(k.lower().find(netWorth['Networth'][l].lower())!=-1): bal_str=k.lower().replace(netWorth['Networth'][l].lower(),"") bal_str=bal_str.replace("|","") bal_lst=bal_str.split(" ") bal_lst1=[] for j in range(0,len(bal_lst)): if(bal_lst[j] !=''):bal_lst1.append(bal_lst[j]) bal_lst=bal_lst1 if(len(bal_lst)>cntstr): for m in range(0,((len(bal_lst))-cntstr)): bal_lst.pop(0) if(len(bal_lst)!=0): str_ng=bal_lst[0].replace(",","") str_ng=str_ng.replace(".","") if((str_ng.isdigit()==True or str_ng.isdigit()==True) and (netWorth['Networth'][l].lower()=="total equity") and word_count(k.lower())==word_count("total equity")): net_worth=float(bal_lst[0].replace(",","")) print ("hh %s" %net_worth) break if(str_ng.isdigit()==True or str_ng.isdigit()==True): net_worth=net_worth+ float(bal_lst[0].replace(",","")) print ("gg %s" %net_worth) elif(bal_lst[0]=="-"): net_worth=net_worth+ float(bal_lst[0].replace("-","0")) print ("kk %s" %net_worth) if(k.lower().find("total equity")!=-1 and word_count(k.lower())==word_count("total equity")): break return net_worth
def test_quotations(self):
    """Quotes around a word are expected to be stripped, apostrophes kept."""
    counts = word_count("Joe can't tell between 'large' and large.")
    expected = {
        'joe': 1, "can't": 1, 'tell': 1, 'between': 1, 'large': 2, 'and': 1,
    }
    self.assertDictEqual(counts, expected)
def test_apostrophes(self):
    """An apostrophe inside a word is expected to stay part of that word."""
    sentence = "First: don't laugh. Then: don't cry."
    expected = {"first": 1, "don't": 2, "laugh": 1, "then": 1, "cry": 1}
    self.assertEqual(word_count(sentence), expected)
def test_ignores_punctuation(self):
    """Punctuation characters are expected to be dropped from the counts."""
    sentence = "car : carpet as java : javascript!!&@$%^&"
    expected = {"car": 1, "carpet": 1, "as": 1, "java": 1, "javascript": 1}
    self.assertEqual(word_count(sentence), expected)
def test_count_multiple_occurrences_of_a_word(self):
    """A repeated word is expected to be counted once per occurrence."""
    sentence = "one fish two fish red fish blue fish"
    expected = {"one": 1, "fish": 4, "two": 1, "red": 1, "blue": 1}
    self.assertEqual(word_count(sentence), expected)
def test_non_alphanumeric(self):
    """Commas, underscores and periods are expected to split words."""
    sentence = "hey,my_spacebar_is_broken."
    expected = {"hey": 1, "my": 1, "spacebar": 1, "is": 1, "broken": 1}
    self.assertEqual(word_count(sentence), expected)
def test_quotations(self):
    """Quotes around a word are expected to be stripped, apostrophes kept."""
    sentence = "Joe can't tell between 'large' and large."
    expected = {
        "joe": 1, "can't": 1, "tell": 1, "between": 1, "large": 2, "and": 1,
    }
    self.assertEqual(word_count(sentence), expected)
def draw_histogram(file_name):
    """Print a ``#`` bar histogram of the word frequencies in a text file.

    Args:
        file_name: Path of the text file to read.

    Words are listed by descending count, ties broken alphabetically. Each
    line shows the lower-cased word, padding that aligns the bars two columns
    past the longest word, and one ``#`` per occurrence. A file without any
    words prints nothing.
    """
    with open(file_name) as f:
        text = f.read()
    counts = word_count(text)

    # default=0 keeps an input without words from raising ValueError.
    longest_word = max((len(word) for word in counts), default=0)

    ordered = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
    for word, freq in ordered:
        padding = " " * (longest_word - len(word) + 2)
        print(f"{word.lower()}{padding}{'#' * freq}")
def fquncy(s):
    """Print a ``#`` bar per word of *s*, highest frequency first.

    Words sharing a frequency are printed in the order word_count yields them.
    """
    wordkey = word_count(s)

    # Group the words by how often they occur.
    words_by_count = {}
    for word in wordkey:
        words_by_count.setdefault(wordkey[word], []).append(word)

    for frequency in sorted(words_by_count, reverse=True):
        for word in words_by_count[frequency]:
            print(word, ' ' * (20 - len(word)), '#' * frequency)
def main():
    """Load the raw articles and per-publication stats, caching both as CSV.

    Each dataset is read from its CSV file when that succeeds; on IOError it
    is rebuilt (get_data / word_count) and written to the same file.
    """
    # Raw article data.
    try:
        df_raw = pd.read_csv('df_raw.csv')
    except IOError:
        df_raw = get_data(['articles1.csv', 'articles2.csv', 'articles3.csv'])
        # NOTE(review): to_csv writes the index as well, so the frame read
        # back on a later run may carry an extra column -- confirm intended.
        df_raw.to_csv('df_raw.csv')

    # Word-count statistics.
    try:
        stats_by_pub = pd.read_csv('stats_by_pub.csv')
        print(stats_by_pub)
        print('stats on disk')
    except IOError:
        stats_by_pub = word_count(df_raw)
        stats_by_pub.to_csv('stats_by_pub.csv')
def test_tabs(self):
    """Tab characters are expected to separate words."""
    lyrics = ("rah rah ah ah ah\troma roma ma\tga ga oh la la\t"
              "want your bad romance")
    expected = {
        "rah": 2,
        "ah": 3,
        "roma": 2,
        "ma": 1,
        "ga": 2,
        "oh": 1,
        "la": 2,
        "want": 1,
        "your": 1,
        "bad": 1,
        "romance": 1,
    }
    self.assertEqual(word_count(lyrics), expected)
def test_word_count():
    """word_count('testfile.txt') must return the expected word totals."""
    filename = 'testfile.txt'
    expected = dict([
        ('2000', 1), ('Green', 1), ('Hop', 1), ('Splash', 1), ('and', 2),
        ('cool', 1), ('in', 2), ('legs', 1), ('lily', 1), ('logs', 1),
        ('on', 4), ('pads', 1), ('speckled', 1), ('water', 2),
    ])
    assert expected == word_count(filename)
def test_multiple_spaces_not_detected_as_a_word(self):
    """Surrounding whitespace must not show up as a word of its own."""
    # NOTE(review): the literal shows a leading space and a single space
    # between the words -- confirm the file really contains runs of spaces.
    counts = word_count(' multiple whitespaces')
    expected = {'multiple': 1, 'whitespaces': 1}
    self.assertEqual(counts, expected)
def test_cramped_list(self):
    """Commas without surrounding spaces are expected to split words."""
    counts = word_count('one,two,three')
    expected = {'one': 1, 'two': 1, 'three': 1}
    self.assertEqual(counts, expected)
import sys

from word_count import word_count

# The instance count is taken from the first command-line argument.
requested_instances = int(sys.argv[1])
word_count(instances=requested_instances)
def test_mixed_case(self):
    """Words differing only in case are expected to share one lower-case key."""
    counts = word_count('go Go GO Stop stop')
    expected = {'go': 3, 'stop': 2}
    self.assertEqual(counts, expected)
def test_expanded_list(self):
    """A comma followed by a newline is expected to split words."""
    counts = word_count('one,\ntwo,\nthree')
    expected = {'one': 1, 'two': 1, 'three': 1}
    self.assertEqual(counts, expected)
def test_count_one_of_each(self):
    """Every distinct word of the phrase is expected to be counted once."""
    expected = {'one': 1, 'of': 1, 'each': 1}
    phrase = 'one of each'
    self.assertEqual(expected, word_count(phrase))
def test_count_multiple_occurrences_of_a_word(self):
    """A repeated word is expected to be counted once per occurrence."""
    counts = word_count('one fish two fish red fish blue fish')
    expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
    self.assertEqual(counts, expected)
def test_unicode(self):
    """Non-ASCII words are counted and a non-word symbol separates them.

    The literals had been stored as UTF-8 bytes mis-decoded as ISO-8859-5
    (mojibake); they are restored here to the Cyrillic words and the emoji
    separator those bytes encode.
    """
    expected = {
        decode_if_needed('до'): 1,
        decode_if_needed('свидания'): 1,
    }
    self.assertEqual(expected, word_count('до🖖свидания!'))
def test_count_multiple_occurences(self):
    """A repeated word is expected to be counted once per occurrence."""
    expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
    phrase = 'one fish two fish red fish blue fish'
    self.assertEqual(expected, word_count(phrase))
def test_apostrophes(self):
    """An apostrophe inside a word is expected to stay part of that word."""
    counts = word_count("First: don't laugh. Then: don't cry.")
    expected = {'first': 1, "don't": 2, 'laugh': 1, 'then': 1, 'cry': 1}
    self.assertEqual(counts, expected)
def test_multiple_spaces(self):
    """Spaces between words must not produce extra entries."""
    # NOTE(review): the literal shows single spaces only -- confirm the file
    # really contains a run of several spaces.
    expected = {'wait': 1, 'for': 1, 'it': 1}
    phrase = 'wait for it'
    self.assertEqual(expected, word_count(phrase))
from word_count import word_count

# Call word_count with 1, 10 and 20 instances, in that order.
for instance_total in (1, 10, 20):
    word_count(instances=instance_total)
def test_include_numbers(self):
    """Digit-only tokens are expected to be counted as words."""
    expected = {'testing': 2, '1': 1, '2': 1}
    phrase = 'testing 1 2 testing'
    self.assertEqual(expected, word_count(phrase))
def test_ignores_punctuation(self):
    """Punctuation characters are expected to be dropped from the counts."""
    expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
    phrase = 'car : carpet as java : javascript!!&@$%^&'
    self.assertEqual(expected, word_count(phrase))
def test_non_alphanumeric(self):
    """Commas, underscores and periods are expected to split words."""
    expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
    phrase = 'hey,my_spacebar_is_broken.'
    self.assertEqual(expected, word_count(phrase))
def test_count_one_word(self):
    """A single word is expected to yield a single entry with count 1."""
    expected = {'word': 1}
    phrase = 'word'
    self.assertEqual(expected, word_count(phrase))
def test_mixed_case(self):
    """Case variants are expected to merge: the sorted counts are [2, 3]."""
    counts = word_count('go Go GO Stop stop')
    # Only the count values are compared, so the key casing is not checked.
    self.assertEqual([2, 3], sorted(counts.values()))
def test_quotations(self):
    """Quotes around a word are expected to be stripped, apostrophes kept."""
    counts = word_count("Joe can't tell between 'large' and large.")
    expected = {
        'joe': 1, "can't": 1, 'tell': 1, 'between': 1, 'large': 2, 'and': 1,
    }
    self.assertEqual(counts, expected)