Exemple #1
0
    def count(self):
        """Count either the lines or the words of the file named on the command line.

        ``self.optionParser()`` supplies the parser and the parsed options.
        An option whose value is the string "non" is treated as not chosen;
        exactly one of ``lines`` / ``words`` has to be chosen, otherwise the
        parser reports an error.
        """
        parser, opc = self.optionParser()

        # Pick up both option values once
        lines_target = opc.lines
        words_target = opc.words
        count_lines = lines_target != "non"
        count_words = words_target != "non"

        # Exactly one mode must be selected; its value is the file name
        if count_lines == count_words:
            parser.error("niepoprawna liczba argumentow")
        elif count_words:
            target = words_target
            print("wybrales liczenie SLOW\n")
        else:
            target = lines_target
            print("wybrales liczenie LINII\n")

        # Report the file that is about to be read
        print("plik do odczytu: %s\n" % target)

        if count_lines:
            line_count.line_count(target)

        if count_words:
            word_count.word_count(target)
Exemple #2
0
def search():
    """Run the word-count analysis for the submitted question and render it.

    Reads ``question-id`` and ``max-num`` from the posted form, runs
    ``word_count.word_count`` on the answer text of that question, turns
    each result file (adjective, adverb, noun, postposition, verb) into an
    HTML table and renders ``entry.html`` with the five tables in one row.

    Returns:
        Whatever ``render_template`` returns for ``entry.html``.

    Raises:
        ValueError: if ``max-num`` is not an integer string.
        OSError: if one of the result files cannot be opened.
    """
    # Local import keeps the block self-contained; used to escape file text.
    from html import escape

    question_id = request.form['question-id']
    max_num = int(request.form['max-num'])
    record_txt_path = ''
    result_dir_path = './word_count_results/'
    # ------ database access goes below; input: question ID ------
    # ------ output: 1. path of the txt file to process
    #                2. `questions` filled with question IDs and texts
    # The row is now closed with </tr>; the original ended in a second <tr>.
    questions = '<tr><td>1</td><td>你认为世界上最厉害的人是谁?</td></tr>'
    if question_id == '1':
        # The answers for this question id only need to be written to the
        # file at the path below.
        record_txt_path = './answer-content.txt'
    # NOTE(review): for any other question id the path stays '' and is
    # still handed to word_count — confirm that is intended.

    # ------ database access goes above ------
    word_count.word_count(input_file_path=record_txt_path,
                          output_dir_path=result_dir_path,
                          max_num=max_num)

    def _txt_to_html(txt_path, title):
        """Render a result file as a one-column HTML table headed by ``title``."""
        # NOTE(review): the file is read with the platform default encoding,
        # exactly as before — confirm it matches what word_count writes.
        # ``with`` guarantees the handle is closed even if reading fails.
        with open(txt_path) as txt_in:
            # Escape each line so text derived from the answers cannot
            # inject markup into the page.
            rows = ''.join(
                '<tr><td>' + escape(line.strip()) + '</td></tr>\n'
                for line in txt_in)
        return ('<table class="result-table" border="1"><tr><th>' +
                title + '</th></tr>\n' + rows + '</table>')

    adjective_html = _txt_to_html(result_dir_path + 'adjective.txt', '形容词')
    adverb_html = _txt_to_html(result_dir_path + 'adverb.txt', '副词')
    noun_html = _txt_to_html(result_dir_path + 'noun.txt', '名词')
    postposition_html = _txt_to_html(result_dir_path + 'postposition.txt',
                                     '介词')
    verb_html = _txt_to_html(result_dir_path + 'verb.txt', '动词')

    results_html = '''
<div class="row">
    <div class="col">%s</div>
    <div class="col">%s</div>
    <div class="col">%s</div>
    <div class="col">%s</div>
    <div class="col">%s</div>
</div>
    ''' % (adjective_html, adverb_html, noun_html, postposition_html,
           verb_html)

    return render_template('entry.html',
                           title='知乎问题答案分析',
                           results=results_html,
                           questions=questions)
Exemple #3
0
def total_assets(arr):
    """Extract a total-assets figure from the text lines of a statement.

    Each line of ``arr`` is cleaned and compared against the labels in the
    module-level ``assets['Assets']`` list. Figures found next to matching
    labels are summed; a numeric figure on a "total assets" line replaces
    the running sum instead of being added to it.

    Args:
        arr: iterable of strings (each item is ``.split``/``.replace``d).

    Returns:
        The accumulated value: ``0`` if nothing matched, otherwise a float.

    NOTE(review): ``word_count`` is defined elsewhere; it is only used here
    to compare a line against a label — confirm what it returns.
    NOTE(review): the local ``total_assets`` shadows the function name
    inside the body.
    """
    text3=arr
    total_assets = 0
    # Find the first line with more than one space-separated token matching
    # the glob '20??' and remember how many it has. If no line qualifies,
    # cntstr keeps the count of the last line; it is unassigned when arr is
    # empty (the loop below does not run in that case either).
    # Presumably these tokens are year column headers — TODO confirm.
    for i in text3:
        istring=i.split(' ')
        cntstr=len(fnmatch.filter(istring,'20??'))
        if(cntstr>1):break
    for k in text3:
        # Strip table-drawing and currency characters before matching.
        k=k.replace("|","")
        k=k.replace("_","")
        k=k.replace("=","")
        k=k.replace("—-","")
        k=k.replace("$","")
        k=k.replace("W","")
        # When the first token is not purely alphabetic, remove it; note that
        # str.replace removes every occurrence of that token in the line.
        if(k.split(" ")[0].isalpha()==False):
            k=k.replace(k.split(" ")[0],"")
        for l in range(0,len(assets['Assets'])):
            if(word_count(k.lower())==word_count(assets['Assets'][l].lower())):
                print("k identifier"+" "+k)
                print("assets"+" "+assets['Assets'][l])
                if(k.lower().find(assets['Assets'][l].lower())!=-1):
                    # Drop the label text and split what is left into tokens.
                    bal_str=k.lower().replace(assets['Assets'][l].lower(),"")
                    bal_str=bal_str.replace("|","")
                    bal_lst=bal_str.split(" ")
                    print(bal_lst)
                    # Discard the empty tokens produced by repeated spaces.
                    bal_lst1=[]
                    for j in range(0,len(bal_lst)):
                        if(bal_lst[j] !=''):bal_lst1.append(bal_lst[j])
                    bal_lst=bal_lst1
                    # Keep only the last cntstr tokens.
                    if(len(bal_lst)>cntstr):
                        for m in range(0,((len(bal_lst))-cntstr)):
                            bal_lst.pop(0)
                    if(len(bal_lst)!=0):
                        # Only the first remaining token is used; it counts as
                        # numeric if it is all digits once ',' and '.' are removed.
                        str_ng=bal_lst[0].replace(",","")
                        str_ng=str_ng.replace(".","")
                        # A numeric "total assets" figure overrides the running
                        # sum; this break leaves the label loop only.
                        if((str_ng.isdigit()==True or str_ng.isdigit()==True) and (assets['Assets'][l].lower()=="total assets") and word_count(k.lower())==word_count("total assets")):
                            total_assets=float(bal_lst[0].replace(",",""))
                            print ("hh %s" %total_assets)
                            break
                        if(str_ng.isdigit()==True or str_ng.isdigit()==True):

                            total_assets=total_assets+ float(bal_lst[0].replace(",",""))
                            print ("gg %s" %total_assets)
                        # A lone "-" is counted as zero.
                        elif(bal_lst[0]=="-"):
                            total_assets=total_assets+ float(bal_lst[0].replace("-","0"))
                            print ("kk %s" %total_assets)
        # Stop scanning further lines once the "total assets" line was seen.
        if(k.lower().find("total assets")!=-1 and word_count(k.lower())==word_count("total assets")):
            break
    return total_assets
Exemple #4
0
def main():
    """Poll the monitored folder forever and store word counts of new files.

    The folder comes from the ``MONITOREDFOLDER`` environment variable and
    defaults to ``./words/``. A ``logs`` sub-folder holds
    ``document_import.log``. On every pass the first regular file found is
    read and counted, and each word's count is handed to the database
    layer; when the folder holds no files the loop sleeps for 60 seconds.

    This function never returns.
    """
    counter = word_count()
    filereader = file_reader()
    db_access = database_access()
    # Folder linked for reading the word documents
    monitoredfolder = os.environ.get('MONITOREDFOLDER', './words/')

    # Create the log folder and configure logging once, up front. Joining
    # the parts keeps the path correct even when MONITOREDFOLDER is given
    # without a trailing slash (plain concatenation produced "<dir>logs/").
    log_dir = join(monitoredfolder, 'logs')
    if not exists(log_dir):
        makedirs(log_dir)
    logging.basicConfig(filename=join(log_dir, 'document_import.log'),
                        level=logging.INFO)

    while True:
        # List all regular files in the folder
        onlyfiles = [
            f for f in listdir(monitoredfolder)
            if isfile(join(monitoredfolder, f))
        ]
        logging.info("%s New Files", len(onlyfiles))
        # Process a single new file if there is one
        if onlyfiles:
            # NOTE(review): only the bare file name is passed, and nothing
            # here removes the file afterwards — confirm file_reader
            # resolves the folder and consumes the file, otherwise the same
            # file is re-counted on every pass without sleeping.
            words = filereader.read(onlyfiles[0])
            result = counter.count(words)
            for key in result:
                # Update the stored count for each word of the current file
                db_access.add_value_to_word_count(key, result[key])
        else:
            time.sleep(60)
Exemple #5
0
 def test_tabs(self):
     # Tab characters must separate words just like spaces do.
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2,
         'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     lyrics = ('rah rah ah ah ah\troma roma ma\tga ga oh la la\t'
               'want your bad romance')
     self.assertEqual(expected, word_count(lyrics))
Exemple #6
0
def words_of_diaries(dir='diaries'):
    """Print the most frequent word(s) of every file in a diary folder.

    Each entry of the folder ``dir`` (resolved against the current working
    directory) is passed to ``word_count``; the keys tied for the highest
    count are printed as ``<file> : [<words>]``.

    Args:
        dir: Folder name relative to the current working directory.
    """
    folder = os.path.join(os.getcwd(), dir)
    for file in os.listdir(folder):
        counter = word_count(os.path.join(folder, file))
        # Keys ordered by descending count, so the leaders come first.
        results = sorted(counter, key=lambda k: counter[k], reverse=True)
        # Look the top count up once instead of on every takewhile step.
        top = counter[results[0]] if results else None
        maxes = list(takewhile(lambda e: counter[e] == top, results))
        # One pre-formatted argument prints the same text whether ``print``
        # is the Python 2 statement or the Python 3 function; the original
        # statement form is a syntax error on Python 3.
        print('%s : %s' % (file, maxes))
 def test_tabs(self):
     # Words separated by tabs are counted the same as space-separated ones.
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2,
         'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     tabbed = ('rah rah ah ah ah\troma roma ma\tga ga oh la la\t'
               'want your bad romance')
     self.assertEqual(expected, word_count(tabbed))
 def test_newlines(self):
     # Newlines must separate words just like spaces do.
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2,
         'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     lyrics = ('rah rah ah ah ah\nroma roma ma\n'
               'ga ga oh la la\nwant your bad romance')
     self.assertEqual(expected, word_count(lyrics))
Exemple #9
0
 def test_newlines(self):
     # Words split across lines are counted the same as on a single line.
     expected = {
         'rah': 2, 'ah': 3, 'roma': 2, 'ma': 1, 'ga': 2, 'oh': 1, 'la': 2,
         'want': 1, 'your': 1, 'bad': 1, 'romance': 1,
     }
     multiline = ('rah rah ah ah ah\nroma roma ma\n'
                  'ga ga oh la la\nwant your bad romance')
     self.assertEqual(expected, word_count(multiline))
Exemple #10
0
 def test_ignores_punctuation(self):
     # Punctuation and symbols are dropped; only the words are counted.
     actual = word_count('car : carpet as java : javascript!!&@$%^&')
     expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
     assert actual == expected
Exemple #11
0
 def test_non_alphanumeric(self):
     # Commas, underscores and full stops all act as word separators.
     actual = word_count('hey,my_spacebar_is_broken.')
     expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
     assert actual == expected
 def test_count_multiple_occurences(self):
     # A repeated word is tallied once per appearance.
     expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
     actual = word_count('one fish two fish red fish blue fish')
     self.assertEqual(expected, actual)
Exemple #13
0
 def test_apostrophes(self):
     # An apostrophe inside a word stays part of that word.
     actual = word_count("First: don't laugh. Then: don't cry.")
     expected = {'first': 1, "don't": 2, 'laugh': 1, 'then': 1, 'cry': 1}
     assert actual == expected
 def test_count(self):
     # Spot-check a handful of tokens counted from the toy.txt fixture.
     tally = word_count("toy.txt")
     self.assertTrue("asdf" not in tally)
     checks = (
         ("coffers", 1),
         ("And", 5),
         ("honourable", 5),
         ("Caesar", 4),
         ("Caesar,", 2),
     )
     for token, occurrences in checks:
         self.assertEqual(tally.get(token, 0), occurrences)
Exemple #15
0
 def test_non_alphanumeric(self):
     # Commas, underscores and full stops all act as word separators.
     actual = word_count('hey,my_spacebar_is_broken.')
     expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
     self.assertEqual(actual, expected)
Exemple #16
0
 def test_count_multiple_occurrences_of_a_word(self):
     # A repeated word is tallied once per appearance.
     actual = word_count('one fish two fish red fish blue fish')
     expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
     self.assertEqual(actual, expected)
Exemple #17
0
 def test_apostrophes(self):
     # An apostrophe inside a word stays part of that word.
     actual = word_count("First: don't laugh. Then: don't cry.")
     expected = {'first': 1, "don't": 2, 'laugh': 1, 'then': 1, 'cry': 1}
     self.assertEqual(actual, expected)
Exemple #18
0
 def test_quotations(self):
     # Quotes around a word are stripped; an inner apostrophe is kept.
     actual = word_count("Joe can't tell between 'large' and large.")
     expected = {
         'joe': 1, "can't": 1, 'tell': 1, 'between': 1, 'large': 2,
         'and': 1,
     }
     assert actual == expected
def word_count_article(dir):
    """Map every article in a folder to its most frequent word.

    Each entry of ``dir`` is passed to ``wc.word_count``; the resulting
    word -> count mapping is printed, and the word with the highest count
    (the first one on ties) is stored under the entry's name.

    Args:
        dir: Path of the folder holding the articles.

    Returns:
        dict mapping each entry name (as ``str``) to its top word.

    Raises:
        ValueError: if an article yields no words at all (``max`` of an
            empty sequence).
    """
    result = {}
    for article in os.listdir(dir):
        # os.path.join supplies the separator when ``dir`` has no trailing
        # slash; plain concatenation produced a wrong path in that case.
        dic = wc.word_count(os.path.join(dir, article))
        print(dic)
        key, _count = max(dic.items(), key=lambda d: d[1])
        result[str(article)] = key

    return result
Exemple #20
0
 def test_ignores_punctuation(self):
     # Punctuation and symbols are dropped; only the words are counted.
     actual = word_count('car : carpet as java : javascript!!&@$%^&')
     expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
     self.assertEqual(actual, expected)
    def test_word_count(self):
        # Table of (input text, expected counts); checked in order.
        cases = [
            # Disabled in the original suite:
            # ("", {}),
            # ("Hello    hello", {"hello": 2}),
            ('Hello, my cat.  And my cat doesn\'t say "hello" back.',
             {'hello': 2, 'my': 2, 'cat': 2, 'and': 1, "doesn't": 1,
              'say': 1, 'back': 1}),
            ('This is a test of the  Emergency  Broadcast  Network. This is only a test.',
             {'this': 2, 'is': 2, 'a': 2, 'test': 2, 'of': 1, 'the': 1,
              'emergency': 1, 'broadcast': 1, 'network': 1, 'only': 1}),
            ('":;,.-+=/\\|[]{}()*^&', {}),
            ('a a\ra\na\ta \t\r\n', {"a": 5}),
        ]
        for text, expected in cases:
            x = word_count(text)
            self.assertTrue(x == expected)
Exemple #22
0
def net_worth(arr):
    """Extract a net-worth (total equity) figure from statement text lines.

    Each line of ``arr`` is cleaned and compared against the labels in the
    module-level ``netWorth['Networth']`` list. Figures found next to
    matching labels are summed; a numeric figure on a "total equity" line
    replaces the running sum instead of being added to it.

    Args:
        arr: iterable of strings (each item is ``.split``/``.replace``d).

    Returns:
        The accumulated value: ``0`` if nothing matched, otherwise a float.

    NOTE(review): ``word_count`` is defined elsewhere; it is only used here
    to compare a line against a label — confirm what it returns.
    NOTE(review): the local ``net_worth`` shadows the function name inside
    the body.
    """
    text3=arr
    net_worth = 0
    # Find the first line with more than one space-separated token matching
    # the glob '20??' and remember how many it has. If no line qualifies,
    # cntstr keeps the count of the last line; it is unassigned when arr is
    # empty (the loop below does not run in that case either).
    # Presumably these tokens are year column headers — TODO confirm.
    for i in text3:
        istring=i.split(' ')
        cntstr=len(fnmatch.filter(istring,'20??'))
        if(cntstr>1):break
    for k in text3:
        # Strip table-drawing characters before matching.
        k=k.replace("|","")
        k=k.replace("_","")
        k=k.replace("=","")
        # When the first token is not purely alphabetic, remove it; note that
        # str.replace removes every occurrence of that token in the line.
        if(k.split(" ")[0].isalpha()==False):
            k=k.replace(k.split(" ")[0],"")
        for l in range(0,len(netWorth['Networth'])):
            if(word_count(k.lower())==word_count(netWorth['Networth'][l].lower())):
                print("k identifier"+" "+k)
                print("networt"+" "+netWorth['Networth'][l])
                if(k.lower().find(netWorth['Networth'][l].lower())!=-1):
                    # Drop the label text and split what is left into tokens.
                    bal_str=k.lower().replace(netWorth['Networth'][l].lower(),"")
                    bal_str=bal_str.replace("|","")
                    bal_lst=bal_str.split(" ")
                    # Discard the empty tokens produced by repeated spaces.
                    bal_lst1=[]
                    for j in range(0,len(bal_lst)):
                        if(bal_lst[j] !=''):bal_lst1.append(bal_lst[j])
                    bal_lst=bal_lst1
                    # Keep only the last cntstr tokens.
                    if(len(bal_lst)>cntstr):
                        for m in range(0,((len(bal_lst))-cntstr)):
                            bal_lst.pop(0)
                    if(len(bal_lst)!=0):
                        # Only the first remaining token is used; it counts as
                        # numeric if it is all digits once ',' and '.' are removed.
                        str_ng=bal_lst[0].replace(",","")
                        str_ng=str_ng.replace(".","")
                        # A numeric "total equity" figure overrides the running
                        # sum; this break leaves the label loop only.
                        if((str_ng.isdigit()==True or str_ng.isdigit()==True) and (netWorth['Networth'][l].lower()=="total equity") and word_count(k.lower())==word_count("total equity")):
                            net_worth=float(bal_lst[0].replace(",",""))
                            print ("hh %s" %net_worth)
                            break
                        if(str_ng.isdigit()==True or str_ng.isdigit()==True):
                            net_worth=net_worth+ float(bal_lst[0].replace(",",""))
                            print ("gg %s" %net_worth)
                        # A lone "-" is counted as zero.
                        elif(bal_lst[0]=="-"):
                            net_worth=net_worth+ float(bal_lst[0].replace("-","0"))
                            print ("kk %s" %net_worth)
        # Stop scanning further lines once the "total equity" line was seen.
        if(k.lower().find("total equity")!=-1 and word_count(k.lower())==word_count("total equity")):
            break
    return net_worth
Exemple #23
0
 def test_quotations(self):
     # Quotes around a word are stripped; an inner apostrophe is kept.
     actual = word_count("Joe can't tell between 'large' and large.")
     expected = {
         'joe': 1, "can't": 1, 'tell': 1, 'between': 1, 'large': 2,
         'and': 1,
     }
     self.assertDictEqual(actual, expected)
 def test_apostrophes(self):
     # An apostrophe inside a word stays part of that word.
     actual = word_count("First: don't laugh. Then: don't cry.")
     expected = {"first": 1, "don't": 2, "laugh": 1, "then": 1, "cry": 1}
     self.assertEqual(actual, expected)
 def test_ignores_punctuation(self):
     # Punctuation and symbols are dropped; only the words are counted.
     actual = word_count("car : carpet as java : javascript!!&@$%^&")
     expected = {"car": 1, "carpet": 1, "as": 1, "java": 1, "javascript": 1}
     self.assertEqual(actual, expected)
 def test_count_multiple_occurrences_of_a_word(self):
     # A repeated word is tallied once per appearance.
     actual = word_count("one fish two fish red fish blue fish")
     expected = {"one": 1, "fish": 4, "two": 1, "red": 1, "blue": 1}
     self.assertEqual(actual, expected)
 def test_non_alphanumeric(self):
     # Commas, underscores and full stops all act as word separators.
     actual = word_count("hey,my_spacebar_is_broken.")
     expected = {"hey": 1, "my": 1, "spacebar": 1, "is": 1, "broken": 1}
     self.assertEqual(actual, expected)
 def test_quotations(self):
     # Quotes around a word are stripped; an inner apostrophe is kept.
     actual = word_count("Joe can't tell between 'large' and large.")
     expected = {
         "joe": 1, "can't": 1, "tell": 1, "between": 1, "large": 2,
         "and": 1,
     }
     self.assertEqual(actual, expected)
def draw_histogram(file_name):
    """Print a text histogram of the word frequencies found in a file.

    The file's text is passed to ``word_count``. Words are printed one per
    line, most frequent first (ties in alphabetical order), each followed
    by padding that aligns the bars and one ``#`` per occurrence. Nothing
    is printed when the file contains no words.

    Args:
        file_name: Path of the text file to read.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(file_name) as f:
        text = f.read()
    counts = word_count(text)
    if not counts:
        # Nothing to draw; max() below would raise on an empty sequence.
        return

    ranked = sorted(counts.items(), key=lambda x: (-x[1], x[0]))
    longest_word = max(len(w) for w in counts)

    for word, count in ranked:
        # Two spaces separate the longest word from its bar.
        padding = " " * (longest_word - len(word) + 2)
        print(f"{word.lower()}{padding}{'#' * count}")
Exemple #30
0
def fquncy(s):
    """Print the words of ``s`` as a histogram, highest frequency first.

    Words are grouped by the count ``word_count`` reports for them; each
    word is printed followed by padding up to 20 characters and one ``#``
    per occurrence.
    """
    counts = word_count(s)
    # frequency -> words having that frequency, in iteration order
    by_frequency = {}
    for w in counts:
        by_frequency.setdefault(counts[w], []).append(w)
    for freq in sorted(by_frequency, reverse=True):
        for word in by_frequency[freq]:
            print(word, ' ' * (20 - len(word)), '#' * freq)
def main():
    """Load the raw articles and the per-publication stats, caching both.

    Each data set is read from its CSV cache when that succeeds; on
    ``IOError`` it is rebuilt (``get_data`` / ``word_count``) and written
    back to the cache file.
    """
    raw_cache = 'df_raw.csv'
    stats_cache = 'stats_by_pub.csv'

    # Raw article data
    try:
        df_raw = pd.read_csv(raw_cache)
    except IOError:
        sources = ['articles%d.csv' % part for part in (1, 2, 3)]
        df_raw = get_data(sources)
        df_raw.to_csv(raw_cache)

    # Word-count statistics per publication
    try:
        stats_by_pub = pd.read_csv(stats_cache)
        print(stats_by_pub)
        print('stats on disk')
    except IOError:
        stats_by_pub = word_count(df_raw)
        stats_by_pub.to_csv(stats_cache)
 def test_tabs(self):
     # Tab characters must separate words just like spaces do.
     lyrics = ("rah rah ah ah ah\troma roma ma\tga ga oh la la\t"
               "want your bad romance")
     actual = word_count(lyrics)
     expected = {
         "rah": 2, "ah": 3, "roma": 2, "ma": 1, "ga": 2, "oh": 1, "la": 2,
         "want": 1, "your": 1, "bad": 1, "romance": 1,
     }
     self.assertEqual(actual, expected)
Exemple #33
0
def test_word_count():
    """Counting the testfile.txt fixture yields the expected per-word totals."""
    filename = 'testfile.txt'
    expected = {
        '2000': 1, 'Green': 1, 'Hop': 1, 'Splash': 1,
        'and': 2, 'cool': 1, 'in': 2, 'legs': 1,
        'lily': 1, 'logs': 1, 'on': 4, 'pads': 1,
        'speckled': 1, 'water': 2,
    }
    actual = word_count(filename)
    assert expected == actual
 def test_multiple_spaces_not_detected_as_a_word(self):
     # Leading and repeated spaces must not produce empty "words".
     actual = word_count(' multiple   whitespaces')
     expected = {'multiple': 1, 'whitespaces': 1}
     self.assertEqual(actual, expected)
 def test_cramped_list(self):
     # Commas without surrounding spaces still separate words.
     actual = word_count('one,two,three')
     expected = {'one': 1, 'two': 1, 'three': 1}
     self.assertEqual(actual, expected)
Exemple #36
0
import sys
from word_count import word_count

# The first command-line argument is the number of instances to run with.
instance_total = int(sys.argv[1])
word_count(instances=instance_total)
 def test_mixed_case(self):
     # Counting is case-insensitive; keys come back lower-cased.
     actual = word_count('go Go GO Stop stop')
     expected = {'go': 3, 'stop': 2}
     self.assertEqual(actual, expected)
 def test_expanded_list(self):
     # A comma followed by a newline still yields one word per item.
     actual = word_count('one,\ntwo,\nthree')
     expected = {'one': 1, 'two': 1, 'three': 1}
     self.assertEqual(actual, expected)
 def test_count_one_of_each(self):
     # Distinct words each get a count of one.
     expected = {'one': 1, 'of': 1, 'each': 1}
     actual = word_count('one of each')
     self.assertEqual(expected, actual)
 def test_count_multiple_occurrences_of_a_word(self):
     # A repeated word is tallied once per appearance.
     actual = word_count('one fish two fish red fish blue fish')
     expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
     self.assertEqual(actual, expected)
 def test_unicode(self):
     # Expects the non-ASCII input to split into two words, each counted
     # once; the expected keys go through decode_if_needed first.
     # NOTE(review): the string literals below look mis-encoded (mojibake)
     # — confirm them against the upstream test before relying on this case.
     self.assertEqual(
         {decode_if_needed('аДаО'): 1, decode_if_needed('баВаИаДаАаНаИб'): 1},
         word_count('аДаО№Ÿ––баВаИаДаАаНаИб!')
     )
 def test_count_multiple_occurences(self):
     # A repeated word is tallied once per appearance.
     expected = {'one': 1, 'fish': 4, 'two': 1, 'red': 1, 'blue': 1}
     actual = word_count('one fish two fish red fish blue fish')
     self.assertEqual(expected, actual)
 def test_apostrophes(self):
     # An apostrophe inside a word stays part of that word.
     actual = word_count("First: don't laugh. Then: don't cry.")
     expected = {'first': 1, "don't": 2, 'laugh': 1, 'then': 1, 'cry': 1}
     self.assertEqual(actual, expected)
 def test_multiple_spaces(self):
     # A long run of spaces counts as a single separator.
     expected = {'wait': 1, 'for': 1, 'it': 1}
     actual = word_count('wait for       it')
     self.assertEqual(expected, actual)
Exemple #45
0
from word_count import word_count

# Run the job with 1, 10 and then 20 instances, in that order.
for instance_total in (1, 10, 20):
    word_count(instances=instance_total)
  
 def test_include_numbers(self):
     # Digit-only tokens are counted as words too.
     expected = {'testing': 2, '1': 1, '2': 1}
     actual = word_count('testing 1 2 testing')
     self.assertEqual(expected, actual)
 def test_ignores_punctuation(self):
     # Punctuation and symbols are dropped; only the words are counted.
     expected = {'car': 1, 'carpet': 1, 'as': 1, 'java': 1, 'javascript': 1}
     actual = word_count('car : carpet as java : javascript!!&@$%^&')
     self.assertEqual(expected, actual)
 def test_non_alphanumeric(self):
     # Commas, underscores and full stops all act as word separators.
     expected = {'hey': 1, 'my': 1, 'spacebar': 1, 'is': 1, 'broken': 1}
     actual = word_count('hey,my_spacebar_is_broken.')
     self.assertEqual(expected, actual)
 def test_count_one_word(self):
     # A single word maps to a count of one.
     expected = {'word': 1}
     actual = word_count('word')
     self.assertEqual(expected, actual)
 def test_mixed_case(self):
     # Only the counts are checked here, so the casing of the keys is free.
     tallies = word_count('go Go GO Stop stop').values()
     self.assertEqual([2, 3], sorted(tallies))
 def test_quotations(self):
     # Quotes around a word are stripped; an inner apostrophe is kept.
     actual = word_count("Joe can't tell between 'large' and large.")
     expected = {
         'joe': 1, "can't": 1, 'tell': 1, 'between': 1, 'large': 2,
         'and': 1,
     }
     self.assertEqual(actual, expected)