Example #1
    def __init__(self, f):
        Dict.__init__(self)

        self.mp3File = MP3(f, ID3=EasyID3)

        self['fileName'] = f
        self['srcName'] = f
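The snippet above wraps a mutagen MP3 object. As a minimal sketch of how the MP3/EasyID3 combination is typically used on its own (the path is a placeholder):

from mutagen.mp3 import MP3
from mutagen.easyid3 import EasyID3

audio = MP3("song.mp3", ID3=EasyID3)  # "song.mp3" is a placeholder path
print(audio.info.length)              # track length in seconds
audio["title"] = ["New Title"]        # EasyID3 values are lists of strings
print(audio["title"])
audio.save()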
Example #3
def test_article(article, svm, pos_dict, neg_dict):
    # Word segmentation on the raw article
    file_context = str(article)
    seg_list = list(jieba.cut(file_context.strip(), cut_all=False))
    test_data = seg_article(article, seg_list)

    # Pre-judge based on the dictionary
    word_dict = Dict(file_context, seg_list)
    factor = word_dict.calculate_factor(test_data, pos_dict, neg_dict)

    # SVM prediction
    result = []
    for each in test_data:
        if each != '':
            result.append(svm.predict(each))

    # Combine the two scores (weighted average)
    polar = np.mean(result)
    final_score = 0.7 * polar + 0.3 * factor
    if final_score < 0.5:
        return '-1'
    elif final_score == 0.5:
        return '0'
    else:
        return '1'
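One caveat in the snippet above: jieba.cut returns a generator, so materializing it with list() (as done here) is what lets seg_list be consumed by both seg_article and Dict. A small illustration:

import jieba

seg = jieba.cut("这是一个测试", cut_all=False)  # returns a generator
tokens = list(seg)   # materialize once so the tokens can be reused
print(tokens)        # e.g. ['这是', '一个', '测试']
print(list(seg))     # [] -- the generator is already exhausted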
Example #4
    def run(self):
        super().run()

        # split input file into num_core many files
        self.prep_input()

        # get slang, stop words and emoticon dicts
        # NOTE: For now, we load these dicts here (shared between threads),
        # but we load one enchant dict per thread, for concurrency reasons.
        # We could also load these dicts once per thread, but that would
        # require some adjustments.
        d = Dict()
        slang_dict = d.get_slang()
        stop_words = d.get_stopwords()
        emoji_dict = d.get_emoticon()

        # process input files
        ts = [
            threading.Thread(target=self.checker,
                             args=(i, slang_dict, stop_words, emoji_dict))
            for i in range(self.cores)
        ]

        for t in ts:
            t.start()

        for t in ts:
            t.join()

        # merge num_core output files into one, delete the split files
        self.merge_and_delete()
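The pattern in run() — spawn one thread per core, share read-only dicts, join, then merge — reduces to a small self-contained sketch (the worker logic and names here are illustrative, not the project's checker):

import threading

def worker(idx, shared, results):
    # Threads only read the shared dict and write to their own slot,
    # so no lock is required.
    results[idx] = sum(shared.values()) + idx

shared = {"a": 1, "b": 2}   # stands in for the slang/stop-word/emoticon dicts
cores = 4
results = [None] * cores
threads = [threading.Thread(target=worker, args=(i, shared, results))
           for i in range(cores)]
for t in threads:
    t.start()
for t in threads:
    t.join()
print(results)  # [3, 4, 5, 6]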
Example #5
    def run(self):
        super().run()

        dicts = Dict()
        slang_dict = dicts.get_slang()
        stop_words = dicts.get_stopwords()
        emoji_dict = dicts.get_emoticon()
        d = enchant.Dict("en_US")

        self.prep_input()

        # dictionary defined in MMST __init__
        share = floor(self.nb / self.cores)

        ts = [
            threading.Thread(target=self.checker,
                             args=(i, d, slang_dict, stop_words, emoji_dict))
            for i in range(self.cores)
        ]

        for t in ts:
            t.start()

        for t in ts:
            t.join()

        print("merging")

        self.merge_outputs()
Example #6
    def load_db(self, SQLiteDB):
        if self.path1 == ".":
            return False
        # print(self.path + SQLiteDB)
        # Always initiate the Dict object first, then the Word object
        self.dict_obj = Dict(self.path1 + SQLiteDB)
        self.word_obj = Word()
Example #7
def dict_test():
    x = Dict()
    x['dict1'] = {'1': 1, '2': 2, '3': 3}
    # x['dict1','5']='5'
    # del x['dict1','5']
    # for i in x.ergodic('dict1'):
    #     print(i)
    print(x['dict1'])
Example #8
    def run(self):
        super().run()

        spec_sign = r"[@_!#$%^&*()<>?/\|}{~:];'-"

        # init emoticon dict
        d = Dict()
        emot_dict = d.get_emoticon()

        # normalize words
        output = open(self.output, 'w+')
        with open(self.input, mode='r') as input:
            for line in input:
                words = line.split()

                i = 0
                while i < len(words):
                    word = words[i]

                    # remove emoticon spacing
                    word_given = ''
                    word_nospace = ''
                    while word in spec_sign and len(
                            word) == 1 and i < len(words):
                        word = words[i]
                        word_given += word + ' '
                        word_nospace += word
                        if word_nospace in emot_dict:
                            output.write(word_nospace + ' ')
                            word_given = ''
                            word_nospace = ''
                        i += 1

                    if len(word_given) > 0:
                        output.write(word_given)
                    else:
                        # question mark
                        if word[-1] == '?':
                            word = word[:-1] + ' ' + '?'

                        output.write(word + ' ')
                        i += 1

                output.write('\n')

        output.close()
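The inner while loop above re-joins emoticons that were split into single characters. A self-contained re-implementation sketch of the same idea (the emot_dict entry is hypothetical; the real one comes from Dict().get_emoticon()):

emot_dict = {':)': ':)'}  # hypothetical entry

def join_spaced_emoticons(line, emot_dict,
                          spec_sign=r"[@_!#$%^&*()<>?/\|}{~:];'-"):
    # Collapse a run of single special characters into one token when the
    # joined run is a known emoticon, mirroring the loop above.
    out, run = [], ''
    for tok in line.split():
        if len(tok) == 1 and tok in spec_sign:
            run += tok
            if run in emot_dict:
                out.append(run)
                run = ''
        else:
            if run:                      # flush an unmatched run as-is
                out.append(' '.join(run))
                run = ''
            out.append(tok)
    if run:
        out.append(' '.join(run))
    return ' '.join(out)

print(join_spaced_emoticons("nice : ) thanks", emot_dict))  # nice :) thanks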
Example #9
def makeVocabulary(filename, size):
    vocab = Dict([
        Constants.PAD_WORD, Constants.UNK_WORD, Constants.BOS_WORD,
        Constants.EOS_WORD
    ])

    with open(filename) as f:
        for sent in f.readlines():
            for word in sent.split():
                vocab.add(word.lower())  # Lowercase all words

    originalSize = vocab.size()
    vocab = vocab.prune(size)
    print("Created dictionary of size %d (pruned from %d)" %
          (vocab.size(), originalSize))

    return vocab
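The Dict used here looks like an OpenNMT-style vocabulary class rather than the attribute-dict seen elsewhere on this page. A rough sketch of just the interface makeVocabulary relies on (add, size, prune); the real class also tracks indices and preserves special tokens, which this sketch omits:

from collections import Counter

class Dict:
    def __init__(self, special=()):
        self.labels = []
        self.freq = Counter()
        for tok in special:
            self.add(tok)

    def add(self, label):
        if label not in self.freq:
            self.labels.append(label)
        self.freq[label] += 1

    def size(self):
        return len(self.labels)

    def prune(self, size):
        # Keep the `size` most frequent labels in a new Dict.
        pruned = Dict()
        for label, _ in self.freq.most_common(size):
            pruned.add(label)
        return pruned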
Example #10
    def run(self):
        super().run()

        # get emoticon dict
        d = Dict()
        emot_dict = d.get_emoticon()

        # replace emoticons in input file
        output = open(self.output, 'w+')
        with open(self.input, mode='r') as input:
            for line in input:
                for word in line.split():
                    if word in emot_dict:
                        output.write(emot_dict[word] + ' ')
                    else:
                        output.write(word + ' ')

                output.write('\n')

        output.close()
Example #11
    def run(self):
        super().run()

        # init english dict
        self.en_dict = enchant.Dict("en_US")
        d = Dict()
        self.slang_dict = d.get_slang()
        self.emoticon_dict = d.get_emoticon()

        # normalize words
        output = open(self.output, 'w+')
        with open(self.input, mode='r') as input:
            for line in input:
                for word in line.split():
                    if not self.en_dict.check(word):
                        l = [''.join(g) for _, g in groupby(word)]
                        if len(l) <= 10:
                            word, _ = self.get_norm_string(l, 0)

                    output.write(word + ' ')

                output.write('\n')

        output.close()
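The groupby expression above splits a word into runs of repeated characters, which the project's get_norm_string helper presumably searches over; the len(l) <= 10 guard bounds that search. For example:

from itertools import groupby

runs = [''.join(g) for _, g in groupby("coooolll")]
print(runs)  # ['c', 'oooo', 'lll'] -- one entry per run of equal letters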
Example #12
    def test_attr(self):
        d = Dict()
        d.key = 'value'
        self.assertTrue('key' in d)
        self.assertEqual(d['key'], 'value')
Example #13
    def test_key(self):
        d = Dict()
        d['key'] = 'value'
        self.assertEqual(d.key, 'value')
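Examples #12, #13, #19, #20 and #23 on this page all exercise the same attribute-access dict. A minimal sketch that satisfies those tests (the real class may differ in details):

class Dict(dict):
    def __init__(self, **kw):
        super().__init__(**kw)

    def __getattr__(self, key):
        try:
            return self[key]
        except KeyError:
            # missing attribute access must raise AttributeError, not KeyError
            raise AttributeError("'Dict' object has no attribute '%s'" % key)

    def __setattr__(self, key, value):
        self[key] = value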
Example #14
def test_e2c_words(capfd):  # capfd is a built-in pytest fixture that captures output at the file-descriptor level
    Dict(['Test'])  # invoke the translator with the word to look up
    out, err = capfd.readouterr()  # readouterr() returns the captured stdout and stderr
    assert '测试' in out  # check that the expected translation appears in the output
Example #15
sendmessage('Start to capture')
# import pyscreenshot as ImageGrab
# 1. Screenshot
cmd = 'scrot -s -q 100 /tmp/foo.png ; xclip -selection c -t image/png < /tmp/foo.png'
os.system(cmd)
# 2. OCR
# Define config parameters.
# '-l eng'  for using the English language
# '--oem 1' for using LSTM OCR Engine
config = '-l eng --oem 1 --psm 3'
im = cv2.imread('/tmp/foo.png', cv2.IMREAD_COLOR)
# print(im.size)
# Run tesseract OCR on image
text = pytesseract.image_to_string(im, config=config)
if len(text) == 0:
    exit()
# print(text)
input_str = text.split()
# print(input_str)
# 3. Translation
dc = Dict(input_str)
result = dc.translate()
print(result)

# 4. Output Result
sendmessage(result)
cmd_add_to_a = 'echo "' + result + '" | xclip'
# print(cmd_add_to_a)
os.system(cmd_add_to_a)
Example #16
def test_c2e_sentences(capfd):
    Dict(['我爱你'])
    out, err = capfd.readouterr()
    assert 'I love you' in out
Example #17
def test_c2e_words(capfd):
    Dict(['测试'])
    out, err = capfd.readouterr()
    assert 'Test' in out
Example #18
from classParser import Parser
from dict import Dict
from classesWordRootType import Word, Root
from classValidator import Validator
import re

parser = Parser(Dict())
for i in range(1, 16):
    fileName = str(i) + '.txt'
    with open(fileName, 'r', encoding='utf8') as f:
        inFile = f.read()
    formula = parser.parse(inFile)
    print(i, inFile)
    print(formula.words)
    print(i, formula.get_schema())
    print(i, formula.types)
    print(i, formula.is_valid())
    print(i, formula.get_value())
    print(formula.words[1], formula.words[1].get_type())
    print(formula.words[1].get_type().get_correlation())
    print(formula.words[-1], formula.words[-1].get_type())
    print(formula.words[-1].get_type().get_correlation())
    print('\n')
Example #19
    def test_init(self):
        d = Dict(a=1, b='test')
        self.assertEqual(d.a, 1)
        self.assertEqual(d.b, 'test')
        self.assertTrue(isinstance(d, dict))
Example #20
    def test_keyerror(self):
        d = Dict()
        with self.assertRaises(KeyError):
            value = d['empty']
Example #21
    cfg.param_file = 'data/rmrb_ngram_changed.json' if cfg.use_re else 'data/rmrb_ngram_nochanged.json'
    # Generate n-gram parameters
    # param_file = 'data/rmrb_ngram_changed.json' if cfg.use_re else 'data/rmrb_ngram_nochanged.json'
    print("Loading model parameters calculated from rmrb ... ")
    if os.path.exists(cfg.param_file):
        with open(cfg.param_file, 'r', encoding='utf-8') as f:
            params = json.load(f)
    else:
        params = get_ngram_prob(cfg)
        with open(cfg.param_file, 'w', encoding='utf-8') as f:
            json.dump(params, f)
    test_targets, dicts = get_test_sets()

    dicts = Dict(dicts, data_structure="set")

    # Simple 2-gram model from rmrb-train
    model_simple = HMM_word(params['p3'], '<BOS>', '<EOS>')

    # Build an HMM model
    model_hmm = HMM(params['p2'], params['p1'])
    results = []

    model_rmrb = model_simple if args.score == 'Markov' else model_hmm

    print("Test on rmrb train subset")
    for sen in tqdm(test_targets[:10000:100]):
        # Get candidates
        ori_sen = ''.join(sen)
        nums, words, cands = get_proposals(ori_sen, dicts, cfg)
Example #22
def test_dev():
    # load test set
    nlpcc_f = open('data/nlpcc2016-wordseg-dev.dat', 'r', encoding='utf-8')
    lines = nlpcc_f.readlines()
    lines = [changenum(line) for line in lines]
    lines = [line.strip().split() for line in lines]
    nlpcc_f.close()

    # get dict from rmrb
    _, dicts = get_test_sets()
    dicts = Dict(dicts, data_structure="ac")  # or "set"

    # model from rmrb
    cfg = Config()
    params = get_ngram_prob(cfg)

    print(
        "Simple 2-gram model trained from rmrb, test on nlpcc-dev, with re-match"
    )
    # Simple 2-gram model from rmrb-train
    model_rmrb = HMM_word(params['p3'], '<BOS>', '<EOS>')
    results = []
    for line in tqdm(lines):
        ori_line = ''.join(line)
        res = model_rmrb.find(ori_line)
        results.append(res)
    evaluateSet(results, lines)

    # Simple n-gram model from weibo-train
    print(
        "Simple 2-gram model from nlpcc-train, test on nlpcc-dev, with re-match"
    )
    filename = 'weibo_model/nlpcc_train.replace-2gram'
    with open(filename, 'r', encoding='utf-8') as f:
        dict_lines = f.readlines()
        dict_lines = [l.strip().split('\t') for l in dict_lines]
        probs = {}
        for l in dict_lines:
            if len(l) < 2:
                continue
            probs[l[0]] = float(l[1])
    model_weibo_train = HMM_word(probs)
    # Test with Simple 2-gram model
    results = []
    for line in tqdm(lines):
        ori_line = ''.join(line)
        res = model_weibo_train.find(ori_line)
        results.append(res)
    evaluateSet(results, lines)

    # load test set without number and english replace
    nlpcc_f = open('data/nlpcc2016-wordseg-dev.dat', 'r', encoding='utf-8')
    lines = nlpcc_f.readlines()
    lines = [line.strip().split() for line in lines]
    nlpcc_f.close()
    # Simple n-gram model from weibo-train
    print(
        "Simple 2-gram model from nlpcc-train, test on nlpcc-dev, without re-match"
    )
    filename = 'weibo_model/nlpcc_train.mod-2gram'
    with open(filename, 'r', encoding='utf-8') as f:
        dict_lines = f.readlines()
        dict_lines = [l.strip().split('\t') for l in dict_lines]
        probs = {}
        for l in dict_lines:
            if len(l) < 2:
                continue
            probs[l[0]] = float(l[1])
    model_weibo_train = HMM_word(probs)

    # Test with Simple 2-gram model
    results = []
    for line in tqdm(lines):
        ori_line = ''.join(line)
        res = model_weibo_train.find(ori_line)
        results.append(res)
    evaluateSet(results, lines)

    # model from rmrb
    cfg = Config()
    cfg.use_re = 0
    params = get_ngram_prob(cfg)

    # Simple 2-gram model from rmrb-train
    print("Simple 2-gram model from rmrb, test on nlpcc-dev, without re-match")
    model_rmrb = HMM_word(params['p3'], '<BOS>', '<EOS>')
    results = []
    for line in tqdm(lines):
        ori_line = ''.join(line)
        res = model_rmrb.find(ori_line)
        results.append(res)
    evaluateSet(results, lines)
Example #23
    def test_attrerror(self):
        d = Dict()
        with self.assertRaises(AttributeError):
            value = d.empty
Example #25
def test_e2c_sentences(capfd):
    Dict(['I', 'Love', 'You'])
    out, err = capfd.readouterr()
    assert '我爱你' in out
Example #26
		p2[keysN] = t_numN[keysN] / t_numN_[keysN_]

	# calc p3: p(w|w,..,w)
	for keysN in w_numN.keys():
		tmp = keysN.split()
		keysN_ = " ".join(tmp[:-1])
		p3[keysN] = w_numN[keysN] / w_numN_[keysN_]

	return {
		"p1": p1,
		"p2": p2,
		"p3": p3,
	}

def get_ngram_prob(cfg):
	lines = readfile(cfg)
	return calc_prob_fun(lines, cfg)



if __name__ == '__main__':
	cfg = Config()
	superline = readfile(cfg)
	# fs = get_prob_fun(superline, cfg)
	dict = Dict(["中共","总书记"],"set")
	# s = "迈向,,,充满123希望的word新世纪,一九九八新年讲话。"
	# s = "刚刚看到的一段话:“你特别烦的时候先保持冷静或者看一部开心的电影者喝一大杯水不要试图跟朋友聊天朋友是跟你分享快乐的人而不是分享你痛苦的人不要做一个唠唠叨叨的抱怨者从现在起要学会自己去化解去承受”送给和我一样最近有点烦闷的人"
	s = "中共中央总书记、国家主席江泽民"
	# s  = "你好吗"
	digit, english, pro = get_proposals(s, d, cfg)
	print(pro)
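The p2/p3 tables above estimate conditional n-gram probabilities as count(full n-gram) / count(prefix). A tiny worked example with hypothetical counts:

# Hypothetical counts mirroring the w_numN / w_numN_ tables:
w_numN = {"中共 中央": 3}    # occurrences of the full bigram
w_numN_ = {"中共": 12}       # occurrences of the 1-gram prefix

key = "中共 中央"
prefix = " ".join(key.split()[:-1])
p = w_numN[key] / w_numN_[prefix]
print(p)  # 0.25 -- estimated p(中央 | 中共)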