Python json_dict_from_file Examples

Programming Language: Python

Namespace/Package Name: corpus_preprocess

Method/Function: json_dict_from_file

Examples at hotexamples.com: 4

Python json_dict_from_file - 4 examples found. These are the top-rated real-world Python examples of corpus_preprocess.json_dict_from_file extracted from open-source projects. You can rate the examples to help us improve their quality.

Example #1

Show file

File: word2vec_graph2.py Project: ShuhaiLi/experience

 def __iter__(self):
     """Yield one processed ``content`` value per record loaded for ``self.dirname``.

     Records come from ``json_dict_from_file(self.dirname, "content")``. Each
     record's ``'content'`` entry is passed through ``clean_comment`` and the
     result through ``delete_stop_words(..., return_list=True)``; whatever
     that call returns is yielded unchanged.
     """
     for record in json_dict_from_file(self.dirname, "content"):
         cleaned = clean_comment(record['content'])
         yield delete_stop_words(cleaned, return_list=True)

Example #2

Show file

File: word2vec_graph2.py Project: wac81/experience

 def __iter__(self):
     """Iterate over the processed ``content`` of each loaded record.

     The records are obtained once from
     ``json_dict_from_file(self.dirname, "content")``. For every record, its
     ``'content'`` value is run through ``clean_comment`` and then
     ``delete_stop_words`` with ``return_list=True``, and that result is
     yielded as-is.
     """
     records = json_dict_from_file(self.dirname, "content")
     for entry in records:
         yield delete_stop_words(
             clean_comment(entry['content']),
             return_list=True,
         )

Example #3

Show file

File: word2vec_graph2.py Project: ShuhaiLi/experience

    # Settings forwarded to the Word2Vec constructor in the training branch below.
    feature_size = 500
    content_window = 10
    freq_min_count = 4
    # threads_num is not referenced in the visible code; the constructor call
    # below passes workers=multiprocessing.cpu_count() instead.
    threads_num = 8
    # NOTE(review): the original (translated) comment on the next line reads as
    # self-contradictory, and a non-zero `negative` normally enables negative
    # sampling in gensim's Word2Vec -- confirm which method is intended.
    negative = 6   # best: sampling uses the hierarchical softmax method (negative sampling, favors common words) rather than the negative sampling method (favors rare words).
    t_iter = 60

    print("word2vec...")
    # Start the timer before deciding whether to load or train.
    tic = time.time()
    if os.path.isfile(save_model):
        # A saved model file already exists: load it instead of retraining.
        model = Word2Vec.load(save_model)
        print(model.vocab)
        print("Loaded word2vec model")
    else:
        # No saved model file: train on the output of json_dict_from_file.
        s_list = json_dict_from_file(file_name,"content")
        model = Word2Vec(s_list, size=feature_size, window=content_window, iter=t_iter, min_count=freq_min_count,negative=negative, workers=multiprocessing.cpu_count())
        # The timer stops before the save calls, so the reported time covers
        # loading the input and training only.
        toc = time.time()
        print("Word2vec completed! Elapsed time is %s." % (toc-tic))
        # Persist the model twice: via model.save and in the non-binary
        # word2vec format.
        model.save(save_model)
        model.save_word2vec_format(save_model2, binary=False)
        print("Word2vec Saved!")





    """
    品牌维度
    """
    # brand =[u'性能',

Example #4

Show file

File: word2vec_graph2.py Project: wac81/experience

    # Settings forwarded to the Word2Vec constructor in the training branch below.
    feature_size = 500
    content_window = 10
    freq_min_count = 4
    # threads_num is not referenced in the visible code; the constructor call
    # below passes workers=multiprocessing.cpu_count() instead.
    threads_num = 8
    # NOTE(review): the original (translated) comment on the next line reads as
    # self-contradictory, and a non-zero `negative` normally enables negative
    # sampling in gensim's Word2Vec -- confirm which method is intended.
    negative = 6  # best: sampling uses the hierarchical softmax method (negative sampling, favors common words) rather than the negative sampling method (favors rare words).
    t_iter = 60

    print("word2vec...")
    # Start the timer before deciding whether to load or train.
    tic = time.time()
    if os.path.isfile(save_model):
        # A saved model file already exists: load it instead of retraining.
        model = Word2Vec.load(save_model)
        print(model.vocab)
        print("Loaded word2vec model")
    else:
        # No saved model file: train on the output of json_dict_from_file.
        s_list = json_dict_from_file(file_name, "content")
        model = Word2Vec(s_list,
                         size=feature_size,
                         window=content_window,
                         iter=t_iter,
                         min_count=freq_min_count,
                         negative=negative,
                         workers=multiprocessing.cpu_count())
        # The timer stops before the save calls, so the reported time covers
        # loading the input and training only.
        toc = time.time()
        print("Word2vec completed! Elapsed time is %s." % (toc - tic))
        # Persist the model twice: via model.save and in the non-binary
        # word2vec format.
        model.save(save_model)
        model.save_word2vec_format(save_model2, binary=False)
        print("Word2vec Saved!")
    """
    品牌维度
    """