def vocabulary_from_corpus(self, n_min=1, n_max=10, persistent=True):
    """Build the vocabulary DataFrame from the n-gram corpus CSV files.

    Reads corpus files for n in [n_min, n_max] (paths supplied by
    Environment.filename_corpus_csv), concatenates them, de-duplicates rows
    and normalizes columns to ['word', 'gram', 'idgram'].

    Args:
        n_min: first corpus index to read (inclusive).
        n_max: last corpus index to read (inclusive).
        persistent: when True, also write the result to the vocabulary CSV.

    Returns:
        pandas.DataFrame indexed by 'idcorpus' with columns
        ['word', 'gram', 'idgram']; empty (but correctly shaped) if no
        corpus file could be read.
    """
    env = Environment()
    frames = []  # accumulate per-file frames; single concat avoids quadratic copying
    for i in range(n_min, n_max + 1):
        file_csv = env.filename_corpus_csv(i)
        try:
            dffile = pd.read_csv(file_csv, index_col='idcorpus', encoding='utf-8')
        # Narrow exceptions: a bare `except:` would also swallow
        # KeyboardInterrupt/SystemExit. Missing files and malformed CSV are
        # the expected, best-effort failure modes here.
        except (OSError, ValueError, pd.errors.ParserError):
            env.debug(1, ['Failed to read corpus file:', file_csv])
        else:
            env.debug(1, ['Read OK:', file_csv])
            if not dffile.empty:
                frames.append(dffile)
    if frames:
        # DataFrame.append was removed in pandas 2.x; concat once instead.
        df_voc = pd.concat(frames)
    else:
        # No readable corpus file: return an empty frame that still has the
        # expected shape (assigning 3 column names to a 0-column frame raises).
        df_voc = pd.DataFrame(columns=['word', 'gram', 'idgram'])
    df_voc = df_voc.drop_duplicates()
    df_voc.columns = ['word', 'gram', 'idgram']
    df_voc = df_voc.reset_index(drop=True)
    df_voc.index.name = 'idcorpus'
    if persistent:
        file_voc = env.filename_vocabulary_csv()
        env.debug(1, ['Write vocabulary to CSV:', file_voc])
        df_voc.to_csv(file_voc, encoding='utf-8')
    return df_voc
def vocabulary(self):
    """Return the combined vocabulary: corpus vocabulary + dictionary + patch.

    Reads the vocabulary CSV and the dictionary CSV (best effort — a missing
    or unreadable file contributes nothing), concatenates and de-duplicates
    them, then applies the patch file: any word present in the patch replaces
    all existing rows for that word.

    Returns:
        pandas.DataFrame indexed by 'idcorpus' with the merged vocabulary.
    """
    env = Environment()
    file_voc = env.filename_vocabulary_csv()   # from vocabulary file
    file_dict = env.filename_dict_csv()        # from dictionary file
    # Pre-initialize: the original left these unbound on read failure, which
    # made the later concat raise NameError instead of degrading gracefully.
    df_voc = pd.DataFrame()
    df_dict = pd.DataFrame()
    try:
        df_voc = pd.read_csv(file_voc, index_col='idcorpus', encoding='utf-8')
    # Narrow exceptions instead of a bare `except:` (which also catches
    # KeyboardInterrupt/SystemExit).
    except (OSError, ValueError, pd.errors.ParserError):
        env.debug(1, ['Failed to read vocabulary file:', file_voc])
    else:
        env.debug(1, ['Read vocabulary OK:', file_voc])
    try:
        df_dict = pd.read_csv(file_dict, index_col='idcorpus', encoding='utf-8')
    except (OSError, ValueError, pd.errors.ParserError):
        env.debug(1, ['Failed to read dictionary file:', file_dict])
    else:
        env.debug(1, ['Read dictionary OK:', file_dict])
    # Concat vocabulary + dictionary and drop exact duplicate rows.
    df_res = pd.concat([df_voc, df_dict])
    df_res = df_res.drop_duplicates()
    # Apply patch words: patched words fully replace their existing rows.
    df_patch = pd.read_csv(env.filename_vocabulary_patch_csv(),
                           index_col='idcorpus', encoding='utf-8')
    df_res = df_res.drop(df_res[df_res['word'].isin(df_patch['word'])].index, axis=0)
    df_res = pd.concat([df_res, df_patch])
    df_res = df_res.reset_index(drop=True)
    df_res.index.name = 'idcorpus'
    return df_res