def get_emotion_sequence(posts, EMO_RESOURCES):
    """Build a per-sentence dominant-emotion sequence and an aggregate profile.

    Each post is cleaned and split into sentences; every sentence gets an
    emotion profile from get_emotion_profile_per_post.

    Args:
        posts: iterable of raw text posts.
        EMO_RESOURCES: emotion lexicon bundle, passed through to
            get_emotion_profile_per_post.

    Returns:
        tuple (emo_seq, agg_profile):
            emo_seq: list of dominant emotion labels, one entry per emotion
                tied at the sentence's maximum score (only when that maximum
                is positive).
            agg_profile: dict summing each emotion's score across all
                sentences whose dominant score is positive.
    """
    emo_seq = []
    agg_profile = {}
    for post in posts:
        clean_post = text_utils.clean_text(post)
        for sent in sent_tokenize(clean_post):
            profile, _seq = get_emotion_profile_per_post(sent, EMO_RESOURCES)
            if not profile:
                continue
            # Single scan for the dominant (emotion, score) pair instead of
            # calling max() twice on the same items.
            max_emotion, max_emo_val = max(
                profile.items(), key=operator.itemgetter(1))
            if max_emo_val > 0:
                for key, val in profile.items():
                    # One sequence entry per emotion tied at the max score
                    # (preserves the original tie behaviour — TODO confirm
                    # ties were meant to emit duplicates).
                    if val == max_emo_val:
                        emo_seq.append(max_emotion)
                    # Accumulate every emotion's score for this sentence.
                    agg_profile[key] = agg_profile.get(key, 0) + val
    return emo_seq, agg_profile
def cleanText(inpath):
    """Convert the CSV at *inpath* to a spreadsheet and write a cleaned copy.

    Reads every data row of the first sheet of *inpath*, cleans the text in
    column 1 with text_utils.clean_text, and writes (id, cleaned text) pairs
    to a new workbook.

    Args:
        inpath: path to the source spreadsheet/CSV file.
    """
    output = r'F:\projects-he\nlp-emotion-analysis-Jeloh\nlp-emotion-analysis-core\data\coronavirus_reddit_raw_comments.xls'
    csv_xls.csv_to_xlsx_pd(inpath, output)
    # Open the source workbook (xlrd) and prepare the output workbook (xlwt).
    # The original's dangling "a_sheet" token and the unused
    # load_workbook(inpath) result were removed — they were dead/broken code.
    indata = xlrd.open_workbook(inpath)
    book = Workbook(encoding='utf-8')
    outdata = book.add_sheet('sheet1')
    table = indata.sheets()[0]
    nrows = table.nrows
    # Skip the header row (row 0); clean the text column of each data row.
    for i in range(1, nrows):
        alldata = table.row_values(i)
        clean_text_1 = text_utils.clean_text(alldata[1])
        outdata.write(i, 1, alldata[0])
        outdata.write(i, 2, clean_text_1)
    # Save once, after all rows are written, via the Workbook — xlwt sheets
    # have no save() method, so the original outdata.save(...) would raise.
    # NOTE(review): the path ends in .csv but xlwt emits .xls binary data —
    # confirm downstream consumers expect this.
    book.save(
        r"F:\projects-he\nlp-emotion-analysis-Jeloh\nlp-emotion-analysis-core\data\AfterClean.csv"
    )
'NEGATION': NEGATION_MAP, 'INTENSIFIERS': INTENSIFIER_MAP, 'PHYSICAL': PHYSICAL } return EMO_RESOURCES results_df = pd.DataFrame() df = pd.read_csv(path) print(df.columns) print(df['emotion'].unique()) EMO_RESOURCES = load_emotion_dictionaries() for i, row in df.iterrows(): # if(i>20):continue row_dict = row.to_dict() # print() sentence = row['utterance'] clean_text_1 = text_utils.clean_text(sentence) emotion_profile, emo_seq = emotion_extractor.get_emotion_profile_per_post( clean_text_1, EMO_RESOURCES) print(emotion_profile) row_dict.update(emotion_profile) print(row_dict) results_df = results_df.append(row_dict, ignore_index=True) results_df.to_csv( r"E:\Projects\Emotion_detection_gihan\finbert_experiments\evaluations\twitter_emotion_evaluations.csv" )
with open('models/emotions/intensifier_vocab.pkl', 'rb') as f: INTENSIFIER_MAP = pickle.load(f) with open('models/emotions/negation_vocab.pkl', 'rb') as f: NEGATION_MAP = pickle.load(f) with open('models/clinical_info/physical.pkl', 'rb') as f: PHYSICAL = pickle.load(f) EMO_RESOURCES = {'EMOTIONS': EMOTION_MAP, 'NEGATION': NEGATION_MAP, 'INTENSIFIERS': INTENSIFIER_MAP, 'PHYSICAL': PHYSICAL} return EMO_RESOURCES if __name__ == '__main__': EMO_RESOURCES = load_emotion_dictionaries() text_1 = 'still i feel very sad about the unexpected incident. hopefully the pain will be less, and i am grateful for..' clean_text_1 = text_utils.clean_text(text_1) emotion_profile = emotion_extractor.get_emotion_profile_per_post(clean_text_1, EMO_RESOURCES) clinical_info = clinical_info_extractor.get_physical_sym_profile(clean_text_1, EMO_RESOURCES) keyphrases = keyphrase_extractor.analyze_keyphrases(clean_text_1) print(keyphrases) print(clinical_info) print(emotion_profile)