def get_sentences(corpus, save=False, save_path="sentences.txt"):
    sentences = kss.split_sentences(corpus)
    if not save:
        return sentences
    save_iter_data(save_path, sentences)
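A minimal usage sketch of get_sentences above, assuming kss is imported at module level and that save_iter_data writes an iterable to disk line by line (both names come from the snippet; the sample corpus string is made up):

corpus = "딥러닝 기반 요약 모델을 학습한다. 전처리가 성능을 좌우한다."
sents = get_sentences(corpus)                             # returns the list of split sentences
get_sentences(corpus, save=True, save_path="sents.txt")   # writes them to disk and returns None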
def preprocessing(script):
    lines = script.split('\n')
    filtered_lines = list()
    for line in lines:
        condition_1 = (line != '')
        condition_2 = not line.startswith(('✔', '▶', '📌', '#', '📡', 'http', '-', '['))
        # condition_3 = line.startswith('○브리핑 전문○')  # check actual briefing part
        # if condition_3:
        #     is_briefing = True
        if condition_1 and condition_2:
            sents = kss.split_sentences(line)
            for sent in sents:
                words = sent.split(' ')
                word_list = list()
                for word in words:
                    if word.find('○') == -1 and word.find('●') == -1:
                        word_list.append(word)
                sent = ' '.join(word_list)
                sent = sent.strip()
                if sent != '' and sent != '브리핑 전문':
                    filtered_lines.append(sent)
    return filtered_lines
def _tokenize_sentences_worker(input_file: str, output_file: str,
                               min_len: int, max_len: int,
                               split_sent: bool = True):
    with open(input_file, 'r', encoding='utf-8') as src, \
            open(output_file, 'w', encoding='utf-8') as dst:
        total_lines = ''
        for line in src:
            if not line.strip():
                # Flush the sentences accumulated so far.
                if not split_sent and len(total_lines.strip()) > min_len:
                    dst.write(total_lines.strip() + '\n')
                    total_lines = ''
                continue
            for s in kss.split_sentences(line):
                s = s.strip()
                if split_sent:
                    if min_len < len(s) < max_len:
                        dst.write(s + '\n')
                else:
                    if len(total_lines) + len(s) > max_len:
                        dst.write(total_lines.strip() + '\n')
                        total_lines = ''
                    total_lines += s + ' '
def guess_summary(self, aspect, document, rel_words):
    neighbors = [{
        'entity': aspect,
        'reasoning': f'aspect [[{aspect}]] is in the text',
        'relation_weight': float('inf')
    }]
    for rel_word in rel_words:
        neighbors.append({
            'entity': rel_word,
            'reasoning': f'Related word [[{rel_word}]] is in the text',
            'relation_weight': float('inf')
        })
    picked_sents, reasonings = [], []
    for sent in kss.split_sentences(document):
        sent_morphs = okt.morphs(sent)
        for neighbor in neighbors:
            if neighbor['entity'] in sent_morphs:
                if sent not in picked_sents:
                    picked_sents.append(sent)
                if neighbor['reasoning'] not in reasonings:
                    reasonings.append(neighbor['reasoning'])
    if picked_sents:
        return {
            'aspect_summary': ' '.join(picked_sents),
            'reasonings': reasonings
        }
    return None
def text2sentences(self, text):
    # Merge sentences of 10 characters or fewer into the preceding sentence.
    self.sentences = kss.split_sentences(text)
    for idx in range(len(self.sentences)):
        if idx > 0 and len(self.sentences[idx]) <= 10:
            self.sentences[idx - 1] += ' ' + self.sentences[idx]
            self.sentences[idx] = ''
    return self.sentences
def run_kss(in_path, out_dir):
    nlen = get_nlen(in_path)
    pbar = tqdm(total=nlen)
    out_idx = 0
    cnt_sent = 0
    with open(in_path) as f_in:
        out_path = os.path.join(out_dir, f"kss_{out_idx}.txt")
        f_out = open(out_path, 'w')
        for _ in range(nlen):
            line = f_in.readline()
            pbar.update(1)
            sentences = kss.split_sentences(line)
            if is_bad_paragraph(sentences):
                continue
            for sent in sentences:
                if is_other_sentence(sent):
                    continue
                if is_end_sentence(sent):
                    break
                sent = remove_sentence(sent)
                f_out.write(f"{sent}\n")
                cnt_sent += 1
            f_out.write("\n")
            # Roll over to a new output file after roughly one million sentences.
            if cnt_sent > 1000000:
                f_out.close()
                out_idx += 1
                out_path = os.path.join(out_dir, f"kss_{out_idx}.txt")
                f_out = open(out_path, 'w')
                cnt_sent = 0
    pbar.close()
    f_out.close()
def load_data(self):
    question_file = open(self.file_path, 'r', encoding='utf-8')
    question_lines = question_file.readlines()
    question_file.close()
    for line in question_lines:
        line = line.strip()
        context = line.split('\t')[0]
        answer = line.split('\t')[1]
        question = line.split('\t')[2]
        total_tokens = []
        qa_tokens = (['<answer>'] + tokenizer.tokenize(answer) + ['</answer>'] +
                     ['<question>'] + tokenizer.tokenize(question) + ['</question>'])
        for single_line in kss.split_sentences(context):
            tokenized_single_line = ['<s>'] + tokenizer.tokenize(single_line) + ['</s>']
            if len(total_tokens) + len(qa_tokens) + len(tokenized_single_line) < 1024:
                total_tokens += tokenized_single_line
            else:
                break
        total_tokens += qa_tokens
        padded_total_tokens = total_tokens + ['<pad>'] * (1024 - len(total_tokens))
        self.data.append(
            torch.tensor(tokenizer.convert_tokens_to_ids(padded_total_tokens)).unsqueeze(0))
def get_df(self, *colnames, by_sentence_textColname=None):
    '''
    :param colnames: column names (str)
    :param by_sentence_textColname: name of the text column to split into sentences
    :return: DataFrame
    '''
    df_documents = self.df.loc[:, list(colnames)]
    if by_sentence_textColname:
        df_sentences = pd.DataFrame()
        nrows = df_documents.shape[0]
        for i in tqdm(range(nrows), "loader : Getting Sentences "):
            row = df_documents.iloc[i]
            text = row[by_sentence_textColname]
            if len(text) > 0:
                text = cleanse_text(text)
                sentences = kss.split_sentences(text)  # texts over 300 characters are common; needs checking
                for s in sentences:
                    s = cleanse_sentence(s)
                    if len(s) > 0:
                        row_temp = row.copy()
                        row_temp[by_sentence_textColname] = s
                        df_sentences = df_sentences.append(row_temp)
            else:
                continue
        print(f"loader : Getting DataFrame Done {nrows} Documents to {df_sentences.shape[0]} Sentences")
        return df_sentences
    else:
        return df_documents
def load_data(self):
    answer_file = open(self.file_path, 'r', encoding='utf-8')
    answer_lines = answer_file.readlines()
    answer_file.close()
    for line in answer_lines:
        line = line.strip()
        context = line.split('\t')[0]
        answers = line.split('\t')[1].split('|')
        total_tokens = []
        answer_tokens = ['<answer>']
        for i in range(0, len(answers) - 2):
            answer_tokens += tokenizer.tokenize(answers[i]) + ['<sep>']
        answer_tokens += tokenizer.tokenize(answers[-2]) + ['</answer>']
        for single_line in kss.split_sentences(context):
            tokenized_single_line = ['<s>'] + tokenizer.tokenize(single_line) + ['</s>']
            if len(total_tokens) + len(answer_tokens) + len(tokenized_single_line) < 1024:
                total_tokens += tokenized_single_line
            else:
                break
        total_tokens += answer_tokens
        padded_total_tokens = total_tokens + ['<pad>'] * (1024 - len(total_tokens))
        self.data.append(
            torch.tensor(tokenizer.convert_tokens_to_ids(padded_total_tokens)).unsqueeze(0))
def write_subtitle(aligned_vtt_dict):
    for idx in aligned_vtt_dict:
        subtitle = SubtitleWrapper()
        vtts = aligned_vtt_dict[idx]
        for vtt in vtts:
            subtitle.write_caption(vtt['start'], vtt['end'],
                                   kss.split_sentences(vtt['text']))
        subtitle.save_caption(os.path.join(args.vtt_path, idx))
    print('[INFO] aligned subtitles saved.')
def sentAnalyze(user_input):
    try:
        adamsURL = "http://api.adams.ai/datamixiApi/tms"
        accessKey = "5071738647222560661"
        text = ' '.join(str(s) for s in kss.split_sentences(user_input[:800]))  # AdamsAI can only handle up to 877 characters
        analysisCode = 'om'
        language = 'kor'
        params = dict(key=accessKey, query=text, analysis=analysisCode, lang=language)
        resp = requests.get(url=adamsURL, params=params)
        data = resp.json()
        sentiword = []
        for i in range(len(data['return_object']['sentence'])):
            if 'sa' in data['return_object']['sentence'][i] and \
                    data['return_object']['sentence'][i]['sa']['sentiword'] and \
                    data['return_object']['sentence'][i]['sa']['polarity'] < 0:
                print(data['return_object']['sentence'][i]['text'], '\n', '\t',
                      data['return_object']['sentence'][i]['sa'])
                sentiword.append(
                    (data['return_object']['sentence'][i]['sa']['score'],
                     list(filter(lambda w: '/' not in w,
                                 data['return_object']['sentence'][i]['sa']['sentiword']))))
        dup = set()
        output = []
        for x, y in sorted(sentiword):
            for w in y:
                if w not in dup and 'ㄹ' not in w and '다' in w:
                    dup.add(w)
                    output.append((x, w))
        print(output)
        if not output:
            print("empty")
            return None
        else:
            sent2print = []
            for i in range(len(output)):
                if len(output) >= 5:
                    sent2print = [output[0][1], output[1][1], output[2][1], output[3][1]]
                else:
                    sent2print.append(output[i][1])
            return ', '.join(str(s) for s in sent2print)
    except KeyError:
        return None
def context_tokenizer(text, tokenizer):
    sent_list = kss.split_sentences(text)
    tokens = []
    for sent in sent_list:
        tokenized_sentence = tokenizer.tokenize(sent)
        if len(tokens) + len(tokenized_sentence) < 912:
            tokens += ['<s>'] + tokenized_sentence + ['</s>']
        else:
            break
    return tokens
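A hedged usage sketch for context_tokenizer above; the Hugging Face model name is only an illustrative assumption (any object exposing a .tokenize() method works), and the sample text is made up:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-multilingual-cased")  # illustrative choice only
tokens = context_tokenizer("첫 번째 문장입니다. 두 번째 문장입니다.", tokenizer)
print(len(tokens), tokens[:8])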
def parse_json_recursively(json_object, target_key):
    if type(json_object) is dict and json_object:
        for key in json_object:
            if key in target_key:
                for sent in kss.split_sentences(json_object[key]):
                    output_set.add(str(sent))
            parse_json_recursively(json_object[key], target_key)
    elif type(json_object) is list and json_object:
        for item in json_object:
            parse_json_recursively(item, target_key)
def mecabFreqToSentenceList(text):
    # Initialize the lists before populating them.
    sentence_token = []
    allnoun = []
    # Split the document into sentences and store them in sentence_token.
    sentence_token = kss.split_sentences(text)
    # For each sentence, extract only the nouns and store the re-joined result.
    for i in range(0, len(sentence_token)):
        # Noun extraction
        allnoun.append(" ".join(mecab.nouns(sentence_token[i])))
    return allnoun
def _tokenize_sentences_worker(input_file: str, output_file: str, min_len: int):
    with open(input_file, 'r', encoding='utf-8') as src, \
            open(output_file, 'w', encoding='utf-8') as dst:
        for line in src:
            for s in kss.split_sentences(line):
                s = s.strip()
                if len(s) < min_len:
                    continue
                dst.write(s + '\n')
def test_quote_misalignment(self):
    # testcase from https://github.com/likejazz/korean-sentence-splitter/issues/8
    text = """부부 싸움 규칙 가운데 ‘돈 히트 언더 더 벨트’(Don’t hit under the belt)가 있다. 권투할 때 벨트 아래를 치면 반칙이듯이, 상대가 너무 아파할 만한 것을 건드리면 회복하기 어렵다. 그 부분은 사람마다 다르다."""
    splitted = kss.split_sentences(text)
    self.assertEqual(len(splitted), 3)

    text = """안녕하십니까? 삼성전자가 11월 13일 삼성전자 서초사옥 다목적홀 5층에서 진행되는 2013 S'데이 멤버십 블루 강연회 "Challenge BLUE, 박찬호&이동우의 삶과 도전" 멤버십 블루 고객 480명을 초청한다.강연회는 삼성전자 멤버십 블루 회원들을 위해 마련된 고객 혜택 행사로 한국인 최초의 메이저리거 박찬호와 시각장애 개그맨 이동우를 초청, 그들의 삶 속에서 펼쳐진 다양한 도전기를 들을 수 있도록 마련했다."""
    splitted = kss.split_sentences(text)
    self.assertEqual(len(splitted), 3)

    text = """삼성전자가 11월 13일 삼성전자 서초사옥 다목적홀 5층에서 진행되는 2013 S'데이 멤버십 블루 강연회 "Challenge BLUE, 박찬호&이동우의 삶과 도전" 멤버십 블루 고객 480명을 초청한다.강연회는 삼성전자 멤버십 블루 회원들을 위해 마련된 고객 혜택 행사로 한국인 최초의 메이저리거 박찬호와 시각장애 개그맨 이동우를 초청, 그들의 삶 속에서 펼쳐진 다양한 도전기를 들을 수 있도록 마련했다."""
    splitted = kss.split_sentences(text)
    self.assertEqual(len(splitted), 2)

    text = """삼성전자가 11월 13일 삼성전자 서초사옥 다목적홀 5층에서 진행되는 2013 S"데이 멤버십 블루 강연회 "Challenge BLUE, 박찬호&이동우의 삶과 도전" 멤버십 블루 고객 480명을 초청한다.강연회는 삼성전자 멤버십 블루 회원들을 위해 마련된 고객 혜택 행사로 한국인 최초의 메이저리거 박찬호와 시각장애 개그맨 이동우를 초청, 그들의 삶 속에서 펼쳐진 다양한 도전기를 들을 수 있도록 마련했다."""
    splitted = kss.split_sentences(text)
    self.assertEqual(len(splitted), 2)

    text = """삼성전자가 11월 13일 삼성전자 서초사옥 다목적홀 5층에서 진행되는 2013 S"데'이 멤버십 블루 강연회 "Challenge BLUE, 박찬호&이동우의 삶과 도전" 멤버십 블루 고객 480명을 초청한다.강연회는 삼성전자 멤버십 블루 회원들을 위해 마련된 고객 혜택 행사로 한국인 최초의 메이저리거 박찬호와 시각장애 개그맨 이동우를 초청, 그들의 삶 속에서 펼쳐진 다양한 도전기를 들을 수 있도록 마련했'다."""
    splitted = kss.split_sentences(text)
    self.assertEqual(len(splitted), 2)
def extract_nouns_v2(news: str) -> dict:
    """Extract nouns from news.

    :param news: contents of news.
    :return: dict(). Extracted keyword and its count. {keyword: count, }
    """
    mecab = Mecab()
    news_lines = kss.split_sentences(news)
    nouns = []
    for line in news_lines:
        nn = 0
        pos = 0
        for token in mecab.pos(line):
            pos = pos + line[pos:].find(token[0])
            if token[1] == 'NNG':  # common noun
                if nn > 0:
                    if line[pos - 1] == ' ':
                        nouns.append((f'{nouns[-1][0]} {token[0]}', nouns[-1][1]))
                        nouns.append(token[0])
                    else:
                        nouns[-1] = (f'{nouns[-1][0]}{token[0]}', nouns[-1][1])
                    nn += 1
                else:
                    nn = 1
                    nouns.append(token)
            elif token[1] == 'NNP':  # proper noun
                if nn > 0:
                    if line[pos - 1] == ' ':
                        nouns.append((f'{nouns[-1][0]} {token[0]}', 'NNP'))
                        nouns.append(token[0])
                    else:
                        nouns[-1] = (f'{nouns[-1][0]}{token[0]}', 'NNP')
                    nn += 2
                else:
                    nn = 2
                    nouns.append(token)
            else:
                nn = 0
            pos += len(token[0])
    return dict(Counter(nouns))
def encoding(category, text):
    sent_list = kss.split_sentences(text)
    tokens = []
    for sent in sent_list:
        tokenized_sentence = tokenizer.tokenize(sent)
        if len(tokens) + len(tokenized_sentence) < 912:
            tokens += ['<s>'] + tokenized_sentence + ['</s>']
        else:
            break
    tokens += [category]
    tokens += ['<title>']
    return torch.tensor(tokenizer.convert_tokens_to_ids(tokens)).unsqueeze(0)
def check_duplicate(data):
    temp = list()
    for raw in data:
        r = kss.split_sentences(raw)
        temp.append(r)
    print(temp)
    print(type(temp))
    print(len(temp))
    print(type(temp[0]))
    print(len(temp[0]))
    return set(sum(temp, []))
def makeSentence(self, originaltext):
    # Split the received text into sentences.
    textList = []
    text = ''
    for sent in kss.split_sentences(originaltext):
        textList.append(sent)
    for i in textList:
        text = text + i + '. '  # append '. ' so the sentences stay distinguishable
    return text
def ensembleSummarize(text):
    sentences = kss.split_sentences(text)
    # Compute each candidate summary once, then let the three summarizers vote per sentence.
    gensim_summary = summarize(text)
    lexrank_summary = summarizeLexRank(text)
    textrank_summary = summarizeTextRank(text).split("\n")
    s = [0] * len(sentences)
    for idx in range(len(sentences)):
        if sentences[idx] in gensim_summary:
            s[idx] += 1
        if sentences[idx] in lexrank_summary:
            s[idx] += 1
        if sentences[idx] in textrank_summary:
            s[idx] += 1
    i = s.index(max(s))
    return sentences[i]
def makeLabel(question, label):
    ques, lab = [], []
    for q in question:  # type(q) -> str
        tmp = clean_text('\n'.join(kss.split_sentences(q)))
        sents = clean_punc(tmp, punct, punct_mapping)
        if sents is not None:
            ques.append(sents)
            lab.append(label)
    ques_col = pd.DataFrame(ques, columns=['sentences'])
    lab_col = pd.DataFrame(lab, columns=['label'])
    result = pd.concat([ques_col, lab_col], axis=1)
    return result
def predict(paragraph):
    with session.as_default():
        with session.graph.as_default():
            emotions = [0, 0, 0, 0, 0, 0]
            sentences = kss.split_sentences(paragraph)
            from NLP import Model
            for sentence in sentences:
                output = Model.put(token, sentence, mc)  # tokenize the sentence
                emotion = Model.out(output, model)  # map the tokenized sentence to an emotion
                emotions[emotion[0]] += 1  # increment the count of the predicted emotion
            return emotions
def replace_str(data):
    data = data.replace('\n', ' ')
    data = data.replace('//', '')
    data = data.replace('ㅠ', '')
    data = data.replace('ㅋ', '')
    data = kss.split_sentences(data)
    # print(f'!! data : {data}')
    data = ' '.join(data[:3])
    # Collapse runs of spaces left over from the replacements above.
    data = data.replace('  ', ' ')
    data = data.replace('  ', ' ')
    return data
def chat(self, sent='0'):
    tok = SentencepieceTokenizer(self.tok_path, num_best=0, alpha=0)
    sent_tokens = tok(sent)
    with torch.no_grad():
        while 1:
            q = input('user > ').strip()
            if q == 'quit':
                break
            q_tok = tok(q)
            a = ''
            a_tok = []
            timeout = time.time() + 60
            while 1:
                input_ids = torch.LongTensor(
                    [self.vocab[U_TKN]] + self.vocab[q_tok] +
                    self.vocab[EOS, SENT] + self.vocab[sent_tokens] +
                    self.vocab[EOS, S_TKN] + self.vocab[a_tok]).unsqueeze(dim=0)
                pred = self(input_ids)
                gen = self.vocab.to_tokens(
                    torch.argmax(pred, dim=-1).squeeze().numpy().tolist())[-1]
                if gen == EOS:
                    break
                a += gen.replace('▁', ' ')
                a_tok = tok(a)
                if time.time() > timeout:
                    break
            answer_list = kss.split_sentences(a)[1:-2]
            Simsimi_answer = "".join(answer_list)
            sentence_list = Simsimi_answer.split('.')
            sentences = []
            for s in sentence_list:
                word_list = s.split()
                for word in word_list:
                    # Replace usernames ending in '*님이' with '상담자님이'.
                    if word.endswith('*님이'):
                        word_list[word_list.index(word)] = "상담자님이"
                sentence = " ".join(word_list)
                sentences.append(sentence)
            print("Simsimi > ", ".".join(sentences))
def process_reviews(reviews):
    processed_reviews = []
    for review in reviews:
        review = repeat_normalize(review, num_repeats=2)  # collapse repeated characters to at most two
        review = spacing(review)  # fix word spacing
        review = '.'.join(split_sentences(review))  # split into sentences, then rejoin with '.'
        try:
            review = spell_checker.check(review).as_dict()['checked']
        except Exception:
            print('pass')
        print(review)
        processed_reviews.append(review)
        time.sleep(0.5)
    return processed_reviews
def _tokenize_for_summarize(text):
    # Split into sentences.
    splitted_array = kss.split_sentences(text)
    # Tokenize, keeping only nouns.
    tokenized_table = _tokenizer_kor(texts=splitted_array, pos_extraction=['Noun'])
    len_doc = len(tokenized_table)
    # Post-process: join the tokens of each sentence back into a string.
    tokenized_sentence = [' '.join(tokenized_table[i]) for i in range(len_doc)]
    return tokenized_sentence, splitted_array, len_doc
def summarization(txt):
    segment = []
    for sent in kss.split_sentences(txt):  # split into sentences
        if "." in sent[0:-1]:  # drop unnecessary periods inside a sentence
            sent = sent.replace(".", "")
        if sent[-1] not in [".", "?", "!"]:  # restore the period if the sentence lost its ending punctuation
            sent = sent + "."
        segment.append(sent)
    if len(segment) <= 10:
        result = " ".join(segment).replace("\n", "")  # strip newlines inside the text
    else:
        # Adjust the number of sentences in the summary.
        seg_str = " ".join(segment).replace("\n", "")
        result = summarize(seg_str, ratio=0.15, word_count=80)
    return result
def read_token(file_name):
    okt = Okt()  # POS analyzer
    result = []
    with open(file_name, encoding='UTF8') as fread:
        while True:
            line = fread.readline()  # read one line at a time
            if not line:
                break  # stop once the whole file has been read
            # line = okt.morphs(line)  # morphological analysis
            for sent in kss.split_sentences(line):  # paragraph -> sentences
                tokenlist = okt.pos(sent, stem=True, norm=True)  # morphological analysis and POS tagging
                tmp = []
                for word in tokenlist:
                    if word[1] in ["Noun", "Verb", "Adjective"]:
                        tmp.append(word[0])  # keep this word
                result.append(tmp)
    return '\n'.join([' '.join(r) for r in result])
def result():  # route handler
    # 0. Initialize
    now = str(time.time())
    dloads_src = '/static/img/after_' + now + '.jpg'
    time_info = datetime.datetime.now().strftime('%Y%m%d%H%M%S')
    sentence = request.form['sentence']
    sentence = re.sub('\n', ' ', sentence)
    sentence = re.sub('\r', ' ', sentence)

    ###############################
    # 1. Get input text from user #
    ###############################
    input_text = kss.split_sentences(sentence)
    print(input_text)

    ######################
    # 2. Extract keyword #
    ######################
    keyword = get_keyword(input_text)
    print("추출된 키워드 > ", keyword)

    ######################
    # 3. Image crawling  #
    ######################
    if len(keyword) < 3:
        pass  # exception handling
    else:
        image_link = get_crawlingImage(keyword, "KRTSOhiLDjFo8VpVkekS", "PnJAftBpaI", time_info, now)
        # image_link = get_crawlingImage(keyword, "KRTSOhiLDjFo8VpVkekS", "PnJAftBpaI", time_info)

    ##############################
    # 4. Predict sentiment label #
    ##############################
    sentiment_label = get_sentimentLabel(input_text, time_info)

    #####################
    # 5. Style transfer #
    #####################
    get_finalImage(image_link, sentiment_label, filename='after_' + now)

    return render_template('result.html', sentence=sentence,
                           after_img='img/after_' + now + '.jpg',
                           dloads_src=dloads_src)