def optword(self):
    """Flag duplicate ``Word`` rows.

    For every row in ``Word``, all rows sharing the same ``word`` value are
    looked up. When more than one such row exists, every row after the first
    one returned gets ``re2`` set to ``'d'`` and is saved.
    """
    for entry in Word.select():
        same_text = Word.select().where(Word.word == entry.word)
        if len(same_text) <= 1:
            # Unique word: nothing to flag.
            continue
        for duplicate in same_text[1:]:
            duplicate.re2 = 'd'
            duplicate.save()
def news_with_one_company():
    """Add sentiment columns to the single-company news CSV and save a copy.

    Reads ``news_with_one_company.csv``, applies ``sentence_info_pd`` to each
    row (passing the positive and negative ``Word`` rows as lists), stores the
    result in the ``sent_score``, ``word_count``, ``words`` and
    ``parsed_sentence`` columns, and writes the frame to
    ``news_with_one_company_and_sentiment_analysis.csv`` without the index.
    """
    source_path = "./../../data/news/news_with_one_company.csv"
    target_path = "./../../data/news/news_with_one_company_and_sentiment_analysis.csv"
    new_columns = ["sent_score", "word_count", "words", "parsed_sentence"]

    # ``== True`` / ``== False`` build peewee expressions; they are not
    # ordinary Python comparisons and must stay spelled this way.
    positive_query = Word.select().where(Word.is_positive == True)
    negative_query = Word.select().where(Word.is_positive == False)

    frame = pd.read_csv(source_path)
    extra_args = (list(positive_query), list(negative_query))
    frame[new_columns] = frame.apply(sentence_info_pd, axis=1, args=extra_args)
    frame.to_csv(target_path, index=False)
def crawl(pixiv_id, password, cron):
    """Record today's search-result counts for every ``Word``.

    Logs in with ``PixivCrawler`` and walks all words ordered by id. If a
    ``SearchResult`` for the word and today's date already exists it is only
    reported; otherwise the word is searched in "safe" and "r18" mode (with a
    3 second pause after each search) and a new ``SearchResult`` is created.

    Args:
        pixiv_id: Login id passed to ``PixivCrawler.login``.
        password: Password passed to ``PixivCrawler.login``.
        cron: When truthy, the per-word ``click.echo`` output is suppressed.
    """
    words = list(Word.select().order_by(Word.id))

    crawler = PixivCrawler()
    crawler.login(pixiv_id, password)

    for word in words:
        data = {"word": word, "stored_at": date.today()}
        try:
            r = SearchResult.get(word=word, stored_at=data["stored_at"])
            if not cron:
                click.echo(
                    f"【登録済】{word.text} - safe: {r.num_of_safe}件 / r18: {r.num_of_r18}件"
                )
        except SearchResult.DoesNotExist:
            # Nothing stored for today yet: fetch both counts. The mode name
            # doubles as the key under which the count is kept.
            for mode in ("safe", "r18"):
                crawler.search(word.text, mode)
                data[mode] = crawler.get_search_count()
                time.sleep(3)

            SearchResult.create(
                word=word,
                stored_at=data["stored_at"],
                num_of_safe=data["safe"],
                num_of_r18=data["r18"],
            )
            if not cron:
                click.echo(
                    f"{word.text} - safe: {data['safe']}件 / r18: {data['r18']}件"
                )
def statistic(self):
    """Print three row counts, one per line.

    In order: all ``Lrc`` rows, the ``Word`` rows whose ``word`` is not
    ``'d'``, and all ``Rhyme`` rows.
    """
    print(len(Lrc.select()))
    print(len(Word.select().where(Word.word != 'd')))
    print(len(Rhyme.select()))
def get_next_word(used=None):
    """Pick a random, not-overplayed ``Word`` and bump its play counter.

    Candidates are the words whose ``plays`` is at most the average ``plays``
    over all words, in random order. When ``used`` is truthy, words whose
    ``text`` is contained in ``used`` are excluded.

    Args:
        used: Optional collection of word texts to exclude.

    Returns:
        The selected ``Word`` instance, as loaded before its ``plays`` column
        was incremented in the database.

    Raises:
        Any exception raised while querying or updating is re-raised after
        ``db.rollback()``. NOTE(review): indexing an empty candidate set
        presumably raises ``IndexError`` - confirm against callers.
    """
    try:
        average_plays = Word.select(fn.Avg(Word.plays))
        candidates = (
            Word.select()
            .order_by(fn.Random())
            .where(Word.plays <= average_plays)
        )
        if used:
            # ``<<`` is peewee's IN operator; comparing with False negates it.
            candidates = candidates.where((Word.text << used) == False)
        chosen = candidates[0]

        # Increment in SQL so the update is based on the stored value.
        Word.update(plays=Word.plays + 1).where(Word.id == chosen.id).execute()
        return chosen
    except BaseException:
        # Same reach as a bare ``except``: roll back, then propagate.
        db.rollback()
        raise
def get_next_word(used=None):
    """Return a randomly chosen ``Word`` with at most average play count.

    The chosen word's ``plays`` column is incremented by one in the database.
    Words whose ``text`` appears in ``used`` are skipped when ``used`` is
    truthy.

    Args:
        used: Optional collection of word texts that must not be returned.

    Returns:
        The ``Word`` instance that was selected (loaded before the update).

    Raises:
        Whatever the query or update raises; ``db.rollback()`` is called
        before the exception propagates.
    """
    try:
        mean_plays = Word.select(fn.Avg(Word.plays))
        pool = Word.select().order_by(fn.Random()).where(Word.plays <= mean_plays)
        if used:
            # Exclude already used texts (peewee ``<<`` means SQL IN).
            pool = pool.where((Word.text << used) == False)
        picked = pool[0]

        bump_plays = Word.update(plays=Word.plays + 1).where(Word.id == picked.id)
        bump_plays.execute()
    except BaseException:
        # Equivalent to the bare ``except``: undo and re-raise everything.
        db.rollback()
        raise
    return picked
def _get_words(self, integ):
    """Return the words of one rhyme, highest ``re3`` first.

    Args:
        integ: Value compared against ``Word.rhyme``.

    Returns:
        ``None`` when fewer than two words match; otherwise a list of
        ``(word, re3 + 1)`` tuples sorted by the second element, descending.
    """
    matches = Word.select().where(Word.rhyme == integ)
    if len(matches) < 2:
        return None

    scored = [(row.word, row.re3 + 1) for row in matches]
    # list.sort is stable, like sorted(), so ties keep their query order.
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored
def remove_word(c):
    """Delete the word named in a callback and refresh the inline keyboard.

    The word id is the second space-separated field of ``c.data``. After the
    row is deleted, the keyboard of the originating message is rebuilt with
    one "remove <id>" button per remaining word. If no words remain,
    ``s.empty_words`` is sent to the chat first.

    Args:
        c: Callback object providing ``data`` and ``message.message_id``.
    """
    target_id = c.data.split(" ")[1]
    Word.get(Word.id == target_id).delete_instance()

    remaining = Word.select(Word)
    keyboard = types.InlineKeyboardMarkup()

    if not len(remaining):
        bot.send_message(cid(c), s.empty_words)

    for entry in remaining:
        keyboard.add(
            types.InlineKeyboardButton(
                text=entry.word,
                callback_data=str("remove {}".format(entry.id)),
            )
        )

    bot.edit_message_reply_markup(
        chat_id=cid(c),
        message_id=c.message.message_id,
        reply_markup=keyboard,
    )
def get_most_common(self, num, f='n'):
    """Collect words with ``re3`` above ``num`` that contain a given flag.

    Each selected word is passed to ``pseg.cut``; for every ``(word, flag)``
    segment whose flag equals ``f`` and whose text is not in ``self.exclude``,
    one ``[word, re3]`` entry for the whole word is appended. A word with
    several matching segments therefore appears several times.

    Args:
        num: Lower bound (exclusive) for ``Word.re3``.
        f: Flag a segment must carry to count; defaults to ``'n'``.

    Returns:
        List of ``[word, re3]`` lists in query order (not sorted).
    """
    collected = []
    for row in Word.select().where(Word.re3 > num):
        for segment, flag in pseg.cut(row.word):
            # Keep only segments of the requested part-of-speech flag.
            if flag != f or segment in self.exclude:
                continue
            collected.append([u'{}'.format(row.word), row.re3])
    return collected
def _to_normal_form(token, max_rounds=6):
    """Reduce *token* to its normal form, re-parsing until it stops changing.

    At most ``max_rounds`` successive replacements are made, so a token whose
    normal form never stabilises cannot loop forever.
    """
    for _ in range(max_rounds):
        normal = morpher.parse(token)[0].normal_form
        if normal == token:
            break
        token = normal
    return token


def _tokenize_text(text):
    """Strip punctuation from *text* and return its tokens in normal form."""
    cleaned = (
        str(text)
        .replace('.', ' ')
        .replace(',', ' ')
        # NOTE(review): newlines and tabs are removed, not replaced by a
        # space, so words separated only by a line break are fused together.
        # Kept as in the previous implementation - confirm this is intended.
        .replace('\n', '')
        .replace('\t', '')
        .replace('"', '')
    )
    # split() without an argument collapses runs of spaces, so the spaces
    # that replaced punctuation never turn into empty "words".
    return [_to_normal_form(token) for token in cleaned.split()]


def _score_phrases(tokens, phrases, score, weight, found):
    """Count phrase occurrences in *tokens*, consuming the matched tokens.

    Phrases are tried in the given order. Every non-overlapping occurrence of
    a phrase adds ``weight`` to the total, and its tokens are removed so that
    later (shorter) phrases cannot match the same words again. For each phrase
    with at least one hit, a ``{"word", "score", "count"}`` dict is appended
    to *found*.

    Returns:
        ``(remaining_tokens, total)``.
    """
    total = 0
    for phrase in phrases:
        parts = phrase.word.split(' ')
        width = len(parts)
        remaining = []
        hits = 0
        pos = 0
        while pos < len(tokens):
            # Only a complete match consumes tokens; a partial match leaves
            # every token in place.
            if tokens[pos:pos + width] == parts:
                hits += 1
                pos += width
            else:
                remaining.append(tokens[pos])
                pos += 1
        if hits:
            tokens = remaining
            total += hits * weight
            found.append(
                {"word": phrase.word, "score": score, "count": hits * weight}
            )
    return tokens, total


def sentence_info(news: News, positive=None, negative=None):
    """Compute a dictionary-based sentiment score for a news item.

    The body and, when present, the title are tokenised and reduced to normal
    form. Positive and negative phrases are then matched against the title
    (each hit weighs 10) and the body (each hit weighs 1), longest phrases
    first.

    Args:
        news: Object with ``body`` and ``title`` attributes.
        positive: Objects with a ``word`` attribute holding positive phrases.
        negative: Objects with a ``word`` attribute holding negative phrases.
            When both collections are empty or omitted, they are loaded from
            ``Word`` (``is_positive == 1`` and ``is_positive == 0``).

    Returns:
        A dict with:
        ``sent_score`` - ``(pos - neg) / (pos + neg)``, or 0 when the weighted
        totals are equal;
        ``word_count`` - number of body tokens plus title tokens;
        ``sentence`` - normalised title tokens followed by body tokens, joined
        by single spaces;
        ``words`` - one ``{"word", "score", "count"}`` dict per matched phrase
        and section, in the order title-positive, title-negative,
        body-positive, body-negative.
    """
    positive = [] if positive is None else positive
    negative = [] if negative is None else negative
    if len(positive) == 0 and len(negative) == 0:
        positive = Word.select(Word.word).where(Word.is_positive == 1)
        negative = Word.select(Word.word).where(Word.is_positive == 0)

    # Longest phrases first, so multi-word phrases win over their sub-phrases.
    def phrase_length(entry):
        return len(entry.word.split(' '))

    positive = sorted(positive, key=phrase_length, reverse=True)
    negative = sorted(negative, key=phrase_length, reverse=True)

    body_tokens = _tokenize_text(news.body)
    result = {
        "sent_score": 0,
        "word_count": len(body_tokens),
        "sentence": ' '.join(body_tokens),
    }

    # An absent or blank title contributes no tokens, so the title matching
    # below is simply a no-op instead of touching an unbound variable.
    title_tokens = []
    if news.title is not None and news.title.replace(" ", "") != "":
        title_tokens = _tokenize_text(news.title)
        # Count title words (tokens), not the characters of the title string.
        result["word_count"] += len(title_tokens)
        result["sentence"] = ' '.join(title_tokens) + ' ' + result["sentence"]

    words = []
    title_tokens, title_pos = _score_phrases(title_tokens, positive, 1, 10, words)
    _, title_neg = _score_phrases(title_tokens, negative, -1, 10, words)
    body_tokens, body_pos = _score_phrases(body_tokens, positive, 1, 1, words)
    _, body_neg = _score_phrases(body_tokens, negative, -1, 1, words)

    positives = title_pos + body_pos
    negatives = title_neg + body_neg
    if positives - negatives != 0:
        result["sent_score"] = (positives - negatives) / (positives + negatives)
    result["words"] = words
    return result