Example #1
0
 def optword(self):
     """Flag repeated word rows by setting ``re2`` to ``'d'``.

     For each row returned by ``Word.select()``, every row sharing the same
     ``word`` value is fetched. All of them except the first one returned
     get ``re2 = 'd'`` and are saved.
     """
     for entry in Word.select():
         same_text = list(Word.select().where(Word.word == entry.word))
         # Slicing past the first row yields nothing when there is no repeat.
         for repeat in same_text[1:]:
             repeat.re2 = 'd'
             repeat.save()
Example #2
0
def news_with_one_company():
    """Add sentiment columns to the one-company news CSV and save a copy.

    Reads the input CSV, applies ``sentence_info_pd`` to each row with the
    positive and negative word lists as extra arguments, stores the result
    in four columns and writes the frame to a second CSV without the index.
    """
    # ``== True`` / ``== False`` are part of the query expression passed to
    # ``where`` and must not be replaced by ``is`` or plain truthiness.
    positive_words = Word.select().where(Word.is_positive == True)
    negative_words = Word.select().where(Word.is_positive == False)

    source_path = "./../../data/news/news_with_one_company.csv"
    target_path = (
        "./../../data/news/news_with_one_company_and_sentiment_analysis.csv")
    output_columns = ["sent_score", "word_count", "words", "parsed_sentence"]

    df = pd.read_csv(source_path)
    # Both word queries are materialised once, after the CSV has been read.
    word_lists = (list(positive_words), list(negative_words))
    df[output_columns] = df.apply(sentence_info_pd, axis=1, args=word_lists)
    df.to_csv(target_path, index=False)
Example #3
0
def crawl(pixiv_id, password, cron):
    """Store today's safe/r18 search counts for every word.

    Words are loaded ordered by id, then the crawler logs in with the given
    credentials. A word that already has a ``SearchResult`` for today is
    only reported; otherwise both modes are searched, with a 3 second pause
    after each search, and a new ``SearchResult`` is created.

    Args:
        pixiv_id: Login id handed to ``PixivCrawler.login``.
        password: Password handed to ``PixivCrawler.login``.
        cron: When truthy, nothing is echoed.
    """
    words = list(Word.select().order_by(Word.id))
    crawler = PixivCrawler()
    crawler.login(pixiv_id, password)

    for word in words:
        # The date is taken per word, so a run crossing midnight switches day.
        data = {"word": word, "stored_at": date.today()}
        try:
            r = SearchResult.get(word=data["word"],
                                 stored_at=data["stored_at"])
            if not cron:
                click.echo(
                    f"【登録済】{word.text} - safe: {r.num_of_safe}件 / r18: {r.num_of_r18}件"
                )
        except SearchResult.DoesNotExist:
            for mode in ("safe", "r18"):
                crawler.search(word.text, mode)
                # The mode name doubles as the key the count is stored under.
                data[mode] = crawler.get_search_count()
                time.sleep(3)

            SearchResult.create(
                word=data["word"],
                stored_at=data["stored_at"],
                num_of_safe=data["safe"],
                num_of_r18=data["r18"],
            )
            if not cron:
                click.echo(
                    f"{word.text} - safe: {data['safe']}件 / r18: {data['r18']}件"
                )
Example #4
0
    def statistic(self):
        """Print three row counts, one per line.

        In order: all ``Lrc`` rows, the ``Word`` rows whose ``word`` is not
        ``'d'``, and all ``Rhyme`` rows.
        """
        queries = (
            Lrc.select(),
            Word.select().where(Word.word != 'd'),
            Rhyme.select(),
        )
        for rows in queries:
            print(len(rows))
Example #5
0
def get_next_word(used=None):
    """Pick a random, not over-played word and increment its play count.

    Candidates are the words whose ``plays`` is at most the average
    ``plays``; when ``used`` is truthy, words whose ``text`` is in ``used``
    are excluded as well.

    Args:
        used: Optional collection of word texts to exclude.

    Returns:
        The selected ``Word`` row, as fetched before the counter update.

    Raises:
        Any error raised along the way (including the indexing error when no
        candidate exists) is re-raised after ``db.rollback()``.
    """
    try:
        average_plays = Word.select(fn.Avg(Word.plays))
        candidates = Word.select().where(Word.plays <= average_plays)
        candidates = candidates.order_by(fn.Random())
        if used:
            candidates = candidates.where((Word.text << used) == False)
        chosen = candidates[0]

        # Increment in the database so the update does not rely on the
        # value held by the fetched row.
        Word.update(plays=Word.plays + 1).where(
            Word.id == chosen.id).execute()

        return chosen
    except:
        # Deliberately bare: roll back on any failure, then re-raise it.
        db.rollback()
        raise
Example #6
0
def get_next_word(used=None):
    """Return a randomly ordered word with at most average plays.

    The chosen word's ``plays`` column is incremented by one. If ``used`` is
    truthy, words whose ``text`` appears in it are skipped.

    Args:
        used: Optional collection of word texts that must not be returned.

    Returns:
        The picked ``Word`` row.

    Raises:
        Whatever the queries raise; ``db.rollback()`` runs first and the
        exception is then propagated unchanged.
    """
    try:
        not_overplayed = Word.plays <= Word.select(fn.Avg(Word.plays))
        pool = Word.select().order_by(fn.Random()).where(not_overplayed)
        if used:
            already_used = Word.text << used
            pool = pool.where(already_used == False)
        picked = pool[0]

        bump = Word.update(plays=Word.plays + 1).where(Word.id == picked.id)
        bump.execute()

        return picked
    except:
        # Bare on purpose: every failure triggers a rollback and is re-raised.
        db.rollback()
        raise
Example #7
0
    def _get_words(self, integ):
        """Return ``(word, re3 + 1)`` pairs for one rhyme, highest first.

        Args:
            integ: Value compared against ``Word.rhyme``.

        Returns:
            A list of tuples sorted by the second element in descending
            order, or ``None`` when fewer than two rows match.
        """
        rows = Word.select().where(Word.rhyme == integ)
        if len(rows) < 2:
            return None

        pairs = [(row.word, row.re3 + 1) for row in rows]
        # Stable sort: rows with equal scores keep their query order.
        pairs.sort(key=lambda pair: pair[1], reverse=True)
        return pairs
Example #8
0
def remove_word(c):
	"""Delete the word named in the callback data and refresh the keyboard.

	The second space-separated field of ``c.data`` is used as the word id.
	After deletion, a keyboard with one "remove <id>" button per remaining
	word replaces the markup of the originating message. When no words
	remain, ``s.empty_words`` is sent first and the markup is still
	replaced, with an empty keyboard.
	"""
	target_id = c.data.split(" ")[1]
	Word.get(Word.id == target_id).delete_instance()

	keyboard = types.InlineKeyboardMarkup()
	remaining = list(Word.select(Word))
	if not remaining:
		bot.send_message(cid(c), s.empty_words)
	for entry in remaining:
		button = types.InlineKeyboardButton(
			text=entry.word,
			callback_data="remove {}".format(entry.id),
		)
		keyboard.add(button)
	bot.edit_message_reply_markup(
		chat_id=cid(c),
		message_id=c.message.message_id,
		reply_markup=keyboard,
	)
Example #9
0
    def get_most_common(self, num, f='n'):
        """List words with ``re3`` above ``num`` that contain a token flagged ``f``.

        Each selected word is passed to ``pseg.cut``, which yields
        ``(token, flag)`` pairs. For every token whose flag equals ``f`` and
        which is not in ``self.exclude``, one ``[word, re3]`` entry for the
        whole word is added, so a word can appear more than once.

        Args:
            num: Exclusive lower bound on ``Word.re3``.
            f: Flag value a token must carry to count (default ``'n'``).

        Returns:
            A list of ``[word, re3]`` lists sorted by ``re3``, largest first.
        """
        collected = []
        for entry in Word.select().where(Word.re3 > num):
            for token, flag in pseg.cut(entry.word):
                # Keep only tokens with the requested flag that are not excluded.
                if flag != f or token in self.exclude:
                    continue
                collected.append([u'{}'.format(entry.word), entry.re3])
        return sorted(collected, key=lambda pair: pair[1], reverse=True)
Example #10
0
def _normalize_tokens(text):
    """Clean *text*, split it on single spaces and normalise every token.

    Dots and commas become spaces; newlines, tabs and double quotes are
    removed; leading/trailing spaces are stripped and one pass of
    double-space collapsing is applied. Empty tokens can remain.
    """
    cleaned = (text.replace('.', ' ').replace(',', ' ')
               .replace('\n', '').replace('\t', '')
               .lstrip(' ').rstrip(' ')
               .replace('"', '').replace('  ', ' '))
    tokens = cleaned.split(' ')
    for index, token in enumerate(tokens):
        # Re-apply the analyser until the form is stable, at most six times,
        # so a pair of forms that map to each other cannot loop forever.
        for _ in range(6):
            normal = morpher.parse(token)[0].normal_form
            if normal == token:
                break
            token = normal
        tokens[index] = token
    return tokens


def _score_phrases(tokens, phrases, score, weight):
    """Find every phrase in *tokens* and strip the matched tokens.

    Phrases are tried in the given order, each against the tokens left over
    by the previous ones. A phrase matches where its space-separated parts
    equal a run of consecutive tokens; overlapping matches are all counted.

    Returns:
        ``(remaining_tokens, entries)`` where each entry is a dict with the
        keys ``word``, ``score`` and ``count`` (``weight`` per match).
    """
    remaining = tokens
    entries = []
    for phrase in phrases:
        parts = phrase.word.split(' ')
        width = len(parts)
        starts = [
            start for start in range(len(remaining) - width + 1)
            if remaining[start:start + width] == parts
        ]
        if not starts:
            continue
        entries.append({
            "word": phrase.word,
            "score": score,
            "count": weight * len(starts),
        })
        # Only tokens of complete matches are consumed; a partial match
        # leaves the text untouched for the phrases that follow.
        matched = {start + offset for start in starts
                   for offset in range(width)}
        remaining = [
            token for index, token in enumerate(remaining)
            if index not in matched
        ]
    return remaining, entries


def sentence_info(news: News, positive=None, negative=None):
    """Score a news item against positive and negative phrase lists.

    The body and, when it is not blank, the title are normalised into
    tokens. Phrases are searched in the title first (each match counts 10)
    and then in the body (each match counts 1), positive phrases before
    negative ones. Tokens consumed by a match are not available to later
    phrases in the same text.

    Args:
        news: Object exposing ``body`` and ``title``.
        positive: Objects with a ``word`` attribute holding a positive phrase.
        negative: Objects with a ``word`` attribute holding a negative phrase.
            When both lists are empty or omitted they are loaded through
            ``Word`` (``is_positive`` 1 and 0) and ordered with the phrases
            containing the most words first.

    Returns:
        A dict with:
        ``sent_score``: ``(pos - neg) / (pos + neg)``, or 0 when the weighted
        totals are equal;
        ``word_count``: number of body tokens plus title tokens;
        ``sentence``: normalised title and body tokens joined by spaces;
        ``words``: list of ``{"word", "score", "count"}`` dicts, one per
        phrase found in the title or in the body.
    """
    positive = [] if positive is None else positive
    negative = [] if negative is None else negative
    if len(positive) == 0 and len(negative) == 0:

        def phrase_length(entry):
            return len(entry.word.split(' '))

        # Longest phrases first, so they are matched before their parts.
        positive = sorted(Word.select(Word.word).where(Word.is_positive == 1),
                          key=phrase_length,
                          reverse=True)
        negative = sorted(Word.select(Word.word).where(Word.is_positive == 0),
                          key=phrase_length,
                          reverse=True)

    words = []
    positives = 0
    negatives = 0
    result = {"sent_score": 0, "word_count": 0}

    sentence = _normalize_tokens(str(news.body))
    result["word_count"] = len(sentence)
    result["sentence"] = ' '.join(sentence)

    if news.title is not None and news.title.replace(" ", "") != "":
        title = _normalize_tokens(str(news.title))
        # Count title tokens, consistent with how the body is counted.
        result["word_count"] += len(title)
        result["sentence"] = ' '.join(title) + ' ' + result["sentence"]

        # Title matches weigh ten times as much as body matches.
        title, found = _score_phrases(title, positive, 1, 10)
        words.extend(found)
        positives += sum(entry["count"] for entry in found)

        title, found = _score_phrases(title, negative, -1, 10)
        words.extend(found)
        negatives += sum(entry["count"] for entry in found)

    sentence, found = _score_phrases(sentence, positive, 1, 1)
    words.extend(found)
    positives += sum(entry["count"] for entry in found)

    sentence, found = _score_phrases(sentence, negative, -1, 1)
    words.extend(found)
    negatives += sum(entry["count"] for entry in found)

    # A non-zero difference implies a non-zero sum, so the division is safe.
    if positives - negatives != 0:
        result["sent_score"] = (positives - negatives) / (positives +
                                                          negatives)
    result["words"] = words
    return result