Example #1
def get_wordcloud_with_project(project_id=None):
    """
    获取词云
    :return:
    """
    # 字体文件路径
    font = CONF.TTF_PATH + 'msyh.ttc'

    # Source data processing (module names)
    str_list = get_model_data_with_project(project_id)

    # Word segmentation (join the items with spaces)
    text = ""
    for item in str_list:
        text += item + " "

    # Generate the word cloud
    pil_img = WordCloud(
        font_path=font,
        width=800,
        height=300,
        background_color="white",
        prefer_horizontal=0.6,
        collocations=False).generate(text=text).to_image()

    # Output as base64
    img = io.BytesIO()
    pil_img.save(img, "PNG")
    img.seek(0)
    img_base64 = base64.b64encode(img.getvalue()).decode()

    return img_base64
Example #2
def get_wordcloud(text):
    pil_img = WordCloud(stopwords=stopwords).generate(text=text).to_image()
    img = io.BytesIO()
    pil_img.save(img, "PNG")
    img.seek(0)
    img_b64 = base64.b64encode(img.getvalue()).decode()
    return img_b64
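The base64 string returned here can be embedded directly in a page as a data URI, which is exactly what Example #9 below does. A minimal usage sketch (the sample text is made up, and the module-level `stopwords` set is assumed to be defined):

# Hypothetical usage: wrap the base64 payload in a data URI for an <img> tag.
img_b64 = get_wordcloud("hawk hawk apple spoon red mine")
data_uri = 'data:image/png;base64,{}'.format(img_b64)
html_img = '<img src="{}" alt="word cloud">'.format(data_uri)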
Example #3
def get_wordcloud(text):
    # text = text.decode("utf-8")
    if langue == 'zh-CN':
        # Chinese text needs a CJK-capable font and jieba word segmentation.
        pil_img = WordCloud(
            font_path=font_path,
            background_color="white",
            max_words=2000,
            max_font_size=100,
            random_state=42,
            width=1000,
            height=860,
            margin=2,
        ).generate(jieba_processing_txt(text))
    else:
        pil_img = WordCloud(width=1600,
                            height=800,
                            scale=20,
                            background_color='white',
                            mode="RGBA",
                            max_font_size=600).generate(text=text)
    pil_img = pil_img.to_image()
    img = BytesIO()  # temporary in-memory buffer
    pil_img.save(img, "PNG")  # write the image to the buffer as PNG
    img.seek(0)
    img_64 = base64.b64encode(img.getvalue()).decode('utf-8')
    return img_64
Example #4
def results():
    # "if-the-internet-is-slow-so-local-items" tester
    word_freq = [('hawk', 10), ('apple', 3), ('spoon', 2), ('red', 1),
                 ('mine', 1)]
    list_of_tokens = [
        'hawk', 'hawk', 'hawk', 'hawk', 'hawk', 'hawk', 'hawk', 'hawk', 'hawk',
        'hawk', 'apple', 'apple', 'apple', 'spoon', 'spoon', 'red', 'mine'
    ]

    # url = request.form['url']
    # word_freq, list_of_tokens = quick_prime(url)

    text = " ".join(list_of_tokens)

    cloud_PIL = WordCloud(background_color='white').generate(text).to_image()

    # convert cloud_PIL from a PIL image to raw JPEG bytes
    output = BytesIO()
    cloud_PIL.save(output, format='JPEG')

    # convert the bytes to a base64 string (decode() avoids the b'...' repr hack)
    img = base64.b64encode(output.getvalue()).decode()

    return render_template('results.html', items=word_freq, img=img)
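The template results.html is not included in the snippet. A hypothetical variant that hands the template a ready-made JPEG data URI (the same pattern Example #9 uses for PNG), so the template only needs a plain <img src="{{ img }}"> tag:

    # Hypothetical: build a complete data URI before rendering the template.
    img = 'data:image/jpeg;base64,' + base64.b64encode(output.getvalue()).decode()
    return render_template('results.html', items=word_freq, img=img)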
Example #5
    async def _generate_word_cloud_async(self, msg_id: str, reply_msg, to_chat,
                                         search_chat, user, start: datetime,
                                         end: datetime):
        try:
            import jieba
            from wordcloud import WordCloud
        except ImportError as e:
            print(e)
            return
        words = defaultdict(int)
        count = 0
        initial_msg = reply_msg.text + '\n'
        async for msg in self._client.iter_messages(search_chat,
                                                    from_user=user,
                                                    offset_date=end):
            if start and msg.date < start:
                break
            if msg.text:
                for word in jieba.cut(msg.text):
                    word = word.lower()
                    if not await self.redis.sismember(
                            f'{self.prefix}stop_words', word):
                        words[word] += 1
                # words += [w for w in jieba.cut(msg.text) if not await self.redis.sismember(f'{self.prefix}stop_words', w)]
            # if msg.sticker:
            # words += [a.alt for a in msg.sticker.attributes if isinstance(a, DocumentAttributeSticker)]

            count += 1
            if count >= 1000:
                p = math.floor(math.log(count, 10))
                if count % int(math.pow(10, p)) == 0 and count // 1000:
                    try:
                        await reply_msg.edit(text=initial_msg + '.' *
                                             (count // 1000))
                    except Exception as _:
                        traceback.print_exc()

        wordcloud_msg = None
        try:
            image = WordCloud(
                font_path="simsun.ttf", width=800,
                height=400).generate_from_frequencies(words).to_image()
            stream = BytesIO()
            image.save(stream, 'PNG')
            wordcloud_msg = await self._client.send_message(
                to_chat,
                'Word cloud for\n{}{}{}'.format(
                    f'{search_chat.title}',
                    f'\n{utils.get_display_name(user)}' if user else '',
                    '\n{}-{}'.format(
                        start.strftime('%Y/%m/%d') if start else 'Join',
                        end.strftime('%Y/%m/%d') if end else 'Now')
                    if start or end else ''),
                reply_to=msg_id,
                file=stream.getvalue())
        except Exception as _:
            traceback.print_exc()
        finally:
            await reply_msg.delete()
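This example and Example #13 build a dict of word counts and pass it to WordCloud.generate_from_frequencies instead of feeding raw text to generate. A minimal standalone sketch of that API (the token list is made up for illustration):

from collections import defaultdict
from io import BytesIO
from wordcloud import WordCloud

# Hypothetical word counts, e.g. accumulated from jieba.cut over chat messages.
words = defaultdict(int)
for token in ['hello', 'world', 'hello', 'cloud']:
    words[token] += 1

image = WordCloud(width=800, height=400).generate_from_frequencies(words).to_image()
stream = BytesIO()
image.save(stream, 'PNG')  # stream.getvalue() now holds the PNG bytes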
Example #6
def wordcloud(text):
    stopwords = set(STOPWORDS)
    wordcloud = WordCloud(width=800,
                          height=800,
                          background_color='white',
                          stopwords=stopwords,
                          min_font_size=10).generate(text).to_image()
    img = BytesIO()
    wordcloud.save(img, "PNG")
    img.seek(0)
    img_b64 = base64.b64encode(img.getvalue()).decode()
    return img_b64
Example #7
def get_word_cloud(text):
    # font = "./SimHei.ttf"
    # pil_img = WordCloud(width=500, height=500, font_path=font).generate(text=text).to_image()

    pil_img = WordCloud(
        width=800, height=300,
        background_color="white").generate(text=text).to_image()
    img = io.BytesIO()
    pil_img.save(img, "PNG")
    img.seek(0)
    img_base64 = base64.b64encode(img.getvalue()).decode()
    return img_base64
Example #8
def get_wordcloud(text):
    """
    Convert text to a word cloud
    """
    pillow_image = WordCloud().generate(text=text).to_image()
    img = io.BytesIO()
    pillow_image.save(img, "PNG")
    img.seek(0)
    # .b64encode -> returns a base64 byte string
    # .decode -> converts it to a regular base64 str
    img_b64 = base64.b64encode(img.getvalue()).decode()
    return img_b64
Example #9
def wordcloud():
    sentence = wordsentence()
    stopwords = set(STOPWORDS)
    wordcloud = WordCloud(width=800,
                          height=800,
                          background_color='white',
                          stopwords=stopwords,
                          min_font_size=10).generate(sentence)
    wordcloud = wordcloud.to_image()
    img = BytesIO()
    wordcloud.save(img, format='PNG')
    return 'data:image/png;base64,{}'.format(
        base64.b64encode(img.getvalue()).decode())
Example #10
    async def _generate_word_cloud_async(self, chat, user, start, end):
        try:
            import jieba
            from wordcloud import WordCloud
        except ImportError as e:
            print(e)
            return

        chat = await self._get_entity(chat)
        if user is not None:
            user = await self._get_entity(user)
        if start is not None:
            start = parser.parse(start).replace(tzinfo=timezone.utc)
        if end is not None:
            end = parser.parse(end)
        words = []
        async for msg in self._client.iter_messages(chat,
                                                    from_user=user,
                                                    offset_date=end):
            if start and msg.date < start:
                break
            if msg.text:
                print("[{}][{}] {}".format(
                    msg.date,
                    utils.get_display_name(await msg.get_sender()) if
                    user is None else utils.get_display_name(user), msg.text))
                words += [
                    w for w in jieba.cut_for_search(msg.text)
                    if w not in stop_words
                ]
        image = WordCloud(font_path="simsun.ttf", width=800,
                          height=400).generate(' '.join(words)).to_image()
        stream = BytesIO()
        image.save(stream, 'PNG')
        await self._client.send_message(
            'gua_mei_debug',
            '{}{}{}'.format(
                f'{chat.title}',
                f'\n{utils.get_display_name(user)}' if user else '',
                '\n{}-{}'.format(
                    start.strftime('%Y/%m/%d') if start else 'Join',
                    end.strftime('%Y/%m/%d') if end else 'Now')
                if start or end else ''),
            file=stream.getvalue())
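The parsed start date is made timezone-aware before it is compared with msg.date, because Telethon message dates are UTC-aware datetimes and comparing a naive datetime against an aware one raises a TypeError. A minimal sketch of the issue (the dates are made up):

from datetime import datetime, timezone
from dateutil import parser

start = parser.parse('2021/01/01')                    # naive datetime
start_utc = start.replace(tzinfo=timezone.utc)        # aware, comparable with msg.date
msg_date = datetime(2021, 3, 1, tzinfo=timezone.utc)  # hypothetical message date
print(msg_date < start_utc)                           # False; comparing with the naive `start` would raise TypeError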
Example #11
def main(argv):                 #Given by Dr. Brown on Piazza
    argc = len(argv)

    if argc < 2:
        print("More arguments, please.")
        return 1

    frequency = WordFrequency()
    for i in range(1, argc):
        frequency.open(argv[i])

    stop_words = ['A', 'An', 'And', 'But', 'That', 'The', 'Which',
                  'a', 'an', 'and', 'but', 'that', 'the', 'which']

    for count in [5, 20, 33, 50]:
        cloud = WordCloud(count, frequency, stop_words)
        cloud.save('cloud-{0}.txt'.format(count))

        cloud = HtmlWordCloud(count, frequency, stop_words)
        cloud.save('cloud-{0}.html'.format(count))
    return 0
Example #12
def index():
    r = requests.get(
        "http://loklak.org/api/search.json?q={0}&count=100".format(
            request.args.get('q')))
    data = r.json()
    text = " "
    for value in data["statuses"]:
        value = re.sub(r"@", " ", value["text"])
        value = re.sub(r"\#", " ", value)
        value = re.sub(
            r"https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}\b([-a-zA-Z0-9@:%_\+.~#?&//=]*)",
            " ", value)
        text += value
    cloud = WordCloud(width=800, height=400,
                      stopwords=STOPWORDS).generate(text).to_image()
    # in-memory buffer instead of a temporary file; a bytes buffer is needed for PNG data
    buffer = BytesIO()
    cloud.save(buffer, format="PNG")
    return base64.b64encode(buffer.getvalue())
Example #13
async def group_word(context):
    imported_1 = False
    if len(context.parameter) >= 1:
        imported_1 = True
    if not imported:
        try:
            await context.edit("支持库 `jieba` 未安装...\n正在尝试自动安装...")
            await execute(f'{executable} -m pip install jieba')
            await sleep(10)
            result = await execute(f'{executable} -m pip show jieba')
            if len(result) > 0:
                await context.edit('Library `jieba` installed successfully...\nTrying to restart automatically...')
                await context.client.disconnect()
            else:
                await context.edit(f"自动安装失败..请尝试手动安装 `{executable} -m pip install jieba` 随后,请重启 PagerMaid-Modify 。")
                return
        except:
            return
    if not imported_ and imported_1:
        try:
            await context.edit("支持库 `paddlepaddle-tiny` 未安装...\n正在尝试自动安装...")
            await execute(f'{executable} -m pip install paddlepaddle-tiny')
            await sleep(10)
            result = await execute(f'{executable} -m pip show paddlepaddle-tiny')
            if len(result) > 0 and 'WARNING' not in result:
                await context.edit('Library `paddlepaddle-tiny` installed successfully...\nTrying to restart automatically...')
                await context.client.disconnect()
            else:
                await context.edit(f"自动安装失败,可能是系统不支持..\nAI 分词不可用,切换到基础分词。\n"
                                   f"您可以尝试手动安装 `{executable} -m pip install paddlepaddle-tiny` 。")
                await sleep(4)
        except:
            return
    try:
        await context.edit('Generating...')
    except:
        return
    if not exists("plugins/groupword"):
        makedirs("plugins/groupword")
    if not exists("plugins/groupword/wqy-microhei.ttc"):
        await context.edit('Downloading the Chinese font file... (how long this takes depends on your server)')
        r = get('https://cdn.jsdelivr.net/gh/anthonyfok/fonts-wqy-microhei/wqy-microhei.ttc')
        with open("plugins/groupword/wqy-microhei.ttc", "wb") as code:
            code.write(r.content)
    words = defaultdict(int)
    count = 0
    try:
        if imported_ and imported_1:
            try:
                jieba.enable_paddle()
            except:
                imported_1 = False
        async for msg in context.client.iter_messages(context.chat, limit=500):
            if msg.id == context.id:
                continue
            if msg.text and not msg.text.startswith('/') and not msg.text.startswith('-') and '//' not in msg.text:
                try:
                    if imported_ and imported_1:
                        for word in jieba.cut(msg.text.translate(punctuation), use_paddle=True):
                            word = word.lower()
                            words[word] += 1
                    else:
                        for word in jieba.cut(msg.text.translate(punctuation)):
                            word = word.lower()
                            words[word] += 1
                    count += 1
                except:
                    pass
    except:
        if count == 0:
            try:
                await context.edit('You have been restricted by Telegram.')
                return
            except:
                return
    try:
        image = WordCloud(font_path="plugins/groupword/wqy-microhei.ttc", width=800,
                          height=400).generate_from_frequencies(
            words).to_image()
        stream = BytesIO()
        image.save(stream, 'PNG')
    except:
        await context.edit('Failed to generate the word cloud.')
        return
    try:
        await context.client.send_message(context.chat, f'Analyzed the last {count} messages.', file=stream.getvalue())
        await context.delete()
    except:
        return