def trans(rdd: RDD):
    """Render the words collected from ``rdd`` as a word-cloud image.

    The collected words are concatenated into one string in which every word
    is followed by a single space. That string is stored in the module-level
    ``res`` and handed to ``WordCloud.generate``. The rendered cloud is
    written to ``wordCloud-<YYYY-mm-dd-HH-MM-SS>.png`` (local time, relative
    path) and then displayed through ``image.show()``.

    If ``rdd`` collects to nothing the function returns immediately: no image
    is produced and ``res`` keeps whatever value it had before the call.

    Args:
        rdd: RDD whose elements are ``str`` words (each one is concatenated
            with ``" "``, so non-string elements raise ``TypeError``).
    """
    global res

    words = rdd.collect()
    if not words:
        return

    # Build the text in a single pass; every word keeps its trailing space,
    # exactly as the former ``res = res + word + " "`` loop produced.
    res = "".join(word + " " for word in words)

    # ``mask_pic`` is resolved from the enclosing module scope.
    wordcloud = WordCloud(
        font_path="font.ttf",
        mask=mask_pic,
        background_color='white',
    ).generate(res)

    # Save the generated word-cloud image under a timestamped file name.
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S')
    wordcloud.to_file(r'wordCloud-' + timestamp + '.png')

    image = wordcloud.to_image()
    image.show()
def __call__(self, head: RDD):
    """Collect ``head`` and hand back whatever ``collect()`` returns.

    Args:
        head: Object exposing ``collect()`` (annotated as an RDD).

    Returns:
        The result of ``head.collect()``, unchanged.
    """
    collected = head.collect()
    return collected
def test_null_in_rdd(self):
    """Check that a null element in a JVM-generated RDD is collected as ``None``.

    The same JVM RDD (from ``PythonUtils.generateRDDWithNull``) is wrapped
    twice: once with ``UTF8Deserializer`` (text elements expected) and once
    with ``NoOpSerializer`` (raw bytes expected). In both cases the middle
    element must come back as ``None``.
    """
    jrdd = self.sc._jvm.PythonUtils.generateRDDWithNull(self.sc._jsc)

    # (serializer factory, expected collected values), checked in order.
    # Factories are called inside the loop so each serializer is only
    # instantiated once the previous case has passed.
    expectations = (
        (UTF8Deserializer, [u"a", None, u"b"]),
        (NoOpSerializer, [b"a", None, b"b"]),
    )
    for make_serializer, expected in expectations:
        rdd = RDD(jrdd, self.sc, make_serializer())
        self.assertEqual(expected, rdd.collect())
def to_pandas(rdd: RDD) -> "pd.DataFrame":
    """Collect ``rdd`` and load the collected rows into a pandas DataFrame.

    ``rdd.collect()`` is called once and its result is passed straight to the
    ``pd.DataFrame`` constructor, so the whole dataset is materialized in the
    calling process.

    Args:
        rdd: RDD to collect.

    Returns:
        A ``pandas.DataFrame`` built from the collected rows; empty when the
        RDD collects to nothing.
    """
    # collect() yields the materialized rows, not another RDD.
    rows = rdd.collect()
    return pd.DataFrame(rows)
def show_all(rdd: RDD):
    """Print the type of ``rdd`` followed by everything it collects.

    Args:
        rdd: Object exposing ``collect()`` (annotated as an RDD).
    """
    rdd_type = type(rdd)
    elements = rdd.collect()
    print(rdd_type, elements)