# Merging with a pandas dataframe.
# The pandas column is renamed in place so the right-hand join key is "Cname".
pdf2.rename(columns={'Country': 'Cname'}, inplace=True)
joined = fdf1.merge(pdf2, left_on="Country", right_on="Cname")
joined.show()

# Conversion demo: print each table after converting it with
# to_panda_dataframe(), followed by a blank separator line.
# NOTE: print is used as a function throughout (Python 3). The former
# `print "..."` statements are a SyntaxError there, and a bare `print`
# is a no-op expression, so the blank separator lines were silently lost.
print(fdf1.to_panda_dataframe())
print()
print(joined.to_panda_dataframe())
print()
# The joined table is not used again in this section.
# NOTE(review): release() presumably frees the server-side data -- confirm.
joined.release()

# Miscellaneous: whole-table count, then single-column statistics on "Age".
# The two-argument print calls keep the original output text unchanged.
print("all count: ", fdf1.count())  # all column counts
print()
print("min(age): ", fdf1.min("Age"))
print()
print("max(age): ", fdf1.max("Age"))
print()
print("sum(age): ", fdf1.sum("Age"))
print()
print("avg(age): ", fdf1.avg("Age"))
print()
print("std(age): ", fdf1.std("Age"))
print()
print("count(age): ", fdf1.count("Age"))
print()
print(fdf1.describe())  # describe demo
print()
# Join the two tables on the country name, using merge()'s default options.
print("* merge (join) two tables")
fdf1.merge(fdf3, left_on="Country", right_on="Cname").show()

# note: frovedis doesn't support multiple key joining at this moment,
# so the call below would cause an exception at the frovedis server:
# fdf1.merge(fdf3, left_on=["Country", "Country"],
#            right_on=["Cname", "Cname"], how='outer', join_type='hash').show()

# operation chaining: join -> sort -> select -> show
print("* chaining: merge two tables, sort by Age, and select Age, Ename and Country")
(
    fdf1.merge(fdf3, left_on="Country", right_on="Cname")
    .sort("Age")[["Age", "Ename", "Country"]]
    .show()
)

# column statistics: each entry pairs an output template with the dataframe
# method that produces the value; entries are evaluated and printed in order.
print("* column statistics")
age_reports = (
    ("min(Age): {}", fdf1.min),
    ("max(Age): {}", fdf1.max),
    ("sum(Age): {}", fdf1.sum),
    ("avg(Age): {}", fdf1.avg),
    ("std(Age): {}", fdf1.std),
    ("count(Age): {}", fdf1.count),
)
for template, compute in age_reports:
    print(template.format(compute("Age")))

print("describe: ")
summary = fdf1.describe()
print(summary)
print("\n")

# merging with a pandas dataframe: rename the pandas column in place so the
# right-hand join key is "Cname".
print("* merge with pandas table")
pdf2.rename(columns={'Country': 'Cname'}, inplace=True)
joined = fdf1.merge(pdf2, left_on="Country", right_on="Cname")
joined.show()