Ejemplo n.º 1
0
# Merge fdf1 with pdf2, matching fdf1's "Country" column against pdf2's
# "Cname" column, and display the result.
joined = fdf1.merge(pdf2, left_on="Country", right_on="Cname")
joined.show()

# conversion demo
# NOTE: every print below is a call with exactly one argument. That form is
# valid and prints the same text on both Python 2 and Python 3, whereas the
# statement form (print "label", value) is a SyntaxError on Python 3 and a
# bare `print` silently prints nothing there.
print(fdf1.to_panda_dataframe())
print("")
print(joined.to_panda_dataframe())
print("")
# Done with the joined table; release it explicitly.
joined.release()

# miscellaneous
print("all count: {}".format(fdf1.count()))  # all column counts
print("")
print("min(age): {}".format(fdf1.min("Age")))
print("")
print("max(age): {}".format(fdf1.max("Age")))
print("")
print("sum(age): {}".format(fdf1.sum("Age")))
print("")
print("avg(age): {}".format(fdf1.avg("Age")))
print("")
print("std(age): {}".format(fdf1.std("Age")))
print("")
print("count(age): {}".format(fdf1.count("Age")))
print("")
print(fdf1.describe())
print("")

# describe demo
data = {
    'one': [10, 12, 13, 15],
Ejemplo n.º 2
0
# Merge fdf1 with fdf3 on the country name, leaving all optional
# arguments at their defaults, and display the result.
fdf1.merge(fdf3, left_on="Country", right_on="Cname").show()

# NOTE: frovedis does not support joining on multiple keys at the moment,
# so the call below would raise an exception at the frovedis server.
#fdf1.merge(fdf3, left_on=["Country","Country"],
#           right_on=["Cname","Cname"], how='outer', join_type='hash').show()

# Operation chaining: join -> sort -> select -> show
print("* chaining: merge two tables, sort by Age, and select Age, Ename and Country")
(
    fdf1.merge(fdf3, left_on="Country", right_on="Cname")
    .sort("Age")[["Age", "Ename", "Country"]]
    .show()
)

# Column statistics: each entry pairs an output template with the fdf1
# method that computes the value for the "Age" column; the entries are
# evaluated and printed in the order listed.
print("* column statistics")
for template, statistic in (
    ("min(Age): {}", fdf1.min),
    ("max(Age): {}", fdf1.max),
    ("sum(Age): {}", fdf1.sum),
    ("avg(Age): {}", fdf1.avg),
    ("std(Age): {}", fdf1.std),
    ("count(Age): {}", fdf1.count),
):
    print(template.format(statistic("Age")))
print("describe: ")
print(fdf1.describe())
print("\n")

# Merging with a pandas dataframe: rename pdf2's "Country" column to "Cname"
# in place first, so the right-hand key below exists.
print("* merge with pandas table")
pdf2.rename(columns={"Country": "Cname"}, inplace=True)
joined = fdf1.merge(pdf2, left_on="Country", right_on="Cname")
joined.show()

# conversion