def test_math_functions(self):
    """Check pyspark.sql math functions against the stdlib ``math`` module."""
    df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
    from pyspark.sql import functions

    # Each case pairs the expected Python values with the Column expression
    # under test; columns are referenced by attribute, by name string, and
    # by item access to cover all the accepted argument forms.
    cases = [
        ([math.cos(i) for i in range(10)], functions.cos(df.a)),
        ([math.cos(i) for i in range(10)], functions.cos("a")),
        ([math.sin(i) for i in range(10)], functions.sin(df.a)),
        ([math.sin(i) for i in range(10)], functions.sin(df["a"])),
        ([math.pow(i, 2 * i) for i in range(10)], functions.pow(df.a, df.b)),
        ([math.pow(i, 2) for i in range(10)], functions.pow(df.a, 2)),
        ([math.pow(i, 2) for i in range(10)], functions.pow(df.a, 2.0)),
        ([math.hypot(i, 2 * i) for i in range(10)], functions.hypot(df.a, df.b)),
        ([math.hypot(i, 2 * i) for i in range(10)], functions.hypot("a", "b")),
        ([math.hypot(i, 2) for i in range(10)], functions.hypot("a", 2)),
        ([math.hypot(i, 2) for i in range(10)], functions.hypot(df.a, 2)),
    ]
    for expected, column in cases:
        SQLTestUtils.assert_close(expected, df.select(column).collect())
def test_math_functions(self):
    """Verify that pyspark.sql math functions agree with the stdlib ``math``
    module on a small integer DataFrame (columns a=i, b=2*i)."""
    df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
    from pyspark.sql import functions
    import math

    def get_values(rows):
        # Each collected Row holds exactly one selected column; unwrap it.
        return [row[0] for row in rows]

    def assert_close(a, b):
        c = get_values(b)
        # BUG FIX: this helper previously *returned* the comparison result,
        # and every call site discarded it -- the test could never fail.
        # Assert element-wise closeness instead.
        for k, v in enumerate(a):
            self.assertTrue(abs(v - c[k]) < 1e-6,
                            "%s and %s are not close (index %d)" % (v, c[k], k))

    assert_close([math.cos(i) for i in range(10)],
                 df.select(functions.cos(df.a)).collect())
    assert_close([math.cos(i) for i in range(10)],
                 df.select(functions.cos("a")).collect())
    assert_close([math.sin(i) for i in range(10)],
                 df.select(functions.sin(df.a)).collect())
    assert_close([math.sin(i) for i in range(10)],
                 df.select(functions.sin(df['a'])).collect())
    assert_close([math.pow(i, 2 * i) for i in range(10)],
                 df.select(functions.pow(df.a, df.b)).collect())
    assert_close([math.pow(i, 2) for i in range(10)],
                 df.select(functions.pow(df.a, 2)).collect())
    assert_close([math.pow(i, 2) for i in range(10)],
                 df.select(functions.pow(df.a, 2.0)).collect())
    assert_close([math.hypot(i, 2 * i) for i in range(10)],
                 df.select(functions.hypot(df.a, df.b)).collect())
    assert_close([math.hypot(i, 2 * i) for i in range(10)],
                 df.select(functions.hypot("a", u"b")).collect())
    assert_close([math.hypot(i, 2) for i in range(10)],
                 df.select(functions.hypot("a", 2)).collect())
    assert_close([math.hypot(i, 2) for i in range(10)],
                 df.select(functions.hypot(df.a, 2)).collect())
def test_math_functions(self):
    """Verify that pyspark.sql math functions agree with the stdlib ``math``
    module on a small integer DataFrame (columns a=i, b=2*i)."""
    df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
    from pyspark.sql import functions
    import math

    def get_values(rows):
        # Each collected Row holds exactly one selected column; unwrap it.
        return [row[0] for row in rows]

    def assert_close(a, b):
        c = get_values(b)
        # BUG FIX: this helper previously *returned* the comparison result,
        # and every call site discarded it -- the test could never fail.
        # Assert element-wise closeness instead.
        for k, v in enumerate(a):
            self.assertTrue(abs(v - c[k]) < 1e-6,
                            "%s and %s are not close (index %d)" % (v, c[k], k))

    assert_close([math.cos(i) for i in range(10)],
                 df.select(functions.cos(df.a)).collect())
    assert_close([math.cos(i) for i in range(10)],
                 df.select(functions.cos("a")).collect())
    assert_close([math.sin(i) for i in range(10)],
                 df.select(functions.sin(df.a)).collect())
    assert_close([math.sin(i) for i in range(10)],
                 df.select(functions.sin(df['a'])).collect())
    assert_close([math.pow(i, 2 * i) for i in range(10)],
                 df.select(functions.pow(df.a, df.b)).collect())
    assert_close([math.pow(i, 2) for i in range(10)],
                 df.select(functions.pow(df.a, 2)).collect())
    assert_close([math.pow(i, 2) for i in range(10)],
                 df.select(functions.pow(df.a, 2.0)).collect())
    assert_close([math.hypot(i, 2 * i) for i in range(10)],
                 df.select(functions.hypot(df.a, df.b)).collect())
    assert_close([math.hypot(i, 2 * i) for i in range(10)],
                 df.select(functions.hypot("a", u"b")).collect())
    assert_close([math.hypot(i, 2) for i in range(10)],
                 df.select(functions.hypot("a", 2)).collect())
    assert_close([math.hypot(i, 2) for i in range(10)],
                 df.select(functions.hypot(df.a, 2)).collect())
# Convert spherical coordinates (theta, phi) to Cartesian unit-sphere
# coordinates (x, y, z) for the object position...
df = df.withColumn("x", F.sin(df["theta"]) * F.cos(df["phi"])).withColumn(
    "y", F.sin(df["theta"]) * F.sin(df["phi"])).withColumn(
    "z", F.cos(df["theta"])).drop("theta", "phi")
# ...and the same for the reference/center position (theta_c, phi_c).
df = df.withColumn("xc", F.sin(df["theta_c"]) * F.cos(df["phi_c"])).withColumn(
    "yc", F.sin(df["theta_c"]) * F.sin(df["phi_c"])).withColumn(
    "zc", F.cos(df["theta_c"])).drop("theta_c", "phi_c")
# rr = 3D chord length between the two unit vectors
# (nested hypot computes sqrt(dx^2 + dy^2 + dz^2)).
df = df.withColumn("rr", F.hypot(df.x - df.xc, F.hypot(df.y - df.yc, df.z - df.zc))).drop(
    "x", "y", "z", "xc", "yc", "zc")
# NOTE(review): "rx" converts the chord length directly to arcmin, i.e. a
# small-angle approximation of the separation -- confirm this is intended.
df = df.withColumn("rx", F.degrees(df.rr) * 60)
# Exact angular distance in arcmin from the chord: angle = 2*asin(chord/2).
df = df.withColumn("angdist", F.degrees(2 * F.asin(df.rr / 2)) * 60)
# Materialize the cached DataFrame before the aggregations below.
df.cache().count()
# Maximum separation, used for the plot annotation.
maxr = df.select(F.max(df.angdist)).take(1)[0][0]
p = df_histplot(df, "angdist")
xlabel(r"radius [arcmin]")
text(0.8, 0.8, r"$\theta_u={:.2f}^\prime$".format(maxr), transform=gca().transAxes)
def tocolumns(df, expr):
    """Recursively translate a histbook expression tree into a PySpark Column.

    Parameters
    ----------
    df : pyspark.sql.DataFrame
        DataFrame whose columns back the expression's names.
    expr : histbook expression node
        A ``Const``, ``Name``/``Predicate``, or ``Call`` node.

    Returns
    -------
    pyspark.sql.Column

    Raises
    ------
    NotImplementedError
        For functions with no PySpark equivalent (marked FIXME below).
    AssertionError
        For unrecognized expression node types.
    """
    import pyspark.sql.functions as fcns

    if isinstance(expr, histbook.expr.Const):
        return fcns.lit(expr.value)

    elif isinstance(expr, (histbook.expr.Name, histbook.expr.Predicate)):
        return df[expr.value]

    elif isinstance(expr, histbook.expr.Call):
        if expr.fcn == "abs" or expr.fcn == "fabs":
            return fcns.abs(tocolumns(df, expr.args[0]))
        elif expr.fcn == "max" or expr.fcn == "fmax":
            return fcns.greatest(*[tocolumns(df, x) for x in expr.args])
        elif expr.fcn == "min" or expr.fcn == "fmin":
            return fcns.least(*[tocolumns(df, x) for x in expr.args])
        elif expr.fcn == "arccos":
            return fcns.acos(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arccosh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "arcsin":
            return fcns.asin(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arcsinh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "arctan2":
            return fcns.atan2(tocolumns(df, expr.args[0]), tocolumns(df, expr.args[1]))
        elif expr.fcn == "arctan":
            return fcns.atan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arctanh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "ceil":
            return fcns.ceil(tocolumns(df, expr.args[0]))
        elif expr.fcn == "copysign":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "cos":
            return fcns.cos(tocolumns(df, expr.args[0]))
        elif expr.fcn == "cosh":
            return fcns.cosh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "rad2deg":
            return tocolumns(df, expr.args[0]) * (180.0 / math.pi)
        elif expr.fcn == "erfc":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "erf":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "exp":
            return fcns.exp(tocolumns(df, expr.args[0]))
        elif expr.fcn == "expm1":
            return fcns.expm1(tocolumns(df, expr.args[0]))
        elif expr.fcn == "factorial":
            return fcns.factorial(tocolumns(df, expr.args[0]))
        elif expr.fcn == "floor":
            return fcns.floor(tocolumns(df, expr.args[0]))
        # BUG FIX: "fmod" used to raise NotImplementedError at this point,
        # which made the "mod"/"fmod" branch further down unreachable for
        # "fmod"; it now falls through to the '%' implementation below.
        elif expr.fcn == "gamma":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "hypot":
            return fcns.hypot(tocolumns(df, expr.args[0]), tocolumns(df, expr.args[1]))
        elif expr.fcn == "isinf":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "isnan":
            return fcns.isnan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "lgamma":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "log10":
            return fcns.log10(tocolumns(df, expr.args[0]))
        elif expr.fcn == "log1p":
            return fcns.log1p(tocolumns(df, expr.args[0]))
        elif expr.fcn == "log":
            return fcns.log(tocolumns(df, expr.args[0]))
        elif expr.fcn == "pow":
            return fcns.pow(tocolumns(df, expr.args[0]), tocolumns(df, expr.args[1]))
        elif expr.fcn == "deg2rad":
            return tocolumns(df, expr.args[0]) * (math.pi / 180.0)
        elif expr.fcn == "sinh":
            return fcns.sinh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sin":
            return fcns.sin(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sqrt":
            return fcns.sqrt(tocolumns(df, expr.args[0]))
        elif expr.fcn == "tanh":
            return fcns.tanh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "tan":
            return fcns.tan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "trunc":
            raise NotImplementedError(expr.fcn)  # FIXME (fcns.trunc is for dates)
        elif expr.fcn == "xor":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "conjugate":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "exp2":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "heaviside":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "isfinite":
            raise NotImplementedError(expr.fcn)  # FIXME
        # Shifts are only supported when the shift amount is a constant.
        elif expr.fcn == "left_shift" and isinstance(expr.args[1], histbook.expr.Const):
            return fcns.shiftLeft(tocolumns(df, expr.args[0]), expr.args[1].value)
        elif expr.fcn == "log2":
            return fcns.log2(tocolumns(df, expr.args[0]))
        elif expr.fcn == "logaddexp2":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "logaddexp":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "mod" or expr.fcn == "fmod":
            return tocolumns(df, expr.args[0]) % tocolumns(df, expr.args[1])
        elif expr.fcn == "right_shift" and isinstance(expr.args[1], histbook.expr.Const):
            return fcns.shiftRight(tocolumns(df, expr.args[0]), expr.args[1].value)
        elif expr.fcn == "rint":
            return fcns.rint(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sign":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "where":
            return fcns.when(tocolumns(df, expr.args[0]),
                             tocolumns(df, expr.args[1])).otherwise(tocolumns(df, expr.args[2]))
        elif expr.fcn == "numpy.equal":
            return tocolumns(df, expr.args[0]) == tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.not_equal":
            return tocolumns(df, expr.args[0]) != tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.less":
            return tocolumns(df, expr.args[0]) < tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.less_equal":
            return tocolumns(df, expr.args[0]) <= tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.isin":
            # BUG FIX: Python's 'in' operator cannot be applied between two
            # Columns (Column.__contains__ is not supported by PySpark and
            # fails at runtime); use Column.isin with a constant membership
            # list instead.
            if isinstance(expr.args[1], histbook.expr.Const):
                return tocolumns(df, expr.args[0]).isin(list(expr.args[1].value))
            raise NotImplementedError(expr.fcn)  # non-constant membership set
        elif expr.fcn == "numpy.logical_not":
            return ~tocolumns(df, expr.args[0])
        elif expr.fcn == "numpy.add":
            return tocolumns(df, expr.args[0]) + tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.subtract":
            return tocolumns(df, expr.args[0]) - tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.multiply":
            return tocolumns(df, expr.args[0]) * tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.true_divide":
            return tocolumns(df, expr.args[0]) / tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_or":
            return tocolumns(df, expr.args[0]) | tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_and":
            return tocolumns(df, expr.args[0]) & tocolumns(df, expr.args[1])
        else:
            raise NotImplementedError(expr.fcn)

    else:
        raise AssertionError(expr)
# In[61]: from pyspark.sql import functions as F Q11 = "IxxPSF_i" Q22 = "IyyPSF_i" Q12 = "IxyPSF_i" # pre-compute denominator df_shear = df.withColumn("denom", F.col(Q11) + F.col(Q22)) #read and img parts of shear df_shear = df_shear.withColumn("R_E", (F.col(Q11) - F.col(Q22)) / F.col('denom')).withColumn( "I_E", (2 * F.col(Q12)) / F.col('denom')) # convert to amplitude and phase df_shear = df_shear.withColumn("amp_E", F.hypot(F.col("R_E"), F.col("I_E"))).withColumn( "phase_E", F.atan2(F.col("R_E"), F.col("I_E"))) df_shear.select("R_E", "I_E", "amp_E", "phase_E").show(5) # In[63]: var = "amp_E" var_sys = "avg(" + var + ")" df_map = df_shear.groupBy("ipix").mean(var) df_map.describe([var_sys]).show() dfp = df_map.toPandas() map_e = np.zeros(hp.nside2npix(nside)) map_e[dfp['ipix'].values] = dfp[var_sys].values hp.gnomview(map_e, rot=[55, -29.8],
# Read the pair catalogue for this method/nside from parquet.
fn=method+"_nside{}.parquet".format(nside)
print("reading: "+fn)
df=spark.read.parquet(fn)
# Flat-sky offsets of each object from its center, scaled by Rsq.
# dx uses the mid-latitude sin(theta) factor to account for the metric
# in the phi direction.
df=df.withColumn("dx",F.sin((df["theta"]+df["theta_c"])/2)*(df["phi"]-df["phi_c"])/Rsq)
df=df.withColumn("dy",(df["theta"]-df["theta_c"])/Rsq)
#df=df.drop("theta","phi","theta_c","phi_c")
#df=df.withColumn("R",F.hypot(df["dx"],df["dy"]))
# Exact separation via Cartesian unit-sphere coordinates: convert both
# (theta, phi) and (theta_c, phi_c) to (x, y, z) unit vectors...
df=df.withColumn("x",F.sin(df["theta"])*F.cos(df["phi"])).withColumn("y",F.sin(df["theta"])*F.sin(df["phi"])).withColumn("z",F.cos(df["theta"])).drop("theta","phi")
df=df.withColumn("xc",F.sin(df["theta_c"])*F.cos(df["phi_c"])).withColumn("yc",F.sin(df["theta_c"])*F.sin(df["phi_c"])).withColumn("zc",F.cos(df["theta_c"])).drop("theta_c","phi_c")
# ...then R = 3D chord length between them (nested hypot = 3D norm).
df=df.withColumn("R",F.hypot(df.x-df.xc,F.hypot(df.y-df.yc,df.z-df.zc))).drop("x","y","z","xc","yc","zc")
#df=df.withColumn("R",F.degrees(df['Rad'])*60/Rsq)
#df=df.withColumn("RR",F.hypot(df.dx,df.dy))
#angular distance in arcmin
#df=df.withColumn("angdist",F.degrees(2*F.asin(df.rr/2))*60)
# Materialize the cached DataFrame before plotting.
df.cache().count()
print("Rsq={} arcmin".format(Rsq))
#maxr=df.select(F.max(df.R)).take(1)[0][0]
#print("max radius={} arcmin".format(maxr))
#2d
# 2D histogram of the scaled offsets (dx, dy).
x,y,m=df_histplot2(df,"dx","dy",bounds=[[-1.5,1.5],[-1.5,1.5]],Nbin1=200,Nbin2=200)