コード例 #1
0
ファイル: test_functions.py プロジェクト: yliou/spark
    def test_math_functions(self):
        """Check Spark's cos, sin, pow and hypot against Python's ``math``.

        A ten-row frame with columns ``a = i`` and ``b = 2 * i`` is built,
        each Spark expression is selected and collected, and the rows are
        handed to ``SQLTestUtils.assert_close`` together with the values
        computed locally.  Each function is exercised with several argument
        spellings (Column attribute, Column item, column name, literal).
        """
        df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
        from pyspark.sql import functions

        def check(expected, column):
            # Evaluate one column expression on the fixture frame and
            # delegate the comparison to the shared test utility.
            SQLTestUtils.assert_close(expected, df.select(column).collect())

        # Single-argument trigonometric functions.
        check([math.cos(i) for i in range(10)], functions.cos(df.a))
        check([math.cos(i) for i in range(10)], functions.cos("a"))
        check([math.sin(i) for i in range(10)], functions.sin(df.a))
        check([math.sin(i) for i in range(10)], functions.sin(df["a"]))

        # pow: column exponent, int literal exponent, float literal exponent.
        check([math.pow(i, 2 * i) for i in range(10)],
              functions.pow(df.a, df.b))
        check([math.pow(i, 2) for i in range(10)], functions.pow(df.a, 2))
        check([math.pow(i, 2) for i in range(10)], functions.pow(df.a, 2.0))

        # hypot: Column objects, column names, and a literal second argument.
        check([math.hypot(i, 2 * i) for i in range(10)],
              functions.hypot(df.a, df.b))
        check([math.hypot(i, 2 * i) for i in range(10)],
              functions.hypot("a", "b"))
        check([math.hypot(i, 2) for i in range(10)], functions.hypot("a", 2))
        check([math.hypot(i, 2) for i in range(10)], functions.hypot(df.a, 2))
コード例 #2
0
ファイル: test_functions.py プロジェクト: Spencerzsp/spark-1
    def test_math_functions(self):
        """Check Spark's cos, sin, pow and hypot against Python's ``math``.

        A ten-row frame with columns ``a = i`` and ``b = 2 * i`` is built;
        each Spark expression is selected, collected, and compared with the
        value computed locally, using an absolute tolerance of 1e-6.

        Raises:
            AssertionError: if a collected result has a different number of
                rows than expected or any value is outside the tolerance.
        """
        df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
        from pyspark.sql import functions
        import math

        def get_values(l):
            # Each collected row holds a single selected column.
            return [j[0] for j in l]

        def assert_close(a, b):
            # The comparison must *raise* on failure: returning a bool that
            # every call site discards would let the test pass vacuously.
            c = get_values(b)
            if len(a) != len(c):
                raise AssertionError(
                    "expected {} values, got {}".format(len(a), len(c)))
            # ``not (... < 1e-6)`` also treats NaN differences as mismatches.
            mismatches = [(k, v, c[k]) for k, v in enumerate(a)
                          if not abs(v - c[k]) < 1e-6]
            if mismatches:
                raise AssertionError(
                    "values differ (index, expected, actual): {}".format(
                        mismatches))

        assert_close([math.cos(i) for i in range(10)],
                     df.select(functions.cos(df.a)).collect())
        assert_close([math.cos(i) for i in range(10)],
                     df.select(functions.cos("a")).collect())
        assert_close([math.sin(i) for i in range(10)],
                     df.select(functions.sin(df.a)).collect())
        assert_close([math.sin(i) for i in range(10)],
                     df.select(functions.sin(df['a'])).collect())
        assert_close([math.pow(i, 2 * i) for i in range(10)],
                     df.select(functions.pow(df.a, df.b)).collect())
        assert_close([math.pow(i, 2) for i in range(10)],
                     df.select(functions.pow(df.a, 2)).collect())
        assert_close([math.pow(i, 2) for i in range(10)],
                     df.select(functions.pow(df.a, 2.0)).collect())
        assert_close([math.hypot(i, 2 * i) for i in range(10)],
                     df.select(functions.hypot(df.a, df.b)).collect())
        assert_close([math.hypot(i, 2 * i) for i in range(10)],
                     df.select(functions.hypot("a", u"b")).collect())
        assert_close([math.hypot(i, 2) for i in range(10)],
                     df.select(functions.hypot("a", 2)).collect())
        assert_close([math.hypot(i, 2) for i in range(10)],
                     df.select(functions.hypot(df.a, 2)).collect())
コード例 #3
0
ファイル: test_functions.py プロジェクト: apache/spark
    def test_math_functions(self):
        """Check Spark's cos, sin, pow and hypot against Python's ``math``.

        Builds a ten-row frame (``a = i``, ``b = 2 * i``), evaluates each
        Spark expression, and compares the collected rows with locally
        computed values using an absolute tolerance of 1e-6.

        Raises:
            AssertionError: if the number of collected rows differs from the
                number of expected values, or any pair is not within the
                tolerance.
        """
        df = self.sc.parallelize([Row(a=i, b=2 * i) for i in range(10)]).toDF()
        from pyspark.sql import functions
        import math

        def get_values(l):
            # Pull the single selected column out of every collected row.
            return [j[0] for j in l]

        def assert_close(a, b):
            # Fail loudly: a helper that merely returns True/False is never
            # checked by the call sites below, so nothing would be tested.
            c = get_values(b)
            if len(a) != len(c):
                raise AssertionError(
                    "expected {} values, got {}".format(len(a), len(c)))
            # Written as ``not (... < 1e-6)`` so NaN counts as a mismatch.
            bad = [(k, v, c[k]) for k, v in enumerate(a)
                   if not abs(v - c[k]) < 1e-6]
            if bad:
                raise AssertionError(
                    "values differ (index, expected, actual): {}".format(bad))

        assert_close([math.cos(i) for i in range(10)],
                     df.select(functions.cos(df.a)).collect())
        assert_close([math.cos(i) for i in range(10)],
                     df.select(functions.cos("a")).collect())
        assert_close([math.sin(i) for i in range(10)],
                     df.select(functions.sin(df.a)).collect())
        assert_close([math.sin(i) for i in range(10)],
                     df.select(functions.sin(df['a'])).collect())
        assert_close([math.pow(i, 2 * i) for i in range(10)],
                     df.select(functions.pow(df.a, df.b)).collect())
        assert_close([math.pow(i, 2) for i in range(10)],
                     df.select(functions.pow(df.a, 2)).collect())
        assert_close([math.pow(i, 2) for i in range(10)],
                     df.select(functions.pow(df.a, 2.0)).collect())
        assert_close([math.hypot(i, 2 * i) for i in range(10)],
                     df.select(functions.hypot(df.a, df.b)).collect())
        assert_close([math.hypot(i, 2 * i) for i in range(10)],
                     df.select(functions.hypot("a", u"b")).collect())
        assert_close([math.hypot(i, 2) for i in range(10)],
                     df.select(functions.hypot("a", 2)).collect())
        assert_close([math.hypot(i, 2) for i in range(10)],
                     df.select(functions.hypot(df.a, 2)).collect())
コード例 #4
0
ファイル: anahpsize.py プロジェクト: faisalrahman36/SparkCorr
# Cartesian components from the (theta, phi) angles:
#   x = sin(theta) cos(phi), y = sin(theta) sin(phi), z = cos(theta).
# The angle columns are dropped once the components exist.
df = (
    df.withColumn("x", F.sin(df["theta"]) * F.cos(df["phi"]))
    .withColumn("y", F.sin(df["theta"]) * F.sin(df["phi"]))
    .withColumn("z", F.cos(df["theta"]))
    .drop("theta", "phi")
)

# Same conversion for the (theta_c, phi_c) pair.
df = (
    df.withColumn("xc", F.sin(df["theta_c"]) * F.cos(df["phi_c"]))
    .withColumn("yc", F.sin(df["theta_c"]) * F.sin(df["phi_c"]))
    .withColumn("zc", F.cos(df["theta_c"]))
    .drop("theta_c", "phi_c")
)

# Nesting hypot gives the three-component Euclidean distance between
# (x, y, z) and (xc, yc, zc); the components are no longer needed afterwards.
separation = F.hypot(df.x - df.xc, F.hypot(df.y - df.yc, df.z - df.zc))
df = df.withColumn("rr", separation).drop("x", "y", "z", "xc", "yc", "zc")

# rr converted to degrees and scaled by 60, directly and via 2*asin(rr/2).
df = df.withColumn("rx", F.degrees(df.rr) * 60)
df = df.withColumn("angdist", F.degrees(2 * F.asin(df.rr / 2)) * 60)

# count() forces evaluation so the cached frame is materialised.
df.cache().count()

maxr = df.select(F.max(df.angdist)).take(1)[0][0]

# Histogram of angdist, annotated with its maximum value.
p = df_histplot(df, "angdist")
xlabel(r"radius [arcmin]")
text(
    0.8,
    0.8,
    r"$\theta_u={:.2f}^\prime$".format(maxr),
    transform=gca().transAxes,
)
コード例 #5
0
ファイル: spark.py プロジェクト: weidenka/histbook
# histbook call name -> name of the one-argument pyspark.sql.functions
# function that implements it.
_SPARK_UNARY = {
    "abs": "abs",
    "fabs": "abs",
    "arccos": "acos",
    "arcsin": "asin",
    "arctan": "atan",
    "ceil": "ceil",
    "cos": "cos",
    "cosh": "cosh",
    "exp": "exp",
    "expm1": "expm1",
    "factorial": "factorial",
    "floor": "floor",
    "isnan": "isnan",
    "log10": "log10",
    "log1p": "log1p",
    "log": "log",
    "log2": "log2",
    "rint": "rint",
    "sinh": "sinh",
    "sin": "sin",
    "sqrt": "sqrt",
    "tanh": "tanh",
    "tan": "tan",
}

# Calls forwarded with every argument to a variadic Spark function.
_SPARK_VARIADIC = {
    "max": "greatest",
    "fmax": "greatest",
    "min": "least",
    "fmin": "least",
}

# Calls forwarded with exactly their first two arguments.
_SPARK_BINARY = {
    "arctan2": "atan2",
    "hypot": "hypot",
    "pow": "pow",
}

# Bit shifts: only handled when the shift amount is a constant expression.
_SPARK_SHIFT = {
    "left_shift": "shiftLeft",
    "right_shift": "shiftRight",
}

# Calls expressed with a Python operator applied to the two converted columns.
_SPARK_OPERATOR = {
    "mod": lambda lhs, rhs: lhs % rhs,
    "numpy.equal": lambda lhs, rhs: lhs == rhs,
    "numpy.not_equal": lambda lhs, rhs: lhs != rhs,
    "numpy.less": lambda lhs, rhs: lhs < rhs,
    "numpy.less_equal": lambda lhs, rhs: lhs <= rhs,
    # NOTE(review): Python's ``in`` coerces the result to bool; confirm this
    # is usable with Spark Column operands before relying on this entry.
    "numpy.isin": lambda lhs, rhs: lhs in rhs,
    "numpy.add": lambda lhs, rhs: lhs + rhs,
    "numpy.subtract": lambda lhs, rhs: lhs - rhs,
    "numpy.multiply": lambda lhs, rhs: lhs * rhs,
    "numpy.true_divide": lambda lhs, rhs: lhs / rhs,
    "numpy.logical_or": lambda lhs, rhs: lhs | rhs,
    "numpy.logical_and": lambda lhs, rhs: lhs & rhs,
}


def tocolumns(df, expr):
    """Translate a histbook expression tree into a Spark column expression.

    Args:
        df: object indexed by name (``df[name]``) to obtain columns.
        expr: a ``histbook.expr`` node (``Const``, ``Name``, ``Predicate``
            or ``Call``); ``Call`` arguments are converted recursively.

    Returns:
        ``fcns.lit(value)`` for a ``Const``, ``df[value]`` for a ``Name`` or
        ``Predicate``, and for a ``Call`` the result of the matching
        ``pyspark.sql.functions`` function or Python operator.

    Raises:
        NotImplementedError: for a ``Call`` whose function has no
            translation here.  That covers every name absent from the
            tables above, e.g. arccosh, arcsinh, arctanh, copysign, erf,
            erfc, fmod, gamma, lgamma, isinf, isfinite, trunc (fcns.trunc
            is for dates), xor, conjugate, exp2, heaviside, logaddexp,
            logaddexp2 and sign (all FIXME), as well as left_shift /
            right_shift with a non-constant shift amount.
        AssertionError: if ``expr`` is none of the node types listed above.
    """
    import pyspark.sql.functions as fcns

    if isinstance(expr, histbook.expr.Const):
        return fcns.lit(expr.value)

    if isinstance(expr, (histbook.expr.Name, histbook.expr.Predicate)):
        return df[expr.value]

    if not isinstance(expr, histbook.expr.Call):
        raise AssertionError(expr)

    name = expr.fcn

    def arg(index):
        # Recursively convert the index-th argument of the call.
        return tocolumns(df, expr.args[index])

    if name in _SPARK_UNARY:
        return getattr(fcns, _SPARK_UNARY[name])(arg(0))

    if name in _SPARK_VARIADIC:
        return getattr(fcns, _SPARK_VARIADIC[name])(
            *[tocolumns(df, x) for x in expr.args])

    if name in _SPARK_BINARY:
        return getattr(fcns, _SPARK_BINARY[name])(arg(0), arg(1))

    if name in _SPARK_OPERATOR:
        # NOTE(review): the original chain also listed "fmod" next to "mod",
        # but an earlier "fmod" branch raised NotImplementedError first, so
        # "fmod" never reached the ``%`` translation; it is still rejected.
        return _SPARK_OPERATOR[name](arg(0), arg(1))

    if name == "rad2deg":
        return arg(0) * (180.0 / math.pi)

    if name == "deg2rad":
        return arg(0) * (math.pi / 180.0)

    if name in _SPARK_SHIFT and isinstance(expr.args[1], histbook.expr.Const):
        # The shift amount is passed as a plain value, not as a column.
        return getattr(fcns, _SPARK_SHIFT[name])(arg(0), expr.args[1].value)

    if name == "where":
        return fcns.when(arg(0), arg(1)).otherwise(arg(2))

    if name == "numpy.logical_not":
        return ~arg(0)

    raise NotImplementedError(name)
コード例 #6
0
ファイル: Syst3x2pt.py プロジェクト: plaszczy/spark-fits-apps
# In[61]:

from pyspark.sql import functions as F
Q11 = "IxxPSF_i"
Q22 = "IyyPSF_i"
Q12 = "IxyPSF_i"

# pre-compute the denominator shared by both components: Q11 + Q22
df_shear = df.withColumn("denom", F.col(Q11) + F.col(Q22))
# real and imaginary parts of the shear:
#   R_E = (Q11 - Q22) / denom,  I_E = 2 * Q12 / denom
df_shear = df_shear.withColumn("R_E", (F.col(Q11) - F.col(Q22)) /
                               F.col('denom')).withColumn(
                                   "I_E", (2 * F.col(Q12)) / F.col('denom'))
# convert to amplitude and phase of R_E + i*I_E.
# atan2 takes (y, x), i.e. (imaginary, real): passing the real part first
# would return the angle measured from the imaginary axis instead.
df_shear = df_shear.withColumn("amp_E",
                               F.hypot(F.col("R_E"), F.col("I_E"))).withColumn(
                                   "phase_E",
                                   F.atan2(F.col("I_E"), F.col("R_E")))
df_shear.select("R_E", "I_E", "amp_E", "phase_E").show(5)

# In[63]:

# Column to average per "ipix" group, and the name that the grouped mean
# column is looked up under afterwards.
var = "amp_E"
var_sys = "avg({})".format(var)

df_map = df_shear.groupBy("ipix").mean(var)
df_map.describe([var_sys]).show()

# Bring the grouped result to pandas and scatter the means into a zero-filled
# array of length hp.nside2npix(nside), indexed by ipix.
dfp = df_map.toPandas()
map_e = np.zeros(hp.nside2npix(nside))
map_e[dfp["ipix"].values] = dfp[var_sys].values
hp.gnomview(map_e,
            rot=[55, -29.8],
コード例 #7
0
# Input parquet file name is derived from the method name and nside.
fn = method + "_nside{}.parquet".format(nside)
print("reading: " + fn)

df = spark.read.parquet(fn)

# Offsets relative to (theta_c, phi_c), divided by Rsq:
#   dx = sin((theta + theta_c) / 2) * (phi - phi_c) / Rsq
#   dy = (theta - theta_c) / Rsq
df = df.withColumn(
    "dx",
    F.sin((df["theta"] + df["theta_c"]) / 2) * (df["phi"] - df["phi_c"]) / Rsq,
)

df = df.withColumn("dy", (df["theta"] - df["theta_c"]) / Rsq)

# Disabled alternative:
# df = df.drop("theta", "phi", "theta_c", "phi_c")
# df = df.withColumn("R", F.hypot(df["dx"], df["dy"]))

# Cartesian components for both angle pairs; the angles are dropped after use.
df = (
    df.withColumn("x", F.sin(df["theta"]) * F.cos(df["phi"]))
    .withColumn("y", F.sin(df["theta"]) * F.sin(df["phi"]))
    .withColumn("z", F.cos(df["theta"]))
    .drop("theta", "phi")
)
df = (
    df.withColumn("xc", F.sin(df["theta_c"]) * F.cos(df["phi_c"]))
    .withColumn("yc", F.sin(df["theta_c"]) * F.sin(df["phi_c"]))
    .withColumn("zc", F.cos(df["theta_c"]))
    .drop("theta_c", "phi_c")
)
# R: three-component Euclidean distance between (x, y, z) and (xc, yc, zc).
df = df.withColumn(
    "R",
    F.hypot(df.x - df.xc, F.hypot(df.y - df.yc, df.z - df.zc)),
).drop("x", "y", "z", "xc", "yc", "zc")
# Disabled alternative:
# df = df.withColumn("R", F.degrees(df['Rad']) * 60 / Rsq)

# df = df.withColumn("RR", F.hypot(df.dx, df.dy))

# angular distance in arcmin (disabled)
# df = df.withColumn("angdist", F.degrees(2 * F.asin(df.rr / 2)) * 60)

# count() forces evaluation so the cached frame is materialised.
df.cache().count()

print("Rsq={} arcmin".format(Rsq))
# maxr = df.select(F.max(df.R)).take(1)[0][0]
# print("max radius={} arcmin".format(maxr))

# 2d histogram of (dx, dy)
x, y, m = df_histplot2(
    df,
    "dx",
    "dy",
    bounds=[[-1.5, 1.5], [-1.5, 1.5]],
    Nbin1=200,
    Nbin2=200,
)