def distance(long1, lat1, long2, lat2):
    """Great-circle distance in kilometres between two points.

    Uses the haversine formula on a spherical Earth (mean radius 6371 km).

    Args:
        long1, lat1: longitude/latitude of the first point, decimal degrees.
        long2, lat2: longitude/latitude of the second point, decimal degrees.

    Returns:
        Distance in kilometres.
    """
    radius = 6371  # mean Earth radius, km
    diff_lat = radians(lat2 - lat1)
    diff_long = radians(long2 - long1)
    # BUG FIX: the original passed lat1/lat2 in *degrees* to cos(); the
    # haversine formula requires the latitudes in radians.
    a = (sin(diff_lat / 2)**2
         + cos(radians(lat1)) * cos(radians(lat2)) * sin(diff_long / 2)**2)
    c = 2 * atan2(a**0.5, (1 - a)**0.5)
    return radius * c
def km(lat1, lon1, lat2, lon2):
    """Haversine distance in kilometres between two lat/lon points.

    Args:
        lat1, lon1: first point, decimal degrees.
        lat2, lon2: second point, decimal degrees.

    Returns:
        Great-circle distance in km on a sphere of radius 6371 km.
    """
    earth_radius_km = 6371
    lat_delta = deg2rad(lat2 - lat1)
    lon_delta = deg2rad(lon2 - lon1)
    # a = sin²(Δφ/2) + cos φ1 · cos φ2 · sin²(Δλ/2)
    half_chord = (sin(lat_delta / 2) ** 2
                  + cos(deg2rad(lat1)) * cos(deg2rad(lat2)) * sin(lon_delta / 2) ** 2)
    # central angle c = 2 · atan2(√a, √(1−a))
    central_angle = 2 * atan2(sqrt(half_chord), sqrt(1 - half_chord))
    return earth_radius_km * central_angle
def complicated_arithmetic_operation(df):
    """Run a fixed trig-heavy expression over the taxi-trip coordinate
    columns and collect its mean (the result is discarded).

    This is an arithmetic workload, not a geodesic distance: the
    parenthesisation (``cos(x) * pi/180`` rather than ``cos(x * pi/180)``)
    is reproduced exactly from the original expression.
    """
    lon_pickup = df['pickup_longitude']
    lat_pickup = df['pickup_latitude']
    lon_dropoff = df['dropoff_longitude']
    lat_dropoff = df['dropoff_latitude']
    inner = ((f.cos(lon_pickup) * np.pi / 180)
             * (f.cos(lon_dropoff) * np.pi / 180)
             * (f.sin(lat_dropoff - lat_pickup) / 2 * np.pi / 180) ** 2)
    angle = 2 * f.atan2(f.sqrt(inner), f.sqrt(1 - inner))
    df.select(f.mean(angle)).collect()
def complementary_filter(ds, freq: int = 16, accelerometer_x: str = "accelerometer_x", accelerometer_y: str = "accelerometer_y", accelerometer_z: str = "accelerometer_z", gyroscope_x: str = "gyroscope_x", gyroscope_y: str = "gyroscope_y", gyroscope_z: str = "gyroscope_z"):
    """
    Compute complementary filter on gyro and accel data.

    Fuses accelerometer-derived tilt angles (low-pass weighted) with
    gyroscope rates integrated over one sample period (high-pass
    weighted) to produce roll, pitch and yaw columns, per user,
    ordered by timestamp. Rows with nulls (e.g. the first row of each
    partition, where lag() is null) are dropped from the result.

    Args:
        ds (DataStream ): Non-Windowed/grouped dataframe
        freq (int): frequency of accel/gryo. Assumption is that frequency is equal for both gyro and accel.
        accelerometer_x (str): name of the column
        accelerometer_y (str): name of the column
        accelerometer_z (str): name of the column
        gyroscope_x (str): name of the column
        gyroscope_y (str): name of the column
        gyroscope_z (str): name of the column

    Returns:
        DataStream: input data with added 'roll', 'pitch' and 'yaw'
        columns (degrees) and null rows removed.
    """
    dt = 1.0 / freq  # 1/16.0;
    M_PI = math.pi;
    hpf = 0.90;  # high-pass (gyro) weight
    lpf = 0.10;  # low-pass (accel) weight; hpf + lpf == 1.0
    # Lag/lead operate per user, in timestamp order.
    window = Window.partitionBy(ds._data['user']).orderBy(ds._data['timestamp'])
    # For each axis: thetaN_accel is the accel-derived angle (degrees)
    # scaled by lpf; the fused angle adds the lagged accel angle plus the
    # integrated gyro rate, scaled by hpf.
    # NOTE(review): a textbook complementary filter lags the previous
    # *fused* estimate, not the accel-only angle — confirm this weighting
    # is intentional.
    data = ds._data.withColumn("thetaX_accel", ((F.atan2(-F.col(accelerometer_z), F.col(accelerometer_y)) * 180 / M_PI)) * lpf) \
        .withColumn("roll", (F.lag("thetaX_accel").over(window) + F.col(gyroscope_x) * dt) * hpf + F.col("thetaX_accel")).drop("thetaX_accel") \
        .withColumn("thetaY_accel", ((F.atan2(-F.col(accelerometer_x), F.col(accelerometer_z)) * 180 / M_PI)) * lpf) \
        .withColumn("pitch", (F.lag("thetaY_accel").over(window) + F.col(gyroscope_y) * dt) * hpf + F.col("thetaY_accel")).drop("thetaY_accel") \
        .withColumn("thetaZ_accel", ((F.atan2(-F.col(accelerometer_y), F.col(accelerometer_x)) * 180 / M_PI)) * lpf) \
        .withColumn("yaw", (F.lag("thetaZ_accel").over(window) + F.col(gyroscope_z) * dt) * hpf + F.col("thetaZ_accel")).drop("thetaZ_accel")
    return DataStream(data=data.dropna(), metadata=Metadata())
def coords2elem(df):
    """Derive orbital-element columns from coordinate/velocity columns.

    Appears to compute epicyclic orbital elements (eccentricity ``e``,
    mean anomaly ``M``, phase ``wt``) from radius ``r``, radial velocity
    ``vr``, semi-major axis ``a`` and time ``t`` — TODO confirm the
    physical model against Omega_udf/Kappa_udf definitions.

    Returns a dataframe with columns
    ['id', 'timestep', 'streamline', 'a', 'e', 'M', 'wt'].
    """
    # Frequencies from the semi-major axis (UDFs defined elsewhere).
    df = df.withColumn('Omega', Omega_udf(df['a']))
    df = df.withColumn('Kappa', Kappa_udf(df['a']))
    # e·sin M and e·cos M from radial velocity and radius.
    df = df.withColumn('e_sin_M', df['vr'] / (df['a'] * df['Kappa']))
    df = df.withColumn('e_cos_M', 1.0 - df['r'] / df['a'])
    # e² = (e sin M)² + (e cos M)²; M recovered via atan2.
    e2 = df['e_sin_M']**2 + df['e_cos_M']**2
    df = df.withColumn('e', sqrt(e2))
    df = df.withColumn('M', atan2(df['e_sin_M'], df['e_cos_M']))
    # Phase angle, wrapped into range by adjust_angle_udf (defined elsewhere).
    wt = df['t'] - (df['Omega'] / df['Kappa']) * (df['M'] + 2.0 * df['e_sin_M'])
    df = df.withColumn('wt', adjust_angle_udf(wt))
    cols = ['id', 'timestep', 'streamline', 'a', 'e', 'M', 'wt']
    return df.select(cols)
def calculate_bearing_degrees(latitude_1, longitude_1, latitude_2, longitude_2):
    """Initial great-circle bearing (forward azimuth) from point 1 to point 2.

    Args:
        latitude_1, longitude_1: start point, decimal degrees.
        latitude_2, longitude_2: end point, decimal degrees.

    Returns:
        Bearing in degrees in (-180, 180], where 0 = north and 90 = east.
    """
    diff_longitude = F.radians(longitude_2 - longitude_1)
    r_latitude_1 = F.radians(latitude_1)
    r_latitude_2 = F.radians(latitude_2)
    # Standard forward-azimuth formula:
    #   y = sin(Δλ)·cos(φ2)
    #   x = cos(φ1)·sin(φ2) − sin(φ1)·cos(φ2)·cos(Δλ)
    #   θ = atan2(y, x)
    # BUG FIX: the original computed y with cos(λ2) (the *longitude*)
    # instead of cos(φ2), and called atan2(x, y) with swapped arguments.
    y = F.sin(diff_longitude) * F.cos(r_latitude_2)
    x = (F.cos(r_latitude_1) * F.sin(r_latitude_2) -
         F.sin(r_latitude_1) * F.cos(r_latitude_2) * F.cos(diff_longitude))
    return F.degrees(F.atan2(y, x))
def distance(lat, lon, lat2, lon2):
    '''
    Computes the great-circle distance between two points with the
    "haversine" formula.

    Parameters
    ----------
    lat: latitude of the first location, signed decimal degrees
    lon: longitude of the first location, signed decimal degrees
    lat2: latitude of the second location, signed decimal degrees
    lon2: longitude of the second location, signed decimal degrees

    Returns
    -------
    Distance between the two points in kilometres.

    Notes
    -----
    With φ latitude, λ longitude, R = 6371 km:
        a = sin²(Δφ/2) + cos φ1 ⋅ cos φ2 ⋅ sin²(Δλ/2)
        c = 2 ⋅ atan2(√a, √(1−a))
        d = R ⋅ c
    '''
    earth_radius = 6371
    lat_diff = lat - lat2
    lon_diff = lon - lon2
    half_chord = (sin(toRadians(lat_diff / 2)) ** 2
                  + cos(toRadians(lat)) * cos(toRadians(lat2))
                  * sin(toRadians(lon_diff / 2)) ** 2)
    central_angle = 2 * atan2(half_chord ** 0.5, (1 - half_chord) ** 0.5)
    return earth_radius * central_angle
def join_and_analyze(df_poi, df_sample):
    """
    Joins the Requests data and POI list data, calculates distance between POI Centers
    and retains the record with the minimum distance to a particular POI center

    Parameters:
    df_poi: POI List datafarme
    df_sample: Requests dataframe
    """
    # No common key exists between the two datasets, so pair every request
    # with every POI via a cartesian product, and cache the result.
    pairs = df_sample.crossJoin(df_poi)
    pairs.cache()

    # Haversine intermediate: a = sin²(Δφ/2) + cos φ1 · cos φ2 · sin²(Δλ/2)
    haversine_a = (
        F.pow(F.sin(F.radians(F.col("POI_Latitude") - F.col("Latitude")) / 2), 2)
        + F.cos(F.radians(F.col("Latitude")))
        * F.cos(F.radians(F.col("POI_Latitude")))
        * F.pow(F.sin(F.radians(F.col("POI_Longitude") - F.col("Longitude")) / 2), 2)
    )
    # distance = 2 · R · atan2(√a, √(1−a)) with R = 6371 km
    pairs = pairs.withColumn("a", haversine_a).withColumn(
        "distance",
        F.atan2(F.sqrt(F.col("a")), F.sqrt(-F.col("a") + 1)) * 2 * 6371,
    )

    # Per request (_ID), keep only the row(s) at the minimal distance.
    w = Window.partitionBy('_ID')
    pairs = (pairs.withColumn('min', F.min('distance').over(w))
             .where(F.col('distance') == F.col('min'))
             .drop('min').drop('a'))
    return pairs
def distance_measure():
    """Column expression atan2(√a, √(1−a)) over the pre-computed
    haversine intermediate stored in column 'distance_inter'."""
    intermediate = col("distance_inter")
    return atan2(sqrt(intermediate), sqrt(1 - intermediate))
# NOTE(review): the expression below is truncated in this chunk — it opens
# before the first visible line (presumably inside a .filter(...)/.where(...)
# call restricting trips to the NYC bounding box with positive cost); only
# its tail is shown here.
(col('Start_Longitude') > -80) &\
(col('Start_Longitude') < -70) &\
(col('Start_Latitude') > 40) &\
(col('Start_Latitude') < 46) &\
(col('End_Longitude') > -80) &\
(col('End_Longitude') < -70) &\
(col('End_Latitude') > 40) &\
(col('End_Latitude') < 46) &\
(col('Cost') > 0))

# Derive trip duration (minutes) and haversine distance, then drop the
# intermediate columns.
# NOTE(review): Diff_Latitude/Diff_Longitude and the Start/End latitudes are
# in *degrees* but are fed directly to sin/cos, which expect radians — the
# "Distance" values are therefore likely wrong; confirm intent.
yellow_tripdata_1m = yellow_tripdata_1m.withColumn("Duration", ((unix_timestamp(col("End_Datetime")) - unix_timestamp(col("Start_Datetime")))/60))\
    .withColumn("Diff_Longitude", col("End_Longitude") - col("Start_Longitude"))\
    .withColumn("Diff_Latitude", col("End_Latitude") - col("Start_Latitude"))\
    .withColumn("a", F.pow(F.sin(col("Diff_Latitude")/2),2) +\
    F.cos(col("Start_Latitude"))*F.cos(col("End_Latitude"))*F.pow(F.sin(col("Diff_Longitude")/2),2))\
    .withColumn("Distance", 2 * 6371 * F.atan2(F.sqrt(col("a")), F.sqrt(1.0 - col("a"))))\
    .drop("Diff_Longitude").drop("Diff_Latitude").drop("Start_Datetime")\
    .drop("End_Datetime").drop("Start_Longitude").drop("Start_Latitude")\
    .drop("End_Longitude").drop("End_Latitude").drop("a").drop("Cost")

# Attach vendor information and expose the result to Spark SQL.
yellow_trip_joined = yellow_tripdata_1m.join(yellow_tripvendors_1m, "ID", "inner").drop("ID")
yellow_trip_joined.createOrReplaceTempView("yellow_trip_joined")

# Per vendor, keep the trip(s) with the maximum distance.
window = Window.partitionBy("Vendor")
res = yellow_trip_joined.withColumn("Max_Distance", F.max("Distance").over(window))\
    .where(col("Distance") == col("Max_Distance"))\
    .drop("Max_Distance").select(["Vendor", "Distance", "Duration"])
res.show()
# start_time_parquet is set before this chunk — TODO confirm.
print("Time of Q2 using SQL with parquet is: %s seconds" % (time.time() - start_time_parquet))
def tocolumns(df, expr):
    """Translate a histbook expression tree into a PySpark Column.

    Recursively maps histbook.expr nodes (Const, Name/Predicate, Call)
    onto pyspark.sql.functions calls and Column operators. Unsupported
    functions raise NotImplementedError; unknown node types raise
    AssertionError. Branch order matters: the first matching ``elif``
    wins (e.g. "fmod" is rejected before the later "mod"/"fmod" branch,
    which is therefore unreachable for "fmod" — NOTE(review): confirm
    whether fmod was meant to map to ``%``).
    """
    import pyspark.sql.functions as fcns
    if isinstance(expr, histbook.expr.Const):
        # Literal value.
        return fcns.lit(expr.value)
    elif isinstance(expr, (histbook.expr.Name, histbook.expr.Predicate)):
        # Bare column reference.
        return df[expr.value]
    elif isinstance(expr, histbook.expr.Call):
        if expr.fcn == "abs" or expr.fcn == "fabs":
            return fcns.abs(tocolumns(df, expr.args[0]))
        elif expr.fcn == "max" or expr.fcn == "fmax":
            return fcns.greatest(*[tocolumns(df, x) for x in expr.args])
        elif expr.fcn == "min" or expr.fcn == "fmin":
            return fcns.least(*[tocolumns(df, x) for x in expr.args])
        elif expr.fcn == "arccos":
            return fcns.acos(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arccosh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "arcsin":
            return fcns.asin(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arcsinh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "arctan2":
            return fcns.atan2(tocolumns(df, expr.args[0]), tocolumns(df, expr.args[1]))
        elif expr.fcn == "arctan":
            return fcns.atan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "arctanh":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "ceil":
            return fcns.ceil(tocolumns(df, expr.args[0]))
        elif expr.fcn == "copysign":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "cos":
            return fcns.cos(tocolumns(df, expr.args[0]))
        elif expr.fcn == "cosh":
            return fcns.cosh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "rad2deg":
            # No Spark builtin at the time; do the scaling inline.
            return tocolumns(df, expr.args[0]) * (180.0 / math.pi)
        elif expr.fcn == "erfc":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "erf":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "exp":
            return fcns.exp(tocolumns(df, expr.args[0]))
        elif expr.fcn == "expm1":
            return fcns.expm1(tocolumns(df, expr.args[0]))
        elif expr.fcn == "factorial":
            return fcns.factorial(tocolumns(df, expr.args[0]))
        elif expr.fcn == "floor":
            return fcns.floor(tocolumns(df, expr.args[0]))
        elif expr.fcn == "fmod":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "gamma":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "hypot":
            return fcns.hypot(tocolumns(df, expr.args[0]), tocolumns(df, expr.args[1]))
        elif expr.fcn == "isinf":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "isnan":
            return fcns.isnan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "lgamma":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "log10":
            return fcns.log10(tocolumns(df, expr.args[0]))
        elif expr.fcn == "log1p":
            return fcns.log1p(tocolumns(df, expr.args[0]))
        elif expr.fcn == "log":
            return fcns.log(tocolumns(df, expr.args[0]))
        elif expr.fcn == "pow":
            return fcns.pow(tocolumns(df, expr.args[0]), tocolumns(df, expr.args[1]))
        elif expr.fcn == "deg2rad":
            # Inverse of rad2deg above.
            return tocolumns(df, expr.args[0]) * (math.pi / 180.0)
        elif expr.fcn == "sinh":
            return fcns.sinh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sin":
            return fcns.sin(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sqrt":
            return fcns.sqrt(tocolumns(df, expr.args[0]))
        elif expr.fcn == "tanh":
            return fcns.tanh(tocolumns(df, expr.args[0]))
        elif expr.fcn == "tan":
            return fcns.tan(tocolumns(df, expr.args[0]))
        elif expr.fcn == "trunc":
            raise NotImplementedError(
                expr.fcn)  # FIXME (fcns.trunc is for dates)
        elif expr.fcn == "xor":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "conjugate":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "exp2":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "heaviside":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "isfinite":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "left_shift" and isinstance(expr.args[1], histbook.expr.Const):
            # Shift amount must be a literal for Spark's shiftLeft.
            return fcns.shiftLeft(tocolumns(df, expr.args[0]), expr.args[1].value)
        elif expr.fcn == "log2":
            return fcns.log2(tocolumns(df, expr.args[0]))
        elif expr.fcn == "logaddexp2":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "logaddexp":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "mod" or expr.fcn == "fmod":
            # NOTE: "fmod" never reaches here — it is caught (and rejected)
            # by the earlier "fmod" branch above.
            return tocolumns(df, expr.args[0]) % tocolumns(df, expr.args[1])
        elif expr.fcn == "right_shift" and isinstance(expr.args[1], histbook.expr.Const):
            return fcns.shiftRight(tocolumns(df, expr.args[0]), expr.args[1].value)
        elif expr.fcn == "rint":
            return fcns.rint(tocolumns(df, expr.args[0]))
        elif expr.fcn == "sign":
            raise NotImplementedError(expr.fcn)  # FIXME
        elif expr.fcn == "where":
            return fcns.when(tocolumns(df, expr.args[0]), tocolumns(df, expr.args[1])).otherwise(
                tocolumns(df, expr.args[2]))
        elif expr.fcn == "numpy.equal":
            return tocolumns(df, expr.args[0]) == tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.not_equal":
            return tocolumns(df, expr.args[0]) != tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.less":
            return tocolumns(df, expr.args[0]) < tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.less_equal":
            return tocolumns(df, expr.args[0]) <= tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.isin":
            # NOTE(review): Python's `in` on a Spark Column is not a valid
            # membership test (Column does not support __contains__) —
            # this branch likely fails at runtime; confirm.
            return tocolumns(df, expr.args[0]) in tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_not":
            return ~tocolumns(df, expr.args[0])
        elif expr.fcn == "numpy.add":
            return tocolumns(df, expr.args[0]) + tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.subtract":
            return tocolumns(df, expr.args[0]) - tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.multiply":
            return tocolumns(df, expr.args[0]) * tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.true_divide":
            return tocolumns(df, expr.args[0]) / tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_or":
            return tocolumns(df, expr.args[0]) | tocolumns(df, expr.args[1])
        elif expr.fcn == "numpy.logical_and":
            return tocolumns(df, expr.args[0]) & tocolumns(df, expr.args[1])
        else:
            raise NotImplementedError(expr.fcn)
    else:
        raise AssertionError(expr)
from pyspark.sql import functions as F

# PSF second-moment column names used to form the ellipticity/shear.
Q11 = "IxxPSF_i"
Q22 = "IyyPSF_i"
Q12 = "IxyPSF_i"

# pre-compute denominator: trace of the moment matrix, Q11 + Q22
df_shear = df.withColumn("denom", F.col(Q11) + F.col(Q22))

# real and imaginary parts of the shear/ellipticity:
#   R_E = (Q11 - Q22) / (Q11 + Q22),  I_E = 2·Q12 / (Q11 + Q22)
df_shear = df_shear.withColumn("R_E", (F.col(Q11) - F.col(Q22)) / F.col('denom')).withColumn(
    "I_E", (2 * F.col(Q12)) / F.col('denom'))

# convert to amplitude and phase
# NOTE(review): phase uses atan2(R_E, I_E); the usual phase of a complex
# number is atan2(imag, real) — confirm the intended convention.
df_shear = df_shear.withColumn("amp_E", F.hypot(F.col("R_E"), F.col("I_E"))).withColumn(
    "phase_E", F.atan2(F.col("R_E"), F.col("I_E")))

df_shear.select("R_E", "I_E", "amp_E", "phase_E").show(5)

# In[63]:

# Average the amplitude per HEALPix pixel and render a gnomonic view.
# np, hp (healpy) and nside are defined before this chunk — TODO confirm.
var = "amp_E"
var_sys = "avg(" + var + ")"  # Spark's auto-generated aggregate column name
df_map = df_shear.groupBy("ipix").mean(var)
df_map.describe([var_sys]).show()
dfp = df_map.toPandas()
map_e = np.zeros(hp.nside2npix(nside))
map_e[dfp['ipix'].values] = dfp[var_sys].values
hp.gnomview(map_e, rot=[55, -29.8], reso=hp.nside2resol(nside, arcmin=True), title=var_sys)
# Longitude separation in radians between the "near" and "start" stations.
df_join = df_join.withColumn(
    'longitude_distance',
    functions.radians(over_station_coord['near_longitude']) -
    functions.radians(short_station_coord['start_longitude']))
# Haversine intermediate.
# NOTE(review): 'latitude_distance' is computed before this chunk — confirm
# whether it (and 'longitude_distance') already include the /2 factor;
# textbook haversine uses sin²(Δ/2), whereas sin(Δ) is used here.
df_join = df_join.withColumn(
    'a',
    (pow(functions.sin('latitude_distance'), 2) +
     functions.cos(functions.radians(short_station_coord['start_latitude'])) *
     functions.cos(functions.radians(over_station_coord['near_latitude'])) *
     (pow(functions.sin('longitude_distance'), 2))))
# Great-circle distance in km (Earth radius 6373 km here).
df_join = df_join.withColumn(
    'distance',
    6373 * 2 * functions.atan2(sqrt(df_join['a']), sqrt(1 - df_join['a'])))
# distance less than 3 km
#df_join = df_join.filter(df_join['distance'] < 3)
df_join = df_join.select('date', 'hour', 'start_station_name',
                         'near_station_name', 'distance')
# One row per (date, hour, start, near) pair, then order by distance and
# drop the distance column from the final projection.
df_join = df_join.dropDuplicates(
    ['date', 'hour', 'start_station_name', 'near_station_name'])
df_join = df_join.orderBy('date', 'hour', 'distance').select('date', 'hour',
                                                             'start_station_name',
                                                             'near_station_name')
if showHolding == True:
    # option to show only planes in a holding pattern, note that the below is NOT foolproof depending on flight
    # route
    #
    # first we calculate the bearing change from the previous coordinate to the current coordinate, on how to
    # calculate bearing see:
    #
    # https://www.mrexcel.com/forum/excel-questions/626081-calculate-bearing-direction-between-2-coordinates.html
    # Convert lon/lat to radians (hard-coded pi literal).
    adsbDf5 = adsbDf4.withColumn("lon_rad", adsbDf4.lon * 3.14159265358979 / 180)
    adsbDf6 = adsbDf5.withColumn("lat_rad", adsbDf4.lat * 3.14159265358979 / 180)
    # Forward azimuth from the lagged position to the current one, converted
    # back to degrees via the trailing /(pi/180); first row of each
    # flight/UTC window gets 0 (lag is null).
    adsbDf7 = adsbDf6.withColumn("bearing", psf.when(psf.isnull(psf.lag('lat_rad').over(window_flightNum_UTC)),0). \
        otherwise(psf.atan2(psf.sin(adsbDf6.lon_rad-psf.lag('lon_rad').over(window_flightNum_UTC))* \
        psf.cos('lat_rad'),psf.cos(psf.lag('lat_rad').over(window_flightNum_UTC))*psf.sin('lat_rad')- \
        psf.sin(psf.lag('lat_rad').over(window_flightNum_UTC))*psf.cos('lat_rad')*psf.cos(adsbDf6.lon_rad- \
        psf.lag('lon_rad').over(window_flightNum_UTC)))/( 3.14159265358979/180)))
    # Normalise to [0, 360).
    adsbDf8 = adsbDf7.withColumn("bearing_final",psf.when(adsbDf7.bearing < 0, adsbDf7.bearing+360). \
        otherwise(adsbDf7.bearing))
    # calculate bearing change from the previous coordinate to the current
    adsbDf9 = adsbDf8.withColumn("bearing_change", psf.when(psf.lag('bearing_final'). \
        over(window_flightNum_UTC)==0,0).otherwise(psf.when(adsbDf8.bearing_final==0,0). \
        otherwise(psf.lag('bearing_final').over(window_flightNum_UTC)-adsbDf8.bearing_final)))
    # crude way of ignoring changes when the bearing crosses zero degrees, such as 350 to 10 degrees, because it's
    # not really 340 degree change but rather 20 degrees, but I'm too lazy to calculate this so ignore anything
    # thathas a bearing change of 200 degrees
    adsbDf10 = adsbDf9.withColumn("bearing_change_final", psf.when(psf.abs(adsbDf9.bearing_change)>200,0). \
        otherwise(adsbDf9.bearing_change))
    # NOTE(review): this if-block appears to continue past the end of the
    # visible chunk (adsbDf10 is presumably consumed below).