def color_locator(column, color_string):
    """Build a boolean column telling whether ``column`` contains a color.

    The color name is upper-cased before searching, so the match is made
    against the UPPERCASED form of ``color_string``. The result is meant to
    be used inside a ``select`` statement.

    Args:
        column: The PySpark column (or column name) to search in.
        color_string: The color name to look for; also used, with its
            original casing, to name the resulting column.

    Returns:
        A column aliased ``"is_" + color_string`` holding the result of
        ``locate`` cast to boolean.
    """
    # NOTE: the alias previously used an empty local (``c = ''``), which
    # named every resulting column just "is_" and made several colors
    # collide in one select. Use the color name itself instead.
    # ``locate`` is documented to return 0 when the substring is absent,
    # so the boolean cast is expected to be false for "no match".
    return (
        locate(color_string.upper(), column)
        .cast("boolean")
        .alias("is_" + color_string)
    )
def color_locator(column, color_string):
    """Return a boolean column flagging whether ``column`` holds the color.

    The upper-cased ``color_string`` is searched for in ``column``; the
    resulting column is named ``"is_" + color_string``.
    """
    needle = color_string.upper()
    # Position of the upper-cased color inside the column's text.
    position = locate(needle, column)
    # Turn the numeric position into a true/false flag.
    found = position.cast("boolean")
    return found.alias("is_" + color_string)
def color_location(column, color_string):
    """Create an ``is_<color>`` boolean column for use in a select.

    Searches ``column`` for the upper-cased ``color_string`` with
    ``locate`` and casts the outcome to boolean.
    """
    flag_name = 'is_' + color_string
    upper_color = color_string.upper()
    hit = locate(upper_color, column).cast('boolean')
    return hit.alias(flag_name)
def color_locator(column, color_string):
    """Flag, as a boolean column, the presence of a color in ``column``.

    Args:
        column: PySpark column (or column name) whose text is searched.
        color_string: Color to search for; it is upper-cased for the
            search and used unchanged in the output column name.

    Returns:
        The ``locate`` result cast to boolean, aliased
        ``"is_" + color_string``.
    """
    return (
        locate(color_string.upper(), column)
        .cast("boolean")
        .alias("is_" + color_string)
    )
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, expr, column, lit, avg, monotonically_increasing_id, rand, locate, instr

if __name__ == '__main__':
    # Start (or reuse) a Spark session named "learning" on the local master.
    session_builder = SparkSession.builder.appName("learning").master("local")
    spark = session_builder.getOrCreate()

    # Read user.csv as a semicolon-separated CSV with the header option on.
    reader = spark.read.format('csv')
    reader = reader.option('sep', ';')
    reader = reader.option('header', 'true')
    df = reader.load('user.csv')

    # Look up 'Jorge' in the name column with both instr and locate; the
    # two functions take their arguments in opposite order.
    jorge_positions = df.select(
        instr(col('name'), 'Jorge'),
        locate('Jorge', col('name')),
    )
    jorge_positions.show()

    # Produce a column of random values named "random" and keep only the
    # rows where it is greater than zero.
    randoms = df.select(rand().alias("random"))
    positive_randoms = randoms.where(expr("random > 0"))
    positive_randoms.show()