def color_locator(column, color_string):
    """
    Build a boolean flag column telling whether a PySpark column
    contains the UPPERCASED color.

    Parameters:
        column: the PySpark column (or column name) to search in.
        color_string: the color to look for; it is upper-cased
            before the search.

    Returns a new column, aliased ``"is_" + color_string``, that can
    be used in a select statement.
    """
    # `locate` reports where the substring occurs; casting that result to
    # boolean turns it into a found / not-found flag.
    # The alias must include the color name: aliasing every flag as plain
    # "is_" would give all color columns the same, ambiguous name.
    return locate(color_string.upper(), column)\
        .cast("boolean")\
        .alias("is_" + color_string)
def color_locator(column, color_string):
    """Return a boolean column flagging whether ``column`` contains the color.

    The color is upper-cased before the search, and the resulting column
    is aliased ``"is_" + color_string``.
    """
    needle = color_string.upper()
    position = locate(needle, column)
    # Cast the locate result to boolean to get a found / not-found flag.
    found = position.cast("boolean")
    return found.alias("is_" + color_string)
Beispiel #3
0
def color_location(column, color_string):
    """Flag whether ``column`` contains the upper-cased ``color_string``.

    Returns a boolean column aliased ``'is_' + color_string``.
    """
    flag_name = 'is_' + color_string
    return (
        locate(color_string.upper(), column)
        .cast('boolean')
        .alias(flag_name)
    )
def color_locator(column, color_string):
    """Create an ``is_<color>`` boolean column for use in a select.

    Searches ``column`` for the upper-cased ``color_string`` and casts
    the locate result to boolean.
    """
    upper_color = color_string.upper()
    as_boolean = locate(upper_color, column).cast("boolean")
    return as_boolean.alias("is_" + color_string)
Beispiel #5
0
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, expr, column, lit, avg, monotonically_increasing_id, rand, locate, instr

if __name__ == '__main__':
    # Get (or create) a Spark session named "learning" on the local master.
    builder = SparkSession.builder.appName("learning").master("local")
    spark = builder.getOrCreate()

    # Read user.csv as CSV with ';' as separator and the header option on.
    reader = spark.read.format('csv')
    reader = reader.option('sep', ';').option('header', 'true')
    df = reader.load('user.csv')

    # Show the result of both substring-search functions for 'Jorge'
    # against the `name` column, side by side.
    jorge_instr = instr(col('name'), 'Jorge')
    jorge_locate = locate('Jorge', col('name'))
    df.select(jorge_instr, jorge_locate).show()

    # Show a generated `random` column, keeping only rows where it is > 0.
    random_df = df.select(rand().alias("random"))
    random_df.where(expr("random > 0")).show()