Example #1
def main():
    spark_conf = SparkConf().setAppName("Text Preprocesser").set(
        "spark.cores.max", "30")

    global sc
    sc = SparkContext(conf=spark_conf)
    sc.setLogLevel("ERROR")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/lib/util.py")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/config/config.py")

    global sql_context
    sql_context = SQLContext(sc)

    start_time = time.time()
    preprocess_all()
    end_time = time.time()
    print(
        colored(
            "Preprocessing run time (seconds): {0}".format(end_time -
                                                           start_time),
            "magenta"))
Example #2
def main():
    #spark_conf = SparkConf().setAppName("Text Preprocesser").set("spark.cores.max", "30")

    global sc
    #sc = SparkContext(conf=spark_conf)
    sc_conf = SparkConf()
    sc_conf.set("spark.redis.host", config.REDIS_SERVER)
    sc_conf.set("spark.redis.port", "6379")
    sc = SparkContext(conf=sc_conf)
    sc.setLogLevel("ERROR")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/lib/util.py")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/config/config.py")

    global sql_context
    sql_context = SQLContext(sc)

    start_time = time.time()
    preprocess_files(config.S3_BUCKET, config.S3_FOLDER_EXTRACTED)
    end_time = time.time()
    print(
        colored(
            "Preprocessing run time (seconds): {0}".format(end_time -
                                                           start_time),
            "magenta"))
Example #3
def main():
    spark_conf = SparkConf().setAppName("Spark Custom MinHashLSH").set(
        "spark.cores.max", "30")

    global sc
    sc = SparkContext(conf=spark_conf)
    sc.setLogLevel("ERROR")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/lib/min_hash.py")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/lib/locality_sensitive_hash.py")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/lib/util.py")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/config/config.py")

    global sql_context
    sql_context = SQLContext(sc)

    start_time = time.time()
    run_minhash_lsh()
    end_time = time.time()
    print(
        colored(
            "Spark Custom MinHashLSH run time (seconds): {0}".format(
                end_time - start_time), "magenta"))
Example #4
def main():
    spark_conf = SparkConf().setAppName("Spark Custom MinHashLSH").set("spark.cores.max", "30")

    global sc
    global sql_context    

    sc = SparkContext(conf=spark_conf)
    sc.setLogLevel("ERROR")
    sql_context = SQLContext(sc)
    sc.addFile(os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/lib/util.py")
    sc.addFile(os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/config/config.py")


    start_time = time.time()
    similarity_scores_df = compare_text()

    config = configparser.ConfigParser()
    config.read('../config/db_properties.ini')
    similarity_scores_df.write.jdbc(
        config['postgres']['url'],
        config['postgres']['table'],
        mode='overwrite',
        properties={
            'user': config['postgres']['user'],
            'password': config['postgres']['password']
        })

    end_time = time.time()
    print(
        colored(
            "Spark MinHash run time (seconds): {0}".format(end_time -
                                                           start_time),
            "magenta"))
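The keys read from ../config/db_properties.ini above imply a layout along these lines; the section and key names come straight from the code, while the values are placeholders:

[postgres]
url = jdbc:postgresql://<host>:5432/<database>
table = <target_table>
user = <db_user>
password = <db_password>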
Example #5

if __name__ == '__main__':

    # Creating the streaming spark context and spark session
    sp_context = SparkContext('local[2]', "Read_Stream")
    ssp_context = StreamingContext(sp_context, 5.000)
    sp_sess = SparkSession.builder.appName('Read_Data').getOrCreate()

    # Adding the files to Spark Context
    # sp_context.addFile("calc_stats_1.py")
    # sp_context.addFile("calc_stats_2.py")
    # sp_context.addFile("init_stats.py")
    # sp_context.addFile("model.py")
    # sp_context.addFile("chances_of_winning.py")
    sp_context.addFile('metrics.py')

    # Reading the CSV files using Spark session
    players = sp_sess.read.csv(play_path, header=True, inferSchema=True)
    teams = sp_sess.read.csv(team_path, header=True, inferSchema=True)

    # Importing the files
    from metrics import *
    """global player_chemistry
    global player_ratings
    global regr_player
    global player_profile
    global match_details
"""
    # Initializing the player chem and ratings
    player_chemistry = init_chemistry(players)
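The StreamingContext above is created with a 5-second batch interval, but the snippet never attaches a stream to it. A minimal sketch of the usual wiring, assuming a text stream on localhost:9999 (host and port are illustrative, not part of the original example):

    # Illustrative only: attach a socket text stream to the StreamingContext
    # created above and process it in 5-second batches.
    lines = ssp_context.socketTextStream("localhost", 9999)
    lines.pprint()                  # print a sample of each batch

    ssp_context.start()             # begin receiving data
    ssp_context.awaitTermination()  # block until the stream is stopped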
Example #6
    # Writing the JSON strings to files
    write_to_file("player_rate.json", player_rate_json)
    write_to_file("player_profile.json", player_prof_json)
    write_to_file("match_details.json", match_details_json)


if __name__ == '__main__':

    # Creating the streaming spark context and spark session
    sp_context = SparkContext('local[2]', "Read_Stream")
    ssp_context = StreamingContext(sp_context, 5.000)
    sp_sess = SparkSession.builder.appName('Read_Data').getOrCreate()

    # Adding the files to Spark Context
    sp_context.addFile("calc_stats_1.py")
    sp_context.addFile("calc_stats_2.py")
    sp_context.addFile("init_stats.py")
    sp_context.addFile("model.py")
    sp_context.addFile("chances_of_winning.py")

    # Reading the CSV files using Spark session
    players = sp_sess.read.csv(play_path, header=True, inferSchema=True)
    teams = sp_sess.read.csv(team_path, header=True, inferSchema=True)

    # Importing the files
    from calc_stats_1 import *
    from calc_stats_2 import *
    from init_stats import *
    from model import *
    from chances_of_winning import *
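Both streaming drivers load the two CSV files with header=True and inferSchema=True, so the column types are inferred from the data rather than declared. A quick, illustrative way to check what was actually inferred (not part of the original code):

    # Illustrative only: inspect the schemas Spark inferred for the two inputs.
    players.printSchema()
    teams.printSchema()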
Example #7
                    "own_goals": i['own_goals'],
                    "yellow_cards": i['yellow_cards'],
                    "red_cards": i['red_cards']
                }
                json_object = json.dumps(dictionary, indent=4)
                # Writing to sample.json
                with open("output_req_2.json", "w") as outfile:
                    print("Writing....to JSON")
                    outfile.write(json_object)
                break


if __name__ == "__main__":
    sp_context = SparkContext('local[2]', "UI")
    sp_sess = SparkSession.builder.appName('user_input').getOrCreate()
    sp_context.addFile("model.py")
    input_file = sys.argv[1]
    with open(input_file, 'r') as file:
        content = file.read()
        input_data = eval(content)
        if input_data["req_type"] == 1:
            # calling predict function:
            """
            output = predict(input_)
            """
            predict_helper(input_data)

        elif input_data["req_type"] == 2:
            # calling profile function
            player_profile_helper(input_data)
Example #8
                json_object = json.dumps(dictionary, indent=4) 
                # Writing to sample.json 
                with open("output_req_3.json", "w") as outfile:
                    outfile.write(json_object) 
                return
    with open("output_req_3.json", "w") as outfile:
        dicte = {"match_found":False}
        outfile.write(json.dumps(dicte, indent=4))    



if __name__ == "__main__":
    sp_context = SparkContext('local[2]', "UI")
    sp_sess = SparkSession.builder.appName('user_input').getOrCreate()
    sp_context.addFile("metrics.py")
    input_file = sys.argv[1]
    with open(input_file, 'r') as file:
        content = file.read()
        input_data = eval(content)
        if 'req_type' not in input_data:
            # calling match info function 
            match_data_helper(input_data)
        else:
            if input_data["req_type"] == 1:
                # calling predict function:
                """
                output = predict(input_)
                """
                predict_helper(input_data)
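Both UI entry points parse the request file with eval(), which will execute any expression found in that file. For dict-style input a safer drop-in is ast.literal_eval from the standard library; the sketch below is a suggestion, not part of the original code:

    # Safer alternative to eval() for reading the request dictionary.
    # ast.literal_eval accepts only Python literals (dicts, lists, strings,
    # numbers, booleans, None), so arbitrary code in the file is rejected.
    import ast

    with open(input_file, 'r') as file:
        input_data = ast.literal_eval(file.read())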