def main(): spark_conf = SparkConf().setAppName("Text Preprocesser").set( "spark.cores.max", "30") global sc sc = SparkContext(conf=spark_conf) sc.setLogLevel("ERROR") sc.addFile( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/lib/util.py") sc.addFile( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/config/config.py") global sql_context sql_context = SQLContext(sc) start_time = time.time() preprocess_all() end_time = time.time() print( colored( "Preprocessing run time (seconds): {0}".format(end_time - start_time), "magenta"))
def main():
    # spark_conf = SparkConf().setAppName("Text Preprocessor").set("spark.cores.max", "30")
    global sc
    # sc = SparkContext(conf=spark_conf)

    # Point the spark-redis connector at the Redis host defined in config.py.
    sc_conf = SparkConf()
    sc_conf.set("spark.redis.host", config.REDIS_SERVER)
    sc_conf.set("spark.redis.port", "6379")
    sc = SparkContext(conf=sc_conf)
    sc.setLogLevel("ERROR")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/lib/util.py")
    sc.addFile(
        os.path.dirname(os.path.dirname(os.path.abspath(__file__))) +
        "/config/config.py")

    global sql_context
    sql_context = SQLContext(sc)

    start_time = time.time()
    preprocess_files(config.S3_BUCKET, config.S3_FOLDER_EXTRACTED)
    end_time = time.time()
    print(
        colored(
            "Preprocessing run time (seconds): {0}".format(end_time - start_time),
            "magenta"))
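# Note (not from the source): config/config.py shipped to the executors above is
# assumed to expose at least the fields referenced in this file. The values below
# are placeholders, not real endpoints:
#
#   REDIS_SERVER = "<redis-host>"
#   S3_BUCKET = "<bucket-name>"
#   S3_FOLDER_EXTRACTED = "<key-prefix>"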
def main(): spark_conf = SparkConf().setAppName("Spark Custom MinHashLSH").set( "spark.cores.max", "30") global sc sc = SparkContext(conf=spark_conf) sc.setLogLevel("ERROR") sc.addFile( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/lib/min_hash.py") sc.addFile( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/lib/locality_sensitive_hash.py") sc.addFile( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/lib/util.py") sc.addFile( os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/config/config.py") global sql_context sql_context = SQLContext(sc) start_time = time.time() run_minhash_lsh() end_time = time.time() print( colored( "Spark Custom MinHashLSH run time (seconds): {0} seconds".format( end_time - start_time), "magenta"))
def main(): spark_conf = SparkConf().setAppName("Spark Custom MinHashLSH").set("spark.cores.max", "30") global sc global sql_context sc = SparkContext(conf=spark_conf) sc.setLogLevel("ERROR") sql_context = SQLContext(sc) sc.addFile(os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/lib/util.py") sc.addFile(os.path.dirname(os.path.dirname(os.path.abspath(__file__))) + "/config/config.py") start_time = time.time() similarity_scores_df = compare_text() config = configparser.ConfigParser() config.read('../config/db_properties.ini') similarity_scores_df.write.jdbc(config['postgres']['url'], config['postgres']['table'], mode='overwrite', properties={'user': config['postgres']['user'], 'password': config['postgres']['password']}) end_time = time.time() print(colored("Spark MinHash run time (seconds): {0} seconds".format(end_time - start_time), "magenta"))
if __name__ == '__main__':
    # Creating the streaming spark context and spark session
    sp_context = SparkContext('local[2]', "Read_Stream")
    ssp_context = StreamingContext(sp_context, 5.000)
    sp_sess = SparkSession.builder.appName('Read_Data').getOrCreate()

    # Adding the files to Spark Context
    # sp_context.addFile("calc_stats_1.py")
    # sp_context.addFile("calc_stats_2.py")
    # sp_context.addFile("init_stats.py")
    # sp_context.addFile("model.py")
    # sp_context.addFile("chances_of_winning.py")
    sp_context.addFile('metrics.py')

    # Reading the CSV files using Spark session
    players = sp_sess.read.csv(play_path, header=True, inferSchema=True)
    teams = sp_sess.read.csv(team_path, header=True, inferSchema=True)

    # Importing the files
    from metrics import *

    """global player_chemistry
    global player_ratings
    global regr_player
    global player_profile
    global match_details
    """

    # Initializing the player chemistry and ratings
    player_chemistry = init_chemistry(players)
write_to_file("player_rate.json", player_rate_json) write_to_file("player_profile.json", player_prof_json) write_to_file("match_details.json", match_details_json) # Writing the JSON strings to files if __name__ == '__main__': # Creating the streaming spark context and spark session sp_context = SparkContext('local[2]', "Read_Stream") ssp_context = StreamingContext(sp_context, 5.000) sp_sess = SparkSession.builder.appName('Read_Data').getOrCreate() # Adding the files to Spark Context sp_context.addFile("calc_stats_1.py") sp_context.addFile("calc_stats_2.py") sp_context.addFile("init_stats.py") sp_context.addFile("model.py") sp_context.addFile("chances_of_winning.py") # Reading the CSV files using Spark session players = sp_sess.read.csv(play_path, header=True, inferSchema=True) teams = sp_sess.read.csv(team_path, header=True, inferSchema=True) # Importing the files from calc_stats_1 import * from calc_stats_2 import * from init_stats import * from model import * from chances_of_winning import *
"own_goals": i['own_goals'], "yellow_cards": i['yellow_cards'], "red_cards": i['red_cards'] } json_object = json.dumps(dictionary, indent=4) # Writing to sample.json with open("output_req_2.json", "w") as outfile: print("Writing....to JSON") outfile.write(json_object) break if __name__ == "__main__": sp_context = SparkContext('local[2]', "UI") sp_sess = SparkSession.builder.appName('user_input').getOrCreate() sp_context.addFile("model.py") input_file = sys.argv[1] with open(input_file, 'r') as file: content = file.read() input_data = eval(content) if input_data["req_type"] == 1: # calling predict function: """ output = predict(input_) """ predict_helper(input_data) elif input_data["req_type"] == 2: # calling profile function player_profile_helper(input_data)
                json_object = json.dumps(dictionary, indent=4)

                # Writing the match details to output_req_3.json
                with open("output_req_3.json", "w") as outfile:
                    outfile.write(json_object)
                return
            break

    # No matching record was found; record that in the output file.
    with open("output_req_3.json", "w") as outfile:
        dicte = {"match_found": False}
        outfile.write(json.dumps(dicte, indent=4))


if __name__ == "__main__":
    sp_context = SparkContext('local[2]', "UI")
    sp_sess = SparkSession.builder.appName('user_input').getOrCreate()
    sp_context.addFile("metrics.py")

    input_file = sys.argv[1]
    with open(input_file, 'r') as file:
        content = file.read()
    input_data = eval(content)

    if 'req_type' not in input_data:
        # calling match info function
        match_data_helper(input_data)
    else:
        if input_data["req_type"] == 1:
            # calling predict function:
            """ output = predict(input_) """
            predict_helper(input_data)
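# Note (not from the source): both UI entry points above parse the request file
# with eval(), which executes whatever Python the file contains. A minimal sketch
# of a safer alternative using ast.literal_eval, which only accepts Python
# literals such as the {"req_type": 1} dictionaries these scripts expect;
# load_request is a hypothetical helper, not part of the project:

import ast


def load_request(path):
    # Read the request file and parse it as a plain Python literal (e.g. a dict).
    # Raises ValueError or SyntaxError if the file is not a literal.
    with open(path, 'r') as fh:
        return ast.literal_eval(fh.read())

# Usage sketch: input_data = load_request(sys.argv[1])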