def main():
    """Script entry point: produce the race-by-borough census report.

    Builds a Spark session over the filtered collection, renders the
    report as both an image and a CSV under PATH, and always stops the
    session on the way out.
    """
    spark = create_session(c.FILTERED_COLLECTION)
    spark.sparkContext.setLogLevel('ERROR')
    try:
        frame = create_df(spark)
        # Emit both artifact kinds in one pass.
        SparkCensus(frame).race_by_borough(img_out=True, csv_out=True, path=PATH)
    except Exception as err:
        print(err)
    finally:
        spark.stop()
def main():
    """Script entry point: render the polls map for the politics dataset.

    Creates a Spark session over the configured collection, draws the
    map image under PATH, and stops the session unconditionally.
    """
    spark = create_session(c.COLLECTION_NAME)
    spark.sparkContext.setLogLevel('ERROR')
    try:
        frame = create_df(spark)
        # Image output only; this report has no CSV form.
        SparkPolitics(frame).polls_map(img_out=True, path=PATH)
    except Exception as err:
        print(err)
    finally:
        spark.stop()
def main():
    """Train and score a one-vs-rest multiclass classifier on the
    Washington Post shootings dataset, printing test error and accuracy.

    Loads the data through the project's Shootings wrapper, splits it
    80/20, fits OneVsRest over a logistic-regression base learner, and
    evaluates accuracy on the held-out split. Errors are printed rather
    than raised (script-boundary handling); the Spark session is always
    stopped.
    """
    spark = create_session('wash_post_shootings')
    spark.sparkContext.setLogLevel('ERROR')
    try:
        df = create_df(spark)
        sh = Shootings(df)
        sh.show()
        df = sh.get_df()
        # NOTE(review): randomSplit has no seed, so results vary run to run.
        (train, test) = df.randomSplit([0.8, 0.2])
        # Base binary classifier; OneVsRest trains one copy per class.
        lr = LogisticRegression(maxIter=10, tol=1E-6, fitIntercept=True,
                                featuresCol='features', labelCol='label')
        ovr = OneVsRest(classifier=lr)
        # Train the multiclass model and score it on the held-out data.
        ovrModel = ovr.fit(train)
        predictions = ovrModel.transform(test)
        evaluator = MulticlassClassificationEvaluator(metricName="accuracy")
        accuracy = evaluator.evaluate(predictions)
        print("Test Error = %g" % (1.0 - accuracy))
        print("Accuracy = %.2f" % (accuracy * 100))
    except Exception as e:
        print(e)
    finally:
        # Fix: stop the session (which also stops its SparkContext) rather
        # than spark.sparkContext.stop(), which leaves the session active.
        spark.stop()
def main():
    """Script entry point: generate police-deaths reports (trend, top
    causes, top states) as PNG + CSV artifacts under PATH.

    Errors are printed rather than raised (script-boundary handling);
    the Spark session is always stopped.
    """
    spark = create_session(c.FILTERED_COLLECTION)
    spark.sparkContext.setLogLevel('ERROR')
    try:
        nypd_df = create_df(spark)
        pd = SparkPDDE(nypd_df)
        pd.deaths_trend(img_out=f'{PATH}/police_deaths.png',
                        csv_out=f'{PATH}/police_deaths.csv')
        pd.deaths_cause_topN(img_out=f'{PATH}/police_top_deaths.png',
                             csv_out=f'{PATH}/police_top_deaths.csv')
        pd.deaths_states_topN(n=10,
                              img_out=f'{PATH}/police_deaths_state.png',
                              csv_out=f'{PATH}/police_deaths_state.csv')
        print('Done')
    except Exception as e:
        print(e)
    finally:
        # Fix: stop the session (which also stops its SparkContext) rather
        # than spark.sparkContext.stop(), which leaves the session active.
        spark.stop()
def main():
    """Script entry point: run the full Washington Post shootings
    analysis suite, writing image/CSV artifacts under PATH.

    The source DataFrame is cached because every report re-reads it.
    Errors are printed rather than raised (script-boundary handling).
    """
    spark = create_session(c.COLLECTION_NAME)
    spark.sparkContext.setLogLevel('ERROR')
    try:
        # Cached: the same frame feeds every report below.
        wpsdf = create_df(spark).cache()
        shoots = SparkShoots(wpsdf)
        monthly_df = shoots.monthly(img_out=True, csv_out=True, path=PATH)
        # yearly() aggregates from the monthly result.
        shoots.yearly(monthly_df, img_out=True, csv_out=True, path=PATH)
        shoots.kills_per_year(img_out=True, csv_out=True, path=PATH)
        shoots.agehist(img_out=True, path=PATH)
        shoots.races(img_out=True, csv_out=True, path=PATH)
        shoots.crimes_per_state(img_out=True, csv_out=True, path=PATH)
        shoots.armed(img_out=True, path=PATH)
        shoots.armed_or_not(img_out=True, csv_out=True, path=PATH)
        shoots.flee(img_out=True, csv_out=True, path=PATH)
        shoots.blacklivesmatter(img_out=True, csv_out=True, path=PATH)
        shoots.allrace(img_out=True, csv_out=True, path=PATH)
    except Exception as e:
        print(e)
    finally:
        # Fix: stop inside finally so the session is released even on
        # non-Exception exits (e.g. KeyboardInterrupt), matching the
        # sibling entry points.
        spark.stop()
def main():
    """Script entry point: emit every NYPD crime report as PNG + CSV.

    Each report writes `{PATH}/<stem>.png` and `{PATH}/<stem>.csv`;
    reports run in a fixed order and the Spark session is always
    stopped on exit.
    """
    spark = create_session(c.FILTERED_COLLECTION)
    spark.sparkContext.setLogLevel('ERROR')
    try:
        analyzer = SparkNYPD(create_df(spark))
        # (report method, output file stem) — note the stems that do not
        # simply mirror the method name.
        reports = [
            (analyzer.crimes_trend, 'crimes_trend'),
            (analyzer.crimes_top, 'crimes_top'),
            (analyzer.crimes_severity, 'crimes_severity'),
            (analyzer.crimes_severity_by_district, 'crimes_severity_by_district'),
            (analyzer.crimes_day_night, 'crimes_daynight'),
            (analyzer.crimes_race, 'crimes_race'),
            (analyzer.crimes_cross_districts, 'crimes_cross_districts'),
            (analyzer.crimes_cross_districts_race, 'crimes_districts_race'),
            (analyzer.cross_age_race, 'crimes_cross_age_race'),
            (analyzer.cross_crime_race, 'crimes_cross_crime_race'),
        ]
        for report, stem in reports:
            report(img_out=f'{PATH}/{stem}.png', csv_out=f'{PATH}/{stem}.csv')
        print('Done')
    except Exception as err:
        print(err)
    finally:
        spark.stop()