Code example #1
def main():
    """Entry point: run the census race-by-borough analysis.

    Builds a Spark session over the filtered collection, emits the
    plot and CSV artifacts under PATH, and always stops the session.
    """
    spark = create_session(c.FILTERED_COLLECTION)
    spark.sparkContext.setLogLevel('ERROR')

    try:
        # Wrap the loaded DataFrame in the analysis helper and run the report.
        census = SparkCensus(create_df(spark))
        census.race_by_borough(img_out=True, csv_out=True, path=PATH)
    except Exception as err:
        # Top-level boundary: report and fall through to cleanup.
        print(err)
    finally:
        spark.stop()
Code example #2
def main():
    """Entry point: render the polls map from the politics collection.

    Creates the Spark session, runs the map report with image output
    under PATH, and guarantees session shutdown via ``finally``.
    """
    spark = create_session(c.COLLECTION_NAME)
    spark.sparkContext.setLogLevel('ERROR')

    try:
        # Load the collection and hand it to the politics analyzer.
        politics = SparkPolitics(create_df(spark))
        politics.polls_map(img_out=True, path=PATH)
    except Exception as err:
        # Top-level boundary: report and fall through to cleanup.
        print(err)
    finally:
        spark.stop()
Code example #3
def main():
    """Entry point: train and evaluate a One-vs-Rest classifier.

    Loads the Washington Post shootings data, splits it 80/20 into
    train/test, fits a multiclass One-vs-Rest logistic-regression
    model, and prints test accuracy and error. The Spark session is
    always stopped on exit.
    """
    spark = create_session('wash_post_shootings')
    spark.sparkContext.setLogLevel('ERROR')

    try:
        df = create_df(spark)

        sh = Shootings(df)
        sh.show()

        # Shootings may transform/feature-engineer the frame; use its output.
        df = sh.get_df()

        (train, test) = df.randomSplit([0.8, 0.2])

        # instantiate the base classifier.
        lr = LogisticRegression(maxIter=10,
                                tol=1E-6,
                                fitIntercept=True,
                                featuresCol='features',
                                labelCol='label')

        # instantiate the One Vs Rest Classifier.
        ovr = OneVsRest(classifier=lr)

        # train the multiclass model.
        ovrModel = ovr.fit(train)

        # score the model on test data.
        predictions = ovrModel.transform(test)

        # obtain evaluator.
        evaluator = MulticlassClassificationEvaluator(metricName="accuracy")

        # compute the classification error on test data.
        accuracy = evaluator.evaluate(predictions)
        print("Test Error = %g" % (1.0 - accuracy))
        print("Accuracy = %.2f" % (accuracy * 100))

    except Exception as e:
        print(e)
    finally:
        # Fix: stop the SparkSession itself (stops the underlying
        # SparkContext too), consistent with the other entry points,
        # rather than only spark.sparkContext.stop().
        spark.stop()
Code example #4
def main():
    """Entry point: run the police-deaths reports.

    Produces the deaths trend, top causes, and top-10 states reports,
    each written as a PNG and CSV under PATH. The session is always
    stopped on exit.
    """
    spark = create_session(c.FILTERED_COLLECTION)
    spark.sparkContext.setLogLevel('ERROR')

    try:
        nypd_df = create_df(spark)

        pd = SparkPDDE(nypd_df)
        pd.deaths_trend(img_out=f'{PATH}/police_deaths.png',
                        csv_out=f'{PATH}/police_deaths.csv')
        pd.deaths_cause_topN(img_out=f'{PATH}/police_top_deaths.png',
                             csv_out=f'{PATH}/police_top_deaths.csv')
        pd.deaths_states_topN(n=10, img_out=f'{PATH}/police_deaths_state.png',
                              csv_out=f'{PATH}/police_deaths_state.csv')

        print('Done')
    except Exception as e:
        print(e)
    finally:
        # Fix: stop the SparkSession itself (stops the underlying
        # SparkContext too), consistent with the other entry points,
        # rather than only spark.sparkContext.stop().
        spark.stop()
Code example #5
def main():
    """Entry point: run all shootings reports over the cached dataset.

    Caches the source DataFrame (reused by every report), runs the full
    battery of plot/CSV reports under PATH, and always stops the Spark
    session on exit.
    """
    spark = create_session(c.COLLECTION_NAME)
    spark.sparkContext.setLogLevel('ERROR')

    try:
        # cache(): the frame is scanned once per report below.
        wpsdf = create_df(spark).cache()
        shoots = SparkShoots(wpsdf)

        monthly_df = shoots.monthly(img_out=True, csv_out=True, path=PATH)
        shoots.yearly(monthly_df, img_out=True, csv_out=True, path=PATH)
        shoots.kills_per_year(img_out=True, csv_out=True, path=PATH)
        shoots.agehist(img_out=True, path=PATH)
        shoots.races(img_out=True, csv_out=True, path=PATH)
        shoots.crimes_per_state(img_out=True, csv_out=True, path=PATH)
        shoots.armed(img_out=True, path=PATH)
        shoots.armed_or_not(img_out=True, csv_out=True, path=PATH)
        shoots.flee(img_out=True, csv_out=True, path=PATH)
        shoots.blacklivesmatter(img_out=True, csv_out=True, path=PATH)
        shoots.allrace(img_out=True, csv_out=True, path=PATH)
    except Exception as e:
        print(e)
    finally:
        # Fix: the original stopped the session only in the except
        # branch, leaking it on the success path. Stop unconditionally.
        spark.stop()
Code example #6
def main():
    """Entry point: run every NYPD crime report.

    Each report writes a PNG and a CSV named after the report under
    PATH. Reports run in a fixed order; the session is always stopped.
    """
    spark = create_session(c.FILTERED_COLLECTION)
    spark.sparkContext.setLogLevel('ERROR')

    try:
        ny = SparkNYPD(create_df(spark))

        # (bound method, output file stem) pairs, executed in order.
        reports = (
            (ny.crimes_trend, 'crimes_trend'),
            (ny.crimes_top, 'crimes_top'),
            (ny.crimes_severity, 'crimes_severity'),
            (ny.crimes_severity_by_district, 'crimes_severity_by_district'),
            (ny.crimes_day_night, 'crimes_daynight'),
            (ny.crimes_race, 'crimes_race'),
            (ny.crimes_cross_districts, 'crimes_cross_districts'),
            (ny.crimes_cross_districts_race, 'crimes_districts_race'),
            (ny.cross_age_race, 'crimes_cross_age_race'),
            (ny.cross_crime_race, 'crimes_cross_crime_race'),
        )
        for report, stem in reports:
            report(img_out=f'{PATH}/{stem}.png',
                   csv_out=f'{PATH}/{stem}.csv')

        print('Done')
    except Exception as err:
        # Top-level boundary: report and fall through to cleanup.
        print(err)
    finally:
        spark.stop()