Example #1
from shapely.geometry import shape, Point


def stationsInGeometry(geometry, stations):
    # Collect the stations that fall inside the given geometry, preceded by a header row.
    intersect_stations = []
    station_index = 0
    intersect_stations.append(
        ["ID", "NAME", "LONG", "LAT", "ELEVATION", "STATIONID"])
    # Build a shapely geometry from the first GeoJSON-like feature.
    geometry = shape(geometry[0]['geometry'])
    print("Number of stations: " + str(len(stations["results"])))
    for station in stations["results"]:
        # Station coordinates are assumed to be WGS84 longitude/latitude, matching the
        # CRS of the input geometry (shapely geometries themselves carry no CRS).
        point = Point(station["longitude"], station["latitude"])
        if geometry.contains(point):
            station_index += 1
            add_station = [
                station_index, station["name"], station["longitude"],
                station["latitude"], station["elevation"], station["id"]
            ]
            intersect_stations.append(add_station)
    print("Number of stations in geometry: " + str(station_index))
    return intersect_stations
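# --- Usage sketch (illustrative only; not part of the original example) ---
# A minimal, hypothetical call to stationsInGeometry(). `features` stands in for a
# list of GeoJSON-like features and `stations_response` mimics the "results" payload
# shape used above; all names and values below are invented for illustration.
features = [{
    "geometry": {
        "type": "Polygon",
        "coordinates": [[[-82.0, 28.0], [-80.0, 28.0],
                         [-80.0, 29.0], [-82.0, 29.0], [-82.0, 28.0]]]
    }
}]
stations_response = {
    "results": [
        {"id": "ST1", "name": "Station One", "longitude": -81.3,
         "latitude": 28.5, "elevation": 25.0},
        {"id": "ST2", "name": "Station Two", "longitude": -75.0,
         "latitude": 40.0, "elevation": 10.0},
    ]
}
rows = stationsInGeometry(features, stations_response)
print(rows)  # header row plus "Station One", the only station inside the polygon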
    # Method from a separate, class-based example. It relies on module-level imports of
    # pandas as pd, geopandas as gpd, shapely.geometry.Point,
    # sklearn.neighbors.NearestNeighbors and sklearn.naive_bayes.MultinomialNB / GaussianNB.
    def do_KNN_and_naive_bayes(self):
        new_p = self.new_p
        # new_p = [-81.33..., 28.5380]  # [longitude, latitude] of the point whose zip code we want
        df = pd.read_csv('core/tweets_location.csv')
        # KNN on the longitude/latitude of the geotagged tweets.
        n_neighbors = 5
        samples = df[['longi', 'lat']].values
        # The original also created a second, unused NearestNeighbors(algorithm='auto',
        # metric='haversine') instance; it was never fitted, so the query below uses the
        # default Euclidean metric on (longitude, latitude) in degrees.
        neigh = NearestNeighbors(n_neighbors=n_neighbors)
        neigh.fit(samples)
        a, b = neigh.kneighbors([new_p])  # distances and indices of the k nearest tweets
        candidates, tweetzip, candidate_loc, annotation, raw_tweets = self.most_frequent(
            a, b, df)
        # Build a one-row dataframe whose columns match the training file.
        new_point = pd.DataFrame(index=range(1))
        new_point['predicted_k=%s' % str(n_neighbors)] = tweetzip
        # Make a shapely point for the query location; the CRS is attached to the
        # GeoDataFrame further below, since shapely geometries carry no CRS themselves.
        sp = Point(new_p[0], new_p[1])
        # Load the four zip-code polygon layers (all expected to be in WGS84).
        usps = gpd.GeoDataFrame.from_file('core/maps/usps_wgs84.shp')
        esri = gpd.GeoDataFrame.from_file('core/maps/esri_wgs84.shp')
        zcta = gpd.GeoDataFrame.from_file('core/maps/zcta_wgs84.shp')
        map1 = gpd.GeoDataFrame.from_file('core/maps/map1_wgs84.shp')

        # Pin every layer to WGS84 / EPSG:4326.
        usps.crs = 'EPSG:4326'
        esri.crs = 'EPSG:4326'
        zcta.crs = 'EPSG:4326'
        map1.crs = 'EPSG:4326'

        # Restrict each layer to the zip codes of interest (codes above 30000).
        usps = usps.loc[usps.zipc > 30000]
        esri = esri.loc[esri.ZIP_number > 30000]
        zcta.ZCTA5CE10 = zcta.ZCTA5CE10.astype(int)
        zcta = zcta.loc[zcta.ZCTA5CE10 > 30000]
        map1.ZIP = map1.ZIP.astype(int)
        map1 = map1.loc[map1.ZIP > 30000]

        # Spatially join the query point against each polygon layer to read off its zip code.
        pnt = gpd.GeoDataFrame(geometry=[sp], crs='EPSG:4326')
        usps_int = gpd.sjoin(pnt,
                             usps[['zipc', 'geometry']],
                             how='left',
                             predicate='intersects')

        new_point['tweet_usps'] = usps_int.zipc
        esri_int = gpd.sjoin(pnt,
                             esri[['ZIP_number', 'geometry']],
                             how='left',
                             predicate='intersects')
        zcta_int = gpd.sjoin(pnt,
                             zcta[['ZCTA5CE10', 'geometry']],
                             how='left',
                             predicate='intersects')
        map1_int = gpd.sjoin(pnt,
                             map1[['ZIP', 'geometry']],
                             how='left',
                             predicate='intersects')

        new_point['tweet_map'] = map1_int.ZIP
        new_point['tweet_zcta'] = zcta_int.ZCTA5CE10
        new_point['tweet_esri'] = esri_int.ZIP_number
        new_point = new_point[[
            'tweet_usps', 'tweet_map', 'tweet_zcta', 'tweet_esri',
            'predicted_k=5'
        ]]

        # Training table, presumably built offline from geotagged tweets; it is expected to
        # contain the same four feature columns used below plus the ground-truth 'tweet_usps' zip code.
        naive_train = pd.read_csv('core/k5_forNaive_allmap.csv')

        X_train = naive_train[[
            'tweet_map', 'tweet_zcta', 'tweet_esri', 'predicted_k=5'
        ]].values
        y_train = naive_train[['tweet_usps']].values
        X_test = new_point[[
            'tweet_map', 'tweet_zcta', 'tweet_esri', 'predicted_k=5'
        ]].values
        y_test = new_point[['tweet_usps']].values

        # Multinomial naive Bayes prediction.
        clf = MultinomialNB().fit(X_train, y_train.ravel())
        y_pred = clf.predict(X_test)
        zip_prob = pd.DataFrame(clf.predict_proba(X_test),
                                columns=clf.classes_)
        # return int(y_pred)
        # return zip_prob.to_json()  ## to return all the zips with probabilities

        # Gaussian naive Bayes prediction.
        clf_gs = GaussianNB().fit(X_train, y_train.ravel())
        y_pred_gs = clf_gs.predict(X_test)
        zip_prob_gs = pd.DataFrame(clf_gs.predict_proba(X_test),
                                   columns=clf_gs.classes_)
        return {
            "zip_with_probs": zip_prob_gs.to_json(),
            "candidates": candidates,
            "candidate_loc": candidate_loc,
            "candidate_annotation": annotation,
            "raw_tweets": raw_tweets
        }
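# --- Minimal sketch (illustrative only; not part of the original example) ---
# Shows the two core lookups the method combines: k-nearest neighbours over known
# (longitude, latitude) samples, then a point-in-polygon spatial join to read a
# zip-code attribute off a polygon layer. All data below is invented toy data;
# column names merely mirror the ones used above.
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, box
from sklearn.neighbors import NearestNeighbors

# Toy "tweet" samples with known zip codes.
df = pd.DataFrame({
    'longi': [-81.30, -81.40, -81.35, -80.10],
    'lat':   [28.50, 28.55, 28.52, 26.10],
    'zip':   [32801, 32804, 32803, 33301],
})
neigh = NearestNeighbors(n_neighbors=3).fit(df[['longi', 'lat']].values)
dist, idx = neigh.kneighbors([[-81.33, 28.53]])
print("nearest zips:", df.loc[idx[0], 'zip'].tolist())

# Toy polygon layer standing in for the usps/esri/zcta/map1 shapefiles.
polys = gpd.GeoDataFrame(
    {'zipc': [32801, 33301]},
    geometry=[box(-81.5, 28.4, -81.2, 28.6), box(-80.3, 26.0, -80.0, 26.2)],
    crs='EPSG:4326')
pnt = gpd.GeoDataFrame(geometry=[Point(-81.33, 28.53)], crs='EPSG:4326')
joined = gpd.sjoin(pnt, polys[['zipc', 'geometry']], how='left',
                   predicate='intersects')
print("zip from polygon layer:", int(joined.zipc.iloc[0]))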