# Note: MongoDB, GridDB, Grid, STL and load_tweets are assumed to be imported
# elsewhere in this module; datetime is needed by all_grids() below.
from datetime import datetime


def load_tweets_to_grids():
    """Read all tweets back from MongoDB and load them into a GridDB."""
    mg = MongoDB()
    mg.connect()

    tweets = mg.find()

    grid_db = GridDB()
    grid_db.add(tweets)

    return grid_db
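

# Usage sketch: write_tweets_to_mongo() (below) must have run once beforehand,
# since load_tweets_to_grids() reads the collection that function populates.
#
#   grid_db = load_tweets_to_grids()
#   tweets = grid_db.get_tweets()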


def all_grids():
    """Query per-grid tweet volumes for a one-month window around Hurricane
    Sandy, build a daily time series, and run a seasonal decomposition."""
    mg = MongoDB()
    mg.connect()
    grid_db = GridDB()

    print('querying grid volumes...')
    start = datetime.strptime('2012-10-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ')
    end = datetime.strptime('2012-11-15T20:00:02Z', '%Y-%m-%dT%H:%M:%SZ')
    results = mg.group_by([{'$match': {'created_at': {'$gt': start, '$lt': end}}}])
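
    # Roughly the pymongo equivalent of the wrapped call above (a sketch:
    # the collection name and grouping key are assumptions, since group_by()
    # presumably appends its own $group stage to the pipeline):
    #
    #   db.tweets.aggregate([
    #       {'$match': {'created_at': {'$gt': start, '$lt': end}}},
    #       {'$group': {'_id': '$grid_id', 'count': {'$sum': 1}}},
    #   ])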

    grid_db.add(results)

    ret = Grid.get_raw_pandas_ts(results, 'D')

    STL.seasonal_decomposition(ret)
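

# A sketch of the decomposition step using statsmodels as a stand-in (an
# assumption: the project's own STL wrapper is not shown here). `series` is
# assumed to be the daily pandas Series produced by get_raw_pandas_ts().
def decompose_daily_volume_sketch(series):
    # Local import, aliased so it does not shadow the project's own STL.
    from statsmodels.tsa.seasonal import STL as StatsmodelsSTL
    result = StatsmodelsSTL(series, period=7).fit()  # period=7: weekly cycle in daily data
    return result.trend, result.seasonal, result.resid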


def write_tweets_to_mongo():
    """Load tweets, assign each tweet to a grid cell, and insert the tweets
    into MongoDB.

    This function is the initial step of the pipeline: all subsequent
    db-specific operations work on the dataset inserted here, so it is
    only called once for the entire pipeline.
    """
    data_file = "data/sandy_all.txt"
    kml_file = ["data/nj_ct.kml", "data/nyc_ct_sea.kml"]
    # kml_file = ['data/nyc_cb_sea.kml']

    tweets = load_tweets(data_file)

    # Build the grid index from the KML files.
    grid_db = GridDB()
    for kml in kml_file:
        grid_db.load_grid_from_file(kml)
    # grid_db.write_grids_to_json('shapefile.json')

    grid_db.check_and_add(tweets)
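
    # check_and_add() presumably performs the point-in-polygon test that
    # assigns each tweet to a grid cell. A rough sketch of that test with
    # shapely (an assumption: the grid/tweet attribute names are hypothetical
    # and the project's actual implementation is not shown):
    #
    #   from shapely.geometry import Point
    #   cell = next((g for g in grid_db.grids
    #                if g.polygon.contains(Point(t.lon, t.lat))), None)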

    # Index the gridded tweets into MongoDB.
    mg = MongoDB()
    mg.connect()
    print("connected...")
    mg.drop()  # start from a clean collection

    # Serialize the tweets to plain dicts for insertion.
    docs = [t.to_dict() for t in grid_db.get_tweets()]

    print("inserting...")
    mg.insert_tweets(docs)
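

# Minimal driver sketch (an assumption: how this module is meant to be run is
# not shown). The ordering follows the docstring above: write_tweets_to_mongo()
# is the one-time ingestion step, and the other functions read its output.
if __name__ == '__main__':
    write_tweets_to_mongo()   # run once to populate MongoDB
    all_grids()               # query grid volumes and decompose the time series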