Beispiel #1
0
def queryToCollection(typology):
    """Query the hard-coded Torino / car2go time window for one record type.

    Args:
        typology: ``"bookings"`` or ``"parkings"``; selects which collection
            name is looked up. Any other value raises ``KeyError``.

    Returns:
        Whatever ``find`` returns on the object produced by
        ``sf.setup_mongodb`` (presumably a pymongo cursor -- confirm),
        filtered to documents whose ``"city"`` is ``"Torino"`` and whose
        ``"init_time"`` lies strictly between the two window bounds.
    """
    city = "Torino"
    provider = "car2go"
    date_format = "%Y-%m-%dT%H:%M:%S"

    # typology -> provider -> collection name
    collection_names = {
        "bookings": {
            "enjoy": "enjoy_PermanentBookings",
            "car2go": "PermanentBookings",
        },
        "parkings": {
            "enjoy": "enjoy_PermanentParkings",
            "car2go": "PermanentParkings",
        },
    }
    collection = collection_names[typology][provider]

    def to_epoch(text):
        # time.mktime interprets the struct_time in the machine's local
        # timezone, so the resulting bounds depend on where this runs.
        parsed = datetime.datetime.strptime(text, date_format)
        return int(time.mktime(parsed.timetuple()))

    window_start = to_epoch("2017-9-5T00:00:00")
    window_end = to_epoch("2017-11-2T00:00:00")

    mongo_collection = sf.setup_mongodb(collection)
    return mongo_collection.find({
        "city": city,
        "init_time": {
            "$gt": window_start,
            "$lt": window_end
        }
    })
Beispiel #2
0
def main():
    """Build the booking-event dictionaries for gv.city / gv.provider and pickle them.

    Steps, in order:

    1. Query the bookings whose ``init_time`` lies strictly between
       ``gv.initDate`` and ``gv.finalDate``.
    2. Keep the bookings with ``120 < duration < 3600`` and a haversine
       distance ``> 500`` whose two endpoints both pass
       ``sf.checkPerimeter`` (units are presumably seconds and metres --
       confirm against ``sf.haversine``). Accepted bookings are registered
       in ``id_events``, ``dict_bookings`` and ``dict_bookings_short``,
       which are not assigned here and therefore live outside this function.
    3. Pickle ``dict_bookings`` and ``id_events``.
    4. Remove every booking that starts at a timestamp holding more than 30
       booking starts.
    5. Replace the remaining ``[id, "s"/"e"]`` markers with the matching
       ``EventBook`` objects, sort both dictionaries by timestamp and
       pickle them.
    """
    if gv.provider == "car2go":
        collection = "PermanentBookings"
    else:
        collection = "enjoy_PermanentBookings"
    booking_collection = sf.setup_mongodb(collection)

    bookings = booking_collection.find({
        "city": gv.city,
        "init_time": {"$gt": gv.initDate, "$lt": gv.finalDate},
    })

    next_id = 0  # progressive booking id
    accepted = 0  # counted but never reported
    NumEventsFiltered = 0
    Discarted = 0

    for booking in bookings:
        start_ts = booking['init_time']
        end_ts = booking['final_time']
        duration = end_ts - start_ts

        coords = booking['origin_destination']['coordinates']
        origin = coords[0]
        destination = coords[1]
        lon1, lat1 = origin[0], origin[1]
        lon2, lat2 = destination[0], destination[1]
        trip_length = sf.haversine(lon1, lat1, lon2, lat2)

        if not (120 < duration < 3600 and trip_length > 500):
            Discarted += 1
            continue

        # NOTE(review): bookings rejected by the perimeter check are not
        # counted in Discarted -- confirm this is intended.
        if not (sf.checkPerimeter(lat1, lon1)
                and sf.checkPerimeter(lat2, lon2)):
            continue

        accepted += 1
        id_events[next_id] = [
            start_ts,
            end_ts,
            EventBook(next_id, "s", origin),
            EventBook(next_id, "e", destination),
        ]
        dict_bookings.setdefault(start_ts, []).append([next_id, "s"])
        dict_bookings.setdefault(end_ts, []).append([next_id, "e"])
        next_id += 1

        # NOTE(review): the id used below is the already-incremented one, so
        # the short dictionary's EventBook ids are one higher than the ids
        # stored in id_events for the same booking -- confirm this is wanted.
        if next_id < 1000:
            dict_bookings_short.setdefault(start_ts, []).append(
                EventBook(next_id, "s", origin))
            dict_bookings_short.setdefault(end_ts, []).append(
                EventBook(next_id, "e", destination))

    output_prefix = "../events/" + gv.city + "_" + gv.provider

    with open(output_prefix + "_dict_bookings.pkl", 'wb') as handle:
        pickle.dump(dict_bookings, handle)

    with open(output_prefix + "_id_events.pkl", 'wb') as handle:
        pickle.dump(id_events, handle)

    print("End Pickles")
    print("Start")

    # Find the timestamps at which more than 30 bookings start.
    to_delete = []
    EventDeleted = 0
    for stamp in dict_bookings:
        starts_here = sum(
            1 for event in dict_bookings[stamp] if event[1] == "s")
        if starts_here > 30:
            EventDeleted += starts_here
            to_delete.append(stamp)

    # Drop both the start and the end marker of every booking that starts at
    # one of those timestamps.
    for stamp in to_delete:
        doomed_ids = [
            event[0] for event in dict_bookings[stamp] if event[1] == "s"
        ]

        for booking_id in doomed_ids:
            InitTime = id_events[booking_id][0]
            FinalTime = id_events[booking_id][1]
            # Both positions are resolved before anything is deleted.
            init_pos = dict_bookings[InitTime].index([booking_id, "s"])
            final_pos = dict_bookings[FinalTime].index([booking_id, "e"])

            del dict_bookings[InitTime][init_pos]
            del dict_bookings[FinalTime][final_pos]

        # Only the start timestamp is pruned when it becomes empty; a
        # FinalTime entry emptied above stays in place with an empty list.
        if not dict_bookings[stamp]:
            del dict_bookings[stamp]

    # Swap the surviving [id, kind] markers for their EventBook objects.
    for stamp in dict_bookings:
        slot = dict_bookings[stamp]
        for pos, marker in enumerate(slot):
            NumEventsFiltered += 1
            event_index = 2 if marker[1] == "s" else 3
            slot[pos] = id_events[marker[0]][event_index]

    print("CPE, Num Events Filtered + Event deleted:",
          NumEventsFiltered + EventDeleted)
    print("CPE, Num Events Filtered:", NumEventsFiltered)
    print("CPE, Event Deleted:", EventDeleted)
    print("CPE, Dicarded:", Discarted)

    ordered_dict_booking = collections.OrderedDict(
        sorted(dict_bookings.items()))
    ordered_dict_booking_short = collections.OrderedDict(
        sorted(dict_bookings_short.items()))

    with open(output_prefix + "_sorted_dict_events_obj.pkl", 'wb') as handle:
        pickle.dump(ordered_dict_booking, handle)

    with open(output_prefix + "_sorted_dict_events_obj_short.pkl",
              'wb') as handle:
        pickle.dump(ordered_dict_booking_short, handle)

    print("CPE, end\n")
Beispiel #3
0
def formatBookings():
    """Load the bookings of gv.city / gv.provider into a filtered DataFrame.

    The bookings whose ``init_time`` lies strictly between ``gv.initDate``
    and ``gv.finalDate`` are loaded into a DataFrame. The unfiltered frame
    is pickled to ``../input/bookings_<city>`` (after the Vancouver time
    shift, before any derived column is added). The frame is then
    flattened into scalar columns and filtered.

    Returns:
        A DataFrame without the ``driving`` and ``origin_destination``
        columns and with the added columns ``duration``, ``type``,
        ``start_lon``, ``start_lat``, ``end_lon``, ``end_lat`` and
        ``distance``. Only rows with ``distance >= 700`` and
        ``120 <= duration <= 3600`` are kept; for Torino, rows with
        ``start_lon > 7.8`` are dropped as well.

    Raises:
        KeyError: if an expected column (e.g. ``final_time``, ``driving``)
            is missing, which includes the case of an empty query result.
    """
    collection = "enjoy_PermanentBookings"
    if gv.provider == "car2go":
        collection = "PermanentBookings"
    enjoy_bookings = sf.setup_mongodb(collection)

    print("***********************")
    print("city", gv.city)
    print(
        "initDate ",
        datetime.datetime.fromtimestamp(int(
            gv.initDate)).strftime('%Y-%m-%d %H:%M:%S'))
    print(
        "fianlDate",
        datetime.datetime.fromtimestamp(int(
            gv.finalDate)).strftime('%Y-%m-%d %H:%M:%S'))
    print("***********************")

    bookings = enjoy_bookings.find({
        "city": gv.city,
        "init_time": {
            "$gt": gv.initDate,
            "$lt": gv.finalDate
        }
    })

    bookings_df = pd.DataFrame(list(bookings))

    if gv.city == "Vancouver":
        # 25200 s = 7 h; presumably a fixed UTC offset correction -- confirm.
        bookings_df["init_time"] = bookings_df["init_time"].sub(25200)
        bookings_df["final_time"] = bookings_df["final_time"].sub(25200)

    bookings_df.to_pickle('../input/bookings_' + gv.city)

    bookings_df["duration"] = (
        bookings_df["final_time"] - bookings_df["init_time"]).astype(int)
    # `axis` must be passed by keyword: pandas >= 2.0 rejects the positional
    # form drop(label, 1) with a TypeError.
    bookings_df = bookings_df.drop('driving', axis=1)

    bookings_df['type'] = bookings_df.origin_destination.apply(
        lambda x: x['type'])
    bookings_df['coordinates'] = bookings_df.origin_destination.apply(
        lambda x: x['coordinates'])
    bookings_df = bookings_df.drop('origin_destination', axis=1)

    # NOTE(review): coordinates[0] is stored as 'end' and coordinates[1] as
    # 'start'. If the pair is ordered origin -> destination this is
    # inverted; left unchanged because callers may rely on it -- confirm.
    bookings_df['end'] = bookings_df.coordinates.apply(lambda x: x[0])
    bookings_df['start'] = bookings_df.coordinates.apply(lambda x: x[1])
    bookings_df = bookings_df.drop('coordinates', axis=1)

    bookings_df['start_lon'] = bookings_df.start.apply(lambda x: float(x[0]))
    bookings_df['start_lat'] = bookings_df.start.apply(lambda x: float(x[1]))
    bookings_df = bookings_df.drop('start', axis=1)

    bookings_df['end_lon'] = bookings_df.end.apply(lambda x: float(x[0]))
    bookings_df['end_lat'] = bookings_df.end.apply(lambda x: float(x[1]))
    bookings_df = bookings_df.drop('end', axis=1)

    bookings_df['distance'] = bookings_df.apply(
        lambda x: sf.haversine(float(x['start_lon']), float(x['start_lat']),
                               float(x['end_lon']), float(x['end_lat'])),
        axis=1)

    bookings_df = bookings_df[bookings_df["distance"] >= 700]
    bookings_df = bookings_df[bookings_df["duration"] >= 120]
    bookings_df = bookings_df[bookings_df["duration"] <= 3600]

    if gv.city == "Torino":
        bookings_df = bookings_df[bookings_df["start_lon"] <= 7.8]

    return bookings_df
Beispiel #4
0
def main():
    """Pick the initial car placement for gv.city / gv.provider and pickle it.

    The parkings whose ``init_time`` lies strictly between ``gv.initDate``
    and ``gv.finalDate`` are scanned. For each plate seen inside the
    perimeter (``sf.checkPerimeter``), a ``PlatesData`` built from that
    parking's ``init_time`` and coordinates is stored in ``dict_plates``.
    ``dict_plates`` is not assigned here, so it lives outside this function.
    A stored entry is replaced when a later document for the same plate has
    an ``init_time`` that is not newer than the stored timestamp. Scanning
    stops once the target fleet size has been reached.

    The target fleet size comes from ``gv.fleetSize``: either a numeric
    value, or one of ``"mean"``, ``"max"``, ``"min"``, which is resolved
    from the number of distinct plates seen per day in the same query.

    Returns:
        ``-1`` if ``gv.fleetSize`` is not a recognised value, otherwise
        ``None`` after writing
        ``../input/<city>_<provider>_plates_appeareance_obj.pkl``.
    """
    collection = "enjoy_PermanentParkings"
    if gv.provider == "car2go":
        collection = "PermanentParkings"

    collection_parkings = sf.setup_mongodb(collection)
    parkings = collection_parkings.find({
        "city": gv.city,
        "init_time": {"$gt": gv.initDate, "$lt": gv.finalDate},
    })

    # str() lets an int fleetSize through as well as a numeric string.
    fleet_spec = str(gv.fleetSize)
    if fleet_spec.isnumeric():
        realFleetSize = int(fleet_spec)
    elif fleet_spec in ("mean", "max", "min"):
        # The statistics need their own pass over the data, so they use a
        # clone and the main cursor stays unconsumed for the loop below.
        parkings_copy = parkings.clone()
        try:
            df = pd.DataFrame(list(parkings_copy))
        finally:
            parkings_copy.close()

        df["DailyDate"] = df["init_date"].apply(
            lambda x: x.strftime("%Y/%m/%d"))
        carsPerDay = df.groupby('DailyDate')["plate"].nunique()

        if fleet_spec == "mean":
            realFleetSize = int(round(carsPerDay.mean()))
        elif fleet_spec == "max":
            realFleetSize = int(carsPerDay.max())
        else:
            realFleetSize = int(carsPerDay.min())
    else:
        # Rejected before any data is loaded.
        print("CCID, ERROR wrong fleetSize Value: " + str(gv.fleetSize))
        return -1

    print("CCID, realFleetSize:", str(realFleetSize),
          "gv.fleetSize:", str(gv.fleetSize))

    currentFleetSize = 0
    try:
        for val in parkings:
            coords = val['loc']['coordinates']
            lon1 = coords[0]
            lat1 = coords[1]

            if currentFleetSize >= realFleetSize:
                print("CCID, len dict_plates:" + str(len(dict_plates))
                      + "FleetSize:" + str(realFleetSize))
                print("CCID, cfs", currentFleetSize)
                break

            if not sf.checkPerimeter(lat1, lon1):
                print("problem")
                continue

            plate = val['plate']
            if plate not in dict_plates:
                dict_plates[plate] = PlatesData(val['init_time'], coords)
                currentFleetSize += 1
            elif dict_plates[plate].timestamp >= val['init_time']:
                # The dataset may not be ordered by time: keep the record
                # with the earliest init_time for each plate.
                dict_plates[plate] = PlatesData(val['init_time'], coords)
    finally:
        # The loop may stop before the cursor is exhausted.
        parkings.close()

    print("CCID", "Seen cars:", len(dict_plates))
    print("cfs", currentFleetSize)

    with open("../input/" + gv.city + "_" + gv.provider
              + "_plates_appeareance_obj.pkl", 'wb') as handle:
        pickle.dump(dict_plates, handle)

    print("CCID, col:", gv.NColumns, " row:", gv.NRows)
    print("CCID, End\n")
    return