Example #1
0
    def runPipeline(self):
        '''
        Run one recommendation pass over every user returned by
        get_recommender_uuid_list().

        For each user: load the trips to improve, fetch their alternatives,
        prepare the feature vectors and select the user's utility model.
        When a model is available, every trip that has at least one
        alternative is passed to the model's predict(); if the prediction
        differs from the original trip, the original is marked with the
        recommendation via mark_recommended().

        Returns None; results are recorded through mark_recommended().
        '''
        for user_uuid in get_recommender_uuid_list():
            # NOTE(review): this list is filled below but never returned or
            # stored by this method - confirm whether callers need it.
            recommend_trips = []
            # Converting to a list because otherwise, when we prepare feature
            # vectors, the iterator is all used up
            trips_to_improve = list(self.get_trips_to_improve(user_uuid))
            alternatives = atm.get_alternative_for_trips(trips_to_improve)
            trips_with_alts = self.prepare_feature_vectors(trips_to_improve, alternatives)
            # Values are passed as logging arguments instead of being
            # formatted with "%": a tuple operand would otherwise be unpacked
            # as several format arguments and raise a TypeError.
            logging.debug("trips_with_alts = %s", trips_with_alts)
            user_model = self.get_selected_user_utility_model(user_uuid, trips_to_improve)
            if not user_model:
                logging.debug("No user model found, skipping")
                continue

            for original_trip in trips_to_improve:
                logging.debug("user_model = %s", user_model)
                if not list(atm.get_alternative_for_trip(original_trip)):
                    logging.debug("trip = %s has no alternatives, skipping...", original_trip._id)
                    continue
                logging.debug("considering for recommendation, original trip = %s ", original_trip.__dict__)
                recommended_trip = user_model.predict(original_trip)
                logging.debug("recommended_trip = %s", recommended_trip)
                if original_trip != recommended_trip:
                    logging.debug("recommended trip is different, setting it")
                    original_trip.mark_recommended(recommended_trip)
                    recommend_trips.append(recommended_trip)
                else:
                    logging.debug("Original Trip is best")
  def predict(self, trip):
      '''
      Return the highest-utility choice among `trip` and its alternatives.

      Features come from extract_features_for_trips([trip]). Non-finite
      values are replaced by np.nan_to_num, magnitudes below .001 or above
      1000000 are zeroed, and rows that end up all-zero are ignored. Each
      remaining row is scored with self.predict_utility and the first row
      with the highest utility wins.

      Returns `trip` itself when the original trip scores best or when no
      row can be scored; otherwise the corresponding element of the
      alternatives list.
      '''
      alts = list(atm.get_alternative_for_trip(trip))
      trip_features, labels = self.extract_features_for_trips([trip])
      trip_features = np.nan_to_num(trip_features)
      trip_features[np.abs(trip_features) < .001] = 0
      trip_features[np.abs(trip_features) > 1000000] = 0
      logging.debug("trip_features = %s", trip_features)
      nonzero = ~np.all(trip_features == 0, axis=1)
      logging.debug("nonzero count = %d", np.count_nonzero(nonzero))
      # Keep the unfiltered row number of every surviving row: row 0 is the
      # original trip and row k is alts[k - 1], and that mapping would be
      # lost if an all-zero row in the middle were dropped.
      kept_rows = np.flatnonzero(nonzero)
      trip_features = trip_features[nonzero]
      labels = labels[nonzero]
      logging.debug("len(labels) = %d, len(alts) = %d", len(labels), len(alts))

      best_trip = None
      best_utility = float("-inf")
      for i, trip_feature in enumerate(trip_features):
          utility = self.predict_utility(trip_feature)
          if utility > best_utility:
              best_trip = i
              best_utility = utility

      if best_trip is None:
          # Nothing could be scored (no usable rows, or no utility above
          # -inf); indexing with None would fail, so keep the original trip.
          logging.debug("No scorable feature rows, keeping the original trip")
          return trip

      source_row = int(kept_rows[best_trip])
      if labels[best_trip] == 1:
          logging.debug("Model predicts best trip is: ORIGINAL TRIP (%d)", source_row)
          logging.debug("labels[best_trip] == %d", labels[best_trip])
          return trip

      logging.debug("Model predicts best trip is: Alternative TRIP (%d)", source_row)
      logging.debug("best_trip = %s, out of %d alternatives ", source_row, len(alts))
      best_alternative = alts[source_row - 1]
      logging.debug("corresponding alternative = %s", best_alternative)
      return best_alternative
    def extract_features_for_trips(self, trips):
        '''
        For the specified set of trips, retrieve alternatives and generate all
        their features. The alternatives are computed in here instead of outside
        in order to keep the number of outstanding connections to the database low.
        Otherwise, we keep a connection open for every trip in the trip list.

        Returns a (feature_vector, label_vector) tuple. Both are allocated
        with len(trips) * (self.num_alternatives + 1) rows. For each trip
        that has alternatives, one row is written for the trip itself
        (label 1) followed by one row per alternative (label 0). Trips
        without alternatives write nothing, so unused trailing rows stay
        all-zero. Writing more rows than were allocated raises IndexError.
        '''
        logging.debug("about to get num_features from %s", self.feature_list)
        num_features = len(self.feature_list)
        logging.debug("num_features = %d ", num_features)
        # Compute the row count arithmetically instead of building a
        # repeated copy of the trip list just to take its length.
        num_rows = len(trips) * (self.num_alternatives + 1)
        feature_vector = np.zeros((num_rows, num_features))
        label_vector = np.zeros(num_rows)
        logging.debug("creating label_vector with size %d", num_rows)
        logging.debug("after creation, len = %d, size = %d",
                      len(label_vector), label_vector.size)
        sample = 0
        for trip in trips:
            logging.debug("Considering trip %s", trip._id)
            alt_source = atm.get_alternative_for_trip(trip)
            alt = list(alt_source)
            # Close the connection to the database after reading all the
            # alternatives. This must be done on the object returned by the
            # query: the materialized list never has a close() method.
            close = getattr(alt_source, "close", None)
            if close is None:
                logging.debug("Non cursor iterator, skipping close")
            else:
                close()

            if not alt:
                logging.debug("No alternatives found for trip %s, skipping ",
                              trip._id)
                continue

            feature_vector[sample] = self._extract_features(trip)
            label_vector[sample] = 1
            sample += 1
            logging.debug("original sample = %d", sample)
            for _alt in alt:
                feature_vector[sample] = self._extract_features(_alt)
                label_vector[sample] = 0
                sample += 1
                logging.debug("Alt: %d", sample)
        logging.debug("Returning feature vector = %s", feature_vector)
        return (feature_vector, label_vector)
    def predict(self, trip):
        '''
        Return the highest-utility choice among `trip` and its alternatives.

        Features come from extract_features_for_trips([trip]). Non-finite
        values are replaced by np.nan_to_num, magnitudes below .001 or above
        1000000 are zeroed, and rows that end up all-zero are ignored. Each
        remaining row is scored with self.predict_utility and the first row
        with the highest utility wins.

        Returns `trip` itself when the original trip scores best or when no
        row can be scored; otherwise the corresponding element of the
        alternatives list.
        '''
        alts = list(atm.get_alternative_for_trip(trip))
        trip_features, labels = self.extract_features_for_trips([trip])
        trip_features = np.nan_to_num(trip_features)
        trip_features[np.abs(trip_features) < .001] = 0
        trip_features[np.abs(trip_features) > 1000000] = 0
        logging.debug("trip_features = %s", trip_features)
        nonzero = ~np.all(trip_features == 0, axis=1)
        logging.debug("nonzero count = %d", np.count_nonzero(nonzero))
        # Keep the unfiltered row number of every surviving row: row 0 is the
        # original trip and row k is alts[k - 1], and that mapping would be
        # lost if an all-zero row in the middle were dropped.
        kept_rows = np.flatnonzero(nonzero)
        trip_features = trip_features[nonzero]
        labels = labels[nonzero]
        logging.debug("len(labels) = %d, len(alts) = %d",
                      len(labels), len(alts))

        best_trip = None
        best_utility = float("-inf")
        for i, trip_feature in enumerate(trip_features):
            utility = self.predict_utility(trip_feature)
            if utility > best_utility:
                best_trip = i
                best_utility = utility

        if best_trip is None:
            # Nothing could be scored (no usable rows, or no utility above
            # -inf); indexing with None would fail, so keep the original trip.
            logging.debug("No scorable feature rows, keeping the original trip")
            return trip

        source_row = int(kept_rows[best_trip])
        if labels[best_trip] == 1:
            logging.debug("Model predicts best trip is: ORIGINAL TRIP (%d)",
                          source_row)
            logging.debug("labels[best_trip] == %d", labels[best_trip])
            return trip

        logging.debug("Model predicts best trip is: Alternative TRIP (%d)",
                      source_row)
        logging.debug("best_trip = %s, out of %d alternatives ",
                      source_row, len(alts))
        best_alternative = alts[source_row - 1]
        logging.debug("corresponding alternative = %s", best_alternative)
        return best_alternative
  def extract_features_for_trips(self, trips):
      '''
      For the specified set of trips, retrieve alternatives and generate all
      their features. The alternatives are computed in here instead of outside
      in order to keep the number of outstanding connections to the database low.
      Otherwise, we keep a connection open for every trip in the trip list.

      Returns a (feature_vector, label_vector) tuple. Both are allocated
      with len(trips) * (self.num_alternatives + 1) rows. For each trip
      that has alternatives, one row is written for the trip itself
      (label 1) followed by one row per alternative (label 0). Trips
      without alternatives write nothing, so unused trailing rows stay
      all-zero. Writing more rows than were allocated raises IndexError.
      '''
      logging.debug("about to get num_features from %s", self.feature_list)
      num_features = len(self.feature_list)
      logging.debug("num_features = %d ", num_features)
      # Compute the row count arithmetically instead of building a
      # repeated copy of the trip list just to take its length.
      num_rows = len(trips) * (self.num_alternatives + 1)
      feature_vector = np.zeros((num_rows, num_features))
      label_vector = np.zeros(num_rows)
      logging.debug("creating label_vector with size %d", num_rows)
      logging.debug("after creation, len = %d, size = %d", len(label_vector), label_vector.size)
      sample = 0
      for trip in trips:
          logging.debug("Considering trip %s", trip._id)
          alt_source = atm.get_alternative_for_trip(trip)
          alt = list(alt_source)
          # Close the connection to the database after reading all the
          # alternatives. This must be done on the object returned by the
          # query: the materialized list never has a close() method.
          close = getattr(alt_source, "close", None)
          if close is None:
              logging.debug("Non cursor iterator, skipping close")
          else:
              close()

          if not alt:
              logging.debug("No alternatives found for trip %s, skipping ", trip._id)
              continue

          feature_vector[sample] = self._extract_features(trip)
          label_vector[sample] = 1
          sample += 1
          logging.debug("original sample = %d", sample)
          for _alt in alt:
              feature_vector[sample] = self._extract_features(_alt)
              label_vector[sample] = 0
              sample += 1
              logging.debug("Alt: %d", sample)
      logging.debug("Returning feature vector = %s", feature_vector)
      return (feature_vector, label_vector)