Beispiel #1
0
    def predict(self, user_id, item_id):
        """
        Predict a value for item_id and user_id from the user's neighbourhood.

        Every neighbour that has rated item_id contributes the deviation of
        that rating from the neighbour's own average rating, weighted by the
        similarity between the neighbour and user_id. The predicted value is
        the sum of those weighted deviations divided by the sum of the
        absolute similarities, or 0 when that denominator is 0 (which
        includes the case where no neighbour rated the item).

        NOTE(review): the result is a mean-centred deviation; the average
        rating of user_id itself is not added back here - confirm callers
        expect that.
        """
        weighted_deviations = []
        weights = []

        for neighbour in self.knn.get_user_neighbourhood(user_id=user_id):
            # the first element of a neighbourhood entry is the user id
            neighbour_id = neighbour[0]
            neighbour_ratings = self.db.users[neighbour_id].ratings
            if item_id in neighbour_ratings:
                # neighbours without a rating for the item are ignored
                neighbour_avg = self.get_user_rating_avg(neighbour_id)
                neighbour_rating = neighbour_ratings[item_id].rating
                similarity = self.knn.get_user_similarity(
                    user_id_1=user_id, user_id_2=neighbour_id)

                weighted_deviations.append(
                    (neighbour_rating - neighbour_avg) * similarity)
                weights.append(abs(similarity))

        total_weight = sum(weights)
        if total_weight == 0:
            predicted = 0
        else:
            predicted = sum(weighted_deviations) / total_weight
        return Prediction(user_id, item_id, predicted)
Beispiel #2
0
    def predict(self, user_id, item_id):
        """
        Predict the rating of item_id for user_id as a weighted average.

        Each neighbour of user_id that has rated item_id contributes its
        rating, weighted by self.normalize(user_id, neighbour_id). The
        predicted value is the weighted sum of ratings divided by the sum of
        the weights, or 0 when the sum of the weights is 0 (which includes
        the case where no neighbour rated the item).
        """
        weighted_ratings = []
        weights = []

        for neighbour in self.knn.get_user_neighbourhood(user_id=user_id):
            # the first element of a neighbourhood entry is the user id
            neighbour_id = neighbour[0]
            if item_id in self.db.users[neighbour_id].ratings:
                # neighbours without a rating for the item are ignored
                weight = self.normalize(user_id, neighbour_id)
                neighbour_rating = \
                    self.db.users[neighbour_id].ratings[item_id].rating
                weighted_ratings.append(weight * neighbour_rating)
                weights.append(weight)

        total_weight = sum(weights)
        if total_weight == 0:
            predicted = 0
        else:
            predicted = sum(weighted_ratings) / total_weight
        return Prediction(user_id, item_id, predicted)
Beispiel #3
0
async def make_prediction(location, to_predict, min_events=12):
    """
    Forecast an event metric for a location and store the results.

    Loads every EventInformation row for `location`, builds a date-indexed
    DataFrame of the chosen metric, passes it to trend_predict and stores
    the returned forecast as Prediction rows and the returned trend as
    Trend rows.

    :param location: location used to filter EventInformation and stamped
        on every stored row.
    :param to_predict: "number_attendees" or "number_applications". Any
        other value reads the number_applications column, as before.
    :param min_events: minimum number of events required to attempt a
        prediction (defaults to the previous hard-coded 12).
    :return: None. Returns early, storing nothing, when there is not enough
        data or when trend_predict returns a falsy result.
    """
    event_objects = EventInformation.query.filter_by(location=location).all()
    if not event_objects or len(event_objects) < min_events:
        # there isn't enough data for a prediction in this case
        return
    print("completed lookup")

    column = ("number_attendees"
              if to_predict == "number_attendees" else "number_applications")

    # Build the frame in one step: DataFrame.append copied the whole frame on
    # every call and was removed in pandas 2.0.
    df = pd.DataFrame([{
        "Date": event_object.timestamp,
        column: float(getattr(event_object, column)),
    } for event_object in event_objects])
    print("created dataframe")
    # sanity checking this is a datetime
    df["Date"] = pd.to_datetime(df["Date"])
    df = df.set_index("Date").sort_index()
    print("completed dataframe sorting")

    result = trend_predict(df, to_predict)
    print("finished prediction")
    if not result:
        return
    date_range, forecast, trend = result
    print("got result")

    # forecast[0] holds the point forecasts and forecast[2] the matching
    # (lower, upper) bounds, as indexed by the original code.
    bounds = forecast[2]
    for i, value in enumerate(forecast[0]):
        db.session.add(
            Prediction(location=location,
                       lower_bound=bounds[i][0],
                       prediction=value,
                       upper_bound=bounds[i][1],
                       timestamp=date_range[i],
                       prediction_type=to_predict))
    # one commit for the whole batch instead of one per row
    db.session.commit()
    print("saved fitted values")
    print("finished Fitted values")

    # The trend is stored once, after the forecast loop. It used to be nested
    # inside that loop, which inserted every trend row once per forecast step.
    # assumes trend is a pandas Series (it exposes .index) - TODO confirm
    for timestamp, value in trend.items():
        db.session.add(
            Trend(location=location,
                  data=value,
                  timestamp=timestamp,
                  data_type=to_predict))
    db.session.commit()
    print("Saved trend results")
Beispiel #4
0
def runStats():
    """
    Count incidents per grid cell and store a normalised Prediction per cell.

    The grid starts at (LATORG, LONORG), extends towards (LATSTOP, LONSTOP)
    and uses square cells of BBSIZE degrees; all five values are read from
    the environment. For every cell the number of Incident rows whose
    location is contained in the cell polygon is counted. Each cell then
    gets a Prediction whose certainty is its count divided by the largest
    count of any cell, located at the cell centre.

    Raises KeyError / ValueError when an environment variable is missing or
    not a number.
    """
    curDT = datetime.datetime.now()
    originLat = float(os.environ['LATORG'])
    originLong = float(os.environ['LONORG'])
    limitLat = float(os.environ['LATSTOP'])
    limitLong = float(os.environ['LONSTOP'])
    bbsize = float(os.environ['BBSIZE'])
    NSBoxes = (originLat - limitLat) / bbsize
    EWBoxes = (originLong - limitLong) / bbsize

    # (centre latitude, centre longitude, incident count) per cell, row-major.
    # The loop indices keep their own names: the old code overwrote `x` with a
    # latitude inside the inner loop, so every cell after the first one of a
    # row was given a wrong centre.
    cells = []
    for row in np.arange(0, np.absolute(NSBoxes)):
        north = originLat - (row * bbsize)
        south = north - bbsize
        for col in np.arange(0, np.absolute(EWBoxes)):
            west = originLong + (col * bbsize)
            east = west + bbsize
            # Create a selection box to get all crimes in a certain region;
            # the ring is closed by repeating the first corner.
            corners = [(north, west), (south, west), (south, east),
                       (north, east), (north, west)]
            box = "POLYGON((" + ", ".join(
                str(lat) + " " + str(lon) for lat, lon in corners) + "))"
            crimes = db.session.query(Incident).filter(
                Incident.location.contained(box)).count()
            cells.append(((north + south) / 2, (west + east) / 2, crimes))

    # Largest count, used to normalise the heat map. default=0 covers an
    # empty grid, where max() would otherwise raise ValueError.
    maxCrimes = max((crimes for _, _, crimes in cells), default=0)

    # Create predictions for each grid location, at the cell's central point
    for countIndex, (centerLat, centerLong, crimes) in enumerate(cells):
        pred = Prediction()
        # no incidents anywhere: report 0 instead of dividing by zero
        pred.certainty = crimes / maxCrimes if maxCrimes else 0.0
        pred.countIndex = countIndex + 1
        pred.type = 'general'
        pred.precog = 'basic_stats'
        pred.datetime = curDT
        pred.location = "POINT( " + str(centerLat) + " " + str(
            centerLong) + " )"
        db.session.add(pred)
    db.session.commit()
Beispiel #5
0
def customer_count():
    """
    Record a new customer count for a business and store a wait estimate.

    Reads `businessID` and `new_count` from the request query string and
    stores a CustomerCount row. It then looks at up to the 50 most recent
    UserInput rows of the business, derives a wait-per-person rate for each
    one from the customer count recorded at or before the time the user got
    in line, and feeds those rates to line_stats. A Prediction is stored
    with wait_time = stats[0] * new_count and wait_time_stddev = stats[1].

    :return: 'OK' on success, otherwise a short message naming the missing
        or invalid query parameter.
    """
    businessID = request.args.get('businessID', default=None)
    if businessID is None:
        return 'businessID not found'

    new_count = request.args.get('new_count', default=None)
    if new_count is None:
        return 'new_count not found'
    # Validate before anything is written: the value used to be converted
    # only after the new row had been committed, so a bad value failed the
    # request half-way through.
    try:
        people_in_line = float(new_count)
    except ValueError:
        return 'new_count is not a number'

    new_entry = CustomerCount(business_id=businessID, customer_count=new_count)
    print(new_entry)
    db.session.add(new_entry)
    db.session.commit()

    # get a max of the last 50 customer inputs
    user_inputs = UserInput.query.filter_by(business_id=businessID).order_by(
        UserInput.timestamp.desc()).limit(50).all()
    for user_input in user_inputs:
        print(user_input)

    # calculate service rate per person for each user input
    total_waits = []
    for inputs in user_inputs:
        # get the time the user got in line
        start_time = calc_in_line_time(inputs.timestamp, inputs.wait_duration)

        # Get the line length for when the user got in line: the most recent
        # count at or before start_time. Without the ordering, first()
        # returned an arbitrary earlier row.
        count = CustomerCount.query.filter(
            (CustomerCount.business_id == businessID)
            & (CustomerCount.timestamp <= start_time)).order_by(
                CustomerCount.timestamp.desc()).first()

        if count is None:
            print('[debug] count is none')
        elif not count.customer_count:
            # an empty line gives no per-person rate (and would divide by 0)
            print('[debug] count is zero')
        else:
            # calculate the rate per person
            total_waits.append(inputs.wait_duration / count.customer_count)

    print(total_waits)
    # calculate line stats
    stats = line_stats(total_waits)

    print(stats)

    # predicted wait = (avg. time/person) * people
    estimate = stats[0] * people_in_line

    # create the prediction
    new_prediction = Prediction(business_id=businessID,
                                wait_time=estimate,
                                wait_time_stddev=stats[1])

    db.session.add(new_prediction)
    db.session.commit()
    return 'OK'
Beispiel #6
0
 def test_follow_predictions(self):
     """
     followed_predictions() returns a user's own predictions together with
     those of the users they follow, newest first.
     """
     # create four users
     john = User(username='******', email='*****@*****.**')
     susan = User(username='******', email='*****@*****.**')
     mary = User(username='******', email='*****@*****.**')
     david = User(username='******', email='*****@*****.**')
     db.session.add_all([john, susan, mary, david])

     # create four predictions, each at its own offset from a common base
     base_time = datetime.utcnow()

     def seconds_later(seconds):
         return base_time + timedelta(seconds=seconds)

     from_john = Prediction(body="pred from john",
                            author=john,
                            timestamp=seconds_later(1))
     from_susan = Prediction(body="pred from susan",
                             author=susan,
                             timestamp=seconds_later(4))
     from_mary = Prediction(body="pred from mary",
                            author=mary,
                            timestamp=seconds_later(3))
     from_david = Prediction(body="pred from david",
                             author=david,
                             timestamp=seconds_later(2))
     db.session.add_all([from_john, from_susan, from_mary, from_david])
     db.session.commit()

     # setup the followers
     john.follow(susan)
     john.follow(david)
     susan.follow(mary)
     mary.follow(david)
     db.session.commit()

     # fetch the followed predictions of every user before asserting
     feed_john = john.followed_predictions().all()
     feed_susan = susan.followed_predictions().all()
     feed_mary = mary.followed_predictions().all()
     feed_david = david.followed_predictions().all()

     self.assertEqual(feed_john, [from_susan, from_david, from_john])
     self.assertEqual(feed_susan, [from_susan, from_mary])
     self.assertEqual(feed_mary, [from_mary, from_david])
     self.assertEqual(feed_david, [from_david])
Beispiel #7
0
    def predict(self, user_id, item_id, threshold=1):
        """
        Simple baseline predictor.

        Returns a Prediction holding the average rating given to item_id,
        excluding the rating given by user_id if it exists. The predicted
        value is 0 when fewer than `threshold` such ratings exist, or when
        there are none at all.
        """
        ratings = [
            item.rating for item in self.db.movies[item_id].ratings.values()
            if item.user_id != user_id
        ]

        # If there are not enough ratings to compute an average, the
        # prediction falls back to 0. The emptiness check matters when
        # threshold <= 0: statistics.mean raises StatisticsError on no data.
        if ratings and len(ratings) >= threshold:
            predicted = statistics.mean(ratings)
        else:
            predicted = 0
        return Prediction(user_id, item_id, predicted)
Beispiel #8
0
def commitToDB(rf, bbsize=0.007):
    """
    Store one Prediction per grid cell, year and month using model `rf`.

    A PreCogRun row is created first, so that every Prediction can refer to
    its ID. The grid starts at (LATORG, LONORG) and extends towards
    (LATSTOP, LONSTOP), all read from the environment. For the centre of
    every cell and every month of 2001-2004, rf.predict is called with
    [[centre longitude, centre latitude, year, month]] and the first element
    of its result is stored as the certainty.

    :param rf: fitted model exposing predict().
        NOTE(review): the feature order used here must match the order rf
        was fitted with - confirm against the training code.
    :param bbsize: cell size in degrees. The default is the value that was
        previously hard-coded here, overriding the BBSIZE environment
        variable.
    """
    tryears = [2001, 2002]
    tyears = [2003, 2004]
    years = tryears + tyears
    months = range(1, 13)
    originLat = float(os.environ['LATORG'])
    originLong = float(os.environ['LONORG'])
    limitLat = float(os.environ['LATSTOP'])
    limitLong = float(os.environ['LONSTOP'])
    NSBoxes = (originLat - limitLat) / bbsize
    EWBoxes = (originLong - limitLong) / bbsize

    run = PreCogRun()
    run.type = "thefts by box-months using random forest"
    run.precog = "MRFT"
    run.datetime = datetime.datetime.now()
    db.session.add(run)
    # flush + refresh so that run.ID is populated before it is referenced
    db.session.flush()
    db.session.refresh(run)

    countIndex = 0
    for x in np.arange(0, np.absolute(NSBoxes)):
        # the centre only depends on the cell, so compute it once per cell
        centerLat = ((originLat - (x * bbsize)) +
                     (originLat - (x * bbsize) - bbsize)) / 2
        for y in np.arange(0, np.absolute(EWBoxes)):
            centerLon = ((originLong + (y * bbsize)) +
                         (originLong + (y * bbsize) + bbsize)) / 2
            location = "POINT( " + str(centerLon) + " " + str(
                centerLat) + " )"
            for year in years:
                for month in months:
                    countIndex = countIndex + 1
                    pred = Prediction()
                    pred.precogrun = run.ID
                    pred.certainty = rf.predict(
                        [[centerLon, centerLat, year, month]])[0]
                    pred.countIndex = countIndex
                    pred.datetime = datetime.datetime(year, month, 1)
                    pred.location = location
                    db.session.add(pred)
    db.session.commit()
Beispiel #9
0
def predict():
    """
    Handle the prediction form.

    When the form validates on submit, every non-empty uploaded image is
    saved with save_prediction_image, classified with predict_image and
    recorded as a Prediction for the current user; all rows are committed
    together and the client is redirected to 'main.index'. Otherwise the
    form template is rendered.
    """
    form = PredictionForm()
    if not form.validate_on_submit():
        return render_template('predict.html', title='Predict', form=form)

    for image in form.image.data:
        if not image:
            continue
        save_prediction_image(image)
        image_path = os.path.join(current_app.root_path,
                                  'static/prediction_images',
                                  image.filename)
        prediction = predict_image(image_path)
        db.session.add(
            Prediction(image_file=image.filename,
                       user_id=current_user.id,
                       class_id=prediction['class_id'],
                       probability=prediction['probability']))
    db.session.commit()
    return redirect(url_for('main.index'))
Beispiel #10
0
def createDummyPredictions(num):
    """
    Store `num` random Prediction rows attached to a new PreCogRun.

    Each prediction gets a random certainty in [0, 1] rounded to two
    decimals, a fixed datetime of 2018-01-01 and a random POINT location
    whose two coordinates are drawn from fixed ranges and rounded to six
    decimals. Everything is committed in a single transaction.
    """
    created_at = datetime.datetime.now()
    run = PreCogRun()
    run.type = "JSON TEST DATA ONLY"
    run.precog = "dumb"
    run.datetime = created_at
    db.session.add(run)
    # flush + refresh so that run.ID is populated before it is referenced
    db.session.flush()
    db.session.refresh(run)

    for sequence_number in range(1, num + 1):
        pred = Prediction()
        pred.precogrun = run.ID
        pred.certainty = round(random.uniform(0, 1), 2)
        pred.countIndex = sequence_number
        pred.datetime = datetime.datetime(2018, 1, 1)
        first_coord = round(random.uniform(-87.958428, -87.503532), 6)
        second_coord = round(random.uniform(41.640071, 42.029866), 6)
        pred.location = "POINT( " + str(first_coord) + " " + str(
            second_coord) + " )"
        db.session.add(pred)
    db.session.commit()
Beispiel #11
0
def submit_prediction():
    """
    Handle the "submit predictions" form.

    On a valid POST for an existing team that has not reached
    MAX_TEAM_SUBMISSIONS_PER_DAY, the uploaded LibertyUS and/or
    LibertySpain files are scored with get_avg_precision_score_from_file
    and stored as one Prediction. A file that was not submitted is stored
    with an empty file name and a score of 0.

    Exactly one message is flashed per failed attempt: unknown team,
    submission limit reached, no file submitted, a disallowed file, or the
    text of the exception raised while scoring or saving.

    :return: a redirect to 'leaderboard' after a successful submission,
        otherwise the rendered submission form.
    """
    form = UploadResultForm()
    teams = [t.name for t in Team.query.all()]
    print(form.data)
    # .get(): a POST without a 'team' field re-renders the form instead of
    # aborting the request with a 400.
    team_name = request.form.get('team')
    if (request.method == 'POST' and team_name is not None
            and form.validate_on_submit()):
        team = Team.query.filter_by(name=team_name).first()
        if team is None:
            flash('The team does not exist')
        elif team.number_of_submissions_last_24hours(
        ) >= app.config['MAX_TEAM_SUBMISSIONS_PER_DAY']:
            flash('Max number of submissions in the past 24 hours hit')
        else:
            file_libertyUS = form.file_libertyUS.data
            file_libertySpain = form.file_libertySpain.data
            # a missing upload is treated like an empty file name
            us_name = getattr(file_libertyUS, 'filename', '') or ''
            spain_name = getattr(file_libertySpain, 'filename', '') or ''
            submitted = [name for name in (us_name, spain_name) if name]

            # The branches are mutually exclusive. They used to be
            # independent `if`s, so "The file is not a csv" was also flashed
            # after "Please submit at least one file" and after a scoring
            # error.
            if not submitted:
                flash("Please submit at least one file")
            elif not all(allowed_file(name) for name in submitted):
                flash('The file is not a csv')
            else:
                try:
                    score_LibertyUs = 0
                    score_LibertySpain = 0
                    if us_name:
                        score_LibertyUs = get_avg_precision_score_from_file(
                            file_libertyUS, team_name, True)
                    if spain_name:
                        score_LibertySpain = get_avg_precision_score_from_file(
                            file_libertySpain, team_name)
                    prediction = Prediction(
                        team_id=team.id,
                        file_name_LibertyUs=us_name,
                        file_name_LibertySpain=spain_name,
                        score_LibertyUs=score_LibertyUs,
                        score_LibertySpain=score_LibertySpain)
                    db.session.add(prediction)
                    db.session.commit()
                except Exception as e:
                    # Request boundary: report the problem to the user and
                    # leave the session usable for the rest of the request.
                    db.session.rollback()
                    flash(str(e))
                else:
                    return redirect(url_for('leaderboard'))

    return render_template('submit_predictions.html',
                           title='Submit predictions',
                           teams=teams,
                           form=form)
Beispiel #12
0
def runStats(bbsize=0.007):
    """
    Train a random forest on per-cell monthly incident counts and store its
    predictions.

    The grid starts at (LATORG, LONORG) and extends towards
    (LATSTOP, LONSTOP), all read from the environment. For every cell that
    contains at least one Incident, the incidents with FBIcode "06" are
    counted per year (2001, 2002) and month (1-4). Each sample has the
    features [centre latitude, centre longitude, year, month] and the target
    count / 88.

    A RandomForestRegressor is fitted on the first tenth of the samples,
    saved to 'precogMonths.joblib' and scored on the second half of the
    samples. A PreCogRun is then created and one Prediction per cell, year
    and month is stored with the model output as certainty.

    NOTE(review): samples are appended in grid order and are not shuffled,
    so the training slice only covers the first cells visited - confirm this
    split is intended.

    :param bbsize: cell size in degrees. The default is the value that was
        previously hard-coded here, overriding the BBSIZE environment
        variable.
    """
    # Divisor applied to every count before training; its meaning is not
    # evident from this function. TODO confirm and document.
    count_scale = 88
    years = [2001, 2002]
    months = range(1, 5)
    originLat = float(os.environ['LATORG'])
    originLong = float(os.environ['LONORG'])
    limitLat = float(os.environ['LATSTOP'])
    limitLong = float(os.environ['LONSTOP'])
    NSBoxes = (originLat - limitLat) / bbsize
    EWBoxes = (originLong - limitLong) / bbsize

    centers = []  # (centre latitude, centre longitude) of every grid cell
    boxes = []  # feature rows: [centre lat, centre long, year, month]
    crimeCounts = []  # scaled target for the feature row at the same index
    totalCrimes = 0
    maxCrimes = 0
    # The loop indices keep their own names: the old code overwrote `x` and
    # `y` with coordinates inside the loops, so later cells were built from
    # a latitude instead of a row index.
    for row in np.arange(0, np.absolute(NSBoxes)):
        north = originLat - (row * bbsize)
        south = north - bbsize
        centerLat = (north + south) / 2
        for col in np.arange(0, np.absolute(EWBoxes)):
            west = originLong + (col * bbsize)
            east = west + bbsize
            centerLong = (west + east) / 2
            centers.append((centerLat, centerLong))

            # Create a selection box to get all crimes in a certain region;
            # the ring is closed by repeating the first corner.
            corners = [(north, west), (south, west), (south, east),
                       (north, east), (north, west)]
            box = "POLYGON((" + ", ".join(
                str(lat) + " " + str(lon) for lat, lon in corners) + "))"
            print("box number ", len(centers))
            print("max count ", maxCrimes / count_scale)
            print("total crimes ", totalCrimes)
            boxResult = db.session.query(Incident).filter(
                Incident.location.contained(box))
            if boxResult.count() == 0:
                # cells without any incident contribute no training samples
                continue
            for year in years:
                yearResult = boxResult.filter(Incident.year == year)
                for month in months:
                    # several criteria passed to filter() are AND-ed together
                    count = yearResult.filter(
                        func.extract('month', Incident.date) == month,
                        Incident.FBIcode == "06").count()
                    boxes.append([centerLat, centerLong, year, month])
                    crimeCounts.append(count / count_scale)
                    maxCrimes = max(maxCrimes, count)
                    totalCrimes = totalCrimes + count

    regr = RandomForestRegressor()
    train_end = int(len(boxes) / 10)
    test_start = int(len(boxes) / 2)
    features = np.array(boxes[:train_end])
    output = np.array(crimeCounts[:train_end])
    testFeatures = np.array(boxes[test_start:])
    testOutput = np.array(crimeCounts[test_start:])
    regr.fit(features, output)
    predic = regr.predict(testFeatures)
    joblib.dump(regr, 'precogMonths.joblib')

    variance = sklearn.metrics.explained_variance_score(
        testOutput, predic, sample_weight=None, multioutput='uniform_average')
    print("variance ", variance)

    run = PreCogRun()
    run.type = "thefts by box-months using random forest"
    run.precog = "MRF"
    run.datetime = datetime.datetime.now()
    db.session.add(run)
    # flush + refresh so that run.ID is populated before it is referenced
    db.session.flush()
    db.session.refresh(run)

    countIndex = 0
    for centerLat, centerLong in centers:
        location = "POINT( " + str(centerLat) + " " + str(centerLong) + " )"
        for year in years:
            for month in months:
                countIndex = countIndex + 1
                pred = Prediction()
                # same feature order as the training rows above
                pred.certainty = regr.predict(
                    [[centerLat, centerLong, year, month]])[0]
                pred.precogrun = run.ID
                pred.countIndex = countIndex
                pred.type = 'general'
                pred.precog = 'basic_ml'
                pred.datetime = datetime.datetime(year, month, 1)
                pred.location = location
                db.session.add(pred)
    db.session.commit()