コード例 #1
0
ファイル: all.py プロジェクト: Ohohcakester/cs4246-project
def computeAreaDensity(zCoord, trainTags, focusPoints, testTimes):
    """
    Sum the per-user density distributions for the given focus points.

    Generates the test cases (with shortest paths) for level `zCoord`, runs
    bayes.predictGP once per distinct user, turns every prediction into
    per-timestamp densities with computeDensity, and adds the densities of
    all users together point by point.

    Note: This only needs to be run once per run, no need to do bayes opt
    so maybe we can preprocess and store in file?
    --
    zCoord: level passed on to generateTestCases and computeDensity
    trainTags: list of user tags
    focusPoints: list of focus points
    [(a, b), (c, d), (e, f)] where a through f are floats
    testTimes: times handed to bayes.predictGP
    --
    Returns: tuple (densityDist, numOfSkippedFile)
    densityDist: dict {timestamp: densityDistribution}, or None when the
    formatted data contains no users
    numOfSkippedFile: always 0 in this function; kept so that callers
    unpacking two values keep working
    """
    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and as the print function on Python 3.
    print('Generating test cases and computing shortest paths...')
    dfFloor = generateTestCases(focusPoints, trainTags, level=zCoord)
    dfFormatted = formatDf(dfFloor)

    uniqueUserID = dfFormatted['USER'].unique()
    dfFormatted['SHORTEST_PATHS'] = dfFormatted['SHORTEST_PATHS'].str.split(',')

    densityDist = None
    numOfSkippedFile = 0
    numOfUsers = len(uniqueUserID)

    for count, user in enumerate(uniqueUserID):
        print('Computing GP %s of %s . USER== %s' % (count, numOfUsers, user))

        userResult = bayes.predictGP(
            dfFormatted[dfFormatted['USER'] == user], testTimes)
        userDensityDist = computeDensity(userResult, focusPoints, level=zCoord)

        if densityDist is None:
            # The first user's distributions become the accumulator.
            densityDist = userDensityDist
            continue

        # All users share the same timestamps, so the accumulator can be
        # indexed with the keys of the current user's result.
        for timestamp in userDensityDist:
            userDist = userDensityDist[timestamp]
            totalDist = densityDist[timestamp]
            for point in userDist.getPoints():
                accumulated = totalDist.query(point)

                if np.isnan(accumulated):
                    print('NAN DETECTED')
                    accumulated = 0

                # Use the sanitised value: querying the accumulator again
                # here would bring the NaN straight back into the sum.
                totalDist.setPoint(point, accumulated + userDist.query(point))

    return densityDist, numOfSkippedFile
コード例 #2
0
ファイル: all.py プロジェクト: Ohohcakester/cs4246-project
def runBayes(df, testTimes):
    """
    Disabled entry point for per-user GP regression and prediction.

    This function is not in use: it prints 'NOT BEING USED' and then ends
    the program by raising SystemExit, without reading its arguments.
    The former body (splitting 'SHORTEST_PATHS' and calling
    bayes.predictGP for the first user only) sat after the exit call and
    could never execute, so it is no longer carried here.
    ---
    df: dataframe output from formatDf method
    ['TIMESTAMP', 'USER', 'Z', 'SHORTEST_PATHS'] (ignored)
    testTimes: ignored
    ---
    Returns: never returns normally
    Raises: SystemExit, always (no exit code, i.e. exit status 0)
    """
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('NOT BEING USED')
    # Raise SystemExit directly instead of calling quit(): quit() is added
    # by the site module for interactive sessions and is not guaranteed to
    # exist (for example under `python -S`).
    raise SystemExit()
コード例 #3
0
ファイル: all.py プロジェクト: Ohohcakester/cs4246-project
def runBayes(df, testTimes):
    """
    Disabled entry point for per-user GP regression and prediction.

    Calling this function prints 'NOT BEING USED' and terminates the
    program by raising SystemExit; neither argument is read. The earlier
    implementation (split 'SHORTEST_PATHS', then bayes.predictGP for the
    first user only) was placed after the exit call and was therefore
    unreachable, so it is omitted.
    ---
    df: dataframe output from formatDf method
    ['TIMESTAMP', 'USER', 'Z', 'SHORTEST_PATHS'] (ignored)
    testTimes: ignored
    ---
    Returns: never returns normally
    Raises: SystemExit, always (no exit code, i.e. exit status 0)
    """
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print('NOT BEING USED')
    # quit() comes from the site module and is intended for interactive
    # use; raising SystemExit has the same effect without depending on it.
    raise SystemExit()
コード例 #4
0
ファイル: all.py プロジェクト: Ohohcakester/cs4246-project
def computeAreaDensity(zCoord, trainTags, focusPoints, testTimes):
    """
    Accumulate the density distributions of every user for the focus points.

    Steps: build the test cases (with shortest paths) for level `zCoord`,
    call bayes.predictGP for each distinct user, convert each prediction
    into per-timestamp densities through computeDensity, and add those
    densities into one running total per timestamp and point.

    Note: This only needs to be run once per run, no need to do bayes opt
    so maybe we can preprocess and store in file?
    --
    zCoord: level passed on to generateTestCases and computeDensity
    trainTags: list of user tags
    focusPoints: list of focus points
    [(a, b), (c, d), (e, f)] where a through f are floats
    testTimes: times handed to bayes.predictGP
    --
    Returns: tuple (densityDist, numOfSkippedFile)
    densityDist: dict {timestamp: densityDistribution}, or None when the
    formatted data contains no users
    numOfSkippedFile: always 0 in this function; kept so that callers
    unpacking two values keep working
    """
    # Single-argument print(...) behaves the same as a print statement on
    # Python 2 and as the print function on Python 3.
    print('Generating test cases and computing shortest paths...')
    dfFloor = generateTestCases(focusPoints, trainTags, level=zCoord)
    dfFormatted = formatDf(dfFloor)

    uniqueUserID = dfFormatted['USER'].unique()
    dfFormatted['SHORTEST_PATHS'] = dfFormatted['SHORTEST_PATHS'].str.split(',')

    densityDist = None
    numOfSkippedFile = 0
    numOfUsers = len(uniqueUserID)

    for count, user in enumerate(uniqueUserID):
        print('Computing GP %s of %s . USER== %s' % (count, numOfUsers, user))

        userRows = dfFormatted[dfFormatted['USER'] == user]
        userResult = bayes.predictGP(userRows, testTimes)
        userDensityDist = computeDensity(userResult, focusPoints, level=zCoord)

        if densityDist is None:
            # Nothing accumulated yet: start from the first user's result.
            densityDist = userDensityDist
            continue

        # Every user is evaluated at the same timestamps, so the running
        # total can be looked up with the current user's keys.
        for timestamp in userDensityDist:
            userDist = userDensityDist[timestamp]
            totalDist = densityDist[timestamp]
            for point in userDist.getPoints():
                accumulated = totalDist.query(point)

                if np.isnan(accumulated):
                    print('NAN DETECTED')
                    accumulated = 0

                # Add onto the sanitised value; a second query of the
                # running total would reintroduce the NaN just replaced.
                totalDist.setPoint(point, accumulated + userDist.query(point))

    return densityDist, numOfSkippedFile