def loadData():
    """Load aggregated course views and return the frame used for regression.

    Runs a grouped SQL query through ``db.loadDatabySQL``, replaces the
    ``course_id`` and ``level`` columns with the codes produced by the
    module-level encoder ``le``, prints a short exploration report to stdout,
    and returns a new frame holding ``course_id``, ``level`` and
    ``view_time_seconds``.
    """
    query = (
        "SELECT user_handle,course_id,author_handle,level,"
        "sum(view_time_seconds) view_time_seconds "
        "from user_course_views "
        "group by user_handle,course_id,author_handle,level"
    )
    views = db.loadDatabySQL(query)
    views.head()  # result is discarded; kept only to mirror the original call sequence

    # Replace the string-valued columns with the encoder's integer codes.
    for column in ("course_id", "level"):
        views[column] = le.fit_transform(views[column])

    # Exploration report: a few rows, the shape, and summary statistics.
    print("head : ", "------", sep="\n")
    print(views[1:5])  # rows 1..4 -- the slice skips row 0
    print("")
    print("shape ", "------", sep="\n")
    print(views.shape)
    print("")
    print("Describe Result", "---------------", sep="\n")
    print(views.describe())
    print("")

    # Predictors and response for the regression.
    predictors = views[["course_id", "level"]]
    response = views["view_time_seconds"]

    # Assemble the frame handed to the formula API by the caller.
    regression_frame = pd.DataFrame(predictors, columns=['course_id', 'level'])
    regression_frame['view_time_seconds'] = pd.Series(response)
    return regression_frame
# Example no. 2
# 0
def predict_CL():
    """Fit a k-nearest-neighbours classifier on course views and preview it.

    Loads grouped course-view rows through ``db.loadDatabySQL``, encodes
    ``course_id`` and ``level`` with the module-level encoder ``le``, fits a
    default ``KNeighborsClassifier`` that predicts ``level`` from
    ``author_handle`` and ``course_id``, and predicts the last 100 rows.

    Returns:
        str: HTML (``DataFrame.to_html``) for the first five test rows, with
        the two feature columns plus 'Actual result' and 'Predicted Result'.
        The same five rows are also printed to stdout.
    """
    print(
        "Running  - IRIS_StudentClassification_KNN_1_IRIS_StudentClassification_KNN_1_CLCL.py"
    )
    # Load Data
    sql = "SELECT user_handle,course_id,author_handle,level,sum(view_time_seconds) view_time_seconds from user_course_views group by user_handle,course_id,author_handle,level"
    df = db.loadDatabySQL(sql)

    # Drop the first row (header row) and the last row (described by the
    # original author as a column header in string format). The slice is
    # positional, and the copy makes the column assignments below write to an
    # independent frame instead of a slice of ``df``.
    data = df.iloc[1:-1].copy()

    data["course_id"] = le.fit_transform(data["course_id"])
    data["level"] = le.fit_transform(data["level"])
    data = data.apply(pd.to_numeric)

    # Prepare X and Y
    iris_X = data[['author_handle', 'course_id']]
    iris_y = data['level']

    # Train on every row except the last 400; test on the last 100 rows.
    # NOTE(review): the original comment said "400 rows for training", which
    # is not what these slices select -- confirm the intended split.
    iris_X_train = iris_X.iloc[:-400]
    iris_y_train = iris_y.iloc[:-400]
    iris_X_test = iris_X.iloc[-100:]
    iris_y_test = iris_y.iloc[-100:]

    # prepare the KNN classifier model (library defaults)
    from sklearn.neighbors import KNeighborsClassifier

    knn = KNeighborsClassifier()
    knn.fit(iris_X_train, iris_y_train)

    # predict using test set
    pr = knn.predict(iris_X_test)

    # Put features, actual labels and predictions side by side; copy so the
    # new columns are not written into a slice of ``data``.
    dfFinal = iris_X_test.copy()
    dfFinal['Actual result'] = iris_y_test
    dfFinal['Predicted Result'] = pr

    # Return the first five rows (``head()`` default) of actual vs predicted.
    df1 = dfFinal.head()
    htmltext = df1.to_html()
    print(df1)
    return htmltext
def loadData():
    """Load assessment scores with ``assessment_tag`` encoded as integers.

    Reads the ``user_assessment_scores`` table through ``db.loadData``, drops
    the ``user_assessment_date`` and ``user_handle`` columns, discards the
    first row, and replaces ``assessment_tag`` with the codes produced by the
    module-level encoder ``le``.

    Returns:
        The prepared frame.
    """
    # Read Data
    df = db.loadData("user_assessment_scores")
    # ``columns=`` instead of a positional axis: pandas 2.0 no longer accepts
    # ``drop(labels, 1)``.
    df = df.drop(columns=['user_assessment_date'])

    # Remove header row; copy so the assignment below writes to an
    # independent frame rather than a slice of the loaded one.
    df = df.iloc[1:].copy()

    df["assessment_tag"] = le.fit_transform(df["assessment_tag"])
    df = df.drop(columns=['user_handle'])
    return df
def loadData():
    """Load user interests with ``interest_tag`` encoded as integers.

    Reads the ``user_interests`` table through ``db.loadData``, discards the
    first row, drops the ``date_followed`` column, and replaces
    ``interest_tag`` with the codes produced by the module-level encoder
    ``le``.

    Returns:
        The prepared frame.
    """
    df = db.loadData("user_interests")

    # Remove header row
    df = df.iloc[1:]

    # Drop unwanted columns. ``columns=`` replaces the positional axis
    # argument, which pandas 2.0 no longer accepts.
    df = df.drop(columns=['date_followed'])

    # convert interest_tag to serial numbers
    df["interest_tag"] = le.fit_transform(df["interest_tag"])
    return df
# Example no. 5
# 0
def loadData():
    """Load course views keyed by user, with string columns encoded.

    Reads ``user_course_views`` through ``db.loadData``, keeps the first five
    columns, drops ``view_date`` and ``author_handle``, replaces ``course_id``
    and ``level`` with the codes produced by the module-level encoder ``le``,
    and finally discards the first row.

    Returns:
        The prepared frame.
    """
    # load data
    df = db.loadData("user_course_views")

    # select only first 5 columns
    df = df.iloc[:, 0:5]
    # ``columns=`` replaces the positional axis argument, which pandas 2.0 no
    # longer accepts.
    df = df.drop(columns=['view_date', 'author_handle'])

    # Convert the string data to numeric series.
    # NOTE(review): encoding runs before the first row is removed, so that
    # row's values take part in fitting ``le`` -- confirm this is intended.
    for column in ("course_id", "level"):
        df[column] = le.fit_transform(df[column])

    # Remove header row
    return df.iloc[1:]
def loadData():
    """Load course views keyed by author, with string columns encoded.

    Reads ``user_course_views`` through ``db.loadData``, keeps the first five
    columns, drops ``view_date`` and ``user_handle``, replaces ``course_id``
    and ``level`` with the codes produced by the module-level encoder ``le``,
    and finally discards the first row.

    Returns:
        The prepared frame.
    """
    df = db.loadData("user_course_views")

    # select only first 5 columns
    df = df.iloc[:, 0:5]

    # Drop unwanted columns for this analysis. ``columns=`` replaces the
    # positional axis argument, which pandas 2.0 no longer accepts.
    df = df.drop(columns=['view_date', 'user_handle'])

    # Convert the string data to numeric series.
    # NOTE(review): encoding runs before the first row is removed, so that
    # row's values take part in fitting ``le`` -- confirm this is intended.
    df["course_id"] = le.fit_transform(df["course_id"])
    df["level"] = le.fit_transform(df["level"])

    # Remove header row
    df = df.iloc[1:]
    return df
def loadData():
    """Announce the run, then load author-keyed course views for analysis.

    Prints a "Running" banner, reads ``user_course_views`` through
    ``db.loadData``, keeps the first five columns, drops ``view_date`` and
    ``user_handle``, replaces ``course_id`` and ``level`` with the codes
    produced by the module-level encoder ``le``, and discards the first row.

    Returns:
        The prepared frame.
    """
    print("Running  - IRIS_StudentClassification_Views__AC_5.py")

    # Read Data
    df = db.loadData("user_course_views")

    # select only first 5 columns
    df = df.iloc[:, 0:5]

    # Remove unwanted columns for this analysis. The keyword form is used
    # because pandas 2.0 rejects the positional axis in ``drop(labels, 1)``.
    df = df.drop(columns=['view_date', 'user_handle'])

    # Convert the string data to numeric series.
    # NOTE(review): the encoder is fitted before the first row is removed, so
    # that row's values influence the codes -- confirm this is intended.
    df["course_id"] = le.fit_transform(df["course_id"])
    df["level"] = le.fit_transform(df["level"])

    # Remove header row
    df = df.iloc[1:]
    return df
def loadData():
    """Announce the run, then load assessment scores without date or tag.

    Prints a "Running" banner, reads ``user_assessment_scores`` through
    ``db.loadData``, drops ``user_assessment_date``, fits the module-level
    encoder ``le`` on ``assessment_tag``, drops ``assessment_tag``, and
    discards the first row.

    Returns:
        The prepared frame (it no longer contains ``assessment_tag``).
    """
    print(
        "Running  - IRIS_StudentClassification_KNN_1_IRIS_StudentClassification_KNN_1_CLCL.py"
    )

    # Load Data
    df = db.loadData("user_assessment_scores")

    # Remove unwanted columns. ``columns=`` replaces the positional axis
    # argument, which pandas 2.0 no longer accepts.
    df = df.drop(columns=['user_assessment_date'])

    # Convert the column values to sequence numbers.
    # NOTE(review): the encoded column is dropped on the next line, so the
    # only lasting effect of this step is refitting the shared ``le``. It is
    # kept to preserve that side effect -- confirm whether it is still needed.
    df["assessment_tag"] = le.fit_transform(df["assessment_tag"])
    df = df.drop(columns=['assessment_tag'])

    # Remove header row
    df = df.iloc[1:]

    return df
def loadData():
    """Load user-keyed course views, encoded and stripped of repeated rows.

    Reads ``user_course_views`` through ``db.loadData``, keeps the first five
    columns, drops ``view_date`` and ``author_handle``, replaces ``course_id``
    and ``level`` with the codes produced by the module-level encoder ``le``,
    and removes every row whose (user_handle, course_id, level) combination
    occurs more than once.

    Returns:
        The prepared frame.
    """
    # Read Data
    df = db.loadData("user_course_views")

    # select only first 5 columns
    df = df.iloc[:, 0:5]

    # Remove author handle and view date columns. ``columns=`` replaces the
    # positional axis argument, which pandas 2.0 no longer accepts.
    df = df.drop(columns=['view_date', 'author_handle'])

    # Convert the string data to numeric series
    df["course_id"] = le.fit_transform(df["course_id"])
    df["level"] = le.fit_transform(df["level"])

    # Remove duplicates; keep=False discards all members of a duplicated
    # group rather than keeping one representative.
    df = df.drop_duplicates(subset=['user_handle', 'course_id', 'level'],
                            keep=False)

    # NOTE(review): the original had a "Remove header row" step
    # (``df = df.iloc[1:]``) placed after the return, so it never ran. The
    # unreachable code is removed and the returned rows are unchanged --
    # confirm whether the first row should be dropped here as well.
    return df