import concurrent.futures

from tabulate import tabulate
from timeit import default_timer as timer

from dataUtils import DataUtils
from dataAggregator import DataAggregator
from baselines import MostPopularRecommender, MostRecentRecommender, MeanScoreRecommender
from evalMethods import evaluate_recall, evaluate_arhr, evaluate_mse
from contentbased import ContentBasedRecommender

if __name__ == '__main__':
    # Entry point: load the "active1000" events, filter them, and report how
    # many events were loaded, kept and discarded.
    DATANUM = 0  # number of files, 0 to load all
    K = 10  # not used in the visible part of the script; presumably a top-K cutoff -- confirm

    dataHelper = DataUtils()
    print("loading data...")
    entries = dataHelper.load_data("active1000", num=DATANUM)
    numLoaded = len(entries)
    print(f"loaded {numLoaded} events.")

    # Filter out unhelpful rows
    print("filtering data...")
    filtered_data = dataHelper.filter_data(entries)
    numLost = numLoaded - len(filtered_data)
    # Guard the ratio: with zero loaded events a plain division would raise
    # ZeroDivisionError before the summary could be printed.
    lostRatio = numLost / numLoaded if numLoaded else 0.0
    print(
        f"filtered to {len(filtered_data)} events. {numLost} events ({lostRatio:%}) discarded."
    )

    # Re-index document and user IDs to start at 0 and be sequential, manually
    #print("re-indexing data...")
# Example #2
# 0
"""
@version: 1.0
@author: Jie Lin
@Mail: [email protected]
@file: dataUtils.py
@time: 09/27/2018 4:08pm
@purpose: this file contains a main method that calls each function in class DataUtils in order to clean data
@code environment: ubuntu 18.01
"""

from dataUtils import DataUtils

# the main method, which calls each function in the DataUtils class
if __name__ == '__main__':
    # Walk through the DataUtils helpers against the sample EMR CSV file,
    # printing each intermediate result to stdout.
    #
    # Create the DataUtils helper object.
    dataUtil = DataUtils()
    # Read the CSV at the given relative path. Per the original note this
    # yields a dataframe -- the return type is not visible here, confirm in
    # dataUtils.
    messyData = dataUtil.readFiles('./SIIM2016_Messy_Fake_EMRdata.csv')
    print(messyData)

    # Show the data under the "resource" column. Per the original note the
    # values are shown without duplicates, together with the count of each
    # kind -- confirm in dataUtils.
    # NOTE(review): if printCDataWoutDu prints internally and returns None,
    # the outer print() would also emit "None" -- verify.
    print("showing data under resource:")
    print(dataUtil.printCDataWoutDu(messyData, "resource"))

    print()
    # Select the entries whose "resource" column is "TRCT1" (argument
    # semantics assumed from the printed label -- confirm in dataUtils).
    print("showing data with resource name = \'TRCT1\'")
    resourceTRCT1 = dataUtil.findDataUnCol(messyData, "TRCT1", "resource")
    print(resourceTRCT1)

    print()