# Path of the business statistics file (command-line argument index 4).
business_stats_file = sys.argv[4]

print('[ %04ds ] Program started' % (time.time() - start_time))

# Load the training reviews plus the per-user and per-business statistics.
training_set: List[Review] = Review.load_from_file(training_set_file)
user_stats: Dict[str, User] = User.load_from_file(user_stats_file)
business_stats: Dict[str, Business] = Business.load_from_file(
    business_stats_file)

print('[ %04ds ] Files loaded' % (time.time() - start_time))

# Users carry a single feature name; business feature names are collected
# from the loaded business statistics.
all_user_features = ['NO_FEAT']
all_business_features = Business.collect_business_features(business_stats)

# Extract each id collection once and reuse it for both the fit and the
# feature-building steps instead of recomputing it for every call.
# NOTE(review): assumes the extract helpers return re-iterable collections
# (list/set/dict view), not one-shot iterators -- confirm.
all_user_ids = User.extract_user_ids(user_stats)
all_business_ids = Business.extract_business_ids(business_stats)

# NOTE(review): Dataset looks like lightfm.data.Dataset -- confirm against
# the file's imports.
dataset = Dataset()
dataset.fit(all_user_ids,
            all_business_ids,
            user_features=all_user_features,
            item_features=all_business_features)

# The trailing positional True is presumably the `normalize` flag of the
# Dataset feature builders -- confirm.
user_features = dataset.build_user_features(
    User.build_user_features(user_stats, all_user_ids), True)
business_features = dataset.build_item_features(
    Business.build_business_features(business_stats, all_business_ids),
    True)

print('[ %04ds ] Dataset initialized' % (time.time() - start_time))
# Command-line arguments at index 4 and 5: business statistics file and
# output file.
business_stats_file = sys.argv[4]
output_file = sys.argv[5]

elapsed = time.time() - start_time
print('[ %04ds ] Program started' % elapsed)

# Read the training reviews and the user / business statistics.
training_set: List[Review] = Review.load_from_file(training_set_file)
user_stats: Dict[str, User] = User.load_from_file(user_stats_file)
business_stats: Dict[str, Business] = Business.load_from_file(
    business_stats_file
)

elapsed = time.time() - start_time
print('[ %04ds ] Files loaded' % elapsed)

# Feature vocabularies: one fixed user feature name, and the business
# feature names collected from the business statistics.
all_user_features = ['NO_FEAT']
all_business_features = Business.collect_business_features(business_stats)

# Id collections, shared by the fit step and the feature builders below.
all_user_ids = User.extract_user_ids(user_stats)
all_business_ids = Business.extract_business_ids(business_stats)

# NOTE(review): Dataset looks like lightfm.data.Dataset -- confirm against
# the file's imports.
dataset = Dataset()
dataset.fit(
    all_user_ids,
    all_business_ids,
    user_features=all_user_features,
    item_features=all_business_features,
)

# Build the raw per-user feature data, then hand it to the dataset.
# The positional True is presumably the `normalize` flag -- confirm.
user_feature_rows = User.build_user_features(user_stats, all_user_ids)
user_features = dataset.build_user_features(user_feature_rows, True)

# Same two-step construction for the business (item) side.
business_feature_rows = Business.build_business_features(
    business_stats, all_business_ids
)
business_features = dataset.build_item_features(business_feature_rows, True)