def read_input(filename, top_N=10): #just reads in X and category matrix so loading it will not take time
    shop_mall = pd.read_csv("Demographic Filtering/mall_store_list.csv", encoding = "ISO-8859-1", index_col=False) #This is used for obttaining unique malls

    stores_db = shop_mall[["store", "store_id"]].drop_duplicates(subset = ["store"]) #get unique stores
    stores_db.index = pd.Series(np.arange(stores_db.shape[0]))
    #stores_db.to_csv("command_line_files/store_list.csv", header="true")

    mall_demographic = pd.read_csv("Demographic Filtering/mall_with_demographic_category.csv", encoding = "ISO-8859-1", index_col=False)
    county_db = mall_demographic[["county", "usps"]].copy(deep=True) #reads all
    county_db["county"] =county_db["county"].str.lower()
    county_db["usps"] =county_db["usps"].str.lower()
    county_db =county_db.drop_duplicates(["county", "usps"])
    #county_db.to_csv("command_line_files/county_list.csv", header="true")
    #save columns into another file

    #read txt file
    file = open(filename)
    entire_file =
    user = re.split('\n+', entire_file) #first entry of user is their county followed by stores
    #accept two type of inputs. One read
    X, _ = filter_demo_data.get_X()

    #get category features
    #This will enable you to get one user
    #get demographic features
    user_demographic_feature = mall_demographic.ix[mall_demographic["county"].str.lower() == user[0].lower(), "Homeowner_vacancy_rate_percent":"Rental_vacancy_rate_percent"].ix[mall_demographic["usps"].str.lower() == user[1].lower()].as_matrix()[0, :]

    #create Nstore vctor that represents the boolean values of a mall having a store.
    user_store_db = pd.DataFrame(pd.Series(user[2:]).str.lower(), columns=["store"] ) #put stuff into dataframe
    user_shop_index = pd.merge(user_store_db, stores_db, how="left", left_on=["store"], right_on=["store"] )["store_id"].as_matrix()
    user_ratings = np.zeros(stores_db.shape[0])
    user_ratings[user_shop_index] = 1
    user_ratings.reshape((-1, 1))

    #computing category features
    #get category features. You can do this by computing the weighted average of all the malls based on cosine distance

    #This is used to compute
    user_feature = mall_demographic.ix[:, "Homeowner_vacancy_rate_percent":"Rental_vacancy_rate_percent"].as_matrix()
    recommendation_system =, similarity_helper=similarity_helper)
    top_recommendations = recommendation_system.predict_for_user(user_ratings, user_demographic_feature , top_N, user_feature ) #do another join
    print(stores_db.ix[top_recommendations, "store"] )
    def get_helper2(self, name, function):
        if(name == 'feature_helper'):
            self.feature_helper = function
        if(name == 'similarity_helper'):
            self.similarity_helper = function
        if(name == 'score_helper'):
            self.score_helper = function
            raise Exception("Cannot find feature function corresponding to the input name")

#for testing use category data
X, category = filter_demo_data.get_X()
X = X[:, :]
category = category[:, :]
model = logistic_reg(X, user_feat=category)

initializer = one_class.one_class(learner=model)
t = time.time()
train, test = initializer.train_test_split_equal_item(X, .1) #use something else. THe train test split gets ones sometimes
print(time.time() - t )
train = train.astype(int)
test = test.astype(int), test)