import pandas as pd
import numpy as np
import scipy.sparse as sparse
import matplotlib.pyplot as plt

# Start from a clean slate: close any figures left over from a previous run.
plt.close('all')

# ---------------------------------------------------------------------------
# Load data
# ---------------------------------------------------------------------------
# Set to True to load from the CSV file; set to False to try the pre-saved
# sparse matrix first.
loadall = True

# Fast path: load from a pre-saved sparse matrix.
# NOTE: when this path succeeds only `data` is defined; `X` and `labels`
# are created by the CSV path below.
if not loadall:
    import sparse_manip

    sparsefile = 'data/data.npz'  # default location
    try:
        data = sparse_manip.load(sparsefile)
    except IOError:
        # The sparse file could not be read: fall back to the CSV file.
        loadall = True

# Slow path: load from the CSV file.
if loadall:
    filename = 'data/dataset.txt'
    # The separator must be passed by keyword: pandas >= 2.0 only accepts
    # the path positionally, so `read_csv(filename, ';')` raises TypeError.
    X = pd.read_csv(filename, sep=';')

    # Keep the labels aside, then drop 'category' and 'project' so that X
    # looks like 'real world' data (no indication regarding origin).
    labels = X['category']
    X = X.drop(['category', 'project'], axis=1)
def SparseData(filename='data/drop_presence.npz'):
    """Load and return the data stored in ``filename``.

    Thin wrapper around ``sparse_manip.load``.

    Parameters
    ----------
    filename : str
        Path of the file to load. Defaults to ``'data/drop_presence.npz'``.

    Returns
    -------
    Whatever ``sparse_manip.load`` returns for that file (presumably a
    sparse matrix -- confirm against ``sparse_manip``).
    """
    # Imported here so the dependency is only needed when this is called.
    import sparse_manip

    return sparse_manip.load(filename)