import pandas as pd
import numpy as np
import scipy.sparse as sparse
import matplotlib.pyplot as plt

plt.close('all')

# ---- Load data ----
# Set to True to read the csv; set to False to try the pre-saved sparse matrix first.
loadall = True

# Fast path: load from a pre-saved sparse matrix.
# NOTE(review): this path only defines `data`; `X` and `labels` are defined
# only by the csv path below -- confirm downstream code handles both cases.
if not loadall:
    # Project-local helper, imported lazily so the csv path does not need it.
    import sparse_manip
    sparsefile = 'data/data.npz'  # default location
    try:
        data = sparse_manip.load(sparsefile)
    except IOError:
        # Sparse file could not be read: fall back to the csv path.
        loadall = True

# Slow path: load from csv.
if loadall:
    filename = 'data/dataset.txt'
    # `sep` must be passed by keyword: pandas >= 2.0 accepts only the file
    # path positionally, so `read_csv(filename, ';')` raises TypeError there.
    X = pd.read_csv(filename, sep=';')

    # 'real world' data (no indication regarding origin):
    # keep the category column aside as labels, then remove it together with
    # the project column from the feature table.
    labels = X['category']
    X = X.drop(['category', 'project'], axis=1)

# ---- Example 2 ----
def SparseData(filename = 'data/drop_presence.npz'):
    """Return whatever ``sparse_manip.load`` yields for *filename*.

    ``sparse_manip`` is imported inside the function, so defining this
    function does not require the module to be importable.

    filename -- path handed unchanged to ``sparse_manip.load``
                (presumably a saved sparse matrix, given the ``.npz``
                default -- TODO confirm against ``sparse_manip``).
    """
    import sparse_manip as loader
    return loader.load(filename)