# 'fold',
# 'murders', 'murdPerPop',
# 'rapes', 'rapesPerPop',
# 'robberies', 'robbbPerPop',
# # 'assaults', 'assaultPerPop',
# 'burglaries', 'burglPerPop',
# 'larcenies', 'larcPerPop',
# 'autoTheft', 'autoTheftPerPop',
# 'arsons', 'arsonsPerPop',
# 'ViolentCrimesPerPop',
# 'nonViolPerPop',
# ])
# NOTE(review): the commented lines above close a disabled column list whose
# opening lies before this point in the file.

# Attributes kept for this experiment; entries prefixed with '#' are
# deliberately excluded from the selection.
_kept_attributes = [
    'racePctHisp',
    'racePctWhite',
    #'racepctblack',
    #'racePctAsian',
    'medIncome',
    'NumStreet',
    'NumImmig',
    'PctEmploy',
    "PctPopUnderPov",
    'pctUrban',
]
crime = crime.take_columns(_kept_attributes)

# Optional preprocessing steps, currently switched off.
#crime = crime.fix_missing(fill_mean=True)
#crime = crime.standardize()
#crime = crime.normalize()
#crime = crime.drop_nominals()
#crime = crime.discretize('assaults', 3)

# Rebuild the data set from the [:200] slice of its frame
# (presumably the first 200 rows -- confirm that `df` is a DataFrame).
crime = DataSet(dataframe=crime.df[:200])
#print(crime.df.assaults)
print(crime.attributeNames)

# Variables of interest
N = crime.N
M = crime.M
#C = len(crime.classNames)
# 'rapes', 'rapesPerPop',
# 'robberies', 'robbbPerPop',
# # 'assaults', 'assaultPerPop',
# 'burglaries', 'burglPerPop',
# 'larcenies', 'larcPerPop',
# 'autoTheft', 'autoTheftPerPop',
# 'arsons', 'arsonsPerPop',
# 'ViolentCrimesPerPop',
# 'nonViolPerPop',
# ])
# NOTE(review): the commented lines above close a disabled column list whose
# opening lies before this point in the file.

# Column that is discretized and then used as the class column.
_class_attribute = 'racePctWhite'

# Attributes kept for this experiment; entries prefixed with '#' are
# deliberately excluded from the selection.
_selected_attributes = [
    'racePctHisp',
    'racePctWhite',
    #'racepctblack',
    #'racePctAsian',
    'medIncome',
    'NumStreet',
    'NumImmig',
    'PctEmploy',
    'PctPopUnderPov',
    'pctUrban',
]

# Preprocessing pipeline: each step's result feeds the next one, exactly as
# the successive `crime = crime.<step>()` rebindings did.
crime = (
    crime.take_columns(_selected_attributes)
    .fix_missing(fill_mean=True)
    .standardize()
    # .normalize()       (disabled)
    # .drop_nominals()   (disabled)
    .take_first_n_rows(200)
    .discretize(_class_attribute, 2)
    .set_class_column(_class_attribute)
)
#crime = DataSet(dataframe=crime.df[:200])
# Load the raw CSV, passing four columns to DataSet as nominals.
_nominal_columns = ['state', 'communityname', 'countyCode', 'communityCode']
crime = DataSet(datafile='../data/raw.csv', nominals=_nominal_columns)
#crime = crime.drop(['state', 'communityname']) # Drop strings
#crime = crime.drop(['countyCode','communityCode']) # Drop nominals

# Columns removed before normalization. Entries prefixed with '#' are kept
# in the data set; 'nonViolPerPop' and 'arsons' are selected further down.
_discarded_columns = [
    'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    # 'assaults', 'assaultPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    #'arsons', 'arsonsPerPop',
    'ViolentCrimesPerPop',
    #'nonViolPerPop',
]
crime = crime.drop_columns(_discarded_columns)

# Debug output: show the container type of the data matrix.
print(type(crime.X))

# Normalize first, then narrow down to the two columns of interest.
crime = crime.normalize()
crime = crime.take_columns(['nonViolPerPop', 'arsons'])
#print(crime)
import pylab as pl
from Framework.DataSet import *

# Load the raw CSV, passing four columns to DataSet as nominals.
crime = DataSet(
    datafile='../data/raw.csv',
    nominals=[
        'state',
        'communityname',
        'countyCode',
        'communityCode',
    ],
)
#crime = crime.drop(['state', 'communityname']) # Drop strings
#crime = crime.drop(['countyCode','communityCode']) # Drop nominals

# Remove the columns listed below. Entries prefixed with '#' stay in the
# data set; 'nonViolPerPop' and 'arsons' are selected afterwards.
crime = crime.drop_columns([
    'fold',
    'murders', 'murdPerPop',
    'rapes', 'rapesPerPop',
    'robberies', 'robbbPerPop',
    # 'assaults', 'assaultPerPop',
    'burglaries', 'burglPerPop',
    'larcenies', 'larcPerPop',
    'autoTheft', 'autoTheftPerPop',
    #'arsons', 'arsonsPerPop',
    'ViolentCrimesPerPop',
    #'nonViolPerPop',
])

# Debug output: show the container type of the data matrix.
print(type(crime.X))

# Normalize, then keep only the two columns of interest.
crime = crime.normalize().take_columns(['nonViolPerPop', 'arsons'])
#print(crime)