def main(load=True):
    """Write the result of ``outside_juristiction`` for 'data/all.csv'.

    Both input files are always read.  A ``Juristictions`` model is then
    either restored with ``load()`` or fitted on the 'X', 'Y' and
    'PdDistrictInt' columns of 'data/trim_1e4.csv' and persisted with
    ``save()``.  Whatever ``outside_juristiction`` returns for the full
    dataset is written to 'data/outside_pd.csv'.

    Args:
        load: When true, restore the model instead of training it.
    """
    feature_columns = ['X', 'Y', 'PdDistrictInt']
    training_frame = sfc.get_data('data/trim_1e4.csv', drop_data=True)
    training_frame = training_frame[feature_columns]
    full_frame = sfc.get_data('data/all.csv', drop_data=True)

    model = Juristictions()
    if not load:
        # Fit from scratch and persist the model so later runs can load it.
        model.train(training_frame)
        model.save()
    else:
        model.load()

    outside = model.outside_juristiction(full_frame)
    sfc.write_data(outside, 'data/outside_pd.csv',
                   comment='Outside juristiction')
def make_dataset(input, output, comment='', verbose=False, size=None):
    """Write a dataset, optionally shrunk to a random sample of its rows.

    Args:
        input: Either a path string or a ``pandas.DataFrame``.  A path that
            does not exist is retried under the 'data' directory; the file
            is then read with ``sfc.get_data``.  A DataFrame is deep-copied
            so the caller's object is left untouched.
        output: Destination handed to ``sfc.write_data``.
        comment: Passed through to ``sfc.write_data``.
        verbose: Unused; kept so existing callers keep working.
        size: Number of rows to keep.  When ``None`` or not smaller than
            the number of rows, all rows are written.

    Raises:
        IOError: If neither candidate path exists, or if ``input`` is
            neither a string nor a DataFrame.
    """
    # Get the input data
    if isinstance(input, str):
        if not os.path.exists(input):
            fallback = os.path.join('data', input)
            if not os.path.exists(fallback):
                raise IOError(
                    'Neither {} nor {} exist'.format(input, fallback))
            input = fallback
        data = sfc.get_data(input)
    elif isinstance(input, pandas.DataFrame):
        data = copy.deepcopy(input)
    else:
        raise IOError('Cannot deal with a {}'.format(type(input)))

    # Shrink to random records
    if size is not None and size < len(data):
        # Seed the global generator so the same rows are picked every run.
        random.seed(sfc._SEED)
        # `range` works on Python 2 and 3; `xrange` is Python 2 only.
        positions = sorted(random.sample(range(len(data)), size))
        # The sampled values are row positions, so select with `.iloc`
        # (`.ix` is label-based on integer indexes and no longer exists in
        # current pandas).  `reset_index` returns a new frame, so its result
        # has to be kept for the renumbering to take effect.
        data = data.iloc[positions].reset_index(drop=True)

    sfc.write_data(data, output, comment=comment)
def make_dataset(input, output, comment="", verbose=False, size=None):
    """Write a dataset, optionally shrunk to a random sample of its rows.

    Args:
        input: Either a path string or a ``pandas.DataFrame``.  A path that
            does not exist is retried under the "data" directory; the file
            is then read with ``sfc.get_data``.  A DataFrame is deep-copied
            so the caller's object is left untouched.
        output: Destination handed to ``sfc.write_data``.
        comment: Passed through to ``sfc.write_data``.
        verbose: Unused; kept so existing callers keep working.
        size: Number of rows to keep.  When ``None`` or not smaller than
            the number of rows, all rows are written.

    Raises:
        IOError: If neither candidate path exists, or if ``input`` is
            neither a string nor a DataFrame.
    """
    # Get the input data
    if isinstance(input, str):
        if not os.path.exists(input):
            fallback = os.path.join("data", input)
            if not os.path.exists(fallback):
                raise IOError(
                    "Neither {} nor {} exist".format(input, fallback))
            input = fallback
        data = sfc.get_data(input)
    elif isinstance(input, pandas.DataFrame):
        data = copy.deepcopy(input)
    else:
        raise IOError("Cannot deal with a {}".format(type(input)))

    # Shrink to random records
    if size is not None and size < len(data):
        # Seed the global generator so the same rows are picked every run.
        random.seed(sfc._SEED)
        # `range` works on Python 2 and 3; `xrange` is Python 2 only.
        positions = sorted(random.sample(range(len(data)), size))
        # The sampled values are row positions, so select with `.iloc`
        # (`.ix` is label-based on integer indexes and no longer exists in
        # current pandas).  `reset_index` returns a new frame, so its result
        # has to be kept for the renumbering to take effect.
        data = data.iloc[positions].reset_index(drop=True)

    sfc.write_data(data, output, comment=comment)
def main(load=True):
    """Write the result of ``outside_juristiction`` for 'data/all.csv'.

    Both input files are always read.  A ``Juristictions`` model is then
    either restored with ``load()`` or fitted on the 'X', 'Y' and
    'PdDistrictInt' columns of 'data/trim_1e4.csv' and persisted with
    ``save()``.  Whatever ``outside_juristiction`` returns for the full
    dataset is written to 'data/outside_pd.csv'.

    Args:
        load: When true, restore the model instead of training it.
    """
    feature_columns = ['X', 'Y', 'PdDistrictInt']
    training_frame = sfc.get_data('data/trim_1e4.csv', drop_data=True)
    training_frame = training_frame[feature_columns]
    full_frame = sfc.get_data('data/all.csv', drop_data=True)

    model = Juristictions()
    if not load:
        # Fit from scratch and persist the model so later runs can load it.
        model.train(training_frame)
        model.save()
    else:
        model.load()

    outside = model.outside_juristiction(full_frame)
    sfc.write_data(outside, 'data/outside_pd.csv',
                   comment='Outside juristiction')


###############################################################################
if __name__ == "__main__":
    # Script entry point: restore the saved model, run
    # add_outside_juristiction over 'data/all.csv' and write the result back
    # to that same path, overwriting the input file.

    # Disabled alternative: retrain and save the model first.
    # main(False)
    knn = Juristictions()
    knn.load()
    # Disabled alternative: plot the two datasets.
    # knn.plot(('data/outside_pd.csv', 'Outside PD'), ('data/all.csv', 'all'))
    data = sfc.get_data('data/all.csv')
    # NOTE(review): `all` shadows the builtin at module scope here.
    all = knn.add_outside_juristiction(data)
    sfc.write_data(all, 'data/all.csv')
###############################################################################
###############################################################################
def main(load=True):
    """Write the result of ``outside_juristiction`` for 'data/all.csv'.

    Both input files are always read.  A ``Juristictions`` model is then
    either restored with ``load()`` or fitted on the 'X', 'Y' and
    'PdDistrictInt' columns of 'data/trim_1e4.csv' and persisted with
    ``save()``.  Whatever ``outside_juristiction`` returns for the full
    dataset is written to 'data/outside_pd.csv'.

    Args:
        load: When true, restore the model instead of training it.
    """
    feature_columns = ['X', 'Y', 'PdDistrictInt']
    training_frame = sfc.get_data('data/trim_1e4.csv', drop_data=True)
    training_frame = training_frame[feature_columns]
    full_frame = sfc.get_data('data/all.csv', drop_data=True)

    model = Juristictions()
    if not load:
        # Fit from scratch and persist the model so later runs can load it.
        model.train(training_frame)
        model.save()
    else:
        model.load()

    outside = model.outside_juristiction(full_frame)
    sfc.write_data(outside, 'data/outside_pd.csv',
                   comment='Outside juristiction')


###############################################################################
if __name__ == "__main__":
    # Script entry point: restore the saved model, run
    # add_outside_juristiction over 'data/all.csv' and write the result back
    # to that same path, overwriting the input file.

    # Disabled alternative: retrain and save the model first.
    # main(False)
    knn = Juristictions()
    knn.load()
    # Disabled alternative: plot the two datasets.
    # knn.plot(('data/outside_pd.csv', 'Outside PD'), ('data/all.csv', 'all'))
    data = sfc.get_data('data/all.csv')
    # NOTE(review): `all` shadows the builtin at module scope here.
    all = knn.add_outside_juristiction(data)
    sfc.write_data(all, 'data/all.csv')
###############################################################################