Exemple #1
0
def main(load=True):
    """Write the result of ``outside_juristiction`` to data/outside_pd.csv.

    Both data/trim_1e4.csv (restricted to the X, Y and PdDistrictInt
    columns) and data/all.csv are read up front, regardless of ``load``.

    Args:
        load: When truthy, ``load()`` is called on the ``Juristictions``
            object. Otherwise it is trained on the trimmed columns and
            ``save()`` is called.
    """
    columns = ['X', 'Y', 'PdDistrictInt']
    trimmed = sfc.get_data('data/trim_1e4.csv', drop_data=True)
    training = trimmed[columns]
    everything = sfc.get_data('data/all.csv', drop_data=True)
    model = Juristictions()
    if not load:
        model.train(training)
        model.save()
    else:
        model.load()
    outside = model.outside_juristiction(everything)
    sfc.write_data(outside, 'data/outside_pd.csv',
                   comment='Outside juristiction')
Exemple #2
0
def main(load=True):
    """Write the result of ``outside_juristiction`` to data/outside_pd.csv.

    Both data/trim_1e4.csv (restricted to the X, Y and PdDistrictInt
    columns) and data/all.csv are read up front, regardless of ``load``.

    Args:
        load: When truthy, ``load()`` is called on the ``Juristictions``
            object. Otherwise it is trained on the trimmed columns and
            ``save()`` is called.
    """
    columns = ['X', 'Y', 'PdDistrictInt']
    trimmed = sfc.get_data('data/trim_1e4.csv', drop_data=True)
    training = trimmed[columns]
    everything = sfc.get_data('data/all.csv', drop_data=True)
    model = Juristictions()
    if not load:
        model.train(training)
        model.save()
    else:
        model.load()
    outside = model.outside_juristiction(everything)
    sfc.write_data(outside, 'data/outside_pd.csv',
                   comment='Outside juristiction')
Exemple #3
0
def make_dataset(input, output, comment='', verbose=False, size=None):
    """Write a (possibly down-sampled) copy of a dataset to ``output``.

    Args:
        input: Either a path to a data file or a ``pandas.DataFrame``. A
            path that does not exist as given is retried relative to the
            ``data`` directory. A DataFrame is deep-copied before use.
        output: Destination handed to ``sfc.write_data``.
        comment: Comment forwarded to ``sfc.write_data``.
        verbose: Currently unused; kept so existing callers keep working.
        size: If not ``None`` and smaller than the number of rows, only
            ``size`` randomly chosen rows are kept, in their original
            order and with a fresh 0..size-1 index.

    Raises:
        IOError: If ``input`` is a path found neither as given nor under
            ``data``, or if ``input`` is neither a ``str`` nor a DataFrame.
    """
    # Get the input data
    if isinstance(input, str):
        if not os.path.exists(input):
            fallback = os.path.join('data', input)
            if not os.path.exists(fallback):
                raise IOError(
                    'Neither {} nor {} exist'.format(input, fallback))
            input = fallback
        data = sfc.get_data(input)
    elif isinstance(input, pandas.DataFrame):
        data = copy.deepcopy(input)
    else:
        raise IOError('Cannot deal with a {}'.format(type(input)))
    # Shrink to random records
    if size is not None and size < len(data):
        # Seed from sfc._SEED so the same rows are picked on every run.
        random.seed(sfc._SEED)
        positions = sorted(random.sample(range(len(data)), size))
        # The sampled values are row positions, so select with .iloc (the
        # old .ix could treat them as labels). reset_index returns a new
        # frame; its result must be kept for the reset to take effect.
        data = data.iloc[positions].reset_index(drop=True)
    sfc.write_data(data, output, comment=comment)
    return
def make_dataset(input, output, comment="", verbose=False, size=None):
    """Write a (possibly down-sampled) copy of a dataset to ``output``.

    Args:
        input: Either a path to a data file or a ``pandas.DataFrame``. A
            path that does not exist as given is retried relative to the
            ``data`` directory. A DataFrame is deep-copied before use.
        output: Destination handed to ``sfc.write_data``.
        comment: Comment forwarded to ``sfc.write_data``.
        verbose: Currently unused; kept so existing callers keep working.
        size: If not ``None`` and smaller than the number of rows, only
            ``size`` randomly chosen rows are kept, in their original
            order and with a fresh 0..size-1 index.

    Raises:
        IOError: If ``input`` is a path found neither as given nor under
            ``data``, or if ``input`` is neither a ``str`` nor a DataFrame.
    """
    # Get the input data
    if isinstance(input, str):
        if not os.path.exists(input):
            fallback = os.path.join("data", input)
            if not os.path.exists(fallback):
                raise IOError(
                    "Neither {} nor {} exist".format(input, fallback))
            input = fallback
        data = sfc.get_data(input)
    elif isinstance(input, pandas.DataFrame):
        data = copy.deepcopy(input)
    else:
        raise IOError("Cannot deal with a {}".format(type(input)))
    # Shrink to random records
    if size is not None and size < len(data):
        # Seed from sfc._SEED so the same rows are picked on every run.
        random.seed(sfc._SEED)
        positions = sorted(random.sample(range(len(data)), size))
        # The sampled values are row positions, so select with .iloc (the
        # old .ix could treat them as labels). reset_index returns a new
        # frame; its result must be kept for the reset to take effect.
        data = data.iloc[positions].reset_index(drop=True)
    sfc.write_data(data, output, comment=comment)
    return
Exemple #5
0

def main(load=True):
    """Write the result of ``outside_juristiction`` to data/outside_pd.csv.

    Both data/trim_1e4.csv (restricted to the X, Y and PdDistrictInt
    columns) and data/all.csv are read up front, regardless of ``load``.

    Args:
        load: When truthy, ``load()`` is called on the ``Juristictions``
            object. Otherwise it is trained on the trimmed columns and
            ``save()`` is called.
    """
    columns = ['X', 'Y', 'PdDistrictInt']
    trimmed = sfc.get_data('data/trim_1e4.csv', drop_data=True)
    training = trimmed[columns]
    everything = sfc.get_data('data/all.csv', drop_data=True)
    model = Juristictions()
    if not load:
        model.train(training)
        model.save()
    else:
        model.load()
    outside = model.outside_juristiction(everything)
    sfc.write_data(outside, 'data/outside_pd.csv',
                   comment='Outside juristiction')


###############################################################################


if __name__ == "__main__":
    # Script entry point: call load() on a Juristictions object, pass the
    # contents of data/all.csv through add_outside_juristiction, and write
    # the result back over data/all.csv.
    #
    # Disabled alternatives kept for reference:
    #   main(False)
    #   model.plot(('data/outside_pd.csv', 'Outside PD'),
    #              ('data/all.csv', 'all'))
    model = Juristictions()
    model.load()
    records = sfc.get_data('data/all.csv')
    augmented = model.add_outside_juristiction(records)
    sfc.write_data(augmented, 'data/all.csv')

###############################################################################
Exemple #6
0
###############################################################################


def main(load=True):
    """Write the result of ``outside_juristiction`` to data/outside_pd.csv.

    Both data/trim_1e4.csv (restricted to the X, Y and PdDistrictInt
    columns) and data/all.csv are read up front, regardless of ``load``.

    Args:
        load: When truthy, ``load()`` is called on the ``Juristictions``
            object. Otherwise it is trained on the trimmed columns and
            ``save()`` is called.
    """
    columns = ['X', 'Y', 'PdDistrictInt']
    trimmed = sfc.get_data('data/trim_1e4.csv', drop_data=True)
    training = trimmed[columns]
    everything = sfc.get_data('data/all.csv', drop_data=True)
    model = Juristictions()
    if not load:
        model.train(training)
        model.save()
    else:
        model.load()
    outside = model.outside_juristiction(everything)
    sfc.write_data(outside, 'data/outside_pd.csv',
                   comment='Outside juristiction')


###############################################################################

if __name__ == "__main__":
    # Script entry point: call load() on a Juristictions object, pass the
    # contents of data/all.csv through add_outside_juristiction, and write
    # the result back over data/all.csv.
    #
    # Disabled alternatives kept for reference:
    #   main(False)
    #   model.plot(('data/outside_pd.csv', 'Outside PD'),
    #              ('data/all.csv', 'all'))
    model = Juristictions()
    model.load()
    records = sfc.get_data('data/all.csv')
    augmented = model.add_outside_juristiction(records)
    sfc.write_data(augmented, 'data/all.csv')

###############################################################################