コード例 #1
0
ファイル: master.py プロジェクト: mrelich/sfpolice
    import sys
    sys.exit()

# Input path taken from the `options` object.
f_data = options.filename

# True exactly when the input path contains the substring 'test'.
# NOTE: this name shadows the builtin eval(); it is kept because the
# data-preparation step further down passes it to prep().
eval = 'test' in f_data

# Save setting, forwarded to the K-nn training call further down.
wesave = options.save

#--------------------------------------------------------#
# Prepare data
#--------------------------------------------------------#

# Load the input CSV and run it through prep().  prep() returns the
# processed frame plus `keeps`, a sequence the method-selection code
# below uses to pick columns out of `data`.
data = pd.read_csv(f_data)
data, keeps = prep(data, eval)
# NOTE: no unconditional `import sys; sys.exit()` here -- exiting at this
# point would terminate the script right after prep() and make the method
# selection that follows unreachable.
#--------------------------------------------------------#
# Choose a method
#--------------------------------------------------------#

# Dispatch on the method code in `opt`.  Each learner module is imported
# only inside its own branch.  In both branches every kept column except
# the last is passed as the first argument and the last kept column as
# the second.
if opt == 0:
    # K-nn training and testing (also receives the save setting)
    from KNN import KNN_test_train
    input_cols, last_col = keeps[:-1], keeps[-1]
    KNN_test_train(data[input_cols], data[last_col], wesave)
elif opt == 1:
    # BDT
    from BDT import BDT_test_train
    input_cols, last_col = keeps[:-1], keeps[-1]
    BDT_test_train(data[input_cols], data[last_col])
コード例 #2
0
ファイル: result.py プロジェクト: mrelich/sfpolice
    sys.exit()

# --------------------------------------------------------#
# Prepare data differently.  Load in chunks since it
# takes so much memory to process
# --------------------------------------------------------#

# Read the test set lazily, 50000 rows at a time, instead of all at once.
f_data = "data/test.csv"
data = pd.read_csv(f_data, chunksize=50000)

# Loop over each chunk and run options.
# Per-chunk results are collected in a list and joined once after the
# loop.  This avoids testing an array against None with `==` (which is an
# element-wise comparison and ambiguous inside an `if`) and avoids
# re-copying the accumulated array on every iteration.
prob_parts = []
counter = 0
for chunk in data:
    # One pre-formatted string: prints the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Working on  %s ..." % counter)
    chunk, keeps = prep(chunk, True)

    # Run KNN and keep this chunk's probs
    if opt == 0:
        prob_parts.append(KNN_evaluate(chunk[keeps]))

    counter += 1

# probs stays None when nothing was evaluated (no chunks, or opt != 0).
# Assumes KNN_evaluate returns something np.concatenate accepts (e.g. a
# NumPy array) -- TODO confirm.
probs = np.concatenate(prob_parts) if prob_parts else None

# Now we are done, write the output
# Invert the name -> index mapping in `categories` into an array of names
# ordered by index.  Every slot starts as its stringified index, and the
# array is created only after the names are filled in, so NumPy takes the
# string width from the longest entry.  Pre-allocating a string array from
# the indices fixes the width too early and silently truncates longer
# category names.
# Assumes the values of `categories` are integer positions in
# range(len(categories)) -- TODO confirm.
labels = [str(i) for i in range(len(categories))]
for key in categories:
    labels[categories[key]] = key
outcat = np.array(labels)