Esempio n. 1
0
    testPath = _basePath + "001_test_tobe.csv"

    # 1. read data
    dr = DataReader()
    dr.readInCSV(path, "train")
    newX, newY = dr._trainDataFrame, dr._ansDataFrame
    if doTestFlag == True:
        dr.readInCSV(testPath, "test")
        newX = dr._testDataFrame
        #newX = pd.DataFrame(newX[newX.columns[0]])
        print newX
    # 2. stratify 60 % data and train location only
#     newX, newY = stratifyData(dr._trainDataFrame, dr._ansDataFrame, 0.4)

# 3. get all best model from newX
#     fab = ModelFactory()
#     fab._gridSearchFlag = True
#     fab._n_iter_search = 500
#     fab._expInfo = "001_location_only"
#     fab.getAllModels(newX, newY)

# 4. test all data, output 3 ans as features
    modelPath = _basePath + "(Xgboost)_(2016-02-03_18_39_14).model"
    tmpOutPath = _basePath + "001_submission_2.csv"
    tmpClf = loadModel(modelPath)
    log(tmpClf.predict_proba(newX))
    #outDf = pd.concat([newX, pd.DataFrame(tmpClf.predict_proba(newX))], axis=1)
    outDf = pd.DataFrame(tmpClf.predict_proba(newX))
    outDf.to_csv(tmpOutPath, sep=',', encoding='utf-8')
    musicAlarm()
Esempio n. 2
0
            tmpId = df[df.columns[0]][i2]
            tmpVal = df[df.columns[1]][i2]
            # tmpVal2= df[df.columns[2]][i2]
            if  tmpMainId == tmpId:
                tmpFlag = True
                print tmpVal
                processDf[processDf.columns[tmpVal + 394]][i1] = 1
            if tmpFlag == True and tmpMainId != tmpId:
                tmpLastI2 = i2
                break
            print i1, i2
    # outDf = pd.concat([dr._ansDataFrame, processDf], axis=1)
    outDf = processDf
    outDf.to_csv(_outputPathName, sep=',', encoding='utf-8')  
    # print dr._ansDataFrame

    
    
        
if __name__ == '__main__':
    # Script entry point: run oneHot() once and report its wall-clock duration.
    beganAt = time.time()
    oneHot()
    tookSec = time.time() - beganAt

    # Prints "elapsed: <seconds> sec" on one line, fields separated by single
    # spaces, with the duration rendered through str().
    print(" ".join(["elapsed:", str(tookSec), "sec"]))

    # musicAlarm() is defined outside this excerpt; it is called last, after
    # the timing line is printed (presumably a completion signal; confirm).
    musicAlarm()