def ageGenderRand():
     """Build an ageGenderModel from four randomly chosen predict methods.

     Four independent indices are drawn with ``random.randint(0, 2)`` and each
     one is passed to ``getPredictMethod`` to obtain the object for that slot.
     When the module-level ``usePrints`` flag is truthy, the names reported by
     ``getPredictMethodName`` for the four picks are printed first.
     """
     import original_functions.ageGenderModel as ageGender

     # One draw per model slot, in slot order.
     picks = [random.randint(0, 2) for _ in range(4)]

     if usePrints:
         names = [getPredictMethodName(pick) for pick in picks]
         print(names[0], names[1], names[2], names[3])

     methods = [getPredictMethod(pick) for pick in picks]
     return ageGender.ageGenderModel(*methods)
 def ageGenderTree():
     """Return an ageGenderModel whose four slots all use predict method 1.

     NOTE(review): relies on a module-level ``ageGender`` name; this function
     does not import it itself. Method index 1 is presumably the tree-based
     one, going by the function name -- confirm against getPredictMethod.
     """
     buildModel = ageGender.ageGenderModel
     # A separate getPredictMethod(1) call per slot, as in the original.
     return buildModel(*[getPredictMethod(1) for _ in range(4)])
Example #3
0
# Train an ageGenderModel on the prepared feature tables, predict the test
# set and write the result as a submission CSV.
# NOTE(review): pathTrain is not assigned in the visible part of this script;
# presumably it is defined just above -- confirm.
pathTest = '../input/users_test_final.csv'
pathyRes = '../input/yRes_final.csv'
finalTrain = pd.read_csv(pathTrain)
finalTest = pd.read_csv(pathTest)
yRes = pd.read_csv(pathyRes)

# The same column-name patterns are handed to orig.removeColumns for both
# tables, so they are spelled out once.
droppedColumns = ("action_", "num_of_devices", "total_time", "timeAct_",
                  "dev_", "id")
finalTrainUse = orig.removeColumns(finalTrain, droppedColumns)
finalTestUse = orig.removeColumns(finalTest, droppedColumns)

# Three logistic regressions followed by one MLP, passed to ageGenderModel in
# that order.
predList = [LogisticRegression() for _ in range(3)]
predList.append(MLPClassifier(solver='lbfgs'))
mainPredictMethod = agm.ageGenderModel(predList[0], predList[1], predList[2],
                                       predList[3])

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# replacement for measuring elapsed intervals.
startRun = time.perf_counter()
mainPredictMethod.fit(finalTrainUse, yRes['country_destination'])
print('fit time:', time.perf_counter() - startRun)
startRun = time.perf_counter()
prediction = mainPredictMethod.predict(finalTestUse)
print('prediction time:', time.perf_counter() - startRun)

# Pair each test-set id with its prediction; the id index is reset so it lines
# up with the freshly built prediction Series (which is indexed from 0).
idCol = finalTest['id']
idCol = idCol.reset_index(drop=True)
d = {'id': idCol, 'country': pd.Series(prediction)}
df = pd.DataFrame(d, columns=['id', 'country'])
df.to_csv('../submission/submission_ageGenderTree_LLLN.csv', index=False)
 def ageGenderLogreg():
     """Return an ageGenderModel whose four slots all use predict method 0.

     NOTE(review): relies on a module-level ``ageGender`` name; this function
     does not import it itself. Method index 0 is presumably logistic
     regression, going by the function name -- confirm against
     getPredictMethod.
     """
     buildModel = ageGender.ageGenderModel
     # A separate getPredictMethod(0) call per slot, as in the original.
     return buildModel(*[getPredictMethod(0) for _ in range(4)])
Example #5
0
from sklearn import tree
import numpy as np
import pandas as pd

# Evaluate an ageGenderModel built from four logistic regressions and write a
# submission CSV.
predictMethod0 = LogisticRegression()
predictMethod1 = LogisticRegression()
predictMethod2 = LogisticRegression()
predictMethod3 = LogisticRegression()

users, yRes = orig.loadAndUpdateFeatures('../input/train_users.csv')
featureListClass = orig.featureList()
print(users.head())
print(yRes.head())
category = 'country_destination'

mainPredictMethod = agm.ageGenderModel(predictMethod0, predictMethod1,
                                       predictMethod2, predictMethod3)
# Append the label column(s) to the feature table so a single frame is passed
# to orig.fitPredictAndTest.
users = pd.concat([users, yRes], axis=1)

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# replacement for measuring elapsed intervals.
startRun = time.perf_counter()
prediction, fit = orig.fitPredictAndTest(users, list(featureListClass.get()),
                                         category, mainPredictMethod)
print('\n\n\n\n\nnum of featurs', len(list(featureListClass.get())),
      'run time:',
      time.perf_counter() - startRun, 'fit:', fit)

# NOTE(review): neither finalTest nor y_pred is assigned in the visible part
# of this script, while `prediction` above is computed and never used. This
# looks like a copy-paste leftover -- confirm which ids and predictions are
# meant to go into the submission before relying on this file.
idCol = finalTest['id']
idCol = idCol.reset_index(drop=True)
d = {'id': idCol, 'country': pd.Series(y_pred)}
df = pd.DataFrame(d, columns=['id', 'country'])
df.to_csv('../submission/submission_ageGenderTreeLogistic.csv', index=False)