def ageGenderRand():
    """Build an ageGenderModel from four randomly picked predict methods.

    Four indices are drawn with ``random.randint(0, 2)`` (so each is 0, 1
    or 2). If the module-level ``usePrints`` flag is truthy, the
    ``getPredictMethodName`` of every drawn index is printed on one line.
    The ``getPredictMethod`` result for each index is then passed, in draw
    order, to ``ageGender.ageGenderModel`` and that model is returned.
    """
    # Function-scope import: binds `ageGender` locally for this call only.
    import original_functions.ageGenderModel as ageGender

    drawn = [random.randint(0, 2) for _ in range(4)]

    if usePrints:
        print(*[getPredictMethodName(choice) for choice in drawn])

    return ageGender.ageGenderModel(
        *[getPredictMethod(choice) for choice in drawn]
    )
def ageGenderTree():
    """Return an ageGenderModel built from predict method index 1 in all slots.

    ``getPredictMethod(1)`` is invoked four times, once per positional
    argument of ``ageGender.ageGenderModel``.
    """
    method_index = 1
    slot_count = 4
    return ageGender.ageGenderModel(
        *(getPredictMethod(method_index) for _ in range(slot_count))
    )
# Fit an age/gender ensemble on the final training set, predict the final
# test set and write the predictions out as a submission CSV.
#
# NOTE(review): `pathTrain`, `pd`, `time`, `orig`, `agm`, `LogisticRegression`
# and `MLPClassifier` are not defined in this section; they are expected to be
# provided earlier in the file - confirm.
pathTest = '../input/users_test_final.csv'
pathyRes = '../input/yRes_final.csv'

finalTrain = pd.read_csv(pathTrain)
finalTest = pd.read_csv(pathTest)
yRes = pd.read_csv(pathyRes)

# The same column specification is applied to both train and test frames,
# so it is spelled out once and shared by both calls.
columnsToRemove = ("action_", "num_of_devices", "total_time",
                   "timeAct_", "dev_", "id")
finalTrainUse = orig.removeColumns(finalTrain, columnsToRemove)
finalTestUse = orig.removeColumns(finalTest, columnsToRemove)

# Three logistic regressions followed by one neural network.
# NOTE(review): the MLP append is placed after the loop; this matches the
# "LLLN" suffix of the output file name below - confirm against the
# original layout.
predList = []
for j in range(3):
    predList.append(LogisticRegression())
predList.append(MLPClassifier(solver='lbfgs'))

mainPredictMethod = agm.ageGenderModel(predList[0], predList[1],
                                       predList[2], predList[3])

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed intervals.
startRun = time.perf_counter()
mainPredictMethod.fit(finalTrainUse, yRes['country_destination'])
print('fit time:', time.perf_counter() - startRun)

startRun = time.perf_counter()
prediction = mainPredictMethod.predict(finalTestUse)
print('prediction time:', time.perf_counter() - startRun)

# Pair each test id with its prediction. The index is reset so the id
# Series and the freshly built prediction Series share a 0..n-1 index.
idCol = finalTest['id']
idCol = idCol.reset_index(drop=True)
d = {'id': idCol, 'country': pd.Series(prediction)}
df = pd.DataFrame(d, columns=['id', 'country'])
df.to_csv('../submission/submission_ageGenderTree_LLLN.csv', index=False)
def ageGenderLogreg():
    """Return an ageGenderModel built from predict method index 0 in all slots.

    ``getPredictMethod(0)`` is invoked four times, once per positional
    argument of ``ageGender.ageGenderModel``.
    """
    method_index = 0
    slot_count = 4
    return ageGender.ageGenderModel(
        *(getPredictMethod(method_index) for _ in range(slot_count))
    )
from sklearn import tree
import numpy as np
import pandas as pd

# Evaluate an age/gender ensemble of four logistic regressions on the
# training users, then write a submission CSV.
#
# NOTE(review): `LogisticRegression`, `orig`, `agm` and `time` are not
# defined in this section; they are expected to be provided elsewhere in
# the file - confirm.
predictMethod0 = LogisticRegression()
predictMethod1 = LogisticRegression()
predictMethod2 = LogisticRegression()
predictMethod3 = LogisticRegression()

users, yRes = orig.loadAndUpdateFeatures('../input/train_users.csv')
featureListClass = orig.featureList()
print(users.head())
print(yRes.head())

category = 'country_destination'
mainPredictMethod = agm.ageGenderModel(predictMethod0, predictMethod1,
                                       predictMethod2, predictMethod3)

# Features and targets are joined column-wise into a single frame before
# being handed to fitPredictAndTest.
users = pd.concat([users, yRes], axis=1)

# time.clock() was removed in Python 3.8; time.perf_counter() is the
# documented replacement for measuring elapsed intervals.
startRun = time.perf_counter()
prediction, fit = orig.fitPredictAndTest(users, list(featureListClass.get()),
                                         category, mainPredictMethod)
print('\n\n\n\n\nnum of featurs', len(list(featureListClass.get())),
      'run time:', time.perf_counter() - startRun, 'fit:', fit)

# NOTE(review): neither `finalTest` nor `y_pred` is assigned anywhere in
# this section, and `prediction` from the call above is never used. Verify
# that both names exist at this point and that `y_pred` (rather than
# `prediction`) is really the intended series.
idCol = finalTest['id']
idCol = idCol.reset_index(drop=True)
d = {'id': idCol, 'country': pd.Series(y_pred)}
df = pd.DataFrame(d, columns=['id', 'country'])
df.to_csv('../submission/submission_ageGenderTreeLogistic.csv', index=False)