Example #1
0
def getDifferentTrainAndTestData(trainDataSize, testDataSize):
    """Return a randomly chosen (trainData, testData) pair of row subsets.

    The rows come from ``dataReader.getWholeTrainingData()`` (presumably a
    pandas DataFrame -- confirm against ``dataReader``).

    * If ``trainDataSize + testDataSize`` exceeds the number of available
      rows, the two sets are sampled independently, so the same row may
      appear in both. ``sample`` draws without replacement by default, so
      pandas raises ``ValueError`` if a single requested size is larger
      than the row count.
    * Otherwise the row positions are shuffled and split into two
      consecutive, non-overlapping slices, so the sets are disjoint.

    Args:
        trainDataSize: number of rows wanted in the training set.
        testDataSize: number of rows wanted in the test set.

    Returns:
        Tuple ``(trainData, testData)``.
    """
    data = dataReader.getWholeTrainingData()
    rowCount = data.shape[0]

    # Parenthesised single-string prints emit the same text on Python 2
    # and remain valid syntax on Python 3.
    if trainDataSize + testDataSize > rowCount:
        # More rows requested than exist: disjoint sets are impossible.
        print("Getting different train & test data with possible duplicates")
        trainData = data.sample(trainDataSize)
        testData = data.sample(testDataSize)
    else:
        print("Getting totally different train & test data")
        indexes = np.arange(rowCount)  # row positions 0..rowCount-1
        random.shuffle(indexes)  # works in-place

        # The test slice starts exactly where the train slice ends; starting
        # one later would drop a row and could leave the test set one short
        # when the two sizes add up to rowCount.
        testEnd = trainDataSize + testDataSize
        # iloc selects by position, which is what the shuffled arange holds
        # (the deprecated .ix looked these up as labels on integer indexes).
        trainData = data.iloc[indexes[:trainDataSize]]
        testData = data.iloc[indexes[trainDataSize:testEnd]]

    return trainData, testData