def build(numberOfTimeUnits=24):
    validLattices = set()
    for data in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('world','%s_%s'%(2,11))): validLattices.add(data['id'])
    documents, lattices = [], set()
    for h in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))): 
        hashtag, document = Hashtag(h), []
        if hashtag.isValidObject():
            for timeUnit, occs in enumerate(hashtag.getOccrancesEveryTimeWindowIterator(HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)):
                occs = filter(lambda t: t[0] in validLattices, occs)
                occs = sorted(occs, key=itemgetter(0))
                if occs: 
                    for lattice in zip(*occs)[0]: lattices.add(lattice)
                document.append([timeUnit, [(k, len(list(i))) for k, i in groupby(occs, key=itemgetter(0))]])
            if document: documents.append(document)
    lattices = sorted(list(lattices))
    print len(lattices)
    documents = [(d, TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(d)) for d in documents]
    documents = documents[:int(len(documents)*0.80)]
    for decisionTimeUnit in range(1, numberOfTimeUnits+1):
        for latticeCount, predictingLattice in enumerate(lattices):
            print decisionTimeUnit, latticeCount,
            inputVectors, outputValues = [], []
            for rawDocument, processedDocument in documents:
                documentForTimeUnit = TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(rawDocument[:decisionTimeUnit])
                if documentForTimeUnit and processedDocument:
                    vector =  [documentForTimeUnit.get(l, 0) for l in lattices]
                    inputVectors.append(vector), outputValues.append(float(processedDocument.get(predictingLattice, 0)))
#            TargetSelectionRegressionClassifier(decisionTimeUnit=decisionTimeUnit, predictingLattice=predictingLattice).build(zip(inputVectors, outputValues))
            TargetSelectionRegressionSVMRBFClassifier(decisionTimeUnit=decisionTimeUnit, predictingLattice=predictingLattice).build(zip(inputVectors, outputValues))
def testClassifierPerformance(numberOfTimeUnits=24):
    validLattices = set()
    for data in FileIO.iterateJsonFromFile(hashtagsLatticeGraphFile%('world','%s_%s'%(2,11))): validLattices.add(data['id'])
    documents, lattices = [], set()
    for h in FileIO.iterateJsonFromFile(hashtagsFile%('training_world','%s_%s'%(2,11))): 
        hashtag, document = Hashtag(h), []
        if hashtag.isValidObject():
            for timeUnit, occs in enumerate(hashtag.getOccrancesEveryTimeWindowIterator(HashtagsClassifier.CLASSIFIER_TIME_UNIT_IN_SECONDS)):
                occs = filter(lambda t: t[0] in validLattices, occs)
                occs = sorted(occs, key=itemgetter(0))
                if occs: 
                    for lattice in zip(*occs)[0]: lattices.add(lattice)
                document.append([timeUnit, [(k, len(list(i))) for k, i in groupby(occs, key=itemgetter(0))]])
            if document: documents.append(document)
    lattices = sorted(list(lattices))
    print len(lattices)
    documents = [(d, TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(d)) for d in documents]
    documents = documents[-int(len(documents)*0.20):]
    GeneralMethods.runCommand('rm -rf %s'%TargetSelectionRegressionClassifier.classifiersPerformanceFile)
    for decisionTimeUnit in range(1, numberOfTimeUnits+1):
        for classifierType in [TargetSelectionRegressionSVMRBFClassifier, TargetSelectionRegressionSVMLinearClassifier,
                               TargetSelectionRegressionSVMPolyClassifier, TargetSelectionRegressionClassifier]:
            totalError = []
            for latticeCount, predictingLattice in enumerate(lattices):
                inputVectors, outputValues, tempError = [], [], []
                for rawDocument, processedDocument in documents:
                    documentForTimeUnit = TargetSelectionRegressionClassifier.getPercentageDistributionInLattice(rawDocument[:decisionTimeUnit])
                    if documentForTimeUnit and processedDocument:
                        vector =  [documentForTimeUnit.get(l, 0) for l in lattices]
                        inputVectors.append(vector), outputValues.append(float(processedDocument.get(predictingLattice, 0)))
                classifier = classifierType(decisionTimeUnit=decisionTimeUnit, predictingLattice=predictingLattice)
                for iv, ov in zip(inputVectors, outputValues): 
                    if latticeCount==2: print ov, classifier.predict(iv), pow(ov-classifier.predict(iv), 2)
                    if ov!=0.0: 
                        tempError.append(pow(ov-classifier.predict(iv), 2))
#                print tempError, np.mean(tempError)
#                exit()
                totalError.append(np.mean(tempError))
            print {'id': classifier.id, 'timeUnit': decisionTimeUnit-1, 'error': np.mean(totalError)}
            FileIO.writeToFileAsJson({'id': classifier.id, 'timeUnit': decisionTimeUnit-1, 'error': np.mean(totalError)}, TargetSelectionRegressionClassifier.classifiersPerformanceFile)