# The examples below assume the surrounding project's module-level imports,
# roughly (names inferred from the code itself):
#   import argparse, os, time
#   from os import listdir
#   from os.path import isfile, join
#   import importDataHelper, variables, bayes, complexBayes, spamFilter
#   import confusionMatrix, linearClassifier, USEClassifier, evaluationHelper
#   import textDataFilter, textContentFilter, duplicateDetection

# Merge newly imported ideas into the per-challenge CSV databases, skipping
# ideas whose ID is already stored.
def extend_challenge_db(idealist):
    challengelist = {}

    for file in listdir(variables.ideadbpath):
        if isfile(join(variables.ideadbpath, file)):
            filename = file.split(".")[0]
            challengelist[filename] = list(
                importDataHelper.readcsvdata(join(variables.ideadbpath, file)))

    for idea in idealist:
        idea["CHALLENGE"] = idea.get("CHALLENGE", "")
        if "cscw19-1" in idea["CHALLENGE"]:
            challengelist["TCO"] = challengelist.get("TCO", [])
            if not any(e['ID'] == idea['ID'] for e in challengelist["TCO"]):
                challengelist["TCO"].append(idea)
        elif "chi19s1" in idea["CHALLENGE"]:
            challengelist["TCO"] = challengelist.get("TCO", [])
            if not any(e['ID'] == idea['ID'] for e in challengelist["TCO"]):
                challengelist["TCO"].append(idea)
        elif "bionic" in idea["CHALLENGE"].lower():
            challengelist["bionicRadar"] = challengelist.get("bionicRadar", [])
            if not any(e['ID'] == idea['ID']
                       for e in challengelist["bionicRadar"]):
                challengelist["bionicRadar"].append(idea)
        elif "fabric" in idea["CHALLENGE"].lower():
            challengelist["fabricDisplay"] = challengelist.get(
                "fabricDisplay", [])
            if not any(e['ID'] == idea['ID']
                       for e in challengelist["fabricDisplay"]):
                challengelist["fabricDisplay"].append(idea)
    for key in challengelist.keys():
        if challengelist[key]:  # skip challenges with no stored ideas
            importDataHelper.writecsvfile(join(variables.ideadbpath, key + ".csv"),
                                          challengelist[key][0].keys(),
                                          challengelist[key])
            print("saved " + key)
def save_confusionmatrix(cm,
                         path,
                         applied_filters=None,
                         description="",
                         dataset=""):
    # Append the confusion-matrix stats plus run metadata to the CSV at path.
    # (applied_filters defaults to None instead of a mutable list default.)
    cmdict = list(importDataHelper.readcsvdata(path))
    cmdict.append(cm.stats())
    cmdict[-1]["applied Filter"] = applied_filters if applied_filters is not None else []
    cmdict[-1]["Description"] = description
    cmdict[-1]["Dataset"] = dataset
    importDataHelper.writecsvfile(path, cmdict[0].keys(), cmdict)
    return 0
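
# Usage sketch, mirroring the call in classifyideas below; the values are
# hypothetical and "cm" is whatever confusionMatrix.create_confusionmatrix
# returns (it must provide .stats()).
example_actual = [True, False, True, False]
example_pred = [True, False, False, False]
example_cm = confusionMatrix.create_confusionmatrix(example_actual, example_pred)
save_confusionmatrix(example_cm,
                     variables.resultpath + "ConfusionMatrices.csv",
                     applied_filters={"bayes": 1},
                     description="demo run",
                     dataset="hypothetical")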
# Command-line entry point: read a csv or xml idea file, split the ideas by
# challenge, then either train the classifiers or classify and save the result.
def spamdetection():
    parser = argparse.ArgumentParser()
    parser.add_argument("path", help="Path to a csv or xml file with ideas")

    parser.add_argument("-t",
                        "--train",
                        help="to train the system. Requires classified ideas.",
                        action="store_true")
    parser.add_argument(
        "--challenge",
        help="use the given challenge instead of the challenges stored in each idea")
    args = parser.parse_args()
    filename, fileformat = os.path.basename(args.path).rsplit('.', 1)
    if fileformat == 'csv':
        idealist = list(importDataHelper.readcsvdata(args.path))
    elif fileformat == 'xml':
        idealist = importDataHelper.readxmldata(args.path)
    else:
        print("Can not read the file, please use csv or xml files")
        return 1
    challengelists = {}
    # Divide ideas into challenges or use the given challenge
    if args.challenge is None:
        for idea in idealist:
            challenge = idea.get("CHALLENGE", "Cross-Domain")
            challengelists[challenge] = challengelists.get(challenge, [])
            challengelists[challenge].append(idea)
    else:
        challengelists[args.challenge] = idealist
    if args.train:
        for elem in challengelists:
            train(challengelists[elem], elem)
    else:
        classifiedlist = []
        for elem in challengelists:
            if fileformat == "csv":
                classifiedlist += classify(challengelists[elem], elem,
                                           fileformat)
                importDataHelper.writecsvfile(
                    os.path.dirname(args.path) + "/" + filename +
                    "_classified.csv", classifiedlist[0].keys(),
                    classifiedlist)
            else:
                idealist = classify(idealist, elem, fileformat)
                idealist.write(
                    os.path.dirname(args.path) + "/" + filename +
                    "_classified.xml")
# Interactively review the still-unreviewed fabricDisplay ideas and move the
# manually labelled ones into the classified CSV.
def classify_unreviewed():
    idealist = list(
        importDataHelper.readcsvdata(
            "Data/Results/fabricDisplayunreviewed.csv"))
    idealist2 = list(
        importDataHelper.readcsvdata(
            "Data/Results/fabricDisplayClassified.csv"))
    print("bionic Radar:")
    # Iterate over a copy so removing already-classified ideas is safe.
    classified_ids = [ideas["ID"] for ideas in idealist2]
    for idea in list(idealist):
        if idea["ID"] in classified_ids:
            idealist.remove(idea)
    print(len(idealist))
    # Again iterate over a copy, because reviewed ideas are removed from idealist.
    for idea in list(idealist):
        print(" ")
        if "usable" not in idea.get("STATUS", ""):
            print("Content: " + idea["DESCRIPTION"])
            print("Prediction: " + idea["PREDICTION"])
            print("Bayes: " + idea["OTHERBayes"])
            print("Others: " + idea["OTHERS"])
            print("Filter: " + idea["TRIGGERED"])
            x = input("Spam? (y/n)")
            if 'y' in x:
                idea["STATUS"] = "unusable"
                idealist2.append(idea)
                idealist.remove(idea)
            elif 'n' in x:
                idea["STATUS"] = "usable"
                idealist2.append(idea)
                idealist.remove(idea)
            else:
                # Any other input saves the progress made so far.
                importDataHelper.writecsvfile(
                    "Data/Results/fabricDisplayClassified.csv",
                    idealist2[0].keys(), idealist2)
                importDataHelper.writecsvfile(
                    "Data/Results/fabricDisplayunreviewed.csv",
                    idealist[0].keys(), idealist)
    importDataHelper.writecsvfile("Data/Results/fabricDisplayClassified.csv",
                                  idealist2[0].keys(), idealist2)
    importDataHelper.writecsvfile("Data/Results/fabricDisplayunreviewed.csv",
                                  idealist[0].keys(), idealist)
# Classify a dataset with a user-selected combination of single-word Bayes,
# 5-word Bayes and the rule-based filter system, save the confusion-matrix
# stats, and write out the false positives and false negatives.
def classifyideas(dataset=None):
    if dataset is None:
        print("Select a dataset: ")
        i = 0
        print("Classified datasets")
        filesclass = []
        for file in listdir(variables.importpathclassified):
            if isfile(join(variables.importpathclassified, file)):
                print("", i, ": ", file)
                filesclass.append((variables.importpathclassified, file))
                i += 1
        print("Unclassified datasets")
        for file in listdir(variables.importpathunclassified):
            if isfile(join(variables.importpathunclassified, file)):
                print("", i, ": ", file)
                filesclass.append((variables.importpathunclassified, file))
                i += 1
        selected = int(input("Which dataset do you want to use? "))
        path = filesclass[selected][0]
        filename, fileformat = filesclass[selected][1].rsplit('.', 1)
        if 'csv' in fileformat:
            idealist = list(
                importDataHelper.readcsvdata(
                    join(path, filename + '.' + fileformat)))
        else:
            idealist = list(
                importDataHelper.readxmldata(
                    join(path, filename + '.' + fileformat)))
    else:
        fileformat = dataset[3]
        filename = dataset[2]
        path = dataset[1]
        idealist = dataset[0]
    bayesbool = 'y' in input(
        "Do you want to use single word bayes to classify? (y/n) ").lower()
    complbayesbool = 'y' in input(
        "Do you want to use 5-word bayes to classify? (y/n) ").lower()
    filtersystembool = 'y' in input(
        "Do you want to use the Filtersystem to classify? (y/n) ").lower()
    if bayesbool:
        wordprobs = bayes.gettokenprobs()
    if complbayesbool:
        wordprobscom = complexBayes.gettokenprobs()
    if filtersystembool:
        unigram_tagger, st = prepare_tagger()

    spamlist = []
    applied_filters = {}
    pred = []
    actual = []
    fplist = []
    fnlist = []
    start1 = time.time()

    for row in idealist:
        row['TRIGGERED'] = []
        row['PREDICTION'] = "Ham"
        if bayesbool:
            bayesprob = bayes.classify(row['DESCRIPTION'], wordprobs)
            if bayesprob > 0.8:
                row['TRIGGERED'].append("bayes")
                applied_filters["bayes"] = int(applied_filters.get("bayes",
                                                                   0)) + 1
                row['PREDICTION'] = "Spam"
        if complbayesbool:
            combayesprob = complexBayes.classify(row['DESCRIPTION'],
                                                 wordprobscom)
            if combayesprob > 0.8:
                row['TRIGGERED'].append("complex bayes: " + str(combayesprob))
                applied_filters["complex bayes"] = int(
                    applied_filters.get("complex bayes", 0)) + 1
                row['PREDICTION'] = "Spam"
        if filtersystembool:
            row = spamFilter.classifyidea(row, unigram_tagger, st)
        actual.append("spam" in row.get('SPAM', "")
                      or "unusable" in row.get("STATUS", ""))
        pred.append(row['PREDICTION'] == "Spam")
        for filter in row['TRIGGERED']:
            if 'bayes' not in filter:
                applied_filters[filter] = int(applied_filters.get(filter,
                                                                  0)) + 1
        spamlist.append(row)
        if row['PREDICTION'] == "Spam" and ("ham" in row.get('SPAM', "") or
                                            row.get("STATUS", "") == "usable"):
            fplist.append(row)
        elif row['PREDICTION'] == "Ham" and ("spam" in row.get(
                'SPAM', "") or "unusable" in row.get("STATUS", "")):
            fnlist.append(row)
    cm = confusionMatrix.create_confusionmatrix(actual, pred)
    confusionMatrix.print_confusionmatrix(cm, True)
    description = "just filtersystem, Test enumeration fix with iui dataset"

    confusionMatrix.save_confusionmatrix(
        cm, variables.resultpath + "ConfusionMatrices.csv", applied_filters,
        description, filename)
    duration1 = time.time() - start1
    print("Duration1: ", duration1, "seconds")
    print(applied_filters)

    ###################### Save results ######################
    #    importDataHelper.writecsvfile(variables.resultpath + 'IdeaDataSpam2.csv', spamlist[0].keys(), spamlist)
    if len(fplist) > 0:
        importDataHelper.writecsvfile(
            variables.filterresults + filename + '_fp.csv', fplist[0].keys(),
            fplist)
    if len(fnlist) > 0:
        importDataHelper.writecsvfile(
            variables.filterresults + filename + '_fn.csv', fnlist[0].keys(),
            fnlist)
    return None
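
# Usage sketch: classifyideas() can also be handed a preloaded dataset instead
# of prompting interactively; the expected shape is
# (idealist, path, filename, fileformat), matching the indexing above. The file
# name is taken from the test() example further below.
preloaded = (
    list(importDataHelper.readcsvdata(join(variables.importpathclassified,
                                           "ideas-with-challenges.csv"))),
    variables.importpathclassified,
    "ideas-with-challenges",
    "csv",
)
classifyideas(dataset=preloaded)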
# Run every text-data and text-content filter over each classified dataset and
# record the resulting confusion-matrix stats in FilterEvaluation.csv.
def evaluate_filtersystem():
    resultlist = []
    unigram, st = prepare_tagger()
    for file in listdir(variables.importpathclassified):
        if isfile(join(variables.importpathclassified, file)):
            if ".csv" in file:
                idealist = list(
                    importDataHelper.readcsvdata(
                        join(variables.importpathclassified, file)))
            elif ".xml" in file:
                idealist = list(
                    importDataHelper.readxmldata(
                        join(variables.importpathclassified, file)))
            else:
                print(
                    "Not able to read all files (only csv and xml are supported)"
                )
                return 1
            for filter in textDataFilter.textDataFilterList:
                if "count" in str(filter):
                    # Pick the threshold list that matches the count filter.
                    if "more" in filter.__name__:
                        counts = countmore
                    elif "less" in filter.__name__:
                        counts = countless
                    elif "word" in filter.__name__:
                        counts = countwords
                    else:
                        counts = []
                    for count in counts:
                        cm = evaluate_filter(filter, idealist, count)
                        result = {
                            "Dataset": file,
                            "Filter": filter.__name__,
                            "Variable": count
                        }
                        if cm is not None:
                            result.update(cm.stats())
                        resultlist.append(result)
                else:
                    cm = evaluate_filter(filter, idealist)
                    result = {
                        "Dataset": file,
                        "Filter": filter.__name__,
                        "Variable": "None"
                    }
                    if cm is not None:
                        result.update(cm.stats())
                    resultlist.append(result)
            for filter in textContentFilter.textContentFilterlist:
                if "unigram" in filter.__name__:
                    cm = evaluate_filter(filter, idealist, unigram)
                    variable = "UnigramTagger"
                elif "containsnames" in filter.__name__:
                    cm = evaluate_filter(filter, idealist, st)
                    variable = "StanfordNERTagger"
                else:
                    cm = evaluate_filter(filter, idealist)
                    variable = "None"
                result = {
                    "Dataset": file,
                    "Filter": filter.__name__,
                    "Variable": variable
                }
                if cm is not None:
                    result.update(cm.stats())
                resultlist.append(result)
                print(filter.__name__)
    importDataHelper.writecsvfile(
        variables.resultpath + "FilterEvaluation.csv", resultlist[0].keys(),
        resultlist)
# Compare the stored outputs of all classifiers (Bayes, complex Bayes, linear
# classifier, filter system, USE) and several ensemble combinations per dataset.
def evaluate_results():
    resultdict = import_results()

    safelist = []
    for key in resultdict.keys():
        print(key)
        print("Ideas: ", len(resultdict[key]["actual"]))
        print("Spam: ", resultdict[key]["actual"].count(True))
        print("Ham: ", resultdict[key]["actual"].count(False))
        bayespred = [x >= 0.9 for x in resultdict[key]["bayes"]]
        bayesprob = [x for x in resultdict[key]["bayes"]]
        combayespred = [x >= 0.9 for x in resultdict[key]["complexbayes"]]
        combayesprob = [x for x in resultdict[key]["complexbayes"]]
        linclasspred = [x[0] == 1 for x in resultdict[key]["linCLassifier"]]
        linclassprob = [x[1] for x in resultdict[key]["linCLassifier"]]
        filterpred = [
            x == 1 for x in evaluationHelper.get_filter_results(resultdict[key]
                                                                ["Filter"])
        ]
        usepred = [x[0] == 1 for x in resultdict[key]["USE"]]
        useprob = [x[1] for x in resultdict[key]["USE"]]
        lin = False
        com = False
        use = False
        bay = False
        if True in bayespred and False in bayespred:
            cmbay = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], bayespred)
            safelist.append({"Data": key, "Filter": "Bayes", **cmbay.stats()})
            bay = True
            print("Bayes")
            print("Precision: ", cmbay.PPV)
            print("Recall: ", cmbay.TPR, "\n")
        if True in combayespred and False in combayespred:
            cmcombay = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], combayespred)
            safelist.append({
                "Data": key,
                "Filter": "complex Bayes",
                **cmcombay.stats()
            })
            com = True
            print("Complex Bayes")
            print("Precision: ", cmcombay.PPV)
            print("Recall: ", cmcombay.TPR, "\n")
        if True in linclasspred and False in linclasspred:
            cmlinclass = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], linclasspred)
            safelist.append({
                "Data": key,
                "Filter": "lin Classifier",
                **cmlinclass.stats()
            })
            lin = True
            print("lin Classifier")
            print("Precision: ", cmlinclass.PPV)
            print("Recall: ", cmlinclass.TPR, "\n")
        if True in filterpred and False in filterpred:
            cmfilter = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], filterpred)
            safelist.append({
                "Data": key,
                "Filter": "Filtersystem",
                **cmfilter.stats()
            })
            print("Filtersystem")
            print("Precision: ", cmfilter.PPV)
            print("Recall: ", cmfilter.TPR, "\n")
        if True in usepred and False in usepred:
            cmuse = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], usepred)
            safelist.append({"Data": key, "Filter": "USE", **cmuse.stats()})
            use = True
            print("USE Classifier")
            print("Precision: ", cmuse.PPV)
            print("Recall: ", cmuse.TPR, "\n")
        probs = []
        classor = []
        classtwo = []
        classthree = []
        countbayesdiff = 0
        for i in range(0, len(bayesprob)):
            # y counts the classifiers contributing to this idea's average and
            # must be reset per idea, otherwise probs[i] is divided by a
            # cumulative count.
            y = 0
            classor.append(bayesprob[i] >= 0.9 or combayesprob[i] >= 0.9
                           or linclasspred[i] or usepred[i])
            classtwo.append(
                (bayesprob[i] >= 0.9 and
                 (combayesprob[i] >= 0.9 or linclasspred[i] or usepred[i]))
                or (combayesprob[i] >= 0.9 and (linclasspred[i] or usepred[i]))
                or (linclasspred[i] and usepred[i]))
            classthree.append(
                (bayesprob[i] >= 0.9 and combayesprob[i] >= 0.9 and
                 (linclasspred[i] or usepred[i]))
                or (combayesprob[i] >= 0.9 and linclasspred[i] and usepred[i])
                or (bayesprob[i] >= 0.9 and linclasspred[i] and usepred[i]))
            probs.append(0.0)
            if bay:
                probs[i] += bayesprob[i]
                y += 1
            if com:
                probs[i] += combayesprob[i]
                y += 1
            if lin:
                if linclasspred[i]:
                    probs[i] += linclassprob[i]
                else:
                    probs[i] += 1 - linclassprob[i]
                y += 1
            if use:
                if usepred[i]:
                    probs[i] += useprob[i]
                else:
                    probs[i] += 1 - useprob[i]
                y += 1
            if y > 0:
                probs[i] = probs[i] / y
            if bayesprob[i] >= 0.9 and combayesprob[i] < 0.9:
                countbayesdiff += 1
        print("Bayes difference: ", countbayesdiff, "\n\n")
        avglow = [x >= 0.5 for x in probs]
        avghigh = [x >= 0.8 for x in probs]
        if True in avglow and False in avglow:
            cmavglow = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], avglow)
            safelist.append({
                "Data": key,
                "Filter": "low avg",
                **cmavglow.stats()
            })
            print("low Average")
            print("Precision: ", cmavglow.PPV)
            print("Recall: ", cmavglow.TPR, "\n")

        if True in avghigh and False in avghigh:
            cmavghigh = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], avghigh)
            safelist.append({
                "Data": key,
                "Filter": "high avg",
                **cmavghigh.stats()
            })
            print("high Average")
            print("Precision: ", cmavghigh.PPV)
            print("Recall: ", cmavghigh.TPR, "\n")
        if True in classor and False in classor:
            cmor = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], classor)
            safelist.append({
                "Data": key,
                "Filter": "Or Classifiers",
                **cmor.stats()
            })
            print("Classifier or")
            print("Precision: ", cmor.PPV)
            print("Recall: ", cmor.TPR, "\n")
        if True in classtwo and False in classtwo:
            cmtwo = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], classtwo)
            safelist.append({
                "Data": key,
                "Filter": "Two Classifiers",
                **cmtwo.stats()
            })
            print("Two Classifier")
            print("Precision: ", cmtwo.PPV)
            print("Recall: ", cmtwo.TPR, "\n")
        if True in classthree and False in classthree:
            cmthree = confusionMatrix.create_confusionmatrix(
                resultdict[key]["actual"], classthree)
            safelist.append({
                "Data": key,
                "Filter": "Three Classifiers",
                **cmthree.stats()
            })
            print("Three Classifier")
            print("Precision: ", cmthree.PPV)
            print("Recall: ", cmthree.TPR, "\n")

    importDataHelper.writecsvfile(
        "Data/Results/Evaluation/extendNewResultDicts.csv", safelist[0].keys(),
        safelist)
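
# Isolated sketch of the ensemble votes built in evaluate_results: with four
# classifier decisions per idea, classor fires if any of them calls spam,
# classtwo if at least two agree and classthree if at least three agree
# (the vote values below are hypothetical).
example_votes = [True, False, True, True]  # bayes >= 0.9, complex bayes >= 0.9, linClass, USE
example_classor = any(example_votes)           # True
example_classtwo = sum(example_votes) >= 2     # True
example_classthree = sum(example_votes) >= 3   # True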
# End-to-end experiment: train every classifier per challenge (with and without
# duplicates) and classify the unreviewed ideas of each challenge.
def test():
    #    idealist = list(importDataHelper.readxmldata(variables.importpathunclassified + 'IdeaData.xml'))
    idealist = list(
        importDataHelper.readcsvdata(variables.importpathclassified +
                                     "ideas-with-challenges.csv"))
    idealistchallenge = {"bionicRadar": [], "fabricDisplay": []}
    print(len(idealist))
    i = 0
    j = 0
    k = 0
    for idea in idealist:
        if idea["STATUS"] == "unreviewed":
            if "bionic" in idea["CHALLENGE"].lower():
                i += 1
                idealistchallenge["bionicRadar"].append(idea)
            elif "fabric" in idea["CHALLENGE"].lower():
                j += 1
                idealistchallenge["fabricDisplay"].append(idea)
            else:
                k += 1
    print("unreviewed bionic: ", i)
    print("unreviewed fabric: ", j)
    print("unreviewed others: ", k)

    idealisttrainingschallenge = {}
    idealisttrainingschallenge["fabricDisplay"] = list(
        importDataHelper.readcsvdata(variables.ideadbpath +
                                     'fabricDisplay.csv'))
    idealisttrainingschallenge["bionicRadar"] = list(
        importDataHelper.readcsvdata(variables.ideadbpath + 'bionicRadar.csv'))
    idealisttrainingschallenge["TCO"] = list(
        importDataHelper.readcsvdata(variables.ideadbpath + 'TCO.csv'))

    idealisttrainingschallengewodups = {}
    idealisttrainingschallengewodups["fabricDisplay"] = list(
        importDataHelper.readcsvdata(variables.ideadbwithoutduppath +
                                     "fabricDisplay.csv"))
    idealisttrainingschallengewodups["bionicRadar"] = list(
        importDataHelper.readcsvdata(variables.ideadbwithoutduppath +
                                     "bionicRadar.csv"))
    idealisttrainingschallengewodups["TCO"] = list(
        importDataHelper.readcsvdata(variables.ideadbwithoutduppath +
                                     "TCO.csv"))

    idealistmixedtraining = idealisttrainingschallenge[
        "fabricDisplay"] + idealisttrainingschallenge[
            "bionicRadar"] + idealisttrainingschallenge["TCO"]
    idealistmixedtrainingwithoutdups = idealisttrainingschallengewodups[
        "fabricDisplay"] + idealisttrainingschallengewodups[
            "bionicRadar"] + idealisttrainingschallengewodups["TCO"]

    for key in idealistchallenge.keys():
        idealisttraining = idealisttrainingschallenge[key]
        idealisttrainingwithoutdups = list(
            importDataHelper.readcsvdata(variables.ideadbwithoutduppath + key +
                                         ".csv"))

        #        idealistchallengewithoutdups = duplicateDetection.filterduplikates(idealistchallenge[key], variables.resultpath + "test3.csv", idealisttrainingwithoutdups)
        print("duplicate detection done")

        bayes.trainbayes(idealisttraining, challenge=key, delete=True)
        bayes.trainbayes(idealisttrainingwithoutdups,
                         challenge=key,
                         delete=True,
                         duplicates=True)
        print("bayes training TCO complete")

        bayes.trainbayes(idealistmixedtraining, delete=True)
        bayes.trainbayes(idealistmixedtrainingwithoutdups,
                         delete=True,
                         duplicates=True)
        print("bayes training mixed complete")

        wordprobs = bayes.gettokenprobs(challenge=key)
        wordprobswithoutdups = bayes.gettokenprobs(challenge=key,
                                                   duplicates=True)

        wordprobsmixed = bayes.gettokenprobs()
        wordprobsmixedwithoutdups = bayes.gettokenprobs(duplicates=True)
        print("loaded probs")
        complexBayes.trainbayes(idealisttraining, challenge=key, delete=True)
        complexBayes.trainbayes(idealisttrainingwithoutdups,
                                challenge=key,
                                delete=True,
                                duplicates=True)
        print("complex bayes training TCO complete")

        complexBayes.trainbayes(idealistmixedtraining, delete=True)
        complexBayes.trainbayes(idealistmixedtrainingwithoutdups,
                                delete=True,
                                duplicates=True)
        print("complex bayes training mixed complete")

        comwordprobs = complexBayes.gettokenprobs(challenge=key)
        comwordprobswithoutdups = complexBayes.gettokenprobs(challenge=key,
                                                             duplicates=True)

        comwordprobsmixed = complexBayes.gettokenprobs()
        comwordprobsmixedwithoutdups = complexBayes.gettokenprobs(
            duplicates=True)
        print("loaded probs complex")

        linclass, lincoeff = linearClassifier.train_linear_classificator(key)
        print(lincoeff)
        linclassmixed, lincoeffmixed = linearClassifier.train_linear_classificator(
            "all")
        print(lincoeffmixed)

        useest = USEClassifier.train_classifier(key)
        useestmixed = USEClassifier.train_classifier("all")
        print("trained USE")

        unigram_tagger, st = spamFilter.prepare_tagger()

        i = 1
        for idea in idealistchallenge[key]:
            print(i)
            idea["TRIGGERED"] = [""]
            # classify with challenge bayes with duplicates
            bayesprob = bayes.classify(idea["DESCRIPTION"], wordprobs)
            # classify with challenge bayes without duplicates
            bayesprobdup = bayes.classify(idea["DESCRIPTION"],
                                          wordprobswithoutdups)
            # classify with mixed challenge bayes with duplicates
            bayesprobmixed = bayes.classify(idea["DESCRIPTION"],
                                            wordprobsmixed)
            # classify with mixed challenge bayes without duplicates
            bayesprobmixedwithoutdup = bayes.classify(
                idea["DESCRIPTION"], wordprobsmixedwithoutdups)

            combayesprob = complexBayes.classify(idea["DESCRIPTION"],
                                                 comwordprobs)
            # classify with challenge bayes without duplicates
            combayesprobdup = complexBayes.classify(idea["DESCRIPTION"],
                                                    comwordprobswithoutdups)
            # classify with mixed challenge bayes with duplicates
            combayesprobmixed = complexBayes.classify(idea["DESCRIPTION"],
                                                      comwordprobsmixed)
            # classify with mixed challenge bayes without duplicates
            combayesprobmixedwithoutdup = complexBayes.classify(
                idea["DESCRIPTION"], comwordprobsmixedwithoutdups)

            # classify with challenge USE:
            useclass, useclassprob = USEClassifier.classify(useest, idea)
            # classify with mixed challenge USE:
            usemixedclass, usemixedclassprob = USEClassifier.classify(
                useestmixed, idea)

            idea, ideadata = spamFilter.classify_and_get_idea(
                idea, unigram_tagger, st)
            allnull = True
            for keytest in ideadata.keys():
                # Check the raw value before wrapping it in a list for the
                # linear classifier; comparing the wrapped list to 1 never matches.
                if ideadata[keytest] == 1:
                    allnull = False
                ideadata[keytest] = [ideadata[keytest]]
            if not allnull:
                linclasspred, linclassprob = linearClassifier.classify(
                    ideadata, linclass)
                linmixedclasspred, linmixedclassprob = linearClassifier.classify(
                    ideadata, linclassmixed)
            else:
                linclasspred, linclassprob = 0, 0
                linmixedclasspred, linmixedclassprob = 0, 0
            idea["PREDICTION"] = "Bayes: " + str(
                bayesprobdup) + ", complexBayes " + str(
                    combayesprobdup) + ", linClass: " + str(
                        linmixedclasspred) + " " + str(
                            linmixedclassprob) + ", USE: " + str(
                                useclass) + " " + str(useclassprob)
            idea["OTHERBayes"] = "BayesTCO: " + str(
                bayesprob) + ", BayesMixed " + str(
                    bayesprobmixed) + ", BayesMixed w/o dups " + str(
                        bayesprobmixedwithoutdup) + ", compl BayesTCO: " + str(
                            combayesprob) + ", compl BayesMixed: " + str(
                                combayesprobmixed
                            ) + ", compl BayesMixed w/o dups: " + str(
                                combayesprobmixedwithoutdup)
            idea["OTHERS"] = "Lin Class: " + str(linclasspred) + " " + str(
                linclassprob) + ", USE mixed: " + str(
                    usemixedclass) + " " + str(usemixedclassprob)

            i += 1
        importDataHelper.writecsvfile(
            variables.resultpath + key + "unreviewed.csv",
            idealistchallenge[key][0].keys(), idealistchallenge[key])