def SplitTrainTest(lSamples):
    if config.get_bool('SPLIT_BY_EASY_HARD'):
        setEasy = set(data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE')));
        lTrain = filter(lambda samp: samp.pddlconn.sPddlTo in setEasy, lSamples);
        lTest = filter(lambda samp: samp.pddlconn.sPddlTo not in setEasy, lSamples);
        print "NUM Train:", len(lTrain), "Test:", len(lTest);
        return lTrain, lTest;
    if config.get_bool('SPLIT_BY_FIRST_30'):
        setFirst30 = set(data.file_to_obj(config.get_string('FIRST_30_CONNECTIONS_LIST_FILE')));
        lTrain = filter(lambda samp: samp.pddlconn.sPddlTo in setFirst30, lSamples);
        lTest = filter(lambda samp: samp.pddlconn.sPddlTo not in setFirst30, lSamples);
        print "NUM Train:", len(lTrain), "Test:", len(lTest);
        return lTrain, lTest;
    if config.get_bool('SPLIT_BY_SENTENCE'):
        lRandomIndexes, iSplit = SplitSampleBySentence(lSamples);
    else:
        lRandomIndexes = range(len(lSamples));
        random.shuffle(lRandomIndexes);
        iSplit = len(lSamples)/2;
    if config.get_bool('TRAIN_AND_TEST_ON_ALL'):
        assert(not config.get_bool('TRAIN_ON_HALF_TEST_ON_ALL'));
        lTrainIndexes = range(len(lSamples));
        lTestIndexes = range(len(lSamples));
    elif config.get_bool('TRAIN_ON_HALF_TEST_ON_ALL'):
        lTrainIndexes = lRandomIndexes[:iSplit];
        lTestIndexes = range(len(lSamples));
    elif config.get_bool('TRAIN_ON_HALF_TEST_ON_HALF'):
        lTrainIndexes = lRandomIndexes[:iSplit];
        lTestIndexes = lRandomIndexes[iSplit:];
    else:
        assert False, 'No Test Train Split Method Specified';

    lTrain = map(lambda x:lSamples[x], lTrainIndexes);
    lTest = map(lambda x:lSamples[x], lTestIndexes);
    return lTrain, lTest;
def FixObjectives(sInFile, sOutFile):
    """Copy an objectives file, adding one to the second field of every entry.

    The object stored in sInFile is loaded with data.file_to_obj. Each entry
    is turned into a two-element list [entry[0], entry[1] + 1]; any further
    elements of an entry are not carried over. The resulting list is written
    to sOutFile with data.obj_to_file.
    """
    lShifted = [[tEntry[0], tEntry[1] + 1]
                for tEntry in data.file_to_obj(sInFile)]
    data.obj_to_file(lShifted, sOutFile)
def ValidateAll():
    sDomainFile = '../subgoal_learning/data/domain-no-stone-iron-tools.v120.pddl';
    sProblemPath = '../subgoal_learning/data/problems/no-stone-iron-tools-and-extra-resources/'
    sFFPath = '/home/nkushman/hierarchical_planning/ff/metric-ff-recompiled-2011-11-24-2';
    sMainPath = '/home/nkushman/hierarchical_planning/model/subgoal_learning/lhla_v4 /home/nkushman/hierarchical_planning/model/subgoal_learning/run_compute_end_state.cfg';
    lDicts = data.file_to_obj('subgoals_gold_augmented.json');
    for dInfo in lDicts:
        lSubgoalsToValidate1 = [];
        lSubgoalsToValidate2 = [ '(> (thing-available wood-pickaxe) 0)'];
        lSubgoalsToValidate3 = [ '(> (thing-available wood-pickaxe) 0)',
                                  '(> (thing-available stone) 7)'];
        lSubgoalsToValidate4 = [ '(> (thing-available wood-pickaxe) 0)',
                                  '(> (thing-available stone) 7)', 
                                  '(> (thing-available ironore) 2)',
                                  '(> (thing-available iron) 2)'];
        

        print >> open('test.tmp', 'w'), data.obj_to_string(dInfo);
        item = dInfo['file'];
        if (item.startswith('test.0.pddl') or
            item.startswith('tallgrass.34.pddl')):
            continue;
        #if not item.startswith('iron.30.pddl'):
        #    continue;
        sFullPath = sProblemPath+item;
        print "objective for:", item;
        sGoal = dInfo['subgoals-formatted'][-1];
        lSubgoalsToValidate1.append(sGoal);
        lSubgoalsToValidate2.append(sGoal);
        lSubgoalsToValidate3.append(sGoal);
        lSubgoalsToValidate4.append(sGoal);
        print "Goal:", sGoal;

        bSuccess1 = False;
        bSuccess2 = False;
        bSuccess3 = False;
        bSuccess4 = False;
        if os.path.isfile(sFullPath):
            sTempProblemPath = 'tmp-validate-all/' + item + '.subgoals1';
            #bSuccess1 = SubgoalValidator.TestSubgoals(sDomainFile, sFullPath, lSubgoalsToValidate1,
            #                                         sFFPath, sMainPath, sTempProblemPath, bOptimize = False);
            bSuccess1 = False;
            if not bSuccess1:
                sTempProblemPath = 'tmp-validate-all/' + item + '.subgoals2';
                bSuccess2 = SubgoalValidator.TestSubgoals(sDomainFile, sFullPath, lSubgoalsToValidate2,
                                                          sFFPath, sMainPath, sTempProblemPath, bOptimize = False,
                                                          iLimitSecs = 10);
                if not bSuccess2:
                    sTempProblemPath = 'tmp-validate-all/' + item + '.subgoals3';
                    bSuccess3 = SubgoalValidator.TestSubgoals(sDomainFile, sFullPath, lSubgoalsToValidate3,
                                                              sFFPath, sMainPath, sTempProblemPath, bOptimize = False,
                                                              iLimitSecs = 10);
                    if not bSuccess3:
                        sTempProblemPath = 'tmp-validate-all/' + item + '.subgoals4';
                        bSuccess4 = SubgoalValidator.TestSubgoals(sDomainFile, sFullPath, lSubgoalsToValidate4,
                                                                  sFFPath, sMainPath, sTempProblemPath,bOptimize = False,
                                                                  iLimitSecs = 10);
            bOverall = bSuccess1 or bSuccess2 or bSuccess3 or bSuccess4;
            print "File:", sFullPath, "Success:", bOverall, "1:", bSuccess1, "2:", bSuccess2, "3:", bSuccess3, "4:", bSuccess4;
            sys.stdout.flush();
def CalcConnFileFScore():
    print "Calcing FScore for:", config.get_string('CONN_FILE');
    setConnTups = predicate.ReadConnectionsFileToTupSet();
    setGoldConns = LoadGoldStringConnSet();

    if config.get_bool('ANALYZE_ON_HARD'):
        lEasy = data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE'));

    iTruePos = 0;
    iFalsePos = 0;
    for sFrom, sTo in setConnTups:
        if config.get_bool('ANALYZE_ON_HARD'):
            if sTo in lEasy:
                continue;

        if (sFrom, sTo) in setGoldConns:
            iTruePos += 1;
        else:
            iFalsePos += 1;

    iFalseNeg = len(setGoldConns)-iTruePos;
    if config.get_bool('ANALYZE_ON_HARD'):
        iFalseNeg = len(setGoldConns)-iTruePos-len(lEasy);
        
    iTrueNeg = 0;
    fPrecision = float(iTruePos)/float(iTruePos+iFalsePos) if iTruePos > 0 else 0;
    fRecall = float(iTruePos)/float(iTruePos+iFalseNeg) if iTruePos > 0 else 0;
    fScore = 2*fPrecision*fRecall/(fPrecision+fRecall) if (fPrecision*fRecall) > 0 else 0;
    print "TP:", iTruePos, "FP:", iFalsePos, "TN:", iTrueNeg, "FN:", iFalseNeg;
    print "Precision:", fPrecision;
    print "Recall:", fRecall;
    print "FScore:", fScore;
def ExtractInitGoalPreds(sProblemPath, sDomainFile, sGoldSubgoalsFile, sAugmentedGoldSubgoalsFile):
    """Add 'init' and 'goal' predicate lists to gold subgoal records.

    Loads the records from sGoldSubgoalsFile. For at most MAXNUMPROBLEMS
    leading records, a pddl.Domain is built from sDomainFile and the problem
    file named by the record's 'file' entry (appended to sProblemPath), and
    the lists returned by Predicate.GetInitPredList / GetGoalPredList are
    stored under 'init' and 'goal'. The full record list, including any
    records beyond the limit, is then written to sAugmentedGoldSubgoalsFile.
    """
    lRecords = data.file_to_obj(sGoldSubgoalsFile)
    iLimit = min(len(lRecords), MAXNUMPROBLEMS)
    for dRecord in lRecords[:iLimit]:
        domain = pddl.Domain(sDomainFile, sProblemPath + dRecord['file'], 0)
        # Both lists are computed before either key is stored.
        dRecord['init'], dRecord['goal'] = (Predicate.GetInitPredList(domain),
                                            Predicate.GetGoalPredList(domain))
    data.obj_to_file(lRecords, sAugmentedGoldSubgoalsFile)
def AnalyzePlans():
    """Sort gold problems into 'success.txt' and 'fail.txt'.

    'gold_completed.txt' supplies one completed problem name per line
    (surrounding whitespace stripped). For every record in
    'subgoals_gold.json' a line is built from the record's 'file' value
    followed by the first key of each dict in its 'subgoals' list, separated
    by spaces. The line goes to 'success.txt' when the part of the file name
    before the first '.' is among the completed names, else to 'fail.txt'.

    All three files are closed when the function returns or raises.
    """
    with open('gold_completed.txt') as fCompleted:
        setCompleted = set(sLine.strip() for sLine in fCompleted)

    lSubgoalDicts = data.file_to_obj('subgoals_gold.json')

    # Nested `with` blocks (rather than one combined statement) keep this
    # working on Python 2.6.
    with open('success.txt', 'w') as fSuccess:
        with open('fail.txt', 'w') as fFailed:
            for dSubgoal in lSubgoalDicts:
                sFile = dSubgoal['file']
                sName = sFile.split('.')[0]
                lParts = [sFile] + [dObj.keys()[0] for dObj in dSubgoal['subgoals']]
                sLine = ' '.join(lParts)
                if sName in setCompleted:
                    print >> fSuccess, sLine
                else:
                    print >> fFailed, sLine
def GenTrainingSamples(sAugmentedGoldSubgoalsFile, sTrainingFile):
    lGold = data.file_to_obj(sAugmentedGoldSubgoalsFile);
    iSubgoalIndex = 0;
    lSamples = [];
    for iIndex, dGold in enumerate(lGold[:min(len(lGold), MAXNUMPROBLEMS)]):
        # skip the ones without the right answer
        print "Running on:", dGold['file'], iIndex, "out of", len(lGold);
        data.print_obj(dGold['subgoals']);
        if ('success' in dGold) and (not dGold['success']):
            continue;
        if len(dGold['subgoals']) < 2:
            print "Skipping:", dGold['file'], 'because no subgoals';
            # if there's no read "sub"-goals, then just skip this one
            continue;
        GenSamplesOneProblem(dGold, lSamples);

    return lSamples;
def GetSubgoalsForAll(bValidate, bRebuildObjectives):
    sDomainFile = '../subgoal_learning/data/domain-no-stone-iron-tools-simple-furnace.v120.pddl';
    sProblemPath = '../subgoal_learning/data/problems/no-stone-iron-tools-and-extra-resources-rand2/'
    sFFPath = '/home/nkushman/hierarchical_planning/ff/metric-ff-recompiled-2011-11-24-2';
    sMainPath = '/home/nkushman/hierarchical_planning/model/subgoal_learning/lhla_v4 /home/nkushman/hierarchical_planning/model/subgoal_learning/run_compute_end_state.cfg';
    sNumSubgoalsFile = 'thing-available_max5.gold_num_subgoals';
    dDeps = data.file_to_obj_with_comments('dep.json');
    if bRebuildObjectives:
        GetAllObjectives(sDomainFile, sProblemPath);
    lObjectives = data.file_to_obj('objectives.json');
    setObjectives = set();
    lOutput = [];
    for iIndex, tObjective in enumerate(lObjectives):
        dCurOutput = {'file':tObjective[0], 'thing':tObjective[1], 'num':tObjective[2]}
        if iIndex < 0:
            print "Skipping:", iIndex, tObjective;
            continue;
        sys.stdout.flush();
        print "****Objective:", iIndex, tObjective;
        lSubgoals = GenSubgoals(dDeps, tObjective[1], tObjective[2]);
        lSubgoalsToValidate = TransformToValidateFormat(lSubgoals);
        print "Plan:";
        data.print_obj(lSubgoalsToValidate);
        sTempProblemPath = 'tmp-no-shovel/test.' + tObjective[0] + '.subgoals';
        if bValidate:
            bSuccess = SubgoalValidator.TestSubgoals(sDomainFile, sProblemPath + tObjective[0], lSubgoalsToValidate,
                                                     sFFPath, sMainPath, sTempProblemPath, bOptimize = False);
            dCurOutput['success'] = bSuccess;
            print "Success:", bSuccess;
        # include them all
        for dSubgoal in lSubgoals:
            setObjectives.add(FormatSubgoal(dSubgoal));

        dCurOutput['subgoals'] = lSubgoals;
        dCurOutput['subgoals-formatted'] = lSubgoalsToValidate;
        dCurOutput['index'] = iIndex;
        lOutput.append(dCurOutput);
        #data.print_obj(lSubgoals);
    data.obj_to_file(lOutput, 'subgoals_gold.json');
    dIndexToPred, dPredToIndex = GenerateIndexesFromPredDict('pred_gold.txt', setObjectives);
    WriteConnectionsFile(dDeps, dPredToIndex, dIndexToPred, 'pred_gold_connections.txt');
    WriteNumSubgoalsFile(lOutput, sNumSubgoalsFile);
def LoadFullRewards():
    dConnRewards = reward.LoadFullRewardsDict();
    #print "Len:", len(dConnRewards);
    #for (sFrom, sTo), cr in dConnRewards.items():
    #    print sFrom, sTo, "Pos:", cr.iNumPos, "NE:", cr.iNumEarlierNeg, "NR:", cr.iNumNoReachNeg;
    #lGranularSamples = lGranularSamples[:100];
    assert(config.get_bool('COLLAPSE_FIRST'));
    #add the reward data to the samples themselves
    lSentences = Sentence.ReadSentencesFromTextFile();
    lGranularSamples = Sentence.GenAllGranularSamplesFromList(lSentences, 'sentences.log');
    lCollapsedSamples = Sample.CollapseSamples(lGranularSamples);
    setEasy = set(data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE')));
    lTrainCollapsed = filter(lambda sample: sample.pddlconn.sPddlTo in setEasy, lCollapsedSamples);
    lTestCollapsed = filter(lambda sample: sample.pddlconn.sPddlTo not in setEasy, lCollapsedSamples);
    
    lTrainingSamples = GenerateTrainingSamplesFromRewards(dConnRewards, lTrainCollapsed);
    if config.get_bool('SVM'):
        svm.Train(lTrainingSamples);
        svm.Test(lTestCollapsed);
    elif config.get_bool('LOG_LINEAR'):
        log_linear.TrainAndTestLogLinear(lTrainingSamples, lTestCollapsed);
    else:
        assert False;
    # remove the duplicates
    setAlreadySeen = set();
    lTestNoDups = [];
    for sample in lTestCollapsed:
        tKey = (sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo);
        if tKey not in setAlreadySeen:
            setAlreadySeen.add(tKey);
            lTestNoDups.append(sample);

    fScore, fPrec, fRecall = AnalyzePredsSimple(lTestNoDups);
    #Sample.CollapsedSample.WriteConnections(lTestNoDups, config.get_string('SVM_REWARD_CONNECTIONS_FILE'), 
    #                                        bAppend=False, bWritePredictions = True, bPosOnly = True);
    #fOut = open('debug.txt', 'w');
    #for sample in lTestCollapsed:
    #    print >> fOut, "Sample:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, sample.fPred;

    print "Precision: ", fPrec
    print "Recall: ", fRecall
    print "F-Score: ", fScore;
def CalcAllTextFScore(lGranularSamples):
    lCollapsedSamples = Sample.CollapseSamples(lGranularSamples);
    lSorted = data.file_to_obj(config.get_string('SORTED_CONNECTIONS_LIST_FILE'));
    lSorted.reverse();
    lPos = [0 for i in range(len(lSorted))];
    lNeg = [0 for i in range(len(lSorted))];
    iPosTot = 0;
    iNegTot = 0;
    for sample in lCollapsedSamples:
        if sample.bPos:
            iPosTot += 1;
        else:
            iNegTot += 1;
        for i in range(len(lSorted)):
            if sample.pddlconn.sPddlTo != lSorted[i]:
                continue;
            if sample.bPos:
                lPos[i] += 1;
            else:
                lNeg[i] += 1;
    for i in range(len(lSorted)):
        fPrecision = float(lPos[i])/float(lPos[i] + lNeg[i]) if (lPos[i] + lNeg[i]) != 0 else 0;
        print lSorted[i], fPrecision;
    print "Overall Precision:", float(iPosTot)/float(iNegTot);
def AnalyzePredsSimple(lSamples):
    if config.get_bool('FORCE_SINGLE_DIR'):
        dSamples = {};
        for sample in lSamples:
            tKey = (sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo);
            assert(tKey not in dSamples);
            dSamples[tKey] = sample;

    iNumTotal = 0;
    iNumCorrect = 0;
    iTruePos = 0;
    iFalsePos = 0;
    iTrueNeg = 0;
    iFalseNeg = 0;
    iThres = 0;
    if config.get_bool('SVM'):
        fThres = config.get_int('SVM_THRESHOLD');
    elif config.get_bool('LOG_LINEAR'):
        fThres = 0.5
    else:
        assert False;

    if config.get_bool('CALC_FSCORE_ON_GOLD'):
        setGoldStringConns = LoadGoldStringConnSet()
        iNumGold = len(setGoldStringConns);

    if config.get_bool('ANALYZE_ON_HARD'):
        lEasy = data.file_to_obj(config.get_string('EASY_CONNECTIONS_LIST_FILE'));
    fPredMin = sys.float_info.max;
    fPredMax = -sys.float_info.max;
    for sample in lSamples:
        if config.get_bool('ANALYZE_ON_HARD'):
            if sample.pddlconn.sPddlTo in lEasy:
                continue;

        if config.get_bool('TRAIN_ON_REWARD_EVAL_ON_GOLD'):
            bActual = sample.GetGoldPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_EVAL'));
        else:
            bActual = sample.GetPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_EVAL'));
        if config.get_bool('FORCE_SINGLE_DIR'):
            fPred = sample.fPred;
            tReverseKey = (sample.pddlconn.sPddlTo, sample.pddlconn.sPddlFrom);
            fReversePred = dSamples[tReverseKey].fPred if tReverseKey in dSamples else -sys.maxint;
            bNormalPred = (float(sample.fPred) > fThres);
            bPred = ((float(sample.fPred) > fThres) and (float(fPred) >= float(fReversePred)));
            if tReverseKey not in dSamples:
                print "FORCE-MISSING";
            elif (bNormalPred == bActual) and (bPred != bActual):
                print "FORCE-BAD:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, fPred, fReversePred;
            elif  (bNormalPred != bActual) and (bPred == bActual):
                print "FORCE-GOOD:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, fPred, fReversePred;
            else:
                print "FORCE-NEITHER:", sample.pddlconn.sPddlFrom, sample.pddlconn.sPddlTo, fPred, fReversePred;
        else:
            bPred = sample.GetPredPos(bIgnoreDir = config.get_bool('IGNORE_DIR_FOR_EVAL'));
        fPredMin = min(fPredMin, sample.fPred);
        fPredMax = max(fPredMax, sample.fPred);

        iNumTotal += 1;
        if bPred == bActual:
            iNumCorrect += 1;
        if bPred:
            if bActual:
                iTruePos += 1;
            else:
                iFalsePos += 1;
        else:
            if bActual:
                iFalseNeg += 1;
            else:
                iTrueNeg += 1;

    if config.get_bool('CALC_FSCORE_ON_GOLD'):
        iFalseNeg = iNumGold - iTruePos;
        if config.get_bool('ANALYZE_ON_HARD'):
            iFalseNeg = iNumGold - iTruePos - len(lEasy);

    fPrecision = float(iTruePos)/float(iTruePos+iFalsePos) if iTruePos > 0 else 0;
    fRecall = float(iTruePos)/float(iTruePos+iFalseNeg) if iTruePos > 0 else 0;
    fScore = 2*fPrecision*fRecall/(fPrecision+fRecall) if (fPrecision*fRecall) > 0 else 0;
    print "FPred: min:", fPredMin, "max:", fPredMax;
    print "FScore:", fScore, fPrecision, fRecall;
    print "Frac Correct:", float(iNumCorrect)/float(iNumTotal), iNumCorrect, iNumTotal;
    print "TP:", iTruePos, "FP:", iFalsePos, "TN:", iTrueNeg, "FN:", iFalseNeg;
    print "FracPos:", float(iTruePos+iFalsePos)/float(iTrueNeg+iFalseNeg+iTruePos+iFalsePos);
    return fScore, fPrecision, fRecall;