Exemple #1
0
def getscoreofLP(labeledrecords,unlabeledrecords,shuffledbutlabeled,records):
    """Return the F1 score of label propagation on the evaluation set.

    The evaluation set is every entry of ``unlabeledrecords`` followed by the
    tail of ``records`` starting at index ``int(0.7 * len(records))``.  Deep
    copies of those records get index 1 overwritten with -1 and are handed to
    ``runlabelprop`` together with ``labeledrecords``; the caller's lists are
    left unmodified.

    Args:
        labeledrecords: passed unchanged to ``runlabelprop`` as the labeled set.
        unlabeledrecords: list of mutable records; index 1 of each copy is
            overwritten with -1.
        shuffledbutlabeled: indexable collection; ``[i][1]`` is read as the
            true label for ``i`` in ``range(len(unlabeledrecords))`` and for
            ``i`` in the tail range of ``records``.
        records: full record list whose tail is added to the evaluation set.

    Returns:
        The value of ``metrics.f1_score`` for the true vs. predicted labels.
    """
    split = int(0.7 * len(records))

    # True labels, in the same order as the evaluation set assembled below.
    Y_actualLP = [shuffledbutlabeled[i][1] for i in range(len(unlabeledrecords))]
    # The tail of `records` (test data) is evaluated by LP as well.
    Y_actualLP.extend(shuffledbutlabeled[i][1] for i in range(split, len(records)))

    # Work on deep copies so that blanking the label slot does not leak back
    # into the caller's records.
    unlabeledrecordsLP = copy.deepcopy(unlabeledrecords)
    unlabeledrecordsLP.extend(copy.deepcopy(records[split:]))
    for record in unlabeledrecordsLP:
        record[1] = -1

    probabilities = runlabelprop(labeledrecords, unlabeledrecordsLP)
    predictedLP = []
    for p in probabilities:
        maxprob = max(p)
        if math.isnan(maxprob):
            # No usable probability for this row: fall back to a random pick.
            # NOTE(review): this draws from 1..len(p) while the branch below
            # yields a 0-based column index (0..len(p)-1); confirm which
            # label base the true labels use and align the two branches.
            predictedLP.append(random.randint(1, len(p)))
        else:
            predictedLP.append(p.tolist().index(maxprob))

    # Single-argument parenthesised form prints the same text on Python 2 and 3.
    print('done with LP, moving ahead')
    return metrics.f1_score(y_true=Y_actualLP, y_pred=predictedLP)
Exemple #2
0
def getscoreofLP(labeledrecords, unlabeledrecords, shuffledbutlabeled,
                 records):
    """Return the F1 score of label propagation on the evaluation set.

    The evaluation set is every entry of ``unlabeledrecords`` followed by the
    tail of ``records`` starting at index ``int(0.7 * len(records))``.  Deep
    copies of those records get index 1 overwritten with -1 and are handed to
    ``runlabelprop`` together with ``labeledrecords``; the caller's lists are
    left unmodified.

    Args:
        labeledrecords: passed unchanged to ``runlabelprop`` as the labeled set.
        unlabeledrecords: list of mutable records; index 1 of each copy is
            overwritten with -1.
        shuffledbutlabeled: indexable collection; ``[i][1]`` is read as the
            true label for ``i`` in ``range(len(unlabeledrecords))`` and for
            ``i`` in the tail range of ``records``.
        records: full record list whose tail is added to the evaluation set.

    Returns:
        The value of ``metrics.f1_score`` for the true vs. predicted labels.
    """
    tail_start = int(0.7 * len(records))

    # Indices into shuffledbutlabeled, in the same order as the evaluation
    # set assembled below: the unlabeled pool first, then the test tail.
    eval_indices = list(range(len(unlabeledrecords)))
    eval_indices.extend(range(tail_start, len(records)))
    Y_actualLP = [shuffledbutlabeled[i][1] for i in eval_indices]

    # Deep copies keep the caller's records intact when the label slot is
    # blanked out.
    unlabeledrecordsLP = copy.deepcopy(unlabeledrecords)
    unlabeledrecordsLP.extend(copy.deepcopy(records[tail_start:]))
    for record in unlabeledrecordsLP:
        record[1] = -1

    probabilities = runlabelprop(labeledrecords, unlabeledrecordsLP)
    predictedLP = []
    for p in probabilities:
        maxprob = max(p)
        if math.isnan(maxprob):
            # No usable probability for this row: fall back to a random pick.
            # NOTE(review): this draws from 1..len(p) while the branch below
            # yields a 0-based column index (0..len(p)-1); confirm which
            # label base the true labels use and align the two branches.
            predictedLP.append(random.randint(1, len(p)))
        else:
            predictedLP.append(p.tolist().index(maxprob))

    # Single-argument parenthesised form prints the same text on Python 2 and 3.
    print('done with LP, moving ahead')
    return metrics.f1_score(y_true=Y_actualLP, y_pred=predictedLP)
Exemple #3
0
# Self-training script fragment: repeatedly moves confidently-predicted
# records from the unlabeled pool into the labeled pool.
# separatedrecords[0] is used as the labeled set, separatedrecords[1] as the
# unlabeled set.
labeledrecords = separatedrecords[0]
unlabeledrecords = separatedrecords[1]

nooflabeledrecords = len(labeledrecords)

# Label-propagation F1 score, computed once before the loop below starts
# moving records between the two pools.
f1LP = getscoreofLP(labeledrecords,unlabeledrecords,shuffledbutlabeled,records)


initlabeledrecords = nooflabeledrecords
# Minimum label-propagation probability required to accept a predicted label.
probabiltythreshold = 0.99
noofiterations = 0
perofrecordslabeledperiteration = []
# Classifier name handed to runclassifier.
classifier = 'SVM'

# NOTE(review): nooflabeledrecords is not updated in the visible part of this
# loop; presumably that happens further down, past the end of this excerpt.
while nooflabeledrecords < len(trainingrecords):
    probabilities = runlabelprop(labeledrecords,unlabeledrecords)
    predictedclasses = runclassifier(labeledrecords,unlabeledrecords,classifier)
    newlyadded = 0
    foundTarget = False
    for i in range(len(probabilities)):
        # Accept the classifier's prediction only when label propagation
        # assigns that same class a probability at or above the threshold.
        if probabilities[i][predictedclasses[i]-1] >= probabiltythreshold: # predictedclasses[i]-1 if no 0 class label
            foundTarget = True
            # Every earlier pop in this pass shifted the remaining records
            # left by one, so the i-th original record now sits at
            # i-newlyadded.
            unlabeledrecords[i-newlyadded][1] = predictedclasses[i]
            labeledrecords.append(unlabeledrecords.pop(i-newlyadded))
            newlyadded+=1
    
    # Stop as soon as a full pass accepts no new record.
    if foundTarget == False:
        break
    
    # Records accepted in this pass, as a percentage of nooflabeledrecords.
    perofrecordslabeledperiteration.append((float(newlyadded)/nooflabeledrecords) *100 )
    # Python 2 print statement; the trailing comma suppresses the newline.
    print 'newly labeled ' , newlyadded,
Exemple #4
0
# Self-training script fragment: repeatedly moves confidently-predicted
# records from the unlabeled pool into the labeled pool.
# separatedrecords[0] is used as the labeled set, separatedrecords[1] as the
# unlabeled set.
labeledrecords = separatedrecords[0]
unlabeledrecords = separatedrecords[1]

nooflabeledrecords = len(labeledrecords)

# Label-propagation F1 score, computed once before the loop below starts
# moving records between the two pools.
f1LP = getscoreofLP(labeledrecords, unlabeledrecords, shuffledbutlabeled,
                    records)

initlabeledrecords = nooflabeledrecords
# Minimum label-propagation probability required to accept a predicted label.
probabiltythreshold = 0.99
noofiterations = 0
perofrecordslabeledperiteration = []
# Classifier name handed to runclassifier.
classifier = 'SVM'

# NOTE(review): nooflabeledrecords is not updated in the visible part of this
# loop; presumably that happens further down, past the end of this excerpt.
while nooflabeledrecords < len(trainingrecords):
    probabilities = runlabelprop(labeledrecords, unlabeledrecords)
    predictedclasses = runclassifier(labeledrecords, unlabeledrecords,
                                     classifier)
    newlyadded = 0
    foundTarget = False
    for i in range(len(probabilities)):
        # Accept the classifier's prediction only when label propagation
        # assigns that same class a probability at or above the threshold.
        if probabilities[i][
                predictedclasses[i] -
                1] >= probabiltythreshold:  # predictedclasses[i]-1 if no 0 class label
            foundTarget = True
            # Every earlier pop in this pass shifted the remaining records
            # left by one, so the i-th original record now sits at
            # i - newlyadded.
            unlabeledrecords[i - newlyadded][1] = predictedclasses[i]
            labeledrecords.append(unlabeledrecords.pop(i - newlyadded))
            newlyadded += 1

    # Stop as soon as a full pass accepts no new record.
    if foundTarget == False:
        break