def getscoreofLP(labeledrecords, unlabeledrecords, shuffledbutlabeled, records):
    """Return the F1 score of label propagation on unlabeled + held-out records.

    A deep copy of ``unlabeledrecords`` is extended with a deep copy of the
    tail of ``records`` (from index ``int(0.7 * len(records))`` onward), the
    label slot ``record[1]`` of every copied record is set to -1, and the
    result is handed to ``runlabelprop`` together with ``labeledrecords``.
    The argument lists themselves are not modified.

    Ground-truth labels are read from ``shuffledbutlabeled[i][1]``, first for
    ``i < len(unlabeledrecords)`` and then for the held-out index range.
    NOTE(review): this presumes the unlabeled pool sits at the front of
    ``shuffledbutlabeled`` in the same order -- confirm against the caller.

    Returns:
        The value of ``metrics.f1_score(y_true=..., y_pred=...)`` for the
        collected ground truth and predictions.
    """
    # Start of the held-out tail; computed once instead of three times.
    split = int(0.7 * len(records))

    # Ground truth: unlabeled-pool positions first, then the held-out tail,
    # matching the order in which the records are fed to label propagation.
    truth_positions = list(range(len(unlabeledrecords)))
    truth_positions.extend(range(split, len(records)))
    Y_actualLP = [shuffledbutlabeled[i][1] for i in truth_positions]

    # Work on copies so blanking the labels does not leak back to the caller.
    unlabeledrecordsLP = copy.deepcopy(unlabeledrecords)
    unlabeledrecordsLP.extend(copy.deepcopy(records[split:]))
    for record in unlabeledrecordsLP:
        record[1] = -1

    probabilities = runlabelprop(labeledrecords, unlabeledrecordsLP)

    predictedLP = []
    for p in probabilities:
        maxprob = max(p)
        if math.isnan(maxprob):
            # No usable distribution for this record: pick a label at random.
            # randint is inclusive on both ends, so this yields 1..len(p).
            predictedLP.append(random.randint(1, len(p)))
        else:
            # Column j stands for class label j + 1: the NaN fallback above
            # draws from 1..len(p), and the script in this file indexes
            # probability rows with ``predictedclasses[i] - 1``. The raw
            # argmax column was previously appended as-is, which mixed
            # 0-based and 1-based labels in one prediction list.
            # NOTE(review): assumes class labels are exactly 1..len(p).
            predictedLP.append(p.tolist().index(maxprob) + 1)

    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('done with LP, moving ahead')
    return metrics.f1_score(y_true=Y_actualLP, y_pred=predictedLP)
def getscoreofLP(labeledrecords, unlabeledrecords, shuffledbutlabeled, records):
    """Return the F1 score of label propagation on unlabeled + held-out records.

    A deep copy of ``unlabeledrecords`` is extended with a deep copy of the
    tail of ``records`` (from index ``int(0.7 * len(records))`` onward), the
    label slot ``record[1]`` of every copied record is set to -1, and the
    result is handed to ``runlabelprop`` together with ``labeledrecords``.
    The argument lists themselves are not modified.

    Ground-truth labels are read from ``shuffledbutlabeled[i][1]``, first for
    ``i < len(unlabeledrecords)`` and then for the held-out index range.
    NOTE(review): this presumes the unlabeled pool sits at the front of
    ``shuffledbutlabeled`` in the same order -- confirm against the caller.

    Returns:
        The value of ``metrics.f1_score(y_true=..., y_pred=...)`` for the
        collected ground truth and predictions.
    """
    # Start of the held-out tail; computed once instead of three times.
    split = int(0.7 * len(records))

    # Ground truth: unlabeled-pool positions first, then the held-out tail,
    # matching the order in which the records are fed to label propagation.
    truth_positions = list(range(len(unlabeledrecords)))
    truth_positions.extend(range(split, len(records)))
    Y_actualLP = [shuffledbutlabeled[i][1] for i in truth_positions]

    # Work on copies so blanking the labels does not leak back to the caller.
    unlabeledrecordsLP = copy.deepcopy(unlabeledrecords)
    unlabeledrecordsLP.extend(copy.deepcopy(records[split:]))
    for record in unlabeledrecordsLP:
        record[1] = -1

    probabilities = runlabelprop(labeledrecords, unlabeledrecordsLP)

    predictedLP = []
    for p in probabilities:
        maxprob = max(p)
        if math.isnan(maxprob):
            # No usable distribution for this record: pick a label at random.
            # randint is inclusive on both ends, so this yields 1..len(p).
            predictedLP.append(random.randint(1, len(p)))
        else:
            # Column j stands for class label j + 1: the NaN fallback above
            # draws from 1..len(p), and the script in this file indexes
            # probability rows with ``predictedclasses[i] - 1``. The raw
            # argmax column was previously appended as-is, which mixed
            # 0-based and 1-based labels in one prediction list.
            # NOTE(review): assumes class labels are exactly 1..len(p).
            predictedLP.append(p.tolist().index(maxprob) + 1)

    # Parenthesised single-argument form prints the same on Python 2 and 3.
    print('done with LP, moving ahead')
    return metrics.f1_score(y_true=Y_actualLP, y_pred=predictedLP)
# Self-training driver (top-level script; Python 2 print syntax).
#
# Setup: the labeled and unlabeled pools are taken from separatedrecords[0]
# and separatedrecords[1]; f1LP stores the getscoreofLP() result computed
# before any record is promoted; the confidence threshold is 0.99 and the
# classifier name passed to runclassifier() is 'SVM'.
#
# Loop (while nooflabeledrecords < len(trainingrecords)): each pass calls
# runlabelprop() and runclassifier() on the current pools. For unlabeled
# record i, the label-propagation probability in column
# predictedclasses[i]-1 (the column for the classifier's predicted class;
# labels have no 0 class) is compared with the threshold. When it is at
# least the threshold, that predicted class is written to the record's
# label slot [1] and the record is popped from unlabeledrecords and
# appended to labeledrecords. The index i-newlyadded compensates for the
# records already popped earlier in the same pass. A pass that promotes
# nothing ends the loop via break. Otherwise newlyadded as a percentage of
# nooflabeledrecords is appended to perofrecordslabeledperiteration and the
# count is printed (the trailing comma suppresses the newline in Python 2).
#
# NOTE(review): the original statement nesting cannot be recovered with
# certainty from this collapsed line. Neither nooflabeledrecords nor
# noofiterations is updated in the visible text -- confirm the loop body
# continues elsewhere and updates them, otherwise only the break ends it.
labeledrecords = separatedrecords[0] unlabeledrecords = separatedrecords[1] nooflabeledrecords = len(labeledrecords) f1LP = getscoreofLP(labeledrecords,unlabeledrecords,shuffledbutlabeled,records) initlabeledrecords = nooflabeledrecords probabiltythreshold = 0.99 noofiterations = 0 perofrecordslabeledperiteration = [] classifier = 'SVM' while nooflabeledrecords < len(trainingrecords): probabilities = runlabelprop(labeledrecords,unlabeledrecords) predictedclasses = runclassifier(labeledrecords,unlabeledrecords,classifier) newlyadded = 0 foundTarget = False for i in range(len(probabilities)): if probabilities[i][predictedclasses[i]-1] >= probabiltythreshold: # predictedclasses[i]-1 if no 0 class label foundTarget = True unlabeledrecords[i-newlyadded][1] = predictedclasses[i] labeledrecords.append(unlabeledrecords.pop(i-newlyadded)) newlyadded+=1 if foundTarget == False: break perofrecordslabeledperiteration.append((float(newlyadded)/nooflabeledrecords) *100 ) print 'newly labeled ' , newlyadded,
# Self-training driver (top-level script).
#
# Setup: the labeled and unlabeled pools are taken from separatedrecords[0]
# and separatedrecords[1]; f1LP stores the getscoreofLP() result computed
# before any record is promoted; the confidence threshold is 0.99 and the
# classifier name passed to runclassifier() is 'SVM'.
#
# Loop (while nooflabeledrecords < len(trainingrecords)): each pass calls
# runlabelprop() and runclassifier() on the current pools. For unlabeled
# record i, the label-propagation probability in column
# predictedclasses[i] - 1 (the column for the classifier's predicted class;
# labels have no 0 class) is compared with the threshold. When it is at
# least the threshold, that predicted class is written to the record's
# label slot [1] and the record is popped from unlabeledrecords and
# appended to labeledrecords. The index i - newlyadded compensates for the
# records already popped earlier in the same pass. A pass that promotes
# nothing ends the loop via break.
#
# NOTE(review): the original statement nesting cannot be recovered with
# certainty from this collapsed line, and the text stops at the break.
# Neither nooflabeledrecords nor noofiterations is updated in the visible
# text -- confirm the loop body continues elsewhere and updates them.
labeledrecords = separatedrecords[0] unlabeledrecords = separatedrecords[1] nooflabeledrecords = len(labeledrecords) f1LP = getscoreofLP(labeledrecords, unlabeledrecords, shuffledbutlabeled, records) initlabeledrecords = nooflabeledrecords probabiltythreshold = 0.99 noofiterations = 0 perofrecordslabeledperiteration = [] classifier = 'SVM' while nooflabeledrecords < len(trainingrecords): probabilities = runlabelprop(labeledrecords, unlabeledrecords) predictedclasses = runclassifier(labeledrecords, unlabeledrecords, classifier) newlyadded = 0 foundTarget = False for i in range(len(probabilities)): if probabilities[i][ predictedclasses[i] - 1] >= probabiltythreshold: # predictedclasses[i]-1 if no 0 class label foundTarget = True unlabeledrecords[i - newlyadded][1] = predictedclasses[i] labeledrecords.append(unlabeledrecords.pop(i - newlyadded)) newlyadded += 1 if foundTarget == False: break