def createTrainingSupervisedDataSet(self, msrcImages, scale, keepClassDistTrain):
    """Build a ClassificationDataSet from the training split of the MSRC data.

    The input is split with pomio.splitInputDataset_msrcData, features and
    labels are generated for the first element of the split only (with
    ignoreVoid=True), and every sample is added with a one-hot label vector.

    The names ``numClasses`` and ``voidClass`` are not defined in this
    function; they must be provided by the enclosing module.

    :param msrcImages: passed through to pomio.splitInputDataset_msrcData
    :param scale: passed through to pomio.splitInputDataset_msrcData
    :param keepClassDistTrain: passed through to pomio.splitInputDataset_msrcData
    :return: the populated ClassificationDataSet
    """
    print("\tSplitting MSRC data into train, test, valid data sets.")
    splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain)

    print("\tNow generating features for each training image.")
    trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True)
    features = trainData[0]
    labels = trainData[1]

    numDataPoints = np.shape(features)[0]
    numFeatures = np.shape(features)[1]
    # NOTE: this is the number of label *entries* (one per sample), not the
    # number of distinct classes; the original author flagged the name as
    # misleading.
    numLabels = np.size(labels)
    assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!"

    dataSetTrain = ClassificationDataSet(numFeatures, numClasses)

    print("\tNow adding all data points to the ClassificationDataSet...")
    for idx in range(numDataPoints):
        label = labels[idx]
        binaryLabels = np.zeros(numClasses)
        # Labels above the void class are shifted down by one to cope with
        # the removal of the void class (original comment: idx 13).
        if label < voidClass:
            binaryLabels[label] = 1
        else:
            binaryLabels[label - 1] = 1
        dataSetTrain.addSample(features[idx], binaryLabels)

    # Report the number of samples actually added; the previous code printed
    # np.size(trainData), i.e. the size of the [features, labels] pair.
    print("\tAdded %s  labeled data points to DataSet." % numDataPoints)
    return dataSetTrain
def main(self):
    """Parse the feature matrix, then feed each observation through the
    cache, the (re)trained model and the trader, and print the analysis.

    Predictions are stored in column 0 of self.predictMat, one row per
    observation.
    """
    parser = FeatureGenerator.FeatureParser(self.messageBook, self.limitBook,
                                            self.timeFrame, self.useCachedFiles)
    dataMat, labelMat = parser.parse()
    print("Generated Features of size: (%d,%d)" % dataMat.shape)

    self.labelMat = labelMat
    self.predictMat = numpy.zeros((labelMat.shape))

    # an observation is a row in the dataMat
    for rowIdx, obs in enumerate(dataMat):
        self.cache.add(obs)
        if self.cache.needUpdate():
            print("huh")
            self.model = self.trainer.train(self.cache.getData())

        prediction = self.model.predict(obs)
        self.predictMat[rowIdx, 0] = prediction

        # progress is reported on the 1-based count of processed rows
        processed = rowIdx + 1
        if processed % 1000 == 0:
            print("On Obs: %d" % processed)

        self.trader.trade(Trader.DataPt(obs), prediction)

    self.printAnalysis()
def __init__(self, config, db=None, stats=None, dmodel=None, load_path=None, w2v=None):
    """Set up the GBRT model and its feature generator.

    :param config: either a path to a JSON configuration file (any str or
        unicode string) or an already loaded configuration mapping
    :param db: stored on the instance and forwarded to FeatureGenerator
    :param stats: forwarded to FeatureGenerator
    :param dmodel: stored on the instance and forwarded to FeatureGenerator
    :param load_path: if given, the model is loaded via self.loadModel
        instead of being created from the hyper parameters
    :param w2v: forwarded to FeatureGenerator
    """
    # isinstance also accepts str/unicode subclasses, which the previous
    # exact type() comparison treated as an already-parsed config.
    if isinstance(config, basestring):
        with open(config) as data_file:
            self._config = json.load(data_file)
    else:
        self._config = config
    self._dmodel = dmodel

    # 'hyper_patameters' (sic) is the key the configuration files use.
    hyper_params = self._config['hyper_patameters']
    print("GBRT params: %s" % (hyper_params,))

    if load_path is None:
        self._model = GradientBoostingClassifier(
            loss=hyper_params['loss'],
            learning_rate=hyper_params['learning_rate'],
            n_estimators=hyper_params['n_estimators'],
            max_depth=hyper_params['max_depth'],
            max_features=None)
    else:
        self.loadModel(load_path)

    feature_config = self._config['features']
    self._feature_generator = FeatureGenerator(
        mention_features=feature_config['mention_features'],
        entity_features=feature_config['entity_features'],
        yamada_embedding_path=ProjectSettings.getPath()[0] +
        feature_config['yamada_embedding_path'],
        stats=stats,
        db=db,
        dmodel=dmodel,
        w2v=w2v)

    # training samples accumulate here until the model is fitted
    self._train_X = []
    self._train_Y = []
    self._db = db
def createTrainingSupervisedDataSet(self, msrcImages, scale, keepClassDistTrain):
    """Build a ClassificationDataSet from the training split of the MSRC data.

    The input is split with pomio.splitInputDataset_msrcData, features and
    labels are generated for the first element of the split only (with
    ignoreVoid=True), and every sample is added with a one-hot label vector.

    The names ``numClasses`` and ``voidClass`` are not defined in this
    function; they must be provided by the enclosing module.

    :param msrcImages: passed through to pomio.splitInputDataset_msrcData
    :param scale: passed through to pomio.splitInputDataset_msrcData
    :param keepClassDistTrain: passed through to pomio.splitInputDataset_msrcData
    :return: the populated ClassificationDataSet
    """
    print("\tSplitting MSRC data into train, test, valid data sets.")
    splitData = pomio.splitInputDataset_msrcData(msrcImages, scale, keepClassDistTrain)

    print("\tNow generating features for each training image.")
    trainData = FeatureGenerator.processLabeledImageData(splitData[0], ignoreVoid=True)
    features = trainData[0]
    labels = trainData[1]

    numDataPoints = np.shape(features)[0]
    numFeatures = np.shape(features)[1]
    # NOTE: this is the number of label *entries* (one per sample), not the
    # number of distinct classes; the original author flagged the name as
    # misleading.
    numLabels = np.size(labels)
    assert numDataPoints == numLabels, "Number of feature data points and number of labels not equal!"

    dataSetTrain = ClassificationDataSet(numFeatures, numClasses)

    print("\tNow adding all data points to the ClassificationDataSet...")
    for idx in range(numDataPoints):
        label = labels[idx]
        binaryLabels = np.zeros(numClasses)
        # Labels above the void class are shifted down by one to cope with
        # the removal of the void class (original comment: idx 13).
        if label < voidClass:
            binaryLabels[label] = 1
        else:
            binaryLabels[label - 1] = 1
        dataSetTrain.addSample(features[idx], binaryLabels)

    # Report the number of samples actually added; the previous code printed
    # np.size(trainData), i.e. the size of the [features, labels] pair.
    print("\tAdded %s  labeled data points to DataSet." % numDataPoints)
    return dataSetTrain
def train(xml_file, con_file, dep_file, alg, concept, classifier_pickle):
    """Train a maximum-entropy classifier and pickle it to disk.

    Annotation units are read from xml_file, combined with con_file and
    dep_file through UnifiedReader, filtered with instance_filter and turned
    into feature sets for the given concept.

    :param xml_file: input passed to get_annotation_units
    :param con_file: passed to UnifiedReader
    :param dep_file: passed to UnifiedReader
    :param alg: training algorithm handed to nltk.MaxentClassifier.train
    :param concept: concept name used for filtering and feature generation
    :param classifier_pickle: path the trained classifier is pickled to
    """
    aus = get_annotation_units(xml_file)
    aus = UnifiedReader(aus, con_file, dep_file)
    aus = instance_filter(aus, None, True, concept)

    featuresets = FeatureGenerator.get_featuresets(aus, concept)
    # label of the first feature set, printed as a quick sanity check
    print(featuresets[0][1])

    classifier = nltk.MaxentClassifier.train(featuresets, alg, trace=0, max_iter=1000)

    # The previous code printed the bound method `classifier.labels` instead
    # of calling it.
    labels = classifier.labels()
    print("%s %s" % (len(labels), labels))

    # `with` guarantees the handle is closed even if pickling fails
    with open(classifier_pickle, 'wb') as pickle_out:
        pickle.dump(classifier, pickle_out)
def featureGen(
        fileName='D:\\Fall 2015\\Search\\YELP Challenge\\Yelp-Dataset-Challenge-2015\\Task_2\\Train_Review.csv',
        mapFile='D:\\Fall 2015\\Search\\YELP Challenge\\Yelp-Dataset-Challenge-2015\\Task_2\\Business_File.csv',
        writeFile='D:\\Fall 2015\\Search\\YELP Challenge\\Yelp-Dataset-Challenge-2015\\Task_2\\Generated_Features.csv'):
    """Generate features.

    The three paths used to be hard-coded inside the body; they are now
    parameters whose defaults are the original values, so calling
    ``featureGen()`` with no arguments behaves as before.

    :param fileName: review CSV handed to generateFeature as its input
    :param mapFile: business CSV handed to getBusinesSToCato
    :param writeFile: path handed to generateFeature as its output
    """
    print('\n ********** Feature Generator ***********')
    oFeature = feature.featureGen()
    # the business mapping is loaded before the features are generated
    oFeature.getBusinesSToCato(mapFile)
    oFeature.generateFeature(fileName, writeFile)
def featureGen(
        fileName='D:\\Fall 2015\\Search\\YELP Challenge\\Yelp-Dataset-Challenge-2015\\Task_2\\Train_Review.csv',
        mapFile='D:\\Fall 2015\\Search\\YELP Challenge\\Yelp-Dataset-Challenge-2015\\Task_2\\Business_File.csv',
        writeFile='D:\\Fall 2015\\Search\\YELP Challenge\\Yelp-Dataset-Challenge-2015\\Task_2\\Generated_Features.csv'):
    """Generate features.

    The three paths used to be hard-coded inside the body; they are now
    parameters whose defaults are the original values, so calling
    ``featureGen()`` with no arguments behaves as before.

    :param fileName: review CSV handed to generateFeature as its input
    :param mapFile: business CSV handed to getBusinesSToCato
    :param writeFile: path handed to generateFeature as its output
    """
    print('\n ********** Feature Generator ***********')
    oFeature = feature.featureGen()
    # the business mapping is loaded before the features are generated
    oFeature.getBusinesSToCato(mapFile)
    oFeature.generateFeature(fileName, writeFile)
def __init__(self, config, db=None, stats=None, models_as_features=None, load_path=None, w2v=None, base_path="../"):
    """Set up the GBRT model and its feature generator.

    :param config: either a path to a JSON configuration file (any str or
        unicode string) or an already loaded configuration mapping
    :param db: stored on the instance and forwarded to FeatureGenerator
    :param stats: forwarded to FeatureGenerator
    :param models_as_features: stored on the instance and forwarded to
        FeatureGenerator
    :param load_path: if given, the model is loaded via self.loadModel
        instead of being created from the hyper parameters
    :param w2v: forwarded to FeatureGenerator
    :param base_path: prefix prepended to the configured
        'yamada_embedding_path'
    """
    # isinstance also accepts str/unicode subclasses, which the previous
    # exact type() comparison treated as an already-parsed config.
    if isinstance(config, basestring):
        with open(config) as data_file:
            self._config = json.load(data_file)
    else:
        self._config = config
    self.models_as_features = models_as_features

    # 'hyper_patameters' (sic) is the key the configuration files use.
    hyper_params = self._config['hyper_patameters']
    print("GBRT params: %s" % (hyper_params,))

    if load_path is None:
        self._model = GradientBoostingClassifier(
            loss=hyper_params['loss'],
            learning_rate=hyper_params['learning_rate'],
            n_estimators=hyper_params['n_estimators'],
            max_depth=hyper_params['max_depth'],
            max_features=None)
    else:
        self.loadModel(load_path)

    feature_config = self._config['features']
    self._feature_generator = FeatureGenerator(
        feature_names=feature_config['feature_names'],
        yamada_embedding_path=base_path + feature_config['yamada_embedding_path'],
        stats=stats,
        db=db,
        models_as_features=self.models_as_features,
        w2v=w2v)

    # training samples accumulate here until the model is fitted
    self._train_X = []
    self._train_Y = []
    self._db = db
def computePixelFeatures(rgbImage, ftype):
    """Return the pixel feature matrix of rgbImage for the given feature type.

    Only the 'classic' type is supported; any other value raises an
    Exception.  The result is sanity-checked: it must have at least one
    column, one row per image pixel, and contain only finite values.
    """
    # Guard clause: reject unknown feature types before doing any work.
    if ftype != 'classic':
        raise Exception('Invalid feature type "%s"' % ftype)

    pixelFeatures = FeatureGenerator.generatePixelFeaturesForImage(rgbImage)

    assert pixelFeatures.shape[1] > 0
    assert pixelFeatures.shape[0] == rgbImage.shape[0] * rgbImage.shape[1]
    assert np.all(np.isfinite(pixelFeatures))
    return pixelFeatures
def computePixelFeatures(rgbImage, ftype):
    """Return the pixel feature matrix of rgbImage for the given feature type.

    Only the 'classic' type is supported; any other value raises an
    Exception.  The result is sanity-checked: it must have at least one
    column, one row per image pixel, and contain only finite values.
    """
    # Guard clause: reject unknown feature types before doing any work.
    if ftype != 'classic':
        raise Exception('Invalid feature type "%s"' % ftype)

    pixelFeatures = FeatureGenerator.generatePixelFeaturesForImage(rgbImage)

    assert pixelFeatures.shape[1] > 0
    assert pixelFeatures.shape[0] == rgbImage.shape[0] * rgbImage.shape[1]
    assert np.all(np.isfinite(pixelFeatures))
    return pixelFeatures
def predictSuperPixelLabels(classifier, image, numberSuperPixels, superPixelCompactness):
    """Segment image into SLIC superpixels and predict a label for each one.

    :return: tuple (predicted superpixel labels, superpixel graph)
    """
    print("\n**Computing super pixel labelling for input image")

    # Get superpixels
    slicParams = [numberSuperPixels, superPixelCompactness]
    spgraph = SuperPixels.computeSuperPixelGraph(image, 'slic', slicParams)
    spMask = spgraph.m_labels
    nbSuperPixels = len(spgraph.m_nodes)
    print(" ".join(["**Image contains", str(nbSuperPixels), "superpixels"]))

    # Get superpixel features
    spFeatures = FeatureGenerator.generateSuperPixelFeatures(image, spMask, None)
    nbFeatureRows = np.shape(spFeatures)[0]
    assert nbFeatureRows == nbSuperPixels, \
        "Number of superpixels in feature array != number super pixels in image!:: " + \
        str(nbFeatureRows) + " vs. " + str(nbSuperPixels)

    return (classifier.predict(spFeatures), spgraph)
def generateImagePredictionClassDist(rgbImage, classifier, requireAllClasses=True):
    """Produce a per-pixel class probability distribution for an RGB image.

    This function takes an RGB image as an (i,j,3) numpy array and a
    classifier offering ``predict`` and ``predict_proba`` (the original
    docstring says a scikit-learn classifier).  It returns an (i,j,N) numpy
    array where N is the number of columns of the predicted probabilities.

    :param rgbImage: (i,j,3) image array
    :param classifier: object with predict / predict_proba
    :param requireAllClasses: when true, assert that N equals
        pomio.getNumClasses() or that value plus one
    """
    # TODO Broaden to cope with more classifiers :)
    nbRows, nbCols = rgbImage[:, :, 0].shape

    # todo: replace with features.computePixelFeatures JRS
    imagePixelFeatures = FeatureGenerator.generatePixelFeaturesForImage(rgbImage)

    # The hard labels are only used for the diagnostic print below.
    predictedPixelLabels = classifier.predict(imagePixelFeatures)
    predictionProbs = classifier.predict_proba(imagePixelFeatures)
    print("\nShape of predicted labels:: %s" % (np.shape(predictedPixelLabels),))
    print("\nShape of prediction probs:: %s" % (np.shape(predictionProbs),))

    numClasses = pomio.getNumClasses()
    nbPredictedClasses = np.shape(predictionProbs)[1]
    assert not requireAllClasses or \
        nbPredictedClasses in (numClasses, numClasses + 1), \
        "Classifer prediction does not match all classes (23 or 24):: " + \
        str(nbPredictedClasses)

    print(predictionProbs)

    # Rows come first in the target shape.  The message now prints the shape
    # that is actually used; it previously printed (cols, rows, N).
    targetShape = (nbRows, nbCols, nbPredictedClasses)
    print("reshaping to  %s" % (targetShape,))
    return np.reshape(predictionProbs, targetShape)
def classify(txt_file, con_file, dep_file, concept, classifier_pickle, output_file):
    """Classify the annotation units of txt_file and write one line per unit.

    Each output line is ``<label>\\t<probability>`` where label is the most
    probable class according to the pickled classifier.

    :param txt_file: input passed to get_annotation_units_from_txt
    :param con_file: passed to UnifiedReader
    :param dep_file: passed to UnifiedReader
    :param concept: concept name used for feature generation
    :param classifier_pickle: path of the pickled classifier to load
    :param output_file: path of the UTF-8 result file to write
    """
    aus = get_annotation_units_from_txt(txt_file)
    aus = UnifiedReader(aus, con_file, dep_file)
    featuresets = FeatureGenerator.get_featuresets(aus, concept)

    # NOTE: unpickling can execute arbitrary code; only load classifier
    # files from a trusted source.
    with open(classifier_pickle, 'rb') as pickle_in:
        classifier = pickle.load(pickle_in)

    # The output file used to be left open; `with` closes (and flushes) it
    # even when classification fails part-way through.
    with codecs.open(output_file, mode='w', encoding='utf-8') as fout:
        for fs, _gold in featuresets:
            prob_dist = classifier.prob_classify(fs)
            label = prob_dist.max()
            fout.write('%s\t%f' % (label, prob_dist.prob(label)) + '\n')
def generateImagePredictionClassDist(rgbImage, classifier, requireAllClasses=True):
    """Produce a per-pixel class probability distribution for an RGB image.

    This function takes an RGB image as an (i,j,3) numpy array and a
    classifier offering ``predict`` and ``predict_proba`` (the original
    docstring says a scikit-learn classifier).  It returns an (i,j,N) numpy
    array where N is the number of columns of the predicted probabilities.

    :param rgbImage: (i,j,3) image array
    :param classifier: object with predict / predict_proba
    :param requireAllClasses: when true, assert that N equals
        pomio.getNumClasses() or that value plus one
    """
    # TODO Broaden to cope with more classifiers :)
    nbRows, nbCols = rgbImage[:, :, 0].shape

    # todo: replace with features.computePixelFeatures JRS
    imagePixelFeatures = FeatureGenerator.generatePixelFeaturesForImage(rgbImage)

    # The hard labels are only used for the diagnostic print below.
    predictedPixelLabels = classifier.predict(imagePixelFeatures)
    predictionProbs = classifier.predict_proba(imagePixelFeatures)
    print("\nShape of predicted labels:: %s" % (np.shape(predictedPixelLabels),))
    print("\nShape of prediction probs:: %s" % (np.shape(predictionProbs),))

    numClasses = pomio.getNumClasses()
    nbPredictedClasses = np.shape(predictionProbs)[1]
    assert not requireAllClasses or \
        nbPredictedClasses in (numClasses, numClasses + 1), \
        "Classifer prediction does not match all classes (23 or 24):: " + \
        str(nbPredictedClasses)

    print(predictionProbs)

    # Rows come first in the target shape.  The message now prints the shape
    # that is actually used; it previously printed (cols, rows, N).
    targetShape = (nbRows, nbCols, nbPredictedClasses)
    print("reshaping to  %s" % (targetShape,))
    return np.reshape(predictionProbs, targetShape)
def ARFFPrinter(aus, concept, outFile):
    """Write the feature sets of ``aus`` to ``outFile`` in ARFF layout.

    The header declares one attribute per feature name found in any feature
    set (names starting with 'contain-' are declared as {True, False}, all
    others as string) followed by the class attribute named after concept.
    Each data line lists the quoted feature values, 'False' for features a
    feature set lacks, and finally the feature set's label.

    :param aus: annotation units handed to FeatureGenerator.get_featuresets
    :param concept: 'CCS' or 'PT'
    :param outFile: path of the file to write
    :raises ValueError: if concept is neither 'CCS' nor 'PT'; this is now
        checked before the output file is opened, so no partial file is
        left behind
    """
    # Strings are compared with ==.  The previous code used `is` and
    # misspelled `concept` in the 'PT' branch, so 'PT' always raised
    # NameError.
    if concept == 'CCS':
        classes = 'unidentifiable normalTOcancer cancerTOnormal'
    elif concept == 'PT':
        classes = 'observation causality'
    else:
        raise ValueError('unsupported concept: %r' % (concept,))
    # NOTE(review): the class values are space separated inside {...};
    # confirm that the consumer of this file accepts that.

    featuresets = FeatureGenerator.get_featuresets(aus, concept)

    # Collect every feature name once.  A dict is kept (rather than a set) so
    # the column order matches what the original dict-based code produced.
    seenAttributes = {}
    for featureset in featuresets:
        for key in featureset[0]:
            seenAttributes[key] = True
    attributes = list(seenAttributes)

    with open(outFile, 'w') as fout:
        # header
        fout.write("@relation %s" % concept + '\n')
        for attribute in attributes:
            if attribute.startswith('contain-'):
                dataType = '{True, False}'
            else:
                dataType = 'string'
            fout.write('@attribute "%s" %s' % (attribute.replace('"', '\\"'), dataType) + '\n')
        fout.write('@attribute %s {%s}' % (concept, classes) + '\n')

        # data
        fout.write("@data" + '\n')
        for featureset in featuresets:
            fields = []
            for attribute in attributes:
                try:
                    rawValue = featureset[0][attribute]
                except KeyError:
                    # feature absent from this feature set
                    fields.append('False')
                    continue
                asciiValue = unicode(rawValue).encode('ascii', 'ignore')
                fields.append('"' + asciiValue.replace('"', '\\"') + '"')
            fields.append(featureset[1])
            fout.write(','.join(fields) + '\n')
def predictSuperPixelLabels(classifier, image, numberSuperPixels, superPixelCompactness):
    """Segment image into SLIC superpixels and predict a label for each one.

    :return: tuple (predicted superpixel labels, superpixel graph)
    """
    print("\n**Computing super pixel labelling for input image")

    # Get superpixels
    slicParams = [numberSuperPixels, superPixelCompactness]
    spgraph = SuperPixels.computeSuperPixelGraph(image, 'slic', slicParams)
    spMask = spgraph.m_labels
    nbSuperPixels = len(spgraph.m_nodes)
    print(" ".join(["**Image contains", str(nbSuperPixels), "superpixels"]))

    # Get superpixel features
    spFeatures = FeatureGenerator.generateSuperPixelFeatures(image, spMask, None)
    nbFeatureRows = np.shape(spFeatures)[0]
    assert nbFeatureRows == nbSuperPixels, \
        "Number of superpixels in feature array != number super pixels in image!:: " + \
        str(nbFeatureRows) + " vs. " + str(nbSuperPixels)

    return (classifier.predict(spFeatures), spgraph)
def predictSuperPixelLabels(classifier, image, numberSuperPixels,
                            superPixelCompactness, makeProbabilities):
    """Segment image into SLIC superpixels and predict a label for each one.

    :param makeProbabilities: when true, class probabilities are also
        computed via classification.classProbsOfFeatures
    :return: tuple (predicted superpixel labels, superpixel graph,
        class probabilities or None)
    """
    print("\n**Computing super pixel labelling for input image")

    # Get superpixels
    slicParams = [numberSuperPixels, superPixelCompactness]
    spgraph = superPixels.computeSuperPixelGraph(image, 'slic', slicParams)
    spMask = spgraph.m_labels
    nbSuperPixels = len(spgraph.m_nodes)
    print(" ".join(["**Image contains", str(nbSuperPixels), "superpixels"]))

    # Get superpixel features
    # todo: replace with features.computeSuperPixelFeatures JRS
    spFeatures = FeatureGenerator.generateSuperPixelFeatures(image, spMask, None)
    nbFeatureRows = np.shape(spFeatures)[0]
    assert nbFeatureRows == nbSuperPixels, \
        "Number of superpixels in feature array != number super pixels in image!:: " + \
        str(nbFeatureRows) + " vs. " + str(nbSuperPixels)

    spLabels = classifier.predict(spFeatures)

    if makeProbabilities:
        classProbs = classification.classProbsOfFeatures(
            spFeatures, classifier, requireAllClasses=False)
    else:
        classProbs = None
    return (spLabels, spgraph, classProbs)
print "\nProcessing " + str(scale*100) + \ "% of MSRC data on a 60/20/20 split serialised for easier file IO" splitData = pomio.splitInputDataset_msrcData( msrcImages, datasetScale=scale, keepClassDistForTraining=True, trainSplit=0.6, validationSplit=0.2, testSplit=0.2) validationDataset = splitData[1] testDataset = splitData[2] if doVal: print "Processing validation data::" validationData = FeatureGenerator.processLabeledImageData( validationDataset, ignoreVoid=True) if doTest: print "Processing test data::" testingData = FeatureGenerator.processLabeledImageData( testDataset, ignoreVoid=True) else: # Just training data splitData = [msrcImages, None, None] # prepare training data trainDataset = splitData[0] trainLabels = None for idx in range(0, np.size(trainDataset)):
if __name__ == "__main__":
    # Demo run: build a network, train it on a small part of the MSRC data,
    # then predict the class labels of one image.

    # Create network
    print("*Creating neural net")
    net = createDefaultNeuralNet()

    msrcData = "/home/amb/dev/mrf/data/MSRC_ObjCategImageDatabase_v2"

    print("\n*Creating training dataset")
    labeledData = createTrainingSupervisedDataSet(msrcData, 0.05, True)

    print("\n*Training network via backpropogation")
    trainingResult = trainNetworkBackprop(net, labeledData)
    # the training result carries the trained net first, the trainer second
    net, trainer = trainingResult[0], trainingResult[1]

    # the second loaded image is used for the prediction demo
    predictImage = pomio.msrc_loadImages(msrcData)[1]
    print(" ".join(["\n*Read in an image from the MSRC dataset::",
                    str(np.shape(predictImage.m_img))]))

    # todo: replace with features.computePixelFeatures JRS
    imageFeatures = FeatureGenerator.generatePixelFeaturesForImage(predictImage.m_img)

    print("\n*Using neural net to predict class label::")
    prediction = predictClass(imageFeatures, net)
    print(prediction)
print('Averaging weights...') for i in range(0, len(prev_weights)): prev_weights[i].divide(counter) return prev_weights if __name__ == '__main__': train = sys.argv[1] test = sys.argv[2] out = sys.argv[3] T = 3 print('Extracting training features...') sentences = FeatureGenerator.process_file(train) weights = [] n_additional_feats = len(sentences[0][0].additional_feats) print('Training...') for tag in range(0, len(NERTag.__members__)): new_weight = Weight.Weight(len(NERTag.__members__)) weights.append(new_weight) weights = train_viterbi_avg_perceptron(weights, T, sentences) #weights = train_perceptron(weights, T, sentences) accuracy = 0 n_sentences = 0
def getSuperPixelData(msrcImages, numberSuperPixels, superPixelCompactness):
    """Collect superpixel features, labels and class adjacency counts.

    For each image:
      * compute a SLIC superpixel graph,
      * assign a class label to every superpixel,
      * put superpixels labelled void on an exclude list and record the
        label of all others,
      * accumulate the class adjacency counts reported by the graph,
      * compute features with the void superpixels excluded and stack them
        below the features gathered so far.

    :param msrcImages: sequence of objects exposing m_img (image) and m_gt
        (ground truth labels)
    :param numberSuperPixels: SLIC parameter passed to computeSuperPixelGraph
    :param superPixelCompactness: SLIC parameter passed to
        computeSuperPixelGraph
    :return: list [features, labels, classAdjCounts]; features is None when
        msrcImages is empty
    """
    voidClassLabel = pomio.getVoidIdx()
    numberImages = len(msrcImages)
    nbClasses = pomio.getNumClasses()

    superPixelFeatures = None
    superPixelLabels = np.array([], int)  # labels of all non-void superpixels

    classAdjCounts = np.zeros((nbClasses, nbClasses))
    adjCountsTotal = 0
    adjVoidCountsTotal = 0

    for imgIdx, msrcImage in enumerate(msrcImages):
        # void superpixels of this image, skipped during feature processing
        superPixelIgnoreList = np.array([], int)

        print("\n**Processing Image# %s  of %s" % (imgIdx + 1, numberImages))

        # get raw image and ground truth labels
        img = msrcImage.m_img
        imgPixelLabels = msrcImage.m_gt

        # create superpixel map and graph for image
        spgraph = SuperPixels.computeSuperPixelGraph(
            img, 'slic', [numberSuperPixels, superPixelCompactness])
        imgSuperPixelMask = spgraph.m_labels
        imgSuperPixels = spgraph.m_nodes
        numberImgSuperPixels = spgraph.getNumSuperPixels()

        # create superpixel exclude list & superpixel label array
        allSPClassLabels = []
        for spIdx in range(numberImgSuperPixels):
            superPixelValue = imgSuperPixels[spIdx]
            # Boolean array selecting the pixels of this superpixel
            superPixelValueMask = (imgSuperPixelMask == superPixelValue)
            superPixelLabel = assignClassLabelToSuperPixel(
                superPixelValueMask, imgPixelLabels)
            allSPClassLabels.append(superPixelLabel)

            if superPixelLabel == voidClassLabel:
                # void: exclude from features and do not record a label
                superPixelIgnoreList = np.append(superPixelIgnoreList, superPixelValue)
            else:
                superPixelLabels = np.append(superPixelLabels, superPixelLabel)

        assert len(allSPClassLabels) == numberImgSuperPixels

        (theseClassAdjCounts, adjVoidCount, adjCount) = \
            spgraph.countClassAdjacencies(nbClasses, allSPClassLabels)
        classAdjCounts += theseClassAdjCounts
        adjCountsTotal += adjCount
        adjVoidCountsTotal += adjVoidCount

        # Now we have the superpixel labels, and an ignore list of void
        # superpixels - time to get the features!
        imgSuperPixelFeatures = FeatureGenerator.generateSuperPixelFeatures(
            img, imgSuperPixelMask, excludeSuperPixelList=superPixelIgnoreList)

        # `is None`, not `== None`: comparing a numpy array with == is
        # element-wise and has no single truth value.
        if superPixelFeatures is None:
            superPixelFeatures = imgSuperPixelFeatures
        else:
            # stack the superpixel features into a single array
            superPixelFeatures = np.vstack(
                [superPixelFeatures, imgSuperPixelFeatures])

        # Compare samples against labels, as the message says; the previous
        # assert compared the feature count with itself and could never fail.
        assert np.shape(superPixelFeatures)[0] == np.shape(superPixelLabels)[0], \
            "Number of samples != number labels"

    print("\n**Processed total of %s images" % numberImages)

    # Guard against a zero total (e.g. no images) instead of dividing by zero.
    if adjCountsTotal:
        voidPercent = 100.0 * adjVoidCountsTotal / adjCountsTotal
    else:
        voidPercent = 0.0
    print(" %d out of %d adjacencies were ignored due to void (%.2f %%)" %
          (adjVoidCountsTotal, adjCountsTotal, voidPercent))

    # Now return the results
    return [superPixelFeatures, superPixelLabels, classAdjCounts]
adjProbs += 10.0 # Now turn it into normalised probabilities. # todo: hey but this is not normalised for default class probability! adjProbs /= adjProbs.sum() # transform adjProbs = -np.log( adjProbs ) else: adjProbs = None # prefer to merge regions with high degree if args.nbrPotentialMethod == 'adjacencyAndDegreeSensitive': assert adjProbs != None, 'You asked for neighbour potential method "%s", but no adjacency probs specified'\ % args.nbrPotentialMethod print 'Computing superpixel features...' ftrs = FeatureGenerator.generateSuperPixelFeatures( imgRGB, spix.m_labels, [] ) print 'Computing class probabilities...' classProbs = bonzaClass.classProbsOfFeatures(ftrs,clfr,\ requireAllClasses=False) if args.verbose: plt.interactive(1) if adjProbs != None: plt.figure() plt.imshow(np.log(1+adjProbs), cmap=cm.get_cmap('gray'), interpolation='none') plt.title('Adjacency probabilities') plt.waitforbuttonpress() plt.figure()
def n_fold_test(n_folds, xml_file, con_file, dep_file, alg, concept, classification_method,
                multiple_cancer_terms, unique_pmids, dup_pmids_in_one_fold, classifier_pickle):
    """Run an N-fold cross validation, print the results and pickle the classifiers.

    one_fold_test is expected to fill ``results[i]`` for fold i.  From the
    way the entries are printed below, each one is indexed as
    (accuracy, train time, test time, per-class rows), where a per-class row
    is (class, precision, recall, f).

    :param n_folds: number of folds
    :param xml_file: input passed to get_annotation_units
    :param con_file: passed to UnifiedReader
    :param dep_file: passed to UnifiedReader
    :param alg: training algorithm passed to one_fold_test
    :param concept: concept name used for filtering and feature generation
    :param classification_method: passed to instance_filter
    :param multiple_cancer_terms: passed to instance_filter
    :param unique_pmids: passed to fold_divider
    :param dup_pmids_in_one_fold: passed to fold_divider
    :param classifier_pickle: path the list of fold classifiers is pickled to
    """
    # instance filtering according to the options
    aus = get_annotation_units(xml_file)
    aus = UnifiedReader(aus, con_file, dep_file)
    aus = instance_filter(aus, classification_method, multiple_cancer_terms, concept)

    # divide into n sets
    n_lists = fold_divider(n_folds, aus, unique_pmids, dup_pmids_in_one_fold)

    # convert annotation units into feature sets
    fss_n_lists = [FeatureGenerator.get_featuresets(fold, concept) for fold in n_lists]
    print(fss_n_lists[0][0][1])

    # N-fold cross validation; slots are filled in place by one_fold_test
    results = [0] * n_folds
    classifiers = [0] * n_folds
    nb_run = len(fss_n_lists)

    start = time.time()
    for i in range(nb_run):
        one_fold_test(i, fss_n_lists, results, classifiers, alg)

    print('#fold\taccuracy\ttrain_time\ttest_time')
    for i in range(nb_run):
        fold = results[i]
        print('fold_%s\t%s\t%s\t%s %s' % (i, fold[0], fold[1], fold[2], fold[3]))

    # Plain sums replace the previous reduce(), which handed back the single
    # 4-element result unchanged when there was only one fold and then
    # failed to unpack into three names.
    acc_sum = sum(fold[0] for fold in results)
    t_train_sum = sum(fold[1] for fold in results)
    t_test_sum = sum(fold[2] for fold in results)

    # Divide by a float so the averages are not truncated when the sums are
    # integers (previously float() was applied after the division).
    folds = float(n_folds)
    acc_avg = acc_sum / folds
    print('average\t%s\t%s\t%s\t' % (acc_avg, t_train_sum / folds, t_test_sum / folds))
    print('total elapsed time: %d' % (time.time() - start))

    # for excel
    print('accuracy')
    for i in range(nb_run):
        print(results[i][0])
    print(acc_avg)

    # class names are taken from the per-class rows of the first fold
    classes = [row[0] for row in results[0][3]]
    metric_columns = (('precision', 1), ('recall', 2), ('f', 3))
    for clas in classes:
        print(clas)
        for metric_name, column in metric_columns:
            print(metric_name)
            for i in range(nb_run):
                for numbers in results[i][3]:
                    if numbers[0] == clas:
                        print(numbers[column])

    # `with` guarantees the handle is closed even if pickling fails
    with open(classifier_pickle, 'wb') as pickle_out:
        pickle.dump(classifiers, pickle_out)
ex = data[0] plt.figure() plt.imshow(ex.m_img) plt.title('original image') plt.figure() clrs = [[z/255.0 for z in c[1]] for c in pomio.msrc_classToRGB] pomio.showLabels( ex.m_gt ) plt.title('ground truth labels' ) print 'unique class labels: ', np.unique(ex.m_gt) # generate features imagePixelFeatures = FeatureGenerator.generatePixelFeaturesForImage(ex.m_img) # For each feature, how many distinct values? for i in range(imagePixelFeatures.shape[1]): print "Feature %d has %d distinct values" \ % (i, len(np.unique( imagePixelFeatures[:,i])) ) # Plot a selection of features sel = np.arange(26,30) # sel = range(80,86) # colours have to be on range 0-1 plt.figure() # just plot some of the data for clarity nbpts = 2000 ptsz = 5 whichPts = np.random.choice( imagePixelFeatures.shape[0], nbpts, replace=False)
class GBRTModel:
    """Pointwise gradient-boosting model over mention/candidate features."""

    def __init__(self, config, db=None, stats=None, dmodel=None, load_path=None, w2v=None):
        """Set up the GBRT model and its feature generator.

        :param config: either a path to a JSON configuration file (any str
            or unicode string) or an already loaded configuration mapping
        :param db: stored on the instance and forwarded to FeatureGenerator
        :param stats: forwarded to FeatureGenerator
        :param dmodel: stored on the instance and forwarded to
            FeatureGenerator
        :param load_path: if given, the model is loaded via loadModel
            instead of being created from the hyper parameters
        :param w2v: forwarded to FeatureGenerator
        """
        # isinstance also accepts str/unicode subclasses, which the previous
        # exact type() comparison treated as an already-parsed config.
        if isinstance(config, basestring):
            with open(config) as data_file:
                self._config = json.load(data_file)
        else:
            self._config = config
        self._dmodel = dmodel

        # 'hyper_patameters' (sic) is the key the configuration files use.
        hyper_params = self._config['hyper_patameters']
        print("GBRT params: %s" % (hyper_params,))

        if load_path is None:
            self._model = GradientBoostingClassifier(
                loss=hyper_params['loss'],
                learning_rate=hyper_params['learning_rate'],
                n_estimators=hyper_params['n_estimators'],
                max_depth=hyper_params['max_depth'],
                max_features=None)
        else:
            self.loadModel(load_path)

        feature_config = self._config['features']
        self._feature_generator = FeatureGenerator(
            mention_features=feature_config['mention_features'],
            entity_features=feature_config['entity_features'],
            yamada_embedding_path=ProjectSettings.getPath()[0] +
            feature_config['yamada_embedding_path'],
            stats=stats,
            db=db,
            dmodel=dmodel,
            w2v=w2v)

        # training samples accumulate here until finalize() fits the model
        self._train_X = []
        self._train_Y = []
        self._db = db

    def getPredictor(self):
        """Return a PointwisePredict wrapping this model."""
        return PointwisePredict(self)

    def predict(self, mention, candidate1, candidate2=None):
        """Return the positive-class probability for (mention, candidate1).

        As a side effect the probability is appended to 'feature_set.txt'
        in the current working directory.

        :raises NotImplementedError: if candidate2 is given; only pointwise
            prediction is supported.  (The previous code tried to raise a
            plain string, which is itself an error.)
        """
        if candidate2 is not None:
            raise NotImplementedError("Unsupported operation")

        # create feature_vec from mention and candidate and predict the
        # probability for the pointwise predictor
        feature_vec = self._feature_generator.getPointwiseFeatures(mention, candidate1)
        Y = self._model.predict_proba(np.asarray(feature_vec).reshape(1, -1))
        with open('feature_set.txt', 'a') as f:
            f.write(' -> ' + str(Y[0][1]) + '\n')
        return Y[0][1]

    def train(self, mention, candidate1, candidate2, correct):
        """Record one training sample; the model itself is fitted in finalize().

        :param mention: mention to compute features for
        :param candidate1: candidate to compute features for
        :param candidate2: unused
        :param correct: the sample is labelled 1.0 when this equals
            candidate1, otherwise 0.0
        """
        self._train_X.append(self._feature_generator.getPointwiseFeatures(mention, candidate1))
        self._train_Y.append(1.0 if correct == candidate1 else 0.0)

    def finalize(self):
        """Fit the model on all samples recorded by train()."""
        trainX = np.array(self._train_X)
        trainy = np.array(self._train_Y)
        print("fitting gbrt model ( %s samples)" % len(self._train_Y))
        self._model.fit(trainX, trainy)

    def saveModel(self, fname):
        """Pickle the model to ``fname + ".gbrtmodel"``."""
        # `with` closes the handle; it used to be left to the garbage collector
        with open(fname + ".gbrtmodel", "wb") as model_file:
            pickle.dump(self._model, model_file)

    def loadModel(self, fname):
        """Load the model pickled at ``fname + ".gbrtmodel"``.

        NOTE: unpickling can execute arbitrary code; only load model files
        from a trusted source.
        """
        with open(fname + ".gbrtmodel", "rb") as model_file:
            self._model = pickle.load(model_file)
# get particular image we like ex = data[0] plt.figure() plt.imshow(ex.m_img) plt.title('original image') plt.figure() clrs = [[z / 255.0 for z in c[1]] for c in pomio.msrc_classToRGB] pomio.showLabels(ex.m_gt) plt.title('ground truth labels') print 'unique class labels: ', np.unique(ex.m_gt) # generate features imagePixelFeatures = FeatureGenerator.generatePixelFeaturesForImage(ex.m_img) # For each feature, how many distinct values? for i in range(imagePixelFeatures.shape[1]): print "Feature %d has %d distinct values" \ % (i, len(np.unique( imagePixelFeatures[:,i])) ) # Plot a selection of features sel = np.arange(12, 29, 2) # sel = range(80,86) # colours have to be on range 0-1 plt.figure() # just plot some of the data for clarity nbpts = 2000 ptsz = 5 whichPts = np.random.choice(imagePixelFeatures.shape[0], nbpts, replace=False)
adjProbs += 10.0 # Now turn it into normalised probabilities. # todo: hey but this is not normalised for default class probability! adjProbs /= adjProbs.sum() # transform adjProbs = -np.log(adjProbs) else: adjProbs = None # prefer to merge regions with high degree if args.nbrPotentialMethod == 'adjacencyAndDegreeSensitive': assert adjProbs != None, 'You asked for neighbour potential method "%s", but no adjacency probs specified'\ % args.nbrPotentialMethod print 'Computing superpixel features...' ftrs = FeatureGenerator.generateSuperPixelFeatures(imgRGB, spix.m_labels, []) print 'Computing class probabilities...' classProbs = bonzaClass.classProbsOfFeatures(ftrs,clfr,\ requireAllClasses=False) if args.verbose: plt.interactive(1) if adjProbs != None: plt.figure() plt.imshow(np.log(1 + adjProbs), cmap=cm.get_cmap('gray'), interpolation='none') plt.title('Adjacency probabilities') plt.waitforbuttonpress()
def getSuperPixelData(msrcImages, numberSuperPixels, superPixelCompactness):
    """Collect superpixel features, labels and class adjacency counts.

    For each image:
      * compute a SLIC superpixel graph,
      * assign a class label to every superpixel,
      * put superpixels labelled void on an exclude list and record the
        label of all others,
      * accumulate the class adjacency counts reported by the graph,
      * compute features with the void superpixels excluded and stack them
        below the features gathered so far.

    :param msrcImages: sequence of objects exposing m_img (image) and m_gt
        (ground truth labels)
    :param numberSuperPixels: SLIC parameter passed to computeSuperPixelGraph
    :param superPixelCompactness: SLIC parameter passed to
        computeSuperPixelGraph
    :return: list [features, labels, classAdjCounts]; features is None when
        msrcImages is empty
    """
    voidClassLabel = pomio.getVoidIdx()
    numberImages = len(msrcImages)
    nbClasses = pomio.getNumClasses()

    superPixelFeatures = None
    superPixelLabels = np.array([], int)  # labels of all non-void superpixels

    classAdjCounts = np.zeros((nbClasses, nbClasses))
    adjCountsTotal = 0
    adjVoidCountsTotal = 0

    for imgIdx, msrcImage in enumerate(msrcImages):
        # void superpixels of this image, skipped during feature processing
        superPixelIgnoreList = np.array([], int)

        print("\n**Processing Image# %s  of %s" % (imgIdx + 1, numberImages))

        # get raw image and ground truth labels
        img = msrcImage.m_img
        imgPixelLabels = msrcImage.m_gt

        # create superpixel map and graph for image
        spgraph = SuperPixels.computeSuperPixelGraph(
            img, 'slic', [numberSuperPixels, superPixelCompactness])
        imgSuperPixelMask = spgraph.m_labels
        imgSuperPixels = spgraph.m_nodes
        numberImgSuperPixels = spgraph.getNumSuperPixels()

        # create superpixel exclude list & superpixel label array
        allSPClassLabels = []
        for spIdx in range(numberImgSuperPixels):
            superPixelValue = imgSuperPixels[spIdx]
            # Boolean array selecting the pixels of this superpixel
            superPixelValueMask = (imgSuperPixelMask == superPixelValue)
            superPixelLabel = assignClassLabelToSuperPixel(
                superPixelValueMask, imgPixelLabels)
            allSPClassLabels.append(superPixelLabel)

            if superPixelLabel == voidClassLabel:
                # void: exclude from features and do not record a label
                superPixelIgnoreList = np.append(superPixelIgnoreList, superPixelValue)
            else:
                superPixelLabels = np.append(superPixelLabels, superPixelLabel)

        assert len(allSPClassLabels) == numberImgSuperPixels

        (theseClassAdjCounts, adjVoidCount, adjCount) = \
            spgraph.countClassAdjacencies(nbClasses, allSPClassLabels)
        classAdjCounts += theseClassAdjCounts
        adjCountsTotal += adjCount
        adjVoidCountsTotal += adjVoidCount

        # Now we have the superpixel labels, and an ignore list of void
        # superpixels - time to get the features!
        imgSuperPixelFeatures = FeatureGenerator.generateSuperPixelFeatures(
            img, imgSuperPixelMask, excludeSuperPixelList=superPixelIgnoreList)

        # `is None`, not `== None`: comparing a numpy array with == is
        # element-wise and has no single truth value.
        if superPixelFeatures is None:
            superPixelFeatures = imgSuperPixelFeatures
        else:
            # stack the superpixel features into a single array
            superPixelFeatures = np.vstack(
                [superPixelFeatures, imgSuperPixelFeatures])

        # Compare samples against labels, as the message says; the previous
        # assert compared the feature count with itself and could never fail.
        assert np.shape(superPixelFeatures)[0] == np.shape(superPixelLabels)[0], \
            "Number of samples != number labels"

    print("\n**Processed total of %s images" % numberImages)

    # Guard against a zero total (e.g. no images) instead of dividing by zero.
    if adjCountsTotal:
        voidPercent = 100.0 * adjVoidCountsTotal / adjCountsTotal
    else:
        voidPercent = 0.0
    print(" %d out of %d adjacencies were ignored due to void (%.2f %%)" %
          (adjVoidCountsTotal, adjCountsTotal, voidPercent))

    # Now return the results
    return [superPixelFeatures, superPixelLabels, classAdjCounts]
if __name__ == "__main__":
    # Demo run: build a network, train it on a small part of the MSRC data,
    # then predict the class labels of one image.

    # Create network
    print("*Creating neural net")
    net = createDefaultNeuralNet()

    msrcData = "/home/amb/dev/mrf/data/MSRC_ObjCategImageDatabase_v2"

    print("\n*Creating training dataset")
    labeledData = createTrainingSupervisedDataSet(msrcData, 0.05, True)

    print("\n*Training network via backpropogation")
    trainingResult = trainNetworkBackprop(net, labeledData)
    # the training result carries the trained net first, the trainer second
    net, trainer = trainingResult[0], trainingResult[1]

    # the second loaded image is used for the prediction demo
    predictImage = pomio.msrc_loadImages(msrcData)[1]
    print(" ".join(["\n*Read in an image from the MSRC dataset::",
                    str(np.shape(predictImage.m_img))]))

    imageFeatures = FeatureGenerator.generatePixelFeaturesForImage(predictImage.m_img)

    print("\n*Using neural net to predict class label::")
    prediction = predictClass(imageFeatures, net)
    print(prediction)
class GBRTModel:
    """Gradient-boosted-tree scorer for (mention, candidate) pairs.

    Training samples are accumulated one pair at a time with ``train`` and
    fitted in one go by ``finalize``. ``predict`` returns, for every
    candidate, the probability found in column 1 of the classifier's
    ``predict_proba`` output.
    """

    def __init__(self, config, db=None, stats=None, models_as_features=None,
                 load_path=None, w2v=None):
        """Build (or load) the classifier and its feature generator.

        :param config: either a path to a JSON config file or an already
            loaded config mapping. Must contain ``'hyper_patameters'``
            (sic) and ``'features'`` entries.
        :param db: passed through to ``FeatureGenerator`` and kept on
            ``self._db``.
        :param stats: passed through to ``FeatureGenerator``.
        :param models_as_features: passed through to ``FeatureGenerator``.
        :param load_path: when given, the classifier is unpickled from
            ``load_path + ".gbrtmodel"`` instead of being created fresh.
        :param w2v: passed through to ``FeatureGenerator``.
        """
        # ``basestring`` covers both str and unicode on Python 2; fall back
        # to ``str`` on interpreters that do not define it.
        try:
            string_types = basestring
        except NameError:
            string_types = str

        if isinstance(config, string_types):
            with open(config) as data_file:
                self._config = json.load(data_file)
        else:
            self._config = config
        self.models_as_features = models_as_features

        # NOTE: 'hyper_patameters' is the (misspelled) key the config
        # files actually use; do not "fix" it here.
        hyper_params = self._config['hyper_patameters']
        print("GBRT params: %s" % (hyper_params,))

        if load_path is None:
            self._model = GradientBoostingClassifier(
                loss=hyper_params['loss'],
                learning_rate=hyper_params['learning_rate'],
                n_estimators=hyper_params['n_estimators'],
                max_depth=hyper_params['max_depth'],
                max_features=None)
        else:
            self.loadModel(load_path)

        feature_config = self._config['features']
        self._feature_generator = FeatureGenerator(
            feature_names=feature_config['feature_names'],
            yamada_embedding_path=(ProjectSettings.getPath()[0] +
                                   feature_config['yamada_embedding_path']),
            stats=stats,
            db=db,
            models_as_features=self.models_as_features,
            w2v=w2v)

        # Accumulated training samples, consumed by finalize().
        self._train_X = []
        self._train_Y = []
        self._db = db

    def getPredictor(self):
        """Return a ``PointwisePredict`` wrapping this model."""
        return PointwisePredict(self)

    def predict(self, mention, candidates):
        """Score every candidate for the given mention.

        :param mention: the mention handed to the feature generator.
        :param candidates: iterable of hashable candidates.
        :return: dict mapping each candidate to ``predict_proba(...)[0][1]``
            (the "correct" class when the model was trained through
            ``train``, which labels samples 0/1).
        """
        scores = {}
        for candidate in candidates:
            # One feature vector per (mention, candidate) pair, reshaped to
            # a single-row 2-D array as predict_proba expects.
            feature_vec = self._feature_generator.getFeatureVector(
                mention, candidate)
            proba = self._model.predict_proba(
                np.asarray(feature_vec).reshape(1, -1))
            positive_prob = proba[0][1]

            # Debug trace appended to a file in the working directory. It
            # is deliberately written per candidate, right after feature
            # generation. NOTE(review): the ' -> ' prefix suggests the
            # feature generator may write to the same file first; confirm
            # before moving or batching this write.
            with open('feature_set.txt', 'a') as f:
                f.write(' -> ' + str(positive_prob) + '\n')

            scores[candidate] = positive_prob
        return scores

    def train(self, mention, candidate, is_correct):
        """Record one training sample; no fitting happens here.

        :param mention: the mention handed to the feature generator.
        :param candidate: the candidate handed to the feature generator.
        :param is_correct: truthy when ``candidate`` is the right answer
            for ``mention``; stored as label 1, otherwise 0.
        :return: None. Only ``_train_X`` / ``_train_Y`` are extended.
        """
        self._train_X.append(
            self._feature_generator.getFeatureVector(mention, candidate))
        self._train_Y.append(1 if is_correct else 0)

    def is_trainable(self, candidate):
        """Return True: every candidate is accepted for training."""
        return True

    def finalize(self):
        """Fit the classifier on all samples accumulated by ``train``."""
        trainX = np.array(self._train_X)
        trainy = np.array(self._train_Y)
        print("fitting gbrt model ( %d samples)" % len(self._train_Y))
        self._model.fit(trainX, trainy.reshape(trainy.shape[0], ))

    def saveModel(self, fname):
        """Pickle the classifier to ``fname + ".gbrtmodel"``."""
        # Context manager so the handle is flushed and closed even if
        # pickling raises.
        with open(fname + ".gbrtmodel", "wb") as model_file:
            pickle.dump(self._model, model_file)

    def loadModel(self, fname):
        """Replace the classifier with the one in ``fname + ".gbrtmodel"``.

        SECURITY: unpickling executes arbitrary code embedded in the file;
        only load model files from trusted sources.
        """
        with open(fname + ".gbrtmodel", "rb") as model_file:
            self._model = pickle.load(model_file)
splitData = pomio.splitInputDataset_msrcData( msrcImages, datasetScale=scale, keepClassDistForTraining=True, trainSplit=0.6, validationSplit=0.2, testSplit=0.2 ) validationDataset = splitData[1] testDataset = splitData[2] if doVal: print "Processing validation data::" validationData = FeatureGenerator.processLabeledImageData(validationDataset, ignoreVoid=True) if doTest: print "Processing test data::" testingData = FeatureGenerator.processLabeledImageData(testDataset, ignoreVoid=True) else: # Just training data splitData = [msrcImages,None,None] # prepare training data trainDataset = splitData[0] trainLabels = None for idx in range(0, np.size(trainDataset)): if trainLabels == None: