def getLabel(self, value):
    """Predict the class label for *value* with the trained model.

    Builds a one-row feature vector for *value* via FeatureFactory, wraps
    it in the {'attributes': {str(index): value}} shape the model's
    predict() expects, and returns the highest-scoring label.
    """
    feature = FeatureFactory()
    feature.createFeature(value, "")
    # First (and only) row of the feature table generated for this value.
    row = feature.datatable[0]
    # Attribute names are stringified column indices.  (Original shadowed
    # the builtin `dict` and built an unused `attributes` list.)
    instance = {'attributes': {str(i): v for i, v in enumerate(row)}}
    res = self.model.predict(instance)
    # Highest-probability label; res.get works on both Python 2 and 3,
    # unlike the Py2-only res.iterkeys() the original used.
    return max(res, key=res.get)
def getLabel(self, value):
    """Predict the class label for *value* with the trained model.

    Creates a single feature row for *value*, packages it as the
    {'attributes': {str(index): value}} mapping the model expects, and
    returns the label with the maximum predicted score.
    """
    feature = FeatureFactory()
    feature.createFeature(value, "")
    # Only one row is produced for a single value.
    row = feature.datatable[0]
    # Fix: do not shadow the builtin `dict`; drop the unused
    # `attributes` accumulator the original built alongside.
    instance = {'attributes': {str(i): v for i, v in enumerate(row)}}
    res = self.model.predict(instance)
    # Py2/Py3-neutral argmax over labels (original used iterkeys()).
    return max(res, key=res.get)
def getClass(setting, value):
    """Restore a pickled classifier from *setting* and classify *value*.

    *setting* is a string-escaped pickle of a trained model (the format
    produced by IDCTClassifier.learnClassifer).  Returns the label with
    the highest predicted score.
    """
    setting = setting.decode("string-escape")
    # SECURITY: pickle.loads executes arbitrary code embedded in the
    # payload.  Only feed it settings from a trusted, internal source.
    classifier = pickle.loads(setting)
    feature = FeatureFactory()
    feature.createFeature(value, "")
    # Single feature row generated for this value.
    row = feature.datatable[0]
    # Fix: avoid shadowing builtin `dict`; drop unused `attributes` list.
    instance = {'attributes': {str(i): v for i, v in enumerate(row)}}
    res = classifier.predict(instance)
    # Py2/Py3-neutral argmax (original used Py2-only iterkeys()).
    return max(res, key=res.get)
def getClass(setting, value):
    """Unpickle a classifier serialized into *setting* and label *value*.

    *setting* holds a string-escaped pickle of a trained model; the
    function featurizes *value*, runs predict(), and returns the
    top-scoring label.
    """
    setting = setting.decode("string-escape")
    # SECURITY: pickle.loads will run arbitrary code from a malicious
    # payload — *setting* must come from a trusted source only.
    classifier = pickle.loads(setting)
    feature = FeatureFactory()
    feature.createFeature(value, "")
    row = feature.datatable[0]
    # Fix: no builtin-`dict` shadowing, no dead `attributes` list.
    instance = {'attributes': {str(i): v for i, v in enumerate(row)}}
    res = classifier.predict(instance)
    # Works on Python 2 and 3 (original relied on iterkeys()).
    return max(res, key=res.get)
class IDCTClassifier(PartitionClassifierType):
    """Naive-Bayes partition classifier over FeatureFactory feature rows.

    Training data is accumulated through addTrainingData(); learnClassifer()
    trains a NaiveBayes model over all accumulated rows and returns it as a
    string-escaped pickle; getLabel() classifies new values with the
    trained model.
    """

    def __init__(self):
        # assumes "./" is the working/artifact directory — TODO confirm usage
        self.path = "./"
        # Accumulates feature rows (datatable) and labels (classes).
        self.featureFactory = FeatureFactory()

    def addTrainingData(self, value, label):
        """Featurize *value* and record it with *label* for training."""
        self.featureFactory.createFeature(value, label)

    def learnClassifer(self):
        """Train a NaiveBayes model on every accumulated feature row.

        Returns the trained model pickled and string-escaped, suitable
        for later restoration with pickle.loads.  Also keeps the model
        on self.model for getLabel().
        """
        model = NaiveBayes()
        # Fix: original pre-built a {'cases': 1} dict (shadowing the
        # builtin) that was immediately discarded on the first iteration.
        attributes = []
        for j, row in enumerate(self.featureFactory.datatable):
            instance = {'cases': 1, 'attributes': {}}
            for i, v in enumerate(row):
                instance['attributes'][str(i)] = v
                # NOTE: duplicates per row, preserved from the original;
                # presumably set_real tolerates repeats — verify.
                attributes.append(str(i))
            instance['label'] = self.featureFactory.classes[j]
            model.add_instances(instance)
        # All attributes are declared real-valued.
        model.set_real(attributes)
        model.train()
        self.model = model
        return pickle.dumps(model).encode('string_escape')

    def getLabel(self, value):
        """Predict the label for *value* using the trained model."""
        feature = FeatureFactory()
        feature.createFeature(value, "")
        row = feature.datatable[0]
        instance = {'attributes': {str(i): v for i, v in enumerate(row)}}
        res = self.model.predict(instance)
        # Py2/Py3-neutral argmax (original used Py2-only iterkeys()).
        return max(res, key=res.get)
class IDCTClassifier(PartitionClassifierType): def __init__(self): self.path = "./" print "building classifier" self.featureFactory = FeatureFactory() def addTrainingData(self, value, label): self.featureFactory.createFeature(value, label) def learnClassifer(self): model = NaiveBayes() dict = {}; dict['cases'] = 1 attributes = [] for j in range(len(self.featureFactory.datatable)): dict = {}; dict['cases'] = 1 dict['attributes'] = {} line = self.featureFactory.datatable[j] for i in range(len(line)): dict['attributes'][str(i)] = line[i] attributes.append(str(i)) dict['label'] = self.featureFactory.classes[j] model.add_instances(dict) model.set_real(attributes) model.train() self.model = model return pickle.dumps(model).encode('string_escape') def getLabel(self, value): feature = FeatureFactory() feature.createFeature(value, "") dict = {}; dict['attributes'] = {} attributes = [] line = feature.datatable[0] for i in range(len(line)): dict['attributes'][str(i)] = line[i] attributes.append(str(i)) res = self.model.predict(dict) r = max(res.iterkeys(),key=lambda k:res[k]) return r
def main(argv): # Set default values similarityMeasure = None predictionModel = None numTrials = 0 buildClean = False # Check for 'help' argument if argv[1] == 'help': printArgsHelp() sys.exit(0) # Check for valid arguments if len(argv) < 4: sys.stderr.write('Too few arguments! Try running with the \'help\' argument.\n') sys.exit(1) if argv[1] not in similarityMeasureStrings: sys.err.write('Invalid similarity measure! Try running with the \'help\' argument.\n') sys.exit(1) if argv[2] not in predicitonModelStrings: sys.err.write('Invalid prediction model! Try running with the \'help\' argument.\n') sys.exit(1) try: numTrials = int(argv[3]) except ValueError: sys.err.write('Please provide int value for \'trials\' argument. Try running with the \'help\' argument.\n') sys.exit(1) if int(argv[3]) < 0: sys.err.write('Invalid integer for \'trials\'! Try running with the \'help\' argument.\n') sys.exit(1) if len(argv) > 4 and argv[4] != 'clean': sys.err.write('Invalid value for buildClean! Try running with the \'help\' argument.\n') sys.exit(1) if len(argv) > 4 and argv[4] == 'clean': buildClean = True # Generate Yelp data either # by parsing Jsons or loading from .bins # yelpData currently contains: # a map from user -> friends # a map from business -> users who rated that business # TODO: expand what yelp data contains as necessary for other sim measures if buildClean: yelpData = yelp_json_parser.parseJsons(businessJson='NV_business.json', reviewJson='NV_review.json', userJson='NV_user.json') else: yelpData = yelp_json_parser.loadFromFile() friendshipMap = yelpData[0] businessReviews = yelpData[1] if buildClean: yelpGraph = yelpData[5] degreeCentrality = yelpData[2] closenessCentrality = yelpData[3] betweennessCentrality = yelpData[4] print "Betweenness Centralities" print len(degreeCentrality) # Create appropriate similarity measure (with necessary yelp data) and # either calculate similarities from scratch (buildClean == True) or # load similarities from file 
(buildClean == False) similarityScores = dict() if argv[1] == 'foverlap': similarityMeasure = FriendshipOverlapSimilarity(friendshipMap) elif argv[1] == 'community': similarityMeasure = CommunitySimilarity(yelpGraph) elif argv[1] == 'commute': similarityMeasure = CommuteTimeSimilarity() elif argv[1] == 'pagerank': similarityMeasure = PageRankSimilarity(friendshipMap) elif argv[1] == 'featureDist': factory = FeatureFactory((degreeCentrality, closenessCentrality, betweennessCentrality)) vectors = factory.getFeatureMatrix() similarityMeasure = FeatureDistanceSimilarity(vectors) elif argv[1] == 'all': similarityMeasure = FriendshipOverlapSimilarity(friendshipMap) similarityMeasure.calculateSimilarities() similarityScores = similarityMeasure.similarities predictionsFOverlap = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor()) similarityMeasure = CommunitySimilarity(yelpGraph) similarityMeasure.calculateSimilarities() similarityScores = similarityMeasure.similarities predictionsCommunity = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor()) similarityMeasure = PageRankSimilarity(friendshipMap) similarityMeasure.calculateSimilarities() similarityScores = similarityMeasure.similarities predictionsPageRank = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor()) factory = FeatureFactory((degreeCentrality, closenessCentrality, betweennessCentrality)) vectors = factory.getFeatureMatrix() similarityMeasure = FeatureDistanceSimilarity(vectors) similarityMeasure.calculateSimilarities() similarityScores = similarityMeasure.similarities predictionsFeatureDist = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor()) predictions = RegressorUtil.averagePredictions((predictionsFOverlap,predictionsCommunity,predictionsPageRank,predictionsFeatureDist)) RegressorUtil.evaluateRegressor(predictions, 'All', 'All') sys.exit(0) if buildClean: similarityMeasure.calculateSimilarities() else: 
similarityMeasure.loadFromFile() similarityScores = similarityMeasure.similarities print len(similarityScores) # Create appropriate prediction model and # generate list of predictions if argv[2] == 'baseline': predictionModel = RandomRegressor(1,5) elif argv[2] == 'knn': predictionModel = KNNRegressor() # Once similarities are calculated, the true ratings are parsed, # and the prediction model is chosen, we then run our regression model # to generate our predictions for each business-user pair predictions = RegressorUtil.runRegressor(similarityScores, businessReviews, predictionModel) # Once all the predictions have been calculated, we evaluate the accuracy of # our system and report error statistics RegressorUtil.evaluateRegressor(predictions, predictionModel.nameLabel, similarityMeasure.nameLabel)
def __init__(self):
    # assumes "./" is the working/artifact directory — TODO confirm usage
    self.path = "./"
    # Factory that accumulates feature rows for later training.
    self.featureFactory = FeatureFactory()
def __init__(self):
    # assumes "./" is the working/artifact directory — TODO confirm usage
    self.path = "./"
    print "building classifier"
    # Factory that accumulates feature rows for later training.
    self.featureFactory = FeatureFactory()