def getLabel(self, value):
    """Predict the most likely class label for *value*.

    Builds a single-row feature dict from *value* and returns the key of
    the highest-scoring entry in the model's prediction.  Assumes
    self.model has already been trained -- TODO confirm with learnClassifer.
    """
    feature = FeatureFactory()
    feature.createFeature(value, "")
    # Original shadowed the builtin `dict` and mixed tabs with spaces;
    # also built an `attributes` list that was never used -- both removed.
    instance = {'attributes': {}}
    row = feature.datatable[0]
    for i in range(len(row)):
        instance['attributes'][str(i)] = row[i]
    res = self.model.predict(instance)
    # Python 2: pick the label whose predicted score is largest.
    return max(res.iterkeys(), key=lambda k: res[k])
Exemple #2
0
 def getLabel(self, value):
     """Return the label with the highest predicted score for *value*."""
     factory = FeatureFactory()
     factory.createFeature(value, "")
     # Build the single-instance payload expected by the model.
     payload = {}
     payload['attributes'] = {}
     names = []
     row = factory.datatable[0]
     for idx, cell in enumerate(row):
         key = str(idx)
         payload['attributes'][key] = cell
         names.append(key)
     scores = self.model.predict(payload)
     best = max(scores.iterkeys(), key=lambda label: scores[label])
     return best
Exemple #3
0
def getClass(setting, value):
    """Predict the class of *value* using a classifier serialized in *setting*.

    *setting* is a string-escaped (Python 2) pickle of a trained classifier,
    as produced by IDCTClassifier.learnClassifer.
    """
    setting = setting.decode("string-escape")
    # SECURITY: pickle.loads executes arbitrary code if *setting* comes from
    # an untrusted source -- confirm the caller controls this data.
    classifier = pickle.loads(setting)
    feature = FeatureFactory()
    feature.createFeature(value, "")
    # Original shadowed the builtin `dict` and built an unused `attributes`
    # list -- both removed.
    instance = {'attributes': {}}
    row = feature.datatable[0]
    for i in range(len(row)):
        instance['attributes'][str(i)] = row[i]
    res = classifier.predict(instance)
    # Python 2: return the label with the highest predicted score.
    return max(res.iterkeys(), key=lambda k: res[k])
def getClass(setting, value):
    """Rebuild a pickled classifier from *setting* and classify *value*."""
    decoded = setting.decode("string-escape")
    classifier = pickle.loads(decoded)

    # Turn *value* into a one-row feature table.
    factory = FeatureFactory()
    factory.createFeature(value, "")
    row = factory.datatable[0]

    payload = {}
    payload['attributes'] = {}
    attributes = []
    for idx in range(len(row)):
        name = str(idx)
        payload['attributes'][name] = row[idx]
        attributes.append(name)

    scores = classifier.predict(payload)
    winner = max(scores.iterkeys(), key=lambda label: scores[label])
    return winner
Exemple #5
0
class IDCTClassifier(PartitionClassifierType):
    """Naive-Bayes classifier built over FeatureFactory feature vectors."""

    def __init__(self):
        # path: base directory for any classifier artifacts (unused here).
        self.path = "./"
        self.featureFactory = FeatureFactory()

    def addTrainingData(self, value, label):
        """Record one (value, label) training example in the feature table."""
        self.featureFactory.createFeature(value, label)

    def learnClassifer(self):
        """Train a NaiveBayes model on all collected training rows.

        Returns the trained model pickled and string-escaped (Python 2),
        suitable for later use by getClass().
        """
        model = NaiveBayes()
        # The original appended every column name once per training row,
        # handing set_real() a list full of duplicates; collect each
        # attribute name exactly once, preserving first-seen order.
        seen = set()
        attributes = []
        for j in range(len(self.featureFactory.datatable)):
            row = self.featureFactory.datatable[j]
            instance = {'cases': 1, 'attributes': {}}
            for i in range(len(row)):
                name = str(i)
                instance['attributes'][name] = row[i]
                if name not in seen:
                    seen.add(name)
                    attributes.append(name)
            instance['label'] = self.featureFactory.classes[j]
            model.add_instances(instance)
        model.set_real(attributes)
        model.train()
        self.model = model
        return pickle.dumps(model).encode('string_escape')

    def getLabel(self, value):
        """Predict the most likely label for *value* using self.model."""
        feature = FeatureFactory()
        feature.createFeature(value, "")
        instance = {'attributes': {}}
        row = feature.datatable[0]
        for i in range(len(row)):
            instance['attributes'][str(i)] = row[i]
        res = self.model.predict(instance)
        # Python 2: label with the highest predicted score wins.
        return max(res.iterkeys(), key=lambda k: res[k])
class IDCTClassifier(PartitionClassifierType):
    def __init__(self):
        self.path = "./"
        print "building classifier"
        self.featureFactory = FeatureFactory()
    def addTrainingData(self, value, label):
        self.featureFactory.createFeature(value, label)
    def learnClassifer(self):
       model = NaiveBayes()
       dict = {};
       dict['cases'] = 1
       attributes = []
       for j in range(len(self.featureFactory.datatable)):
           dict = {};
           dict['cases'] = 1
           dict['attributes'] = {}
           line = self.featureFactory.datatable[j]
           for i in range(len(line)):
               dict['attributes'][str(i)] = line[i]
               attributes.append(str(i))
           dict['label'] = self.featureFactory.classes[j]
           model.add_instances(dict)
       model.set_real(attributes)
       model.train()
       self.model = model
       return pickle.dumps(model).encode('string_escape')
       
    def getLabel(self, value):
        feature = FeatureFactory()
        feature.createFeature(value, "")
        dict = {};
        dict['attributes'] = {}
        attributes = []
        line = feature.datatable[0]
        for i in range(len(line)):
            dict['attributes'][str(i)] = line[i]
            attributes.append(str(i))
        res = self.model.predict(dict)
	r = max(res.iterkeys(),key=lambda k:res[k])
	return r
def main(argv):
	# Set default values
	similarityMeasure = None
	predictionModel = None
	numTrials = 0
	buildClean = False

	# Check for 'help' argument
	if argv[1] == 'help':
		printArgsHelp()
		sys.exit(0)

	# Check for valid arguments
	if len(argv) < 4:
		sys.stderr.write('Too few arguments! Try running with the \'help\' argument.\n')
		sys.exit(1)
	if argv[1] not in similarityMeasureStrings:
		sys.err.write('Invalid similarity measure! Try running with the \'help\' argument.\n')
		sys.exit(1)
	if argv[2] not in predicitonModelStrings:
		sys.err.write('Invalid prediction model! Try running with the \'help\' argument.\n')
		sys.exit(1)
	try:
		numTrials = int(argv[3])
	except ValueError:
		sys.err.write('Please provide int value for \'trials\' argument. Try running with the \'help\' argument.\n')
		sys.exit(1)
	if int(argv[3]) < 0:
		sys.err.write('Invalid integer for \'trials\'! Try running with the \'help\' argument.\n')
		sys.exit(1)
	if len(argv) > 4 and argv[4] != 'clean':
		sys.err.write('Invalid value for buildClean! Try running with the \'help\' argument.\n')
		sys.exit(1)

	if len(argv) > 4 and argv[4] == 'clean':
		buildClean = True

	# Generate Yelp data either
	# by parsing Jsons or loading from .bins

	# yelpData currently contains:
	# a map from user -> friends
	# a map from business -> users who rated that business
	# TODO: expand what yelp data contains as necessary for other sim measures
	if buildClean:
		yelpData = yelp_json_parser.parseJsons(businessJson='NV_business.json', reviewJson='NV_review.json', userJson='NV_user.json')
	else:
		yelpData = yelp_json_parser.loadFromFile()

	friendshipMap = yelpData[0]
	businessReviews = yelpData[1]
	if buildClean:
			yelpGraph = yelpData[5]
	degreeCentrality = yelpData[2]
	closenessCentrality = yelpData[3]
	betweennessCentrality = yelpData[4]

	print "Betweenness Centralities"
	print len(degreeCentrality)

	# Create appropriate similarity measure (with necessary yelp data) and
	# either calculate similarities from scratch (buildClean == True) or
	# load similarities from file (buildClean == False)
	similarityScores = dict()

	if argv[1] == 'foverlap':
		similarityMeasure = FriendshipOverlapSimilarity(friendshipMap)
	elif argv[1] == 'community':
		similarityMeasure = CommunitySimilarity(yelpGraph)
	elif argv[1] == 'commute':
		similarityMeasure = CommuteTimeSimilarity()
	elif argv[1] == 'pagerank':
		similarityMeasure = PageRankSimilarity(friendshipMap)
	elif argv[1] == 'featureDist':
		factory = FeatureFactory((degreeCentrality, closenessCentrality, betweennessCentrality))
		vectors = factory.getFeatureMatrix()
		similarityMeasure = FeatureDistanceSimilarity(vectors)
	elif argv[1] == 'all':
		similarityMeasure = FriendshipOverlapSimilarity(friendshipMap)
		similarityMeasure.calculateSimilarities()
		similarityScores = similarityMeasure.similarities
		predictionsFOverlap = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor())

		similarityMeasure = CommunitySimilarity(yelpGraph)
		similarityMeasure.calculateSimilarities()
		similarityScores = similarityMeasure.similarities
		predictionsCommunity = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor())

		similarityMeasure = PageRankSimilarity(friendshipMap)
		similarityMeasure.calculateSimilarities()
		similarityScores = similarityMeasure.similarities
		predictionsPageRank = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor())

		factory = FeatureFactory((degreeCentrality, closenessCentrality, betweennessCentrality))
		vectors = factory.getFeatureMatrix()
		similarityMeasure = FeatureDistanceSimilarity(vectors)
		similarityMeasure.calculateSimilarities()
		similarityScores = similarityMeasure.similarities
		predictionsFeatureDist = RegressorUtil.runRegressor(similarityScores, businessReviews, KNNRegressor())

		predictions = RegressorUtil.averagePredictions((predictionsFOverlap,predictionsCommunity,predictionsPageRank,predictionsFeatureDist))
		RegressorUtil.evaluateRegressor(predictions, 'All', 'All')
		sys.exit(0)



	
	if buildClean:
		similarityMeasure.calculateSimilarities()
	else:
		similarityMeasure.loadFromFile()
	
	similarityScores = similarityMeasure.similarities

	print len(similarityScores)

	# Create appropriate prediction model and
	# generate list of predictions
	if argv[2] == 'baseline':
		predictionModel = RandomRegressor(1,5)
	elif argv[2] == 'knn':
		predictionModel = KNNRegressor()

	# Once similarities are calculated, the true ratings are parsed,
	# and the prediction model is chosen, we then run our regression model
	# to generate our predictions for each business-user pair
	predictions = RegressorUtil.runRegressor(similarityScores, businessReviews, predictionModel)

	# Once all the predictions have been calculated, we evaluate the accuracy of
	# our system and report error statistics
	RegressorUtil.evaluateRegressor(predictions, predictionModel.nameLabel, similarityMeasure.nameLabel)
 def __init__(self):
     """Initialise the classifier's output path and its feature factory."""
     # The two assignments are independent; order is immaterial.
     self.featureFactory = FeatureFactory()
     self.path = "./"
Exemple #9
0
 def __init__(self):
     """Set up classifier state: artifact path and an empty feature factory."""
     base_path = "./"
     self.path = base_path
     self.featureFactory = FeatureFactory()
 def __init__(self):
     self.path = "./"
     print "building classifier"
     self.featureFactory = FeatureFactory()