def parseAndCreateObjects(inputFileName):
    """Parse a crawl dump and build the user, business and review tables.

    The file is read line by line. Only lines starting with one of three
    prefixes are used; every other line is ignored:

    * ``B=``  -- a business record. ``B[0]`` is used as the lookup id and
      ``B[0], B[1], B[2], B[4]`` are passed to ``business(...)``.
    * ``R=``  -- the reviews of the most recent business, stored with a
      final flag of ``True``.
    * ``NR=`` -- the reviews of the most recent business, stored with a
      final flag of ``False``.

    Each review row is indexed as ``row[0]..row[4]``; ``row[1]`` must be a
    5-tuple ``(username, imgSrc, userLocation, userFriendCount,
    userReviewCount)`` of which the first three form the user id.

    Args:
        inputFileName: path of the dump file to read.

    Returns:
        A 3-tuple ``(users, businesses, reviews)`` of dicts keyed by
        ``user.getId()``, ``business.getId()`` and
        ``(review.getUserId(), review.getBusinessID())`` respectively.

    Raises:
        ValueError: if review rows appear before any ``B=`` line.
    """
    parentUserIdToUserDict = {}
    parentBusinessIdToBusinessDict = {}
    parent_reviews = {}
    isBusinessAlreadyPresent = False
    bnss = None

    with open(inputFileName) as f:
        for line in f:
            if line.startswith('B='):
                name, recommended = 'B', None
            elif line.startswith('R='):
                name, recommended = 'R', True
            elif line.startswith('NR='):
                name, recommended = 'NR', False
            else:
                continue

            # SECURITY: exec runs whatever code the file contains; only use
            # this on trusted dumps. If the records are plain literals,
            # ast.literal_eval on the text after '=' would be a safer
            # choice -- TODO confirm against real data before switching.
            # The statement is executed in a scratch namespace so that it
            # works the same on Python 2 and 3 and cannot overwrite this
            # function's own variables.
            scope = {}
            exec(line, globals(), scope)
            value = scope[name]

            if name == 'B':
                # A repeated business id makes the following R=/NR= lines
                # be skipped until the next B= line.
                isBusinessAlreadyPresent = value[0] in parentBusinessIdToBusinessDict
                # NOTE: the business object is (re)registered even when the
                # id was already present, i.e. the last record wins.
                bnss = business(value[0], value[1], value[2], value[4])
                parentBusinessIdToBusinessDict[bnss.getId()] = bnss
                continue

            if isBusinessAlreadyPresent:
                continue
            if value and bnss is None:
                raise ValueError(
                    "review line found before any 'B=' line in %r" % (inputFileName,))

            for row in value:
                # Unpacking all five fields keeps the length check on
                # row[1]; the two counts are not part of the user id.
                (username, imgSrc, userLocation,
                 _friendCount, _reviewCount) = row[1]
                usr = user((username, imgSrc, userLocation), row[2])

                # Reuse the first user object registered under this id.
                knownUsr = parentUserIdToUserDict.get(usr.getId())
                if not knownUsr:
                    parentUserIdToUserDict[usr.getId()] = usr
                    knownUsr = usr

                revw = review(row[0], knownUsr.getId(), bnss.getId(),
                              row[3], row[4], '', recommended)
                revwKey = (revw.getUserId(), revw.getBusinessID())
                # Keep only the first review per (user, business) pair.
                if revwKey not in parent_reviews:
                    parent_reviews[revwKey] = revw

    return (parentUserIdToUserDict, parentBusinessIdToBusinessDict, parent_reviews)
def readDataForBnss(self, inputDirName, fileName):
    """Load one business file and register its business, users and reviews.

    Only the first line of the file is read. It is prefixed with ``data=``
    and executed, and the resulting ``data`` mapping is then indexed with
    the ``BNSS_NAME``, ``ADDRESS``, ``URL``, ``NOT_RECOMMENDED`` and
    ``RECOMMENDED`` keys. Each review entry is indexed with ``RATING``,
    ``REVIEW_TEXT``, ``REVIEW_DATE``, ``USR_LOCATION``, ``NAME``,
    ``REVIEW_COUNT``, ``FRIEND_COUNT`` and the literal key ``'usrId'``.

    Results are stored in ``self.bnssIdToBnssDict``,
    ``self.usrIdToUsrDict`` and ``self.reviewIdToReviewDict``; nothing is
    returned.

    Args:
        inputDirName: directory containing the file.
        fileName: name of the file inside ``inputDirName``.
    """
    with open(join(inputDirName, fileName), mode='r') as f:
        content = 'data=' + f.readline()

    # SECURITY: exec runs whatever code the file contains; only use this on
    # trusted crawl output. If the line is a plain literal,
    # ast.literal_eval would be a safer choice -- TODO confirm against real
    # data before switching.
    # An explicit namespace is used because binding ``data`` as a function
    # local through exec only works on Python 2.
    scope = {}
    exec(content, globals(), scope)
    data = scope['data']

    bnssName = data[BNSS_NAME]
    bnssAddress = data[ADDRESS]
    bnssUrl = data[URL]
    bnssId = (bnssUrl, bnssAddress)
    if bnssId in self.bnssIdToBnssDict:
        # Previously ``bnss`` stayed unassigned on this path, so the review
        # loops below failed for a business that was already registered.
        # NOTE(review): assumes business.getId() returns the bnssId passed
        # in, as the membership test above already implies -- confirm.
        bnss = self.bnssIdToBnssDict[bnssId]
    else:
        bnss = business(bnssId, bnssName, url=bnssUrl)
        self.bnssIdToBnssDict[bnss.getId()] = bnss

    nrReviews = data[NOT_RECOMMENDED]
    rReviews = data[RECOMMENDED]

    # Recommended reviews are processed first, then the not-recommended
    # ones; the flag is passed as the last argument of review(...).
    for entries, recommended in ((rReviews, True), (nrReviews, False)):
        for entry in entries:
            review_rating = entry[RATING]
            review_text = entry[REVIEW_TEXT]
            # Keep only the text before an 'Updated review' marker.
            review_date = entry[REVIEW_DATE].split('Updated review')[0]
            usr_location = entry[USR_LOCATION]
            usr_name = entry[NAME]
            usr_review_count = entry[REVIEW_COUNT]
            usr_friend_count = entry[FRIEND_COUNT]

            # Entries without a user name are skipped.
            if not usr_name:
                print("Continue")
                continue

            usrId = entry['usrId']
            if usrId in self.usrIdToUsrDict:
                usr = self.usrIdToUsrDict[usrId]
            else:
                usrExtra = (usr_location, usr_review_count, usr_friend_count)
                usr = user(usrId, usr_name, usrExtra)
                self.usrIdToUsrDict[usr.getId()] = usr

            review_id = (usr.getId(), bnss.getId())
            revw = review(review_id, usr.getId(), bnss.getId(),
                          float(review_rating), review_date, review_text,
                          recommended)
            self.reviewIdToReviewDict[revw.getId()] = revw