def getUserBookTags(self):
    """Fetch the book tags behind ``self.api.url`` and store them on UserBookRelated.

    Returns ``False`` when the service returns nothing, when the tag list is
    empty, or when no ``User`` with ``uid == self.api.api_id`` is found.
    In every other case the function falls off the end (returns ``None``);
    errors raised while storing are logged at debug level and swallowed.
    """
    fetched = self.service.GetUserBookTags(self.api.url)
    if not fetched:
        return False
    book_tags, count = fetched
    if not book_tags:
        return False

    matches = User.objects(uid=self.api.api_id)
    if not matches:
        return False
    user = matches[0]

    try:
        related, created = UserBookRelated.objects.get_or_create(
            uid=user.uid,
            auto_save=False,
            defaults={'uid': user.uid, 'book_tags': book_tags})
        if not created:
            # The record already existed: append the newly fetched tags.
            related.book_tags.extend(book_tags)
        # auto_save=False above, so the record is saved explicitly here.
        try:
            related.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), related, self.api.url))
    except Exception as error:
        logger.debug("[ERROR]%s: UserBookRelated.objects.get_or_create(uid=%s ..." % (
            error, self.api.api_id))
def getUser(self):
    """Fetch the user behind ``self.api.url`` and make sure it is stored.

    Returns:
        * the already stored ``User`` when one with the same ``uid`` exists;
        * the freshly fetched user after it has been saved successfully
          (previously this path fell off the end and returned ``None``, so
          truthiness checks in callers treated a new user as "not found");
        * ``False`` when saving the new user fails validation;
        * ``None`` when the service returns nothing.
    """
    user = self.service.GetPeople(self.api.url)
    if not user:
        return None

    exist_user = User.objects(uid=user.uid)
    if exist_user:
        return exist_user[0]

    try:
        user.save()
    except mongoengine.base.ValidationError as error:
        logger.debug("[ERROR]%s: %s at uid:%s" % (error, error.to_dict(), user.uid))
        return False
    return user
def getProfile(self):
    """Build a plain-dict profile for the user identified by ``self.uid``.

    The user is obtained through ``UserProcessor(...).getUser()`` using the
    'USER' API URL for ``self.uid``. Returns ``None`` when that call yields
    a falsy value; otherwise returns a dict with the keys ``uid``,
    ``nickname``, ``signature``, ``image_link``, ``website`` and
    ``introduction``.
    """
    people_api = Api(
        url='http://api.douban.com/people/' + self.uid,
        api_id=self.uid,
        api_type='USER')
    user = UserProcessor(people_api, 'default').getUser()
    logger.debug('%s' % (user))

    if not user:
        return None

    # 'website' and 'introduction' are read from the user's `link` and
    # `content` attributes respectively.
    return {
        'uid': user.uid,
        'nickname': user.nickname,
        'signature': user.signature,
        'image_link': user.image_link,
        'website': user.link,
        'introduction': user.content,
    }
def getBook(self, oldbook=None):
    """Fetch the book behind ``self.api.url`` and persist it.

    Args:
        oldbook: optional, already stored book. When given (and truthy) its
            ``summary``, ``common_tags`` and ``rating`` are refreshed from
            the fetched book and it is saved instead of the fetched one.

    Returns:
        ``oldbook`` when it was updated and saved; the fetched book when it
        was saved (previously this success path returned ``None`` and was
        indistinguishable from failure); ``None`` when the service returned
        nothing or the save failed validation.
    """
    book = self.service.GetBook(self.api.url)
    if not book:
        return None

    if oldbook:
        oldbook.summary = book.summary
        oldbook.common_tags = book.common_tags
        oldbook.rating = book.rating
        try:
            oldbook.save()
            return oldbook
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s at bid:%s" % (error, error.to_dict(), oldbook.bid))
        # NOTE(review): as in the original, a failed update of `oldbook`
        # falls through and tries to save the fetched book instead --
        # confirm that this is intended.

    try:
        book.save()
    except mongoengine.base.ValidationError as error:
        logger.debug("[ERROR]%s: %s at bid:%s" % (error, error.to_dict(), book.bid))
        return None
    return book
def getUserContacts(self, recursion=False):
    """Fetch the contacts behind ``self.api.url`` and attach them to UserRelated.

    Contacts that already exist as ``User`` documents are replaced in the
    list by the stored document. The remaining ones are bulk-inserted
    through ``self.db.user.insert`` and receive the returned ids.

    Args:
        recursion: accepted for interface compatibility; not used by the
            code in this function.

    Returns:
        ``False`` when the service returns nothing, otherwise ``None``.
        Errors raised while storing the relation are logged and swallowed.
    """
    results = self.service.GetContacts(self.api.url)
    if not results:
        return False
    users, count = results

    new_users_data = []
    indexs = []
    for index, user in enumerate(users):
        exist_user = User.objects(uid=user.uid)
        if exist_user:
            users[index] = exist_user[0]
            continue

        new_user = user.__dict__.get('_data')
        # Collect the keys first: popping while iterating the live dict is
        # only safe where .items() returns a list (Python 2).
        for key in [k for k, v in new_user.items() if v is None]:
            new_user.pop(key)
        # 'image' may already be gone (it was None) or never present; a
        # plain pop('image') raised KeyError in that case.
        new_user.pop('image', None)
        new_users_data.append(new_user)
        indexs.append(index)

    if new_users_data:
        ids = self.db.user.insert(new_users_data)
        # indexs[i] is the position in `users` of the i-th inserted document.
        for i, index in enumerate(indexs):
            users[index].id = ids[i]

    try:
        uid = self.api.api_id
        user_related, created = UserRelated.objects.get_or_create(
            uid=uid,
            auto_save=False,
            defaults={'uid': uid, 'contacts': users})
        if not created:
            user_related.contacts.extend(users)
        try:
            user_related.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), user_related, self.api.url))
    except Exception as error:
        logger.debug("[ERROR]%s: UserRelated.objects.get_or_create(uid=%s ..." % (
            error, self.api.api_id))
def getUserFriends(self, recursion=False):
    """Fetch the friends behind ``self.api.url`` and store the unknown ones.

    Friends that already exist as ``User`` documents are replaced in the
    list by the stored document; the others are saved one by one, and
    validation failures are logged and skipped.

    Args:
        recursion: accepted for interface compatibility; not used by the
            code in this function.

    Returns:
        ``False`` when the service returns nothing, otherwise ``None``.
    """
    # The original bound this to `users` and then tested the undefined name
    # `results`, which raised NameError on every call.
    results = self.service.GetFriends(self.api.url)
    if not results:
        return False
    users, count = results

    for index, user in enumerate(users):
        exist_user = User.objects(uid=user.uid)
        if exist_user:
            users[index] = exist_user[0]
            continue
        try:
            user.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save uid:%s at api:%s" % (
                error, error.to_dict(), user.uid, self.api.url))
def Get(self, uri, converter, extra_headers=None, *args, **kwargs):
    """Issue a GET request for ``uri`` and return ``converter(body)``.

    If ``self.client.get_auth_header('GET', uri)`` yields a header it is
    sent with the request; otherwise, when ``self.api_key`` is set, it is
    appended to the URI as an ``apikey`` query parameter.

    Args:
        uri: URL to fetch.
        converter: callable applied to the raw response body.
        extra_headers: optional mapping of additional request headers. It
            is copied, never modified.
        *args, **kwargs: accepted and ignored.

    Returns:
        The converter's result, or ``None`` when ``URLError`` is raised
        (the failure is logged at debug level).
    """
    # A `{}` default that is .update()d would be shared between calls and
    # would keep auth headers from earlier requests; copy instead so neither
    # the default nor the caller's dict is touched.
    headers = dict(extra_headers) if extra_headers else {}

    auth_header = self.client.get_auth_header('GET', uri)
    if auth_header:
        headers.update(auth_header)
    elif self.api_key:
        param = 'apikey=' + self.api_key
        uri += ('&' if '?' in uri else '?') + param

    request = Request(uri)
    for key, value in headers.items():
        request.add_header(key, value)

    try:
        response = urlopen(request)
        try:
            return converter(response.read())
        finally:
            # Release the connection even when read() or converter() fails.
            response.close()
    except URLError as e:
        # One-element tuples keep the formatting safe when the attribute
        # itself is a tuple.
        if hasattr(e, 'reason'):
            logger.debug('[ERROR]Failed to reach a server. Reason: %s' % (e.reason,))
        elif hasattr(e, 'code'):
            logger.debug('[ERROR]The server couldn\'t fulfill the request. Error code: %s' % (e.code,))
        logger.debug('-------Error Url: ' + uri)
        return None
def processData(op):
    """Start one ``DataProcessThread(op)`` per second until ``op`` is done.

    After each start the function sleeps one second, logs
    ``DataProcessThread.index`` and stops once that index has reached the
    size attribute associated with ``op``. An ``op`` that is not listed
    below never satisfies the stop condition.
    """
    # (operation name, DataProcessThread attribute holding its total size)
    size_attr_by_op = (
        ('FIX_BOOK_COLLECTION', 'book_collections_size'),
        ('book_related', 'book_related_size'),
        ('FIX_BOOK_REVIEW', 'book_review_size'),
        ('FIX_NOTE', 'note_size'),
        ('FIX_RECOMMENDATION', 'recommendation_size'),
    )

    while True:
        worker = DataProcessThread(op)
        worker.start()
        time.sleep(1)
        logger.debug("Now at:" + str(DataProcessThread.index))

        finished = False
        for name, size_attr in size_attr_by_op:
            if op == name:
                # The size attribute is read only for the matching op.
                finished = DataProcessThread.index >= getattr(DataProcessThread, size_attr)
                break
        if finished:
            break
def getBookTags(self):
    """Fetch the tags of the book behind ``self.api.url`` and store them.

    Each tag is upserted into the ``book_tag`` collection of the 'mining'
    connection (the book's ``bid`` is added to ``books`` and ``count`` is
    increased by ``tag.count``), then the tags are stored on the book's
    ``BookRelated`` record.

    Returns ``False`` when the service returns nothing, when no ``Book``
    with ``bid == self.api.api_id`` exists, or when the tag list is empty.
    Otherwise falls off the end (returns ``None``); storage errors are
    logged and swallowed.
    """
    fetched = self.service.GetBookTags(self.api.url)
    if not fetched:
        return False
    book_tags, count = fetched

    matches = Book.objects(bid=self.api.api_id)
    if not matches:
        return False
    book = matches[0]

    if not book_tags:
        return False

    mining = MongoDB.getConnection('mining')
    for tag in book_tags:
        selector = {'title': tag.title}
        change = {'$addToSet': {'books': book.bid}, '$inc': {'count': tag.count}}
        mining.book_tag.update(selector, change, upsert=True)

    try:
        related, created = BookRelated.objects.get_or_create(
            bid=book.bid,
            auto_save=False,
            defaults={'bid': book.bid, 'book': book, 'tags': book_tags})
        if not created:
            # The record already existed: append the newly fetched tags.
            related.tags.extend(book_tags)
        try:
            related.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), related, self.api.url))
    except Exception as error:
        logger.debug("[ERROR]%s: BookRelated.objects.get_or_create(uid=%s ..." % (
            error, self.api.api_id))
def getUserRecommendations(self):
    """Fetch the recommendations behind ``self.api.url`` and store them.

    The recommendations are bulk-inserted through
    ``self.db.recommendation.insert`` with the fetched user as ``author``
    reference, receive the returned ids, and are then stored on the user's
    ``UserRelated`` record.

    Returns ``False`` when the service returns nothing or when either the
    recommendation list or the user is falsy. Otherwise falls off the end
    (returns ``None``); storage errors are logged and swallowed.
    """
    fetched = self.service.GetUserRecommendations(self.api.url)
    if not fetched:
        return False
    recommendations, count, user = fetched
    if not (recommendations and user):
        return False

    documents = []
    for recommendation in recommendations:
        document = recommendation.__dict__.get('_data')
        # Strip unset fields in place before the raw insert.
        unset_keys = [key for key, value in document.items() if value is None]
        for key in unset_keys:
            document.pop(key)
        document['author'] = DBRef('user', user.id)
        documents.append(document)

    if documents:
        ids = self.db.recommendation.insert(documents, check_keys=False)
        # `documents` was built in the order of `recommendations`, so the
        # i-th returned id is assigned to the i-th recommendation.
        for position, object_id in enumerate(ids):
            recommendations[position].id = object_id

    try:
        user_related, created = UserRelated.objects.get_or_create(
            uid=user.uid,
            auto_save=False,
            defaults={'uid': user.uid, 'recommendations': recommendations})
        if not created:
            user_related.recommendations.extend(recommendations)
        try:
            user_related.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), user_related, self.api.url))
    except Exception as error:
        logger.debug("[ERROR]%s: UserRelated.objects.get_or_create(uid=%s ..." % (
            error, self.api.api_id))
def getBookCollections(self):
    """Fetch the book collections behind ``self.api.url`` and store them.

    Each collection is bulk-inserted through
    ``self.db.book_collection.insert`` with the fetched user as ``author``
    reference. For collections that carry a book, every tag is upserted
    into the 'mining' ``book_tag`` collection and a ``book`` reference is
    added. The collections are then stored on the ``UserBookRelated``
    record for ``self.api.api_id``; when that record was newly created it
    is also linked from the user.

    Returns ``False`` when the service returns nothing or when either the
    user or the collection list is falsy. Otherwise falls off the end
    (returns ``None``); storage errors are logged and swallowed.
    """
    fetched = self.service.GetBookCollections(self.api.url)
    if not fetched:
        return False
    collections, count, user = fetched
    if not (user and collections):
        return False

    documents = []
    for collection in collections:
        document = collection.__dict__.get('_data')
        # Strip unset fields in place before the raw insert.
        unset_keys = [key for key, value in document.items() if value is None]
        for key in unset_keys:
            document.pop(key)
        document['author'] = DBRef('user', user.id)

        if collection.book:
            # Add this book to the book_tag collection.
            mining = MongoDB.getConnection('mining')
            for tag in collection.tags:
                mining.book_tag.update(
                    {'title': tag},
                    {'$addToSet': {'books': collection.book.bid},
                     '$inc': {'count': 1}},
                    upsert=True)
            document['book'] = DBRef('book', collection.book.id)

        documents.append(document)

    if documents:
        ids = self.db.book_collection.insert(documents, check_keys=False)
        # `documents` was built in the order of `collections`, so the i-th
        # returned id is assigned to the i-th collection.
        for position, object_id in enumerate(ids):
            collections[position].id = object_id

    # Pre-set both names so the check after the try block is well defined
    # even when get_or_create itself raises.
    user_book_related = None
    created = False
    try:
        user_book_related, created = UserBookRelated.objects.get_or_create(
            uid=self.api.api_id,
            auto_save=False,
            defaults={'uid': self.api.api_id, 'book_collections': collections})
        if not created:
            user_book_related.book_collections.extend(collections)
        try:
            user_book_related.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), user_book_related, self.api.url))
    except Exception as error:
        logger.debug("[ERROR]%s: UserBookRelated.objects.get_or_create(uid=%s ..." % (
            error, self.api.api_id))

    if user_book_related and created:
        # A newly created record is linked from the user document as well.
        try:
            user.user_book_related = user_book_related
            user.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), user, self.api.url))
def getBookReviews(self):
    """Fetch the reviews behind ``self.api.url`` and store them.

    Reviews whose ``rid`` already exists as a ``BookReview`` are replaced
    in the list by the stored document. The others are bulk-inserted
    through ``self.db.book_review.insert`` (with ``book`` and, when
    present, ``author`` references) and receive the returned ids. All
    reviews are then stored on the book's ``BookRelated`` record; when that
    record was newly created it is also linked from the book.

    Returns ``False`` when the service returns nothing or when either the
    book or the review list is falsy. Otherwise falls off the end (returns
    ``None``); storage errors are logged and swallowed.
    """
    fetched = self.service.GetBookReviews(self.api.url)
    if not fetched:
        return False
    book_reviews, count, book = fetched
    if not (book and book_reviews):
        return False

    documents = []
    positions = []
    for position, review in enumerate(book_reviews):
        stored = BookReview.objects(rid=review.rid)
        if stored:
            book_reviews[position] = stored[0]
            continue

        document = review.__dict__.get('_data')
        # Strip unset fields in place before the raw insert.
        unset_keys = [key for key, value in document.items() if value is None]
        for key in unset_keys:
            document.pop(key)
        if review.author:
            document['author'] = DBRef('user', review.author.id)
        document['book'] = DBRef('book', book.id)
        documents.append(document)
        positions.append(position)

    if documents:
        ids = self.db.book_review.insert(documents, check_keys=False)
        # positions[n] is the slot in `book_reviews` of the n-th inserted
        # document.
        for n, position in enumerate(positions):
            book_reviews[position].id = ids[n]

    # Pre-set both names so the check after the try block is well defined
    # even when get_or_create itself raises.
    book_related = None
    created = False
    try:
        book_related, created = BookRelated.objects.get_or_create(
            bid=book.bid,
            auto_save=False,
            defaults={'bid': book.bid, 'reviews': book_reviews})
        if not created:
            book_related.reviews.extend(book_reviews)
        try:
            book_related.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), book_related, self.api.url))
    except Exception as error:
        logger.debug("[ERROR]%s: BookRelated.objects.get_or_create(uid=%s ..." % (
            error, self.api.api_id))

    if book_related and created:
        # A newly created record is linked from the book document as well.
        try:
            book.book_related = book_related
            book.save()
        except mongoengine.base.ValidationError as error:
            logger.debug("[ERROR]%s: %s when save %s at api:%s" % (
                error, error.to_dict(), book, self.api.url))
'$inc':{'count':tag.count}}, upsert=True) try: book_related = None book_related, created = BookRelated.objects.get_or_create(\ bid=book.bid, auto_save=False, defaults=\ {'bid':book.bid, 'book':book, 'tags':book_tags}) if not created: book_related.tags.extend(book_tags) try: book_related.save() except mongoengine.base.ValidationError, error: logger.debug("[ERROR]%s: %s when save %s at api:%s" %(error, \ error.to_dict(), book_related, self.api.url)) except Exception, error: logger.debug("[ERROR]%s: BookRelated.objects.get_or_create(uid=%s ..."\ %(error, self.api.api_id)) if book_related and created: try: book.book_related = book_related book.save() except mongoengine.base.ValidationError, error: logger.debug("[ERROR]%s: %s when save %s at api:%s" %(error, error.to_dict(), book, self.api.url)) #logger.debug('[%s]Api:%s is processed!' %(datetime.now(), self.api.url)) return book_tags
logger.debug("[ERROR]%s: %s when save uid:%s at api:%s" \ %(error, error.to_dict(), user.uid, self.api.url)) try: uid = self.api.api_id user_related = None user_related, created = UserRelated.objects.get_or_create( uid=uid, auto_save=False, defaults = {'uid':uid, 'friends':users}) if not created: user_related.friends.extend(users) try: user_related.save() except mongoengine.base.ValidationError, error: logger.debug("[ERROR]%s: %s when save %s at api:%s" \ %(error, error.to_dict(), user_related, self.api.url)) except Exception, error: logger.debug("[ERROR]%s: UserRelated.objects.get_or_create(uid=%s ..."\ %(error, self.api.api_id)) if count: start_index = int(count.get('startIndex')) total_results = int(count.get('totalResults')) items_per_page = int(count.get('itemsPerPage')) if start_index + items_per_page < total_results: user_contacts_api = self.api_operation.apiGenerator(\ 'USER_CONTACTS', self.api.api_id,\ start_index=start_index+items_per_page) api = Api(url=user_contacts_api, api_id=self.api.api_id,\ api_type='USER_CONTACTS') api.save()