def get_about(self, username):
    """Scrape a user's "about" data and upsert it into ``FacebookUser``.

    Calls ``facebook_scraper.get_about(username)``, copies the result onto
    the ``FacebookUser`` row with that username via ``convert_result``
    (creating the row when none exists), and records a ``Transaction`` row
    describing the outcome.

    Args:
        username: Username passed to the scraper and used for the lookup.

    Returns:
        The scraper result on success. ``None`` when scraping or the upsert
        raised; the failure is stored as an ``'error'`` ``Transaction``
        instead of being propagated.
    """
    # Bound up front so the error handler can tell "scraper never returned"
    # apart from "scraper returned, upsert failed" without poking at locals().
    result = None
    try:
        result = facebook_scraper.get_about(username)
        user = FacebookUser.query.filter_by(username=username).first()
        if not user:
            user = FacebookUser()
            convert_result(user, result)
            # NOTE(review): created_at/updated_at use local time while
            # Transaction.timestamp uses UTC; kept as-is so new rows stay
            # comparable with existing ones.
            user.created_at = datetime.now()
            db.session.add(user)
            transact_type = 'create'
        else:
            convert_result(user, result)
            transact_type = 'update'
        user.updated_at = datetime.now()
    except Exception as e:
        # A failed query/flush leaves the session unusable until it is rolled
        # back, and a failed convert_result leaves a half-populated user
        # pending. Roll back so only the error record below gets committed.
        db.session.rollback()
        transaction = Transaction(
            timestamp=datetime.utcnow(),
            transact_type='error',
            func='get_about(%s)' % username,
            ref="%s: %s" % (getattr(e, 'errno', 0), getattr(e, 'strerror', e)),
        )
        if result is not None:
            transaction.data = str(result)
            # Prefer pointing at the affected row when the result carries a
            # uid; otherwise keep the error text as the reference.
            uid = getattr(result, 'uid', None)
            if uid is not None:
                transaction.ref = "%s.%s" % (FacebookUser.__tablename__, str(uid))
        db.session.add(transaction)
        db.session.commit()
        return None

    # Scrape Transaction
    transaction = Transaction(
        timestamp=datetime.utcnow(),
        transact_type=transact_type,
        ref="%s.%s" % (FacebookUser.__tablename__, str(result.uid)),
        func='get_about(%s)' % username,
        data=str(result),
    )
    db.session.add(transaction)
    db.session.commit()
    return result
def get_fans(self, pagename):
    """Scrape the fans of a page and upsert each one as a ``FacebookUser``.

    Looks up the ``FacebookPage`` with username ``pagename``, iterates over
    ``facebook_scraper.get_fans(pagename)``, upserts a ``FacebookUser`` for
    every result via ``convert_result``, appends the page to ``user.pages``
    and commits one ``Transaction`` row per result.

    A failure while upserting a single result is recorded as an ``'error'``
    ``Transaction`` and that result is skipped; the remaining results are
    still processed.

    Args:
        pagename: Username of the page whose fans are scraped.

    Returns:
        List of scraper results that were stored successfully.

    Raises:
        DatabaseError, OperationalError: The page lookup still fails after
            all retries.
        Exception: No ``FacebookPage`` with this username exists.
        ScrapingError, ValueError: Logged and re-raised when raised while
            iterating the scraper results.
    """
    max_retries = 3
    page = None
    # range() instead of xrange(): four iterations, valid on Python 2 and 3.
    for attempt in range(max_retries + 1):
        try:
            page = FacebookPage.query.filter_by(username=pagename).first()
            break
        except (DatabaseError, OperationalError):
            if attempt >= max_retries:
                raise
            # Without a rollback the session stays in its failed state and
            # the retried query cannot succeed.
            db.session.rollback()
    if not page:
        raise Exception("put the page in the db first plz")

    results = []
    try:
        for result in facebook_scraper.get_fans(pagename):
            try:
                user = FacebookUser.query.filter_by(username=result.username).first()
                if not user:
                    user = FacebookUser()
                    convert_result(user, result)
                    user.created_at = datetime.now()
                    db.session.add(user)
                    transact_type = 'create'
                else:
                    convert_result(user, result)
                    transact_type = 'update'
            except Exception as e:
                # Discard the half-applied user and reset a possibly failed
                # session so only the error record is committed.
                db.session.rollback()
                transaction = Transaction(
                    timestamp=datetime.utcnow(),
                    transact_type='error',
                    # Was 'get_likes(%s)': copy-paste slip that mislabelled
                    # every get_fans record.
                    func='get_fans(%s)' % pagename,
                    ref="%s: %s" % (getattr(e, 'errno', 0), getattr(e, 'strerror', e)),
                )
                transaction.data = str(result)
                # Replaces a leftover debug `print` of the same dict.
                logger.info(transaction.__dict__)
                db.session.add(transaction)
                db.session.commit()
                continue

            user.updated_at = datetime.now()
            user.pages.append(page)

            # Scrape Transaction
            transaction = Transaction(
                timestamp=datetime.utcnow(),
                transact_type=transact_type,
                ref="%s.%s" % (FacebookUser.__tablename__, str(result.uid)),
                func='get_fans(%s)' % pagename,
                data=str(result),
            )
            db.session.add(transaction)
            db.session.commit()
            results.append(result)
            logger.info("%s - %i - %s" % (pagename, len(results), result.username))
    except (ScrapingError, ValueError) as e:
        logger.info(e)
        raise
    return results
def get_likes(self, username):
    """Scrape the pages a user likes and upsert each as a ``FacebookPage``.

    Looks up the ``FacebookUser`` with the given username, iterates over
    ``facebook_scraper.get_likes(username)``, upserts a ``FacebookPage`` for
    every result via ``convert_result``, appends the user to ``page.users``
    and commits one ``Transaction`` row per result.

    A failure while upserting a single result is recorded as an ``'error'``
    ``Transaction`` and that result is skipped. A ``ScrapingError`` raised by
    the scraper ends the run early; the results stored so far are returned.

    ``user.scrape_status`` is set to 1 before scraping, to 0 when a
    ``ScrapingError`` occurs, and to 2 when the loop finishes with the status
    still at 1. NOTE(review): presumably 1 = in progress, 0 = failed,
    2 = done; confirm against the FacebookUser model.

    Args:
        username: Username of the user whose likes are scraped.

    Returns:
        List of scraper results that were stored successfully.

    Raises:
        DatabaseError, OperationalError: The user lookup still fails after
            all retries.
        Exception: No ``FacebookUser`` with this username exists.
    """
    # TODO: move this lookup to the FacebookUser model, behind a retry
    # decorator for DatabaseError/OperationalError that accepts either a uid
    # (int) or a username (str).
    max_retries = 3
    user = None
    # range() instead of xrange(): four iterations, valid on Python 2 and 3.
    for attempt in range(max_retries + 1):
        try:
            user = FacebookUser.query.filter_by(username=username).first()
            break
        except (DatabaseError, OperationalError):
            if attempt >= max_retries:
                raise
            # Without a rollback the session stays in its failed state and
            # the retried query cannot succeed.
            db.session.rollback()
    if not user:
        raise Exception("scrape the dude's about information first plz")

    user.scrape_status = 1
    # Persist the status right away so a rollback in the per-result error
    # handler below cannot silently discard it.
    db.session.commit()

    results = []
    try:
        for result in facebook_scraper.get_likes(username):
            try:
                page = FacebookPage.query.filter_by(username=result.username).first()
                if not page:
                    page = FacebookPage()
                    convert_result(page, result)
                    page.created_at = datetime.now()
                    db.session.add(page)
                    transact_type = 'create'
                else:
                    convert_result(page, result)
                    transact_type = 'update'
            except Exception as e:
                # Discard the half-applied page and reset a possibly failed
                # session so only the error record is committed.
                db.session.rollback()
                transaction = Transaction(
                    timestamp=datetime.utcnow(),
                    transact_type='error',
                    func='get_likes(%s)' % username,
                    ref="%s: %s" % (getattr(e, 'errno', 0), getattr(e, 'strerror', e)),
                )
                transaction.data = str(result)
                db.session.add(transaction)
                db.session.commit()
                continue

            page.updated_at = datetime.now()
            page.users.append(user)

            # Scrape Transaction
            transaction = Transaction(
                timestamp=datetime.utcnow(),
                transact_type=transact_type,
                ref="%s.%s" % (FacebookPage.__tablename__, str(result.page_id)),
                func='get_likes(%s)' % username,
                data=str(result),
            )
            db.session.add(transaction)
            db.session.commit()
            results.append(result)
            logger.info("%s - %i - %s" % (username, len(results), result.username))
    except ScrapingError as e:
        # Previously swallowed without a trace; log it so failed runs can be
        # diagnosed, then flag the user as failed.
        logger.info(e)
        user.scrape_status = 0

    if user.scrape_status == 1:
        user.scrape_status = 2
    db.session.commit()
    return results