def getWeightHistory(self, userId=None):
     """Scrape the weight-history page of each user and persist the result.

     ``userId`` is passed to ``self.convertUserIdToUserList`` to obtain the
     users to process. A user whose ``serverId`` is ``None`` is not scraped.
     Exceptions raised while fetching or parsing are caught and handed to
     ``logException``. ``self.db.updateWeightHistory`` runs once per user
     regardless; every value that had not been parsed before the skip or
     failure is passed as ``None``.
     """
     def numberAfterColon(label):
         # First whitespace-delimited token following ': ', as a float.
         return float(label.split(': ')[1].split()[0])

     for user in self.convertUserIdToUserList(userId):
         diet = startWeight = goalWeight = weightHistory = None
         try:
             if user['serverId'] is None:
                 # The ``finally`` clause below still runs before moving on.
                 continue
             # NOTE(review): the 1 presumably selects the weight-history
             # page -- confirm against getURL.
             response = self.br.open(self.getURL(user, 1))
             soup = BeautifulSoup(response.read())
             diet = soup.find('b').contents[1].text
             summary = soup.find(attrs={'style' : 'padding:0px 10px'})
             startWeight = numberAfterColon(summary.contents[1])
             goalWeight = numberAfterColon(summary.contents[0].text)
             dates = [parser.parse(cell.text)
                      for cell in soup.findAll(attrs={'class' : 'borderBottom date'})]
             weights = [float(cell.text.split()[0])
                        for cell in soup.findAll(attrs={'class' : 'borderBottom weight'})]
             # Pair each date with the weight at the same position, then
             # order the pairs by date, earliest first.
             weightHistory = zip(dates, weights)
             weightHistory = sorted(weightHistory, key=lambda entry: entry[0])
         except Exception as err:
             logException(user['id'], self.getWeightHistory.__name__, err)
         finally:
             self.db.updateWeightHistory(user['id'], diet, startWeight, goalWeight, weightHistory)
 def getBuddy(self, userId=None):
     """Collect the buddies listed on each user's buddy pages.

     ``userId`` is passed to ``self.convertUserIdToUserList`` to obtain the
     users to process. For a user with a non-``None`` ``serverId`` the buddy
     pages are walked by following the link inside the ``span.next``
     element until no such element is found. Each non-empty member link is
     registered with ``self.db.addNewUser``; if the returned record has no
     ``'serverId'`` key, ``self.getServerId`` is called for it. Exceptions
     are caught and handed to ``logException``. Whatever ids were gathered
     (possibly none) are always passed to ``self.db.addBuddyInUser``.
     """
     for user in self.convertUserIdToUserList(userId):
         buddyIdList = []
         try:
             if user['serverId'] is None:
                 # Skipped users still get an empty list stored below.
                 continue
             # NOTE(review): the 5 presumably selects the buddy page --
             # confirm against getURL.
             pageURL = self.getURL(user, 5)
             while True:
                 response = self.br.open(pageURL)
                 soup = BeautifulSoup(response.read())
                 memberLinks = soup.findAll('a', attrs={'class' : 'member', 'onmouseout' : 'hideTip()'})
                 for link in memberLinks:
                     if link.text == '':
                         continue
                     record = self.db.addNewUser(link.text.strip())
                     buddyIdList.append(record['id'])
                     if 'serverId' not in record:
                         self.getServerId(record['id'])
                 nextMarker = soup.find('span', attrs={'class' : 'next'})
                 if nextMarker is None:
                     # No "next" element: this was the last page.
                     break
                 pageURL = 'http://fatsecret.com/' + nextMarker.contents[0].attrs['href']
         except Exception as err:
             logException(user['id'], self.getBuddy.__name__, err)
         finally:
             self.db.addBuddyInUser(user['id'], buddyIdList)
 def run(self):
     """Run every extraction step for ``self.userId`` on a fresh extractor.

     Logs a start message, creates a ``DataExtractor`` and calls, in order,
     its ``getServerId``, ``getWeightHistory``, ``getDietHistory``,
     ``getGroup``, ``getChallenge`` and ``getBuddy`` methods with
     ``self.userId``, then logs a completion message. Any exception raised
     along the way is caught and handed to ``logException``; the remaining
     steps are then not executed.
     """
     stepNames = (
         'getServerId',
         'getWeightHistory',
         'getDietHistory',
         'getGroup',
         'getChallenge',
         'getBuddy',
     )
     try:
         logInfo(self.userId, 'start crawling')
         extractor = DataExtractor()
         for stepName in stepNames:
             # Look the method up immediately before calling it, one step
             # at a time, exactly as sequential attribute calls would.
             getattr(extractor, stepName)(self.userId)
         logInfo(self.userId, 'Done crawling')
     except Exception as err:
         logException(self.userId, self.run.__name__, err)
 def getDietHistory(self, userId=None):
     """Scrape each user's diet-history table and persist it.

     ``userId`` is passed to ``self.convertUserIdToUserList`` to obtain the
     users to process; users whose ``serverId`` is ``None`` are not scraped.

     Month header cells are parsed with the ``'%B %Y'`` format. Data rows
     are read in page order and only rows with exactly 13 child nodes are
     used. A row whose day number is not smaller than the previous row's
     day advances to the next month header (presumably because the page
     lists days in descending order within a month -- confirm against the
     live markup). Each parsed row becomes the tuple
     ``(date, food, RDI, fat, protein, carbs, exercise, net)``. A row that
     fails to parse is logged with ``'scrape row error'`` and skipped.

     If the user record already holds a non-``None`` ``'dietHistory'``, the
     scraped rows are combined with it through ``self.mergeDietTrack``;
     otherwise they are sorted by date. ``self.db.updateDietHistory`` is
     called for every user, with ``None`` when nothing was scraped.
     """
     for user in self.convertUserIdToUserList(userId):
         dietHistory = None
         try:
             if user['serverId'] is None:
                 continue
             # NOTE(review): the 2 presumably selects the diet-history
             # page -- confirm against getURL.
             response = self.br.open(self.getURL(user, 2))
             soup = BeautifulSoup(response.read())
             monthCells = soup.findAll('td', attrs={'colspan' : '6', 'class' : 'borderBottom'})
             if len(monthCells) == 0:
                 raise Exception('no diet history records')
             monthStarts = [datetime.strptime(cell.text, '%B %Y') for cell in monthCells]
             dayRows = soup.findAll('tr', attrs={'valign' : 'middle'})
             # 32 exceeds any real day, so the first row never advances
             # the month position.
             lastDay, monthPos = 32, 0
             dietHistory = []
             for row in dayRows:
                 try:
                     cells = row.contents
                     if len(cells) != 13:
                         continue
                     day = int(re.sub('[^0-9]', '', cells[1].text))
                     if day >= lastDay:
                         monthPos += 1
                     lastDay = day
                     monthStart = monthStarts[monthPos]
                     date = datetime(monthStart.year, monthStart.month, day)
                     food = self.getIntFromRawString(cells[3].text)
                     RDI = self.getDecimalFromPercentageString(cells[5].text)
                     fat, protein, carbs = self.getDataFromNutrionalSummary(cells[7].text)
                     exercise = self.getIntFromRawString(cells[9].text)
                     net = self.getIntFromRawString(cells[11].text)
                     dietHistory.append((date, food, RDI, fat, protein, carbs, exercise, net))
                 except Exception as rowErr:
                     logException(user['id'], self.getDietHistory.__name__, rowErr, 'scrape row error')
             hasStoredHistory = 'dietHistory' in user and user['dietHistory'] is not None
             if not hasStoredHistory:
                 dietHistory.sort(key=lambda entry: entry[0])
             else:
                 dietHistory = self.mergeDietTrack(user['dietHistory'], dietHistory)
         except Exception as err:
             logException(user['id'], self.getDietHistory.__name__, err)
         finally:
             self.db.updateDietHistory(user['id'], dietHistory)
 def getChallenge(self, userId=None):
     """Record the challenges shown on each user's challenge page.

     ``userId`` is passed to ``self.convertUserIdToUserList`` to obtain the
     users to process; users whose ``serverId`` is ``None`` are not scraped.
     For every matching table cell the ``title`` attribute of its second
     child node is registered with ``self.db.addNewChallenge`` and the user
     is linked to it with ``self.db.addUserInChallenge``. Exceptions are
     caught and handed to ``logException``. The ids gathered so far
     (possibly none) are always passed to ``self.db.addChallengeInUser``.
     """
     for user in self.convertUserIdToUserList(userId):
         challengeIdList = []
         try:
             if user['serverId'] is None:
                 # Skipped users still get an empty list stored below.
                 continue
             # NOTE(review): the 4 presumably selects the challenge page --
             # confirm against getURL.
             response = self.br.open(self.getURL(user, 4))
             soup = BeautifulSoup(response.read())
             for cell in soup.findAll('td', attrs={'width' : '50', 'align' : 'center'}):
                 title = cell.contents[1].attrs['title']
                 record = self.db.addNewChallenge(title)
                 self.db.addUserInChallenge(user['id'], record['id'])
                 challengeIdList.append(record['id'])
         except Exception as err:
             logException(user['id'], self.getChallenge.__name__, err)
         finally:
             self.db.addChallengeInUser(user['id'], challengeIdList)
 def getGroup(self, userId=None):
     """Record the groups shown on each user's group page.

     ``userId`` is passed to ``self.convertUserIdToUserList`` to obtain the
     users to process; users whose ``serverId`` is ``None`` are not scraped.
     For every matching table cell the ``title`` attribute of its second
     child node is registered with ``self.db.addNewGroup`` and the user is
     linked to it with ``self.db.addUserInGroup``. Exceptions are caught
     and handed to ``logException``. The ids gathered so far (possibly
     none) are always passed to ``self.db.addGroupInUser``.
     """
     for user in self.convertUserIdToUserList(userId):
         groupIdList = []
         try:
             if user['serverId'] is None:
                 # Skipped users still get an empty list stored below.
                 continue
             # NOTE(review): the 3 presumably selects the group page --
             # confirm against getURL.
             listingURL = self.getURL(user, 3)
             markup = self.br.open(listingURL).read()
             soup = BeautifulSoup(markup)
             groupCells = soup.findAll('td', attrs={'width' : '50', 'align' : 'center'})
             for cell in groupCells:
                 entry = self.db.addNewGroup(cell.contents[1].attrs['title'])
                 self.db.addUserInGroup(user['id'], entry['id'])
                 groupIdList.append(entry['id'])
         except Exception as err:
             logException(user['id'], self.getGroup.__name__, err)
         finally:
             self.db.addGroupInUser(user['id'], groupIdList)
 def getServerId(self, userId=None):
     """Look up and store the site-side id of users that do not have one.

     ``userId`` is passed to ``self.convertUserIdToUserList`` to obtain the
     users to process. A user that already carries a non-``None``
     ``'serverId'`` is left untouched (no page fetch, no database write).

     For every other user the member page is fetched and the child tags of
     the matching ``div`` are scanned for the first ``href`` containing
     ``'id='``; the server id is the segment of that href immediately after
     the first ``'id='``. Exceptions are caught and handed to
     ``logException``. ``self.db.updateServerId`` is then always called,
     with ``None`` when no id was found.
     """
     users = self.convertUserIdToUserList(userId)
     for user in users:
         if 'serverId' in user and user['serverId'] is not None:
             continue
         serverId = None
         try:
             # NOTE(review): the 0 presumably selects the member profile
             # page -- confirm against getURL.
             memberURL = self.getURL(user, 0)
             page = self.br.open(memberURL)
             soup = BeautifulSoup(page.read())
             result = soup.find('div', attrs={'align' : 'right', 'class' : 'smallText', 'style' : 'padding-top:5px'})
             if result is not None:
                 for tag in result.contents:
                     # Text nodes have no attributes; only real tags can
                     # carry the link.
                     if not isinstance(tag, element.Tag):
                         continue
                     href = tag.attrs.get('href')
                     # Require the exact 'id=' marker used by the split
                     # below. Testing for a bare 'id' let hrefs such as
                     # '.../guide' through, where split('id=')[1] raised
                     # IndexError and aborted the scan before a later,
                     # valid link could be examined.
                     if href is not None and 'id=' in href:
                         serverId = href.split('id=')[1]
                         break
         except Exception as e:
             logException(user['id'], self.getServerId.__name__, e)
         finally:
             self.db.updateServerId(user['id'], serverId)