class DataExtractor(object):
    """Scrape member pages from fatsecret.com and store the results.

    Pages are fetched with the logged-in browser kept in ``self.br`` and the
    scraped values are written through ``self.db`` (a DBController).

    Every public ``get*`` scraper accepts the same ``userId`` selector (see
    convertUserIdToUserList).  Exceptions raised while scraping one user are
    handed to ``logException``; the database write for that user sits in a
    ``finally`` clause, so it runs with whatever was collected up to the
    point of failure.
    """

    # URL prefixes for the page types that are addressed by server id,
    # keyed by the URLType code accepted by getURL.
    _SERVER_ID_URLS = {
        1: 'http://www.fatsecret.com/Default.aspx?pa=memh&id=',
        2: 'http://www.fatsecret.com/Diary.aspx?pa=mdcs&id=',
        3: 'http://www.fatsecret.com/Default.aspx?pa=memgrps&id=',
        4: 'http://www.fatsecret.com/Default.aspx?pa=memchals&id=',
        5: 'http://www.fatsecret.com/Default.aspx?pa=memb&id=',
    }

    def __init__(self, username='username', password='******'):
        """Open the database controller and log in to the site.

        username / password: site credentials forwarded to login().  The
        defaults are the placeholders that used to be hard-coded there.
        """
        self.db = DBController()
        self.br = self.login(username, password)

    def login(self, username='username', password='******'):
        """Return a browser that has submitted the site's login form.

        username / password: values typed into the login form.  Pass real
        credentials here (or through the constructor); the defaults are
        placeholders only.
        """
        br = Browser()
        br.set_cookiejar(cookielib.LWPCookieJar())

        br.set_handle_equiv(True)
        br.set_handle_redirect(True)
        br.set_handle_referer(True)
        br.set_handle_robots(False)
        br.set_handle_refresh(_http.HTTPRefreshProcessor(), max_time=2)

        br.open('http://www.fatsecret.com/Auth.aspx?pa=s')
        br.select_form(nr=0)
        # The keys below are the name attributes of the login form fields.
        br['_ctl0:_ctl7:Logincontrol1:Name'] = username
        br['_ctl0:_ctl7:Logincontrol1:Password'] = password
        br.submit()
        return br

    @staticmethod
    def _hasServerId(user):
        """Return True when the user record carries a non-None 'serverId'."""
        return 'serverId' in user and user['serverId'] is not None

    #========================================================================================
    # URLType: 0 memberURL, 1 weightHistory, 2 dietHistory, 3 groups, 4 challenges, 5 buddies
    #========================================================================================
    def getURL(self, user, URLType):
        """Build the URL of one of the user's pages.

        user: user record; 'name' is read for URLType 0, 'serverId' for the
            other types.
        URLType: page code, see the table above.

        Returns the URL string, or None when the page needs a server id and
        the record has none (key missing or value None).
        Raises Exception('invalid URL type') for an unknown URLType; this is
        checked before the server id, so it no longer depends on the record.
        """
        if URLType == 0:
            # The member page is addressed by name, with whitespace runs
            # collapsed to '+'.
            return 'http://fatsecret.com/member/' + '+'.join(user['name'].encode('utf-8', 'ignore').split())
        if URLType not in self._SERVER_ID_URLS:
            raise Exception('invalid URL type')
        if not self._hasServerId(user):
            return None
        return self._SERVER_ID_URLS[URLType] + user['serverId']

    def convertUserIdToUserList(self, userId):
        """Resolve a user selector into a list of user records.

        userId: None or [] selects every user; a list of ids or a single int
            id selects those users.  Ids for which the database returns None
            are dropped.

        Raises Exception('invalid input userId') for any other input type.
        """
        if userId is None or userId == []:
            return self.db.getAllUserList()
        if isinstance(userId, list):
            found = []
            for singleId in userId:
                user = self.db.getUserById(singleId)
                if user is not None:
                    found.append(user)
            return found
        if isinstance(userId, int):
            user = self.db.getUserById(userId)
            return [] if user is None else [user]
        raise Exception('invalid input userId')

    def getServerId(self, userId=None):
        """Look up and store the site's server id of each selected user.

        Users that already have a server id are skipped.  The id is taken
        from the first link containing 'id=' inside the right-aligned
        'smallText' div of the member page; None is stored when no such link
        is found or the scrape fails.
        """
        for user in self.convertUserIdToUserList(userId):
            if self._hasServerId(user):
                continue
            serverId = None
            try:
                page = self.br.open(self.getURL(user, 0))
                soup = BeautifulSoup(page.read())
                result = soup.find('div', attrs={'align' : 'right', 'class' : 'smallText', 'style' : 'padding-top:5px'})
                if result is not None:
                    for tag in result.contents:
                        if not isinstance(tag, element.Tag) or 'href' not in tag.attrs:
                            continue
                        href = tag.attrs['href']
                        # Require the exact 'id=' marker that is split on, so
                        # a link that merely contains "id" is skipped instead
                        # of aborting the scan with an IndexError.
                        if 'id=' in href:
                            serverId = href.split('id=')[1]
                            break
            except Exception as e:
                logException(user['id'], self.getServerId.__name__, e)
            finally:
                self.db.updateServerId(user['id'], serverId)

    def getWeightHistory(self, userId=None):
        """Scrape diet name, start/goal weight and the weight log of each user.

        The weight log is stored as a list of (date, weight) tuples sorted by
        date.  Every value that was not scraped (no server id, or an error
        before it was reached) is stored as None.
        """
        for user in self.convertUserIdToUserList(userId):
            diet, startWeight, goalWeight, weightHistory = None, None, None, None
            try:
                if self._hasServerId(user):
                    page = self.br.open(self.getURL(user, 1))
                    soup = BeautifulSoup(page.read())
                    tag = soup.find('b')
                    diet = tag.contents[1].text
                    tag = soup.find(attrs={'style' : 'padding:0px 10px'})
                    # Both values are the first token after ': ' in their text.
                    startWeight = float(tag.contents[1].split(': ')[1].split()[0])
                    goalWeight = float(tag.contents[0].text.split(': ')[1].split()[0])
                    dateList = [parser.parse(cell.text)
                                for cell in soup.findAll(attrs={'class' : 'borderBottom date'})]
                    weightList = [float(cell.text.split()[0])
                                  for cell in soup.findAll(attrs={'class' : 'borderBottom weight'})]
                    weightHistory = sorted(zip(dateList, weightList), key=lambda record : record[0])
            except Exception as e:
                logException(user['id'], self.getWeightHistory.__name__, e)
            finally:
                self.db.updateWeightHistory(user['id'], diet, startWeight, goalWeight, weightHistory)

    def getDietHistory(self, userId=None):
        """Scrape the diet calendar of each user and store it.

        Each record is a tuple
        (date, food, RDI, fat, protein, carbs, exercise, net).  When the user
        record already holds a 'dietHistory', the scraped records are merged
        into it with mergeDietTrack; otherwise they are sorted by date.
        A row that fails to parse is logged ('scrape row error') and skipped.
        """
        for user in self.convertUserIdToUserList(userId):
            dietHistory = None
            try:
                if self._hasServerId(user):
                    page = self.br.open(self.getURL(user, 2))
                    soup = BeautifulSoup(page.read())
                    months = soup.findAll('td', attrs={'colspan' : '6', 'class' : 'borderBottom'})
                    if months == []:
                        raise Exception('no diet history records')
                    monthList = [datetime.strptime(month.text, '%B %Y') for month in months]
                    rows = soup.findAll('tr', attrs={'valign' : 'middle'})
                    # A day number that does not decrease marks the start of
                    # the next month header; 32 guarantees the first row stays
                    # in the first month.
                    # NOTE(review): presumably the page lists days in
                    # descending order within each month - confirm.
                    prevDay = 32
                    monthIndex = 0
                    dietHistory = []
                    for row in rows:
                        try:
                            # Only rows with exactly 13 child nodes are data rows.
                            if len(row.contents) != 13:
                                continue
                            day = int(re.sub('[^0-9]', '', row.contents[1].text))
                            if day >= prevDay:
                                monthIndex += 1
                            prevDay = day
                            month = monthList[monthIndex]
                            date = datetime(month.year, month.month, day)
                            food = self.getIntFromRawString(row.contents[3].text)
                            RDI = self.getDecimalFromPercentageString(row.contents[5].text)
                            fat, protein, carbs = self.getDataFromNutrionalSummary(row.contents[7].text)
                            exercise = self.getIntFromRawString(row.contents[9].text)
                            net = self.getIntFromRawString(row.contents[11].text)
                            dietHistory.append((date, food, RDI, fat, protein, carbs, exercise, net))
                        except Exception as e:
                            logException(user['id'], self.getDietHistory.__name__, e, 'scrape row error')
                    if 'dietHistory' in user and user['dietHistory'] is not None:
                        dietHistory = self.mergeDietTrack(user['dietHistory'], dietHistory)
                    else:
                        dietHistory.sort(key=lambda item : item[0])
            except Exception as e:
                logException(user['id'], self.getDietHistory.__name__, e)
            finally:
                self.db.updateDietHistory(user['id'], dietHistory)

    def getGroup(self, userId=None):
        """Scrape the groups of each user and record the memberships.

        Every group title found is registered with addNewGroup, the user is
        added to that group, and finally the collected group ids are stored
        on the user (an empty list when nothing was scraped).
        """
        for user in self.convertUserIdToUserList(userId):
            groupIdList = []
            try:
                if self._hasServerId(user):
                    page = self.br.open(self.getURL(user, 3))
                    soup = BeautifulSoup(page.read())
                    for cell in soup.findAll('td', attrs={'width' : '50', 'align' : 'center'}):
                        groupName = cell.contents[1].attrs['title']
                        group = self.db.addNewGroup(groupName)
                        self.db.addUserInGroup(user['id'], group['id'])
                        groupIdList.append(group['id'])
            except Exception as e:
                logException(user['id'], self.getGroup.__name__, e)
            finally:
                self.db.addGroupInUser(user['id'], groupIdList)

    def getChallenge(self, userId=None):
        """Scrape the challenges of each user and record the memberships.

        Mirrors getGroup: every challenge title found is registered with
        addNewChallenge, the user is added to it, and the collected challenge
        ids are stored on the user.
        """
        for user in self.convertUserIdToUserList(userId):
            challengeIdList = []
            try:
                if self._hasServerId(user):
                    page = self.br.open(self.getURL(user, 4))
                    soup = BeautifulSoup(page.read())
                    for cell in soup.findAll('td', attrs={'width' : '50', 'align' : 'center'}):
                        challengeName = cell.contents[1].attrs['title']
                        challenge = self.db.addNewChallenge(challengeName)
                        self.db.addUserInChallenge(user['id'], challenge['id'])
                        challengeIdList.append(challenge['id'])
            except Exception as e:
                logException(user['id'], self.getChallenge.__name__, e)
            finally:
                self.db.addChallengeInUser(user['id'], challengeIdList)

    def getBuddy(self, userId=None):
        """Scrape the buddy list of each user, following the 'next' links.

        Every buddy name is registered with addNewUser; a buddy record
        without a 'serverId' key gets its server id looked up right away.
        The collected buddy ids are stored on the user.
        """
        for user in self.convertUserIdToUserList(userId):
            buddyIdList = []
            try:
                if self._hasServerId(user):
                    buddyURL = self.getURL(user, 5)
                    while True:
                        page = self.br.open(buddyURL)
                        soup = BeautifulSoup(page.read())
                        links = soup.findAll('a', attrs={'class' : 'member', 'onmouseout' : 'hideTip()'})
                        for link in links:
                            if link.text == '':
                                continue
                            buddy = self.db.addNewUser(link.text.strip())
                            buddyIdList.append(buddy['id'])
                            if 'serverId' not in buddy:
                                self.getServerId(buddy['id'])
                        nextLink = soup.find('span', attrs={'class' : 'next'})
                        if nextLink is None:
                            break
                        buddyURL = 'http://fatsecret.com/' + nextLink.contents[0].attrs['href']
            except Exception as e:
                logException(user['id'], self.getBuddy.__name__, e)
            finally:
                self.db.addBuddyInUser(user['id'], buddyIdList)

    def mergeDietTrack(self, oldTrack, newTrack):
        """Merge a freshly scraped diet track into the stored one.

        Both tracks are sorted by their first element (the date).  Old
        records dated before the earliest new record are kept; every old
        record from that date onwards is replaced by the new track.

        An empty newTrack leaves the (sorted) old track untouched.  This used
        to raise IndexError, after which getDietHistory overwrote the stored
        history with an empty list.
        """
        byDate = lambda item : item[0]
        oldTrack = sorted(oldTrack, key=byDate)
        newTrack = sorted(newTrack, key=byDate)
        if not newTrack:
            return oldTrack
        cutoff = newTrack[0][0]
        kept = []
        for item in oldTrack:
            if item[0] >= cutoff:
                break
            kept.append(item)
        return kept + newTrack

    def cleanNonNumercial(self, dataString):
        """Return dataString with everything except digits and '.' removed."""
        return re.sub('[^0-9.]', '', dataString.strip())

    def getIntFromRawString(self, dataString):
        """Parse the digits of dataString as an int; None when there are none.

        Raises ValueError when the cleaned text is not a valid int literal
        (for example when it contains a '.').
        """
        cleaned = self.cleanNonNumercial(dataString)
        if cleaned == '':
            return None
        return int(cleaned)

    def getDataFromNutrionalSummary(self, dataString):
        """Extract (fat, protein, carbs) in grams from a summary string.

        The value of each nutrient is the text between '<name>: ' and the
        next 'g'.  A blank string yields (None, None, None).
        """
        if dataString.strip() == '':
            return None, None, None
        fat = float(dataString.split('fat: ')[1].split('g')[0])
        protein = float(dataString.split('protein: ')[1].split('g')[0])
        carbs = float(dataString.split('carbs: ')[1].split('g')[0])
        return fat, protein, carbs

    def getDecimalFromPercentageString(self, dataString):
        """Convert a percentage string such as '50%' to a fraction (0.5).

        Returns None when the string contains no digits or '.'.
        """
        cleaned = self.cleanNonNumercial(dataString)
        if cleaned == '':
            return None
        return float(cleaned) / 100
class DataExporter(object):
    """Dump the records held by DBController to CSV / text files.

    All files are created relative to the current working directory.  Rows
    have a variable number of columns: a fixed prefix followed by one group
    of values per history record or member id.
    """

    def __init__(self):
        self.db = DBController()

    def valueToCSVFormat(self, value):
        """Render one value as a CSV field.

        None and '' become an empty field; text is wrapped in double quotes
        with embedded double quotes doubled; int/float use str(); datetime is
        formatted as YYYY-MM-DD.

        Raises Exception for any other type.
        """
        if value is None or value == '':
            return ''
        textType = type(u'')
        if isinstance(value, textType) or isinstance(value, str):
            # Only unicode text needs encoding, and only where str is a byte
            # string.  A byte string is written unchanged: calling .encode()
            # on it implicitly decodes it as ASCII first and fails on
            # non-ASCII bytes.
            if isinstance(value, textType) and str is not textType:
                value = value.encode('utf-8', 'ignore')
            # A quote inside a quoted field must be doubled, otherwise the
            # field ends early and the row is malformed.
            return '"' + value.replace('"', '""') + '"'
        if isinstance(value, int) or isinstance(value, float):
            return str(value)
        if isinstance(value, datetime):
            return datetime.strftime(value, '%Y-%m-%d')
        raise Exception('Value data type ERROR, must be string, int, float or datetime')

    def _toCSVLine(self, values):
        """Join the CSV form of every value into one newline-terminated row."""
        return ','.join([self.valueToCSVFormat(value) for value in values]) + '\n'

    def exportHistory(self, isDietHistory):
        """Write dietHistory.csv (isDietHistory true) or weightHistory.csv.

        One row per user, ordered by id: id, name, record count, date of the
        last record, then (weight history only) last weight, start weight and
        goal weight, followed by the fields of every record.  Users without
        records get a count of 0.  A user whose row cannot be built is
        reported on stdout and skipped.
        """
        if isDietHistory:
            key = 'dietHistory'
            attributeLine = 'id,name,count,lastUpdateDate,updateTime,food,RDI,fat,protein,carbs,exercise,net\n'
        else:
            key = 'weightHistory'
            attributeLine = 'id,name,count,lastUpdateDate,lastUpdateWeight,startWeight,goalWeight,updateTime,weight\n'
        fileName = key + '.csv'

        with open(fileName, 'w') as f:
            f.write(attributeLine)
            users = sorted(self.db.getAllUserList(), key=lambda user : user['id'])
            for user in users:
                lineList = [user['id'], user['name']]
                try:
                    history = user[key] if key in user else None
                    if history is not None and len(history) > 0:
                        lastRecord = history[-1]
                        lineList.append(len(history))
                        lineList.append(lastRecord[0])
                        if not isDietHistory:
                            lineList.append(lastRecord[1])
                            lineList.append(user['startWeight'])
                            lineList.append(user['goalWeight'])
                        for detailInfoTuple in history:
                            lineList.extend(detailInfoTuple)
                    else:
                        lineList.append(0)
                    f.write(self._toCSVLine(lineList))
                except Exception as e:
                    # Single-argument form: prints the same text whether
                    # print is a statement or a function.
                    print('%s %s' % (e, user['id']))

    def exportGroupChallenge(self, isGroup):
        """Write group.csv (isGroup true) or challenge.csv.

        One row per item, ordered by id: id, name, member count and the
        member ids.  Items without a 'member' list get a count of 0.
        """
        items = self.db.getAllGroupList() if isGroup else self.db.getAllChallengeList()
        items = sorted(items, key=lambda item : item['id'])
        fileName = 'group.csv' if isGroup else 'challenge.csv'
        with open(fileName, 'w') as f:
            f.write('id,name,count,memberId\n')
            for item in items:
                lineList = [item['id'], item['name']]
                if 'member' in item and item['member'] is not None:
                    lineList.append(len(item['member']))
                    lineList.extend(item['member'])
                else:
                    lineList.append(0)
                f.write(self._toCSVLine(lineList))

    def exportUserGroupChallenge(self, isGroup):
        """Write userGroup.csv (isGroup true) or userChallenge.csv.

        One row per user, ordered by id: id, number of groups/challenges and
        their ids.  Users without that list get a count of 0.
        """
        key = 'group' if isGroup else 'challenge'
        fileName = 'userGroup.csv' if isGroup else 'userChallenge.csv'
        attributeLine = 'id,count,group\n' if isGroup else 'id,count,challenge\n'
        with open(fileName, 'w') as f:
            users = sorted(self.db.getAllUserList(), key=lambda user : user['id'])
            f.write(attributeLine)
            for user in users:
                lineList = [user['id']]
                if key in user and user[key] is not None:
                    lineList.append(len(user[key]))
                    lineList.extend(user[key])
                else:
                    lineList.append(0)
                f.write(self._toCSVLine(lineList))

    def exportBuddy(self):
        """Write one text file per user into the 'buddy/' directory.

        Each file is named '<user id>.txt' and holds one
        '<user id> <buddy id>' line per buddy, sorted by buddy id.  Users
        without a buddy list get an empty file.
        """
        users = self.db.getAllUserIter()
        if not os.path.exists('buddy/'):
            os.mkdir('buddy/')
        for user in users:
            fileName = 'buddy/' + str(user['id']) + '.txt'
            with open(fileName, 'w') as f:
                if 'buddy' in user and user['buddy'] is not None:
                    for buddyId in sorted(user['buddy']):
                        f.write(str(user['id']) + ' ' + str(buddyId) + '\n')

    def getUserIdNameDict(self):
        """Return a dict mapping every user's id to that user's name."""
        userIdNameDict = {}
        for user in self.db.getAllUserIter():
            userIdNameDict[user['id']] = user['name']
        return userIdNameDict