Ejemplo n.º 1
0
    def extract_profile(self, soup, filename):
        """Extract a user's profile from a parsed 'menessentials' profile page.

        Args:
            soup: Parsed profile page, queried through the BeautifulSoup-style
                ``find`` / ``findNextSiblings`` API.
            filename: Name of the saved page; the user id is the run of digits
                that follows ``u=`` in it.

        Returns:
            A ``Profile`` with ``site_name``, ``user_id`` and ``user_name``
            set.  ``avatar_descr``, ``avatar_url``, ``join_date``,
            ``location``, ``occupation`` and ``interests`` are assigned only
            when the corresponding markup is present on the page.

        Raises:
            AttributeError, TypeError: If ``filename`` carries no
                ``u=<digits>`` part, or the "Viewing profile :: <name>" link
                cannot be located.
            ValueError: If the join-date text does not match ``%d %b %Y``.
        """

        def value_cell(label):
            # Return the first <td> that follows the grandparent of the text
            # node equal to `label`, or None when the page has no such row.
            label_node = soup.find(text=label)
            if label_node is None:
                return None
            container = label_node.parent.parent
            if container is None:
                return None
            cells = container.findNextSiblings('td', limit=1)
            if not cells:
                return None
            return cells[0]

        profile = Profile()
        profile.site_name = 'menessentials'

        # Raw strings keep the regex escapes (\W, \d) out of Python's own
        # string-escape processing.
        profile.user_id = re.search(r'\Wu=(\d+)', filename).group(1)
        name = soup.find('td', 'navbar-links').find('a', {'href': '#'})
        profile.user_name = re.search(r'Viewing profile :: (.*)', name.string).group(1)

        avatar_descr = soup.find('span', 'postdetails')
        if avatar_descr:
            if avatar_descr.string:
                profile.avatar_descr = avatar_descr.string
            avatar = avatar_descr.parent.find('img')
            # .get() rather than ['src']: an <img> without a src attribute
            # would otherwise raise KeyError.
            avatar_src = avatar.get('src') if avatar else None
            if avatar_src:
                profile.avatar_url = avatar_src

        joined = value_cell('Joined: ')
        if joined is not None:
            profile.join_date = datetime.strptime(self.get_text(joined), '%d %b %Y').date()

        # The remaining rows are plain text and share one extraction path.
        text_rows = (
            ('Location: ', 'location'),
            ('Occupation: ', 'occupation'),
            ('Interests: ', 'interests'),
        )
        for label, attr_name in text_rows:
            cell = value_cell(label)
            if cell is not None:
                setattr(profile, attr_name, self.get_text(cell))

        # Python 2 print-to-stream statement; progress message goes to stderr.
        print >>sys.stderr, '    profile for', profile.user_name

        return profile
Ejemplo n.º 2
0
    def extract_profile(self, soup, filename):
        """Extract a user's profile from a parsed 'shavemyface' profile page.

        Args:
            soup: Parsed profile page, queried through the BeautifulSoup-style
                ``find`` / ``findAll`` / ``findNextSiblings`` API.
            filename: Name of the saved page; the user id is the run of digits
                that follows ``u=`` in it.

        Returns:
            A ``Profile`` with ``site_name``, ``user_id`` and ``user_name``
            set.  ``avatar_url``, ``join_date``, ``location``, ``occupation``
            and ``interests`` are assigned only when matching markup is found.

        Raises:
            AttributeError, TypeError: If ``filename`` carries no
                ``u=<digits>`` part, or the "Viewing profile :: <name>"
                heading cannot be located.
            ValueError: If the join-date text does not match ``%d %b %Y``.
        """

        profile = Profile()
        profile.site_name = 'shavemyface'

        # Raw strings keep the regex escapes (\W, \d) out of Python's own
        # string-escape processing.
        profile.user_id = re.search(r'\Wu=(\d+)', filename).group(1)
        name = soup.find('th', 'thHead')
        profile.user_name = re.search(r'Viewing profile :: (.*)', name.string).group(1)

        avatar_img = soup.find('img', src=re.compile(r'^images/avatars/'))
        if avatar_img:
            profile.avatar_url = avatar_img['src']

        # Label text -> Profile attribute for the rows stored verbatim.
        text_fields = {
            'Location: ': 'location',
            'Occupation: ': 'occupation',
            'Interests: ': 'interests',
        }

        # Each attribute is a <span class="gen"> label inside a <td>, with the
        # value in a <span class="gen"> inside the next <td> sibling.
        for attr in soup.findAll('span', 'gen'):
            if attr.parent.name != 'td':
                continue

            nodes = attr.parent.findNextSiblings('td', limit=1)
            if not nodes:
                continue

            value = nodes[0].find('span', 'gen')
            if not value:
                continue

            label = attr.string
            text = value.string

            if label == 'Joined: ':
                # .string may be None (e.g. when the span holds more than a
                # single text node); strptime(None, ...) would raise
                # TypeError, so such a row is skipped like other incomplete
                # rows in this loop.
                if text is not None:
                    profile.join_date = datetime.strptime(text, '%d %b %Y').date()
            elif label in text_fields:
                setattr(profile, text_fields[label], text)

        # Python 2 print-to-stream statement; progress message goes to stderr.
        print >>sys.stderr, '    profile for', profile.user_name

        return profile