Beispiel #1
0
    def fill(self,session):
        """Download this user's profile and friend list and populate the instance.

        The profile page for ``self.Id`` is requested through ``session.get``.
        If the response URL differs from the requested URL, the profile is
        reported as missing on stderr and nothing else is fetched or parsed.
        Otherwise the page is parsed with ``html.fromstring`` and the
        following attributes are written: ``Name``, ``Type`` (only when the
        summary line has a second token), ``Age`` (``-1`` when no digits are
        found), ``Gender``, ``Location``, ``Orientation``, ``Active``, and the
        pre-existing containers ``Relationships``, ``LookingFor``, ``Groups``,
        ``Into``, ``Curious`` and ``Friends`` are appended to.  Finally every
        page of the friends listing is fetched and ``setCrawlDate()`` is
        called.

        ``self._link`` and ``self._page`` always hold the most recent request
        URL and response.

        Args:
            session: object whose ``get(url)`` returns a response exposing
                ``url`` and ``text`` attributes.

        Returns:
            ``False`` when the profile request ended up on a different URL,
            ``True`` after a complete crawl.

        Raises:
            AssertionError: if ``self.Id`` is not a number, or a recognised
                table row does not have the expected cell structure.
            IndexError: if the name heading or the age/gender summary line is
                not found on the page.
            RuntimeError: if the profile table contains a row whose header
                is not one of the recognised labels.
        """
        sys.stderr.write("Loading Profile [%s]\n" % self.Id)
        assert isinstance(self.Id,numbers.Number)
        self._link = "https://fetlife.com/users/%s" % self.Id
        self._page = session.get(self._link)
        if self._page.url != self._link:
            # The response did not come from the URL we asked for; this is
            # treated as "profile does not exist".
            sys.stderr.write("Missing Profile [%s]\n" % self.Id)
            return False

        tree = html.fromstring(self._page.text)

        self.Name = tree.xpath('//h2[@class="bottom"]/text()')[0].strip()
        rawPair = tree.xpath('//span[@class="small quiet"]/text()')[0].strip()
        splitList = re.split(" ",rawPair)
        if len(splitList) > 1:
            self.Type = splitList[1]
        try:
            self.Age = int(re.sub(r'[^0-9]','', splitList[0]))
        except ValueError:
            self.Age = -1
        # Gender is whatever remains of the first token once digits are
        # removed.  This used to sit behind `self.Age != splitList[0]`, an
        # int-vs-str comparison that is always true, so the assignment has
        # always been unconditional.
        self.Gender = re.sub(r'[0-9 ]','', splitList[0])
        Location = tree.xpath('//div[@class="span-13 append-1"]/p/em/a/text()')
        self.Location = [unicode(x) for x in Location]

        for row in tree.xpath('//div[@class="span-13 append-1"]/table/tr'):
            cells = list(row)
            label = cells[0].text
            if label in ("relationship status:", "D/s relationship status:"):
                assert len(cells[1:]) == 1
                td = cells[1]
                assert len(td.getchildren()) == 1
                ul = td.getchildren()[0]
                for li in ul:
                    links = li.getchildren()
                    if len(links) != 1:
                        # Only entries that link to exactly one element are recorded.
                        continue
                    url = links[0].get("href", "")
                    # li.text is None when the entry starts directly with the link.
                    rel = (li.text or "").strip()
                    try:
                        pid = int(re.sub(r'[^0-9 ]','', url))
                    except ValueError:
                        # No numeric id in the link; skip it, as the group
                        # and fetish parsing below do.
                        continue
                    self.Relationships.append((pid,rel))
            elif label == "orientation:":
                assert len(cells[1:]) == 1
                self.Orientation = cells[1].text
            elif label == "active:":
                assert len(cells[1:]) == 1
                self.Active = cells[1].text
            elif label == "is looking for:":
                assert len(cells[1:]) == 1
                for text in cells[1].itertext():
                    self.LookingFor.append(text)
            else:
                raise RuntimeError("Unknown table [%s]" % label)

        lastActive = tree.xpath('//ul[@id="mini_feed"]/li/span[@class="quiet small"]/text()')
        if lastActive:
            self.setLastActive(lastActive[0])

        for groupURL in tree.xpath('//li/a[contains(@href,"/groups/")]/@href'):
            try:
                self.Groups.add(int(re.sub(r'[^0-9]','', groupURL)))
            except ValueError:
                # Link under /groups/ without a numeric id; not a group page.
                pass

        #---------------------------------------------------
        # Fetishes
        #---------------------------------------------------
        stringMap = StringMap()

        def collectFetishes(profileTree,label,target):
            # Parse the paragraph introduced by <em>label</em> and add each
            # linked fetish id to target[qualifier], where qualifier is the
            # text of the element following the link (None when absent).
            paragraphs = profileTree.xpath('//em[text()="%s"]/ancestor::p' % label)
            if not paragraphs:
                return
            pairs = []
            for item in paragraphs[0]:
                if item.text is None:
                    continue
                try:
                    if "href" in item.keys():
                        fetishId = int(re.sub(r'[^0-9 ]','', item.get("href")))
                        pairs.append([fetishId,None])
                        if not stringMap.hasString("Fetish",fetishId):
                            stringMap.addString("Fetish",fetishId,item.text)
                    elif len(pairs) > 0:
                        # Non-link element after a fetish link: its text minus
                        # the first and last character (presumably surrounding
                        # parentheses) qualifies the preceding fetish.
                        pairs[-1][1] = item.text[1:-1]
                except ValueError:
                    pass
            for (fetishId,qualifier) in pairs:
                if qualifier not in target:
                    target[qualifier] = set()
                target[qualifier].add(fetishId)

        collectFetishes(tree,"Into:",self.Into)
        collectFetishes(tree,"Curious about:",self.Curious)

        #---------------------------------------------------
        # Now, friends
        #---------------------------------------------------
        pageNum = 1
        while True:
            self._link = "https://fetlife.com/users/%s/friends?page=%d" % (self.Id,pageNum)
            self._page = session.get(self._link)
            friendsTree = html.fromstring(self._page.text)

            for url in friendsTree.xpath('//div[@class="clearfix user_in_list"]/div/a/@href'):
                try:
                    self.Friends.append(int(re.sub(r'[^0-9 ]','', url)))
                except ValueError:
                    # A link without a numeric id must not abort the crawl
                    # after all the preceding requests succeeded.
                    pass

            # Keep paging only while the page offers exactly one "next" link.
            if len(friendsTree.xpath('//a[@class="next_page"]')) != 1:
                break
            pageNum += 1

        self.setCrawlDate()
        sys.stderr.write("Done Loading Profile [%s]\n" % self.Id)

        return True