Beispiel #1
0
class MySpaceBot:
    """Scrape MySpace people-search results and profile friend lists.

    Pages are fetched through a ``Browser`` instance (no sign-in is
    performed) and the friend list is extracted from the raw HTML with the
    regular expressions defined below.
    """

    # NOTE(review): not referenced by any method of this class; search()
    # filters links with an inline url_regex instead. The '?' is unescaped
    # here, so used as a regex it would not match a literal '?'.
    __name_regex = 'href="http://profile.myspace.com/index.cfm?fuseaction=user.viewprofile&friendID=[0-9]+" linkindex='

    # One friend anchor: from its href attribute up to the closing </a>.
    # The name part is non-greedy so that several anchors on the same line
    # are matched separately instead of being merged into a single match.
    __contact_regex = (
        r'href="http://profile.myspace.com/index.cfm\?fuseaction=user.viewprofile'
        r'&friendid=[0-9]+" '
        r'id="ctl[0-9]+_Main_ctl[0-9]+_UserFriends1_FriendRepeater_ctl[0-9]+_friendLink">'
        r'[^\n]+?</a>'
    )

    # Display name inside a friend anchor; the prefix/suffix are sliced off
    # the matched text by __strip_complete_name().
    __complete_name_regex = r'friendLink">[^\n]+?</a>'
    __complete_name_prefix = 'friendLink">'
    __complete_name_sufix = '</a>'

    # Profile URL inside a friend anchor; the prefix/suffix are sliced off
    # the matched text by __strip_url().
    __url_regex = r'href="http://profile.myspace.com/index.cfm\?fuseaction=user.viewprofile&friendid=[0-9]+" id'
    __url_prefix = 'href="'
    __url_sufix = '" id'

    def __init__(self, proxies_per_proto=None, debug=False):
        """Create the underlying browser; no sign-in is performed.

        proxies_per_proto -- mapping handed to ``Browser.set_proxies``;
                             when omitted a fresh empty dict is passed
                             (same value as the previous ``{}`` default).
        debug             -- flag handed to ``Browser.set_debug_http``.
        """
        if proxies_per_proto is None:
            # Avoid sharing one mutable default dict between instances.
            proxies_per_proto = {}
        self.__br = Browser()
        self.__br.set_proxies(proxies_per_proto)
        self.__br.set_debug_http(debug)

    def search(self, query):
        """Return the distinct profile URLs listed for display name *query*.

        Opens the people-search results page for *query* and collects the
        URL of every link that points at a profile view, keeping first-seen
        order and dropping duplicates.
        """
        br = self.__br
        q_field = urllib.urlencode({'f_first_name': query})
        url = 'http://searchresults.myspace.com/index.cfm?fuseaction=find.search&searchType=network&interesttype=&country=&searchBy=Display&%s&Submit=Find&SearchBoxID=FindAFriend' % q_field
        br.open(url)
        seen = set()  # O(1) duplicate check; links_urls keeps the order
        links_urls = []
        for link in br.links(url_regex=r'profile.myspace.com/index.cfm\?fuseaction=user.viewprofile'):
            if link.url not in seen:
                seen.add(link.url)
                links_urls.append(link.url)
        return links_urls

    def __strip_complete_name(self, html_match):
        """Return the display name contained in one friend-anchor snippet.

        *html_match* is expected to be text matched by ``__contact_regex``;
        text without a friend anchor makes ``re.search`` return None and the
        ``.group()`` call fail.
        """
        match = re.search(self.__complete_name_regex, html_match)
        return match.group()[len(self.__complete_name_prefix):-len(self.__complete_name_sufix)]

    def __strip_url(self, html_match):
        """Return the profile URL contained in one friend-anchor snippet.

        *html_match* is expected to be text matched by ``__contact_regex``;
        text without a friend anchor makes ``re.search`` return None and the
        ``.group()`` call fail.
        """
        match = re.search(self.__url_regex, html_match)
        return match.group()[len(self.__url_prefix):-len(self.__url_sufix)]

    def __parse_contacts(self, html):
        """Return [(display_name, profile_url), ...] for every friend anchor in *html*."""
        return [(self.__strip_complete_name(snippet), self.__strip_url(snippet))
                for snippet in re.findall(self.__contact_regex, html)]

    def contacts(self, name):
        """Return the contacts of display name *name* as (display_name, url) tuples.

        Searches for *name*, assumes the first search result is the intended
        person, opens that profile page and returns the friends found in its
        HTML as a list of tuples.

        Raises Exception when the search returns no profile at all.
        """
        results = self.search(name)
        if not results:
            raise Exception('display name "%s" doesn\'t exist in MySpace' % name)
        # Assume the first person that matches.
        resp = self.__br.open(results[0])
        return self.__parse_contacts(resp.read())
Beispiel #2
0
 def __init__(self, proxies_per_proto=None, debug=False):
     """Create the underlying browser; no sign-in is performed.

     proxies_per_proto -- mapping handed to ``Browser.set_proxies``;
                          when omitted a fresh empty dict is passed
                          (same value as the previous ``{}`` default).
     debug             -- flag handed to ``Browser.set_debug_http``.
     """
     if proxies_per_proto is None:
         # Avoid sharing one mutable default dict between instances.
         proxies_per_proto = {}
     self.__br = Browser()
     self.__br.set_proxies(proxies_per_proto)
     self.__br.set_debug_http(debug)