def _get_info_for_launchpad_username(identifier):
    """Look up what a Launchpad user has been involved with.

    ``identifier`` may be a Launchpad username or an email address. When it
    contains an ``@`` it is first passed to
    ``get_launchpad_username_by_email``; if that yields ``None`` (nobody
    found under that address) the identifier itself is used as the username.

    The intended result is a dictionary keyed by project name, shaped like:

    {
        'F-Spot': {
            'url': 'http://launchpad.net/f-spot',
            'involvement_types': ['Bug Management', 'Bazaar Branches'],
            'languages' : ['python', 'shell script']
        }
    }

    An empty dictionary is returned when Launchpad answers the profile
    request with a 404. Any other ``urllib2.HTTPError`` is re-raised.
    """
    launchpad_name = None

    # Only strings containing '@' are tried as email addresses.
    if '@' in identifier:
        launchpad_name = get_launchpad_username_by_email(identifier)

    # Either it was not an email address, or the lookup found nobody:
    # treat the identifier as the username itself.
    if launchpad_name is None:
        launchpad_name = identifier

    profile_url = 'https://launchpad.net/~%s' % urllib.quote(launchpad_name)
    try:
        b = mechanize_get(profile_url)
    except urllib2.HTTPError as e:
        if str(e.code) != '404':
            raise  # Not a 404: let the error bubble up to the caller.
        return {}
    # NOTE(review): in the code visible here the fetched page ``b`` is not
    # used any further and the success path falls through without returning
    # the dictionary described above -- confirm whether the remainder of
    # this function is missing.
def project2languages(project_name):
    """Scrape a project's Launchpad page for its programming languages.

    ``project_name`` is either a full ``http://`` / ``https://`` URL, which
    is fetched as given, or a bare project name, which is lowercased,
    URL-quoted and appended to ``https://launchpad.net/``.

    Returns the comma-separated text of the page's ``#programminglang``
    element as a list of whitespace-stripped strings, or an empty list when
    the page contains no such element.
    """
    if project_name.startswith(('http://', 'https://')):
        project_url = project_name
    else:
        # Bare names are normalized to lowercase before building the URL.
        normalized_name = project_name.lower()
        project_url = 'https://launchpad.net/%s' % (
            urllib.quote(normalized_name))

    # Fetch the project page and parse it as UTF-8 HTML.
    browser = mechanize_get(project_url)
    raw_html = browser.response().read()
    tree = lxml.html.document_fromstring(unicode(raw_html, 'utf-8'))

    try:
        languages_node = tree.cssselect('#programminglang')[0]
    except IndexError:
        # No languages element on the page.
        return []

    languages_text = languages_node.text_content()
    return [language.strip() for language in languages_text.split(',')]