Example #1
0
def _get_info_for_launchpad_username(identifier):
    """Fetch the Launchpad profile page for the named person.

    The docstring of the original describes the intended result as a
    dictionary keyed by project name, like this:
    {
        'F-Spot': {
            'url': 'http://launchpad.net/f-spot',
            'involvement_types': ['Bug Management', 'Bazaar Branches'],
            'languages' : ['python', 'shell script']
        }
    }

    identifier is either a Launchpad username or an email address. Any
    identifier containing '@' is treated as an email address and is first
    resolved with get_launchpad_username_by_email(); when that returns None
    (nobody found by that address), the identifier itself is used as the
    username.

    Returns {} when Launchpad answers the profile request with a 404.
    Any other urllib2.HTTPError is re-raised unchanged.

    NOTE(review): the body visible here stops after the profile page has
    been fetched, so a successful fetch currently falls through and
    returns None rather than the dictionary described above -- confirm
    whether the scraping step is missing.
    """
    username = None

    # If the OpenHatch user entered an email address, try to find the
    # corresponding Launchpad username first.
    if '@' in identifier:
        username = get_launchpad_username_by_email(identifier)

    # Either it was not an email address, or the lookup found nobody.
    if username is None:
        username = identifier

    profile_url = 'https://launchpad.net/~%s' % urllib.quote(username)
    try:
        b = mechanize_get(profile_url)
    except urllib2.HTTPError as e:
        # "except ... as" works on Python 2.6+ and avoids the old comma
        # form, which is a syntax error on Python 3.
        if str(e.code) == '404':
            return {}
        raise  # not a 404? Bubble-up the explosion.
Example #2
0
def project2languages(project_name):
    """Scrape a Launchpad project page for its programming languages.

    project_name is either a full URL starting with 'http://' or
    'https://', which is fetched as given, or a bare project name, which
    is lowercased, URL-quoted and appended to 'https://launchpad.net/'.

    The page body is decoded as UTF-8 and parsed with lxml. The text of
    the first element matching '#programminglang' is split on commas and
    each piece is stripped of surrounding whitespace.

    Returns the list of language names, or [] when the page has no
    '#programminglang' element.
    """
    if project_name.startswith(('http://', 'https://')):
        page_url = project_name
    else:
        # Bare names are normalized to lowercase before building the URL.
        page_url = 'https://launchpad.net/%s' % (
            urllib.quote(project_name.lower()))

    # Fetch the project page and parse it.
    browser = mechanize_get(page_url)
    raw_html = browser.response().read()
    tree = lxml.html.document_fromstring(unicode(raw_html, 'utf-8'))

    try:
        lang_node = tree.cssselect('#programminglang')[0]
    except IndexError:
        # No language element on the page.
        return []

    return [piece.strip() for piece in lang_node.text_content().split(',')]