def sourceforge(self, username):
     """Check SourceForge for a profile named ``username`` and record it.

     The public profile page is requested and a ``<title>`` of the form
     "<name> / Profile" is taken as proof that the account exists.  On a hit
     the join date, location, gender and project names scraped from the page
     are displayed in a table and the profile name is passed to
     ``insert_contacts``; otherwise a "not found" message is printed.
     """
     self.verbose('Checking SourceForge...')
     url = f"http://sourceforge.net/u/{username}/profile/"
     page = self.request('GET', url).text
     title_match = re.search(r'<title>(.+) / Profile', page)
     if not title_match:
         self.output('Sourceforge username not found.')
         return
     self.alert(f"Sourceforge username found - ({url})")

     def scrape(pattern):
         # first capture group of ``pattern`` in the page, or None if absent
         hit = re.search(pattern, page)
         return hit.group(1) if hit else None

     # pull the optional profile fields out of the markup
     joined = scrape(r'<dt>Joined:</dt><dd>\s*(\d\d\d\d-\d\d-\d\d) ')
     location = scrape(r'<dt>Location:</dt><dd>\s*(\w.*)')
     gender = scrape(r'<dt>Gender:</dt><dd>\s*(\w.*)')
     projects = re.findall(r'class="project-info">\s*<a href="/p/.+/">(.+)</a>', page)
     display_name = title_match.group(1)
     # one row per field, followed by one row per project
     rows = [
         ['Resource', 'Sourceforge'],
         ['Name', display_name],
         ['Profile URL', url],
         ['Joined', joined],
         ['Location', location],
         ['Gender', gender],
     ]
     rows.extend(['Projects', project] for project in projects)
     self.table(rows, title='Sourceforge')
     # store the contact, using the account name if no display name exists
     fname, mname, lname = parse_name(display_name or username)
     self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='Sourceforge Contributor')
 def codeplex(self, username):
     """Check CodePlex for a profile named ``username`` and record it.

     The user page is requested and the presence of the ``user_name``
     heading is taken as proof that the account exists.  On a hit the
     "Member Since" and "Last Visit" dates and the linked projects are shown
     in a table and the profile name is passed to ``insert_contacts``;
     otherwise a "not found" message is printed.

     Dates are shown as YYYY-MM-DD when they parse as "Month D, YYYY".  A
     date that is missing from the page is shown as None rather than a
     made-up placeholder, and text that does not parse is shown unchanged
     instead of raising ``ValueError``.
     """
     self.verbose('Checking CodePlex...')
     url = f"http://www.codeplex.com/site/users/view/{username}"
     resp = self.request('GET', url)
     cpName = re.search(r'<h1 class="user_name" style="display: inline">(.+)</h1>', resp.text)
     if not cpName:
         self.output('CodePlex username not found.')
         return
     self.alert(f"CodePlex username found - ({url})")
     cpName = cpName.group(1)

     def profile_date(label):
         # date printed next to ``label``, normalised to YYYY-MM-DD when possible
         found = re.search(label + r'<span class="user_float">([A-Z].+[0-9])</span>', resp.text)
         if not found:
             return None
         raw = found.group(1)
         try:
             return time.strftime('%Y-%m-%d', time.strptime(raw, '%B %d, %Y'))
         except ValueError:
             # the pattern above is loose and %B follows the current locale,
             # so fall back to the scraped text rather than crash
             return raw

     # extract data
     cpJoin = profile_date('Member Since')
     cpLast = profile_date('Last Visit')
     cpCoordinator = re.search(r'(?s)<p class="OverflowHidden">(.*?)</p>', resp.text)
     cpCoordinator = cpCoordinator.group(1) if cpCoordinator else ''
     cpCoordProject = re.findall(r'<a href="(http://.+)/" title=".+">(.+)<br /></a>', cpCoordinator)
     # build and display a table of the results
     tdata = [
         ['Resource', 'CodePlex'],
         ['Name', cpName],
         ['Profile URL', url],
         ['Joined', cpJoin],
         ['Date Last', cpLast],
     ]
     for cpReposUrl, cpRepos in cpCoordProject:
         tdata.append(['Project', f"{cpRepos} ({cpReposUrl})"])
     self.table(tdata, title='CodePlex')
     # add the pertinent information to the database
     fname, mname, lname = parse_name(cpName or username)
     self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='CodePlex Contributor')
 def bitbucket(self, username):
     """Check Bitbucket for an account named ``username`` and record it.

     The Bitbucket 2.0 users endpoint is queried and a JSON body containing a
     ``username`` key is treated as a hit.  On a hit the account details are
     shown in a table and the display name is passed to ``insert_contacts``;
     otherwise a "not found" message is printed.

     Every field other than ``username`` is read defensively, so a response
     that omits one of them produces a None cell instead of a ``KeyError``.
     """
     self.verbose('Checking Bitbucket...')
     url = f"https://bitbucket.org/api/2.0/users/{username}"
     resp = self.request('GET', url)
     data = resp.json()
     if 'username' not in data:
         self.output('Bitbucket username not found.')
         return
     self.alert(f"Bitbucket username found - ({url})")
     # extract data from the optional fields
     bbName = data.get('display_name')
     # keep only the part of the timestamp before the 'T' separator
     bbJoin = (data.get('created_on') or '').split('T')[0] or None
     bbProfile = ((data.get('links') or {}).get('html') or {}).get('href')
     # build and display a table of the results
     # (the avatar URL is left out on purpose: it is long enough to break the table layout)
     tdata = [
         ['Resource', 'Bitbucket'],
         ['User Name', data['username']],
         ['Display Name', bbName],
         ['Location', data.get('location')],
         ['Joined', bbJoin],
         ['Personal URL', data.get('website')],
         ['Bitbucket URL', bbProfile],
     ]
     self.table(tdata, title='Bitbucket')
     # add the pertinent information to the database
     fname, mname, lname = parse_name(bbName or username)
     self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, title='Bitbucket Contributor')
Example #4
0
 def module_run(self, domains):
     """Query the BuiltWith API for each domain and harvest contacts and hosts.

     For every domain the free BuiltWith endpoint is called with the
     ``builtwith_api`` key.  An ``error`` entry in the response is reported
     and the domain skipped.  Otherwise, for each result, the listed emails
     and names are inserted as contacts, the technologies are grouped per
     host, hosts that differ from their last two DNS labels are inserted,
     and - when the ``show_all`` option is set - every technology record is
     printed.

     The response body is decoded once per request, and the inner loops use
     their own variable names so the ``domain`` being looked up is never
     overwritten.
     """
     key = self.keys.get('builtwith_api')
     # Free endpoint; the commercial one is http://api.builtwith.com/v5/api.json
     url = 'https://api.builtwith.com/free1/api.json'
     title = 'BuiltWith contact'
     for domain in domains:
         self.heading(domain, level=0)
         payload = {'KEY': key, 'LOOKUP': domain}
         resp = self.request('GET', url, params=payload)
         body = resp.json()
         if 'error' in body:
             self.error(body['error'])
             continue
         for result in body['Results']:
             meta = result['Meta']
             # extract and add emails to contacts (the API may return null)
             for email in meta['Emails'] or []:
                 self.insert_contacts(first_name=None,
                                      last_name=None,
                                      title=title,
                                      email=email)
             # extract and add names to contacts (the API may return null)
             for name in meta['Names'] or []:
                 fname, mname, lname = parse_name(name['Name'])
                 self.insert_contacts(first_name=fname,
                                      middle_name=mname,
                                      last_name=lname,
                                      title=title)
             # extract and consolidate hosts and associated technology data
             tech_by_host = {}
             for path in result['Result']['Paths']:
                 path_domain = path['Domain']
                 subdomain = path['SubDomain']
                 if path_domain in subdomain:
                     # the subdomain field already carries the full host name
                     host = subdomain
                 else:
                     host = '.'.join(filter(len, [subdomain, path_domain]))
                 tech_by_host.setdefault(host, []).extend(path['Technologies'])
             for host in tech_by_host:
                 # add host to hosts
                 # NOTE(review): the last two labels are assumed to be the
                 # registered domain, which is wrong for suffixes such as
                 # co.uk - the original author flagged this as well
                 registered = '.'.join(host.split('.')[-2:])
                 if registered != host:
                     self.insert_hosts(host)
             # process hosts and technology data
             if self.options['show_all']:
                 for host, technologies in tech_by_host.items():
                     self.heading(host, level=0)
                     # display technologies, one ruled section per record
                     if technologies:
                         self.output(self.ruler * 50)
                     for item in technologies:
                         for tag in item:
                             wrapped = textwrap.fill(
                                 self.to_unicode_str(item[tag]),
                                 100,
                                 initial_indent='',
                                 subsequent_indent=self.spacer * 2,
                             )
                             self.output(f"{tag}: {wrapped}")
                         self.output(self.ruler * 50)
Example #5
0
 def module_run(self, domains):
     """Search a PGP key server for each domain and store matching contacts.

     The lookup response is split into fragments on newlines and angle
     brackets, and fragments of the form ``name &lt;email&gt;`` that mention
     the domain are parsed into (name, email) pairs.  A pair is inserted as a
     contact only when the address belongs to the domain or one of its
     subdomains, i.e. it ends with ``@domain`` or ``.domain``.  A plain
     suffix test would also accept unrelated domains such as
     ``notexample.com`` when searching for ``example.com``.

     Duplicate pairs are dropped while keeping first-seen order, so contacts
     are inserted in a reproducible order.
     """
     url = 'http://pgp.key-server.io/pks/lookup'
     for domain in domains:
         self.heading(domain, level=0)
         payload = {'search': domain}
         resp = self.request('GET', url, params=payload)
         # split the response into the relevant lines
         lines = [x.strip() for x in re.split('[\n<>]', resp.text) if domain in x]
         # a dict is used as an ordered set of (name, email) pairs
         results = {}
         for line in lines:
             # remove parenthesized items
             line = re.sub(r'\s*\(.*\)\s*', '', line)
             # parse out name and email address
             match = re.search(r'^(.*)&lt;(.*)&gt;$', line)
             if match:
                 # clean up and record the parsed elements
                 results[(match.group(1).strip(), match.group(2).strip())] = None
         if not results:
             self.output('No results found.')
             continue
         # require a label boundary in front of the domain
         wanted = domain.lower()
         suffixes = ('@' + wanted, '.' + wanted)
         for name, email in results:
             if not email.lower().endswith(suffixes):
                 continue
             fname, mname, lname = parse_name(name)
             self.insert_contacts(
                 first_name=fname,
                 middle_name=mname,
                 last_name=lname,
                 email=email,
                 title='PGP key association',
             )
Example #6
0
 def gitorious(self, username):
     """Check Gitorious for a profile named ``username`` and record it.

     The profile page is requested and the presence of the user's avatar
     link is taken as proof that the account exists.  On a hit the name,
     email address, membership duration, personal URL and projects scraped
     from the page are shown in a table and the name and email are passed to
     ``insert_contacts``; otherwise a "not found" message is printed.

     The username is escaped before being placed in the avatar pattern, so
     names containing regex metacharacters (``.``, ``+``, ``(`` ...) are
     matched literally instead of mis-matching or raising ``re.error``.
     """
     self.verbose('Checking Gitorious...')
     url = f"https://gitorious.org/~{username}"
     resp = self.request('GET', url)
     avatar_link = rf'href="/~{re.escape(username)}" class="avatar"'
     if not re.search(avatar_link, resp.text):
         self.output('Gitorious username not found.')
         return
     self.alert(f"Gitorious username found - ({url})")
     # extract data
     gitoName = re.search(
         r'<strong>([^<]*)</strong>\s+</li>\s+<li class="email">',
         resp.text)
     # The address is hidden inside URL-encoded JavaScript; decode that
     # payload first and then look for the mailto: link within it.
     gitoEmailRaw = re.search(r"eval\(decodeURIComponent\('(.+)'",
                              resp.text)
     gitoEmail = re.search(
         r'mailto:([^\\]+)', unquote_plus(
             gitoEmailRaw.group(1))) if gitoEmailRaw else None
     gitoJoin = re.search(r'Member for (.+)', resp.text)
     gitoPersonalUrl = re.search(r'rel="me" href="(.+)">', resp.text)
     gitoProjects = re.findall(
         r'<tr class="project">\s+<td>\s+<a href="/([^"]*)">([^<]*)</a>\s+</td>\s+</tr>',
         resp.text)
     # establish non-match values
     gitoName = gitoName.group(1) if gitoName else None
     gitoEmail = gitoEmail.group(1) if gitoEmail else None
     gitoJoin = gitoJoin.group(1) if gitoJoin else None
     gitoPersonalUrl = gitoPersonalUrl.group(
         1) if gitoPersonalUrl else None
     # build and display a table of the results
     tdata = [
         ['Resource', 'Gitorious'],
         ['Name', gitoName],
         ['Profile URL', url],
         ['Membership', gitoJoin],
         ['Email', gitoEmail],
         ['Personal URL', gitoPersonalUrl],
     ]
     for gitoProjUrl, gitoProjName in gitoProjects:
         tdata.append([
             'Project',
             f"{gitoProjName} (https://gitorious.org/{gitoProjUrl})"
         ])
     self.table(tdata, title='Gitorious')
     # add the pertinent information to the database
     fname, mname, lname = parse_name(gitoName or username)
     self.insert_contacts(first_name=fname,
                          middle_name=mname,
                          last_name=lname,
                          title='Gitorious Contributor',
                          email=gitoEmail)
Example #7
0
 def module_run(self, repos):
     """Harvest profiles and contacts from the commits of each repository.

     ``repos`` yields sequences whose first two items are used as the owner
     and repository segments of the GitHub commits endpoint.  For each
     commit, and for each of the ``committer`` and ``author`` roles enabled
     in the module options, the linked GitHub account (if any) is inserted
     as a profile and the name/email recorded in the commit itself is
     inserted as a contact.
     """
     for repo in repos:
         endpoint = f"/repos/{quote_plus(repo[0])}/{quote_plus(repo[1])}/commits"
         # a page limit of 0 is handed on as None
         page_limit = int(self.options['maxpages']) or None
         commits = self.query_github_api(
             endpoint=endpoint,
             payload={},
             options={'max_pages': page_limit},
         )
         for commit in commits:
             for role in ('committer', 'author'):
                 if not self.options[role]:
                     continue
                 # GitHub account attached to the commit, when there is one
                 account = commit.get(role)
                 if account:
                     url = account['html_url']
                     login = account['login']
                     self.insert_profiles(username=login, url=url, resource='Github', category='coding')
                 # identity written into the commit metadata
                 signature = commit['commit'].get(role)
                 if signature:
                     name = signature['name']
                     email = signature['email']
                     fname, mname, lname = parse_name(name)
                     self.insert_contacts(first_name=fname, middle_name=mname, last_name=lname, email=email, title='Github Contributor')
Example #8
0
 def github(self, username):
     """Check GitHub for an account named ``username`` and record it.

     The GitHub users API is queried and a JSON body containing a ``login``
     key is treated as a hit.  On a hit the account details are shown in a
     table (the "Real Name" row only when a name is set) and the name is
     passed to ``insert_contacts``; otherwise a "not found" message is
     printed.
     """
     self.verbose('Checking Github...')
     url = f"https://api.github.com/users/{username}"
     data = self.request('GET', url).json()
     if 'login' not in data:
         self.output('Github username not found.')
         return
     self.alert(f"Github username found - ({url})")
     # optional fields fall back to None when the key is absent
     real_name = data.get('name')
     company = data.get('company')
     blog = data.get('blog')
     location = data.get('location')
     email = data.get('email')
     bio = data.get('bio')
     # timestamps: keep only the part before the 'T' separator
     joined = data['created_at'].split('T')[0]
     updated = data['updated_at'].split('T')[0]
     # assemble the table rows in display order
     rows = [
         ['Resource', 'Github'],
         ['User Name', data['login']],
     ]
     if real_name:
         rows.append(['Real Name', real_name])
     rows.extend([
         ['Profile URL', data['html_url']],
         ['Avatar URL', data['avatar_url']],
         ['Location', location],
         ['Company', company],
         ['Blog URL', blog],
         ['Email', email],
         ['Bio', bio],
         ['Followers', data['followers']],
         ['ID', data['id']],
         ['Joined', joined],
         ['Updated', updated],
     ])
     self.table(rows, title='Github')
     # store the contact, using the account name if no real name is set
     fname, mname, lname = parse_name(real_name or username)
     self.insert_contacts(first_name=fname,
                          middle_name=mname,
                          last_name=lname,
                          title='Github Contributor')
    def get_contact_info(self, url):
        """Derive a contact from the Bing search result for a LinkedIn URL.

        ``url`` is searched through ``search_bing_api`` and the first hit is
        used only when its URL is exactly ``url``.  The hit's link title is
        cut at the first ``|`` and split on dashes into segments, which are
        read as ``name - title - company``:

        * the first segment is parsed as the person's name;
        * the second segment, when present and not mentioning "linkedin",
          becomes the title;
        * the last segment, when there are at least three segments and it
          does not mention "linkedin", is appended as "<title> at <company>".

        A link title without any dash yields a contact with a name only
        instead of raising ``IndexError``, and a two-segment title no longer
        produces "<title> at <title>".  Segments are stripped of surrounding
        whitespace.
        """
        search_result = self.search_bing_api(url, 1)

        # Search by url. If the url doesn't match, it has potential to be a different person
        if not search_result or search_result[0]["url"] != url:
            return
        # "name" is a misnomer, it actually refers to the link title
        link_title = search_result[0]["name"]

        # Split the title on the pipe to get rid of "LinkedIn" portion at the end
        name_and_title = link_title.split("|")[0]
        # Split what's left on the dashes; some European LinkedIn sites use
        # a Unicode dash (U+2013, the en dash) instead of a hyphen
        EN_DASH = '\u2013'
        delimiter_expression = '- | ' + EN_DASH
        parts = [part.strip()
                 for part in re.split(delimiter_expression, name_and_title)]

        # Parse out name
        fname, mname, lname = parse_name(parts[0])

        # Try to parse out a title and company if they're there
        title = None
        if len(parts) > 1 and "linkedin" not in parts[1].lower():
            title = parts[1]
            # Only a third or later segment can be the company; sometimes
            # "LinkedIn" is left at the end anyway, and we don't want to
            # confuse that for the company
            company = parts[-1] if len(parts) > 2 else ''
            if title and company and "linkedin" not in company.lower():
                title = f"{title} at {company}"

        if title:
            self.insert_contacts(first_name=fname,
                                 middle_name=mname,
                                 last_name=lname,
                                 title=title)
        else:
            self.insert_contacts(first_name=fname,
                                 middle_name=mname,
                                 last_name=lname)
Example #10
0
 def module_run(self, domains):
     """Find IANA enterprise-number registrants whose email is at a domain.

     The IANA Private Enterprise Numbers registry is downloaded once (a
     non-200 status is reported but processing continues).  For each domain
     the registry text is scanned for four-line entries whose last line is
     ``<mailbox>&<domain>``; the contact name on the third line and the
     rebuilt ``mailbox@domain`` address are inserted as a contact.  Domains
     without any entry are reported.
     """
     url = 'https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers'
     resp = self.request('GET', url)
     if resp.status_code != 200:
         self.alert(f"When retrieving IANA PEN Registry, got HTTP status code {resp.status_code}!")
     for domain in domains:
         # number / organisation / contact / mailbox&domain, one per line
         pattern = r'(\d+)\s*\n\s{2}(.*)\s*\n\s{4}(.*)\s*\n\s{6}(.*)&' + re.escape(domain) + r'\s*\n'
         hits = 0
         for hits, entry in enumerate(re.finditer(pattern, resp.text, re.IGNORECASE), start=1):
             _number, _organisation, fullname, mailbox = entry.groups()
             fname, mname, lname = parse_name(fullname)
             self.insert_contacts(
                 first_name=fname,
                 middle_name=mname,
                 last_name=lname,
                 email=mailbox + '@' + domain
             )
         if hits == 0:
             self.alert(f"No matches found for domain '{domain}'")
Example #11
0
 def module_run(self, companies):
     """Find IANA enterprise-number registrants by company name.

     The IANA Private Enterprise Numbers registry is downloaded once (a
     non-200 status is reported but processing continues).  For each company
     the registry text is scanned for four-line entries whose organisation
     line contains the company name; the contact name on the third line and
     the address on the fourth line (with ``&`` turned back into ``@``) are
     inserted as a contact.  Companies without any entry are reported.
     """
     url = 'https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers'
     resp = self.request('GET', url)
     if resp.status_code != 200:
         self.alert(f"When retrieving IANA PEN Registry, got HTTP status code {resp.status_code}!")
     for company in companies:
         # number / organisation containing the company / contact / address
         pattern = r'(\d+)\s*\n\s{2}.*' + re.escape(company) + r'.*\s*\n\s{4}(.*)\s*\n\s{6}(.*)\s*\n'
         hits = 0
         for hits, entry in enumerate(re.finditer(pattern, resp.text, re.IGNORECASE), start=1):
             _number, fullname, obscured_email = entry.groups()
             fname, mname, lname = parse_name(fullname)
             self.insert_contacts(first_name=fname,
                                  middle_name=mname,
                                  last_name=lname,
                                  email=obscured_email.replace('&', '@'))
         if hits == 0:
             self.alert(f"No matches found for company '{company}'")
Example #12
0
    def module_run(self, entities):
        """Enrich each email address through the FullContact person API.

        ``entities`` is consumed as a queue: addresses are popped from the
        front, and an address the API answers with HTTP 202 is appended
        again for a later retry.  On HTTP 200 the returned name, extra email
        addresses, title/organization and location are reported and inserted
        as contacts (one per address), and any twitter/linkedin/facebook URL
        is inserted as a profile.  Any other status prints the API's
        ``message``.

        The name parts are reset for every address, so a response without
        ``fullName`` inserts contacts with empty names instead of raising
        ``NameError`` or reusing the previous person's name.  A response
        without ``details`` is treated as having no extra addresses.
        """
        api_key = self.keys.get('fullcontact_api')
        base_url = 'https://api.fullcontact.com/v3/person.enrich'
        while entities:
            entity = entities.pop(0)
            payload = {'email': entity}
            headers = {'Authorization': 'Bearer ' + api_key}
            resp = self.request('POST',
                                base_url,
                                json=payload,
                                headers=headers)
            if resp.status_code == 200:
                # decode the body once and reuse it below
                person = resp.json()

                # parse contact information
                first_name = middle_name = last_name = None
                name = person.get('fullName')
                if name:
                    first_name, middle_name, last_name = parse_name(name)
                    self.alert(name)
                emails = [entity]
                details = person.get('details') or {}
                for email in details.get('emails') or []:
                    emails.append(email['value'])
                    self.alert(email['value'])
                title = person.get('title')
                organization = person.get('organization')
                if title and organization:
                    title = f"{title} at {organization}"
                elif organization:
                    title = f"Employee at {organization}"
                if title:
                    self.alert(title)

                # parse location
                region = person.get('location')
                if region:
                    self.alert(region)

                # insert contacts
                for email in emails:
                    self.insert_contacts(first_name=first_name,
                                         middle_name=middle_name,
                                         last_name=last_name,
                                         title=title,
                                         email=email,
                                         region=region)

                # parse and insert profiles
                for resource in ['twitter', 'linkedin', 'facebook']:
                    url = person.get(resource)
                    if url:
                        # the last path segment is taken as the username
                        username = url.split('/')[-1]
                        self.alert(url)
                        self.insert_profiles(username=username,
                                             url=url,
                                             resource=resource,
                                             category='social')

            elif resp.status_code == 202:
                # add emails queued by fullcontact back to the list
                entities.append(entity)
                self.output(f"{entity} queued and added back to the list.")
            else:
                self.output(f"{entity} - {resp.json()['message']}")
            # 600 requests per minute api rate limit
            sleep(.1)
Example #13
0
 def parse_fullname(self, name):
     """Trim trailing decorations from ``name`` and split it into parts.

     Everything from the first " -", then the first " |", then the first ","
     onwards is discarded, in that order.  Returns the trimmed name followed
     by the three values ``parse_name`` produces for it.
     """
     fullname = name
     for separator in (" -", " |", ","):
         # keep only the text in front of the separator
         fullname = fullname.partition(separator)[0]
     fname, mname, lname = parse_name(fullname)
     return fullname, fname, mname, lname