def get_people(self):
    rawres = myparser.Parser(self.totalresults, self.word)
    to_parse = rawres.people_twitter()
    # fix invalid handles that look like @user other_output
    handles = set()
    for handle in to_parse:
        handle = str(handle).strip()
        if len(handle) > 2:
            if ' ' in handle:
                handle = handle.split(' ')[0]
            # strip off the period at the end if it exists
            if handle[-1] == '.':
                handle = handle[:-1]
            # truncate at the first period if the handle contains an ellipsis
            if '...' in handle:
                handle = handle[:handle.index('.')]
            # drop stray dashes, guarding against handles that shrank above
            if handle and handle[0] == '-':
                handle = handle[1:]
            if len(handle) > 1 and handle[1] == '-':
                handle = handle[0] + handle[2:]
            if handle:
                handles.add(handle)
    if '@' in handles:
        handles.remove('@')
    return handles
async def get_people(self):
    rawres = myparser.Parser(self.totalresults, self.word)
    temp = await rawres.people_linkedin()
    return [person for person in temp
            if person[0] != '.' and '...' not in person and len(person.split()) != 1]
def get_urls(self):
    try:
        rawres = myparser.Parser(self.totalresults, 'trello.com')
        trello_urls = rawres.urls()
        visited = set()
        # Iterate through the Trello URLs gathered, visit each unique one and append its text to totalresults.
        for url in trello_urls:
            if url not in visited:
                visited.add(url)
                request = grequests.get(url=url, headers={'User-Agent': googleUA})
                response = grequests.map([request])
                self.totalresults += response[0].content.decode('UTF-8')
        rawres = myparser.Parser(self.totalresults, self.word)
        return rawres.hostnames(), trello_urls
    except Exception as e:
        print(f'Error occurred: {e}')
def get_people(self):
    rawres = myparser.Parser(self.totalresults, self.word)
    to_parse = rawres.people_twitter()
    # fix invalid handles that look like @user other_output
    handles = set()
    for handle in to_parse:
        result = re.search(r'^@?(\w){1,15}', handle)
        if result:
            handles.add(result.group(0))
    return handles
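# A quick illustration of what the regex above keeps: '^@?(\w){1,15}' grabs an
# optional leading '@' plus up to 15 word characters (15 being Twitter's maximum
# handle length), so trailing text, ellipses and leading dashes fall away.
# The sample strings below are made up for demonstration; the parser's real
# output will vary.
import re

samples = ['@alice some trailing text', '@bob...', '-@carol', 'dave_smith']
for raw in samples:
    match = re.search(r'^@?(\w){1,15}', raw)
    if match:
        print(f'{raw!r} -> {match.group(0)!r}')
# '@alice some trailing text' -> '@alice'
# '@bob...' -> '@bob'
# 'dave_smith' -> 'dave_smith'
# '-@carol' does not match at the start of the string, so it is dropped.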
def get_urls(self):
    try:
        rawres = myparser.Parser(self.totalresults, 'trello.com')
        self.trello_urls = set(rawres.urls())
        # Reset totalresults: up to this point it held the Google results, from here on it holds the Trello page contents.
        self.totalresults = ''
        headers = {'User-Agent': random.choice(['curl/7.37.0', 'Wget/1.19.4'])}  # do not change the headers
        req = (grequests.get(url, headers=headers, timeout=4) for url in self.trello_urls)
        responses = grequests.imap(req, size=8)
        for response in responses:
            self.totalresults += response.content.decode('UTF-8')
        rawres = myparser.Parser(self.totalresults, self.word)
        self.hostnames = rawres.hostnames()
    except Exception as e:
        print(f'Error occurred: {e}')
async def get_urls(self):
    try:
        rawres = myparser.Parser(self.totalresults, 'trello.com')
        self.trello_urls = set(await rawres.urls())
        # Reset totalresults: up to this point it held the Google results, from here on it holds the Trello page contents.
        self.totalresults = ''
        headers = {'User-Agent': random.choice(['curl/7.37.0', 'Wget/1.19.4'])}  # do not change the headers
        responses = await AsyncFetcher.fetch_all(self.trello_urls, headers=headers, proxy=self.proxy)
        for response in responses:
            self.totalresults += response
        rawres = myparser.Parser(self.totalresults, self.word)
        self.hostnames = await rawres.hostnames()
    except Exception as e:
        print(f'Error occurred: {e}')
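# A minimal sketch of the behaviour the async version above relies on: fetch all
# Trello URLs concurrently and hand back the decoded bodies as strings. This is
# an assumption about what AsyncFetcher.fetch_all provides, not the project's
# actual implementation; error and proxy handling are simplified.
import asyncio
import aiohttp


async def fetch_all_sketch(urls, headers=None, proxy=None):
    async def fetch(session, url):
        try:
            async with session.get(url, proxy=proxy) as resp:
                return await resp.text()
        except Exception:
            # Swallow per-URL failures so one bad URL does not abort the whole batch.
            return ''

    timeout = aiohttp.ClientTimeout(total=10)
    async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
        return await asyncio.gather(*(fetch(session, url) for url in urls))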
async def get_hostnames(self):
    rawres = myparser.Parser(self.results, self.word)
    new_lst = []
    for host in await rawres.hostnames():
        host = str(host)
        if host[0].isdigit():
            # Get last digit of string and shift hostname to remove ip in string
            matches = re.match('.+([0-9])[^0-9]*$', host)
            new_lst.append(host[matches.start(1) + 1:])
        else:
            new_lst.append(host)
    return new_lst
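# A worked example of the IP-stripping regex above, using a made-up host string:
# '.+([0-9])[^0-9]*$' captures the last digit anywhere in the string, so slicing
# one character past it removes a leading IP. Note that if the remaining hostname
# itself contains digits, the slice cuts past the last of those as well.
import re

host = '192.168.1.1www.example.com'
matches = re.match('.+([0-9])[^0-9]*$', host)
print(host[matches.start(1) + 1:])  # www.example.com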
async def get_emails(self):
    rawres = myparser.Parser(self.total_results, self.word)
    toparse_emails = await rawres.emails()
    emails = set()
    # strip out numbers and dashes for emails that look like [email protected]
    for email in toparse_emails:
        email = str(email)
        if '-' in email and email[0].isdigit() and email.index('-') <= 9:
            while email[0] == '-' or email[0].isdigit():
                email = email[1:]
        emails.add(email)
    return list(emails)
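# Illustrative only: the kind of scraped artefact the loop above cleans up, where
# digits and a dash were glued onto an address. The real inputs depend on what
# the parser extracts, and the concrete address here is made up.
email = '123-alice@example.com'
if '-' in email and email[0].isdigit() and email.index('-') <= 9:
    while email[0] == '-' or email[0].isdigit():
        email = email[1:]
print(email)  # alice@example.com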
async def get_hostnames(self):
    rawres = myparser.Parser(self.total_results, self.word)
    return await rawres.hostnames()

def get_files(self):
    rawres = myparser.Parser(self.total_results, self.word)
    return rawres.fileurls(self.files)

async def get_profiles(self):
    rawres = myparser.Parser(self.totalresults, self.word)
    return rawres.profiles()

async def get_emails(self):
    rawres = myparser.Parser(self.totalresults, self.word)
    return await rawres.emails()

async def get_links(self):
    links = myparser.Parser(self.totalresults, self.word)
    return await splitter(await links.links_linkedin())
def test_emails(self):
    word = 'domain.com'
    results = '@domain.com***a@domain***banotherdomain.com***[email protected]***[email protected]***'
    parse = myparser.Parser(results, word)
    emails = sorted(parse.emails())
    assert emails == ['*****@*****.**', '*****@*****.**']
def get_emails(self):
    rawres = myparser.Parser(self.total_results, self.word)
    return rawres.emails()

def get_hostnames(self):
    return myparser.Parser(self.results, self.word).hostnames()

def get_links(self):
    links = myparser.Parser(self.totalresults, self.word)
    return splitter(links.links_linkedin())

async def get_hostnames(self) -> list:
    parser = myparser.Parser(self.total_results, self.word)
    return await parser.hostnames()

async def get_emails(self) -> set:
    parser = myparser.Parser(self.total_results, self.word)
    return await parser.emails()

async def get_hostnames(self, proxy=False):
    self.proxy = proxy
    rawres = myparser.Parser(self.total_results, self.word)
    return await rawres.hostnames()

async def get_people(self):
    rawres = myparser.Parser(self.totalresults, self.word)
    return await rawres.people_linkedin()

async def parse_emails(self, content):
    rawres = myparser.Parser(content, self.word)
    return await rawres.emails()

def get_hostnames(self):
    rawres = myparser.Parser(self.results, self.word)
    return rawres.hostnames()

def get_allhostnames(self):
    rawres = myparser.Parser(self.total_results, self.word)
    return rawres.hostnames_all()

async def parse_hostnames(self, content):
    rawres = myparser.Parser(content, self.word)
    return await rawres.hostnames()