def find_phones(self, text, leader=False):
    """Collect phone numbers found in ``text`` (matcher region "CH").

    When ``leader`` is exactly ``False`` the whole text is scanned. When it
    is exactly ``True`` the text is parsed as HTML and only the elements
    around each keyword in ``self.words_for_company_leader`` are scanned:
    first the parent of the keyword's text node and, if that produced
    nothing, its grandparent. The result is passed through
    ``self.unique_phones``. Any exception is printed and whatever was
    gathered up to that point is returned.
    """
    found = list()

    def harvest(fragment):
        # Keep whatever follows the first ') ' in each match's string form.
        for hit in phonenumbers.PhoneNumberMatcher(fragment, "CH"):
            tail = str(hit).split(sep=') ', maxsplit=1)[1]
            if tail:
                found.append(tail)

    def scan_near_keywords(soup, pick_element):
        for keyword in self.words_for_company_leader:
            if keyword not in str(soup):
                continue
            try:
                anchor = soup.find(text=re.compile(keyword))
                harvest(str(pick_element(anchor)))
            except Exception:  # noqa
                # A keyword with no usable element is simply skipped.
                continue

    try:
        if leader is False:
            harvest(text)
        if leader is True:
            page = BeautifulSoup(text, 'lxml')
            scan_near_keywords(page, lambda node: node.parent)
            if not found:  # noqa
                # Nothing next to the keywords: widen to the grandparent.
                scan_near_keywords(page, lambda node: node.parent.parent)
        found = self.unique_phones(found)
    except Exception as e:
        print(f'find_phones: {e}')
    return found
def lf_contains_phonenumber(x):
    """Labeling function: return SPECIAL_NUMBER when ``x`` looks like a phone number.

    ``x`` must expose ``text``, ``text_blob`` and ``string_index``. The
    checks run in order:

    1. ``x.text`` contains no digit at all -> ABSTAIN.
    2. The phone matcher (region "US") finds a number in ``x.text``.
    3. ``x.text`` matches one of three dash-separated patterns.
    4. ``x.text`` contains a dash and the matcher finds a number once up to
       15 characters of surrounding ``x.text_blob`` are added.

    Anything else -> ABSTAIN.
    """
    lets = str(x.text)
    if not any(ch.isdigit() for ch in lets):
        return ABSTAIN

    # Clamp the start so an index below 15 does not wrap round to the end of
    # the blob (a negative slice start counts from the right).
    context_start = max(x.string_index - 15, 0)
    pre_context = x.text_blob[context_start:x.string_index]
    post_context = x.text_blob[x.string_index:x.string_index + 15]

    if any(True for _ in phonenumbers.PhoneNumberMatcher(lets, "US")):
        return SPECIAL_NUMBER
    if re.search(r"\w{3}-\w{3}-\w{4}", lets):
        return SPECIAL_NUMBER
    if re.search(r"(\w{3})\w{3}-\w{4}", lets):
        return SPECIAL_NUMBER
    if re.search(r"\(\w{3}\)\w{3}-\w{4}", lets):
        return SPECIAL_NUMBER

    # The original called str.count("-", lets), which counts occurrences of
    # ``lets`` inside "-" and is therefore always 0 for text with a digit.
    if lets.count("-") == 0:
        return ABSTAIN

    widened = str(pre_context + lets + post_context)
    if any(True for _ in phonenumbers.PhoneNumberMatcher(widened, "US")):
        return SPECIAL_NUMBER
    return ABSTAIN
def test(sent):
    """Print every phone number found in ``sent`` (region "US").

    All raw match objects are printed first, followed by the same numbers
    in E.164 format.
    """
    # The original scanned an undefined ``text`` and ignored ``sent``.
    matches = list(phonenumbers.PhoneNumberMatcher(sent, "US"))
    for match in matches:
        print(match)
    for match in matches:
        print(
            phonenumbers.format_number(match.number,
                                       phonenumbers.PhoneNumberFormat.E164))
def forgot_password():
    """Handle the "forgot password" form.

    GET renders the form. POST treats the submitted ``phone_email`` value
    both as a possible phone number (region 'MY') and as an e-mail address;
    every matching ``Qr`` account gets a fresh token and a reset e-mail.
    The same flash message is shown whether or not an account was found.
    """
    if request.method != 'POST':
        return render_template('forgot_password.html')

    def issue_reset(account):
        # Store a new token, then mail the link that carries it.
        account.token = secrets.token_urlsafe(90)
        db.session.commit()
        link = url_for('reset_password', _external=True)+'?token='+account.token
        letter = Message(subject="Resetting your QR Id password",
                         sender='Pintar-AI',
                         recipients=[account.email])
        letter.html = render_template('mail_reset.html',
                                      username=account.name,
                                      reset_link=link)
        mail.send(letter)

    phone_email = request.form['phone_email']

    for match in phonenumbers.PhoneNumberMatcher(phone_email, 'MY'):
        e164 = phonenumbers.format_number(
            match.number, phonenumbers.PhoneNumberFormat.E164)
        by_phone = Qr.query.filter_by(
            phone_number=PhoneNumber(e164, 'MY')).first()
        if by_phone:
            issue_reset(by_phone)

    by_email = Qr.query.filter_by(email=phone_email).first()
    if by_email:
        issue_reset(by_email)

    flash("If any account related, we sent link to your email", category="success")
    return redirect(url_for('forgot_password'))
def qr_login():
    """Log a user in by phone number or e-mail and return their QR image.

    The submitted ``phone_email`` is first scanned for phone numbers
    (region 'MY'); the first number that belongs to a ``Qr`` account decides
    the outcome. Otherwise the value is looked up as an e-mail address.
    A wrong password, an unverified account or an unknown identifier
    flashes a message and redirects to the login page.
    """
    phone_email = request.form['phone_email']
    password = request.form['password']

    def respond(account):
        # Shared outcome once an account has been located.
        if not bcrypt.checkpw(password.encode('utf-8'), account.password):
            flash("Wrong password", category="danger")
            return redirect(url_for('login'))
        if not account.is_verified:
            flash("Please verify your account", category="danger")
            return redirect(url_for('login'))
        picture = string_to_qr("qr_id/" + account.phone_number.e164)
        return serve_pil_image(picture)

    for match in phonenumbers.PhoneNumberMatcher(phone_email, 'MY'):
        e164 = phonenumbers.format_number(
            match.number, phonenumbers.PhoneNumberFormat.E164)
        by_phone = Qr.query.filter_by(
            phone_number=PhoneNumber(e164, 'MY')).first()
        if by_phone:
            return respond(by_phone)

    by_email = Qr.query.filter_by(email=phone_email).first()
    if by_email:
        return respond(by_email)

    flash("Phone number or Email hasn't been registered", category="danger")
    return redirect(url_for('login'))
def _parse_phone(msg):
    """Return the first phone number in ``msg`` that is not ``MY_PHONE``.

    Numbers are matched with region "US" and compared in E.164 format.
    An empty string is returned when no other number is present.
    """
    candidates = (
        phonenumbers.format_number(hit.number,
                                   phonenumbers.PhoneNumberFormat.E164)
        for hit in phonenumbers.PhoneNumberMatcher(msg, "US")
    )
    return next((e164 for e164 in candidates if e164 != MY_PHONE), '')
def parse_by_phonenumbers(text):
    """Extract phone numbers from ``text`` and return them as a set of strings.

    Matches are searched with region 'RU' and ``Leniency.POSSIBLE``. Valid
    numbers are kept as they are; invalid ones are kept only when their raw
    text contains a dash and their E.164 form has length
    ``NO_CITY_CODE_LEN_NUMBER``, in which case ``MOSCOW_CODE`` is inserted.
    Each result is the E.164 string with its first two characters replaced
    by '8'.
    """
    not_validated_numbers = set()
    validated_numbers = set()

    def format_number(number_object):
        # E.164 rendering shared by both branches below.
        return phonenumbers.format_number(
            number_object,
            phonenumbers.PhoneNumberFormat.E164,
        )

    for match in phonenumbers.PhoneNumberMatcher(
            text,
            'RU',
            # I used POSSIBLE to get numbers without city code
            phonenumbers.Leniency.POSSIBLE,
    ):
        # let's suppose: number without city code always have "-" symbol
        if phonenumbers.is_valid_number(match.number):
            validated_numbers.add(format_number(match.number))
        elif '-' in match.raw_string:
            not_validated_numbers.add(format_number(match.number))

    for number in not_validated_numbers:
        if len(number) == NO_CITY_CODE_LEN_NUMBER:
            # Drop the leading two characters and insert the Moscow code.
            number = f'+7{MOSCOW_CODE}{number[2:]}'
            # NOTE(review): nesting of this add() was reconstructed from a
            # flattened source; confirm it belongs inside the length check.
            validated_numbers.add(number)

    # convert international format to russian internal format
    return {f'8{n[2:]}' for n in validated_numbers}
def find_phone_numbers(string: str, region_code: Optional[str] = None) -> list:
    """
    Find the phone numbers contained in a text.

    Matching is delegated to the Python port of Google's libphonenumber.
    https://github.com/daviddrysdale/python-phonenumbers

    Parameters
    ----------
    string : str
        text to search.
    region_code : str, optional
        If specified, will find the number of the specified country.
        eg. 06.00.00.00.00 if "FR" is specified.

        If not specified, only works for international-formatted phone numbers.
        - ie. phone number with +country code specified
        eg. 06.00.00.00.00 will return an error but +33 6 00 00 00 00 will work.
        supported value: look SUPPORTED_COUNTRY variable.
        NOTE(review): the value, including None, is checked against
        SUPPORTED_COUNTRY; confirm that None is listed there.

    Returns
    -------
    list
        list of matched phone numbers, as they appear in the text.

    Raises
    ------
    ValueError
        if country code is not supported.
    """
    if region_code not in SUPPORTED_COUNTRY:
        raise ValueError(
            'Please enter a valid country code. See SUPPORTED_COUNTRY list.')
    return [
        match.raw_string
        for match in _phonenumbers.PhoneNumberMatcher(string, region_code)
    ]
def get_phones(self, text):
    """Extract phone numbers from a webpage's text.

    Numbers found in ``text`` are merged with those already stored in
    ``self.info['phones']`` and returned as a de-duplicated list in no
    particular order. The matcher is given an empty region.
    """
    # format_number() requires an explicit output format; the original
    # omitted it and raised TypeError on the first match. E.164 is used so
    # that equal numbers collapse to one entry.
    found = {
        phonenumbers.format_number(match.number,
                                   phonenumbers.PhoneNumberFormat.E164)
        for match in phonenumbers.PhoneNumberMatcher(text, '')
    }
    return list(found.union(self.info['phones']))
def normalize_devices_in_string(string, with_emails=False, country="US"):
    """Return the contact identifiers found in ``string``.

    The list starts with e-mail-like tokens when ``with_emails`` is true,
    followed by every phone number (matched for ``country``) whose E.164
    form is longer than 10 characters.
    """
    devices = []
    if with_emails:
        devices = re.findall(r'[\w\.-]+@[\w\.-]+', str(string))

    for hit in phonenumbers.PhoneNumberMatcher(str(string), country):
        e164 = phonenumbers.format_number(
            hit.number, phonenumbers.PhoneNumberFormat.E164)
        if not len(e164) > 10:
            continue
        devices.append(e164)
    return devices
def extract_number_and_standardise(list_of_text, default_country):
    """Extract phone numbers from each string and render them three ways.

    ``list_of_text`` must be a list (checked with ``assert``). Returns the
    tuple ``(national, international, e164)``: three parallel lists with one
    entry per number found, in order of appearance.
    """
    # Preconditions
    assert isinstance(list_of_text, list)

    # Parsed numbers from every string, in order of appearance.
    parsed = [
        hit.number
        for chunk in list_of_text
        for hit in phonenumbers.PhoneNumberMatcher(chunk, default_country)
    ]

    national = [
        phonenumbers.format_number(num, phonenumbers.PhoneNumberFormat.NATIONAL)
        for num in parsed
    ]
    international = [
        phonenumbers.format_number(num, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
        for num in parsed
    ]
    e164 = [
        phonenumbers.format_number(num, phonenumbers.PhoneNumberFormat.E164)
        for num in parsed
    ]
    return national, international, e164
def anon_ner(text):
    """Replace named entities and phone numbers in ``text`` with placeholders.

    Spans tagged PER, ORG or LOC by the NER tagger are replaced first. The
    resulting string is then scanned for phone numbers under each listed
    region code, and every distinct match is replaced once with
    'ТЕЛЕФОН '.
    """
    placeholders = {'PER': 'ИМЯ', 'ORG': 'ОРГАНИЗАЦИЯ', 'LOC': 'ЛОКАЦИЯ'}

    doc = Doc(text)
    doc.segment(segmenter)
    doc.tag_ner(ner_tagger)

    pieces = []
    last = 0
    for span in doc.spans:
        label = placeholders.get(span.type)
        if label is None:
            continue
        pieces.append(text[last:span.start])
        pieces.append(label)
        last = span.stop
    pieces.append(text[last:])
    result = ''.join(pieces)

    countries = [
        'AZ', 'AM', 'BY', 'KZ', 'KG', 'MD', 'RU', 'TJ', 'TM', 'UZ', 'UA'
    ]
    # Gather the match positions for all regions up front. The original
    # carried its cursor and output buffer from one region to the next, so a
    # number recognised under several regions produced repeated output.
    phone_spans = sorted({
        (match.start, match.end)
        for country in countries
        for match in phonenumbers.PhoneNumberMatcher(result, country)
    })

    pieces = []
    last = 0
    for start, end in phone_spans:
        if start < last:
            # Overlaps a span that is already masked: just widen the mask.
            last = max(last, end)
            continue
        pieces.append(result[last:start])
        pieces.append('ТЕЛЕФОН ')
        last = end
    pieces.append(result[last:])
    return ''.join(pieces)
def test():
    """Smoke test: print what the matchers find in a few sample strings."""
    # print() with a single argument behaves the same on Python 2 and 3;
    # the original print statements are syntax errors on Python 3.
    print(find_phone_numbers("PHONE: 1021-34662020/21/22/23/24"))
    print(find_phone_numbers("1021-34662020"))
    print("done..")
    text = "Call me at ++1510-748-8230 if it's before 9:30, or on +703-4800500 after 10am. +971-9-4662020"
    for match in phonenumbers.PhoneNumberMatcher(text, "US"):
        print(match)
def url_fetch(query=""):
    """Search the index for ``query`` and summarise URLs and phone numbers.

    When ``query`` is empty, the query stored under ``session['last_query']``
    is used if there is one. Returns a JSON string with two keys: ``urls``
    maps each URL found in the hits to its number of occurrences, and ``pn``
    lists the phone numbers found, each with its E.164 form and an English
    location description.
    """
    if not query:
        last_query = session.get('last_query', None)
        if last_query is not None:
            query = last_query['query']

    # The unused English stop-word set the original built here was dropped.
    q = {"fields": ["file"], "query": {"term": {"file": query}}}
    r = es.search(body=q, index=es_index)
    data = r['hits']['hits']

    urls_flat = []
    pn = []
    for doc in data:
        content = doc['fields']['file'][0]
        urls_flat.extend(re.findall(r'(https?://[^\s]+)', content))
        try:
            # region=None: no default region is supplied to the matcher.
            for match in phonenumbers.PhoneNumberMatcher(content, region=None):
                pn.append({
                    'number':
                    phonenumbers.format_number(
                        match.number, phonenumbers.PhoneNumberFormat.E164),
                    'location':
                    geocoder.description_for_number(match.number, "en")
                })
        except KeyError:
            pass

    return json.dumps({'urls': dict(Counter(urls_flat)), 'pn': pn})
def find_info(value_matrix, sheet):
    """Extract contact details from each row and write them to ``sheet``.

    Each row of ``value_matrix`` holds a provider name at index 0 and free
    text at index 1. From the text the first US address, the first URL and
    the last US phone number (national format) are extracted; anything that
    is missing becomes "". The rows
    ``[provider, phone, url, address]`` are written to range "C:H".
    """
    data_matrix = []
    for v in value_matrix:
        # Provider name from matrix
        provider_string = v[0]

        # Address: failure is expected when no address is found. Catch
        # Exception, not everything, so Ctrl-C and SystemExit still
        # propagate.
        try:
            add_string = str(pyap.parse(v[1], country='US')[0])
        except Exception:
            add_string = ""

        # URL: failure is expected when no URL is found.
        try:
            web_string = URLExtract().find_urls(v[1])[0]
        except Exception:
            web_string = ""

        # Phone: when several numbers match, the last one wins.
        phone_string = ""
        for match in phonenumbers.PhoneNumberMatcher(v[1], "US"):
            phone_string = phonenumbers.format_number(
                match.number, phonenumbers.PhoneNumberFormat.NATIONAL)

        data_matrix.append(
            [provider_string, phone_string, web_string, add_string])

    # Update cell range with found values
    sheet.update_values("C:H", data_matrix)
def anonymize(data):
    """Replace every US phone number found in ``data`` with '<Phone>'.

    This is best-effort: if matching or replacing fails, the error is
    logged at debug level and ``data`` is returned as far as it was
    processed.
    """
    import logging

    try:
        for match in phonenumbers.PhoneNumberMatcher(data, "US"):
            data = data.replace(match.raw_string, '<Phone>')
    except Exception:
        # The original returned from ``finally``, which silently discarded
        # every exception, KeyboardInterrupt and SystemExit included.
        logging.getLogger(__name__).debug(
            "anonymize: phone masking failed", exc_info=True)
    return data
def get_phone_numbers(self, text) -> List:
    """Return the phone numbers found in ``text`` for this object's region.

    Each number is rendered in international format with dashes replaced
    by spaces.
    """
    numbers = []
    for hit in pn.PhoneNumberMatcher(text, self.__region):
        pretty = pn.format_number(hit.number,
                                  pn.PhoneNumberFormat.INTERNATIONAL)
        numbers.append(pretty.replace('-', ' '))
    return numbers
def handleEvent(self, event):
    """Emit a PHONE_NUMBER event for each phone number in the event data.

    Event data whose hash is already in ``self.results`` is ignored.
    Always returns None.
    """
    payload = event.data
    fingerprint = self.sf.hashstring(payload)

    # Each distinct payload is processed once.
    if fingerprint in self.results:
        return None
    self.results.append(fingerprint)

    self.sf.debug("Received event, " + event.eventType + ", from " + event.module)

    # Make potential phone numbers more friendly to parse
    searchable = payload.replace('.', '-')

    for hit in phonenumbers.PhoneNumberMatcher(searchable, region=None):
        e164 = phonenumbers.format_number(
            hit.number, phonenumbers.PhoneNumberFormat.E164)
        evt = SpiderFootEvent("PHONE_NUMBER", e164, self.__name__, event)
        # Carry the originating data source over, or mark it as unknown.
        evt.moduleDataSource = (event.moduleDataSource
                                if event.moduleDataSource else "Unknown")
        self.notifyListeners(evt)

    return None
def evaluate(self, runner, run, context, text):
    """Test whether ``text`` contains a phone number.

    The text is matched using the country of the run's org. If nothing is
    found, it is retried with a '+' prefix so that it can be read as an
    international number. Returns a match result carrying the first number
    in E.164 format, or ``Test.Result.NO_MATCH``.
    """
    region = run.org.country

    # try to find a phone number in the text we have been sent
    candidates = phonenumbers.PhoneNumberMatcher(text, region)

    # try it as an international number if we failed
    if not candidates.has_next():
        candidates = phonenumbers.PhoneNumberMatcher('+' + text, region)

    if not candidates.has_next():
        return Test.Result.NO_MATCH

    parsed = next(candidates).number
    e164 = phonenumbers.format_number(parsed,
                                      phonenumbers.PhoneNumberFormat.E164)
    return Test.Result.match(e164)
def handleEvent(self, event):
    """Emit a PHONE_NUMBER event for each phone number in the event data.

    For events whose module name contains "sfp_spider", the new events are
    attached to ``event.sourceEvent`` rather than to the event itself.
    Event data whose hash is already in ``self.results`` is ignored.
    Always returns None.
    """
    parent = event.sourceEvent if "sfp_spider" in event.module else event

    payload = event.data
    fingerprint = self.sf.hashstring(payload)

    # Each distinct payload is processed once.
    if fingerprint in self.results:
        return None
    self.results.append(fingerprint)

    self.sf.debug("Received event, " + event.eventType + ", from " + event.module)

    for hit in phonenumbers.PhoneNumberMatcher(payload, region=None):
        e164 = phonenumbers.format_number(
            hit.number, phonenumbers.PhoneNumberFormat.E164)
        self.notifyListeners(
            SpiderFootEvent("PHONE_NUMBER", e164, self.__name__, parent))

    return None
def handleEvent(self, event):
    """Extract phone numbers from content events and carriers from phone events.

    * TARGET_WEB_CONTENT, DOMAIN_WHOIS and NETBLOCK_WHOIS events: each phone
      number found in the data is emitted as a PHONE_NUMBER event.
    * PHONE_NUMBER events: the number is parsed and, when a carrier name is
      known for it, a PROVIDER_TELCO event is emitted.

    Event data whose hash is already in ``self.results`` is ignored.
    Always returns None.
    """
    eventName = event.eventType
    srcModuleName = event.module
    eventData = event.data

    sourceData = self.sf.hashstring(eventData)
    if sourceData in self.results:
        return None
    self.results[sourceData] = True

    self.sf.debug("Received event, " + eventName + ", from " + srcModuleName)

    if eventName in ['TARGET_WEB_CONTENT', 'DOMAIN_WHOIS', 'NETBLOCK_WHOIS']:
        # Make potential phone numbers more friendly to parse
        content = eventData.replace('.', '-')

        for match in phonenumbers.PhoneNumberMatcher(content, region=None):
            n = phonenumbers.format_number(
                match.number, phonenumbers.PhoneNumberFormat.E164)
            evt = SpiderFootEvent("PHONE_NUMBER", n, self.__name__, event)
            if event.moduleDataSource:
                evt.moduleDataSource = event.moduleDataSource
            else:
                evt.moduleDataSource = "Unknown"
            self.notifyListeners(evt)

    if eventName == 'PHONE_NUMBER':
        # Catch Exception rather than BaseException so that
        # KeyboardInterrupt and SystemExit are not reduced to a debug line.
        try:
            number = phonenumbers.parse(eventData)
        except Exception as e:
            self.sf.debug('Error parsing phone number: ' + str(e))
            return None

        try:
            number_carrier = carrier.name_for_number(number, 'en')
        except Exception as e:
            self.sf.debug('Error retrieving phone number carrier: ' + str(e))
            return None

        if number_carrier:
            evt = SpiderFootEvent("PROVIDER_TELCO", number_carrier,
                                  self.__name__, event)
            self.notifyListeners(evt)
        else:
            self.sf.debug("No carrier information found for " + eventData)

        # A geocoder-based GEOINFO lookup used to be sketched here as
        # commented-out code; it is not active.

    return None
def parse_item(self, response):
    """Scrape phone numbers, e-mail addresses and a logo from a page.

    The whole HTML is scanned for phone numbers (for ``self.region``) and
    for e-mail-like strings, and the ``src`` of the first ``<img>`` is taken
    as a logo candidate. If the URL has not been seen before and anything
    was found, a ``ContactInfo`` item is yielded; each of its ``emails``,
    ``numbers`` and ``logos`` fields is filled only when the corresponding
    ``self.scrape_*`` flag is set.

    A falsy ``self.max_results`` means "no limit".

    @param <Response> response : Scrapy Response object from the newest page
    @yield ContactInfo : scrapy Item class with emails, logos, numbers, and url
    @raise CloseSpider : once ``self.max_results`` items have been produced
    """
    contact_info = ContactInfo()
    contact_info['url'] = response.url
    html_text = str(response.text)

    potential_numbers = [
        pn.format_number(match.number, pn.PhoneNumberFormat.E164)
        for match in pn.PhoneNumberMatcher(html_text, self.region)
    ]
    potential_emails = re.findall(
        r'([a-zA-Z0-9+._-]+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)', html_text)

    potential_logos = []
    soup = BeautifulSoup(html_text)
    try:
        potential_logos = [soup.findAll('img')[0]['src']]
    except (IndexError, KeyError):
        # No <img> on the page, or the first <img> has no src attribute.
        pass

    found_something = bool(
        potential_numbers or potential_emails or potential_logos)
    if response.url in self.seen_urls or not found_something:
        return

    contact_info['emails'] = potential_emails if self.scrape_emails else []
    contact_info['numbers'] = potential_numbers if self.scrape_numbers else []
    contact_info['logos'] = potential_logos if self.scrape_logos else []

    if not self.max_results or self.total_results < self.max_results:
        self.total_results += 1
        self.seen_urls.add(response.url)
        yield contact_info
        logging.info(
            f"found {self.total_results}/{self.max_results} results")

    # Only compare against the limit when there is one. The original
    # compared unconditionally, which raised TypeError for None and closed
    # the spider straight away for 0.
    if self.max_results and self.total_results >= self.max_results:
        raise CloseSpider('Reached max results')
def iter_filth(self, text):
    """Yield a ``PhoneFilth`` for each phone number found in ``text``.

    Numbers are matched for ``self.region``; each filth records the match's
    start and end offsets and its raw text.
    """
    matcher = phonenumbers.PhoneNumberMatcher(text, self.region)
    for hit in matcher:
        span = dict(beg=hit.start, end=hit.end, text=hit.raw_string)
        yield PhoneFilth(**span)
def verify_phone_number(number: str, region: str):
    """Return the phone numbers found in ``number`` for ``region``.

    The result is the list of raw matched strings, or
    ``messages.PHONE_EMPTY.value`` when nothing matches.
    """
    found = []
    for hit in phonenumbers.PhoneNumberMatcher(number, region):
        found.append(hit.raw_string)

    if found:
        return found
    return messages.PHONE_EMPTY.value
def extract_phones_from_file(file):
    """Return every US phone number found in ``file`` in E.164 format.

    ``file`` is iterated line by line and each line is stripped before
    matching. Numbers come back in order of appearance, duplicates
    included.
    """
    return [
        pn.format_number(hit.number, pn.PhoneNumberFormat.E164)
        for row in file
        for hit in pn.PhoneNumberMatcher(row.strip(), "US")
    ]
def iter_filth(self, text, document_name: Optional[str] = None):
    """Yield a ``PhoneFilth`` for each phone number found in ``text``.

    Numbers are matched for ``self.region``. Each filth records the match's
    offsets and raw text, this detector's name and the given
    ``document_name``.
    """
    matcher = phonenumbers.PhoneNumberMatcher(text, self.region)
    for hit in matcher:
        details = dict(
            beg=hit.start,
            end=hit.end,
            text=hit.raw_string,
            detector_name=self.name,
            document_name=document_name,
        )
        yield PhoneFilth(**details)
def get_phones(text, country):
    """Return the distinct phone numbers in ``text`` in E.164 format.

    Numbers are matched for ``country``; the order of the returned list is
    that of the underlying set.
    """
    distinct = set()
    for hit in phonenumbers.PhoneNumberMatcher(text, country):
        distinct.add(
            phonenumbers.format_number(hit.number,
                                       phonenumbers.PhoneNumberFormat.E164))
    return list(distinct)
def find_all(text):
    """Return the phone numbers found in ``text`` (region "RU").

    The result is ``{"dates": [], "numbers": [...]}`` with the raw matched
    strings under "numbers". "dates" is always empty: the date search is
    disabled in the code.
    """
    raw_numbers = [
        hit.raw_string for hit in phonenumbers.PhoneNumberMatcher(text, "RU")
    ]
    return {"dates": [], "numbers": raw_numbers}
def format(text):
    """Rewrite each phone number in ``text`` with country code 1.

    Numbers are matched with region 'RU'. Each match has its country code
    overwritten with 1 and is replaced in the text by its international
    rendering.
    """
    pieces = []
    cursor = 0
    for hit in phonenumbers.PhoneNumberMatcher(text, 'RU'):
        parsed = hit.number
        parsed.country_code = 1
        # PhoneNumberFormat.INTERNATIONAL is the constant 1.
        rendered = phonenumbers.format_number(
            parsed, phonenumbers.PhoneNumberFormat.INTERNATIONAL)
        # Copy the text between matches untouched, then the new rendering.
        pieces.append(text[cursor:hit.start])
        pieces.append(rendered)
        cursor = hit.end
    pieces.append(text[cursor:])
    return ''.join(pieces)
def replace_phone(text, region):
    """Replace every phone number in ``text`` with "{PHONENUMBER}".

    ``region`` is an iterable of region codes. The text is scanned once per
    region and each pass works on the output of the previous one, so the
    replacements from all regions accumulate. An empty ``region`` returns
    ``text`` unchanged.
    """
    for reg in region:
        pieces = []
        offset = 0
        for match in phonenumbers.PhoneNumberMatcher(text, reg):
            pieces.append(text[offset:match.start])
            pieces.append("{PHONENUMBER}")
            offset = match.end
        pieces.append(text[offset:])
        # Feed this pass's result into the next region. The original
        # rebuilt the output from the untouched input every time.
        text = "".join(pieces)
    return text