def test__prepare_batch_url(self):
    """A single person query is encoded into the v2 batch-lookup URL."""
    client = FullContact('')
    batch_entry = ('person', {'email': '*****@*****.**'})
    expected = 'https://api.fullcontact.com/v2/person.json?email=test%40test.com'
    assert_equal(client._prepare_batch_url(batch_entry), expected)
def full_contact_request(email):
    """Look up `email` via FullContact and persist the result.

    Records both hits (200) and misses (404) in the FullContact table so we
    know the email was tried and can move on. Retries with a random backoff
    when throttled (202). Logs fatally on a bad/missing API key (403) or any
    unexpected status.
    """
    if constants.FULLCONTACT_KEY is None:
        logger.fatal("constants.FULLCONTACT_KEY is not set.")
        return
    logger.info('Looking up %s', email)
    fc = FullContact(constants.FULLCONTACT_KEY)
    r = fc.person(email=email)
    MIN_RETRY_SECS = 10
    MAX_RETRY_SECS = 600
    code = int(r.status_code)
    if code in (200, 404):
        # Success or not found.
        # (We log "not found" results in db too, so that we know
        # we tried and can move on to next email.)
        contact_json = r.json()
        fc_row = db_models.FullContact()
        fc_row.email = email
        fc_row.fullcontact_response = contact_json
        # Map FullContact social-profile typeIds onto row attributes.
        handle_attrs = {
            'angellist': 'angellist_handle',
            'github': 'github_handle',
            'twitter': 'twitter_handle',
        }
        for profile in contact_json.get('socialProfiles', []):
            if 'typeId' in profile and 'username' in profile:
                attr = handle_attrs.get(profile['typeId'])
                if attr:
                    setattr(fc_row, attr, profile['username'])
        try:
            db.session.add(fc_row)
            db.session.commit()
            logger.info('Email %s recorded to fullcontact', email)
        except IntegrityError:
            # BUG FIX: roll back so the session stays usable after the
            # failed commit (duplicate email row).
            db.session.rollback()
            logger.warning(
                "Email %s has already been entered in FullContact table.",
                email)
    elif code == 403:  # Key fail
        logger.fatal("constants.FULLCONTACT_KEY is not set or is invalid.")
    elif code == 202:
        # We're requesting too quickly; randomly back off and retry.
        delay = randint(MIN_RETRY_SECS, MAX_RETRY_SECS)
        # Lazy %-args instead of eager string formatting.
        logger.warning(
            "Throttled by FullContact. Retrying after random delay of %d",
            delay)
        full_contact_request.retry(countdown=delay)
    else:
        logger.fatal("FullContact request %s with status code %s",
                     email, r.status_code)
        logger.fatal(r.json())
def test_invalid_api_keys(self):
    """Single and batch lookups both return 403 with a bogus key."""
    client = FullContact('test_key')
    single = client.person(email='*****@*****.**')
    assert_equal(single.status_code, 403)
    queries = [
        ('person', {'email': '*****@*****.**'}),
        ('person', {'name': 'Bob Smith'}),
    ]
    batch = client.api_batch(queries)
    assert_equal(batch.status_code, 403)
def test_invalid_api_keys(self):
    """api_get and api_batch both yield 403 when no key is configured."""
    client = FullContact('')
    single = client.api_get('person', email='*****@*****.**')
    assert_equal(single.status_code, 403)
    queries = [
        ('person', {'email': '*****@*****.**'}),
        ('person', {'name': 'Bob Smith'}),
    ]
    batch = client.api_batch(queries)
    assert_equal(batch.status_code, 403)
def check_fullcontact(self, email, password, interactive_flag=False, elastic=False):
    """Query FullContact for `email` and print profile details.

    Returns the list of social-profile URLs when any were found and
    `interactive_flag` is set, otherwise False. When `elastic` is truthy the
    full response is indexed into Elasticsearch. On a queued search (202) the
    user may opt to wait two minutes and retry once.
    """
    print("---" + Fore.CYAN + "FullContact" + Fore.RESET + "---")
    fc = FullContact(conf['keys']['fullcontact'])
    person = fc.person(email=email)
    decoded_person_json = person.content.decode("utf-8")
    person_json = json.loads(decoded_person_json)
    social_to_push = []
    to_elastic = {"email": email, "password": password}
    try:
        if person_json['status'] == 200:
            if 'contactInfo' in person_json:
                if 'fullName' in person_json['contactInfo']:
                    print(person_json['contactInfo']['fullName'])
            if 'socialProfiles' in person_json:
                for social in person_json['socialProfiles']:
                    social_to_push.append(social['url'])
                    print(social['url'])
            if 'demographics' in person_json:
                if 'locationGeneral' in person_json['demographics']:
                    print(person_json['demographics']['locationGeneral'])
            to_elastic.update(person_json)
            if elastic:
                self.put_elastic('fullcontact', 'email', to_elastic)
        elif person_json['status'] == 202:
            # Search is queued server-side; optionally wait and retry.
            if interactive_flag:
                time_dec = input("Your search is queued, do you want to wait for 2 minutes? [Y/N] \n> ")
                if time_dec == "Y":
                    print("Sleeping...")
                    time.sleep(60 * 2)
                    # BUG FIX: the retry used to call
                    # check_fullcontact(email, elastic) -- dropping `password`
                    # and passing `elastic` as `password` -- and discarded the
                    # result. Forward every argument and return the retry.
                    return self.check_fullcontact(
                        email, password,
                        interactive_flag=interactive_flag,
                        elastic=elastic)
        else:
            print("No results")
    except Exception as e:
        print(Fore.RED + str(e) + Fore.RESET)
    if len(social_to_push) > 0 and interactive_flag:
        return social_to_push
    else:
        return False
class FullContact_Client:
    """Minimal wrapper around the FullContact client for email lookups."""

    def __init__(self):
        # NOTE(review): API key is hard-coded in source; consider moving it
        # to configuration or an environment variable.
        self.fc = FullContact('ab76dbb1c4b8c50f')

    def searchbyemail(self, email):
        """Return the raw FullContact lookup result for `email`."""
        client = self.fc
        return client.get(email=email)
def _email_search(self, email, api_key=""):
    """Infer the email pattern for `email` via Clearbit, then FullContact,
    then a LinkedIn Google search, and persist the record to RethinkDB.

    NOTE(review): Python 2 code; original formatting was lost, so statement
    nesting below is a best-effort reconstruction -- verify against VCS.
    """
    try:
        person = clearbit.Person.find(email=email, stream=True)
    except:  # NOTE(review): bare except hides real lookup errors
        person = None
    # Default record used when no pattern could be derived.
    data = {"pattern": None, "name": None, "email": email,
            "domain": email.split("@")[-1], "crawl_source": "email_hunter"}
    if person:
        pattern = EmailGuessHelper()._find_email_pattern(person["name"]["fullName"], email)
        if pattern:
            data = {"pattern": pattern, "name": person["name"]["fullName"],
                    "email": email, "domain": email.split("@")[-1],
                    "crawl_source": "email_hunter"}
    elif not person or not pattern:
        # Clearbit missed -- fall back to FullContact.
        person = FullContact()._person_from_email(email)
        print person
        try:
            person = person["contactInfo"]["fullName"]
            fullcontact_person = True
        except:
            fullcontact_person = False
        if fullcontact_person:
            # NOTE(review): `person` is already the fullName string at this
            # point, so this second subscript looks like it would raise a
            # TypeError -- confirm intent.
            person = person["contactInfo"]["fullName"]
            pattern = EmailGuessHelper()._find_email_pattern(person, email)
            data = {"pattern": pattern, "name": person, "email": email,
                    "domain": email.split("@")[-1], "crawl_source": "email_hunter"}
            print pattern
        else:
            # Last resort: fuzzy-match a LinkedIn profile via Google search.
            _email = email.replace(".", " ").replace("-", " ").replace("_", " ")
            _email = _email.replace("@", " ")
            g = Google().search("{0} site:linkedin.com/pub".format(_email))
            # NOTE(review): `" "[0]` is just " ", and the .split() chaining on
            # the g2 query string looks unintended -- verify these two queries.
            g1 = Google().search("{0} site:linkedin.com/pub".format(_email.split(" "[0])))
            g2 = Google().search("{0} site:linkedin.com/pub".format(_email).split(" ")[-1])
            g = pd.concat([g, g1, g2])
            choices = [i.split(" |")[0] for i in g.link_text]
            person = process.extract(_email, choices, limit=1)
            try:
                person = person[0][0]
            except:
                ''' '''
            pattern = EmailGuessHelper()._find_email_pattern(person, email)
            print "google search pattern", pattern
            if pattern:
                data = {"pattern": pattern, "name": person, "email": email,
                        "domain": email.split("@")[-1], "crawl_source": "email_hunter"}
            else:
                data = {"pattern": None, "name": None, "email": email,
                        "domain": email.split("@")[-1], "crawl_source": "email_hunter"}
    #data = pd.DataFrame([data])
    # Persist whichever record we ended up with to RethinkDB.
    conn = r.connect(host="localhost", port=28015, db="triggeriq")
    r.table('email_pattern_crawls').insert(data).run(conn)
    #CompanyEmailPatternCrawl()._persist(data, "emailhunter", api_key) # persist to rethinkdb
    print "person", person
def get_identity(config, email):
    """Fetch an identity for `email`, serving from the database first.

    Returns a (status_code, payload) tuple taken from the DB cache, or from
    FullContact on a cache miss (persisting the fetched profile).
    """
    client = FullContact(config['FC_KEY'])
    store = divan.Database(config['DB_URI'], config['DB_NAME'],
                           auth=(config['DB_USER'], config['DB_PASS']))
    try:
        res = store.get_or_create()
        assert res.status_code in [200, 201]
    except AssertionError:
        # Database could not be opened/created; surface its response as-is.
        return res.status_code, res.json()
    cached = store.get(email)
    if cached.status_code == 200:
        return cached.status_code, cached.json()
    profile = client.get(email=email)
    if profile['status'] != 200:
        return profile['status'], profile
    profile['_id'] = email
    saved = store.post(params=profile)
    return saved.status_code, profile
def _whois_search(self, domain):
    """Derive email patterns from `domain`'s WHOIS contacts and persist them.

    Performs one WHOIS lookup, builds a DataFrame of the non-empty contact
    records, infers an email pattern per contact, and hands the frame to
    CompanyEmailPatternCrawl()._persist. Returns an empty DataFrame when the
    WHOIS lookup fails.
    """
    # TODO - fix this
    try:
        # BUG FIX: the lookup was performed twice; the second result was
        # immediately overwritten and wasted a network round-trip.
        results = pythonwhois.get_whois(domain)
    except Exception:
        # Lookup failed; nothing to persist.
        return pd.DataFrame()
    contacts = list(filter(None, results['contacts'].values()))
    emails = pd.DataFrame(contacts)
    emails['domain'] = domain
    for index, row in emails.iterrows():
        name = FullContact()._normalize_name(row['name'])
        pattern = EmailGuessHelper()._find_email_pattern(name, row.email)
        # .loc replaces the long-deprecated/removed .ix indexer.
        emails.loc[index, 'pattern'] = pattern
    CompanyEmailPatternCrawl()._persist(emails, "whois_search")
def _research_emails(self, emails):
    """For each known email, look up the person on FullContact and find which
    rendered pattern reproduces the address; return the matches as a DataFrame.

    NOTE(review): Python 2 code; original formatting was lost, nesting is a
    best-effort reconstruction.
    """
    _emails = pd.DataFrame()
    for email in emails:
        # if -, ., _ | clean emails
        full_name = FullContact()._person_from_email(email)
        print email, full_name
        # A plain string result indicates the lookup did not return a profile.
        if type(full_name) is str:
            continue
        full_name = full_name['contactInfo']['fullName']
        person = EmailGuessHelper()._name_to_email_variables(full_name)
        person['domain'] = email.split('@')[-1]
        # Render every candidate pattern and keep the one that matches.
        for pattern in EmailGuessHelper()._patterns():
            _email = pystache.render(pattern, person)
            if email.lower() == _email.lower():
                person['pattern'], person['email'] = pattern, email
                _emails = _emails.append(person, ignore_index=True)
    return _emails
def run(self, conf, args, plugins):
    """Look up a person by whichever CLI identifier was supplied, in priority
    order, and pretty-print the JSON response; show help when none was given."""
    fc = FullContact(conf['FullContact']['key'])
    # (args attribute, FullContact query parameter), checked in order.
    lookups = [
        ('twitter', 'twitter'),
        ('email', 'email'),
        ('phone', 'phone'),
        ('md5', 'emailMD5'),
        ('domain', 'domain'),
    ]
    for attr, param in lookups:
        value = getattr(args, attr)
        if value:
            res = fc.person(**{param: value})
            print(json.dumps(res.json(), sort_keys=True, indent=4))
            return
    self.parser.print_help()
def _zoominfo_search(self, domain): qry = 'site:zoominfo.com/p/ "@{0}"'.format(domain) queue = "zoominfo-check-" + domain test = Google().search(qry, 5) res = [[word.lower() for word in link.split() if "@" in word] for link in test[test.link_span.str.contains('@')].link_span] test.ix[test.link_span.str.contains('@'), 'email'] = res test = test[test.email.notnull()] test['name'] = [link.split('|')[0].strip() for link in test.link_text] emails = test emails['domain'] = domain patterns = [] for index, row in emails.iterrows(): name = FullContact()._normalize_name(row['name']).strip() print row.email email = row.email.strip() if email[-1] is ".": email = email[:-1] pattern = EmailGuessHelper()._find_email_pattern(name, email) patterns.append(pattern) emails['pattern'] = patterns CompanyEmailPatternCrawl()._persist(emails, "zoominfo_search")
def test__prepare_batch_url(self):
    """_prepare_batch_url encodes a person query into the v2 lookup URL."""
    client = FullContact('test_key')
    query = ('person', {'email': '*****@*****.**'})
    expected = 'https://api.fullcontact.com/v2/person.json?email=test%40test.com'
    assert_equal(client._prepare_batch_url(query), expected)
from fullcontact import FullContact
import json

# One-off CLI lookup: prompt for an email, query FullContact, print
# the person's name and deduced location.
fc = FullContact('your_api_key')  # NOTE(review): placeholder key -- replace before running
user_id = input('Please enter user email-id: ')
r = fc.person(email=user_id)
#data = json.load(r.json())
data = r.json()
#print(data['contactInfo'])
if data['status'] == 200:
    print('Name: ' + data['contactInfo']['fullName'])
    # NOTE(review): sparse profiles may lack 'demographics'/'locationDeduced';
    # this subscript chain would raise KeyError in that case -- confirm.
    print('Location: ' + data['demographics']['locationDeduced']['deducedLocation'])
else:
    print('Data unavailable right now.')
from fullcontact import FullContact from config import fullcontact_api from person import perDetail import json fc = FullContact(fullcontact_api) def fetchData(email_id): person = fc.person(email=email_id) data = person.json() try: print "\n-------------------------------------------------\n" print "[+] Gathering Personal Details from [FullContact]\n" print "Full Name: ----> " + data['contactInfo']['fullName'] print "Gender: -------> " + str(data['demographics']['gender']) print "State: --------> " + str( data['demographics']['locationDeduced']['state']['name']) print "Country: ------> " + str( data['demographics']['locationDeduced']['country']['name']) for u in data['contactInfo']['websites']: print "Website: ------> " + u['url'] except: print "Unavailable" try: print "\n\n[+] Gathering Employment Details from [FullContact]\n" for org in data['organizations']: print "Organisation Name: " + org[ 'name'] + " " + "\nJob Title: " + " " + org[ 'title'] + " " + "\nStart date: " + " " + org[
def hello():
    """Flask view: guess an email with Toofr from the submitted form, store
    the guess in MySQL, then enrich the newest guess via FullContact and
    store the resulting profile; finally re-render the form.

    NOTE(review): Python 2 code; original formatting was lost, so nesting is
    a best-effort reconstruction -- verify against VCS.
    """
    first_name = request.form["firstName"]
    last_name = request.form["lastName"]
    company_url = request.form["companyUrl"]
    source = "Ad Hoc"
    api_key = "ddb2740f8d2338c78497519c13cc7076"  # NOTE(review): key checked into source
    params = {"key": api_key, "domain": company_url, "first": first_name, "last": last_name}
    toofr_url = "http://toofr.com/api/guess?"
    test = requests.get(toofr_url, params=params)
    toofr_data = test.json()
    try:
        toofr_email = toofr_data["response"]["email"]
    except:
        toofr_email = None
    try:
        toofr_confidence = toofr_data["response"]["confidence"]
    except:
        toofr_confidence = None
    # One-row frame appended to the investor_toofr_data MySQL table.
    toofr_data = pd.DataFrame(
        columns=("toofr_email", "first_name", "last_name", "toofr_confidence",
                 "company_url", "source")
    )
    existing_records_final_links = len(toofr_data)
    toofr_data.loc[existing_records_final_links] = [
        toofr_email, first_name, last_name, toofr_confidence, company_url, source,
    ]
    toofr_data.to_sql("investor_toofr_data", con=conn, flavor="mysql",
                      if_exists="append", index=False)
    # Re-read only the row just inserted (max id).
    query = """ select td.id,td.toofr_email as email from investor_toofr_data td having td.id = (select max(id) from investor_toofr_data);"""
    toofr_data = psql.read_frame(query, conn)
    toofr_data_dict = {}
    toofr_data_dict = toofr_data.set_index("id").to_dict()
    toofr_data_dict = toofr_data_dict["email"]
    for key, value in toofr_data_dict.items():
        fc = FullContact("76152464a239f71c")  # NOTE(review): key checked into source
        print key, value
        person_profile = fc.get(email=value)
        if person_profile["status"] == 200:
            rep_gender = None
            rep_location = None
            rep_klout_score = None
            rep_klout_topic = None
            rep_facebook_url = None
            rep_facebook_followers = None
            rep_facebook_following = None
            rep_linkedin_url = None
            rep_twitter_url = None
            rep_twitter_followers = None
            rep_twitter_following = None
            rep_angellist_url = None
            rep_angellist_followers = None
            try:
                rep_gender = person_profile["demographics"]["gender"]
            except:
                print "gender_missing"
            try:
                rep_location = person_profile["demographics"]["locationGeneral"]
            except:
                print "location_missing"
            try:
                rep_klout_score = person_profile["digitalFootprint"]["scores"][0]["value"]
            except:
                print "klout score missing"
            try:
                rep_klout_topic = person_profile["digitalFootprint"]["topics"][0]["value"]
            except:
                print "klout topic missing"
            try:
                rep_social_profiles = person_profile["socialProfiles"]
                if len(rep_social_profiles) > 0:
                    for i in xrange(0, len(rep_social_profiles)):
                        if rep_social_profiles[i]["typeName"] == "Facebook":
                            try:
                                rep_facebook_url = rep_social_profiles[i]["url"]
                            except:
                                print "facebook url missing"
                            try:
                                rep_facebook_followers = rep_social_profiles[i]["followers"]
                            except:
                                print "facebook followers missing"
                            try:
                                rep_facebook_following = rep_social_profiles[i]["following"]
                            except:
                                print "facebook following missing"
                        if rep_social_profiles[i]["typeName"] == "LinkedIn":
                            try:
                                rep_linkedin_url = rep_social_profiles[i]["url"]
                            except:
                                print "linkedin url missing"
                        if rep_social_profiles[i]["typeName"] == "Twitter":
                            try:
                                rep_twitter_url = rep_social_profiles[i]["url"]
                            except:
                                print "twitter url missing"
                            try:
                                rep_twitter_followers = rep_social_profiles[i]["followers"]
                            except:
                                print "twitter followers missing"
                            try:
                                rep_twitter_following = rep_social_profiles[i]["following"]
                            except:
                                print "twitter following missing"
                        if rep_social_profiles[i]["typeName"] == "AngelList":
                            try:
                                rep_angellist_url = rep_social_profiles[i]["url"]
                            except:
                                print "angel list url missing"
                            try:
                                rep_angellist_followers = rep_social_profiles[i]["followers"]
                            except:
                                print "angel list followers missing"
            except:
                print "no social profile found"
            # One-row frame appended to the fullcontact MySQL table.
            data = pd.DataFrame(
                columns=(
                    "toofr_id", "rep_gender", "rep_location", "rep_klout_score",
                    "rep_klout_topic", "rep_facebook_url", "rep_facebook_followers",
                    "rep_facebook_following", "rep_linkedin_url", "rep_twitter_url",
                    "rep_twitter_followers", "rep_twitter_following",
                    "rep_angellist_url", "rep_angellist_followers",
                )
            )
            existing_records_final_links = len(data)
            data.loc[existing_records_final_links] = [
                key, rep_gender, rep_location, rep_klout_score, rep_klout_topic,
                rep_facebook_url, rep_facebook_followers, rep_facebook_following,
                rep_linkedin_url, rep_twitter_url, rep_twitter_followers,
                rep_twitter_following, rep_angellist_url, rep_angellist_followers,
            ]
            data = data.where(pd.notnull(data), None)
            data.to_sql("fullcontact", con=conn, flavor="mysql",
                        if_exists="append", index=False)
    return render_template("form_action.html", firstName=first_name, lastName=last_name)
def test_adds_endpoint_methods(self):
    """Every declared GET endpoint is exposed as a callable attribute."""
    client = FullContact('')
    for name in client.get_endpoints:
        method = getattr(client, name)
        assert_true(isinstance(method, FunctionType))
def __init__(self):
    super().__init__()
    # FullContact client built from the class-level API key constant.
    self.fc = FullContact(self.THE_KEY)
def __init__(self):
    # NOTE(review): FullContact API key is hard-coded in source; consider
    # loading it from configuration or an environment variable.
    self.fc = FullContact('ab76dbb1c4b8c50f')
from fullcontact import FullContact import json from attest import Tests, assert_hook fc_tests = Tests() api_key = raw_input("Please enter an API key for FullContact: ") fc = FullContact(api_key) test_email = "*****@*****.**" test_twitter = "garbados" @fc_tests.test def bad_key(): test = fc.get(email=test_email, apiKey='this is a bad api key') assert test['status'] == 403 @fc_tests.test def bad_params(): test = fc.get() assert test['status'] == 422 @fc_tests.test def good_param(): test = fc.get(email=test_email) assert test['status'] == 200 @fc_tests.test def many_params():
def hello():
    """Flask view: guess an email with Toofr from the submitted form, store
    the guess in MySQL, enrich the newest guess via FullContact, store the
    profile, then re-render the form.

    NOTE(review): Python 2 code; original formatting was lost, so nesting is
    a best-effort reconstruction -- verify against VCS.
    """
    first_name = request.form['firstName']
    last_name = request.form['lastName']
    company_url = request.form['companyUrl']
    source = "Ad Hoc"
    api_key = "ddb2740f8d2338c78497519c13cc7076"  # NOTE(review): key checked into source
    params = {
        'key': api_key,
        'domain': company_url,
        'first': first_name,
        'last': last_name
    }
    toofr_url = "http://toofr.com/api/guess?"
    test = requests.get(toofr_url, params=params)
    toofr_data = test.json()
    try:
        toofr_email = toofr_data['response']['email']
    except:
        toofr_email = None
    try:
        toofr_confidence = toofr_data['response']['confidence']
    except:
        toofr_confidence = None
    # One-row frame appended to the investor_toofr_data MySQL table.
    toofr_data = pd.DataFrame(columns=('toofr_email', 'first_name', 'last_name',
                                       'toofr_confidence', 'company_url', 'source'))
    existing_records_final_links = len(toofr_data)
    toofr_data.loc[existing_records_final_links] = [
        toofr_email, first_name, last_name, toofr_confidence, company_url, source
    ]
    toofr_data.to_sql("investor_toofr_data", con=conn, flavor='mysql',
                      if_exists='append', index=False)
    # Re-read only the row just inserted (max id).
    query = ''' select td.id,td.toofr_email as email from investor_toofr_data td having td.id = (select max(id) from investor_toofr_data);'''
    toofr_data = psql.read_frame(query, conn)
    toofr_data_dict = {}
    toofr_data_dict = toofr_data.set_index('id').to_dict()
    toofr_data_dict = toofr_data_dict['email']
    for key, value in toofr_data_dict.items():
        fc = FullContact('76152464a239f71c')  # NOTE(review): key checked into source
        print key, value
        person_profile = fc.get(email=value)
        if person_profile['status'] == 200:
            rep_gender = None
            rep_location = None
            rep_klout_score = None
            rep_klout_topic = None
            rep_facebook_url = None
            rep_facebook_followers = None
            rep_facebook_following = None
            rep_linkedin_url = None
            rep_twitter_url = None
            rep_twitter_followers = None
            rep_twitter_following = None
            rep_angellist_url = None
            rep_angellist_followers = None
            try:
                rep_gender = person_profile['demographics']['gender']
            except:
                print 'gender_missing'
            try:
                rep_location = person_profile['demographics']['locationGeneral']
            except:
                print 'location_missing'
            try:
                rep_klout_score = person_profile['digitalFootprint']['scores'][0]['value']
            except:
                print 'klout score missing'
            try:
                rep_klout_topic = person_profile['digitalFootprint']['topics'][0]['value']
            except:
                print 'klout topic missing'
            try:
                rep_social_profiles = person_profile['socialProfiles']
                if len(rep_social_profiles) > 0:
                    for i in xrange(0, len(rep_social_profiles)):
                        if rep_social_profiles[i]['typeName'] == 'Facebook':
                            try:
                                rep_facebook_url = rep_social_profiles[i]['url']
                            except:
                                print 'facebook url missing'
                            try:
                                rep_facebook_followers = rep_social_profiles[i]['followers']
                            except:
                                print 'facebook followers missing'
                            try:
                                rep_facebook_following = rep_social_profiles[i]['following']
                            except:
                                print 'facebook following missing'
                        if rep_social_profiles[i]['typeName'] == 'LinkedIn':
                            try:
                                rep_linkedin_url = rep_social_profiles[i]['url']
                            except:
                                print 'linkedin url missing'
                        if rep_social_profiles[i]['typeName'] == 'Twitter':
                            try:
                                rep_twitter_url = rep_social_profiles[i]['url']
                            except:
                                print 'twitter url missing'
                            try:
                                rep_twitter_followers = rep_social_profiles[i]['followers']
                            except:
                                print 'twitter followers missing'
                            try:
                                rep_twitter_following = rep_social_profiles[i]['following']
                            except:
                                print 'twitter following missing'
                        if rep_social_profiles[i]['typeName'] == 'AngelList':
                            try:
                                rep_angellist_url = rep_social_profiles[i]['url']
                            except:
                                print 'angel list url missing'
                            try:
                                rep_angellist_followers = rep_social_profiles[i]['followers']
                            except:
                                print 'angel list followers missing'
            except:
                print 'no social profile found'
            # One-row frame appended to the fullcontact MySQL table.
            data = pd.DataFrame(
                columns=('toofr_id', 'rep_gender', 'rep_location', 'rep_klout_score',
                         'rep_klout_topic', 'rep_facebook_url', 'rep_facebook_followers',
                         'rep_facebook_following', 'rep_linkedin_url', 'rep_twitter_url',
                         'rep_twitter_followers', 'rep_twitter_following',
                         'rep_angellist_url', 'rep_angellist_followers'))
            existing_records_final_links = len(data)
            data.loc[existing_records_final_links] = [
                key, rep_gender, rep_location, rep_klout_score, rep_klout_topic,
                rep_facebook_url, rep_facebook_followers, rep_facebook_following,
                rep_linkedin_url, rep_twitter_url, rep_twitter_followers,
                rep_twitter_following, rep_angellist_url, rep_angellist_followers
            ]
            data = data.where(pd.notnull(data), None)
            data.to_sql('fullcontact', con=conn, flavor='mysql',
                        if_exists='append', index=False)
    return render_template('form_action.html', firstName=first_name, lastName=last_name)
def test_init(self):
    """The constructor stores the given API key verbatim."""
    client = FullContact('')
    assert_equal(client.api_key, '')
class FullContactEngager(Engager):
    """Engager that enriches person and company entities via FullContact v2."""

    #AYDRATE_KEY = "13edcff433f0c479" # Aydrate
    #ACURATE_TRIAL_KEY = "2e7c73db16e677f8" # AcureRate - Trial
    ACURATE_PRODUCTION_KEY = "401739667f580b02" # AcureRate - Production
    THE_KEY = ACURATE_PRODUCTION_KEY # Correct Key

    def __init__(self):
        super().__init__()
        # Client bound to the production key above.
        self.fc = FullContact(self.THE_KEY)

    def __str__(self):
        return 'FullContact Engager'

    def __repr__(self):
        return 'FullContact Engager'

    def get_provider_name(self):
        return 'FullContact'

    def get_short_symbol(self):
        return 'fc'

    def get_api_key(self):
        return FullContactEngager.THE_KEY

    def set_enrich_key(self):
        """Choose the lookup key: pivot email for persons, deduced domain for
        companies. Raises EngagementException when neither is available."""
        t = self.enriched_entity.__class__.__name__
        if t == 'AcureRatePerson':
            email = self.get_pivot_email()
            if email is None:
                raise EngagementException(
                    "FullContacts - cannot engage. No email available as enrich key"
                )
            self.enrich_key = email
        elif t == 'AcureRateCompany':
            if C.DOMAIN not in self.enriched_entity.deduced:
                raise EngagementException(
                    "FullContacts - cannot engage - no domain property to use as key"
                )
            self.enrich_key = self.enriched_entity.deduced.get(C.DOMAIN)
        else:
            raise EngagementException(
                "FullContacts - cannot engage - cannot generate enrich key. Unknown entity type"
            )

    def enrich_person(self):
        """Copy person fields from the FullContact response into the entity.

        Returns [P.JOBS] as the list of updated attribute keys.
        """
        result_obj = self._get_person_info()
        self.set_data("score", result_obj['likelihood'])
        contact_info = result_obj.get('contactInfo', None)
        if contact_info:
            if 'givenName' in contact_info:
                self.set_data(P.FIRST_NAME, contact_info['givenName'])
            if 'familyName' in contact_info:
                self.set_data(P.LAST_NAME, contact_info['familyName'])
        demographics = result_obj.get('demographics', None)
        if demographics:
            gender = demographics.get('gender', None)
            if gender:
                self.add_data(P.GENDER, gender.lower())
            loc = demographics.get('locationGeneral', None)
            if loc:
                self.add_data(P.LOCATIONS, loc)
        photos = result_obj.get('photos', None)
        if photos:
            for photo in photos:
                new_photo = {}
                m = {"url": P.PHOTO_URL, "typeName": P.PHOTO_SOURCE}
                AcureRateUtils.dict2dict(photo, new_photo, m)
                self.add_data(P.PHOTOS, new_photo)
        organizations = result_obj.get('organizations', None)
        if organizations:
            for org in organizations:
                new_job = {}
                m = {"name": P.JOB_NAME, "title": P.JOB_TITLE,
                     "current": P.JOB_CURRENT, "isPrimary": P.JOB_PRIMARY}
                AcureRateUtils.dict2dict(org, new_job, m)
                # If there are start/end dates, grab them (year only - drop the month)
                if 'startDate' in org:
                    new_job[P.JOB_STARTED] = org['startDate'][0:4]
                if 'endDate' in org:
                    new_job[P.JOB_ENDED] = org['endDate'][0:4]
                self.add_data(P.JOBS, new_job)
        social_profiles = result_obj.get('socialProfiles', None)
        if social_profiles:
            # Map each known network's profile URL onto its entity attribute.
            for social_profile in social_profiles:
                if social_profile.get('typeName', '') == 'Twitter':
                    self.set_data(P.TWITTER_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'LinkedIn':
                    self.set_data(P.LINKEDIN_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'GooglePlus':
                    self.set_data(P.GOOGLEPLUS_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'Facebook':
                    self.set_data(P.FACEBOOK_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'Gravatar':
                    self.set_data(P.GRAVATAR_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'Foursquare':
                    self.set_data(P.FOURSQUARE_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'Pinterest':
                    self.set_data(P.PINTEREST_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'Klout':
                    self.set_data(P.KLOUT_URL, social_profile['url'])
                elif social_profile.get('typeName', '') == 'AngelList':
                    self.set_data(P.ANGELLIST_URL, social_profile['url'])
                else:
                    print('Something else...')
        # TODO: add all other attributes received from FullContact
        return [P.JOBS]

    def enrich_company(self):
        """Copy company fields from FullContact's company lookup into the
        entity. Returns [C.NAME], or [] when no domain is deduced."""
        domain = self.enriched_entity.deduced.get(C.DOMAIN, None)
        if domain is None:
            return []
        result_obj = self._get_company_info(domain)
        # Keep the logo url and website
        if 'logo' in result_obj:
            self.add_data(C.LOGOS, {
                C.LOGO_URL: result_obj['logo'],
                C.LOGO_SOURCE: 'fullcontact'
            })
        if 'website' in result_obj:
            self.set_data(C.WEBSITE, result_obj['website'])
        # Keep the founding year
        if 'founded' in result_obj['organization']:
            self.set_data(C.FOUNDING_YEAR, result_obj['organization']['founded'])
        # Approximate Employees
        if 'approxEmployees' in result_obj['organization']:
            self.set_data(C.EMPLOYEES_NUMBER, result_obj['organization']['approxEmployees'])
        # Keep keywords
        if 'keywords' in result_obj['organization']:
            self.set_data(C.KEYWORDS, result_obj['organization']['keywords'])
        # Keep name
        if 'name' in result_obj['organization']:
            self.set_data(C.NAME, result_obj['organization']['name'])
        # Keep social profiles URL
        # TODO: keep other social profiles...
        for profile in result_obj.get('socialProfiles', []):
            if profile['typeId'] == 'crunchbasecompany':
                self.set_data(C.CRUNCHBASE_URL, profile['url'])
        return [C.NAME]

    def _handle_fc_api_errors(self, response):
        """Translate FullContact HTTP status codes into EngagementExceptions;
        returns silently on 200."""
        if response.status_code == 200:
            # All is ok.
            return
        # Handle different errors. Documentation - https://www.fullcontact.com/developer/docs/
        if response.status_code == 403:
            # Quota exceeded - need special treatment
            raise EngagementException("403. Quota Exceeded.", True)
        elif response.status_code == 405 or response.status_code == 410 or response.status_code == 422:
            raise EngagementException(
                "%s. Invalid request sent to FC %s" % (response.status_code, response.text), True)
        elif response.status_code == 404:
            raise EngagementException(
                "404. Searched in the past 24 hours and nothing was found: %s" % response.text)
        elif response.status_code == 500 or response.status_code == 503:
            raise EngagementException(
                "%s. Transient errors in FC server. Possible maintenance/downtime. %s" % (response.status_code, response.text), True)
        elif response.status_code == 202:
            # being processed...
            raise EngagementException(
                "202. Did not get info. Request is being processed. Return later."
            )
        else:
            raise EngagementException(
                "%s. Unknown error: %s" % (response.status_code, response.text), True)

    def _get_person_info(self):
        """GET /person for the current enrich key; return parsed JSON."""
        try:
            response = self.fc.api_get('person', **{'email': self.enrich_key})
            if hasattr(response, 'from_cache'):
                self.set_data("from_cache", response.from_cache)
            self._handle_fc_api_errors(response)
            # TODO: check if we can inspect the header and see our limit remaining...
            #r.headers['x-rate-limit-remaining']
        except EngagementException as e:
            raise e
        except Exception as e:
            raise EngagementException(e, True)
        json = response.json()
        return json

    def _get_company_info(self, domain):
        """GET /company for `domain`; return parsed JSON."""
        try:
            response = self.fc.api_get('company', **{'domain': domain})
            if hasattr(response, 'from_cache'):
                self.set_data("from_cache", response.from_cache)
            self._handle_fc_api_errors(response)
        except EngagementException as e:
            raise e
        except Exception as e:
            raise EngagementException(e, True)
        json = response.json()
        return json
from fullcontact import FullContact
from config import *
import json

# Module-level client built from the configured API key.
fc = FullContact(fullcontact_api_key)

def fetchData(email_id):
    """Print personal and employment details for `email_id` from FullContact.

    NOTE(review): Python 2 print statements; the broad excepts collapse any
    missing field (or failed lookup) into "Unavailable".
    """
    person = fc.person(email=email_id)
    data = person.json()
    try:
        print "Personal Information :: "
        print "Full Name :: " + data['contactInfo']['fullName']
        print "Given Name :: " + data['contactInfo']['givenName']
        print "Gender :: " + str(data['demographics']['gender'])
        print "Website :: " + str(data['contactInfo']['websites'])
        print "Full Address :: " + str(data['demographics']['locationDeduced']['normalizedLocation'])
        print "City :: " + str(data['demographics']['locationDeduced']['city']['name'])
        print "State :: " + str(data['demographics']['locationDeduced']['state']['name'])
        print "Country :: " + str(data['demographics']['locationDeduced']['country']['name'])
        print "\n"
    except:
        print "Unavailable"
    try:
        print "Employment Detail :: "
        for org in data['organizations']:
            print "Organization::" + org['name'] + " " + "Start date::" + " " + org['startDate'] + " " + "Job Title::" + " " + org['title']
    except:
        print " "
from fullcontact import FullContact import urllib.request, json import pyodbc server = 'dbserveranasight.database.windows.net' database = 'warehouse' username = '******' password = '******' cnxn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+';DATABASE='+database+';UID='+username+';PWD='+ password) cursor = cnxn.cursor() fc = FullContact('0DHVXQOcAgZzZbUPTeTQ83AakHlzIE8L') APIKey='0DHVXQOcAgZzZbUPTeTQ83AakHlzIE8L' app = Flask(__name__) #************ Get Contact details By Email ******* @app.route("/person_enrich", methods=['POST','GET']) def person_enrich(): if request.method == 'POST': jsonData = request.get_json(force=True) email1 =jsonData['email'] if email1 == "" : return jsonify({'type': 'validation','message': 'Email is required','status': 0}) sys.exit() if email1 !="": headers = {
def _company_info(self, company_name, api_key=""):
    """Merge all CompanyInfoCrawl records for `company_name` into a single
    best-value dict (per-field, highest source score wins), persist it, and
    optionally fire a webhook when the crawl queue has completed.

    Returns the merged dict, or the bare company_name when no crawls exist.
    NOTE(review): Python 2 code; original formatting was lost, so nesting is
    a best-effort reconstruction -- verify against VCS.
    """
    #TODO - company_name = self._remove_non_ascii(company_name) add to save
    qry = {
        'where': json.dumps({'company_name': company_name}),
        'limit': 1000
    }
    qry['order'] = '-createdAt'
    crawls = Parse().get('CompanyInfoCrawl', qry).json()['results']
    if not crawls:
        # start crawls
        return company_name
    crawls = self._source_score(pd.DataFrame(crawls))
    crawls = self._logo_score(crawls)
    #crawls = crawls[crawls.api_key == api_key]
    # Fuzzy-match crawled names against the requested company name.
    crawls['name_score'] = [
        fuzz.token_sort_ratio(row['name'], row.company_name)
        for index, row in crawls.iterrows()
    ]
    crawls = crawls[crawls.name_score > 70].append(
        crawls[crawls.name.isnull()])
    # Pick the best-scored non-empty logo.
    logo = crawls.sort("logo_score", ascending=False)
    #logo=logo[(logo.logo != "") & (logo.logo.notnull())][["source","logo"]]
    logo = logo[(logo.logo != "") & (logo.logo.notnull())].logo.tolist()
    logo = logo[0] if logo else ""
    #crawls = crawls[["press", 'source_score', 'source', 'createdAt', 'domain']]
    final = {}
    #print crawls.press.dropna()
    # For every data column, keep the value from the highest-scored source.
    for col in crawls.columns:
        if col in ['source_score', 'source', 'createdAt']:
            continue
        df = crawls[[col, 'source_score', 'source', 'createdAt']]
        if df[col].dropna().empty:
            continue
        # Lists are unhashable; convert to tuples before grouping.
        if type(list(df[col].dropna())[0]) == list:
            df[col] = df[col].dropna().apply(tuple)
        try:
            df = df[df[col] != ""]
        except:  # NOTE(review): bare except used as a no-op guard
            "lol"
        try:
            df = df[df[col].notnull()]
            df = [
                source[1].sort('createdAt').drop_duplicates(col, True)
                for source in df.groupby(col)
            ]
            df = [_df for _df in df if _df is not None]
            # NOTE(review): `len(df) is 0` relies on small-int interning;
            # should be `== 0`.
            df = [pd.DataFrame(
                columns=['source_score', col])] if len(df) is 0 else df
            df = pd.concat(df).sort('source_score')[col]
            if list(df):
                final[col] = list(df)[-1]
        except:
            "lol"
    if 'industry' in final.keys():
        try:
            final['industry'] = final['industry'][0]
        except:
            final["industry"] = ""
        try:
            final['industry_keywords'] = list(
                set(crawls.industry.dropna().sum()))
        except:
            final['industry_keywords'] = []
    if 'address' in final.keys():
        final['address'] = FullContact()._normalize_location(
            final['address'])
    try:
        final['handles'] = crawls[['source', 'handle']].dropna()
        final['handles'] = final['handles'].drop_duplicates().to_dict('r')
    except:
        "lol"
    tmp = crawls[['source', 'logo']].dropna()
    #print tmp
    #print "THE LOGO", logo
    final["logo"] = logo
    final['logos'] = tmp.drop_duplicates().to_dict('r')
    try:
        tmp = crawls[['source', 'phone']].dropna()
        final['phones'] = tmp.drop_duplicates().to_dict('r')
    except:
        """ """
    # TODO - if company_name exists update
    # TODO - find if domain exists under different company_name then update
    final = self._prettify_fields(final)
    if "name_score" in final.keys():
        del final["name_score"]
    #print json.dumps(final)
    self._add_to_clearspark_db('Company', 'company_name', company_name, final)
    # TODO - find main domain from domain -> ie canon.ca should be canon.com
    # clean data - ie titleify fields, and lowercase domain
    # TODO - start a domain search with the deduced domain and the company_name
    #print "RQUEUE CHECK"
    if "domain" in final.keys():
        domain = final["domain"]
        '''
        if len(RQueue()._results("{0}_{1}".format(company_name, api_key))) == 1:
            q.enqueue(Companies()._domain_research, domain, api_key, company_name)
            q.enqueue(Companies()._secondary_research, company_name, domain, api_key)
        '''
        # Fire the webhook only once the whole crawl queue has drained.
        if RQueue()._has_completed("{0}_{1}".format(company_name, api_key)):
            #q.enqueue(Companies()._domain_research, domain, api_key, company_name)
            #q.enqueue(Companies()._secondary_research, company_name, domain, api_key)
            print "WEBHOOK <<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<"
            if "company_name" in final.keys():
                Webhook()._update_company_info(final)
            '''
            job = q.enqueue(EmailGuess().search_sources, final["domain"],api_key,"")
            job.meta["{0}_{1}".format(company_name, api_key)] = True
            job.save()
            for domain in crawls.domain.dropna().drop_duplicates():
                job = q.enqueue(EmailGuess().search_sources, domain, api_key, "")
                RQueue()._meta(job, "{0}_{1}".format(company_name, api_key))
            '''
    return final