def _email_pattern(self, domain, api_key=""):
    """Score the crawled email patterns for ``domain`` by how often each occurs.

    Fetches up to 1000 ``CompanyEmailPatternCrawl`` rows for the domain,
    counts the surviving patterns, hands the result to
    ``self._find_if_object_exists`` and, once ``RQueue()._has_completed``
    reports the ``"<domain>_<api_key>"`` job as done, forwards it to
    ``Webhook()._update_company_email_pattern``.

    Args:
        domain: Domain whose crawled patterns are scored.
        api_key: Combined with ``domain`` to build the queue job name.

    Returns:
        None.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(''' Score email pattern based on number of occurrences ''')

    qry = {'where': json.dumps({'domain': domain}), 'limit': 1000}
    results = Parse().get('CompanyEmailPatternCrawl', qry).json()['results']
    crawls = pd.DataFrame(results)

    if 'pattern' not in crawls.columns:
        # No crawl rows (or none carrying a pattern): attribute access on the
        # missing column used to raise AttributeError here.
        records = []
    else:
        found = crawls[crawls.pattern.notnull()].drop_duplicates('email')
        other_rows = found[found.crawl_source != "mx_check"]
        mx_rows = found[found.crawl_source == "mx_check"].drop_duplicates('pattern')
        if len(mx_rows.pattern) > 2:
            # More than two distinct mx_check patterns: drop all of them.
            # NOTE(review): presumably such a result is considered
            # unreliable -- confirm the intent of this threshold.
            mx_rows = mx_rows[mx_rows.crawl_source != "mx_check"]

        # pd.concat replaces DataFrame.append, which pandas 2.0 removed.
        counts = pd.concat([other_rows, mx_rows]).pattern.value_counts()
        total = float(counts.sum())  # computed once, not once per row

        score = pd.DataFrame()
        score['pattern'], score['freq'] = counts.index, counts.values
        score['score'] = [freq / total for freq in score['freq']]
        score['source'], score['tried'] = 'clearspark', False
        records = score.fillna("").to_dict('records')

    print("SCORE")
    print(records)

    score = {'domain': domain, 'company_email_pattern': records}
    self._find_if_object_exists('EmailPattern', 'domain', domain, score)
    # TODO - add date crawled to score
    if RQueue()._has_completed("{0}_{1}".format(domain, api_key)):
        if not records:
            # Nothing was scored, so attach a random guess.
            score['email_guess'] = EmailGuess()._random()
        Webhook()._update_company_email_pattern(score)
def _email_pattern_research():
    """Enqueue an email-source search for every fetched company that has a domain.

    Reads up to 1000 ``Company`` rows ordered by descending ``createdAt`` and
    enqueues ``EmailGuess().search_sources`` on ``q`` for each row that
    carries a ``domain`` key.

    Returns:
        dict: ``{"research": "started"}`` after all jobs have been enqueued.
    """
    # NOTE(review): credential is hard-coded in source; consider moving it to
    # configuration.
    api_key = "9a31a1defcdc87a618e12970435fd44741d7b88794f7396cbec486b8"
    name = ""

    # Parse's built-in creation timestamp is ``createdAt``; the previous
    # "-createdAd" key did not name that field.
    companies = Parse().get("Company", {"order": "-createdAt", "limit": 1000})
    for company in companies.json()["results"]:
        if "domain" not in company:
            continue
        q.enqueue(EmailGuess().search_sources, company["domain"], name,
                  api_key, timeout=6000)
    return {"research": "started"}
def company_list_employee_webhook(self, company_list, qry="", limit=0, prospect_list=""):
    """Call ``employee_webhook`` for every ``CompanyProspect`` in a list.

    Args:
        company_list: Identifier passed to ``Parse()._pointer`` to build the
            ``CompanyProspectList`` pointer used as the query filter.
        qry: Accepted for interface compatibility; see the note below.
        limit: Forwarded unchanged to ``employee_webhook``.
        prospect_list: Forwarded unchanged to ``employee_webhook``.

    Returns:
        None.
    """
    # NOTE(review): the caller-supplied ``qry`` is never read. The Parse
    # filter built here is what gets forwarded to ``employee_webhook`` in the
    # ``qry`` position -- confirm whether the caller's value was meant to be
    # passed through instead.
    list_filter = {"lists": Parse()._pointer("CompanyProspectList", company_list)}
    response = Parse().get('CompanyProspect', {'where': json.dumps(list_filter)})

    for prospect in response.json()['results']:
        company_name = prospect['name']
        user_id = prospect['user']['objectId']
        company_id = prospect['company']['objectId']
        self.employee_webhook(company_name, user_id, company_id,
                              list_filter, limit, prospect_list)
def _daily_collect(self, profile_id):
    """Collect tweets for a prospect profile's Twitter signal and queue follow-up.

    Loads the ``ProspectProfile`` with its ``profiles`` included, queries
    ``Tweet`` rows for the first ``TwitterProfile`` signal inside the window
    returned by ``Helper()._timestamp()``, creates a ``SignalReport``, stores
    one ``PeopleSignal`` row per distinct company name and enqueues
    ``PeopleSignal()._check_for_people_signal``.

    Args:
        profile_id: Object id of the ``ProspectProfile`` to process.

    Returns:
        None. Returns early when the profile has no ``TwitterProfile`` entry
        (before anything is created) or when no tweets were found (after the
        ``SignalReport`` has been created).
    """
    # NOTE(review): a second ``_daily_collect`` with the same logic is defined
    # later in this file; if both share a scope, the later one wins.
    # Decode the response once and reuse it below.
    profile = Parse().get("ProspectProfile/" + profile_id,
                          {"include": "profiles"}).json()
    signals = [entry["press_id"] for entry in profile["profiles"]
               if entry["className"] == "TwitterProfile"]
    if not signals:
        # Previously ``_signal[0]`` raised IndexError for such profiles.
        return

    d1, d2 = Helper()._timestamp()
    qry = {"signal": signals[0], "timestamp": {"$gte": d1, "$lte": d2}}
    tweet_params = {
        "limit": 1000,
        "skip": 0,
        "count": True,
        "where": json.dumps(qry),
        "order": "-timestamp",
    }
    press = Parse().get("Tweet", tweet_params).json()["results"]

    report = {"user": profile["user"], "user_company": profile["user_company"]}
    report["profile"] = Parse()._pointer("ProspectProfile", profile["objectId"])
    report_id = Parse().create("SignalReport", report).json()["objectId"]
    _report = Parse()._pointer("SignalReport", report_id)

    cos = pd.DataFrame(press)
    if cos.empty:
        return
    cos = cos[cos.company_name.notnull()].drop_duplicates("company_name")
    cos["report"] = [_report] * len(cos.index)
    Parse()._batch_df_create("PeopleSignal", cos)
    # TODO - Queue ProspectTitle Search if present
    q.enqueue(PeopleSignal()._check_for_people_signal, cos, profile, _report)
def _daily_collect(self, profile_id):
    """Collect tweets for a prospect profile's Twitter signal and queue follow-up.

    Loads the ``ProspectProfile`` with its ``profiles`` included, queries
    ``Tweet`` rows for the first ``TwitterProfile`` signal inside the window
    returned by ``Helper()._timestamp()``, creates a ``SignalReport``, stores
    one ``PeopleSignal`` row per distinct company name and enqueues
    ``PeopleSignal()._check_for_people_signal``.

    Args:
        profile_id: Object id of the ``ProspectProfile`` to process.

    Returns:
        None. Returns early when the profile has no ``TwitterProfile`` entry
        (before anything is created) or when no tweets were found (after the
        ``SignalReport`` has been created).
    """
    # NOTE(review): another ``_daily_collect`` with the same logic is defined
    # earlier in this file; if both share a scope, this later one wins.
    # Decode the response once and reuse it below.
    profile = Parse().get("ProspectProfile/" + profile_id,
                          {"include": "profiles"}).json()
    signals = [entry["press_id"] for entry in profile["profiles"]
               if entry["className"] == "TwitterProfile"]
    if not signals:
        # Previously ``_signal[0]`` raised IndexError for such profiles.
        return

    d1, d2 = Helper()._timestamp()
    qry = {"signal": signals[0], "timestamp": {"$gte": d1, "$lte": d2}}
    tweet_params = {
        "limit": 1000,
        "skip": 0,
        "count": True,
        "where": json.dumps(qry),
        "order": "-timestamp",
    }
    press = Parse().get("Tweet", tweet_params).json()["results"]

    report = {"user": profile["user"], "user_company": profile["user_company"]}
    report["profile"] = Parse()._pointer("ProspectProfile", profile["objectId"])
    report_id = Parse().create("SignalReport", report).json()["objectId"]
    _report = Parse()._pointer("SignalReport", report_id)

    cos = pd.DataFrame(press)
    if cos.empty:
        return
    cos = cos[cos.company_name.notnull()].drop_duplicates("company_name")
    cos["report"] = [_report] * len(cos.index)
    Parse()._batch_df_create("PeopleSignal", cos)
    # TODO - Queue ProspectTitle Search if present
    q.enqueue(PeopleSignal()._check_for_people_signal, cos, profile, _report)