Example 1
0
    def _email_pattern(self, domain, api_key=""):
        """Score email patterns for *domain* by frequency of occurrence.

        Fetches crawled pattern records from Parse, de-duplicates them
        (mx_check guesses are discarded once enough distinct patterns
        exist), converts pattern counts into normalized scores, persists
        the result, and fires the completion webhook once the crawl
        queue for this domain/api_key pair has drained.

        :param domain: company domain whose email pattern is scored
        :param api_key: caller's API key, used to name the crawl queue
        """
        # The original printed this doc text at runtime (Py2
        # ``print '''...'''``); it is now a proper docstring.
        qry = {'where': json.dumps({'domain': domain}), 'limit': 1000}
        crawls = Parse().get('CompanyEmailPatternCrawl', qry)
        crawls = pd.DataFrame(crawls.json()['results'])

        df = crawls[crawls.pattern.notnull()].drop_duplicates('email')
        _df = df[df.crawl_source != "mx_check"]
        df = df[df.crawl_source == "mx_check"].drop_duplicates('pattern')
        # mx_check guesses are noisy: drop them entirely once it has
        # produced more than two distinct patterns.
        if len(df.pattern) > 2: df = df[df.crawl_source != "mx_check"]
        # pd.concat replaces DataFrame.append (removed in pandas 2.0)
        df = pd.concat([_df, df])
        df = df.pattern.value_counts()

        score = pd.DataFrame()
        score['pattern'], score['freq'] = df.index, df.values
        # normalize frequencies so the scores sum to 1.0
        score['score'] = [freq / float(score.freq.sum()) for freq in score['freq']]
        score['source'], score['tried'] = 'clearspark', False
        score = score.fillna("")
        score = score.to_dict('records')
        print("SCORE")
        print(score)
        score = {'domain': domain, 'company_email_pattern': score}
        self._find_if_object_exists('EmailPattern', 'domain', domain, score)

        # TODO - add date crawled to score
        if RQueue()._has_completed("{0}_{1}".format(domain, api_key)):
            if score['company_email_pattern'] == []:
                # no pattern found anywhere - fall back to a random guess
                score['email_guess'] = EmailGuess()._random()
            Webhook()._update_company_email_pattern(score)
Example 2
0
def _email_pattern_research():
    """Kick off background email-pattern research for the newest companies.

    Pulls up to 1000 of the most recently created Company records from
    Parse and, for each one that has a domain, enqueues an EmailGuess
    source search on the worker queue.

    :returns: status dict acknowledging that the research jobs were queued
    """
    # NOTE(review): Parse's built-in creation field is "createdAt"; the
    # original "-createdAd" looks like a typo that silently disabled ordering.
    companies = Parse().get("Company", {"order": "-createdAt", "limit": 1000})
    for company in companies.json()["results"]:
        if "domain" in company:
            domain = company["domain"]
            # SECURITY: hard-coded API key - move to config/env before shipping
            api_key = "9a31a1defcdc87a618e12970435fd44741d7b88794f7396cbec486b8"
            name = ""
            q.enqueue(EmailGuess().search_sources, domain, name, api_key,
                      timeout=6000)
    return {"research": "started"}
Example 3
0
 def company_list_employee_webhook(self,
                                   company_list,
                                   qry="",
                                   limit=0,
                                   prospect_list=""):
     """Fire the employee webhook for every company in a prospect list.

     :param company_list: CompanyProspectList objectId to expand
     :param qry: unused - always replaced by the list lookup built below;
         kept for interface compatibility (the lookup dict is what gets
         forwarded to ``employee_webhook``, matching the original code)
     :param limit: forwarded to ``employee_webhook``
     :param prospect_list: forwarded to ``employee_webhook``
     """
     # Build the lookup in a fresh local instead of silently clobbering
     # the ``qry`` parameter as the original did.
     list_qry = {"lists": Parse()._pointer("CompanyProspectList", company_list)}
     rr = Parse().get('CompanyProspect', {'where': json.dumps(list_qry)})
     for company in rr.json()['results']:
         self.employee_webhook(company['name'], company['user']['objectId'],
                               company['company']['objectId'], list_qry, limit,
                               prospect_list)
Example 4
0
    def _daily_collect(self, profile_id):
        """Collect today's tweet-based people signals for a prospect profile.

        Looks up the profile's TwitterProfile press id, pulls the window's
        tweets, creates a SignalReport, stores one PeopleSignal row per
        distinct company mentioned, and queues the follow-up check.

        :param profile_id: Parse objectId of the ProspectProfile
        """
        profile = Parse().get("ProspectProfile/"+profile_id, {"include":"profiles"})
        # press ids of the TwitterProfile entries; only the first is used below
        _signal = [i["press_id"] for i in profile.json()["profiles"]
                   if i["className"] == "TwitterProfile"]
        # d1/d2 bound the reporting window (presumably start/end timestamps
        # of the current day - confirm against Helper()._timestamp)
        d1, d2 = Helper()._timestamp()
        qry = {"signal":_signal[0],"timestamp":{"$gte": d1,"$lte": d2}}
        press = Parse().get("Tweet",{"limit":1000, "skip":0, "count":True,
                                     "where": json.dumps(qry),
                                     "order":"-timestamp"}).json()["results"]

        profile  = profile.json()
        report = {"user": profile["user"], "user_company": profile["user_company"]}
        report["profile"] = Parse()._pointer("ProspectProfile", profile["objectId"])
        # NOTE(review): the SignalReport is created even when no tweets were
        # found - the emptiness check below happens after this write.
        _report = Parse().create("SignalReport", report).json()["objectId"]
        _report = Parse()._pointer("SignalReport", _report)

        cos = pd.DataFrame(press)
        if cos.empty: return
        # one signal row per distinct company mentioned in the window
        cos = cos[cos.company_name.notnull()].drop_duplicates("company_name")
        cos["report"]  = [_report] * len(cos.index)
        Parse()._batch_df_create("PeopleSignal", cos)
        # TODO - Queue ProspectTitle Search if present
        q.enqueue(PeopleSignal()._check_for_people_signal, cos,  profile, _report)
    def _daily_collect(self, profile_id):
        """Gather today's tweet signals for a prospect profile and persist them.

        Finds the profile's TwitterProfile press id, fetches the window's
        tweets, writes a SignalReport, stores one PeopleSignal per unique
        company mentioned, and enqueues the follow-up signal check.

        :param profile_id: Parse objectId of the ProspectProfile
        """
        resp = Parse().get("ProspectProfile/" + profile_id,
                           {"include": "profiles"})
        press_ids = [
            entry["press_id"] for entry in resp.json()["profiles"]
            if entry["className"] == "TwitterProfile"
        ]
        window_start, window_end = Helper()._timestamp()
        where = {
            "signal": press_ids[0],
            "timestamp": {"$gte": window_start, "$lte": window_end},
        }
        tweets = Parse().get("Tweet", {
            "limit": 1000,
            "skip": 0,
            "count": True,
            "where": json.dumps(where),
            "order": "-timestamp",
        }).json()["results"]

        profile = resp.json()
        report = {
            "user": profile["user"],
            "user_company": profile["user_company"],
            "profile": Parse()._pointer("ProspectProfile",
                                        profile["objectId"]),
        }
        created_id = Parse().create("SignalReport", report).json()["objectId"]
        report_ptr = Parse()._pointer("SignalReport", created_id)

        frame = pd.DataFrame(tweets)
        if frame.empty:
            return
        frame = frame[frame.company_name.notnull()].drop_duplicates(
            "company_name")
        frame["report"] = [report_ptr] * len(frame.index)
        Parse()._batch_df_create("PeopleSignal", frame)
        # TODO - Queue ProspectTitle Search if present
        q.enqueue(PeopleSignal()._check_for_people_signal, frame, profile,
                  report_ptr)