def start(self, **kwargs):
    """Run the tweet summarization loop; this method never returns.

    Keyword Args:
        campaign: optional campaign object. When given, only the active
            campaign with the same id is summarized.
        regenerate: when true, the first pass clears each campaign's
            summarization and restarts it at the hour of the campaign's
            oldest tweet (or the current hour if it has no tweets).
            Later passes always resume from the last summarized date.

    Every pass walks the active accounts and their active campaigns,
    summarizes the pending range in chunks of at most one day using
    one-hour buckets, and then sleeps 20 seconds.
    """
    campaign_filter = kwargs.get('campaign', None)
    rebuild = kwargs.get('regenerate', False)
    while True:
        end = self.getCurrentSummarizationEnd()
        active_accounts = MongoManager.getActiveAccounts(
            max_age=timedelta(hours=1))
        for account in active_accounts:
            for campaign in account.getActiveCampaigns():
                campaign_id = campaign.getId()
                # The index is ensured for every active campaign, even the
                # ones skipped by the campaign filter below.
                MongoManager.ensureIndex(
                    'summarized_tweets_%s' % campaign_id, [("start", 1)])
                if campaign_filter and campaign_filter.getId() != campaign_id:
                    continue

                if rebuild:
                    self.clearSummarization(campaign)
                    oldest = MongoManager.findTweets(
                        'tweets_%s' % campaign_id,
                        sort=("x_created_at", 1),
                        limit=1)
                    if oldest.count():
                        first_moment = oldest[0]['x_created_at']
                    else:
                        first_moment = datetime.now()
                    # Truncate to the start of the hour.
                    chunk_start = first_moment.replace(
                        minute=0, second=0, microsecond=0)
                else:
                    chunk_start = self.getLastSummarizedDate(campaign)

                # Advance one day at a time; the last chunk is clipped to end.
                while chunk_start < end:
                    chunk_end = chunk_start + timedelta(days=1)
                    self.summarize(campaign, chunk_start,
                                   min(end, chunk_end),
                                   timedelta(hours=1), None)
                    chunk_start = chunk_end

        pprint("sleeping 20 seconds")
        # Regeneration only applies to the first pass.
        rebuild = False
        time.sleep(20)
def getAllFeedURLs(self):
    """Return every forum URL of every active campaign.

    Returns:
        list: ``(account, campaign, url)`` tuples, one per URL returned by
        ``campaign.getForums()``, in account/campaign/forum order.
    """
    feed_urls = []
    for account in MongoManager.getActiveAccounts():
        for campaign in account.getActiveCampaigns():
            feed_urls.extend(
                (account, campaign, forum_url)
                for forum_url in campaign.getForums())
    return feed_urls
def generateGnipRulesFromMongo(self):
    """Return one rule per Facebook fanpage of every active campaign.

    Returns:
        list: dicts of the form ``{"value": <fanpage>, "tag": None}``.
    """
    fanpage_rules = []
    for account in MongoManager.getActiveAccounts():
        for campaign in account.getActiveCampaigns():
            # Tags are deliberately left as None; an
            # "account/campaign/fanpage" tag used to be built here but
            # was disabled.
            fanpage_rules.extend(
                {"value": fanpage, "tag": None}
                for fanpage in campaign.getFacebookFanpages())
    return fanpage_rules
def getAllHistoryFeedURLs(self):
    """Return the forum URLs whose history has not been fetched yet.

    Returns:
        list: ``(account, campaign, url)`` tuples for every URL in
        ``campaign.getForums()`` that is not contained in
        ``campaign.getHistoryFetchedForums()``, across all active
        campaigns of all active accounts.
    """
    pending = []
    for account in MongoManager.getActiveAccounts():
        for campaign in account.getActiveCampaigns():
            already_fetched = campaign.getHistoryFetchedForums()
            pending += [
                (account, campaign, forum_url)
                for forum_url in campaign.getForums()
                if forum_url not in already_fetched
            ]
    return pending
def getBrandClassifiers(cls):
    """Return a brand classifier for every rule of every active account.

    A fresh ``cls()`` instance collects the rules of each active account
    via ``getAccountRules`` and then turns each rule into a classifier
    via ``generateBrandClassifier``.

    Returns:
        list: the generated classifiers, in rule order.
    """
    # TODO: a buffer keyed on max_age is still missing here.
    builder = cls()
    active_accounts = MongoManager.getActiveAccounts(
        max_age=timedelta(seconds=10))

    # Collect all rules first, then build the classifiers.
    collected_rules = []
    for account in active_accounts:
        collected_rules += builder.getAccountRules(account)

    return [builder.generateBrandClassifier(rule) for rule in collected_rules]
def getFanpageToCampaignsDict(cls):
    """Return a dict mapping each Facebook fanpage to its campaigns.

    The mapping is cached on ``cls.cached_fanpage_to_campaigns`` as
    ``{'data': mapping, 'fetch_time': datetime}``. It is rebuilt from the
    active accounts when ``cls.fanpage_to_campaigns_max_age`` is falsy,
    when nothing is cached yet, or when the cached copy is older than
    ``cls.fanpage_to_campaigns_max_age``.

    Returns:
        dict: fanpage -> list of campaigns that list that fanpage.
    """
    # The checks stay in one short-circuit expression so the cache is only
    # inspected when a max age is configured, and its fetch time only when
    # a cache exists.
    if (not cls.fanpage_to_campaigns_max_age
            or not cls.cached_fanpage_to_campaigns
            or (datetime.now() - cls.cached_fanpage_to_campaigns['fetch_time']
                > cls.fanpage_to_campaigns_max_age)):
        # Parenthesised single-argument print behaves the same as a
        # Python 2 statement and as a Python 3 function call.
        print("refetching fanpages to campagins dict")
        data = {}
        for acc in MongoManager.getActiveAccounts():
            for camp in acc.getActiveCampaigns():
                for fp in camp.getFacebookFanpages():
                    data.setdefault(fp, []).append(camp)
        cls.cached_fanpage_to_campaigns = {
            'data': data,
            'fetch_time': datetime.now(),
        }
    return cls.cached_fanpage_to_campaigns['data']
def getTopicClassifiers(cls):
    """Return the topic classifiers of every active campaign.

    Returns:
        dict: ``{campaign id: {topic id: classifier}}``. Campaigns whose
        ``getTopics()`` result is empty are left out.
    """
    # TODO: a buffer keyed on max_age is still missing here.
    builder = cls()
    classifiers_by_campaign = {}
    active_accounts = MongoManager.getActiveAccounts(
        max_age=timedelta(seconds=10))
    for account in active_accounts:
        for campaign in account.getActiveCampaigns():
            topics = campaign.getTopics()
            if topics:
                # NOTE: setting topic['_id'] = topic.getId() was left
                # disabled here, with an open question about whether it
                # is needed.
                by_topic = {}
                for topic in topics:
                    by_topic[topic.getId()] = builder.generateTopicClassifier(
                        topic)
                classifiers_by_campaign[campaign.getId()] = by_topic
    return classifiers_by_campaign
def start(self, **kwargs):
    """Run the tweet summarization loop; this method never returns.

    Keyword Args:
        campaign: optional campaign object. When given, only the active
            campaign with the same id is summarized.
        regenerate: when true, the first pass clears each campaign's
            summarization and restarts it at the hour of the campaign's
            oldest tweet (or the current hour if it has no tweets).
            Later passes always resume from the last summarized date.

    Every pass walks the active accounts and their active campaigns,
    summarizes the pending range in chunks of at most one day using
    one-hour buckets, and then sleeps 20 seconds.
    """
    campaign_filter = kwargs.get('campaign', None)
    rebuild = kwargs.get('regenerate', False)
    while True:
        end = self.getCurrentSummarizationEnd()
        active_accounts = MongoManager.getActiveAccounts(
            max_age=timedelta(hours=1))
        for account in active_accounts:
            for campaign in account.getActiveCampaigns():
                campaign_id = campaign.getId()
                # The index is ensured for every active campaign, even the
                # ones skipped by the campaign filter below.
                MongoManager.ensureIndex(
                    'summarized_tweets_%s' % campaign_id, [("start", 1)])
                if campaign_filter and campaign_filter.getId() != campaign_id:
                    continue

                if rebuild:
                    self.clearSummarization(campaign)
                    oldest = MongoManager.findTweets(
                        'tweets_%s' % campaign_id,
                        sort=("x_created_at", 1),
                        limit=1)
                    if oldest.count():
                        first_moment = oldest[0]['x_created_at']
                    else:
                        first_moment = datetime.now()
                    # Truncate to the start of the hour.
                    chunk_start = first_moment.replace(
                        minute=0, second=0, microsecond=0)
                else:
                    chunk_start = self.getLastSummarizedDate(campaign)

                # Advance one day at a time; the last chunk is clipped to end.
                while chunk_start < end:
                    chunk_end = chunk_start + timedelta(days=1)
                    self.summarize(campaign, chunk_start,
                                   min(end, chunk_end),
                                   timedelta(hours=1), None)
                    chunk_start = chunk_end

        pprint("sleeping 20 seconds")
        # Regeneration only applies to the first pass.
        rebuild = False
        time.sleep(20)
def generateGnipRulesFromMongo(self):
    """Build the list of rule dicts for all active accounts.

    Every rule is a dict ``{"value": <query string>, "tag": <label>}``.
    For each brand of each active campaign of each active account:

    * If the brand has follow accounts, three rules are added: the sorted
      accounts joined with " OR " ("mention"), the same accounts with "@"
      removed and prefixed with "from:", and again prefixed with "to:".
    * Each brand identification rule is expanded once per brand search
      keyword by replacing the "[M]" placeholder and wrapping the result
      in double quotes. If the rule contains "[P]", it is further expanded
      for each search keyword of each product that uses the brand's
      identification rules; otherwise it is added as-is.
    * Each product identification rule is expanded for every combination
      of brand search keyword ("[M]") and product search keyword ("[P]").

    In addition, each active poll of the account adds one rule joining
    its sorted search hashtags with " OR ".

    Returns:
        list: the generated rule dicts.
    """
    accounts = MongoManager.getActiveAccounts()
    rules = []
    for acc in accounts:
        for camp in acc.getActiveCampaigns():
            for brand in camp.getBrands():
                fa = sorted(brand.getFollowAccounts())
                # Skip follow-account rules entirely when there are no
                # accounts, so no rule with an empty value is produced.
                if fa:
                    rules.append({
                        "value":
                        " OR ".join(fa),
                        "tag":
                        "%s/%s/%s/follow accounts - mention" %
                        (acc.getName(), camp.getName(), brand.getName())
                    })
                    # from:/to: rules use the names without the "@".
                    clean_user_names = [x.replace("@", "") for x in fa]
                    rules.append({
                        "value":
                        " OR ".join(
                            ["from:%s" % x for x in clean_user_names]),
                        "tag":
                        "%s/%s/%s/follow accounts - from" %
                        (acc.getName(), camp.getName(), brand.getName())
                    })
                    rules.append({
                        "value":
                        " OR ".join(
                            ["to:%s" % x for x in clean_user_names]),
                        "tag":
                        "%s/%s/%s/follow accounts - to" %
                        (acc.getName(), camp.getName(), brand.getName())
                    })
                #BRAND RULES
                for brule in brand.getIdentificationRules():
                    # Normalize lowercase placeholders to "[M]" / "[P]".
                    brule = brule.replace("[m]", "[M]").replace("[p]", "[P]")
                    for bsearch_keyword in brand.getSearchKeywords():
                        # Quote the rule after substituting the brand keyword.
                        brand_replaced_rule = '"' + brule.replace(
                            "[M]", bsearch_keyword) + '"'
                        if (brule.upper().find("[P]") >= 0):
                            # The rule mentions a product: expand it for each
                            # product that opts into the brand's rules.
                            for product in brand.getProducts():
                                if product.isUsingBrandIdRules():
                                    for psearch_keyword in product.getSearchKeywords(
                                    ):
                                        product_replaced_rule = brand_replaced_rule.replace(
                                            "[P]", psearch_keyword)
                                        rules.append({
                                            "value":
                                            product_replaced_rule,
                                            "tag":
                                            "%s/%s/%s/%s: %s" %
                                            (acc.getName(), camp.getName(),
                                             brand.getName(),
                                             product.getName(), brule)
                                        })
                        # NOTE(review): this else is read as pairing with the
                        # "[P]" check above (brand-only rule) -- confirm
                        # against the original indentation.
                        else:
                            rules.append({
                                "value":
                                brand_replaced_rule,
                                "tag":
                                "%s/%s/%s: %s" %
                                (acc.getName(), camp.getName(),
                                 brand.getName(), brule)
                            })
                #PRODUCT RULES
                for product in brand.getProducts():
                    for prule in product.getIdentificationRules():
                        # Normalize lowercase placeholders to "[M]" / "[P]".
                        prule = prule.replace("[m]", "[M]").replace("[p]", "[P]")
                        for bsearch_keyword in brand.getSearchKeywords():
                            brand_replaced_rule = '"' + prule.replace(
                                "[M]", bsearch_keyword) + '"'
                            for psearch_keyword in product.getSearchKeywords(
                            ):
                                product_replaced_rule = brand_replaced_rule.replace(
                                    "[P]", psearch_keyword)
                                rules.append({
                                    "value":
                                    product_replaced_rule,
                                    "tag":
                                    "%s/%s/%s/%s: %s" %
                                    (acc.getName(), camp.getName(),
                                     brand.getName(), product.getName(),
                                     prule)
                                })
        # NOTE(review): the poll loop is read as running once per account,
        # after its campaigns -- confirm against the original indentation.
        for poll in acc.getActivePolls():
            rules.append({
                "value":
                " OR ".join(sorted(poll.getSearchHashtags())),
                "tag":
                "%s/poll %s" % (acc.getName(), poll.getName())
            })
    return rules