Example #1
0
def pop_from_queue(external_id=None, account_id=None):
    """Return the queued tweets of one external account as a JSON string.

    Every peepbuzz.twitter_queue row whose external_id matches is turned
    into a dict, then the matching rows are deleted from the queue. When
    either argument is None no query runs and the JSON for an empty list
    is returned.
    """
    if external_id is None or account_id is None:
        return json.dumps([])

    queue_key = str(external_id)
    pending = mysql_tools.mysqlQuery(
        "SELECT * FROM peepbuzz.twitter_queue WHERE external_id=" + queue_key,
        infoModule.info.site['dblink'])

    popped = []
    # fetch_row(1, 1) gives () once the result is exhausted; otherwise the
    # row dict sits at index 0.
    for fetched in iter(lambda: pending.fetch_row(1, 1), ()):
        record = fetched[0]
        link_list = record['urls'].split(',')
        tag_list = record['hashtags'].split(',')
        # format time: the blank becomes 'T' and '0000' is appended
        stamp = record['created'].replace(' ', 'T') + '0000'
        popped.append({
            "stream_name": 'twitter',
            "external_id": record['status_id'],
            "urls": link_list,
            "created": stamp,
            "promoter_id": record['promoter_id'],
            "promoter": record['promoter'],
            "thumbnail": record['thumbnail'],
            "title": None,
            "summary": record['summary'],
            "hashtags": tag_list,
            "discussion": [],
            "account_id": account_id,
        })

    mysql_tools.mysqlQuery(
        "DELETE FROM peepbuzz.twitter_queue WHERE external_id=" + queue_key,
        infoModule.info.site['dblink'])
    return json.dumps(popped)
Example #2
0
 def on_status(self, status):
     """Store a tweet in peepbuzz.twitter_queue when its author is followed.

     status is read as a dict: 'created_at', 'text', 'id', the 'hashtags'
     and 'urls' lists under 'entities', and 'id', 'screen_name' and
     'profile_image_url' under 'user'. Nothing is written unless
     str(status['user']['id']) is in the global follow_list. A fresh
     connection is opened for the insert and always closed afterwards.
     """
     global follow_list

     def quote_safe(value):
         # Backslashes are doubled before quotes are escaped. Escaping only
         # the quote lets a tweet containing \' (or ending in a backslash)
         # terminate the single-quoted SQL string early.
         return value.replace("\\", "\\\\").replace("'", "\\'")

     #format time into 2011-02-23T16:42:40+0000 format ala facebook
     #Twitter Format: Wed Mar 23 22:51:50 +0000 2011
     formattedTime = self.formatTime(status['created_at'])

     hashtag = ','.join(
         [quote_safe(val['text']) for val in status['entities']['hashtags']])
     url = ','.join(
         [quote_safe(val['url']) for val in status['entities']['urls']])
     # An empty text is fine here; the former text[-1] probe raised
     # IndexError on it.
     text = quote_safe(status['text'])

     author_id = str(status['user']['id'])
     if author_id not in follow_list:
         return

     file_put_contents(str(status['user']['screen_name']) + " posted something")
     infoModule.info.site['dblink'] = mysql_tools.db_connect()
     try:
         sql = (
             u"INSERT INTO `peepbuzz`.`twitter_queue` SET `status_id` = '" + str(status['id'])
             + "', `created` = '" + formattedTime
             + "', `promoter_id` = '" + author_id
             + "', `promoter` = '" + quote_safe(status['user']['screen_name'])
             + "', `thumbnail` = '" + quote_safe(str(status['user']['profile_image_url']))
             + "', `summary` = '" + text
             + "', `external_id` = '" + author_id
             + "', `hashtags` = '" + hashtag
             + "', `urls` = '" + url + "'"
         )
         mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
     finally:
         # close even when the insert raises, so the connection never leaks
         infoModule.info.site['dblink'].close()
 def tearDown(self):
     """Delete the peepbuzz.blocked_accounts row identified by self.ba_id.

     If the shared dblink is None it is first restored from self.dblink.
     """
     site = infoModule.info.site
     if site['dblink'] is None:
         # need this code to recover from db disconnect test
         site['dblink'] = self.dblink

     removal = "delete from peepbuzz.blocked_accounts where ba_id=" + str(self.ba_id)
     mysql_tools.mysqlQuery(removal, site['dblink'])
Example #4
0
def cleanup(days):
    """Delete filaments older than `days` days, plus stories left without any.

    Each filament in peepbuzz.filaments whose `created` is at least `days`
    days old is deleted. When that filament had a story_id and no filament
    references the story any more, the peepbuzz.stories row is deleted too.

    Returns True when the sweep completes. If a delete or count query
    raises, the statement is pretty-printed and the process exits with
    status 1.
    """
    link = mysql_tools.db_connect()

    def run(statement):
        # Failure policy kept from the original: show the statement, abort.
        try:
            return mysql_tools.mysqlQuery(statement, link)
        except Exception:
            pprint.pprint(statement)
            sys.exit(1)

    query = 'SELECT filament_id, story_id FROM peepbuzz.filaments WHERE created <= DATE_SUB(NOW(), INTERVAL ' + str(days) + ' DAY)'
    result = mysql_tools.mysqlQuery(query, link)
    while True:
        row = result.fetch_row(1, 1)
        if row == ():
            break
        filament = row[0]
        run('DELETE from peepbuzz.filaments WHERE filament_id = "' + str(filament['filament_id']) + '"')

        story_id = filament['story_id']
        if story_id is None:
            continue

        # The count goes into its own variable. Previously it overwrote
        # `row`, the row dict was compared to 0 (never true) and story_id
        # was then looked up in the count row, so no story was ever removed.
        count_result = run('SELECT count(*) AS remaining from peepbuzz.filaments WHERE story_id = "' + str(story_id) + '"')
        count_row = count_result.fetch_row(1, 1)
        if count_row == ():
            # nothing to judge by; keep sweeping the remaining filaments
            continue
        if int(count_row[0]['remaining']) == 0:
            run('DELETE FROM peepbuzz.stories WHERE story_id = "' + str(story_id) + '"')
    return True
    def tearDown(self):
        """Delete the peepbuzz.blocked_accounts row identified by self.ba_id.

        If the shared dblink is None it is first restored from self.dblink.
        """
        site = infoModule.info.site
        if site['dblink'] is None:
            # need this code to recover from db disconnect test
            site['dblink'] = self.dblink

        removal = "delete from peepbuzz.blocked_accounts where ba_id=" + str(self.ba_id)
        mysql_tools.mysqlQuery(removal, site['dblink'])
def createVideoStory(embed_code, v_celeb_list, source): #embed code, list of dicts, dict
    """Recognize a YouTube <embed> and record a link to an already-known video.

    When embed_code contains a YouTube <embed> tag, the watch page is
    fetched, its title and meta description are extracted, and the id of
    the 'youtube' source row (parent=0) in db_celebrifi.sources is looked
    up. If a db_celebrifi.subs row already has that watch URL, a
    linkHistory row is inserted and the sub's score is incremented (both
    skipped when the global debugMode is not False). Otherwise only a log
    line is written; the addSub call is commented out.

    Always returns 0. v_celeb_list and the extracted description are only
    used by the commented-out addSub call.
    """
    recognized = False
    v_source = {}
    v_page_info = []
    videoSource = []
    newVideoSub = []
    embed = re.search('<embed.*?src=\"http:\/\/www.youtube.com\/\w+\/([\w-]+).*?<\/embed>', embed_code, re.I)
    if embed:
        # NOTE(review): `id` shadows the builtin; id.group(0) raises
        # AttributeError if the embed src has no /v/<id> segment.
        id = re.search('\/v\/[\w-]+', embed.group(0))
        url = 'http://www.youtube.com/watch?v=' + id.group(0).replace('/v/', '')

        youtube_page = urllib.urlopen(url).read()
        youtube_page = youtube_page.replace('\n', '').replace('\t', '')

        # NOTE(review): .group(0) is called directly on the search result,
        # so a page without a matching <title> raises AttributeError.
        titleMatch = re.search('<title>Youtube ?- (.*?)</title>', youtube_page, re.I).group(0)
        titleMatch = titleMatch.replace('<title>', '').replace('</title>', '').replace('YouTube- ', '').replace('YouTube - ', '')

        v_page_info = { "title" : titleMatch, "url" : url, "embed_code": embed_code }

        # NOTE(review): same AttributeError risk when the description meta
        # tag is missing.
        bodyMatch = re.search('<meta name="description" content="(.*?)">', youtube_page, re.I).group(0)
        bodyMatch = bodyMatch.replace('<meta name="description" content="', '').replace('">', '')
        
        sql = "select * from db_celebrifi.sources where title='youtube' and parent=0"
        vSourceQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

        # collect the source_id of every matching row; fetch_row returns ()
        # when the result is exhausted
        while True:
            v_s = vSourceQ.fetch_row(1,1)
            if v_s == ():
                break
            videoSource.append(v_s[0]['source_id'])
        recognized = True
        
        # NOTE(review): IndexError here if the sources query returned no row.
        v_source['source_id'] = videoSource[0]
        v_source['publish_immediately'] = True

    if recognized : 
        sql = 'select * from db_celebrifi.subs where url like "' + v_page_info['url'] + '"'
        urlCheckQuery = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        if urlCheckQuery != False:
            if urlCheckQuery.num_rows() > 0:
                urlRows = urlCheckQuery.num_rows()
                plog("found video url.  incrementing score using: update subs set score=score + 1 where url='" + v_page_info['url']+"'", 2)
                while True:
                    n_v = urlCheckQuery.fetch_row(1,1)
                    if n_v == ():
                        break
                    newVideoSub.append(n_v[0]['sub_id'])
                # NOTE(review): embed_code is concatenated without quotes or
                # escaping, and sub_id / source['source_id'] must already be
                # strings for the + to work - confirm this statement can run.
                sql = "insert into db_celebrifi.linkHistory set sub_id=" + newVideoSub[0] + ", feedIdx="+source['source_id'] +", linker_id=-1, linkerURL=" + embed_code + ", linkedOn=now(), userVote=false"
                if debugMode == False: 
                    linkHistory = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
                # NOTE(review): unlike the other statements, `subs` is not
                # qualified with db_celebrifi here.
                sql = "update subs set score=score+1 where url='"+v_page_info['url']+"'"
                if debugMode == False:
                    updateSubs = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink']) 
            else:
                plog("adding standalone video story", 2)
                #addSub(bodyMatch, v_celeb_list, "", 0, v_source, v_page_info)        
       
    return 0
Example #7
0
def getNewEntities():
    """Tag stories newer than the last tagged one with their entities.

    Reads max(story_id) from peepbuzz.story_entities, selects every
    peepbuzz.stories row with a larger story_id, posts each title/body to
    the enhancer gateway and inserts one story_entities row per entity in
    the JSON reply. Stories with a NULL title or body, or whose reply is
    not valid JSON, are skipped.

    Returns True when the loop completes, False if a query raises or the
    max(story_id) lookup yields no row.
    """
    dblink = infoModule.info.site['dblink']
    query = 'select max(story_id) as max_story from peepbuzz.story_entities'
    try:
        storyIDQ = mysql_tools.mysqlQuery(query, dblink)
    except Exception:
        return False
    storyIDRow = storyIDQ.fetch_row(1, 1)
    if storyIDRow == ():
        return False
    storyID = storyIDRow[0]['max_story']
    if storyID is None:
        # story_entities is empty: start from the first story.
        # Previously the else branch also reset this to 0, which discarded
        # the lookup above and re-tagged every story on every run.
        storyID = 0
    query = 'select story_id, title, body from peepbuzz.stories where story_id > ' + str(storyID)
    print(query)
    try:
        storiesQ = mysql_tools.mysqlQuery(query, dblink)
    except Exception:
        return False

    url = 'http://informifi.com/enhancer/APIGateway.php'
    while True:
        row = storiesQ.fetch_row(1, 1)
        if row == ():
            break
        story_id = row[0]['story_id']
        title = row[0]['title']
        body = row[0]['body']
        if body is None or title is None:
            continue
        request = {'command': 'entities',
                   'byID': 'true',
                   'title': title,
                   'searchText': body}
        data = urllib.urlencode(request)
        response = urllib2.urlopen(urllib2.Request(url, data))
        try:
            res = response.read()
        finally:
            response.close()
        try:
            entities = json.loads(res)
        except ValueError:
            continue
        print("found " + str(len(entities)) + " entities")
        for ents in entities:
            # 'Y' is stored as 1 and 'N' as 10; anything else goes through
            # unchanged
            primo = ents['primo']
            if primo == "Y":
                primo = 1
            elif primo == "N":
                primo = 10
            # str() so a numeric entity id from the JSON cannot raise TypeError
            q = 'insert into peepbuzz.story_entities (story_id, entity_id, primo) values ("' + str(story_id) + '","' + str(ents['id']) + '","' + str(primo) + '")'
            try:
                mysql_tools.mysqlQuery(q, dblink)
            except Exception:
                return False
    return True
Example #8
0
def insertHash(hashtag, type, type_short, block):
    """Insert a row with score "1" for a hashtag into a hashtag stats table.

    The table is peepbuzz.hashtag_<type>_stats and the block column is
    <type_short>_block. Returns False if mysqlQuery returned False,
    otherwise True.
    """
    pieces = [
        'insert into peepbuzz.hashtag_', type,
        '_stats (hashtag, ', type_short,
        '_block, score) values ("', hashtag,
        '","', block,
        '","1")',
    ]
    outcome = mysql_tools.mysqlQuery(''.join(pieces), infoModule.info.site['dblink'])
    return not (outcome == False)
Example #9
0
def testUTF():
    """Print the title of the first row returned from peepbuzz.stories.

    The unicode insert statement is built but its execution is commented
    out, so only the select runs.
    """
    link = infoModule.info.site["dblink"]
    insert_sql = u"insert into peepbuzz.stories set title='foo faa \u2026 fum'"
    # mysql_tools.mysqlQuery(insert_sql, link)
    # fetch_row(0, 1): maxrows 0 is used here to pull the whole result
    titles = mysql_tools.mysqlQuery("select title from peepbuzz.stories", link).fetch_row(0, 1)
    print(titles[0]["title"])
    def setUp(self):
        """Connect to the database and insert the blocked_accounts test row.

        Stores a new connection in infoModule.info.site['dblink'], picks one
        existing user_id and account_id, inserts a peepbuzz.blocked_accounts
        row from them plus self.unknown_account_id (not assigned here, so it
        must exist already), and keeps the insert id in self.ba_id.
        """
        link = mysql_tools.db_connect()
        infoModule.info.site['dblink'] = link

        # set up accounts for test: any existing user and account will do
        user_row = mysql_tools.mysqlQuery(
            "SELECT user_id from peepbuzz.users limit 1", link).fetch_row(1, 1)
        account_row = mysql_tools.mysqlQuery(
            "SELECT account_id from peepbuzz.accounts limit 1", link).fetch_row(1, 1)

        self.account_id = account_row[0]['account_id']
        self.user_id = user_row[0]['user_id']

        insert_sql = "insert into peepbuzz.blocked_accounts set user_id=" + self.user_id + ", unknown_account_id=" + self.unknown_account_id + ", account_id=" + self.account_id
        mysql_tools.mysqlQuery(insert_sql, link)
        self.ba_id = link.insert_id()
def store_discussion(discussion_json, filament_id):
    """Insert the comments of one filament into peepbuzz.discussions.

    discussion_json is a JSON list of dicts. Entries containing a "count"
    key are skipped; every other entry must provide "user_id",
    "user_name", "thumbnail", "comment_created" and "body". The commenter
    is resolved (or created) through accountFinder.

    Returns the list of new discussion ids as ints, or False when the
    filament does not exist (an error is appended to
    infoModule.info.errorList in that case).
    """
    def sql_text(value):
        # Values are embedded in double-quoted SQL strings below, so the
        # backslash and both quote characters are escaped. Escaping only
        # the single quote let any comment containing a double quote end
        # the string early.
        return value.replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'")

    discussions = json.loads(discussion_json)
    dblink = infoModule.info.site["dblink"]

    # get stream_id
    sql = "SELECT stream_id FROM peepbuzz.filaments WHERE filament_id = " + str(filament_id)
    stream_idQ = mysql_tools.mysqlQuery(sql, dblink)
    row = stream_idQ.fetch_row(1, 1)
    if row == ():
        infoModule.info.errorList.append("ERROR: no such filament (" + str(filament_id) + ")")
        return False
    stream_id = row[0]["stream_id"]

    discussion_ids = []
    for discussion in discussions:
        if "count" in discussion:
            continue

        account_id, table = accountFinder(
            stream_id, discussion["user_id"], discussion["user_name"], discussion["thumbnail"]
        )
        if table != "accounts":
            # Only the accounts table has a known id column. Before, `field`
            # was unbound here (or stale from the previous comment).
            infoModule.info.errorList.append("ERROR: unsupported account table (" + str(table) + ")")
            continue
        field = "account_id"

        mysql_tools.mysqlQuery(
            u"INSERT INTO peepbuzz.discussions SET filament_id="
            + str(filament_id)
            + ", "
            + field
            + "="
            + str(account_id)
            + ', created="'
            + sql_text(discussion["comment_created"])
            + '", body="'
            + sql_text(discussion["body"])
            + '"',
            dblink,
        )
        discussion_ids.append(int(dblink.insert_id()))

    return discussion_ids
Example #12
0
def getIds(sub_id):
    """Register the celebs linked to a sub in infoModule.info.entityList.

    Every celeb_id found in <site database>.subs_celebs for sub_id gets a
    fresh entry with no position, frequency 0 and primo 'N'. Returns None.
    """
    lookup = 'SELECT celeb_id FROM ' + infoModule.info.site['database'] + '.subs_celebs WHERE sub_id = ' + str(sub_id)
    linked = mysql_tools.mysqlQuery(lookup, infoModule.info.site['dblink']).fetch_row(0, 1)

    for linked_row in linked:
        celeb = linked_row['celeb_id']
        infoModule.info.entityList[celeb] = {'position': None, 'frequency': 0, 'primo': 'N'}
Example #13
0
def testUTF():
    """Print the title of the first row returned from peepbuzz.stories.

    The unicode insert statement is built but its execution is commented
    out, so only the select runs.
    """
    link = infoModule.info.site['dblink']
    insert_sql = u'insert into peepbuzz.stories set title=\'foo faa \u2026 fum\''
    # mysql_tools.mysqlQuery(insert_sql, link)
    # fetch_row(0, 1): maxrows 0 is used here to pull the whole result
    titles = mysql_tools.mysqlQuery('select title from peepbuzz.stories', link).fetch_row(0, 1)
    print(titles[0]['title'])
    def setUp(self):
        """Connect to the database and insert the blocked_accounts test row.

        Stores a new connection in infoModule.info.site['dblink'], picks one
        existing user_id and account_id, inserts a peepbuzz.blocked_accounts
        row from them plus self.unknown_account_id (not assigned here, so it
        must exist already), and keeps the insert id in self.ba_id.
        """
        link = mysql_tools.db_connect()
        infoModule.info.site['dblink'] = link

        # set up accounts for test: any existing user and account will do
        user_row = mysql_tools.mysqlQuery(
            "SELECT user_id from peepbuzz.users limit 1", link).fetch_row(1, 1)
        account_row = mysql_tools.mysqlQuery(
            "SELECT account_id from peepbuzz.accounts limit 1", link).fetch_row(1, 1)

        self.account_id = account_row[0]['account_id']
        self.user_id = user_row[0]['user_id']

        insert_sql = "insert into peepbuzz.blocked_accounts set user_id=" + self.user_id + ", unknown_account_id=" + self.unknown_account_id + ", account_id=" + self.account_id
        mysql_tools.mysqlQuery(insert_sql, link)
        self.ba_id = link.insert_id()
Example #15
0
def URLInStories(URL):
    """Look up a story by URL.

    Returns the story_id (as int) of the first peepbuzz.stories row whose
    url or original_url equals URL, or False when there is none.
    """
    lookup = "".join([
        "select story_id from peepbuzz.stories where url='", URL,
        "' or original_url='", URL, "'",
    ])
    match = mysql_tools.mysqlQuery(lookup, infoModule.info.site["dblink"]).fetch_row(1, 1)
    if match != ():
        return int(match[0]["story_id"])
    return False
Example #16
0
def updateHash(hashtag, type, type_short, score, block):
    """Store score + 1 for a hashtag in a hashtag stats table.

    Updates peepbuzz.hashtag_<type>_stats where hashtag and
    <type_short>_block match. Returns False if mysqlQuery returned False,
    otherwise True.
    """
    bumped = int(score) + 1
    pieces = [
        'update peepbuzz.hashtag_', type,
        '_stats set score="', str(bumped),
        '" where hashtag="', hashtag,
        '" and ', type_short,
        '_block="', block,
        '"',
    ]
    outcome = mysql_tools.mysqlQuery(''.join(pieces), infoModule.info.site['dblink'])
    return not (outcome == False)
Example #17
0
def URLInStories(URL):
    """Look up a story by URL.

    Returns the story_id (as int) of the first peepbuzz.stories row whose
    url or original_url equals URL, or False when there is none.
    """
    lookup = "".join([
        "select story_id from peepbuzz.stories where url='", URL,
        "' or original_url='", URL, "'",
    ])
    match = mysql_tools.mysqlQuery(lookup, infoModule.info.site['dblink']).fetch_row(1, 1)
    if match != ():
        return int(match[0]['story_id'])
    return False
Example #18
0
def getBlockedImages():
    """Return the 'regex' value of every row in <site database>.blocked_images."""
    table = infoModule.info.site['database'] + ".blocked_images"
    listing = mysql_tools.mysqlQuery("select * from " + table, infoModule.info.site['dblink'])
    # fetch_row(1, 1) returns () when no rows are left
    return [hit[0]['regex'] for hit in iter(lambda: listing.fetch_row(1, 1), ())]
Example #19
0
def addFollowing(user_id=None, account_id=None):
    """Make sure a (user_id, account_id) row exists in peepbuzz.following.

    Returns False when either id is missing or falsy (no query runs),
    True when the row already exists, and otherwise the truthiness of the
    connection's insert_id() after the insert.
    """
    # Make sure we have a user to check against
    if not user_id or not account_id:
        return False

    dblink = infoModule.info.site['dblink']

    # Check to see if the follower exists yet
    check = mysql_tools.mysqlQuery(
        "SELECT * FROM `peepbuzz`.`following` WHERE `user_id` = '" + str(user_id)
        + "' AND `account_id` = '" + str(account_id) + "'",
        dblink)
    if check.num_rows() > 0:
        return True

    # Since they did not exist let's add them
    sql = "INSERT INTO `peepbuzz`.`following` SET `user_id` = '" + str(user_id) + "', `account_id` = '" + str(account_id) + "'"
    # Call form prints the same on Python 2 and still parses on Python 3,
    # unlike the former print statement.
    print(sql)
    mysql_tools.mysqlQuery(sql, dblink)

    return bool(dblink.insert_id())
    def on_status(self, status):
        """Store a tweet in peepbuzz.twitter_queue when its author is followed.

        status is read as a dict: 'created_at', 'text', 'id', the
        'hashtags' and 'urls' lists under 'entities', and 'id',
        'screen_name' and 'profile_image_url' under 'user'. Nothing is
        written unless str(status['user']['id']) is in the global
        follow_list. A fresh connection is opened for the insert and always
        closed afterwards.
        """
        global follow_list

        def quote_safe(value):
            # Backslashes are doubled before quotes are escaped. Escaping
            # only the quote lets a tweet containing \' (or ending in a
            # backslash) terminate the single-quoted SQL string early.
            return value.replace("\\", "\\\\").replace("'", "\\'")

        #format time into 2011-02-23T16:42:40+0000 format ala facebook
        #Twitter Format: Wed Mar 23 22:51:50 +0000 2011
        formattedTime = self.formatTime(status['created_at'])

        hashtag = ','.join(
            [quote_safe(val['text']) for val in status['entities']['hashtags']])
        url = ','.join(
            [quote_safe(val['url']) for val in status['entities']['urls']])
        # An empty text is fine here; the former text[-1] probe raised
        # IndexError on it.
        text = quote_safe(status['text'])

        author_id = str(status['user']['id'])
        if author_id not in follow_list:
            return

        file_put_contents(
            str(status['user']['screen_name']) + " posted something")
        infoModule.info.site['dblink'] = mysql_tools.db_connect()
        try:
            sql = (
                u"INSERT INTO `peepbuzz`.`twitter_queue` SET `status_id` = '" + str(status['id'])
                + "', `created` = '" + formattedTime
                + "', `promoter_id` = '" + author_id
                + "', `promoter` = '" + quote_safe(status['user']['screen_name'])
                + "', `thumbnail` = '" + quote_safe(str(status['user']['profile_image_url']))
                + "', `summary` = '" + text
                + "', `external_id` = '" + author_id
                + "', `hashtags` = '" + hashtag
                + "', `urls` = '" + url + "'"
            )
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        finally:
            # close even when the insert raises, so the connection never leaks
            infoModule.info.site['dblink'].close()
Example #21
0
def entityLibraryByUrl(lookupUrl, field):
    """Return one field of the entity whose lookupUrl matches.

    Finds the celeb_id in db_topics.celebs, stores a new liveEntity for it
    in liveEntities and returns that entity's getData(field). Logs at
    level 5 and returns False when no celeb has this lookupUrl.
    """
    lookup = 'select celeb_id from db_topics.celebs where lookupUrl = "' + lookupUrl + '"'
    found = mysql_tools.mysqlQuery(lookup, infoModule.info.site['dblink']).fetch_row(1, 1)
    if found == ():
        log.plog('no celeb_id found for ' + lookupUrl, 5)
        return False

    cid = found[0]['celeb_id']
    liveEntities[cid] = liveEntity(cid)
    return liveEntities[cid].getData(field)
Example #22
0
def getBlockedImages():
    """Return the 'regex' value of every row in <site database>.blocked_images."""
    table = infoModule.info.site['database'] + ".blocked_images"
    listing = mysql_tools.mysqlQuery("select * from " + table, infoModule.info.site['dblink'])
    # fetch_row(1, 1) returns () when no rows are left
    return [hit[0]['regex'] for hit in iter(lambda: listing.fetch_row(1, 1), ())]
Example #23
0
def getEntityTotals(entity_id):
    """Return the story totals of one entity, keyed by vertical.

    Reads db_topics.celebStatsTotals for celeb_id == entity_id and maps each
    row's vertical to {"stories_total": int, "storiesWeighted_total": int}.
    A later row with the same vertical replaces an earlier one. The dict is
    empty when no row matches.
    """
    lookup = 'select celeb_id, stories_total, storiesWeighted_total, vertical from db_topics.celebStatsTotals where celeb_id =' + str(entity_id)
    stats = mysql_tools.mysqlQuery(lookup, infoModule.info.site['dblink'])

    totals = {}
    for fetched in iter(lambda: stats.fetch_row(1, 1), ()):
        record = fetched[0]
        vertical = record['vertical']
        totals[vertical] = {
            "stories_total": int(record['stories_total']),
            "storiesWeighted_total": int(record['storiesWeighted_total']),
        }
    return totals
Example #24
0
def accountFinder(stream_id, external_id, user_name, thumbnail):
    """Return (account_id, 'accounts') for an external account, creating it if needed.

    Looks in peepbuzz.accounts for external_id within stream_id. When a row
    exists, its account_id is returned as fetched and the stored thumbnail
    is updated if a non-empty, different one was passed in. Otherwise a row
    is inserted and str() of its insert id is returned.

    NOTE(review): user_name, thumbnail and external_id are escaped here
    because the visible caller (store_discussion) passes raw values; confirm
    no other caller pre-escapes them.
    """
    dblink = infoModule.info.site['dblink']

    def sql_text(value):
        # Backslash first, then the quote, so names such as O'Brien cannot
        # end the single-quoted SQL string.
        return value.replace("\\", "\\\\").replace("'", "\\'")

    # Checking for external accounts in known accounts
    sql = u"select account_id, thumbnail from peepbuzz.accounts where external_id='" + sql_text(str(external_id)) + "' and stream_id=" + str(stream_id) + " LIMIT 1"
    known_account = mysql_tools.mysqlQuery(sql, dblink)
    # Deciding on the fetched row (instead of num_rows() plus a separate
    # empty-row branch) removes the path that fell through and returned None.
    row = known_account.fetch_row(1, 1)
    if row != ():
        account_id = row[0]['account_id']
        # check thumbnail against reported thumbnail and update if changed
        if len(thumbnail) > 0 and thumbnail != row[0]['thumbnail']:
            sql = "update peepbuzz.accounts set thumbnail='" + sql_text(thumbnail) + "' where account_id=" + str(account_id)
            mysql_tools.mysqlQuery(sql, dblink)
        return account_id, 'accounts'

    # no account, create it; the insert already stores the thumbnail, so the
    # follow-up UPDATE that wrote the same value again is gone
    mysql_tools.mysqlQuery(
        u"insert into peepbuzz.accounts set external_id='" + sql_text(str(external_id))
        + "', stream_id='" + str(stream_id)
        + "', user_name='" + sql_text(user_name)
        + "', thumbnail='" + sql_text(thumbnail) + "'",
        dblink)
    new_account_id = dblink.insert_id()
    return str(new_account_id), 'accounts'
def twitterPic(picture, smsID, caption, celebID, date):
    """Disabled: returns None immediately and does no work.

    Everything after the bare return is unreachable. It is kept as a record
    of the former implementation, rewritten only so that it parses on both
    Python 2 and Python 3 (print call form, no old-style except clause).
    """
    return

    # ---- unreachable from here on ----
    url = picture.group(0)

    # Make sure that we have the http:// on the url
    if url[:7] != 'http://':
        url = 'http://' + url

    # Create an image record for this
    mysql_tools.mysqlQuery("INSERT INTO `db_celebrifi`.`images` SET `credit` = ''", link)
    imageID = str(link.insert_id())

    imageFetch = "http://angelina.celebrifi.com:81/imageFetcher.php?image_id=" + imageID + "&type=feed&url=" + url
    print(imageFetch)

    try:
        content = urllib2.urlopen(imageFetch).read()
    except urllib2.URLError:
        # Looks like we had a problem... So lets remove the image
        # NOTE(review): the record was inserted into `images` but is deleted
        # from `twitPics` - confirm the table before re-enabling this code.
        mysql_tools.mysqlQuery("DELETE FROM `db_celebrifi`.`twitPics` WHERE `image_id` = '" + imageID + "'", link)
        return False
Example #26
0
def store_discussion(discussion_json, filament_id):
    """Insert the comments of one filament into peepbuzz.discussions.

    discussion_json is a JSON list of dicts. Entries containing a 'count'
    key are skipped; every other entry must provide 'user_id',
    'user_name', 'thumbnail', 'comment_created' and 'body'. The commenter
    is resolved (or created) through accountFinder.

    Returns the list of new discussion ids as ints, or False when the
    filament does not exist (an error is appended to
    infoModule.info.errorList in that case).
    """
    def sql_text(value):
        # Values are embedded in double-quoted SQL strings below, so the
        # backslash and both quote characters are escaped. Escaping only
        # the single quote let any comment containing a double quote end
        # the string early.
        return value.replace("\\", "\\\\").replace('"', '\\"').replace("'", "\\'")

    discussions = json.loads(discussion_json)
    dblink = infoModule.info.site['dblink']

    # get stream_id
    sql = 'SELECT stream_id FROM peepbuzz.filaments WHERE filament_id = ' + str(
        filament_id)
    stream_idQ = mysql_tools.mysqlQuery(sql, dblink)
    row = stream_idQ.fetch_row(1, 1)
    if row == ():
        infoModule.info.errorList.append("ERROR: no such filament (" +
                                         str(filament_id) + ")")
        return False
    stream_id = row[0]['stream_id']

    discussion_ids = []
    for discussion in discussions:
        if 'count' in discussion:
            continue

        account_id, table = accountFinder(stream_id, discussion['user_id'],
                                          discussion['user_name'],
                                          discussion['thumbnail'])
        if table != 'accounts':
            # Only the accounts table has a known id column. Before, `field`
            # was unbound here (or stale from the previous comment).
            infoModule.info.errorList.append("ERROR: unsupported account table (" +
                                             str(table) + ")")
            continue
        field = 'account_id'

        mysql_tools.mysqlQuery(
            u'INSERT INTO peepbuzz.discussions SET filament_id=' +
            str(filament_id) + ', ' + field + '=' + str(account_id) +
            ', created="' + sql_text(discussion['comment_created']) +
            '", body="' + sql_text(discussion['body']) + '"',
            dblink)
        discussion_ids.append(int(dblink.insert_id()))

    return discussion_ids
def getIds(sub_id):
    """Register the celebs linked to a sub in infoModule.info.entityList.

    Every celeb_id found in <site database>.subs_celebs for sub_id gets a
    fresh entry with no position, frequency 0 and primo 'N'. Returns None.
    """
    lookup = 'SELECT celeb_id FROM ' + infoModule.info.site['database'] + '.subs_celebs WHERE sub_id = ' + str(sub_id)
    linked = mysql_tools.mysqlQuery(lookup, infoModule.info.site['dblink']).fetch_row(0, 1)

    for linked_row in linked:
        celeb = linked_row['celeb_id']
        infoModule.info.entityList[celeb] = {'position': None, 'frequency': 0, 'primo': 'N'}
Example #28
0
def checkForHash(hashtag, type, type_short, block):
    """Return the stored score of a hashtag for one time block, or False.

    `type` names the stats table (daily or hourly, per the original note)
    and `block` is the time/secs value matched against <type_short>_block.
    When several rows match, the score of the last one fetched is returned.
    """
    lookup = ''.join([
        'select score from peepbuzz.hashtag_', type,
        '_stats where ', type_short,
        '_block="', block,
        '" and hashtag="', hashtag,
        '"',
    ])
    hits = mysql_tools.mysqlQuery(lookup, infoModule.info.site['dblink'])
    if not (hits.num_rows() > 0):
        return False

    for hit in iter(lambda: hits.fetch_row(1, 1), ()):
        score = hit[0]['score']
    return score
Example #29
0
def addFollowing(user_id=None, account_id=None):
    """Make sure a (user_id, account_id) row exists in peepbuzz.following.

    Returns False when either id is missing or falsy (no query runs),
    True when the row already exists, and otherwise the truthiness of the
    connection's insert_id() after the insert.
    """
    # Make sure we have a user to check against
    if not user_id or not account_id:
        return False

    dblink = infoModule.info.site['dblink']

    # Check to see if the follower exists yet
    check = mysql_tools.mysqlQuery(
        "SELECT * FROM `peepbuzz`.`following` WHERE `user_id` = '" + str(user_id)
        + "' AND `account_id` = '" + str(account_id) + "'",
        dblink)
    if check.num_rows() > 0:
        return True

    # Since they did not exist let's add them
    sql = "INSERT INTO `peepbuzz`.`following` SET `user_id` = '" + str(
        user_id) + "', `account_id` = '" + str(account_id) + "'"
    # Call form prints the same on Python 2 and still parses on Python 3,
    # unlike the former print statement.
    print(sql)
    mysql_tools.mysqlQuery(sql, dblink)

    return bool(dblink.insert_id())
Example #30
0
def pop_from_queue(external_id=None, account_id=None):
    """Return the queued tweets of one external account as a JSON string.

    Every peepbuzz.twitter_queue row whose external_id matches is turned
    into a dict, then the matching rows are deleted from the queue. When
    either argument is None no query runs and the JSON for an empty list
    is returned.
    """
    if external_id is None or account_id is None:
        return json.dumps([])

    queue_key = str(external_id)
    pending = mysql_tools.mysqlQuery(
        "SELECT * FROM peepbuzz.twitter_queue WHERE external_id=" + queue_key,
        infoModule.info.site['dblink'])

    popped = []
    # fetch_row(1, 1) gives () once the result is exhausted; otherwise the
    # row dict sits at index 0.
    for fetched in iter(lambda: pending.fetch_row(1, 1), ()):
        record = fetched[0]
        link_list = record['urls'].split(',')
        tag_list = record['hashtags'].split(',')
        # format time: the blank becomes 'T' and '0000' is appended
        stamp = record['created'].replace(' ', 'T') + '0000'
        popped.append({
            "stream_name": 'twitter',
            "external_id": record['status_id'],
            "urls": link_list,
            "created": stamp,
            "promoter_id": record['promoter_id'],
            "promoter": record['promoter'],
            "thumbnail": record['thumbnail'],
            "title": None,
            "summary": record['summary'],
            "hashtags": tag_list,
            "discussion": [],
            "account_id": account_id,
        })

    mysql_tools.mysqlQuery(
        "DELETE FROM peepbuzz.twitter_queue WHERE external_id=" + queue_key,
        infoModule.info.site['dblink'])
    return json.dumps(popped)
Example #31
0
def cleanup(days):
    """Delete filaments older than `days` days, plus stories left without any.

    Each filament in peepbuzz.filaments whose `created` is at least `days`
    days old is deleted. When that filament had a story_id and no filament
    references the story any more, the peepbuzz.stories row is deleted too.

    Returns True when the sweep completes. If a delete or count query
    raises, the statement is pretty-printed and the process exits with
    status 1.
    """
    link = mysql_tools.db_connect()

    def run(statement):
        # Failure policy kept from the original: show the statement, abort.
        try:
            return mysql_tools.mysqlQuery(statement, link)
        except Exception:
            pprint.pprint(statement)
            sys.exit(1)

    query = 'SELECT filament_id, story_id FROM peepbuzz.filaments WHERE created <= DATE_SUB(NOW(), INTERVAL ' + str(
        days) + ' DAY)'
    result = mysql_tools.mysqlQuery(query, link)
    while True:
        row = result.fetch_row(1, 1)
        if row == ():
            break
        filament = row[0]
        run('DELETE from peepbuzz.filaments WHERE filament_id = "' + str(
            filament['filament_id']) + '"')

        story_id = filament['story_id']
        if story_id is None:
            continue

        # The count goes into its own variable. Previously it overwrote
        # `row`, the row dict was compared to 0 (never true) and story_id
        # was then looked up in the count row, so no story was ever removed.
        count_result = run(
            'SELECT count(*) AS remaining from peepbuzz.filaments WHERE story_id = "' +
            str(story_id) + '"')
        count_row = count_result.fetch_row(1, 1)
        if count_row == ():
            # nothing to judge by; keep sweeping the remaining filaments
            continue
        if int(count_row[0]['remaining']) == 0:
            run('DELETE FROM peepbuzz.stories WHERE story_id = "' + str(
                story_id) + '"')
    return True
Example #32
0
def addEntity(nameIdx, mptype):
    """Promote an unknown name to a hidden entity in db_topics.celebs.

    The phrase stored under idx `nameIdx` in db_topics.unknownNames is
    looked up. The function logs and returns early when that lookup fails
    or finds nothing, or when the phrase is already in db_topics.notCelebs.
    Otherwise the phrase is added to notCelebs and, if no celebs row has it
    as fullName, a celebs row is inserted with mptype[0]['mptype_id'] and
    the new id is registered in infoModule.info.entityList.

    Only two- and three-word phrases can be split into first/middle/last
    name; for any other word count the celebs insert is skipped with a log
    line. Returns None.
    """
    dblink = infoModule.info.site['dblink']

    def sql_text(value):
        # Backslash first, then the quote, so names such as O'Neal cannot
        # end the single-quoted SQL strings below.
        return value.replace("\\", "\\\\").replace("'", "\\'")

    phraseQ = mysql_tools.mysqlQuery("select phrase from db_topics.unknownNames where idx=" + str(nameIdx), dblink)
    # other helpers in this file test mysqlQuery results against False, so
    # both values count as a failed query here
    if phraseQ is None or phraseQ is False:
        log.plog("unknownNames query failed", 4)
        return
    phrase = phraseQ.fetch_row(1, 1)
    if phrase == ():
        log.plog("unknownNames query returned nothing", 4)
        return

    searchName = phrase[0]['phrase']
    quotedName = sql_text(searchName)

    nameParts = [part.capitalize() for part in re.split(r"\s+", searchName)]
    if len(nameParts) == 2:
        fname, lname = nameParts
        mname = ""
        lookupUrl = fname + "-" + lname
    elif len(nameParts) == 3:
        fname, mname, lname = nameParts
        lookupUrl = fname + "-" + mname + "-" + lname
    else:
        # no defined way to split this phrase into name fields
        fname = mname = lname = lookupUrl = None

    #final double check against notCelebs
    sql = "SELECT * from db_topics.notCelebs WHERE phrase='" + quotedName + "'"
    log.plog(sql, 2)
    notCelebsQ = mysql_tools.mysqlQuery(sql, dblink)
    notCelebs = notCelebsQ.fetch_row(1, 1)
    if notCelebs != ():
        log.plog("attempted to add new entity that is in notCelebs table: " + searchName, 4)
        return

    #to block against this celeb being added later
    mysql_tools.mysqlQuery("insert into db_topics.notCelebs set phrase='" + quotedName + "'", dblink)

    checkCelebQ = mysql_tools.mysqlQuery("select * from db_topics.celebs where fullName='" + quotedName + "'", dblink)
    foundCelebs = checkCelebQ.num_rows()
    log.plog("found " + str(foundCelebs) + " matching rows for '" + searchName + "'", 2)

    if foundCelebs != 0:
        return

    if lookupUrl is None:
        # Formerly a NameError on the unset fname/mname/lname/lookupUrl.
        log.plog("cannot add entity '" + searchName + "': expected a two or three word name", 4)
        return

    log.plog("adding hidden entity '" + searchName + "'", 2)
    sql = ("insert into db_topics.celebs set status='A', fname='" + sql_text(fname)
           + "', mname='" + sql_text(mname)
           + "', lname='" + sql_text(lname)
           + "', fullName='" + quotedName
           + "', searchName='" + quotedName
           + "', lookupUrl='" + sql_text(lookupUrl)
           + "', bio='No bio available', mptype_id=" + str(mptype[0]['mptype_id'])
           + ", created=now()")
    mysql_tools.mysqlQuery(sql, dblink)
    # new entities should be added to the entities list for the page
    newCid = dblink.insert_id()
    if newCid > 0:
        infoModule.info.entityList[newCid] = {'position': None, 'frequency': 1, 'primo': 'N'}
        log.plog('new entity id is ' + str(newCid) + ' added to list for this story', 3)
Example #33
0
def nicknameFinder(workingText, stats, showNames=False):
    """Find nicknames in workingText and register the entities they refer to.

    Posts workingText to the nickname server and parses its reply, a
    comma-separated list of ``eid:firstFound`` items (``eid:firstFound:nameUsed``
    when showNames is set).  Every entity that is not yet in
    infoModule.info.entityList and that exists in db_topics.celebs is added
    to that list with frequency 0.

    workingText -- text sent to the server as the 'searchText' field.
    stats       -- when true, the firstFound value reported by the server is
                   stored as the new entity's 'position'.
    showNames   -- when true, ask the server which name it matched and store
                   it on the new entity under 'nameUsed'.

    Returns None; results are written to infoModule.info.entityList.
    """
    log.plog('NICKNAMES', 2)

    # Production hosts talk to the production nickname server; every other
    # machine goes to the dev server.
    dollyHosts = ['angelina.celebrifi.com', 'britney.celebrifi.com', 'hedwig.celebrifi.com', 'fergie.celebrifi.com', 'catherinez.celebrifi.com', 'dolly.celebrifi.com']
    if os.uname()[1] in dollyHosts:
        serverUrl = 'http://hedwig.informifi.com:81/sourceReader/nicknames.pl'
    else:
        serverUrl = 'http://dev.informifi.com/sourceReader/nicknames.pl'

    # For some cases we need the nickname server to tell us WHY it found a
    # nickname; that is what the showNames field is for.
    if showNames:
        params = urllib.urlencode({'searchText': workingText, 'showNames':'true'})
    else:
        params = urllib.urlencode({'searchText': workingText})

    response = urllib.urlopen(serverUrl, params)
    try:
        nn = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    # Drop the final character of the reply (presumably a trailing newline).
    nicknames = nn[:-1]
    if nicknames == '':
        return
    log.plog('nickname Server returned ' + nicknames, 2)

    entityList = infoModule.info.entityList
    requiredFields = 3 if showNames else 2
    for nickname in nicknames.split(","):
        vals = nickname.split(':')
        if len(vals) < requiredFields:
            # One bad item should not abort processing of the others.
            log.plog("nickname server returned malformed entry: " + nickname, 5)
            continue
        eid = str(vals[0])
        firstFound = vals[1]

        # Entities already on the list keep their existing data.  The check
        # must use the entity id, not the loop position in the reply.
        if eid in entityList:
            continue

        # Only accept ids that exist in the celebs table.
        # NOTE(review): eid comes straight from the server reply and is
        # concatenated into the SQL text -- confirm the server is trusted.
        verifyQ = mysql_tools.mysqlQuery("select lookupUrl from db_topics.celebs where celeb_id=" + eid, infoModule.info.site['dblink'])
        if verifyQ.num_rows() == 0:
            log.plog("entity in list does not exist! %s." % eid, 5)
            continue
        log.plog("verified that %s is a real entity" % eid, 2)

        entry = {'position': None, 'frequency': 0, 'primo' : 'N'}
        if showNames:
            entry['nameUsed'] = vals[2]
        entityList[eid] = entry
        log.plog("found celeb: %s " % eid, 2)

        # The entry was created just above, so it has no position yet and
        # the reported one is always the earliest known.
        if stats:
            entry['position'] = firstFound
Example #34
0
def getEntityTotals(entity_id):
    """Return the per-vertical story totals stored for one entity.

    Queries db_topics.celebStatsTotals for entity_id and returns a dict keyed
    by each row's 'vertical' value.  Each entry holds 'stories_total' and
    'storiesWeighted_total' converted to int.  When the query yields no rows
    the result is an empty dict.
    """
    sql = ('select celeb_id, stories_total, storiesWeighted_total, vertical '
           'from db_topics.celebStatsTotals where celeb_id =%s' % str(entity_id))
    statsQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    totals = {}
    # fetch_row(1, 1) yields the next row wrapped in a sequence, and () once
    # the result set is exhausted; the sentinel form of iter() stops there.
    for fetched in iter(lambda: statsQ.fetch_row(1, 1), ()):
        record = fetched[0]
        totals[record['vertical']] = {
            "stories_total": int(record['stories_total']),
            "storiesWeighted_total": int(record['storiesWeighted_total']),
        }
    return totals
Example #35
0
def insertHash(hashtag, type, type_short, block):
    """Insert a hashtag row with a starting score of 1.

    The row goes into peepbuzz.hashtag_<type>_stats, with hashtag in the
    'hashtag' column and block in the '<type_short>_block' column.  All four
    arguments are spliced into the statement text as-is, so they must be
    strings.

    Returns False when mysqlQuery's result compares equal to False, True
    otherwise.
    """
    pieces = (
        "insert into peepbuzz.hashtag_",
        type,
        "_stats (hashtag, ",
        type_short,
        '_block, score) values ("',
        hashtag,
        '","',
        block,
        '","1")',
    )
    statement = "".join(pieces)
    outcome = mysql_tools.mysqlQuery(statement, infoModule.info.site["dblink"])
    return not (outcome == False)
Example #36
0
def loadStreams():
    """Build a name -> stream_id lookup from the peepbuzz.streams table.

    Returns False when the table has no rows.  Otherwise returns a dict that
    maps each row's 'name' to its 'stream_id'; if a name occurs more than
    once, the id from the last row read wins.
    """
    result = mysql_tools.mysqlQuery("SELECT * FROM `peepbuzz`.`streams`", infoModule.info.site['dblink'])
    if not (result.num_rows() > 0):
        return False

    streams = {}
    # fetch_row(1, 1) returns () once every row has been consumed.
    for fetched in iter(lambda: result.fetch_row(1, 1), ()):
        record = fetched[0]
        # New and repeated names are stored the same way.
        streams[record['name']] = record['stream_id']
    return streams
Example #37
0
def loadStreams():
    """Read peepbuzz.streams into a dict of stream name -> stream_id.

    Returns False if the query produced no rows; otherwise the dict.  When
    several rows share a name, the stream_id of the last one read is kept.
    """
    cursor = mysql_tools.mysqlQuery("SELECT * FROM `peepbuzz`.`streams`",
                                    infoModule.info.site['dblink'])
    if not (cursor.num_rows() > 0):
        return False

    streams = {}
    # Priming read, then keep fetching until fetch_row signals exhaustion
    # with an empty tuple.
    fetched = cursor.fetch_row(1, 1)
    while fetched != ():
        record = fetched[0]
        streams[record['name']] = record['stream_id']
        fetched = cursor.fetch_row(1, 1)
    return streams
def checkBlacklist(user_id=None, account_id=None):
    """Report whether user_id has a block record for account_id.

    Returns True when peepbuzz.blocked_accounts contains at least one row
    matching both ids.  Returns False when either id is missing or falsy,
    when no database link is configured, or when no row matches.
    """
    # Both ids are needed to look anything up.
    if not (user_id and account_id):
        return False

    dblink = infoModule.info.site['dblink']
    if dblink == None:
        return False

    sql = ("SELECT * FROM `peepbuzz`.`blocked_accounts` "
           "WHERE `account_id` = '%s' AND `user_id` = '%s'"
           % (str(account_id), str(user_id)))
    blockedQ = mysql_tools.mysqlQuery(sql, dblink)

    # Any matching row means the account is blocked for this user.
    return blockedQ.num_rows() > 0
Example #39
0
def updateHash(hashtag, type, type_short, score, block):
    """Raise a hashtag's stored score by one.

    Updates peepbuzz.hashtag_<type>_stats, setting score to int(score) + 1 on
    the row whose 'hashtag' equals hashtag and whose '<type_short>_block'
    equals block.  hashtag, type, type_short and block are spliced into the
    statement text as-is, so they must be strings.

    Returns False when mysqlQuery's result compares equal to False, True
    otherwise.
    """
    bumped = str(int(score) + 1)
    statement = "".join((
        "update peepbuzz.hashtag_",
        type,
        '_stats set score="',
        bumped,
        '" where hashtag="',
        hashtag,
        '" and ',
        type_short,
        '_block="',
        block,
        '"',
    ))
    outcome = mysql_tools.mysqlQuery(statement, infoModule.info.site["dblink"])
    return not (outcome == False)
Example #40
0
def getUserList(demo=False):

    if demo:
		users =  "133880286,149198901,101695592,77779709,145125358,135421739,24417275,50393960,131805814,8161232,31348594,128155589,132385468,38403110,123287206,22184352,109275593,59808841,24929621,18839785,97169575,87118217,87170183,101311381,113419517,66606024,56631494,23047158,813286,70179948,24705126,23088711,17842366,81634880,73571482,55593594,80648576,65076023,67227697,83683860,82550972,71281224,31711374,67003520,80932205,58270815,78589660,84770446,43047397,66974773,65560119,51807659,71785951,36736716,81037355,85037143,38323168,17109111,25764613,15989467,23406744,21801881,21424347,22765820,21043020,25966862,17386471,21575516,27606345,30632568,39157919,25477690,23728664,29498919,26911579,20752370,24110202,24326028,25574567,20061192,16354940,19429373,21990842,29625563,30204765,23328321,18320874,30313925,21370475,16434558,23233419,27488712,22543979,23210425,18346856,244655353,27474384,14171957,27583520,131144091,31732799,23551895,27554803,17494118,8316102,42132842,28682172,32613916,25768462,29971169,24721128,19885070,29297677,18287787,15764644,44621097,18278092,6250582,17672407,21807738,20314162,28637856,16627822,18501269,17104244,21225694,28353149,14959344,17825734,17429421,18824182,25914362,4358521,23532875,25154750,14142269,15090987,19415715,10552522,24914623,18444963,30341243,14715999,18663330,23686579,19099133,24333126,29103898,27982454,15690193,23237369,16672734,33087265,31580676,31108370,21113062,158769098,16031927,20714308,6017542,1880781,78945513,202022019,21313855,24618431,20073874,41634245,11134252,3135241,147725246,56413858,20545835,14345062,40116885,10774652,16180004,114655637,60680268,39308549,69773058,73402979,196168350,37764422,103060329,19416225,16555200,18451200,20094138,28195764,4898091,16311797,65146567,19658614,1917731,90725320,19659031,86364947,15450996,7228682,18956073,2704951,46002396,18513241,19611483,66369181,21186125,10126672,16244647,8775672,31310158,28576135,36681590,5988062,25395727,16869718,23725283,63150856,15988710,108351678,20024413,24
67791,15918353,63395170,30411537,87103895,15985455,14669951,14844867,57428429,45190383,9300262,10168082,30864583,19417492,16467567,17546958,19553409,15918708,18229117,18216752,26533227,7713202,5357812,9508922,18916432,18217624,18906561,1367531,15675138,3108351,16032925,18646108,23970102,25320005,17454769,22701818,19899088,17844611,156783914,21721528,43008595,26429932,41073372,14553905,71028123,16305330,39803397,159875784,17855061,34346559,16643805,256865137,21987613,13601372,23388896,28706024,51893943,19475309,88663606,138135579,19317307,20532520,55656355,39880788,54579254,15416739,6604072,195223934,65629866,74231747,15883275,47956412,13783772,17998573,189181305,36775887,14619211,37360911,11640472,14089195,9695312,21052372,126707999,6836602,18185605,21466569,113436270,234814458,14085798,23023236,70858251,93819415,153830702,9636632,15430198,134537602,61621735,29899836,15394688,46241448,52786408,73060532,26765003,39588173,78377692,4517721,240892859,22696335,14253479,214677862,10122292,262246353,18510860,180505807,146993421,38883394,33696409,16476000,213796133,243361655,17667607,7995512,44366931,19374728,246913104,141335663,15066958,224300799,150723226,45564482,19195914,140882350,134758540,15117711,18990065,14411731,176936320,225847395,82149912,121836937,26520417,10876852,8383592,1319181,40519997,32518531,35677276,140496824,14079576,755756,2363571,811303,1586501,12798822,23838203,14981380,211891109,59477295,164725958,48499629,14514804,12101862,8237482,67358777,199097888,196352451,19658826,41187603,145753059,34147744,28520903,18051038,15846407,124245139,15591578,67140324,14109167,1284921,29631095,15891023,115230067,16204491,114870386,14179105,15719490,15645093,11336782,150990112,1652541,18164420,34228779,115699970,158865339,18956903,26642006,2195241,22569599,30815404,765564,40283581,19289684,14019652,16361348,19548547,14138514,18875991,138594660,60025762,16460799,15155313,13027572,71051839,6129732,11880802,18130749,18201016,20689408,11348282,16376711,17904776,15205798,4
6249360,26921818,132811099,52848021,5360012,14281466,22802398,43194755,13058232,12925072,104528362,16302090,135962109,9855132,45577446,24223641,121812725,65987618,21414576,17439893,22737278,11178672,14150661,56792882,24807616,16029780,30544925,33342154,972651,7581972,115710614,3939681,14173084,21875834,14266598,15639642,16012783,27911608,10235102,16661296,7215512,19621110,38218953,62591681,807095,14634720,27740227,19058681,24457744,16356190,15224867,10340482,89309995,17220934,12726792,15458181,54176768,56495963,23596644,15223265,18713552,13201312,37703408,15575625,33248363,107318424,133093395,19929890,56687786,142722336,53556894,52339151,45796568,57928790,106034413,171117515,21425125,137616033,104740065,44599982,70903729,67712707,53355887,108936259,95917731,18681139,97865628,15037033,16425333,56336341,57998991,124258971,171145913,86254626,43017766,99642673,77888423,148248527,42908132,88856792,87484569,111615736,122995784,40884854,36412963,56312411,56685329,56304605,27602673,155151279,37168231,177643755,73635568,24167314,6708952,116763916,19915362,75069067,169021842,14499829,140114710,36683668,15173291,17232815,17539739,68307227,34889448,19918047,39511166,95938895,71873770,18038892,94143715,5120691,12369372,19038395,16120265,18091904,24741685,19683378,39514461,18225966,16348549,20602394,43849753,37926315,4432431,27830610,97219875,33933259,808627,15234407,42226885,35593203,16190898,20214495,20032100,816653,749963,14511951,5763262,14293310,17074440,9460682,21457289,17899109,14874480,20280065,14761795,17004618,114423418,75050496,20224928,6799042,38087951,2184141,190929822,16303106,163866503,62770378,22484810,105675866,17940246,216004224,15485441,125463397,177168334,41814169,181936784,87429873,12044602,66883090,107544554,24671422,140055917,116290235,109740182,111186088,111499749,105122834,26530138,115474823,122474200,122062166,122553145,120641563,96400556,112065034,14435000,15677252,119557197,56115426,16684951,122405991,49989191,28573657,69570423,107139187,45553679,41355
103,37632896,43275808,56378022,19337439,78262617,21235661,38143441,52437460,51096668,16350228,63681020,17536755,28114190,23775585,19734494,91083585,20626359,17872077,15378041,81690879,16454856,5172121,16489986,14866691,16232553,15745139,7744882,16591424,18302410,22927130,28333890,65276277,24447643,5768872,19655584,32561466,26208862,19076596,45567297,36021653,29525603,33235771,35846777,15364197,11396522,21695241,21466703,19529816,14322435,20598533,16694619,51376979,195703895,181809164,135456572,228240073,34131007,34197952,37034483,428333,5402612,16542390,112683235,94816809,28694130,70931004,90684808,85312583,90680573,18134734,80543346,17231318,14582285,16143542,14868835,40885516,97611573,115341081,132842874,132094623,96283286,121440641,132859239,94163409,123774113,48511486,83611972,18872373,76294950,198478747,24177024,98932084,42604232,61755650,16542626,22262999,50243043,53718666,113278275,138002210,146385708,130949035,33891322,29951362,50544680,20609518,159039098,154493261,24008967,73638336,145338127,68399705,119326030,114785416,119327640,25478634,102594253,18863815,63438962,117732057,43568964,15044593,56935361,92211391,96101258,78941611,113681700,4836211,92332298,93622144,65036099,79415553,51690631,42464353,32862653,41097723,71992917,15106168,38479920,30829649,16507391,41799998,23061619,54464822,31109504,47753886,65759352,55520719,28011481,18009267,16417648,14620392,7104672,14774255,33893248,45162347,127034693,194534748,110360770,54902524,129262111,97614907,18628249,18962189,18905875,18955181,21387965,12873942,20139348,18349474,19782758,18908190,17643126,12765022,19977028,14914525,17751959,10401102,15523506,16700571,7799412,16042788,16704472,18536826,8039622,15504971,97937152,137418466,155659213,19895282,117063289,92708272,181383752,48410382,106345557,34130969,15274585,92724677,121046433,69914008,196235064,70613646,215572137,121224923,155842625,222339038,64678911,106996530,149593459,104540706,166158304,211425214,74783784,206871837,154576522,99448420,139022142,10899
8148,26264871,77222698,129136581,16998020,103770785,119601029,30631766,87174678,13838562,97396580,21453086,20136728,66778288,35104773,56761423,48277760,19183953,233287824,18222378,245891639,291,13,20,12,261467278,266860992,28904686,259601739,82952848,269649865,57865199,177468546,29784580,22223740,254211714,54794774,258539150,156828337,34775386,21200792,96821243,24966698,25253084,21070607,112517482,227152801,258691740,37620532,266259739,32837358,263319842,36139712,265820934,247424639,237279585,175157275,23779324,51670525,23621049,159544785,150120631,20456522,31141686,239189534,26589987,79940688,232378553,216299334,21532770,95386169,246319243,253784164,40262755,262191109,191690176,221123236,183023359,258928180,123044254,42663256,97199688,34811069,122160076,47339690,231216037,28041769,97804036,25932078,35280333,259506017,39933869,259379883,258969640,15676578,253322314,253223778,257970633,45302570,55353207,160127025,22330908,237043022,257911756,5490392,191640178,25169275,112057008,136943034,173182035,170079806,70157977,23277580,122826599,99167992,44785212,236021571,32453930,45025729,250551703,95386370,14813320,243070104,17082958,156132825,153684144,23588838,211894963,239160456,81761893,131228928,110365072,44592558,243865694,188039706,242567007,22256645,126177871,20750019,245950301,23125912,23741531,127493777,16857864,21450669,63909970,20014300,180326389,152696429,215138502,112596025,219243547,90700803,187412435,202469077,194748700,205545055,91041719,175298380,124556528,176700507,122292598,175334528,116438547,155443008,15439395,174866331,130684475,181547288,182401865,138108708,154843423,85436197,29576445,76643486,153264772,97893963,135323671,150619089,142598575,21926199,98969077,29098641,33443003,142957691,105971448,59281659,33551876,16297426,30440025,25807119,42610327,21469205,33458339,84542880,28534252,21089219,112578012,85170052,63172203,47571031,26682924,106961801,92424604,80808661,16128694,9237112,47940349,161679647,19675870,112099774,116412043,188273028,127169087,6
6334209,56637669,52690690,80018213,14087270,39736606,56039856,21957826,100462369,69303631,51416299,70652594,141797364,178060134,177449553,105468025,94263219,58173133,120998613,58809331,144755081,20346956,95377752,111871312,44588485,57107167,100123541,17717614,53790896,103603559,101314413,89202391,24775528,21078156,79915337,64160613,20033989,92726549,22910295,90413046,90222460,82949914,44345790,75714954,10810102,27238177,17461978,48990944,53555106,41364508,66075817,44635677,61197829,44409004,116683144,146169170,67551899,40441273,17685964,25082055,23592970,75854417,114501238,17753033,67602060,58153957,57947582,66421178,14075928,107012281,252008932,220228167,47127694,643443,110843872,43598400,184095031,35927341,118201804,15093629,32613163,128260991,115313754,116586108,15588657,88065828,96827376,83529586,156888235,116677732,121131508,123255923,122928781,146066986,15941979,92753384,6509832,78526519,113586227,120786203,108725613,54387680,70565268,99693145,54829997,50374439,39730630,26565946,94743748,91855786,92732248,58166482,33801019,72857723,42581523,19397785,184966814,84554399,102261535,188161678,77324878,52369385,11695892,104864058,83200505,50745015,84276772,151554374,21380243,134805090,134805458,149186514,99735551,52171246,33465070,15639696,17774849,68663578,262625092,260839582,17482727,8933572,120965579,101779040,14407089,46073276,84352985,250495841,138822469,242253020,17293897,17160680,18745932,122994639,18549724,17342842,34351681,11589192,219617448,77739297,118757160,41094453,15355775,53712478,47685065,17152685,53256946,12716032,80330381,50973651,68164273,16237067,29173795,21014078,50944856,14582087,30486243,110384744,46704429,12757912,36916922,17381587,63129578,18185582,6463042,4047721,43533997,37629039,75896448,19329136,8805142,84620593,50026402,18639738,16832632,18267528,17289530,44582818,44317593,10807392,12954612,21213956,22179994,23326165,21324258,1468401,11347122,15234546,21131816,20067354,14094741,19538986,19923144,19637934,12687952,14862077,158132144,4810
0887,265084763,130894721,46611379,183764416,143244854,25521487,249308339,113425681,63796828,249492639,231653600,140223443,16913521,94618543,66237835,35080643,29461062,25798558,113101267,17874544,30464067,104249727,23018333,16667262,18220175,71876190,23511303,38002432,74021398,20495104,27041120,19359780,16410830,14302111,15235581,19343576,59542606,15185743,15846527,93122069,76396464,159959486,18432691,14668524,21724225,24868643,18458309,15774871,14872190,23713968,16037825,30915787,205558062,20691661,164912274,166679800,24148980,14841371,16364793,14456675,15573613,120194563,32588032,14702835,162501940,24072419,85339466,16909049,93306677,154511355,98929990,20789748,24647812,41822025,26095370,30006160,153407964,23314049,15907720,20155760,32661253,24019955,25705373,61903300,648,193402132,48115928,119547230,159190716,7885972,16007259,15924107,133238164,73057335,67024873,81731553,78727701,76439500,76089009,72358084,47938250,14402059,73585885,47859817,49548303,27216225,14909713,52612298,65380162,68141256,68909328,50648473,31693895,15354216,38088246,64666523,48588200,68619238,57591415,70175558,91935208,19394188,87021276,72506502,20536157,17728580,156347053,70300310,59019406,80629654,213988401,103061572,23006794,135109677,136594811,123822668,176295495,18735898,181041916,41552881,65659343,106310615,165109462,161548943,108252113,118893858,49657979,25365536,92284903,58547872,40255499,79708561,66306024,95385188,60011582,56913467,68098838,41790315,55778775,58387890,22868769,52603975,37750850,27205294,63982262,73303195,67836471,16423109,19662280,22563769,14506253,10350,24019308,23680913,9930742,989,20109973,22461427,18002296,62295319,14824849,16829827,22940219,15916582,244766451,97720704,157439536,86250341,29856819,71735224,83482914,76975035,20453602,122584206,92820744,81162841,118649107,49320694,88127502,118462618,115946251,119298031,18100055,100784944,77436150,80215266,58813207,60162223,62909470,80857366,80346657,66153834,75647231,75044398,189484942,69826593,183147986,166985108,1
58639282,149003180,150970545,18306168,131258077,140393642,102979395,101028230,89251621,85329365,59776155,23375688,16409683,14230524,85603854,3004231,65992743,57024049,159992786,21289311,16933337,67995848,21447363,50751556,70659694,114259282,16023803,69382909,101675562,60349064,57405030,71298473,71150249,67224001,65026150,33931356,45866936,36022940,43871900,45001048,46619479,21986512,18626887,116423810,22179561,41025510,15499660,16736293,58460325,100808813,45766975,70631526,39430669,39081659,53041067,76496142,66099575,76271580,198608682,43151567,18924866,17344361,18929773,15749983,14462907,16548435,18393089,15290123,14244769,188265738,14587429,212099989,97867221,27385146,45533787,145773163,87735340,48928735,40056109,61232019,62011530,39459292,26705065,62678144,69813431,20357019,79406971,40536441,13262442,20108560,46079391,55840387,15521075,54952792,52671787,43869144,18850433,18105473,52309424,51964156,783214,15825547,7108192,43542679,46103496,48059907,32825358,37224882,34284467,37382893,42825044,131442917,87942403,68217476,78659919,87994171,94523999,118649170,106309287,92945681,184861637,103913173,168606635,133690223,45022448,28344257,75226979,75196163,71837973,70023070,69044253,67902548,67091953,66981966,22913560,18174776,22125285,23476572,32479720,18175567,47174655,46085006,37352470,28290073,62552583,58136738,99925059,96751605,80285804,77445473,86896314,97186138,100778293,51475215,46392686,17800797,17006157,48410093,187507648,21551205,27781484,15076390,16578309,55422711,23524507,68359712,68071114,17732661,76986804,15766655,16324504,48907063,80864151,117333266,24409746,98816681,129724841,165174290,88287823,147293037,96500633,128124036,156003699,162913443,79453919,56143594,70307067,20584873,72756542,61703078,52655094,77447919,16382194,65054736,67799531,42826666,28824737,185122050,147001819,131607491,88718155,38740909,63509154,95943913,112508240,22534086,18393773,22548403,18131039,15573707,10798802,158645880,18286505,19399719,153051851,117426684,47715374,20326436,3340
2909,30055564,17600124,25053299,25166595,22536055,5920532,68509753,31194444,44462811,18450106,11018442,20040003,69615397,22171728,50687788,15162193,41338503,65104337,204491797,60054156,24047612,19628693,29271362,42712551,75014376,16147150,14620190,205789121,14275290,79320096,14365068,18735040,58582341,18639734,18665800,759251,19790261,91478624,98841303,113676911,42519612,21008866,18673446,18222770,18222599,15781432,14379501,18845207,22912596,208019751,20727198,18127783,5503832,268163148,29483675,109713729,42858171,23362483,37440098,53756003,249941736,18704659,184647530,122444371,21464713,90420314,106292326,111463209,25013763,100381915,62718650,22110491,93935921,94360081,95515846,96851347,111690277,212782910,22992332,23520739,21267612,32087193,58513684,46570247,115796187,28638191,110886577,110867752,48363171,32192094,26507551,27592116,43768449,56783491,32749378,91242884,55403897,76052943,17372853,22307857,46297163,19383317,27778893,24285686,19329393,21111883,17919972,36726043,28094035,192747213,44924709,257262765,263359642,262734281,90537810,17512682,128744229,22270337,22964152,21950364,47462001,102441113,45215683,247008365,14746298,83723250,1503121,22135992,102476260,247752114,225919331,16971999,131979670,14409497,15054622,170186151,14864693,21597063,242153407,248815930,20756490,157778204,15261298,260064723,21633857,20212370,254186356,17694756,37396006,17911421,124763921,13821862,21062471,24363945,153168449,14260037,21303400,44494276,5568292,77026540,18025954,16898332,12372812,126722715,116387194,22876454,178071411,110905570,17235724,22505100,239571244,136568901,15447566,249854076,17783446,23334601,142901066,57358233,65533134,41911874,112740986,232880095,17599795,17890304,157380172,87313007,258014885,17416700,14198654,259345913,1240681,33935084,21217402,32623377,27275087,15771056,92394644,16522352,1947301,91518617,188154135,241201438,197594920,13623532,36719281,188591875,234826866,138787319,211921304,9855382,153149305,115911638,166252256,19674502,86697288,135230178,
153810519,106448460,48706233,13491312,104324860,82161632,117821354,72631996,47279525,18990471,66369206,67378554,18215973,110823581,113420831,105174965,63246339,54546430,40300027,61853389,21857760,76936279,78408666,76348185,62595437,49706919,54655632,61762853,43920155,44615672,48398586,78138151,33998183,76091017,21783950,59204932,44777959,24752484,39657463,14224719,14091091,17961886,16245822,16116713,14350393,17409240,6450322,14079839,9109712,14287409,10894562,14616022,14479720,14505838,15170346,16028241,15647676,16669075,16027546,15649433,18020120,17451742,8487622,15460572,15743810,19109172,18923032,18058609,20456198,19006707,20436059,20179628,20437286,17967675,17974455,17968875,16878156,266830495,14858000,176944763,128225158,16571326,123974480,20776147,16625082,51462013,48029363,65502117,27311044,43881089,243000634,136389258,238259855,160276009,248945322,29235589,20221159,23199697,63877097,36804447,132986633,130945778,17856665,15826822,222140591,27948604,166608401,164383046,244212645,13215382,21844854,24404039,11856892,143798379,180090338,9717322,20733972,33919343,71022729,17494010,18365936,16664681,12701412,26837501,18080708,3952661,52954222,76707735,57770539,234979566,15017893,174263663,106514937,61220477,115979444,43409523,66873399,46469610,189550099,14226495,123738314,6449282,16125224,57768217,14552725,21768766,49620730,15399512,40260612,146167871,196824385,48388417,7212562,16326882,48043084,16081831,23176276,134843187,97474887,55355654,15670644,52774637,173999134,16261445,190298721,4207961,21840737,30907284,136550204,16224666,21146211,258928816,241990563,28260687,259905595,18727438,15371215,196631065,41219840,95493980,15498429,102786512,180859549,14677751,20818801,16250929,93069110,18891923,19670455,8370082,16640598,93876666,47688440,29795115,14327933,34022952,17391507,224464606,32155953,22957302,5392522,46612327,114497182,109287731,17169320,33986037,14129299,222205782,224424419,225234560,15265641,21178616,22771961,18873711,14275426,24295482,21457205,15901113,
20713061,16789970,73238146,69181624,22772264,79743108,47694175,21674448,138479037,18686907,24892146,49337217,21699092,17786474,69218324,49573859,48038030,171129408,14804435,28479007,142419647,198504506,46764631,68801460,16340501,14413466,4119741,82643763,89269664,85591466,8979992,42860402,39524002,46161313,21334893,40906518,255762643,96496611,76653027,14836128,22602881,22295473,77215251,28428838,121265704,47136500,95243532,59738746,18773422,50156221,37016784,234293544,32631520,252726343,24954899,118805793,15244981,38236362,31072439,43907446,17373943,20317201,31079712,70758813,33838795,27233809,51258903,37951729,50415064,37395827,30022594,17214648,88295976,20651632,16123781,119957997,24024655,33601431,23090525,37683259,27678902,43766620,17721514,15353981,18619979,31058793,39578080,22571950,20854518,19330213,36106105,47401289,22780988,25324156,22646228,29711814,31468242,41438347,29785315,31606227,46229563,31967467,33651080,39726700,24949726,15353815,25425425,23629827,22485793,20537454,14325369,19728030,22044289,24927543,28393637,29867516,20829172,15386108,22303692,21128035,21699429,17198500,15131310,5741722,5248441,237488632,20407539,17797448,30704824,153741488,15809249,29028408,19212009,56051834,14062236,65495877,112624301,28408675,59058446,33755676,84370522,264459407,86387057,17837853,24741917,118752616,223100019,93783780,66593978,53162803,192019265,17782766,249253283,22330646,199941365,102271048,43018238,20012204,108797271,31119337,153474021,235083242,109660695,49860231,2425151,190170608,49861057,169686021,17771860,49669034,122737411,66891455,188468721,50557402,29840295,229965178,222708016,244243477,15934926,184596818,65773523,15169907,110840720,68524064,129575180,101928415,25626894,246163327,207010069,40735690,230547432,141326970,28669612,39015718,128090469,49197437,163126581,107818778,16929701,50531062,53193168,39936193,44821096,46829683,46313918,49034233,30124262,33009321,28067175,35624959,34097876,26892157,30043495,21119636,79039503,17869798,21619519,40047532,3
2765534,14412533,14246001,50325797,14529929,12811952,7313362,14688489,245510831,14498484,53197137,16212685,47964412,41144996,816214,15008449,24784891,27589818,39008816,113880573,930061,87904422,1365841,1501471,14413715,1242511,14326840,20333461,657863,1835411,16219126,3829151,109275829,81890700,30345764,8442372,41269920,18840896,101844850,8953122,209792690,40168339,18915795,15664289,133578965,13213122,263767927,71294756,120943272,137466464,40553282,135897307,21348880,18227519,21111896,52772661,91888403,25355724,247033019,25527966,83590349,117928875,21251668,234629912,62915920,17095452,14245378,23778898,196272316,248427077,36554387,224119738,17291393,80638910,40892016,19107878,7380872,56700964,50073507,63004503,21268897,24431892,21307076,21982720,18791763,27901418,18172905,9532402,16037856,85131054,15210410,51263592,43556096,26624619,130617778,52070270,158414847,14268812,92079350,24816848,50323173,187005921,183665224,234521957,47260400,18678924,7651862,58601997,23544596,33792634,91181033,14603515,19071682,20064236,7768572,149217510,188975706,22169931,145492157,14677919,114589771,14693823,15519544,201870414,17781165,14702533,13784592,39050020,167181650,153051008,14861285,30261067,44177383,65647594,28143425,15194013,18959163,28144343,16228479,18901122,36958781,187319644,118022756,16129106,54885400,1068831,68539526,15339201,1344951,2890961,119525215,85634177,7152572,185644175,5561412,14434063,11270992,15024563,16256269,21157904,15572155,31182553,15309804,16160352,7334402,16129920,17912982,16598226,35373847,56705653,250831586,27529145,26618468,238073909,40626514,234132208,235120302,20402945,216431881,35989792,229686366,23539358,169608502,268123287,225278024,261835843,28376631,79293791,6446742,56518880,18915240,180188862,17200116,32948225,25398075,25677409,41737887,18987651,29952857,15537451,24322371,19877186,23967578,46252298,28471026,27406401,33549858,14817982,40141037,24472704,14860448,26933384,82688781,35133435,264938614,172562639,4170491,17292143,16347506,143664435,2
3065354,19942991,19410817,229129090,237620586,228296619,30959811,21656599,261555701,29365013,105916405,188639874,173581919,22831662,47218790,37367041,16302242,39288259,15126367,25482459,19867571,229390349,35586563,105029433,183089853,23184896,38127255,16966162,151159882,217231723,48488561,23065057,231389485,58294430,254179370,127750483,5162861,187279871,190064796,78575667,21077000,25174913,94216840,108788135,111635980,23593446,267854863,78445977,232992031,163570705,150669746,14342564,19658936,19418459,40959360,84388888,30343716,260845829,263815367,193732179,221792092,19716781,38254095,73181712,65707359,9624742,234489403,248735463,168673083,247486443,249410485,30912937,249348006,246088673,235373000,246357149,210926192,158398519,137823987,33530012,28602948,236511574,234002627,153944899,85396297,240812994,233783568,199325935,119250381,237770636,161791703,242892689,239949176,242426145,229873610,227341953,216503958,24721108,132201224,217547360,208692908,48086014,30216513,80612021,126017061,148453195,192957076,24745957,188019606,24232525,140519774,103922067,28582749,39249305,16583468,15022633,75124560,125445568,34972633,84119348,107545479,138770045,36948268,115111016,153865861,28599820,161411080,33563161,56864092,137794015,150688795,24913074,27698046,45659261,243721842,88507556,247334603,16329577,18812301,12671692,6527442,53981494,22642788,5446992,24619328,47459584,19824430,10371822,117445762,128564422,23534104,22677790,242360696,19891675,54299209,211588974,20065936,74880863,15210284,105445396,93722614,53130511,24844525,14824473,18644548,40277679,159362503,23384516,119079595,20182089,203681891,15093646,15808765,47632905,98425584,15772733,20177097,198106297,14992591,21623531,158412741,23220034,21757965,51224438,15638637,150203689,17612289,104937383,16955870,17299063,68773980,176280674,19278537,95281047,12340562,16440738,54905208,10228272,24024778,19541375,7855622,52254824,237562389,267847170,202594374,8180562,258968955,110757459,151688371,54290523,4262791,6793362,198571335
,215396550,144121755,248390035,216216791,119169760,124635414,238144920,236108642,199730264,46812100,220882223,143822047,15693634,172717529,147249049,133727476,203728509,731573,801202,38299186,239200202,125492611,110905955,137751549,71918994,205370456,81189096,37083812,37267497,52167970,4778211,175083160,8105482,118256463,132704571,16160650,114510872,994431,10052972,16194793,13860082,31237389,48920331,54578941,38100497,140935414,47850508,122975302,170316873,45497378,3551701,150115256,72972715,148939636,1120611,136414500,140538938,167624175,174027585,27641388,62015732,70346167,780159,175881965,37093819,19646485,140856303,77338274,187246188,122783603,50471406,11202862,151198642,76326090,104861015,133722469,121613012,47616949,14161128,819256,221019516,9766032,13836402,162104820,618593,47402960,85851159,142684449,63125213,166510517,240615854,269394406,252644913,38371041,136609376,76282043,70418699,185493136,17960806,38727269,234587458,31088763,147886032,265651031,212940155,40565664,201000503,197539670,170474186,263060411,254725084,89866540,130782156,223976506,260391020,19390708,124856935,99698716,52256215,260728760,156769030,40866430,35282604,261151943,58777905,150957265,86351265,179719319,189842436,204918670,203483715,258220853,261297092,204444285,54171939,214020774,89187513,170581751,42612282,44337867,212342630,105542341,190776107,36919153,134781128,47125833,244901765,242245196,194186291,242048469,245106788,215967395,140766042,158795894,116688855,19975710,109892942,95666954,32848956,240815975,77977289,216335730,256471145,247004791,175867315,247424331,107021348,29730710,38146165,237670656,191072401,125075147,15417049,219936176,41190759,179884440,41132274,253520310,73191618,40850899,42981176,112184431,87767749,162022136,193022239,186967108,84145752,227058289,641253,95994080,158848407,742143,41842768,242201454,22240612,19386622,23144737,20379995,254377876,16489591,30335156,13363282,249529225,17625292,151662442,245935903,82274636,15426758,17918808,136358789,169793441,11772
4817,15769161,157613794,20696985,216577574,116559622,130681675,74594552,7081242,189276359,96265654,20460757,14749606,231362108,222343099,235389922,36050060,44518888,115689700,235295798,189621457,226370793,15960627,20596269,6324592,82778942,43192807,34716594,36803580,52481056,11891542,199187259,111059621,36355699,56758031,15313857,17637614,20646277,100570170,24193017,117984119,22692199,6033532,116828327,20731304,106565458,83883736,17637553,23679582,29256864,214499442,140964771,15888570,44062282,18713,106145765,15255133,43333230,20998647,31311757,15855630,21008297,20334391,39348813,41264383,20660560,19343562,23116280,111444854,8039442,143454922,267206035,142977623,101879887,17565514,28932160,238480106,253632034,87471023,184910040,19226054,160817572,87650675,70246837,36600922,240860245,160926944,41329637,40981798,46345843,30563759,114225051,219920508,18918698,189283341,72694909,15790423,22440038,235120811,222658804,72547608,68838777,233749221,215952307,26668282,151895837,25456478,216995705,87416722,17612928,176417690,41218028,46715620,104571475,91390383,124283714,19026618,49393335,18950761,21526685,134206307,90098789,34192959,186865762,50600062,126418609,135303236,14522546,64220324,30517544,24083587,21258330,38475205,18953259,101192354,8973062,18676177,55544943,34108173,32360145,140894997,19818494,12085042,40389429,19982092,19524590,31239408,21869297,24450456,250463314,238295737,236917131,32397000,151631699,29257521,17235825,82960113,42491255,18044981,181988782,40058557,63350773,19320490,27246723,17119099,95921754,23069301,51865695,143549770,84460847,223724339,93904007,199555388,81929393,131727620,82444043,162580589,171998025,198334225,91171720,201426746,166316761,231206630,125382460,195885307,113678606,103000558,19019263,87307633,18860342,14705240,29160552,2885851,16185801,210693321,15276510,27762127,17147539,21019628,74712538,131315786,15012486,51193546,74735391,18335809,24796185,123331001,73134924,17193794,19432557,24872865,32128783,16622151,83939091,15321447,197338
92,14517899,18140091,106506464,24663730,18145841,31282081,16324533,40894238,36893069,74502636,36975611,21375959,29337732,89642493,41407443,12138162,19198326,20692503,41135906,23532159,18148242,14326661,14423603,39806829,133966853,176467382,14463913,52185212,60680365,46743554,87989022,36143299,18815810,34002530,41888469,125415597,35667714,191250892,177343973,165802132,191798541,253342708,232878783,72168780,239613729,136353114,112937614,132618751,190475202,188579340,241837295,241805454,147609274,87871306,235433784,128295129,86566274,162571150,99002460,169719009,240858138,131246928,83900124,168410930,33623402,129254485,113593692,84358766,229643491,50841062,32730931,112074504,138871289,130734452,239623882,162254325,83237549,92817267,197874533,145476646,235792533,178941141,215819570,78145300,148504230,51646374,44758790,206922924,63896875,91437380,105320972,208777407,103080971,231279879,101844353,102607698,26395300,30152403,28050738,94225402,220252762,29091792,24967668,37986775,42082057,74853246,122158761,16234406,31115275,27195114,15942017,51158957,22498452,32482524,40286007,16870682,23618512,23832022,59286800,36860144,30022428,180999717,19028953,53153263,31668714,24621955,100611064,220675169,79551906,41676395,66714703,111673770,97807215,23819304,28605533,25695492,29537319,224325820,30810014,151291323,22100074,78687792,128472974,30113082,18803364,66003255,50572630,213840716,17915334,255591392,198613270,249359076,28110915,30630900,158887925,47945698,36425615,30689667,21541542,118612929,60401740,42203744,101578775,233697048,24293519,19042478,183879398,38773605,101424299,113508007,42371693,28702869,201445961,27060993,59606882,94370073,197220548,121269271,233395335,145742095,237955563,36897911,106375253,154423460,39002053,52963095,30441816,48649566,93928566,27198826,247495144,212479278,90721189,28303502,158393753,24100737,164818150,87623304,201989827,128078136,25465958,110473412,42099565,213707240,19312934,29533792,45419359,63311667,36126354,30228023,70343136,22733444,160027
905,49666182,47562163,243010689,78507740,243516650,51219197,20955713,121512266,179767114,94784820,110585067,23586754,132223511,62644624,8713702,2148071,616113,7315022,822112,13348,16354128,45547548,85616044,10085882,21126003,16613870,65640210,11557642,14208914,14270999,20399609,99961552,32472561,39274758,19624506,3144281,90421077,7857652,46072516,4560311,97023892,76058815,5876342,29949063,1424831,10094,63133380,21786260,75616437,9745092,19226849,40880426,18484100,94587503,34406259,1614411,10016972,14140571,16044191,113116513,20169131,20217201,79502328,792646,270668475,53533566,157070412,59444261,39902637,165326620,123102622,265000864,52597678,3346971,49205840,112432370,167033855,129017028,132005200,58334111,93516296,168841469,229710631,175338338,241205936,116510779,84457604,264366428,52322656,234006638,258428607,168537997,95800032,143442028,82430417,84034563,13041552,185030770,32014425,85450815,29736553,53424957,172178623,14120187,67132984,74610929,73215961,140722325,65364950,71112180,112307482,122231942,189320995,178118343,251231223,266200838,70305266,131440156,262900579,249631963,33258173,34784263,105597663,101387077,37245723,48630048,143505274,153245594,61723876,39140545,215858327,168243936,30380192,39187698,235394328,62052874,143257853,77395953,26162191,191685671,40405121,267433029,50856543,168696795,84677497,30681499,261294795,265696487,183144850,127040414,29910549,50390358,30587056,78740274,19804553,53323885,32618047,49900650,144021058,241008487,237897985,75037065,203771716,42104946,111489389,227209417,44120318,234212815,38073608,97557168,162545959,108457267,261884913,204209879,129816753,121186812,161576069,38285013,30995591,60353632,30799610,181080474,239217629,254707723,212340162,46930437,257939339,32820524,89568746,16978961,235381394,93460034,21786322,175291914,185343485,76187493,36216833,46093969,164180736,95261792,25947430,168695261,156972971,196630780,102900059,62863521,25742007,171240557,35332069,50052920,127788481,111398486,254240011,32218646,176536528
,139822631,98766044,19487670,236787450,137340994,128642272,20979106,225911802,248794623,75130364,264408558,15947602,225126598,237003654,19595532,265987070,66437032,14082828,11599312,253284344,78436108,122694066,17805567,207612806,34925804,171514924,188028765,122459707,95534157,217073451,20769938,223842388,29717916,259108421,240182448,248314631,73073062,164401022,57924802,20703607,251219812,198240811,40010200,228682873,47914608,777554,243833570,196167925,44877467,18212565,110950574,253664708,20767246,201747039,143415291,79743060,127635571,246738309,121845291,20508222,123979525,47650483,242655310,252488810,253069382,15020865,166926744,89702218,20081488,90276942,174717541,219716235,15906176,125378095,22707793,53454398,141598993,232821256,20906409,27066806,227631390,58842819,54937915,63968007,18764912,246263280,243920442,37376499,207707329,135178205,139853350,236581562,244593709,146787633,20373762,122714443,52487416,192218013,125820691,238956217,148354084,7064802,61133,14298281,20490861,809760,1493121,14336349,38073,14876536,6790312,205700196,200199318,15649570,15184418,51800278,19644571,29432246,18058704,175899673,89998965,51988166,87795815,14570292,15272882,19105442,7035532,112490145,13768732,47700913,241475497,157821181,14562275,20142938,180086129,237325382,16818325,21873261,241043955,16527490,240810497,115410741,150323560,36609309,29093456,17673732,16096448,216338061,235317040,63585408,236819345,93010530,190357418,240594439,239807683,165693574,211897738,112489818,47932614,55923778,225657320,186155101,16352458,240402102,71239119,34474398,239348795,234645414,240223810,141222424,240180276,15036744,94973632,54699775,196811548,141857015,238673608,97616524,224969703,238109234,231493350,239309716,238638637,218570379,39514187,158951763,16403358,239477369,154541100,239609301,239399824,121708861,195121864,72931614,142039374,239127196,233688482,107780778,20606286,65719885,162778946,95200214,215707078,156950036,15003756,7150932,71343661,11935332,200513252,79144576"
    else:
        
        sql = "SELECT `accounts`.`external_id` FROM `peepbuzz`.`curators` LEFT JOIN `peepbuzz`.`accounts` ON `accounts`.`account_id` = `curators`.`account_id` WHERE `accounts`.`external_id` IS NOT NULL AND `accounts`.`stream_id` = 1"
        curatorsQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        
        users = None
		
        while True:
            curator = curatorsQ.fetch_row(1,1)
            if curator == ():
                break
                
            if users == None:
                users = curator[0]['external_id']
            else:
                users = users + "," + curator[0]['external_id']
    
    return [u for u in users.split(',')]
def getUserList(demo=False):

    if demo:
        users = "133880286,149198901,101695592,77779709,145125358,135421739,24417275,50393960,131805814,8161232,31348594,128155589,132385468,38403110,123287206,22184352,109275593,59808841,24929621,18839785,97169575,87118217,87170183,101311381,113419517,66606024,56631494,23047158,813286,70179948,24705126,23088711,17842366,81634880,73571482,55593594,80648576,65076023,67227697,83683860,82550972,71281224,31711374,67003520,80932205,58270815,78589660,84770446,43047397,66974773,65560119,51807659,71785951,36736716,81037355,85037143,38323168,17109111,25764613,15989467,23406744,21801881,21424347,22765820,21043020,25966862,17386471,21575516,27606345,30632568,39157919,25477690,23728664,29498919,26911579,20752370,24110202,24326028,25574567,20061192,16354940,19429373,21990842,29625563,30204765,23328321,18320874,30313925,21370475,16434558,23233419,27488712,22543979,23210425,18346856,244655353,27474384,14171957,27583520,131144091,31732799,23551895,27554803,17494118,8316102,42132842,28682172,32613916,25768462,29971169,24721128,19885070,29297677,18287787,15764644,44621097,18278092,6250582,17672407,21807738,20314162,28637856,16627822,18501269,17104244,21225694,28353149,14959344,17825734,17429421,18824182,25914362,4358521,23532875,25154750,14142269,15090987,19415715,10552522,24914623,18444963,30341243,14715999,18663330,23686579,19099133,24333126,29103898,27982454,15690193,23237369,16672734,33087265,31580676,31108370,21113062,158769098,16031927,20714308,6017542,1880781,78945513,202022019,21313855,24618431,20073874,41634245,11134252,3135241,147725246,56413858,20545835,14345062,40116885,10774652,16180004,114655637,60680268,39308549,69773058,73402979,196168350,37764422,103060329,19416225,16555200,18451200,20094138,28195764,4898091,16311797,65146567,19658614,1917731,90725320,19659031,86364947,15450996,7228682,18956073,2704951,46002396,18513241,19611483,66369181,21186125,10126672,16244647,8775672,31310158,28576135,36681590,5988062,25395727,16869718,23725283,63150856,15988710,108351678,200244
13,2467791,15918353,63395170,30411537,87103895,15985455,14669951,14844867,57428429,45190383,9300262,10168082,30864583,19417492,16467567,17546958,19553409,15918708,18229117,18216752,26533227,7713202,5357812,9508922,18916432,18217624,18906561,1367531,15675138,3108351,16032925,18646108,23970102,25320005,17454769,22701818,19899088,17844611,156783914,21721528,43008595,26429932,41073372,14553905,71028123,16305330,39803397,159875784,17855061,34346559,16643805,256865137,21987613,13601372,23388896,28706024,51893943,19475309,88663606,138135579,19317307,20532520,55656355,39880788,54579254,15416739,6604072,195223934,65629866,74231747,15883275,47956412,13783772,17998573,189181305,36775887,14619211,37360911,11640472,14089195,9695312,21052372,126707999,6836602,18185605,21466569,113436270,234814458,14085798,23023236,70858251,93819415,153830702,9636632,15430198,134537602,61621735,29899836,15394688,46241448,52786408,73060532,26765003,39588173,78377692,4517721,240892859,22696335,14253479,214677862,10122292,262246353,18510860,180505807,146993421,38883394,33696409,16476000,213796133,243361655,17667607,7995512,44366931,19374728,246913104,141335663,15066958,224300799,150723226,45564482,19195914,140882350,134758540,15117711,18990065,14411731,176936320,225847395,82149912,121836937,26520417,10876852,8383592,1319181,40519997,32518531,35677276,140496824,14079576,755756,2363571,811303,1586501,12798822,23838203,14981380,211891109,59477295,164725958,48499629,14514804,12101862,8237482,67358777,199097888,196352451,19658826,41187603,145753059,34147744,28520903,18051038,15846407,124245139,15591578,67140324,14109167,1284921,29631095,15891023,115230067,16204491,114870386,14179105,15719490,15645093,11336782,150990112,1652541,18164420,34228779,115699970,158865339,18956903,26642006,2195241,22569599,30815404,765564,40283581,19289684,14019652,16361348,19548547,14138514,18875991,138594660,60025762,16460799,15155313,13027572,71051839,6129732,11880802,18130749,18201016,20689408,11348282,16376711,17904776,15205
798,46249360,26921818,132811099,52848021,5360012,14281466,22802398,43194755,13058232,12925072,104528362,16302090,135962109,9855132,45577446,24223641,121812725,65987618,21414576,17439893,22737278,11178672,14150661,56792882,24807616,16029780,30544925,33342154,972651,7581972,115710614,3939681,14173084,21875834,14266598,15639642,16012783,27911608,10235102,16661296,7215512,19621110,38218953,62591681,807095,14634720,27740227,19058681,24457744,16356190,15224867,10340482,89309995,17220934,12726792,15458181,54176768,56495963,23596644,15223265,18713552,13201312,37703408,15575625,33248363,107318424,133093395,19929890,56687786,142722336,53556894,52339151,45796568,57928790,106034413,171117515,21425125,137616033,104740065,44599982,70903729,67712707,53355887,108936259,95917731,18681139,97865628,15037033,16425333,56336341,57998991,124258971,171145913,86254626,43017766,99642673,77888423,148248527,42908132,88856792,87484569,111615736,122995784,40884854,36412963,56312411,56685329,56304605,27602673,155151279,37168231,177643755,73635568,24167314,6708952,116763916,19915362,75069067,169021842,14499829,140114710,36683668,15173291,17232815,17539739,68307227,34889448,19918047,39511166,95938895,71873770,18038892,94143715,5120691,12369372,19038395,16120265,18091904,24741685,19683378,39514461,18225966,16348549,20602394,43849753,37926315,4432431,27830610,97219875,33933259,808627,15234407,42226885,35593203,16190898,20214495,20032100,816653,749963,14511951,5763262,14293310,17074440,9460682,21457289,17899109,14874480,20280065,14761795,17004618,114423418,75050496,20224928,6799042,38087951,2184141,190929822,16303106,163866503,62770378,22484810,105675866,17940246,216004224,15485441,125463397,177168334,41814169,181936784,87429873,12044602,66883090,107544554,24671422,140055917,116290235,109740182,111186088,111499749,105122834,26530138,115474823,122474200,122062166,122553145,120641563,96400556,112065034,14435000,15677252,119557197,56115426,16684951,122405991,49989191,28573657,69570423,107139187,45553679,
41355103,37632896,43275808,56378022,19337439,78262617,21235661,38143441,52437460,51096668,16350228,63681020,17536755,28114190,23775585,19734494,91083585,20626359,17872077,15378041,81690879,16454856,5172121,16489986,14866691,16232553,15745139,7744882,16591424,18302410,22927130,28333890,65276277,24447643,5768872,19655584,32561466,26208862,19076596,45567297,36021653,29525603,33235771,35846777,15364197,11396522,21695241,21466703,19529816,14322435,20598533,16694619,51376979,195703895,181809164,135456572,228240073,34131007,34197952,37034483,428333,5402612,16542390,112683235,94816809,28694130,70931004,90684808,85312583,90680573,18134734,80543346,17231318,14582285,16143542,14868835,40885516,97611573,115341081,132842874,132094623,96283286,121440641,132859239,94163409,123774113,48511486,83611972,18872373,76294950,198478747,24177024,98932084,42604232,61755650,16542626,22262999,50243043,53718666,113278275,138002210,146385708,130949035,33891322,29951362,50544680,20609518,159039098,154493261,24008967,73638336,145338127,68399705,119326030,114785416,119327640,25478634,102594253,18863815,63438962,117732057,43568964,15044593,56935361,92211391,96101258,78941611,113681700,4836211,92332298,93622144,65036099,79415553,51690631,42464353,32862653,41097723,71992917,15106168,38479920,30829649,16507391,41799998,23061619,54464822,31109504,47753886,65759352,55520719,28011481,18009267,16417648,14620392,7104672,14774255,33893248,45162347,127034693,194534748,110360770,54902524,129262111,97614907,18628249,18962189,18905875,18955181,21387965,12873942,20139348,18349474,19782758,18908190,17643126,12765022,19977028,14914525,17751959,10401102,15523506,16700571,7799412,16042788,16704472,18536826,8039622,15504971,97937152,137418466,155659213,19895282,117063289,92708272,181383752,48410382,106345557,34130969,15274585,92724677,121046433,69914008,196235064,70613646,215572137,121224923,155842625,222339038,64678911,106996530,149593459,104540706,166158304,211425214,74783784,206871837,154576522,99448420,139022142,
108998148,26264871,77222698,129136581,16998020,103770785,119601029,30631766,87174678,13838562,97396580,21453086,20136728,66778288,35104773,56761423,48277760,19183953,233287824,18222378,245891639,291,13,20,12,261467278,266860992,28904686,259601739,82952848,269649865,57865199,177468546,29784580,22223740,254211714,54794774,258539150,156828337,34775386,21200792,96821243,24966698,25253084,21070607,112517482,227152801,258691740,37620532,266259739,32837358,263319842,36139712,265820934,247424639,237279585,175157275,23779324,51670525,23621049,159544785,150120631,20456522,31141686,239189534,26589987,79940688,232378553,216299334,21532770,95386169,246319243,253784164,40262755,262191109,191690176,221123236,183023359,258928180,123044254,42663256,97199688,34811069,122160076,47339690,231216037,28041769,97804036,25932078,35280333,259506017,39933869,259379883,258969640,15676578,253322314,253223778,257970633,45302570,55353207,160127025,22330908,237043022,257911756,5490392,191640178,25169275,112057008,136943034,173182035,170079806,70157977,23277580,122826599,99167992,44785212,236021571,32453930,45025729,250551703,95386370,14813320,243070104,17082958,156132825,153684144,23588838,211894963,239160456,81761893,131228928,110365072,44592558,243865694,188039706,242567007,22256645,126177871,20750019,245950301,23125912,23741531,127493777,16857864,21450669,63909970,20014300,180326389,152696429,215138502,112596025,219243547,90700803,187412435,202469077,194748700,205545055,91041719,175298380,124556528,176700507,122292598,175334528,116438547,155443008,15439395,174866331,130684475,181547288,182401865,138108708,154843423,85436197,29576445,76643486,153264772,97893963,135323671,150619089,142598575,21926199,98969077,29098641,33443003,142957691,105971448,59281659,33551876,16297426,30440025,25807119,42610327,21469205,33458339,84542880,28534252,21089219,112578012,85170052,63172203,47571031,26682924,106961801,92424604,80808661,16128694,9237112,47940349,161679647,19675870,112099774,116412043,188273028,127169
087,66334209,56637669,52690690,80018213,14087270,39736606,56039856,21957826,100462369,69303631,51416299,70652594,141797364,178060134,177449553,105468025,94263219,58173133,120998613,58809331,144755081,20346956,95377752,111871312,44588485,57107167,100123541,17717614,53790896,103603559,101314413,89202391,24775528,21078156,79915337,64160613,20033989,92726549,22910295,90413046,90222460,82949914,44345790,75714954,10810102,27238177,17461978,48990944,53555106,41364508,66075817,44635677,61197829,44409004,116683144,146169170,67551899,40441273,17685964,25082055,23592970,75854417,114501238,17753033,67602060,58153957,57947582,66421178,14075928,107012281,252008932,220228167,47127694,643443,110843872,43598400,184095031,35927341,118201804,15093629,32613163,128260991,115313754,116586108,15588657,88065828,96827376,83529586,156888235,116677732,121131508,123255923,122928781,146066986,15941979,92753384,6509832,78526519,113586227,120786203,108725613,54387680,70565268,99693145,54829997,50374439,39730630,26565946,94743748,91855786,92732248,58166482,33801019,72857723,42581523,19397785,184966814,84554399,102261535,188161678,77324878,52369385,11695892,104864058,83200505,50745015,84276772,151554374,21380243,134805090,134805458,149186514,99735551,52171246,33465070,15639696,17774849,68663578,262625092,260839582,17482727,8933572,120965579,101779040,14407089,46073276,84352985,250495841,138822469,242253020,17293897,17160680,18745932,122994639,18549724,17342842,34351681,11589192,219617448,77739297,118757160,41094453,15355775,53712478,47685065,17152685,53256946,12716032,80330381,50973651,68164273,16237067,29173795,21014078,50944856,14582087,30486243,110384744,46704429,12757912,36916922,17381587,63129578,18185582,6463042,4047721,43533997,37629039,75896448,19329136,8805142,84620593,50026402,18639738,16832632,18267528,17289530,44582818,44317593,10807392,12954612,21213956,22179994,23326165,21324258,1468401,11347122,15234546,21131816,20067354,14094741,19538986,19923144,19637934,12687952,14862077,158132144
,48100887,265084763,130894721,46611379,183764416,143244854,25521487,249308339,113425681,63796828,249492639,231653600,140223443,16913521,94618543,66237835,35080643,29461062,25798558,113101267,17874544,30464067,104249727,23018333,16667262,18220175,71876190,23511303,38002432,74021398,20495104,27041120,19359780,16410830,14302111,15235581,19343576,59542606,15185743,15846527,93122069,76396464,159959486,18432691,14668524,21724225,24868643,18458309,15774871,14872190,23713968,16037825,30915787,205558062,20691661,164912274,166679800,24148980,14841371,16364793,14456675,15573613,120194563,32588032,14702835,162501940,24072419,85339466,16909049,93306677,154511355,98929990,20789748,24647812,41822025,26095370,30006160,153407964,23314049,15907720,20155760,32661253,24019955,25705373,61903300,648,193402132,48115928,119547230,159190716,7885972,16007259,15924107,133238164,73057335,67024873,81731553,78727701,76439500,76089009,72358084,47938250,14402059,73585885,47859817,49548303,27216225,14909713,52612298,65380162,68141256,68909328,50648473,31693895,15354216,38088246,64666523,48588200,68619238,57591415,70175558,91935208,19394188,87021276,72506502,20536157,17728580,156347053,70300310,59019406,80629654,213988401,103061572,23006794,135109677,136594811,123822668,176295495,18735898,181041916,41552881,65659343,106310615,165109462,161548943,108252113,118893858,49657979,25365536,92284903,58547872,40255499,79708561,66306024,95385188,60011582,56913467,68098838,41790315,55778775,58387890,22868769,52603975,37750850,27205294,63982262,73303195,67836471,16423109,19662280,22563769,14506253,10350,24019308,23680913,9930742,989,20109973,22461427,18002296,62295319,14824849,16829827,22940219,15916582,244766451,97720704,157439536,86250341,29856819,71735224,83482914,76975035,20453602,122584206,92820744,81162841,118649107,49320694,88127502,118462618,115946251,119298031,18100055,100784944,77436150,80215266,58813207,60162223,62909470,80857366,80346657,66153834,75647231,75044398,189484942,69826593,183147986,166985
108,158639282,149003180,150970545,18306168,131258077,140393642,102979395,101028230,89251621,85329365,59776155,23375688,16409683,14230524,85603854,3004231,65992743,57024049,159992786,21289311,16933337,67995848,21447363,50751556,70659694,114259282,16023803,69382909,101675562,60349064,57405030,71298473,71150249,67224001,65026150,33931356,45866936,36022940,43871900,45001048,46619479,21986512,18626887,116423810,22179561,41025510,15499660,16736293,58460325,100808813,45766975,70631526,39430669,39081659,53041067,76496142,66099575,76271580,198608682,43151567,18924866,17344361,18929773,15749983,14462907,16548435,18393089,15290123,14244769,188265738,14587429,212099989,97867221,27385146,45533787,145773163,87735340,48928735,40056109,61232019,62011530,39459292,26705065,62678144,69813431,20357019,79406971,40536441,13262442,20108560,46079391,55840387,15521075,54952792,52671787,43869144,18850433,18105473,52309424,51964156,783214,15825547,7108192,43542679,46103496,48059907,32825358,37224882,34284467,37382893,42825044,131442917,87942403,68217476,78659919,87994171,94523999,118649170,106309287,92945681,184861637,103913173,168606635,133690223,45022448,28344257,75226979,75196163,71837973,70023070,69044253,67902548,67091953,66981966,22913560,18174776,22125285,23476572,32479720,18175567,47174655,46085006,37352470,28290073,62552583,58136738,99925059,96751605,80285804,77445473,86896314,97186138,100778293,51475215,46392686,17800797,17006157,48410093,187507648,21551205,27781484,15076390,16578309,55422711,23524507,68359712,68071114,17732661,76986804,15766655,16324504,48907063,80864151,117333266,24409746,98816681,129724841,165174290,88287823,147293037,96500633,128124036,156003699,162913443,79453919,56143594,70307067,20584873,72756542,61703078,52655094,77447919,16382194,65054736,67799531,42826666,28824737,185122050,147001819,131607491,88718155,38740909,63509154,95943913,112508240,22534086,18393773,22548403,18131039,15573707,10798802,158645880,18286505,19399719,153051851,117426684,47715374,20326436
,33402909,30055564,17600124,25053299,25166595,22536055,5920532,68509753,31194444,44462811,18450106,11018442,20040003,69615397,22171728,50687788,15162193,41338503,65104337,204491797,60054156,24047612,19628693,29271362,42712551,75014376,16147150,14620190,205789121,14275290,79320096,14365068,18735040,58582341,18639734,18665800,759251,19790261,91478624,98841303,113676911,42519612,21008866,18673446,18222770,18222599,15781432,14379501,18845207,22912596,208019751,20727198,18127783,5503832,268163148,29483675,109713729,42858171,23362483,37440098,53756003,249941736,18704659,184647530,122444371,21464713,90420314,106292326,111463209,25013763,100381915,62718650,22110491,93935921,94360081,95515846,96851347,111690277,212782910,22992332,23520739,21267612,32087193,58513684,46570247,115796187,28638191,110886577,110867752,48363171,32192094,26507551,27592116,43768449,56783491,32749378,91242884,55403897,76052943,17372853,22307857,46297163,19383317,27778893,24285686,19329393,21111883,17919972,36726043,28094035,192747213,44924709,257262765,263359642,262734281,90537810,17512682,128744229,22270337,22964152,21950364,47462001,102441113,45215683,247008365,14746298,83723250,1503121,22135992,102476260,247752114,225919331,16971999,131979670,14409497,15054622,170186151,14864693,21597063,242153407,248815930,20756490,157778204,15261298,260064723,21633857,20212370,254186356,17694756,37396006,17911421,124763921,13821862,21062471,24363945,153168449,14260037,21303400,44494276,5568292,77026540,18025954,16898332,12372812,126722715,116387194,22876454,178071411,110905570,17235724,22505100,239571244,136568901,15447566,249854076,17783446,23334601,142901066,57358233,65533134,41911874,112740986,232880095,17599795,17890304,157380172,87313007,258014885,17416700,14198654,259345913,1240681,33935084,21217402,32623377,27275087,15771056,92394644,16522352,1947301,91518617,188154135,241201438,197594920,13623532,36719281,188591875,234826866,138787319,211921304,9855382,153149305,115911638,166252256,19674502,86697288,13523
0178,153810519,106448460,48706233,13491312,104324860,82161632,117821354,72631996,47279525,18990471,66369206,67378554,18215973,110823581,113420831,105174965,63246339,54546430,40300027,61853389,21857760,76936279,78408666,76348185,62595437,49706919,54655632,61762853,43920155,44615672,48398586,78138151,33998183,76091017,21783950,59204932,44777959,24752484,39657463,14224719,14091091,17961886,16245822,16116713,14350393,17409240,6450322,14079839,9109712,14287409,10894562,14616022,14479720,14505838,15170346,16028241,15647676,16669075,16027546,15649433,18020120,17451742,8487622,15460572,15743810,19109172,18923032,18058609,20456198,19006707,20436059,20179628,20437286,17967675,17974455,17968875,16878156,266830495,14858000,176944763,128225158,16571326,123974480,20776147,16625082,51462013,48029363,65502117,27311044,43881089,243000634,136389258,238259855,160276009,248945322,29235589,20221159,23199697,63877097,36804447,132986633,130945778,17856665,15826822,222140591,27948604,166608401,164383046,244212645,13215382,21844854,24404039,11856892,143798379,180090338,9717322,20733972,33919343,71022729,17494010,18365936,16664681,12701412,26837501,18080708,3952661,52954222,76707735,57770539,234979566,15017893,174263663,106514937,61220477,115979444,43409523,66873399,46469610,189550099,14226495,123738314,6449282,16125224,57768217,14552725,21768766,49620730,15399512,40260612,146167871,196824385,48388417,7212562,16326882,48043084,16081831,23176276,134843187,97474887,55355654,15670644,52774637,173999134,16261445,190298721,4207961,21840737,30907284,136550204,16224666,21146211,258928816,241990563,28260687,259905595,18727438,15371215,196631065,41219840,95493980,15498429,102786512,180859549,14677751,20818801,16250929,93069110,18891923,19670455,8370082,16640598,93876666,47688440,29795115,14327933,34022952,17391507,224464606,32155953,22957302,5392522,46612327,114497182,109287731,17169320,33986037,14129299,222205782,224424419,225234560,15265641,21178616,22771961,18873711,14275426,24295482,21457205,1590
1113,20713061,16789970,73238146,69181624,22772264,79743108,47694175,21674448,138479037,18686907,24892146,49337217,21699092,17786474,69218324,49573859,48038030,171129408,14804435,28479007,142419647,198504506,46764631,68801460,16340501,14413466,4119741,82643763,89269664,85591466,8979992,42860402,39524002,46161313,21334893,40906518,255762643,96496611,76653027,14836128,22602881,22295473,77215251,28428838,121265704,47136500,95243532,59738746,18773422,50156221,37016784,234293544,32631520,252726343,24954899,118805793,15244981,38236362,31072439,43907446,17373943,20317201,31079712,70758813,33838795,27233809,51258903,37951729,50415064,37395827,30022594,17214648,88295976,20651632,16123781,119957997,24024655,33601431,23090525,37683259,27678902,43766620,17721514,15353981,18619979,31058793,39578080,22571950,20854518,19330213,36106105,47401289,22780988,25324156,22646228,29711814,31468242,41438347,29785315,31606227,46229563,31967467,33651080,39726700,24949726,15353815,25425425,23629827,22485793,20537454,14325369,19728030,22044289,24927543,28393637,29867516,20829172,15386108,22303692,21128035,21699429,17198500,15131310,5741722,5248441,237488632,20407539,17797448,30704824,153741488,15809249,29028408,19212009,56051834,14062236,65495877,112624301,28408675,59058446,33755676,84370522,264459407,86387057,17837853,24741917,118752616,223100019,93783780,66593978,53162803,192019265,17782766,249253283,22330646,199941365,102271048,43018238,20012204,108797271,31119337,153474021,235083242,109660695,49860231,2425151,190170608,49861057,169686021,17771860,49669034,122737411,66891455,188468721,50557402,29840295,229965178,222708016,244243477,15934926,184596818,65773523,15169907,110840720,68524064,129575180,101928415,25626894,246163327,207010069,40735690,230547432,141326970,28669612,39015718,128090469,49197437,163126581,107818778,16929701,50531062,53193168,39936193,44821096,46829683,46313918,49034233,30124262,33009321,28067175,35624959,34097876,26892157,30043495,21119636,79039503,17869798,21619519,40047
532,32765534,14412533,14246001,50325797,14529929,12811952,7313362,14688489,245510831,14498484,53197137,16212685,47964412,41144996,816214,15008449,24784891,27589818,39008816,113880573,930061,87904422,1365841,1501471,14413715,1242511,14326840,20333461,657863,1835411,16219126,3829151,109275829,81890700,30345764,8442372,41269920,18840896,101844850,8953122,209792690,40168339,18915795,15664289,133578965,13213122,263767927,71294756,120943272,137466464,40553282,135897307,21348880,18227519,21111896,52772661,91888403,25355724,247033019,25527966,83590349,117928875,21251668,234629912,62915920,17095452,14245378,23778898,196272316,248427077,36554387,224119738,17291393,80638910,40892016,19107878,7380872,56700964,50073507,63004503,21268897,24431892,21307076,21982720,18791763,27901418,18172905,9532402,16037856,85131054,15210410,51263592,43556096,26624619,130617778,52070270,158414847,14268812,92079350,24816848,50323173,187005921,183665224,234521957,47260400,18678924,7651862,58601997,23544596,33792634,91181033,14603515,19071682,20064236,7768572,149217510,188975706,22169931,145492157,14677919,114589771,14693823,15519544,201870414,17781165,14702533,13784592,39050020,167181650,153051008,14861285,30261067,44177383,65647594,28143425,15194013,18959163,28144343,16228479,18901122,36958781,187319644,118022756,16129106,54885400,1068831,68539526,15339201,1344951,2890961,119525215,85634177,7152572,185644175,5561412,14434063,11270992,15024563,16256269,21157904,15572155,31182553,15309804,16160352,7334402,16129920,17912982,16598226,35373847,56705653,250831586,27529145,26618468,238073909,40626514,234132208,235120302,20402945,216431881,35989792,229686366,23539358,169608502,268123287,225278024,261835843,28376631,79293791,6446742,56518880,18915240,180188862,17200116,32948225,25398075,25677409,41737887,18987651,29952857,15537451,24322371,19877186,23967578,46252298,28471026,27406401,33549858,14817982,40141037,24472704,14860448,26933384,82688781,35133435,264938614,172562639,4170491,17292143,16347506,143664
435,23065354,19942991,19410817,229129090,237620586,228296619,30959811,21656599,261555701,29365013,105916405,188639874,173581919,22831662,47218790,37367041,16302242,39288259,15126367,25482459,19867571,229390349,35586563,105029433,183089853,23184896,38127255,16966162,151159882,217231723,48488561,23065057,231389485,58294430,254179370,127750483,5162861,187279871,190064796,78575667,21077000,25174913,94216840,108788135,111635980,23593446,267854863,78445977,232992031,163570705,150669746,14342564,19658936,19418459,40959360,84388888,30343716,260845829,263815367,193732179,221792092,19716781,38254095,73181712,65707359,9624742,234489403,248735463,168673083,247486443,249410485,30912937,249348006,246088673,235373000,246357149,210926192,158398519,137823987,33530012,28602948,236511574,234002627,153944899,85396297,240812994,233783568,199325935,119250381,237770636,161791703,242892689,239949176,242426145,229873610,227341953,216503958,24721108,132201224,217547360,208692908,48086014,30216513,80612021,126017061,148453195,192957076,24745957,188019606,24232525,140519774,103922067,28582749,39249305,16583468,15022633,75124560,125445568,34972633,84119348,107545479,138770045,36948268,115111016,153865861,28599820,161411080,33563161,56864092,137794015,150688795,24913074,27698046,45659261,243721842,88507556,247334603,16329577,18812301,12671692,6527442,53981494,22642788,5446992,24619328,47459584,19824430,10371822,117445762,128564422,23534104,22677790,242360696,19891675,54299209,211588974,20065936,74880863,15210284,105445396,93722614,53130511,24844525,14824473,18644548,40277679,159362503,23384516,119079595,20182089,203681891,15093646,15808765,47632905,98425584,15772733,20177097,198106297,14992591,21623531,158412741,23220034,21757965,51224438,15638637,150203689,17612289,104937383,16955870,17299063,68773980,176280674,19278537,95281047,12340562,16440738,54905208,10228272,24024778,19541375,7855622,52254824,237562389,267847170,202594374,8180562,258968955,110757459,151688371,54290523,4262791,6793362,1985
71335,215396550,144121755,248390035,216216791,119169760,124635414,238144920,236108642,199730264,46812100,220882223,143822047,15693634,172717529,147249049,133727476,203728509,731573,801202,38299186,239200202,125492611,110905955,137751549,71918994,205370456,81189096,37083812,37267497,52167970,4778211,175083160,8105482,118256463,132704571,16160650,114510872,994431,10052972,16194793,13860082,31237389,48920331,54578941,38100497,140935414,47850508,122975302,170316873,45497378,3551701,150115256,72972715,148939636,1120611,136414500,140538938,167624175,174027585,27641388,62015732,70346167,780159,175881965,37093819,19646485,140856303,77338274,187246188,122783603,50471406,11202862,151198642,76326090,104861015,133722469,121613012,47616949,14161128,819256,221019516,9766032,13836402,162104820,618593,47402960,85851159,142684449,63125213,166510517,240615854,269394406,252644913,38371041,136609376,76282043,70418699,185493136,17960806,38727269,234587458,31088763,147886032,265651031,212940155,40565664,201000503,197539670,170474186,263060411,254725084,89866540,130782156,223976506,260391020,19390708,124856935,99698716,52256215,260728760,156769030,40866430,35282604,261151943,58777905,150957265,86351265,179719319,189842436,204918670,203483715,258220853,261297092,204444285,54171939,214020774,89187513,170581751,42612282,44337867,212342630,105542341,190776107,36919153,134781128,47125833,244901765,242245196,194186291,242048469,245106788,215967395,140766042,158795894,116688855,19975710,109892942,95666954,32848956,240815975,77977289,216335730,256471145,247004791,175867315,247424331,107021348,29730710,38146165,237670656,191072401,125075147,15417049,219936176,41190759,179884440,41132274,253520310,73191618,40850899,42981176,112184431,87767749,162022136,193022239,186967108,84145752,227058289,641253,95994080,158848407,742143,41842768,242201454,22240612,19386622,23144737,20379995,254377876,16489591,30335156,13363282,249529225,17625292,151662442,245935903,82274636,15426758,17918808,136358789,169793441,
117724817,15769161,157613794,20696985,216577574,116559622,130681675,74594552,7081242,189276359,96265654,20460757,14749606,231362108,222343099,235389922,36050060,44518888,115689700,235295798,189621457,226370793,15960627,20596269,6324592,82778942,43192807,34716594,36803580,52481056,11891542,199187259,111059621,36355699,56758031,15313857,17637614,20646277,100570170,24193017,117984119,22692199,6033532,116828327,20731304,106565458,83883736,17637553,23679582,29256864,214499442,140964771,15888570,44062282,18713,106145765,15255133,43333230,20998647,31311757,15855630,21008297,20334391,39348813,41264383,20660560,19343562,23116280,111444854,8039442,143454922,267206035,142977623,101879887,17565514,28932160,238480106,253632034,87471023,184910040,19226054,160817572,87650675,70246837,36600922,240860245,160926944,41329637,40981798,46345843,30563759,114225051,219920508,18918698,189283341,72694909,15790423,22440038,235120811,222658804,72547608,68838777,233749221,215952307,26668282,151895837,25456478,216995705,87416722,17612928,176417690,41218028,46715620,104571475,91390383,124283714,19026618,49393335,18950761,21526685,134206307,90098789,34192959,186865762,50600062,126418609,135303236,14522546,64220324,30517544,24083587,21258330,38475205,18953259,101192354,8973062,18676177,55544943,34108173,32360145,140894997,19818494,12085042,40389429,19982092,19524590,31239408,21869297,24450456,250463314,238295737,236917131,32397000,151631699,29257521,17235825,82960113,42491255,18044981,181988782,40058557,63350773,19320490,27246723,17119099,95921754,23069301,51865695,143549770,84460847,223724339,93904007,199555388,81929393,131727620,82444043,162580589,171998025,198334225,91171720,201426746,166316761,231206630,125382460,195885307,113678606,103000558,19019263,87307633,18860342,14705240,29160552,2885851,16185801,210693321,15276510,27762127,17147539,21019628,74712538,131315786,15012486,51193546,74735391,18335809,24796185,123331001,73134924,17193794,19432557,24872865,32128783,16622151,83939091,15321447,1
9733892,14517899,18140091,106506464,24663730,18145841,31282081,16324533,40894238,36893069,74502636,36975611,21375959,29337732,89642493,41407443,12138162,19198326,20692503,41135906,23532159,18148242,14326661,14423603,39806829,133966853,176467382,14463913,52185212,60680365,46743554,87989022,36143299,18815810,34002530,41888469,125415597,35667714,191250892,177343973,165802132,191798541,253342708,232878783,72168780,239613729,136353114,112937614,132618751,190475202,188579340,241837295,241805454,147609274,87871306,235433784,128295129,86566274,162571150,99002460,169719009,240858138,131246928,83900124,168410930,33623402,129254485,113593692,84358766,229643491,50841062,32730931,112074504,138871289,130734452,239623882,162254325,83237549,92817267,197874533,145476646,235792533,178941141,215819570,78145300,148504230,51646374,44758790,206922924,63896875,91437380,105320972,208777407,103080971,231279879,101844353,102607698,26395300,30152403,28050738,94225402,220252762,29091792,24967668,37986775,42082057,74853246,122158761,16234406,31115275,27195114,15942017,51158957,22498452,32482524,40286007,16870682,23618512,23832022,59286800,36860144,30022428,180999717,19028953,53153263,31668714,24621955,100611064,220675169,79551906,41676395,66714703,111673770,97807215,23819304,28605533,25695492,29537319,224325820,30810014,151291323,22100074,78687792,128472974,30113082,18803364,66003255,50572630,213840716,17915334,255591392,198613270,249359076,28110915,30630900,158887925,47945698,36425615,30689667,21541542,118612929,60401740,42203744,101578775,233697048,24293519,19042478,183879398,38773605,101424299,113508007,42371693,28702869,201445961,27060993,59606882,94370073,197220548,121269271,233395335,145742095,237955563,36897911,106375253,154423460,39002053,52963095,30441816,48649566,93928566,27198826,247495144,212479278,90721189,28303502,158393753,24100737,164818150,87623304,201989827,128078136,25465958,110473412,42099565,213707240,19312934,29533792,45419359,63311667,36126354,30228023,70343136,22733444,1
60027905,49666182,47562163,243010689,78507740,243516650,51219197,20955713,121512266,179767114,94784820,110585067,23586754,132223511,62644624,8713702,2148071,616113,7315022,822112,13348,16354128,45547548,85616044,10085882,21126003,16613870,65640210,11557642,14208914,14270999,20399609,99961552,32472561,39274758,19624506,3144281,90421077,7857652,46072516,4560311,97023892,76058815,5876342,29949063,1424831,10094,63133380,21786260,75616437,9745092,19226849,40880426,18484100,94587503,34406259,1614411,10016972,14140571,16044191,113116513,20169131,20217201,79502328,792646,270668475,53533566,157070412,59444261,39902637,165326620,123102622,265000864,52597678,3346971,49205840,112432370,167033855,129017028,132005200,58334111,93516296,168841469,229710631,175338338,241205936,116510779,84457604,264366428,52322656,234006638,258428607,168537997,95800032,143442028,82430417,84034563,13041552,185030770,32014425,85450815,29736553,53424957,172178623,14120187,67132984,74610929,73215961,140722325,65364950,71112180,112307482,122231942,189320995,178118343,251231223,266200838,70305266,131440156,262900579,249631963,33258173,34784263,105597663,101387077,37245723,48630048,143505274,153245594,61723876,39140545,215858327,168243936,30380192,39187698,235394328,62052874,143257853,77395953,26162191,191685671,40405121,267433029,50856543,168696795,84677497,30681499,261294795,265696487,183144850,127040414,29910549,50390358,30587056,78740274,19804553,53323885,32618047,49900650,144021058,241008487,237897985,75037065,203771716,42104946,111489389,227209417,44120318,234212815,38073608,97557168,162545959,108457267,261884913,204209879,129816753,121186812,161576069,38285013,30995591,60353632,30799610,181080474,239217629,254707723,212340162,46930437,257939339,32820524,89568746,16978961,235381394,93460034,21786322,175291914,185343485,76187493,36216833,46093969,164180736,95261792,25947430,168695261,156972971,196630780,102900059,62863521,25742007,171240557,35332069,50052920,127788481,111398486,254240011,32218646,1765
36528,139822631,98766044,19487670,236787450,137340994,128642272,20979106,225911802,248794623,75130364,264408558,15947602,225126598,237003654,19595532,265987070,66437032,14082828,11599312,253284344,78436108,122694066,17805567,207612806,34925804,171514924,188028765,122459707,95534157,217073451,20769938,223842388,29717916,259108421,240182448,248314631,73073062,164401022,57924802,20703607,251219812,198240811,40010200,228682873,47914608,777554,243833570,196167925,44877467,18212565,110950574,253664708,20767246,201747039,143415291,79743060,127635571,246738309,121845291,20508222,123979525,47650483,242655310,252488810,253069382,15020865,166926744,89702218,20081488,90276942,174717541,219716235,15906176,125378095,22707793,53454398,141598993,232821256,20906409,27066806,227631390,58842819,54937915,63968007,18764912,246263280,243920442,37376499,207707329,135178205,139853350,236581562,244593709,146787633,20373762,122714443,52487416,192218013,125820691,238956217,148354084,7064802,61133,14298281,20490861,809760,1493121,14336349,38073,14876536,6790312,205700196,200199318,15649570,15184418,51800278,19644571,29432246,18058704,175899673,89998965,51988166,87795815,14570292,15272882,19105442,7035532,112490145,13768732,47700913,241475497,157821181,14562275,20142938,180086129,237325382,16818325,21873261,241043955,16527490,240810497,115410741,150323560,36609309,29093456,17673732,16096448,216338061,235317040,63585408,236819345,93010530,190357418,240594439,239807683,165693574,211897738,112489818,47932614,55923778,225657320,186155101,16352458,240402102,71239119,34474398,239348795,234645414,240223810,141222424,240180276,15036744,94973632,54699775,196811548,141857015,238673608,97616524,224969703,238109234,231493350,239309716,238638637,218570379,39514187,158951763,16403358,239477369,154541100,239609301,239399824,121708861,195121864,72931614,142039374,239127196,233688482,107780778,20606286,65719885,162778946,95200214,215707078,156950036,15003756,7150932,71343661,11935332,200513252,79144576"
    else:

        sql = "SELECT `accounts`.`external_id` FROM `peepbuzz`.`curators` LEFT JOIN `peepbuzz`.`accounts` ON `accounts`.`account_id` = `curators`.`account_id` WHERE `accounts`.`external_id` IS NOT NULL AND `accounts`.`stream_id` = 1"
        curatorsQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

        users = None

        while True:
            curator = curatorsQ.fetch_row(1, 1)
            if curator == ():
                break

            if users == None:
                users = curator[0]['external_id']
            else:
                users = users + "," + curator[0]['external_id']

    return [u for u in users.split(',')]
def checkBlacklist(user_id=None, account_id=None):
    """Tell whether a blocked_accounts row exists for this user/account pair.

    Returns False when either identifier is falsy or when no database link
    is configured; otherwise returns True only if the lookup in
    `peepbuzz`.`blocked_accounts` finds at least one matching row.
    """
    # Nothing to look up unless both identifiers were supplied
    if not (user_id and account_id):
        return False

    dblink = infoModule.info.site['dblink']
    if dblink == None:
        return False

    # Same statement as before: filter on the account first, then the user
    sql = (
        "SELECT * FROM `peepbuzz`.`blocked_accounts` WHERE "
        "`account_id` = '%s' AND `user_id` = '%s'"
        % (str(account_id), str(user_id))
    )
    blockedQ = mysql_tools.mysqlQuery(sql, dblink)

    # Any row at all means this account has been blocked
    if blockedQ.num_rows() > 0:
        return True
    return False
Example #43
0
def checkForHash(hashtag, type, type_short, block):
    """Look up the stored score of a hashtag for one stats block.

    hashtag    -- hashtag text to look up
    type       -- stats table selector ({daily, hourly}); becomes part of the
                  table name peepbuzz.hashtag_<type>_stats
    type_short -- prefix of the <type_short>_block column to filter on
    block      -- block value (time/secs) to match, as a string

    Returns the score of the last matching row, or False when no row matches.
    """
    pieces = [
        "select score from peepbuzz.hashtag_",
        type,
        "_stats where ",
        type_short,
        '_block="',
        block,
        '" and hashtag="',
        hashtag,
        '"',
    ]
    query = "".join(pieces)
    hashQ = mysql_tools.mysqlQuery(query, infoModule.info.site["dblink"])

    if not (hashQ.num_rows() > 0):
        return False

    # Drain the result set; the score of the final row is the one reported
    for row in iter(lambda: hashQ.fetch_row(1, 1), ()):
        score = row[0]["score"]
    return score
Example #44
0
def outboundLinks(html):
    """Queue outbound story links found in ``html`` for the source reader.

    Every ``href="http..."`` target in ``html`` that does not contain the
    current page's root domain is run through a chain of filters (noise
    patterns, forbidden file extensions, the urlBlocker regexes, the
    blindLinkBlacklist hosts, one-segment paths, and a URL-derived age limit
    of five days).  A surviving link whose source is recognised (or any link
    when the site's ``read_blind_stories`` flag is set) is then checked
    against subs, newsroom, feedGuids and sourceReaderQueue:

    * already queued by a different host: the queue row gets one more hit
      (and ``go_live=1`` once the hit count passes ``votesToGoLive``) and a
      voting_urls row is recorded;
    * not known anywhere: a new sourceReaderQueue row and a voting_urls row
      are inserted.

    No write is executed when the site's ``debug_mode`` flag is set.

    html -- page markup to scan; must be a ``str``.

    Returns False when ``html`` is not a ``str``; otherwise returns None.
    """
    siteDB = infoModule.info.site['database']
    dblink = infoModule.info.site['dblink']
    # writes are skipped entirely when the site runs in debug mode
    liveMode = ('debug_mode' not in infoModule.info.site
                or infoModule.info.site['debug_mode'] == False)

    def fetchRows(result):
        # Drain a mysql_tools result set into a list of row dicts.
        rows = []
        while True:
            row = result.fetch_row(1, 1)
            if row == ():
                break
            rows.append(row[0])
        return rows

    blbQ = mysql_tools.mysqlQuery(
        "select * from " + siteDB + ".blindLinkBlacklist", dblink)
    blindLinkBlacklist = [row['url'] for row in fetchRows(blbQ)]

    pageUrl = infoModule.info.page['url']
    # page url escaped with the same convention used for links below
    pageUrlSql = pageUrl.replace("'", "\\'")
    myHost = urlparse.urlparse(pageUrl)[1]

    # Root domain = last two host labels; a single-label host (for example
    # "localhost") is used whole instead of raising IndexError.
    matchHost = '.'.join(myHost.split('.')[-2:])
    log.plog('host name for self link matching: ' + matchHost, 2)
    # escaped so that the dots in the host only match literal dots
    selfHostRegex = re.compile(re.escape(matchHost), re.I)

    #blocked URLS
    urlBlockerQ = mysql_tools.mysqlQuery(
        "select * from " + siteDB + ".urlBlocker", dblink)
    blockedURLs = [row['regex'] for row in fetchRows(urlBlockerQ)]

    #known urls for outbound links
    oblQuery = mysql_tools.mysqlQuery(
        "select source_id, url_regex from " + siteDB +
        ".sources where url_regex != '' and url_regex not like '%*%' order by length(url_regex) desc",
        dblink)

    oblRegexQuery = mysql_tools.mysqlQuery(
        "select source_id, url_regex from " + siteDB +
        ".sources where url_regex like '%*%' order by length(url_regex) desc",
        dblink)
    #differentiate between non-regex and regex urls to speed things up.
    knownURLs = [(row['url_regex'], row['source_id'])
                 for row in fetchRows(oblQuery)]
    knownRegexURLs = [(row['url_regex'], row['source_id'])
                      for row in fetchRows(oblRegexQuery)]

    if isinstance(html, str) == False:
        log.plog('html in links is not a string', 4)
        return False

    # Collect link targets one at a time; each found href is blanked out of
    # the markup so the next search moves on to a different one.
    pageLinks = []
    while True:
        foundLinks = re.search("href=\"(http.*?)\"", html, re.I)
        if foundLinks == None:
            break
        html = html.replace(foundLinks.group(0), 'x')

        #match root domain to avoid bloomberg.com matching search.bloomberg.com
        target = foundLinks.group(1)
        if matchHost and selfHostRegex.search(target) != None:
            log.plog(target + " is current page's own url", 2)
        else:
            #url does not contain my own host
            pageLinks.append(target)

    # a number of tests learned from experience.  Don't question them
    noisePatterns = ('mt-search', 'eedproxy.google', 'eeds.usmagazine')
    #forbidden extensions -- no reason to include css or pdf files
    forbiddenExtensions = (
        '.css', '.pdf', '.flv', '.zip', '.mov', '.wav', '.ico', '.mp3',
        '.xml', '.gif', '.jpg', '.png'
    )

    # Rejected links are simply skipped.  They must not be removed from
    # pageLinks here: shrinking the list during iteration makes the loop
    # jump over the link that follows each rejected one.
    for link in pageLinks:
        log.plog('link is ' + link, 2)

        if any(re.search(pattern, link, re.I) != None
               for pattern in noisePatterns):
            continue

        if link[-4:].lower() in forbiddenExtensions:
            continue

        #block urls that match url blocker
        blocked = False
        for blockedRegex in blockedURLs:
            if re.search(blockedRegex, link) != None:
                log.plog('url ' + link + " matches urlBlocker " + blockedRegex,
                         2)
                blocked = True
                break
        if blocked:
            continue

        linkParts = urlparse.urlparse(link)
        if linkParts[1] in blindLinkBlacklist:
            log.plog(
                linkParts[1] +
                " not added as outbound link because of blacklist", 2)
            continue

        #do not read links that have only one string in them
        if re.search(r'^/\w+/*$', linkParts[2]) != None:
            log.plog(
                "link excluded because it only has a short path of characters: %s"
                % linkParts[2], 2)
            continue

        #how old is story based on url?
        possibleAgeInDays = dateGuesser.urlDateGuesser(link)
        if possibleAgeInDays != None and int(possibleAgeInDays) > 5:
            log.plog(
                "story is " + str(possibleAgeInDays) +
                " days old.  Not adding", 2)
            continue

        if 'read_blind_stories' in infoModule.info.site and infoModule.info.site[
                'read_blind_stories']:
            urlRecognized = True
        else:
            urlRecognized = False

        urlSource = 0
        #go through all regex known urls
        for urlRegex, sourceId in knownRegexURLs:
            if (re.search(urlRegex, link) != None
                    and sourceId != infoModule.info.source['source_id']):
                urlRecognized = True
                urlSource = sourceId
                log.plog("Text Link: " + link, 2)
                # report the regex that actually matched
                log.plog(
                    "matched known blog via regex: " + urlRegex +
                    " source_id is " + str(urlSource), 2)
                break

        #if not found go through all non-regex url lookups (no '*' character in the url)
        if urlRecognized == False:
            for knownURL, sourceId in knownURLs:
                if (linkParts[1] == knownURL
                        and sourceId != infoModule.info.source['source_id']):
                    urlRecognized = True
                    urlSource = sourceId
                    log.plog("Text Link: " + link, 2)
                    log.plog(
                        "matched known blog via regex: " + knownURL +
                        " source_id is " + str(urlSource), 2)
                    break

        #link must have path component so that base urls (yahoo.com) aren't pulled in
        if not (urlRecognized and len(linkParts[2]) > 1):
            continue

        alreadyThere = False
        # fragment-free form of the url, used for the prefix (like) lookups
        URLOnly = link.partition('#')[0].replace("'", "\\'")

        link = re.sub(r'\.html\?.*', '.html', link)
        link = link.replace("'", "\\'")

        prefixClause = " where url like '" + URLOnly + "%'"
        #for now, checking against exact url too.
        exactClause = " where url = '" + link + "'"
        # (sql, message when a row exists, whether the sql itself is logged)
        duplicateChecks = [
            ("select url, sub_id from " + siteDB + ".subs" + prefixClause,
             'url is already in subs', True),
            ("select url, sub_id from " + siteDB + ".newsroom" + prefixClause,
             'url is already in newsroom', True),
            ("select url, sub_id from " + siteDB + ".subs" + exactClause,
             'url is already in subs', True),
            ("select url, sub_id from " + siteDB + ".newsroom" + exactClause,
             'url is already in newsroom', True),
            ("select guid from " + siteDB + ".feedGuids where guid = '" + link + "'",
             'url is already in feedGuids', False),
        ]
        for sql, message, logSql in duplicateChecks:
            if logSql:
                log.plog(sql, 2)
            if mysql_tools.mysqlQuery(sql, dblink).num_rows() > 0:
                alreadyThere = True
                log.plog(message, 2)

        # require two hits to make story go live.
        sql = "select url, idx, submittingURL, promoter, hits from " + siteDB + ".sourceReaderQueue where url like '" + URLOnly + "%'"
        alreadyInQueueQ = mysql_tools.mysqlQuery(sql, dblink)
        if alreadyInQueueQ.num_rows() > 0:
            alreadyThere = True
            queued = alreadyInQueueQ.fetch_row(1, 1)[0]
            # need to search through voting_urls instead and add 1 if it isn't there for this idx
            log.plog(
                'url is already in source reader queue, checking host', 2)
            if myHost == urlparse.urlparse(queued['submittingURL'])[1]:
                log.plog('no point added to link, host is the same', 2)
            else:
                log.plog('adding point to link in sourceReaderQueue', 2)
                log.plog(
                    'current hit count for this link is ' +
                    str(queued['hits']), 2)
                queueTable = siteDB + ".sourceReaderQueue"
                if int(queued['hits']) + 1 > infoModule.info.site['votesToGoLive']:
                    # UPDATE needs the SET keyword to be valid SQL
                    sql = "update " + queueTable + " set hits=hits+1, go_live=1 where idx=" + queued['idx']
                    log.plog(sql, 2)
                    log.plog(
                        "setting go_live to 1 for sourceReaderQueue id:" +
                        queued['idx'])
                    log.plog('url to go live is ' + queued['url'])
                else:
                    sql = "update " + queueTable + " set hits=hits+1 where idx=" + queued['idx']

                # submittingURL is a string value, so it is quoted and
                # escaped like the other urls in this function
                sql2 = ("insert into " + siteDB + ".voting_urls set promoter=" +
                        queued['promoter'] + ", submittingURL = '" +
                        queued['submittingURL'].replace("'", "\\'") +
                        "', idx=" + queued['idx'] + ", added=NOW()")

                if liveMode:
                    mysql_tools.mysqlQuery(sql, dblink)
                    mysql_tools.mysqlQuery(sql2, dblink)

        if alreadyThere == False:
            sql = ("insert into " + siteDB +
                   ".sourceReaderQueue set added = now(), url='" + link +
                   "', publishDate=now(), source_id=" + str(urlSource) +
                   ", potentialTitle='', potentialOutline='', promoter=" +
                   infoModule.info.source['source_id'] +
                   ", submittingURL='" + pageUrlSql + "', hits=1")
            if liveMode:
                mysql_tools.mysqlQuery(sql, dblink)
                # id of the queue row just inserted, referenced by the vote row
                idx = dblink.insert_id()
                sql2 = ("insert into " + siteDB +
                        ".voting_urls set added = NOW(), promoter=" +
                        infoModule.info.source['source_id'] +
                        ", submittingURL='" + pageUrlSql +
                        "', idx = " + str(idx))
                log.plog(
                    "**** adding url " + link + " into sourceReaderQueue",
                    2)
                log.plog("****\n" + sql2 + "\n****")
                mysql_tools.mysqlQuery(sql2, dblink)
Example #45
0
def linkScoring(html, workingTable):
    """Add score to stories we already hold that ``html`` links to.

    Scans ``html`` for ``href="http..."`` targets that do not contain the
    current page's root domain and looks each one up (as a url prefix) in
    ``workingTable``.  A hit earns the story one point unless:

    * the linking source (or its parent) shares a network with the story's
      source,
    * linkHistory already holds this exact linker url for the story, or
    * linkHistory already holds any url on the linker's scheme and host.

    When a point is awarded the story's score is incremented and a
    linkHistory row is written; for the ``newsroom`` table the story is also
    marked status 'A' and handed to ``newsroomToSubs.promoteFromNewsroom``.
    No write is executed when the site's ``debug_mode`` flag is True.

    html         -- page markup to scan; must be a ``str``.
    workingTable -- table name to search, either ``subs`` or ``newsroom``.

    Returns False when the page has no url or ``html`` is not a ``str``;
    otherwise returns None.
    auth: esr
    """
    if 'url' not in infoModule.info.page or infoModule.info.page['url'] == '':
        log.plog('no url for this story in findLinks', 4)
        return False

    pageUrl = infoModule.info.page['url']
    # escaped with the same convention already applied to link and urlHost
    pageUrlSql = pageUrl.replace("'", "\\'")
    URLParts = urlparse.urlparse(pageUrl)
    myHost = URLParts[1]

    siteDB = infoModule.info.site['database']
    dblink = infoModule.info.site['dblink']
    debugMode = ('debug_mode' in infoModule.info.site
                 and infoModule.info.site['debug_mode'] == True)

    if isinstance(html, str) == False:
        log.plog('html in links is not a string', 4)
        return False

    #match root domain to avoid bloomberg.com matching search.bloomberg.com
    # Computed once, outside the scan loop.  A single-label host is used
    # whole instead of raising IndexError, and the host is escaped so its
    # dots only match literal dots.
    matchHost = '.'.join(myHost.split('.')[-2:])
    selfHostRegex = re.compile(re.escape(matchHost), re.I)

    # Collect link targets one at a time; each found href is blanked out of
    # the markup so the next search moves on to a different one.
    pageLinks = []
    while True:
        foundLinks = re.search("href=\"(http.*?)\"", html, re.I)
        if foundLinks == None:
            break
        html = html.replace(foundLinks.group(0), 'x')
        target = foundLinks.group(1)
        if not (matchHost and selfHostRegex.search(target) != None):
            #url does not contain my own host
            pageLinks.append(target)

    source = infoModule.info.source
    for link in pageLinks:
        link = link.replace("'", "\\'")
        ## if link has nothing but hostname then it's garbage
        linkParts = urlparse.urlparse(link)
        if linkParts[2] == '' or linkParts[2] == '/':
            log.plog('link ' + link + ' has no path component.  Skipping', 2)
            continue

        sql = "select sub_id, source_id from " + siteDB + "." + workingTable + " where url like '" + link + "%'"
        isLinkQuery = mysql_tools.mysqlQuery(sql, dblink)
        if not (isLinkQuery.num_rows() > 0):
            continue

        story = isLinkQuery.fetch_row(1, 1)[0]
        subId = story['sub_id']
        log.plog("found link to one of our stories: " + link, 2)

        addPoint = True
        ### check against network.  If in network, do not add point
        # a source with a parent is looked up under the parent's id
        if 'parent' in source and source['parent'] != '0':
            networkSourceId = source['parent']
        else:
            networkSourceId = source['source_id']
        sql = "select * from " + siteDB + ".networkSourceLinks where source_id=" + networkSourceId

        net1Q = mysql_tools.mysqlQuery(sql, dblink)
        if net1Q.num_rows() > 0:
            log.plog('source is in a network', 2)
            networkIds = []
            while True:
                net1 = net1Q.fetch_row(1, 1)
                if net1 == ():
                    break
                networkIds.append(net1[0]['networkIdx'])
            networks = ','.join(networkIds)
            sql = "select * from " + siteDB + ".networkSourceLinks where source_id=" + story[
                'source_id'] + " and networkIdx in (" + networks + ")"
            net2Q = mysql_tools.mysqlQuery(sql, dblink)
            if net2Q.num_rows() > 0:
                log.plog("in network.  Do not add point", 2)
                addPoint = False

        ## don't add point if there's already a linkHistory entry
        sql = ("select * from " + siteDB + ".linkHistory where sub_id=" +
               subId + " and feedIdx=" + source['source_id'] +
               " and linkerURL='" + pageUrlSql + "'")
        alreadyInHistoryQ = mysql_tools.mysqlQuery(sql, dblink)
        if alreadyInHistoryQ != False and alreadyInHistoryQ.num_rows() > 0:
            log.plog("already in link history.  Do not add point", 2)
            addPoint = False

        #is linker base url already in link history?  do not add point
        urlHost = URLParts[1].replace("'", "\\'")
        sql = ("select * from " + siteDB + ".linkHistory where sub_id=" +
               subId + " and linkerUrl like '" + URLParts[0] + "://" +
               urlHost + "%'")
        alreadyInHistoryQ = mysql_tools.mysqlQuery(sql, dblink)
        if alreadyInHistoryQ != False and alreadyInHistoryQ.num_rows() > 0:
            log.plog("base url already in link history.  Do not add point",
                     2)
            addPoint = False

        if not addPoint:
            continue

        #everything checks out.  add point to existing story in subs
        log.plog("adding point to sub_id " + subId, 2)
        sql = "update " + siteDB + "." + workingTable + " set score = score + 1 where sub_id=" + subId
        if debugMode == False:
            mysql_tools.mysqlQuery(sql, dblink)
        #link history
        sql = ("insert into " + siteDB + ".linkHistory set sub_id=" + subId +
               ", feedIdx=" + source['source_id'] +
               ", linker_id=-1, linkerURL='" + pageUrlSql +
               "', linkedOn=now(), userVote=false")
        if debugMode == False:
            mysql_tools.mysqlQuery(sql, dblink)
        if workingTable == 'newsroom' and debugMode == False:
            ## send to subs
            mysql_tools.mysqlQuery(
                "update " + siteDB +
                ".newsroom set status='A' where sub_id=" + subId,
                dblink)
            newsroomToSubs.promoteFromNewsroom(subId)
Example #46
0
import urllib
import pprint

# Open the database connection used by everything below; a False result
# from mysqlConnect is treated as "could not connect".
# NOTE(review): host, user and password are hard-coded in source -- consider
# moving them to configuration.
link = mysql_tools.mysqlConnect("192.168.0.29", "rw_failover", "f@ilfa1l")
if link == False:
    print "no connection"
    # NOTE(review): exits with status 0 even though the connection failed
    sys.exit(0)


# set database
# Site-wide settings are stored in infoModule.info.site under these keys.
infoModule.info.site["database"] = "db_sportifi"
infoModule.info.site["dblink"] = link
infoModule.info.site["read_blind_stories"] = True
infoModule.info.site["debug_mode"] = False
infoModule.info.site["log_priority_threshold"] = 1
# Set both MySQL session timeout variables to 600 on this connection.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)


# The query selects literal values only, so it yields one fixed test row
# (a single story url and source_id) without reading any table.
sql = "select 'http://www.rantsports.com/redzonetalk/20100910-a-proposition-for-denver-broncos-tim-tebow/' as url, 6367 as source_id"
randomStoriesQ = mysql_tools.mysqlQuery(sql, infoModule.info.site["dblink"])
ctr = 0
while True:
    randomStory = randomStoriesQ.fetch_row(1, 1)
    if randomStory == ():
        break
    print "=================================== " + str(ctr) + " ====================================="
    ctr += 1
    infoModule.info.page["url"] = randomStory[0]["url"]
    sourceQ = mysql_tools.mysqlQuery(
        "select * from " + infoModule.info.site["database"] + ".sources where source_id=" + randomStory[0]["source_id"],
Example #47
0
import strip_html
import infoModule
import entities
import pprint
import sys
import find_title
import body_extractor
import urllib
from alogClient import *
import find_story

# Open the database connection used by everything below; a False result
# from mysqlConnect is treated as "could not connect".
# NOTE(review): host, user and password are hard-coded in source -- consider
# moving them to configuration.
link = mysql_tools.mysqlConnect('192.168.0.115', 'root', 'datafl0w')
if link == False :
	print "no connection"
	# NOTE(review): exits with status 0 even though the connection failed
	sys.exit(0)
# Set both MySQL session timeout variables to 600 on this connection.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)


# set globals for site
# Site-wide settings are stored in infoModule.info.site under these keys.
infoModule.info.site['database'] = 'db_politifi'
infoModule.info.site['dblink'] = link
infoModule.info.site['read_blind_stories'] = True
infoModule.info.site['debug_mode'] = False
infoModule.info.site['log_priority_threshold'] = 1
infoModule.info.site['imageReceiver'] = "http://dev.celebrifi.com/"
infoModule.info.site['maxEntities'] = 15
infoModule.info.site['overrideImageMinSize'] = False
# presumably a minimum pixel area (width * height) -- TODO confirm with the consumer of this key
infoModule.info.site['imageMinSize'] = 300 * 255
	
# NOTE(review): repeats the 'dblink' assignment already made above
infoModule.info.site['dblink'] = link
def get_links(user_id):
    """Collect the external links shown on a user's curated site.

    Loads the curated_sites row for ``user_id``, downloads the site's page,
    narrows it to the section captured by group 1 of the row's
    ``block_regex`` and extracts every link in that section using the row's
    ``link_regex`` (a plain ``<a href="...">`` pattern when none is stored).
    Links containing the site's own address are dropped.  Each remaining link
    is paired with an external id of the form ``homepage_<user_id>_<md5 of
    link>`` and the row's account id, and the resulting list is handed to
    ``get_zezted_filament_links``.

    user_id -- id interpolated into the curated_sites lookup.

    Returns whatever ``get_zezted_filament_links`` returns, or False when no
    curated_sites row exists or the block regex does not match the page.
    Terminates the process via ``sys.exit()`` when the row has no url.
    """
    sql = "SELECT block_regex, link_regex, url, account_id FROM peepbuzz.curated_sites WHERE user_id = %s" % user_id
    siteQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    row = siteQ.fetch_row(1,1)
    if row == ():
        infoModule.info.errorList.append("ERROR: something weird happened in get_curated_site_links.py, didn't find user_id %s" % user_id)
        return False

    site = row[0]
    url = site['url']
    # Validate the url before anything tries to parse or fetch it
    if not url:
        infoModule.info.errorList.append("No URL associated with curated site for user_id %s, dying now" % user_id)
        sys.exit()

    # get site domain to prevent grabbing links in site domain
    # (group 2 is everything after the optional scheme)
    domain = re.search(r'(https?:\/\/)?(.+)', url).group(2)

    account_id = site['account_id']
    # block regex should always be in db
    block_regex = re.compile(site['block_regex'], re.I | re.S)
    # use link_regex in db, otherwise use default
    link_regex_str = site['link_regex'] or r'<a href="(https?:\/\/.+?)">'
    link_regex = re.compile(link_regex_str, re.I | re.S)

    http_req = urllib2.urlopen(url)
    try:
        http_res = http_req.read()
    finally:
        # release the connection whether or not the read succeeded
        http_req.close()

    # first define html block to search for links in
    block = block_regex.search(http_res)
    if block is None:
        infoModule.info.errorList.append("ERROR: block_regex did not match the page for user_id %s" % user_id)
        return False

    # then get links in that block
    page_info = []
    for match in link_regex.finditer(block.group(1)):
        # the link is the last captured group; a pattern without groups
        # falls back to the whole match
        page_link = match.group(match.lastindex or 0)

        # if link in site's domain, we don't want it
        if domain in page_link:
            continue

        # build external_id for homepage links
        m = md5.new()
        m.update(page_link)
        md5_url = m.hexdigest()
        external_id = "homepage_%(user_id)s_%(md5_url)s" % { 'user_id': user_id, 'md5_url': md5_url }

        # build list to pass to func
        page_info.append([page_link, external_id, account_id])

    return get_zezted_filament_links(page_info)
Example #49
0
def main():
    """Supervise the Twitter user-stream daemon, restarting it when needed.

    Loops forever, once per minute:

    1. opens a database connection and stores it in
       ``infoModule.info.site['dblink']``;
    2. reads the daemon's pid from the pid file and probes it with signal 0
       to see whether that process is running;
    3. counts the curator accounts that have an external id on stream 1;
    4. if that count differs from the previous pass, or the daemon is not
       running, kills the recorded pid (if any) and launches the stream
       script in the background;
    5. closes the connection and sleeps.

    Exits the process via ``sys.exit()`` when the user count cannot be read.
    """
    sleep = 1 * 60  # Seconds to sleep
    pidPath = "/tmp/twitterUserStream.pid"
    streamPath = "twitterUserStream.py"
    userCount = None

    # Create a loop
    while True:
        infoModule.info.site['dblink'] = mysql_tools.db_connect()
        # New Loop so move the user counts
        lastUserCount = userCount
        userCount = None

        # Check the file for a PID; the last non-blank line wins
        pid = None
        running = False
        try:
            with open(pidPath) as pidFile:
                for line in pidFile:
                    if line.strip():
                        pid = line.strip()
        except IOError:
            # We dont care if the file does not exist, since it will get created the first time around
            # So we can treat it as the deamon is not running
            pid = None

        try:
            pid = int(pid) if pid is not None else None
        except ValueError:
            # unreadable pid file contents: treat as "no known daemon"
            pid = None

        # Only a positive pid names a single process.  Pid 0 addresses the
        # caller's whole process group (this monitor included) and negative
        # values address other groups, so neither may ever be signalled.
        if pid is not None and pid <= 0:
            pid = None

        if pid is not None:
            # Get the status of the PID (signal 0 only probes, it sends nothing)
            try:
                os.kill(pid, 0)
            except OSError:
                running = False
            else:
                running = True

            print(str(pid) + " - is running?: " + str(running))

        # Get the count for how many users we are following
        sql = "SELECT count(*) as `userCount` FROM `peepbuzz`.`curators` LEFT JOIN `peepbuzz`.`accounts` ON `accounts`.`account_id` = `curators`.`account_id` WHERE `accounts`.`external_id` IS NOT NULL AND `accounts`.`stream_id` = 1"
        countQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

        try:
            userCountV = countQ.fetch_row(1, 1)
            userCount = userCountV[0]['userCount']
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit pass through
            print("Problem fetching the users from the database")
            sys.exit()

        # if the count has changed from last count OR the PID is not running start a new deamon
        if userCount != lastUserCount or not running:
            if pid is not None:
                try:
                    os.kill(pid, 9)
                except OSError:
                    # Proc is aready dead
                    pass

            print("Starting the Daemon")

            os.system("python2.7 " + streamPath + " &")

        infoModule.info.site['dblink'].close()
        time.sleep(sleep)
Example #50
0
def _iterRows(resultSet):
    """Yield each remaining row of a mysql_tools query result as a dict.

    Rows are pulled one at a time with fetch_row(1, 1), which hands back an
    empty tuple once the result set is exhausted.
    """
    while True:
        row = resultSet.fetch_row(1, 1)
        if row == ():
            return
        yield row[0]


def _exitIfUrlBlocked(siteDB):
    """Terminate the process if the current page url matches any urlBlocker regex."""
    urlBlockerQ = mysql_tools.mysqlQuery("select * from " + siteDB + ".urlBlocker", infoModule.info.site['dblink'])
    for urlBlocker in _iterRows(urlBlockerQ):
        if re.search(urlBlocker['regex'], infoModule.info.page['url']) is not None:
            log.plog('url ' + infoModule.info.page['url'] + " matches urlBlocker " + urlBlocker['regex'], 2)
            os._exit(0)


def _scoreAndFollowLinks(html):
    """Run link scoring and outbound-link discovery on html, for feed stories only."""
    source = infoModule.info.source
    if infoModule.info.page['promoter'] == '0' and source['source_id'] != '0' and 'source_format' in source and len(source['source_format']) > 0:
        #link scoring only happens on rss feeds
        log.plog('======================================= LINK SCORING ================================', 2)
        links.linkScoring(html, 'subs')
        links.linkScoring(html, 'newsroom')
        log.plog('======================================= OUTBOUND LINKS ================================', 2)
        #don't go more than one level deep on blind stories
        links.outboundLinks(html)


def scanPage():
    """Fetch the page at infoModule.info.page['url'], analyse it and store it as a story.

    The function works entirely through the shared infoModule.info state
    (page, source, site, entityList).  In order it:

      * rejects urls matching the urlBlocker table (before and after redirects),
      * downloads the page and rejects non-HTML or failed responses,
      * rejects pages matching htmlBlacklist or already present in newsroom/subs,
      * tries to detect the source when source_id is '0',
      * extracts title, outline, links, image, image credit, video and author,
      * finds entities and finally calls addStory.addStory().

    Returns:
        False when the page could not be fetched, was not HTML, or its url is
        already known, so that the caller can try another story.  On every
        other path the process is terminated with os._exit(0), so the function
        never returns any other value.
    """
    siteDB = infoModule.info.site['database']

    if 'url' not in infoModule.info.page:
        log.plog('scan page called without url', 4)
        os._exit(0)

    _exitIfUrlBlocked(siteDB)

    log.plog("fetching " + infoModule.info.page['url'], 2)
    try:
        response = urllib.urlopen(infoModule.info.page['url'])
    except IOError:
        log.plog('could not open ' + infoModule.info.page['url'], 4)
        return False
    # the response is always closed, including on the early returns below
    try:
        responseCode = response.getcode()
        log.plog('urllib response code: ' + str(responseCode), 2)
        if responseCode not in (200, 301, 302, 303):
            log.plog('got failure response code from server', 4)
            return False
        contentType = response.info().gettype()
        if contentType not in ('text/html', 'text/html, text/html'):
            log.plog('content type: ' + contentType + '. not fetching', 4)
            return False
        # put in to account for WSJ -dpg
        if re.search(r"wsj\.com", infoModule.info.page['url'], re.S | re.M | re.I):
            infoModule.info.page['rawHTML'] = wsjAuthHack(infoModule.info.page['url'])
        elif re.search(r"nytimes\.com", infoModule.info.page['url'], re.S | re.M | re.I):
            infoModule.info.page['rawHTML'] = nytAuthHack(infoModule.info.page['url'])
        else:
            infoModule.info.page['rawHTML'] = response.read()
        redirURL = response.geturl()
    finally:
        response.close()

    if redirURL != infoModule.info.page['url']:
        log.plog('redirected to ' + redirURL, 2)
        infoModule.info.page['url'] = redirURL
        #redirected urls need to be blocked too
        _exitIfUrlBlocked(siteDB)

        ### and short url needs to be blocked
        #do not read links that have only one string in them
        linkParts = urlparse.urlparse(infoModule.info.page['url'])
        if re.search(r'^/\w+/*$', linkParts[2]) is not None:
            log.plog("link excluded because it only has a short path of characters: %s" % linkParts[2], 2)
            os._exit(0)

    ## anything in htmlBlacklist?
    htmlBlacklistQ = mysql_tools.mysqlQuery("select regex from " + siteDB + ".htmlBlacklist", infoModule.info.site['dblink'])
    for htmlBlacklist in _iterRows(htmlBlacklistQ):
        if re.search(htmlBlacklist['regex'], infoModule.info.page['rawHTML']) is not None:
            log.plog('html matches htmlBlocker regex: ' + htmlBlacklist['regex'], 3)
            os._exit(0)

    ###################################
    #special case for feedburner sources
    #ernst does not like special cases
    ###################################
    infoModule.info.page['url'] = re.sub(r'\?.*utm_source.*$', '', infoModule.info.page['url'])

    #check AGAIN to see if url is already in system
    # backslashes are escaped before quotes so a literal backslash in the url
    # cannot swallow the closing quote of the SQL string
    escURL = infoModule.info.page['url'].replace("\\", "\\\\").replace("'", "\\'")
    urlCheckQ = mysql_tools.mysqlQuery("select sub_id from " + siteDB + ".newsroom where url='" + escURL + "'", infoModule.info.site['dblink'])
    #don't exit, return false so that a new story can be tried
    if urlCheckQ.num_rows() > 0:
        log.plog("scanpage-url already in newsroom: %s" % infoModule.info.page['url'], 2)
        log.plog("newsroom_id: " + str(urlCheckQ.fetch_row(1, 1)))
        return False
    urlCheckQ = mysql_tools.mysqlQuery("select sub_id from " + siteDB + ".subs where url='" + escURL + "'", infoModule.info.site['dblink'])
    if urlCheckQ.num_rows() > 0:
        log.plog("scanpage-url already in subs: %s" % infoModule.info.page['url'], 2)
        log.plog("sub_id: " + str(urlCheckQ.fetch_row(1, 1)))
        return False

    ## if source is '0', try to find source
    if infoModule.info.source['source_id'] == '0':
        sourceRegexQ = mysql_tools.mysqlQuery("select * from " + siteDB + ".sources where url_regex != ''", infoModule.info.site['dblink'])
        for sourceRow in _iterRows(sourceRegexQ):
            if re.search(sourceRow['url_regex'], infoModule.info.page['url']) is not None:
                log.plog('found source via regex: ' + sourceRow['title'], 2)
                infoModule.info.source = sourceRow
                for i in infoModule.info.source.keys():
                    ## database NULLs arrive as None; downstream code expects strings
                    if infoModule.info.source[i] is None:
                        infoModule.info.source[i] = ''
                break

    ## maybe check last modified header and don't get stories older than 7 days?
    ## (disabled age check kept for reference)
    #possibleAgeInDays = dateGuesser.urlDateGuesser(infoModule.info.page['url'])
    #if possibleAgeInDays != None:
    #    log.plog("age of story might be: " + str(possibleAgeInDays) + " based on " + infoModule.info.page['url'], 2)
    #    if int(possibleAgeInDays) > 5:
    #        log.plog("story is " + str(possibleAgeInDays) + " days old.  Not reading", 2)
    #        return False
    if len(infoModule.info.page['rawHTML']) > 500000:
        log.plog("article length exceeds 500k, probably not html", 2)
        os._exit(0)

    #add meta description into the mix
    infoModule.info.page['meta_description'] = ''
    meta_search = re.search(r'meta name="description" content="(.*?\s+.*?\s+.*?\s+.*?\s+).*?"', infoModule.info.page['rawHTML'], re.I | re.S)
    if meta_search is not None:
        infoModule.info.page['meta_description'] = meta_search.group(1).decode('utf-8')
        log.plog("meta_description: " + infoModule.info.page['meta_description'], 2)

    log.plog('======================================= TITLE ================================', 2)
    # get title
    #set HTMLTitle first
    HTMLTitle = re.search(r'<title>(.*?)<\/title>', infoModule.info.page['rawHTML'], re.S | re.I)
    if HTMLTitle is not None:
        infoModule.info.page['HTMLTitle'] = HTMLTitle.group(1)
        log.plog('html title found: ' + infoModule.info.page['HTMLTitle'], 2)
    else:
        infoModule.info.page['HTMLTitle'] = ""
    title = find_title.findTitle()
    if title != False:
        infoModule.info.page['title'] = title
        log.plog('title from regex', 2)
    # NOTE(review): the branch below always overwrites the regex title set just
    # above; this looks like it may have been meant as an elif chain - confirm
    # before changing, real_title2.realTitle() may rely on page['title'].
    if 'potential_title' in infoModule.info.page and len(infoModule.info.page['potential_title']) > 0:
        infoModule.info.page['title'] = strip_html.clearHTML(infoModule.info.page['potential_title'])
        log.plog('title from potential_title', 2)
    else:
        infoModule.info.page['title'] = real_title2.realTitle()
        if infoModule.info.page['title'] == False:
            infoModule.info.page['title'] = infoModule.info.page['HTMLTitle']
            log.plog('using html title', 2)
        else:
            log.plog('title from realTitle', 2)

    if infoModule.info.page['title'] == '':
        log.plog('could not find title for page. Setting to HTML Title', 4)
        infoModule.info.page['title'] = infoModule.info.page['HTMLTitle']

    #clear html from title
    infoModule.info.page['title'] = strip_html.clearHTML(infoModule.info.page['title'])
    #also titleCase the title
    #infoModule.info.page['title'] = infoModule.info.page['title'].title()
    log.plog('final title: ' + infoModule.info.page['title'], 2)

    log.plog('======================================= OUTLINE ================================', 2)
    ## fetch outline
    if 'featured_source' in infoModule.info.source and infoModule.info.source['featured_source'] == '1':
        infoModule.info.page['plainText'] = strip_html.clearHTMLFeatures(infoModule.info.page['rawHTML'])
    else:
        infoModule.info.page['plainText'] = strip_html.clearHTML(infoModule.info.page['rawHTML'])

    outline = False
    #this toggle allows for ignoring regex in favor of body_extractor
    if infoModule.info.site['skipBodyRegex'] == False:
        storySearch = timeout.TimeoutFunction(find_story.findStoryViaRegex, 2)
        try:
            outline = storySearch()
            #set html block used for image, author and links to be what outline returns
            if outline != False:
                infoModule.info.page['imageHTML'] = infoModule.info.page['rawHTML']
                infoModule.info.page['rawHTML'] = outline
        except TimeoutFunctionException:
            outline = False
            log.plog("ERROR regex timed out for %s" % infoModule.info.source['story_start_marker'], 5)

    #outline = find_story.findStoryViaRegex()
    if outline != False:
        ## parse links in page only in regex block if we have regex
        _scoreAndFollowLinks(outline)

        if 'featured_source' in infoModule.info.source and infoModule.info.source['featured_source'] == '1':
            infoModule.info.page['outline'] = strip_html.clearHTMLFeatures(outline)
        else:
            infoModule.info.page['outline'] = strip_html.clearHTML(outline)
    else:
        log.plog('searching for body using body extractor', 2)
        outline = body_extractor.extract(infoModule.info.page['plainText'])
        if outline != False:
            infoModule.info.page['imageHTML'] = infoModule.info.page['rawHTML']
            abbreviatedHTML = html_body_extractor.html_body_extractor(infoModule.info.page['rawHTML'], outline)
            if abbreviatedHTML is not None:
                infoModule.info.page['rawHTML'] = abbreviatedHTML
            infoModule.info.page['outline'] = outline
        else:
            log.plog('could not create an outline for this story!', 5)
            os._exit(0)
        ## parse links in page - no regex, so look in rawHTML for links
        ## if there are widgetBlockers, first clear them from the html
        linkHTML = infoModule.info.page['rawHTML']
        widgetBlacklistQ = mysql_tools.mysqlQuery("select * from " + siteDB + ".widgetBlacklist", infoModule.info.site['dblink'])
        for widget in _iterRows(widgetBlacklistQ):
            if isinstance(linkHTML, str) == False:
                log.plog('linkHTML is not string', 5)
                os._exit(0)
            widgetPattern = widget['start_text'] + '.*?' + widget['end_text']
            wblMatch = re.search(widgetPattern, linkHTML, re.S | re.I)
            if wblMatch is not None:
                log.plog("found widget blacklist for " + widgetPattern, 2)
                linkHTML = linkHTML.replace(wblMatch.group(0), '')
                mysql_tools.mysqlQuery("update " + siteDB + ".widgetBlacklist set hits=hits+1 where widget_id=" + widget['widget_id'], infoModule.info.site['dblink'])

        _scoreAndFollowLinks(linkHTML)

    log.plog('======================================= IMAGES ================================', 2)
    #find images
    if 'image_start_marker' in infoModule.info.source:
        image_start_marker = infoModule.info.source['image_start_marker']
    else:
        image_start_marker = ''

    if 'image_end_marker' in infoModule.info.source:
        image_end_marker = infoModule.info.source['image_end_marker']
    else:
        image_end_marker = ''
    imageArray = find_images.findImages(infoModule.info.page['imageHTML'], image_start_marker, image_end_marker)
    if imageArray is None:
        log.plog('could not find image', 3)
    else:
        x = imageArray[0]
        y = imageArray[1]
        imageURL = imageArray[2]

        if imageURL == '':
            log.plog('could not find image', 3)
        else:
            log.plog('image found: ' + imageURL, 2)
            infoModule.info.page['largestImage'] = imageURL
            infoModule.info.page['maxSize'] = x * y

    log.plog('======================================= IMAGE CREDIT ================================', 2)
    ## image credit if any
    infoModule.info.page['imageSource'] = ''
    if 'image_source_start_marker' in infoModule.info.source and 'image_source_end_marker' in infoModule.info.source:
        imageSource = find_credit.findCredit(infoModule.info.page['rawHTML'], infoModule.info.source['image_source_start_marker'], infoModule.info.source['image_source_end_marker'])
        if imageSource != False:
            infoModule.info.page['imageSource'] = imageSource

    log.plog('======================================= VIDEOS ================================', 2)
    ###look for videos
    videoLink = find_video.findVideoEmbed(infoModule.info.page['rawHTML'])

    if videoLink == False:
        infoModule.info.page['vlink'] = ''
    else:
        log.plog('found video embed', 2)
        infoModule.info.page['vlink'] = videoLink
        vthumb = find_video.findVideoThumb(videoLink)
        if vthumb == False:
            infoModule.info.page['vthumb'] = ''
        else:
            log.plog('found video thumb', 2)
            infoModule.info.page['vthumb'] = vthumb

    log.plog('======================================= AUTHOR ================================', 2)
    ##author in story?
    infoModule.info.page['author'] = ''
    if 'author_start_marker' in infoModule.info.source and 'author_end_marker' in infoModule.info.source:
        author = find_author.findAuthor()
        if author != False:
            infoModule.info.page['author'] = strip_html.clearHTML(author)

    log.plog('======================================= ENTITIES ================================', 2)
    #### find entities
    entities.entityFinder(infoModule.info.page['title'] + ' ' + infoModule.info.page['outline'], True)
    # nicknameFinder is retried because it is known to fail intermittently, but
    # the retries are bounded: retrying forever on a persistent error would
    # hang the worker.  After the last attempt the story continues without
    # nickname matches.
    maxNicknameAttempts = 3
    for attempt in range(1, maxNicknameAttempts + 1):
        try:
            entities.nicknameFinder(infoModule.info.page['title'] + ' ' + infoModule.info.page['outline'], True)
            break
        except Exception as nicknameError:
            log.plog('nicknameFinder failed (attempt %d of %d): %r' % (attempt, maxNicknameAttempts, nicknameError), 4)
    ## test cityToTeam
    #cityToTeam.getRelevantEntity()

    entities.setPrimo()

    #### chop outline to 500 chars unless featured
    if 'featured_source' not in infoModule.info.source or infoModule.info.source['featured_source'] == '0':
        infoModule.info.page['outline'] = infoModule.info.page['outline'][0:500] + '...'

    if len(infoModule.info.entityList) < 1:
        log.plog("no entities found in story!", 5)
        os._exit(0)

    log.plog('======================================= UNKNOWN ENTITIES ================================', 2)
    ## any unknown entities?
    entityFixedString = infoModule.info.page['title'] + ' ' + infoModule.info.page['outline']
    entityFixedString = entityFixedString.replace("'s", "")
    entityFixedString = re.sub(r'\W+', ' ', entityFixedString)

    find_new_entities.find_new_entities(entityFixedString)
    ## page must have at least one non-hidden entity
    invisibleTypesQuery = mysql_tools.mysqlQuery("select mptype_id from db_topics.mptypes where visibility='invisible'", infoModule.info.site['dblink'])
    invisibleTypes = ','.join(oneType['mptype_id'] for oneType in _iterRows(invisibleTypesQuery))
    cclist = ','.join(str(eKey) for eKey in infoModule.info.entityList.keys())

    sql = "select celeb_id from db_topics.celebs where celeb_id in (" + cclist + ")"
    # "not in ()" is a SQL syntax error, so the filter is only added when at
    # least one invisible type exists
    if invisibleTypes != '':
        sql = sql + " and mptype_id not in (" + invisibleTypes + ")"
    nonHiddenEntitiesQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    if nonHiddenEntitiesQ.num_rows() == 0:
        log.plog('no non-hidden entities found in story!', 4)
        os._exit(0)

    newSubId = addStory.addStory()
    if newSubId == False:
        log.plog('addStory failed', 5)
    else:
        log.plog("Story added.  New sub_id: " + str(newSubId), 2)

    os._exit(0)
Example #51
0
''' 
findCelebVerticals 
Takes entities
for each entity, comes up with a percentage for each vertical.  celebrifi / total, politifi / total, sportifi / total
then we add up the percentages for each entity, highest percentage and that's the vertical we get
'''


# Open the database connection that the rest of this module reaches through
# infoModule.info.site['dblink'].
# NOTE(review): host, user and password are hard-coded here - consider moving
# them to configuration.
link = mysql_tools.mysqlConnect('192.168.0.99', 'gaga', 'badromance')
#link = mysql_tools.mysqlConnect(user="******", password="******")
if link == False:
    # presumably mysqlConnect returns False on failure; note the script still
    # exits with status 0 in that case
    print "no connection"
    sys.exit(0)
# Publish the connection and the logging threshold on the shared site settings.
infoModule.info.site['dblink'] = link
infoModule.info.site['log_priority_threshold'] = 3
# Set both session timeout variables to 600 for this connection.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)

'''
getEntityTotals
takes a single entity id
returns a dict with vertical as key, and value as another dict of stories_total, and storiesWeighted_total
'''
def getEntityTotals(entity_id):
    
    entDict = {}
    sql = 'select celeb_id, stories_total, storiesWeighted_total, vertical from db_topics.celebStatsTotals where celeb_id =' + str(entity_id)
    statsQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    while (1):
        row = statsQ.fetch_row(1,1)
        if row == ():
Example #52
0
def promoteFromNewsroom(sub_id):
    """Promote a newsroom story into the live subs table.

    Copies the newsroom row identified by sub_id into <siteDB>.subs, moves its
    per-site joiner rows and entity links across, updates the per-entity
    celebStats / celebHourlyStats counters, re-points relatedSubs at the new
    row, deletes the newsroom row and queues the new story in atom_queue.

    Args:
        sub_id: id of the row in <siteDB>.newsroom; converted with str().

    Returns:
        True on success, or immediately when site['debug_mode'] == True
        (nothing is written in that case).  False when the current source has
        no source_id, the newsroom row does not exist, the insert into subs
        produced no new id, or the imgsrc/vlink lookup failed.
    """
    # if debugmode, this func doesn't run at all
    if 'debug_mode' in infoModule.info.site and infoModule.info.site['debug_mode'] == True:
        log.plog("debug mode.  Not running promoteFromNewsroom", 3)
        return True

    # if newsroom sdatetime > 0000 then copy that to sdatetime, not now()
    siteDB = infoModule.info.site['database']
    if 'source_id' not in infoModule.info.source:
        log.plog("cannot convert newsroom to subs without source_id", 5)
        return False

    sub_id = str(sub_id)

    #check to make sure this newsroom id exists
    sql = "select sub_id from " + siteDB + ".newsroom where sub_id=" + sub_id
    sanityCheckQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    if sanityCheckQ == False or sanityCheckQ.num_rows() == 0:
        log.plog("no such sub_id " + sub_id + " when converting newsroom to sub", 5)
        return False

    log.plog('promoting ' + sub_id + ' from newsroom', 3)
    sql = ("insert into " + siteDB + ".subs (status, user_id, sdatetime, firstPosted, votes, score, imgsrc, image_id, title, outline, url, vlink, topPick, vthumb, source_id, sourceType, celebrifier, siteMatrix, title_source, site_1, site_2, site_3, site_4, site_5, site_6, site_7, site_8, site_9, site_10, site_11, site_12, site_13, site_14, site_15, author) "
           "select status, user_id, sdatetime, firstPosted, votes, 1, imgsrc, image_id, title, outline, url, vlink, topPick, vthumb, source_id, 'feed', "
           + infoModule.info.source['source_id'] +
           ", siteMatrix, title_source, site_1, site_2, site_3, site_4, site_5, site_6, site_7, site_8, site_9, site_10, site_11, site_12, site_13, site_14, site_15, author from "
           + siteDB + ".newsroom where sub_id=" + sub_id)
    log.plog(sql, 2)
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    newSubId = infoModule.info.site['dblink'].insert_id()
    if newSubId == 0:
        log.plog("error creating sub from newsroom id " + sub_id, 5)
        return False
    newSubId = str(newSubId)
    log.plog("@~!!~@ : new sub id: " + newSubId, 2)

    #copy over new subs_sites_x sub_site joiner table
    #for each table that exists, copy data
    sql = "show tables from " + siteDB + " like 'newsroom_sites_%'"
    joinerTableQuery = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    joinerColumn = 'Tables_in_' + siteDB + ' (newsroom_sites_%)'
    while True:
        joiner = joinerTableQuery.fetch_row(1, 1)
        if joiner == ():
            break
        # strip the 15-character "newsroom_sites_" prefix to get the site id
        subSiteId = joiner[0][joinerColumn][15:]
        # select from newsroom_sites_x.  If newsroom id exists there, insert NEW sub ID into subs_sites_x
        sql = "select sub_id from " + siteDB + ".newsroom_sites_" + subSiteId + " where sub_id=" + sub_id
        newsroomSubSiteQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        if newsroomSubSiteQ.num_rows() > 0:
            sql = "insert into " + siteDB + ".subs_sites_" + subSiteId + " set sub_id=" + newSubId
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
            #and delete old record from newsroom_sites_x
            sql = "delete from " + siteDB + ".newsroom_sites_" + subSiteId + " where sub_id=" + sub_id
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    sql = "insert into " + siteDB + ".subs_celebs (sub_id, celeb_id, primo) select " + newSubId + ", celeb_id, primo from " + siteDB + ".newsroom_celebs where sub_id=" + sub_id
    log.plog(sql, 2)
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    sql = "select imgsrc, vlink from " + siteDB + ".newsroom where sub_id=" + sub_id
    picsVideosFlagsQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    if picsVideosFlagsQ == False:
        log.plog("picsVideosFlags query failed", 5)
        return False
    picsVideosFlags = picsVideosFlagsQ.fetch_row(1, 1)
    if picsVideosFlags == ():
        log.plog("picsVideosFlags query failed", 5)
        return False

    # the photo/video credit depends only on the story, not on the entity, so
    # it is worked out once instead of on every loop iteration
    videoPoints = 0
    picPoints = 0
    if picsVideosFlags[0]['vlink'] != '':
        videoPoints = 1
    if picsVideosFlags[0]['imgsrc'] == 'U':
        picPoints = 1

    sql = "delete from " + siteDB + ".newsroom_celebs where sub_id=" + sub_id
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    sql = "select celeb_id, primo from " + siteDB + ".subs_celebs where sub_id=" + newSubId
    newStoryEntitiesQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    while True:
        newStoryEntity = newStoryEntitiesQ.fetch_row(1, 1)
        if newStoryEntity == ():
            break

        entity = str(newStoryEntity[0]['celeb_id'])
        primo = newStoryEntity[0]['primo']
        sql = "select celeb_id from " + siteDB + ".celebStats where celeb_id=" + entity
        statsExistsQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
        # num_rows must be *called*: comparing the bound method itself to 0 is
        # always true on Python 2, which made the insert branch unreachable
        if statsExistsQ.num_rows() > 0:
            sql = "update " + siteDB + ".celebStats set photos = photos + " + str(picPoints) + ", videos = videos + " + str(videoPoints) + ", stories = stories + 1 where celeb_id = " + entity
        else:
            sql = "insert into " + siteDB + ".celebStats set photos = " + str(picPoints) + ", videos = " + str(videoPoints) + ", stories = 1, celeb_id = " + entity
        mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

        #update hourlystats here because newsroom hourly stats aren't counted
        storiesWeighted = 1
        if primo == 'Y':
            storiesWeighted = 10
        elif primo == '2':
            storiesWeighted = 7
        elif primo == '3':
            storiesWeighted = 5
        elif primo == '4':
            storiesWeighted = 3
        #break up stats time by hourly blocks
        hourBlock = int(math.floor(time.time() / 3600))
        statsExistsQ = mysql_tools.mysqlQuery(
            "select celeb_id from " + siteDB + ".celebHourlyStats where celeb_id=" + entity + " and hourBlock = " + str(hourBlock),
            infoModule.info.site['dblink'])
        if statsExistsQ.num_rows() > 0:
            sql = "update " + siteDB + ".celebHourlyStats set photos = photos + " + str(picPoints) + ", videos = videos + " + str(videoPoints) + ", stories = stories + 1, storiesWeighted = storiesWeighted  + " + str(storiesWeighted) + " where celeb_id = " + entity + " and hourBlock = " + str(hourBlock)
        else:
            sql = "insert into " + siteDB + ".celebHourlyStats set photos = " + str(picPoints) + ", videos = " + str(videoPoints) + ", stories = 1, storiesWeighted = " + str(storiesWeighted) + ",  celeb_id = " + entity + ", hourBlock = " + str(hourBlock)

        log.plog(sql, 2)
        #don't count stories towards stats if they are in newsroom
        if 'debug_mode' not in infoModule.info.site or infoModule.info.site['debug_mode'] == False:
            mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
            #update celeb popularity

    sql = "insert into " + siteDB + ".relatedSubs set sub_id1=" + newSubId + ", sub_id2=-1, table1='subs'"
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    sql = "delete from " + siteDB + ".newsroom where sub_id=" + sub_id
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    sql = "update " + siteDB + ".relatedSubs set sub_id1 = " + newSubId + ", table1='subs' where sub_id1=" + sub_id + " and table1='newsroom'"
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    sql = "update " + siteDB + ".relatedSubs set sub_id2 = " + newSubId + ", table2='subs' where sub_id2=" + sub_id + " and table2='newsroom'"
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    #every time a new story is created, build the relationship xml
    sql = "insert into " + siteDB + ".atom_queue set placed=now(), sub_id=" + newSubId
    mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])

    return True
Example #53
0
import sys
import _mysql
import mysql_tools

# Maintenance script for db_topics.celebs_related:
#   1. normalise every pair so that cid_1 <= cid_2,
#   2. collapse duplicate pairs, deleting the lowest-relevance copies first,
#   3. drop rows that have no relevance.
link = mysql_tools.mysqlConnect('127.0.0.1', 'root', '')
if link == False:
    print('no connection')
    sys.exit(0)

# MySQL evaluates single-table UPDATE assignments left to right, so
# "SET cid_1=cid_2, cid_2=cid_1" does not swap: cid_2 is assigned the already
# overwritten cid_1 and both columns end up holding the old cid_2.  The old
# cid_1 is therefore parked in a user variable while the columns are exchanged.
sql = 'UPDATE db_topics.celebs_related SET cid_1 = (@swap_cid := cid_1), cid_1 = cid_2, cid_2 = @swap_cid WHERE cid_1 > cid_2'
mysql_tools.mysqlQuery(sql, link)
print('DONE - ' + sql)

print('START - deleting dupes')
sql = "SELECT COUNT(*) AS count, CONCAT(cid_1, CONCAT('_', cid_2)) AS concat FROM db_topics.celebs_related GROUP BY CONCAT(cid_1, CONCAT('_', cid_2)) HAVING COUNT(*) > 1"
dupeQ = mysql_tools.mysqlQuery(sql, link)

while (1):
    row = dupeQ.fetch_row(1, 1)
    if row == ():
        break
    # the pair was packed as "<cid_1>_<cid_2>" by the query above
    cid_1, cid_2 = row[0]['concat'].split('_')
    count = int(row[0]['count'])
    if count > 1:
        # keep a single row per pair: delete count - 1 copies, lowest relevance first
        sql = 'DELETE FROM db_topics.celebs_related WHERE cid_1 = ' + cid_1 + ' AND cid_2 = ' + cid_2 + ' ORDER BY relevance ASC LIMIT ' + str(count - 1)
        mysql_tools.mysqlQuery(sql, link)
print("DONE - deleting dupes")

sql = 'DELETE FROM db_topics.celebs_related WHERE relevance IS NULL'
mysql_tools.mysqlQuery(sql, link)
Example #54
0
def _microdataItemprop(entityId, organizations, hasName):
    """Pick the microdata itemprop for an entity, or None if it gets no markup.

    An entity whose 'human' attribute is '1' (and for which the caller found
    a usable name) is tagged 'name'. An entity whose mptype_id is listed in
    organizations is tagged 'affiliation'; that wins when both apply.
    """
    itemprop = None
    if hasName and entityLib.entityLibrary(entityId, 'human') == str(1):
        itemprop = 'name'
    if int(entityLib.entityLibrary(entityId, 'mptype_id')) in organizations:
        itemprop = 'affiliation'
    return itemprop


def _microdataSpan(content, itemprop):
    """Wrap content in the nested itemscope/itemprop spans for Person markup."""
    return ('<span itemscope itemtype="http://www.data-vocabulary.org/Person">'
            '<span itemprop="' + itemprop + '">' + content + '</span></span>')


def _wrapNameWithMicrodata(text, name, itemprop):
    """Wrap occurrences of name in text with microdata spans.

    Three searches are made, in order: the name with text on both sides, the
    name at the very start of the text, and the name at the very end. For each
    search that hits, every occurrence of the matched string is replaced.
    The name is escaped so regex metacharacters in it are matched literally.
    """
    wrapped = _microdataSpan(name, itemprop)
    pattern = re.escape(name)

    # Middle of the text: not directly after ']', '=' or '-', optionally
    # followed by a possessive/plural suffix, and not directly before '['.
    inner = re.search(r"([^\]=-])\b" + pattern + r"('*s*)\b([^\[]){1}",
                      text, re.I)
    if inner is not None:
        text = text.replace(
            inner.group(0),
            inner.group(1) + wrapped + inner.group(2) + inner.group(3))

    # Start of the text (this search is case-sensitive, unlike the other two).
    leading = re.search("^" + pattern + r"('*s*)\b([^\[]){1}", text)
    if leading is not None:
        text = text.replace(leading.group(0),
                            wrapped + leading.group(1) + leading.group(2))

    # End of the text.
    trailing = re.search(r'\b' + pattern + '$', text, re.I)
    if trailing is not None:
        text = text.replace(trailing.group(0), wrapped)

    return text


def _stashMicrodataSpans(text, stash):
    """Move every finished microdata span out of text and into stash.

    Each span is replaced by a ~#~N~#~ placeholder, N being its index in
    stash, so that later passes cannot match inside markup that was already
    added. stash is extended in place; the updated text is returned.
    """
    while True:
        span = re.search(r'<span itemscope.*?<\/span><\/span>', text)
        if span is None:
            break
        placeholder = "~#~" + str(len(stash)) + "~#~"
        stash.append(span.group(0))
        text = text.replace(span.group(0), placeholder)
    return text


def _isWrappableNamePart(part):
    """Tell whether a first/last name can be matched: non-empty and not a number."""
    if part is None or part == '':
        return False
    try:
        int(part)
    except ValueError:
        return True
    return False


def addMicrodata(text, entities, fullNames=False):
    """Return text with entity mentions wrapped in data-vocabulary.org microdata.

    text      -- the string to annotate; None or '' is returned unchanged.
    entities  -- iterable of entity ids looked up through entityLib; when empty
                 the text is returned unchanged.
    fullNames -- when left False, first and last names are also matched on
                 their own; any other value skips that pass.

    Existing HTML tags are set aside first so no match is made inside them.
    Entities are then processed longest name first, in up to three passes:
    full entity name, first/last name, and nicknames read from
    db_topics.nicknames. Between passes the spans already inserted are
    swapped for placeholders so a later pass cannot wrap them again; all
    placeholders and HTML tags are restored before returning.
    Entities whose visibility is 'invisible' are never tagged.
    """
    # mptypes by category
    organizations = [37, 43, 44, 48, 49, 66, 76, 79, 104, 110, 120]

    if not entities:
        log.plog("no entities passed to addMicrodata", 5)
        return text

    if text is None or text == '':
        log.plog("no text passed to addMicrodata", 5)
        return text

    # Longest names first, so a short name cannot claim part of a longer one.
    orderedEntities = sorted(
        entities,
        key=lambda entity: len(entityLib.entityLibrary(entity, 'entityName')),
        reverse=True)
    visibleEntities = [
        entity for entity in orderedEntities
        if entityLib.entityLibrary(entity, 'visibility') != 'invisible'
    ]

    # Set aside all html so that matches are not made inside tags.
    htmlBlocks = []
    while True:
        tag = re.search('<.*?>', text)
        if tag is None:
            break
        text = text.replace(tag.group(0), "~*~%d~*~" % len(htmlBlocks))
        htmlBlocks.append(tag.group(0))

    # Pass 1: full entity names.
    for entityId in visibleEntities:
        name = entityLib.entityLibrary(entityId, 'entityName').strip()
        if name == '':
            # An empty name would produce a pattern that matches arbitrary text.
            continue
        itemprop = _microdataItemprop(entityId, organizations, True)
        if itemprop is not None:
            text = _wrapNameWithMicrodata(text, name, itemprop)

    # Spans taken out of the text between passes; the placeholder number is
    # the index into this list.
    stash = []

    # Pass 2: first and last names on their own. The explicit comparison is
    # kept so that only False (or 0) enables the pass, as callers expect.
    if fullNames == False:
        text = _stashMicrodataSpans(text, stash)

        for entityId in visibleEntities:
            lname = entityLib.entityLibrary(entityId, 'lname')
            if lname is not None:
                lname = lname.strip()
            fname = entityLib.entityLibrary(entityId, 'fname')
            if fname is not None:
                fname = fname.strip()

            itemprop = _microdataItemprop(entityId, organizations,
                                          lname != '' or fname != '')
            if itemprop is None:
                continue
            # Missing, empty and purely numeric names are skipped.
            for part in (fname, lname):
                if _isWrappableNamePart(part):
                    text = _wrapNameWithMicrodata(text, part, itemprop)

    # Pass 3: nicknames, after the rest are done.
    text = _stashMicrodataSpans(text, stash)

    for entityId in visibleEntities:
        itemprop = _microdataItemprop(entityId, organizations, True)
        if itemprop is None:
            continue
        idText = str(entityId)
        nicknamesQ = mysql_tools.mysqlQuery(
            "select name, case_sensitive from db_topics.nicknames where cid_1="
            + idText + " or cid_2=" + idText + " or cid_3=" + idText,
            infoModule.info.site['dblink'])
        while True:
            nicknameRow = nicknamesQ.fetch_row(1, 1)
            if nicknameRow == ():
                break
            # Rows flagged case_sensitive are matched exactly, all others
            # ignoring case. The flag is compared as text so it works whether
            # the driver returns the column as '1' or 1.
            if str(nicknameRow[0]['case_sensitive']) == '1':
                flags = 0
            else:
                flags = re.I
            # NOTE(review): the nickname is used as a regex as-is; it is not
            # escaped because stored nicknames may be patterns - confirm.
            nicknameMatch = re.search(
                '\\b' + nicknameRow[0]['name'] + '\\b', text, flags)
            if nicknameMatch:
                text = text.replace(
                    nicknameMatch.group(0),
                    _microdataSpan(nicknameMatch.group(0), itemprop))
                # Take the new span out right away to prevent more dupes.
                text = _stashMicrodataSpans(text, stash)

    # Put the stashed microdata spans back ...
    while True:
        repBlock = re.search(r'~#~(\d+)~#~', text)
        if repBlock is None:
            break
        text = text.replace(repBlock.group(0),
                            stash[int(repBlock.group(1))])

    # ... and then the original html tags.
    for index, block in enumerate(htmlBlocks):
        text = text.replace('~*~' + str(index) + '~*~', block)

    return text
Example #55
0
def _fetchTeamRelevance(cid):
    """Return [(related_cid, relevance), ...] for entity cid.

    Reads db_topics.celebs_related joined to db_topics.celebs and keeps only
    related entities with mptype_id 75 (the type this module treats as teams).
    Both values are converted to int.
    """
    sql = 'SELECT cid_2, relevance FROM db_topics.celebs_related, db_topics.celebs WHERE celebs_related.cid_2=celebs.celeb_id and cid_1 = ' + str(
        cid) + ' AND mptype_id = 75'
    relQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
    related = []
    while True:
        row = relQ.fetch_row(1, 1)
        if row == ():
            break
        related.append((int(row[0]['cid_2']), int(row[0]['relevance'])))
    return related


def getTeamFromCity():
    """Guess the missing team of a story from its cities and athletes.

    Takes no parameters; the entities come from infoModule.info.entityList.
    The relevance of every team related to a story athlete is summed, the
    relevance of every team related to a story city is added on top, and the
    team with the highest total wins.

    Returns the winning (team_cid, total_relevance) tuple, or False when the
    story already contains a team, has no city, has no athlete, or none of
    its athletes is related to any team.

    auth: mdk
    """
    cityIds = []
    athleteIds = []

    for cid in infoModule.info.entityList.keys():
        cidType = entityLib.entityLibrary(cid, 'celeb_type')
        if cidType == 'Team':
            # a team is already identified, nothing to guess
            return False
        if cidType == 'City':
            cityIds.append(cid)
        elif cidType == 'Athlete':
            athleteIds.append(cid)

    # exit if no cities or no athletes identified
    if not cityIds or not athleteIds:
        return False

    # team cid => summed relevance
    teamRelevance = {}

    for athleteId in athleteIds:
        for teamId, relevance in _fetchTeamRelevance(athleteId):
            teamRelevance[teamId] = teamRelevance.get(teamId, 0) + relevance

    # City scores are only used to reinforce athlete scores, never alone.
    if not teamRelevance:
        return False

    for cityId in cityIds:
        # dict() keeps one relevance per team for this city (last row wins).
        cityTeams = dict(_fetchTeamRelevance(cityId))
        for teamId, relevance in cityTeams.items():
            teamRelevance[teamId] = teamRelevance.get(teamId, 0) + relevance

    bestTeam = max(teamRelevance.items(), key=operator.itemgetter(1))
    log.plog('picked team =>  ' + str(bestTeam), 2)
    return bestTeam
Example #56
0
import imageCredit
import urllib

## warning.  This sucker messes with the stories.  Don't test it on the live site.

# Connect to the development database; nothing below works without it.
link = mysql_tools.mysqlConnect('192.168.0.115', 'dev_rw', 'dev_rw_pw')
if link == False:
    print("no connection")
    sys.exit(0)

# Point the shared info module at the celebrifi database and this connection.
infoModule.info.site['database'] = 'db_celebrifi'
infoModule.info.site['dblink'] = link

# Pick one random source that has an image start marker configured and copy
# every column of its row into infoModule.info.source.
sourceSql = "select * from db_celebrifi.sources where image_source_start_marker != '' order by rand() limit 1"
randSourceQ = mysql_tools.mysqlQuery(sourceSql, link)
randSource = randSourceQ.fetch_row(1, 1)
sourceRow = randSource[0]
for key in sourceRow.keys():
    infoModule.info.source[key] = sourceRow[key]

# Find the most recent story (highest sub_id) from that source.
storySql = ("select url from db_celebrifi.subs where source_id=" +
            sourceRow['source_id'] + " order by sub_id desc limit 1")
randStoryQ = mysql_tools.mysqlQuery(storySql, link)
randStory = randStoryQ.fetch_row(1, 1)
url = randStory[0]['url']

print(url)

urlSocket = urllib.urlopen(url)
Example #57
0
def _escapeSqlText(value):
    """Escape a string so it can sit inside a quoted MySQL string literal.

    Backslashes are doubled first so the escapes added for the quote
    characters are not themselves re-escaped. Both quote styles are handled
    because the queries below use both. Raises AttributeError when value is
    not a string.
    NOTE(review): hand-rolled because no parameter binding is visible on
    mysql_tools here; prefer the driver's own escaping if it is available.
    """
    return value.replace('\\', '\\\\').replace("'", "\\'").replace('"', '\\"')


def _discardFailedStory(original_url, extractor):
    """Delete the story stored for original_url and record why it failed.

    Removes the filaments of every matching story, then the stories
    themselves, and finally inserts a row into peepbuzz.failed_urls with the
    extractor's error text (prefixed by 'first_error' when present).
    """
    dblink = infoModule.info.site['dblink']
    urlSql = _escapeSqlText(original_url)

    infoModule.info.errorList.append("could not read " + original_url + ", deleting")
    sql = "SELECT story_id FROM peepbuzz.stories where original_url='" + urlSql + "'"
    storyQuery = mysql_tools.mysqlQuery(sql, dblink)
    while True:
        storyRow = storyQuery.fetch_row(1, 1)
        if storyRow == ():
            break
        sql = "DELETE FROM peepbuzz.filaments where story_id=" + str(storyRow[0]['story_id'])
        mysql_tools.mysqlQuery(sql, dblink)

    sql = "DELETE from peepbuzz.stories where original_url='" + urlSql + "'"
    mysql_tools.mysqlQuery(sql, dblink)

    # log failure to failed_urls
    fail_reason = extractor.get('error', 'json_contents error not available')
    if 'first_error' in extractor:
        fail_reason = extractor['first_error'] + ' / ' + fail_reason
    sql = ("INSERT IGNORE into peepbuzz.failed_urls set original_url='" + urlSql +
           "', failed_on=now(), reason='" + _escapeSqlText(fail_reason) + "'")
    mysql_tools.mysqlQuery(sql, dblink)


def addContentsToDB(json_contents):
    """Store the result of a body extraction for one story.

    json_contents must contain 'original_url' and a 'body_extractor' dict.

    * No 'status' in body_extractor: the payload is printed and nothing is
      written.
    * status 'ERROR' with error 'Server Busy': the URL is put on
      peepbuzz.explode_queue to be retried later.
    * No 'story' in body_extractor: the story and its filaments are deleted
      and the failure is logged to peepbuzz.failed_urls.
    * Otherwise the story row is updated with title, outline and url, and its
      images and (not yet stored) videos are inserted.

    Returns True when the story was updated and its images stored; False in
    every other case, including a title, outline or url that is not a string
    and any failed update or image insert. A failed video insert is only
    recorded in infoModule.info.errorList.
    """
    original_url = json_contents['original_url']
    extractor = json_contents['body_extractor']

    if 'status' not in extractor:
        # Malformed payload: report it and stop instead of failing on the
        # status lookup below.
        print("ERROR in json_contents format: ")
        pprint.pprint(json_contents)
        return False

    dblink = infoModule.info.site['dblink']
    urlSql = _escapeSqlText(original_url)

    if extractor['status'] == 'ERROR' and extractor.get('error') == 'Server Busy':
        #server was broken.  add to explode queue for later
        sql = "INSERT IGNORE INTO peepbuzz.explode_queue SET added=now(), original_url='" + urlSql + "'"
        mysql_tools.mysqlQuery(sql, dblink)
        return False

    try:
        be = extractor['story']
    except KeyError:
        # failure, so delete story
        _discardFailedStory(original_url, extractor)
        return False

    try:
        url = _escapeSqlText(be['url'])
        title = _escapeSqlText(be['title'])
        outline = _escapeSqlText(be['outline'])
    except AttributeError:
        # one of the fields is missing its text (e.g. None)
        return False

    query = (u"UPDATE peepbuzz.stories set title='" + title + "', body='" + outline +
             "', url='" + url + "', sdatetime=NOW() where original_url='" + urlSql + "'")
    try:
        mysql_tools.mysqlQuery(query, dblink)
    except Exception:
        return False

    sql = "SELECT story_id from peepbuzz.stories WHERE original_url='" + urlSql + "'"
    storyIDQ = mysql_tools.mysqlQuery(sql, dblink)
    storyIDRow = storyIDQ.fetch_row(1, 1)
    if storyIDRow == ():
        #major error! No story with that original URL?
        return False

    story_id = str(storyIDRow[0]['story_id'])

    # check images
    for img in be['images']:
        query = (u'insert into peepbuzz.story_images (story_id, url, width, height) values ("' +
                 story_id + '","' + _escapeSqlText(str(img['url'])) + '","' +
                 _escapeSqlText(str(img['width'])) + '","' +
                 _escapeSqlText(str(img['height'])) + '")')
        try:
            mysql_tools.mysqlQuery(query, dblink)
        except Exception:
            return False

    # check videos
    for vid in be['videos']:
        videoUrl = _escapeSqlText(str(vid['url']))
        #check for dupes
        sql = "SELECT video_id FROM peepbuzz.story_videos WHERE story_id=" + story_id + " and url='" + videoUrl + "'"
        video_dupe_check_q = mysql_tools.mysqlQuery(sql, dblink)
        if video_dupe_check_q.num_rows() != 0:
            continue
        embed_code = _escapeSqlText(vid['embed_code'])
        query = (u"insert into peepbuzz.story_videos (story_id, url, embed_code, width, height) values ('" +
                 story_id + "','" + videoUrl + "','" + embed_code + "','" +
                 _escapeSqlText(str(vid['width'])) + "','" +
                 _escapeSqlText(str(vid['height'])) + "')")
        try:
            mysql_tools.mysqlQuery(query, dblink)
        except Exception:
            # a video that cannot be stored does not fail the whole story
            infoModule.info.errorList.append("failed to add video")
            infoModule.info.errorList.append(query)
    return True