import re
import urllib
import mysql_tools
import infoModule
import sys  # FIX: sys.exit() below referenced `sys` without importing it (unless the star import supplied it)
from log import *

# comments :
# moved vlink (php) variable to embed_code

link = mysql_tools.mysqlConnect("192.168.0.115", "dev_rw", "dev_rw_pw")
if link == False:
    # print "no connection"
    sys.exit(0)
infoModule.info.site["dblink"] = link
infoModule.info.site["database"] = "db_celebrifi"
debugMode = True


def createVideoStory(embed_code, v_celeb_list, source):
    # embed code, list of dicts, dict
    # Parses a YouTube <embed> tag out of embed_code to build a video story.
    # NOTE(review): this chunk is truncated partway through the function body.
    recognized = False
    v_source = {}
    v_page_info = []
    videoSource = []
    newVideoSub = []
    embed = re.search('<embed.*?src="http:\/\/www.youtube.com\/\w+\/([\w-]+).*?<\/embed>', embed_code, re.I)
    if embed:
        id = re.search("\/v\/[\w-]+", embed.group(0))
import infoModule
import mysql_tools
import _mysql
from alogClient import *
import sys
import find_story
import urllib

## warning. This sucker messes with the stories. Don't test it on the live site.

link = mysql_tools.mysqlConnect('192.168.0.29', 'rw_failover', 'f@ilfa1l')
if link == False:
    print("no connection")
    sys.exit(0)

# set database
infoModule.info.site['database'] = 'db_celebrifi'
infoModule.info.site['dblink'] = link

# Copy the source_id=6479 row into the global source record.
# NOTE(review): source comes from db_sportifi but stories from db_celebrifi -- confirm intended.
randSourceQ = mysql_tools.mysqlQuery("select * from db_sportifi.sources where source_id=6479", link)
randSource = randSourceQ.fetch_row(1, 1)
for key in randSource[0].keys():
    infoModule.info.source[key] = randSource[0][key]

# find recent story from that source
randStoryQ = mysql_tools.mysqlQuery("select url from db_celebrifi.subs where source_id=" + randSource[0]['source_id'] + " order by sub_id desc limit 1", link)
randStory = randStoryQ.fetch_row(1, 1)
url = randStory[0]['url']
# Debug override: the fetched url is immediately replaced by a fixed one.
url = 'http://nhlhotstove.com/the-price-was-right-and-price-it-is/?utm_source=feedburner&utm_medium=feed&utm_campaign=Feed%3A+NHLHotStove+%28The+Hot+Stove%29'
import mysql_tools
import infoModule
import entities
import pprint
import sys
import string
import urllib
from entityLib import *
from alogClient import *
import cgi
from datetime import datetime
import time
import json

#link = mysql_tools.mysqlConnect('192.168.0.99', 'gaga', 'badromance')
link = mysql_tools.mysqlConnect('192.168.0.115', 'root', 'datafl0w')
#link = mysql_tools.mysqlConnect('127.0.0.1', 'root', 'sidd')
if link == False:
    print("no connection")
    sys.exit(0)

# Keep long-running sessions from being dropped by the server.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)
infoModule.info.site['log_priority_threshold'] = 100
infoModule.info.site['dblink'] = link


def read_timeFrame(json_data):
    # Parse a JSON payload into a working dict.
    # NOTE(review): chunk is truncated partway through this function.
    data = json.loads(json_data)
    length = len(data)
    stor_dict = {}
import sys
import _mysql
import mysql_tools

# Normalize db_topics.celebs_related: order each (cid_1, cid_2) pair,
# collapse duplicate pairs (keeping the most relevant row), and drop
# rows with NULL relevance.
link = mysql_tools.mysqlConnect('127.0.0.1', 'root', '')
if link == False:
    print('no connection')
    sys.exit(0)

sql = 'UPDATE db_topics.celebs_related SET cid_1=cid_2, cid_2=cid_1 where cid_1 > cid_2'
mysql_tools.mysqlQuery(sql, link)
print('DONE - UPDATE db_topics.celebs_related SET cid_1=cid_2, cid_2=cid_1 where cid_1 > cid_2')

print('START - deleting dupes')
sql = "SELECT COUNT(*) AS count, CONCAT(cid_1, CONCAT('_', cid_2)) AS concat FROM db_topics.celebs_related GROUP BY CONCAT(cid_1, CONCAT('_', cid_2)) HAVING COUNT(*) > 1"
dupeQ = mysql_tools.mysqlQuery(sql, link)
while True:
    row = dupeQ.fetch_row(1, 1)
    if row == ():
        break
    cid_1, cid_2 = row[0]['concat'].split('_')
    count = int(row[0]['count'])
    if count > 1:
        # Delete all but the highest-relevance copy of this pair.
        sql = ('DELETE FROM db_topics.celebs_related WHERE cid_1 = ' + cid_1 +
               ' AND cid_2 = ' + cid_2 + ' ORDER BY relevance ASC LIMIT ' + str(count - 1))
        mysql_tools.mysqlQuery(sql, link)
print("DONE - deleting dupes")

sql = 'DELETE FROM db_topics.celebs_related WHERE relevance IS NULL'
mysql_tools.mysqlQuery(sql, link)
from alogClient import *
import infoModule
import mysql_tools
import _mysql
import links
import urllib
import pprint
import sys  # FIX: sys.exit() below used `sys` without importing it (unless the star import supplied it)

link = mysql_tools.mysqlConnect("192.168.0.29", "rw_failover", "f@ilfa1l")
if link == False:
    print("no connection")
    sys.exit(0)

# set database
infoModule.info.site["database"] = "db_sportifi"
infoModule.info.site["dblink"] = link
infoModule.info.site["read_blind_stories"] = True
infoModule.info.site["debug_mode"] = False
infoModule.info.site["log_priority_threshold"] = 1
# Keep long-running sessions alive.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)

# Single hard-coded story stands in for a random selection while testing.
sql = "select 'http://www.rantsports.com/redzonetalk/20100910-a-proposition-for-denver-broncos-tim-tebow/' as url, 6367 as source_id"
randomStoriesQ = mysql_tools.mysqlQuery(sql, infoModule.info.site["dblink"])
ctr = 0
while True:
    randomStory = randomStoriesQ.fetch_row(1, 1)
    if randomStory == ():
        # NOTE(review): chunk truncated here; `break` restored to match the
        # duplicate copy of this script elsewhere in the file.
        break
import infoModule
infoModule.info.site['remoteLogging'] = False
import mysql_tools
import _mysql
from getNewEntities import *
import sys  # FIX: sys.exit() below used `sys` without importing it (unless the star import supplied it)

# Body-extractor service endpoint consulted by getNewEntities().
infoModule.info.source['body_extractor_host'] = 'angelina.celebrifi.com'
infoModule.info.source['body_extractor_port'] = '1348'

link = mysql_tools.mysqlConnect('localhost', 'root', '4rf5tg')
if link == False:
    print("no connection")
    sys.exit(0)
infoModule.info.site['database'] = 'peepbuzz'
infoModule.info.site['dblink'] = link

# Smoke test: report whether the entity refresh succeeded.
if getNewEntities():
    print("hoorah")
else:
    print("boo")
import infoModule
import mysql_tools
import _mysql
from alogClient import *
import sys
import imageCredit
import urllib

## warning. This sucker messes with the stories. Don't test it on the live site.

link = mysql_tools.mysqlConnect('192.168.0.115', 'dev_rw', 'dev_rw_pw')
if link == False:
    print("no connection")
    sys.exit(0)

# set database
infoModule.info.site['database'] = 'db_celebrifi'
infoModule.info.site['dblink'] = link

# Pick one random source that has an image start marker configured and
# load its row into the global source record.
randSourceQ = mysql_tools.mysqlQuery("select * from db_celebrifi.sources where image_source_start_marker != '' order by rand() limit 1", link)
randSource = randSourceQ.fetch_row(1, 1)
for key in randSource[0].keys():
    infoModule.info.source[key] = randSource[0][key]

# find recent story from that source
randStoryQ = mysql_tools.mysqlQuery("select url from db_celebrifi.subs where source_id=" + randSource[0]['source_id'] + " order by sub_id desc limit 1", link)
randStory = randStoryQ.fetch_row(1, 1)
# NOTE(review): this chunk begins mid-function -- the `def convertDate(...)`
# header was lost in extraction (an intact copy of the same script appears
# elsewhere in this file). The dangling body is preserved as comments so the
# runnable remainder parses:
#
#     # Get the time into a usable format
#     unformattedTime = time.strptime(date, '%Y-%m-%dT%H:%M:%S')
#     theirTimeInSecs = time.mktime(unformattedTime)
#     ourTimeInSecs = time.time()
#     if theirTimeInSecs > ourTimeInSecs:
#         # thank you facebook for putting time in the future
#         unformattedTime = time.localtime()
#     return time.strftime('%Y-%m-%d %H:%M:%S', unformattedTime)

if __name__ == "__main__":
    # NOTE(review): credentials are scrubbed placeholders -- fill in before running.
    db_host = '192.168.0.115'
    db_user = '******'
    db_pass = '******'
    link = mysql_tools.mysqlConnect(db_host, db_user, db_pass)
    if link == False:
        log.plog("no connection", 5)
        sys.exit(0)
    print(storeFilament(
        1,
        json.dumps({
            "created": "2011-02-23T16:42:40+0000",
            "story_id": 1,
            "account_id": 1,
            "external_id": "1",
            "summary": "test",
            "title": None,
            'stream_name': "twitter"
        })))
# NOTE(review): chunk begins mid-statement -- the call whose argument list
# ends here was lost in extraction; preserved as a comment so the rest parses:
#     ... infoModule.info.page['rawHTML'])

outline = find_story.findStoryViaRegex()
if outline != False:
    # Featured sources get the feature-preserving HTML cleaner.
    if 'featured_source' in infoModule.info.source and infoModule.info.source['featured_source'] == '1':
        infoModule.info.page['outline'] = strip_html.clearHTMLFeatures(outline)
    else:
        infoModule.info.page['outline'] = strip_html.clearHTML(outline)
    # NOTE(review): original indentation was lost; these prints are assumed to
    # belong inside the outline branch -- confirm against the source repo.
    print("<b>Outline:</b> " + infoModule.info.page['outline'] + "<br />")
    print("<hr>")

link = mysql_tools.mysqlConnect('192.168.0.99', 'gaga_rw', 'gaga_rw_pass')
if link == False:
    #print "no connection"
    sys.exit(0)

site = sys.argv[1]
if site == 'celebrifi':
    infoModule.info.site['database'] = 'db_celebrifi'
    infoModule.info.site['dblink'] = link
    infoModule.info.site['read_blind_stories'] = True
    infoModule.info.site['debug_mode'] = False
    infoModule.info.site['log_priority_threshold'] = 100
    infoModule.info.site['imageReceiver'] = "http://dev.celebrifi.com/"
    infoModule.info.site['maxEntities'] = 15
    infoModule.info.site['overrideImageMinSize'] = False
import mysql_tools
import _mysql
import infoModule
import urllib
from alogClient import *
import sys  # FIX: sys.exit() below used `sys` without importing it (unless the star import supplied it)
import time
import highlightEntities
#import create_video_story
import addStory

#################################
link = mysql_tools.mysqlConnect('192.168.0.115', 'dev_rw', 'dev_rw_pw')
if link == False:
    print("no connection")
    sys.exit(0)
infoModule.info.site['database'] = 'db_celebrifi'
infoModule.info.site['dblink'] = link
infoModule.info.site['maxEntities'] = 15
infoModule.info.site['read_blind_stories'] = False
infoModule.info.site['debug_mode'] = False
infoModule.info.site['log_priority_threshold'] = 2

# Seed a synthetic page; chr(255) deliberately embeds a non-ASCII byte,
# presumably to exercise sanitizing inside addStory -- confirm.
infoModule.info.page['outline'] = 'this was the outline' + chr(255) + ' and here wasn\'t a special character in the middle'
infoModule.info.page['title'] = 'this is a title ak'
infoModule.info.page['url'] = 'http://www2.newdisorder.com/ak'
addStory.addStory()
print(infoModule.info.page['outline'])
from store_discussion_element import *
import infoModule
import _mysql
import mysql_tools
import sys  # FIX: sys.exit() below used `sys` without importing it (unless the star import supplied it)

link = mysql_tools.mysqlConnect("localhost", "root", "")
if link == False:
    print("no connection")
    sys.exit(0)
infoModule.info.site["database"] = "peepbuzz"
infoModule.info.site["dblink"] = link

# Renamed from `json` so the test payload no longer shadows the stdlib module name.
payload = '{"discussions": [{"body": "Lucky duck. I wish I had a bad-ass radio show :)", "comment_created": "2011-02-23T17:23:58+0000", "user_name": "Jeri Casper", "user_id": "612585195"}, {"body": "The closer you are to a story", "comment_created": "2011-02-23T17:25:24+0000", "user_name": "James Elliott", "user_id": "1052944544"}]}'
filament_id = 1
res = store_discussion_element(payload, filament_id)
print(res)
from real_title import *
import pprint
import os
from strip_html import clearHTML

log.plog("#####################################################################")
log.plog("#####################################################################")
log.plog("#####################################################################")
log.plog("#####################################################################")
log.plog("#####################################################################")

link = mysql_tools.mysqlConnect("192.168.0.115", "dev_rw", "dev_rw_pw")
# FIX: validate the connection BEFORE dereferencing it. The original called
# link.select_db() first, which raises AttributeError when mysqlConnect
# returns False, so the "no connection" exit path could never run.
if link == False:
    print("no connection")
    sys.exit(0)
link.select_db("db_celebrifi")
infoModule.info.site['dblink'] = link
infoModule.info.site['log_priority_threshold'] = 2

# NOTE(review): the remainder of this chunk was truncated mid-expression (and
# a `try:` without its except clauses is a syntax error), so the dangling
# fragment is preserved as a comment:
#     try:
#         story_url = sys.argv[1]
#         if story_url:
#             print "passed in story url" + story_url
#             try:
#                 storyQ = mysql_tools.mysqlQuery(
#                     '''select sub_id from db_celebrifi.subs where url="%s"''' %
from alogClient import *
import infoModule
import mysql_tools
import _mysql
import links
import urllib
import pprint
import sys  # FIX: sys.exit() below used `sys` without importing it (unless the star import supplied it)

link = mysql_tools.mysqlConnect('192.168.0.29', 'rw_failover', 'f@ilfa1l')
if link == False:
    print("no connection")
    sys.exit(0)

# set database
infoModule.info.site['database'] = 'db_sportifi'
infoModule.info.site['dblink'] = link
infoModule.info.site['read_blind_stories'] = True
infoModule.info.site['debug_mode'] = False
infoModule.info.site['log_priority_threshold'] = 1
# Keep long-running sessions alive.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)

# Single hard-coded story stands in for a random selection while testing.
sql = "select 'http://www.rantsports.com/redzonetalk/20100910-a-proposition-for-denver-broncos-tim-tebow/' as url, 6367 as source_id"
randomStoriesQ = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
ctr = 0
while True:
    randomStory = randomStoriesQ.fetch_row(1, 1)
    if randomStory == ():
        break
    # NOTE(review): chunk truncated mid-statement; dangling fragment kept as a
    # comment so the loop parses:
    #     print "=================================== " + str(
# NOTE(review): this chunk is the tail of a larger extractor test script --
# twitPicURLs, imglyURLs, yfrogURL and the *Extract helpers are defined in
# the portion lost in extraction.
plixiURL = ['http://plixi.com/p/92656574']
tweetymailURL = ['http://tweetymail.com/users/DomusWeb/pics/1302886344_0.jpeg']
vimeoURLs = [
    'http://vimeo.com/22088150',
    'http://www.vimeo.com/22044571',
]

if __name__ == "__main__":
    infoModule.info.site['dblink'] = mysql_tools.mysqlConnect('127.0.0.1', 'root', '')
    for url in twitPicURLs:
        print(twitPicExtract(url))
    #for url in imglyURLs:
    #    imglyPicExtract(url)
    #for url in yfrogURL:
    #    print yfrogPicExtract(url)
    #for url in plixiURL:
    #    print plixiPicExtract(url)
    #for url in tweetymailURL:
    #    print tweetymailPicExtract(url)
# NOTE(review): chunk begins mid-`if`/`else` ladder -- the leading conditions
# and their indentation were lost in extraction. Fragment preserved as comments:
#         infoModule.info.page['outline'] = strip_html.clearHTMLFeatures(outline)
#     else:
#         infoModule.info.page['outline'] = strip_html.clearHTML(outline)
#     infoModule.info.page['outline'] = infoModule.info.page['outline'].decode('utf-8')
#     infoModule.info.page['outline'] = infoModule.info.page['outline'].encode('ascii', 'xmlcharrefreplace')
#     print str(step)
#     print startMarker
#     print endMarker
#     print infoModule.info.page['outline']
# else:
#     print "no match"

link = mysql_tools.mysqlConnect('192.168.0.99', 'gaga_rw', 'gaga_rw_pass')
if link == False:
    print("no connection")
    sys.exit(0)

# set globals for site
site = sys.argv[1]
#site = 'sportifi'
if site == 'celebrifi':
    infoModule.info.site['database'] = 'db_celebrifi'
    infoModule.info.site['dblink'] = link
    infoModule.info.site['read_blind_stories'] = True
    infoModule.info.site['debug_mode'] = False
import infoModule
import mysql_tools
import entityLib
import sys

## takes no arg or argv[1]=features to define which clearHTML function to run
# NOTE(review): the comment above looks copied from a sibling script -- argv[1]
# here is actually an entity id; verify.
link = mysql_tools.mysqlConnect()
if link == False:
    print("no connection")
    sys.exit(0)
infoModule.info.site['dblink'] = link
infoModule.info.site['log_priority_threshold'] = 3

if len(sys.argv) > 1 and int(sys.argv[1]) > 0:
    # A positive argv[1] means: look up this one entity.
    print("looking up entity_id: " + sys.argv[1])
    cid = sys.argv[1]
    print("%s:" % cid)
    print("\t%s" % entityLib.entityLibrary(int(cid), 'entityName'))
    print("\t%s" % entityLib.entityLibrary(int(cid), 'lookupUrl'))
    print("\t%s" % entityLib.entityLibrary(int(cid), 'bio'))
else:
    # No argument: sample 20 random entities instead.
    sql = "select celeb_id from db_topics.celebs order by rand() limit 20"
    er = mysql_tools.mysqlQuery(sql, infoModule.info.site['dblink'])
import mysql_tools
import infoModule
from alogClient import *
import sys
from feedfetcher import *
import pprint
import os

link = mysql_tools.mysqlConnect("localhost","root","hamm1234")
# FIX: validate the connection BEFORE dereferencing it. The original called
# link.select_db() first, which raises AttributeError when mysqlConnect
# returns False, so the silent-exit path below was unreachable.
if link == False:
    #print "no connection"
    sys.exit(0)
link.select_db("db_celebrifi")
infoModule.info.site['dblink'] = link
infoModule.info.site['log_priority_threshold'] = 2

cr = mysql_tools.mysqlQuery("""select * from sources where source_format = 'atom' limit 50""", link)
if cr == False:
    #print "query failed"
    sys.exit(0)
while True:
    row = cr.fetch_row(1,1)
    if row == ():
        break
    url = row[0]['feed_url']
    # Fork one child per feed; the parent (pid != 0) fetches the feed.
    # NOTE(review): the child falls through and keeps looping/forking --
    # looks like a fork-bomb risk; the chunk is truncated so the child's
    # exit may be below. Confirm against the source repo.
    pid = os.fork()
    if pid:
        #print "url: " + url
        obj = getFeed(url)
# Convert 2011-02-23T16:42:40+0000 into a timestamp for mysql (2011-02-24 17:37:08)
def convertDate(date=None):
    """Turn a Facebook-style ISO timestamp (trailing +0000 offset) into a
    MySQL DATETIME string; timestamps in the future are clamped to now."""
    trimmed = date[:-5]  # drop the '+0000' utc-offset suffix
    parsed = time.strptime(trimmed, '%Y-%m-%dT%H:%M:%S')
    their_secs = time.mktime(parsed)
    our_secs = time.time()
    if their_secs > our_secs:
        #thank you facebook for putting time in the future
        parsed = time.localtime()
    return time.strftime('%Y-%m-%d %H:%M:%S', parsed)


if __name__ == "__main__":
    # NOTE(review): credentials are scrubbed placeholders -- fill in before running.
    db_host = '192.168.0.115'
    db_user = '******'
    db_pass = '******'
    link = mysql_tools.mysqlConnect(db_host, db_user, db_pass)
    if link == False:
        log.plog("no connection", 5)
        sys.exit(0)
    print(storeFilament(
        1,
        json.dumps({
            "created": "2011-02-23T16:42:40+0000",
            "story_id": 1,
            "account_id": 1,
            "external_id": "1",
            "summary": "test",
            "title": None,
            'stream_name': "twitter"
        })))
# NOTE(review): chunk begins mid-list -- the head of the verticals list and
# the import block were lost in extraction. Dangling fragment kept as comments
# (an intact copy of this script's head appears elsewhere in the file):
#     ... 'foodifi',
#     'geekifi',
#     'healthifi',
#     'politifi',
#     'sportifi',
#     'travelifi']

'''
findCelebVerticals

Takes entities
for each entity, comes up with a percentage for each vertical. celebrifi / total, politifi / total, sportifi / total
then we add up the percentages for each entity, highest percentage and that's the vertical we get
'''
link = mysql_tools.mysqlConnect('192.168.0.99', 'gaga', 'badromance')
#link = mysql_tools.mysqlConnect(user="******", password="******")
if link == False:
    print("no connection")
    sys.exit(0)
infoModule.info.site['dblink'] = link
infoModule.info.site['log_priority_threshold'] = 3
# Keep long-running sessions alive.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)

'''
getEntityTotals
takes a single entity id
returns a dict with vertical as key, and value as another dict of stories_total, and storiesWeighted_total
'''
# NOTE(review): chunk ends at a bodyless function header; body lost in extraction:
#     def getEntityTotals(entity_id):
import sys
import _mysql
import mysql_tools

# Clean up db_topics.celebs_related in three passes: canonicalize pair order,
# drop duplicate pairs (keeping the most relevant row), remove NULL relevance.
link = mysql_tools.mysqlConnect('127.0.0.1', 'root', '')
if link == False:
    print('no connection')
    sys.exit(0)

sql = 'UPDATE db_topics.celebs_related SET cid_1=cid_2, cid_2=cid_1 where cid_1 > cid_2'
mysql_tools.mysqlQuery(sql, link)
print('DONE - UPDATE db_topics.celebs_related SET cid_1=cid_2, cid_2=cid_1 where cid_1 > cid_2')

print('START - deleting dupes')
sql = "SELECT COUNT(*) AS count, CONCAT(cid_1, CONCAT('_', cid_2)) AS concat FROM db_topics.celebs_related GROUP BY CONCAT(cid_1, CONCAT('_', cid_2)) HAVING COUNT(*) > 1"
dupe_query = mysql_tools.mysqlQuery(sql, link)
while True:
    record = dupe_query.fetch_row(1, 1)
    if record == ():
        break
    cid_1, cid_2 = record[0]['concat'].split('_')
    count = int(record[0]['count'])
    if count > 1:
        # Keep the single highest-relevance row for this pair.
        sql = ('DELETE FROM db_topics.celebs_related WHERE cid_1 = ' + cid_1 +
               ' AND cid_2 = ' + cid_2 + ' ORDER BY relevance ASC LIMIT ' + str(count - 1))
        mysql_tools.mysqlQuery(sql, link)
print("DONE - deleting dupes")

sql = 'DELETE FROM db_topics.celebs_related WHERE relevance IS NULL'
mysql_tools.mysqlQuery(sql, link)
print("DONE - deleting NULL relevance instances")
# NOTE(review): tail of a larger extractor test script; twitPicURLs and the
# various *Extract helpers live in the portion lost in extraction.
facebookURL = [
    'http://photos-a.ak.fbcdn.net/hphotos-ak-ash4/215281_10150152865511828_43225541827_7197739_1886369_s.jpg'
]
plixiURL = ['http://plixi.com/p/92656574']
tweetymailURL = ['http://tweetymail.com/users/DomusWeb/pics/1302886344_0.jpeg']
vimeoURLs = [
    'http://vimeo.com/22088150',
    'http://www.vimeo.com/22044571',
]

if __name__ == "__main__":
    infoModule.info.site['dblink'] = mysql_tools.mysqlConnect('127.0.0.1', 'root', '')
    for url in twitPicURLs:
        print(twitPicExtract(url))
    #for url in imglyURLs:
    #    imglyPicExtract(url)
    #for url in yfrogURL:
    #    print yfrogPicExtract(url)
    #for url in plixiURL:
    #    print plixiPicExtract(url)
    #for url in tweetymailURL:
    #    print tweetymailPicExtract(url)
import infoModule
import mysql_tools
import _mysql
from alogClient import *
import sys
import newsroomToSubs
import pprint

## warning. This sucker messes with the stories. Don't test it on the live site.

link = mysql_tools.mysqlConnect(host='127.0.0.1', user='******', password='')
if link == False:
    print("no connection")
    sys.exit(0)

# Enumerate every subs-related joiner table in db_celebrifi.
sql = "show tables from db_celebrifi like 'sub%'"
joinerTableQuery = mysql_tools.mysqlQuery(sql, link)
while True:
    joiner = joinerTableQuery.fetch_row(1, 1)
    if joiner == ():
        break
    pprint.pprint(joiner)
    print(joiner[0]['Tables_in_db_celebrifi (sub%)'])

# set database
#infoModule.info.site['database'] = 'db_sportifi'
#infoModule.info.site['dblink'] = link
#infoModule.info.source['source_id'] = '15'
#newsroomToSubs.promoteFromNewsroom(sys.argv[1])
import mysql_tools
import strip_html
import infoModule
import entities
import pprint
import sys
import find_title
import body_extractor
import urllib
from alogClient import *
import find_story

link = mysql_tools.mysqlConnect('192.168.0.115', 'root', 'datafl0w')
if link == False:
    print("no connection")
    sys.exit(0)
# Keep long-running sessions alive.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)

# set globals for site
infoModule.info.site['database'] = 'db_politifi'
infoModule.info.site['dblink'] = link
infoModule.info.site['read_blind_stories'] = True
infoModule.info.site['debug_mode'] = False
infoModule.info.site['log_priority_threshold'] = 1
infoModule.info.site['imageReceiver'] = "http://dev.celebrifi.com/"
infoModule.info.site['maxEntities'] = 15
infoModule.info.site['overrideImageMinSize'] = False
# 300 * 255 -- presumably a minimum pixel area (width * height); confirm units.
infoModule.info.site['imageMinSize'] = 300 * 255
import infoModule
import mysql_tools
import _mysql
from alogClient import *
import sys
import newsroomToSubs
import pprint

## warning. This sucker messes with the stories. Don't test it on the live site.

link = mysql_tools.mysqlConnect(host='127.0.0.1', user='******', password='')
if link == False:
    print("no connection")
    sys.exit(0)

# Walk every subs-related joiner table name in db_celebrifi and dump it.
tables_query = mysql_tools.mysqlQuery("show tables from db_celebrifi like 'sub%'", link)
while True:
    table_row = tables_query.fetch_row(1,1)
    if table_row == ():
        break
    pprint.pprint(table_row)
    print(table_row[0]['Tables_in_db_celebrifi (sub%)'])

# set database
#infoModule.info.site['database'] = 'db_sportifi'
#infoModule.info.site['dblink'] = link
#infoModule.info.source['source_id'] = '15'
#newsroomToSubs.promoteFromNewsroom(sys.argv[1])
import infoModule
import mysql_tools
import _mysql
from hashtagUpdate import *
import sys  # FIX: sys.exit() below used `sys` without importing it (unless the star import supplied it)

link = mysql_tools.mysqlConnect('localhost', 'root', '4rf5tg')
if link == False:
    print("no connection")
    sys.exit(0)
infoModule.info.site['database'] = 'peepbuzz'
infoModule.info.site['dblink'] = link

# One-off driver: refresh data for a single hashtag.
hashtagUpdate("suckerfree")
import json
import mysql_tools
import re
import sys         # FIX: sys.exit() below used `sys` without importing it
import infoModule  # FIX: infoModule is used below but was never imported

verticals = [
    'bizifi',
    'carifi',
    'celebrifi',
    'foodifi',
    'geekifi',
    'healthifi',
    'politifi',
    'sportifi',
    'travelifi'
]

'''
findCelebVerticals

Takes entities
for each entity, comes up with a percentage for each vertical. celebrifi / total, politifi / total, sportifi / total
then we add up the percentages for each entity, highest percentage and that's the vertical we get
'''
link = mysql_tools.mysqlConnect('192.168.0.99', 'gaga', 'badromance')
#link = mysql_tools.mysqlConnect(user="******", password="******")
if link == False:
    print("no connection")
    sys.exit(0)
infoModule.info.site['dblink'] = link
infoModule.info.site['log_priority_threshold'] = 3
# Keep long-running sessions alive.
mysql_tools.mysqlQuery("set wait_timeout = 600", link)
mysql_tools.mysqlQuery("set interactive_timeout = 600", link)

'''
getEntityTotals
takes a single entity id
returns a dict with vertical as key, and value as another dict of stories_total, and storiesWeighted_total
'''