コード例 #1
0
def reset_db(db='test', redis_settings=None, **db_settings):
    """
    Flush the Redis cache and delete every row from the tweet tables.

    :param db: name of a built-in preset ('test' or 'mbp2'). Any other value
        means the MySQL settings are taken from the keyword arguments.
    :type db: str
    :param redis_settings: keyword arguments for ``SafeRedis``. Only used
        when ``db`` is not a preset name (presets supply their own); when it
        is omitted in that case the Redis flush is skipped.
    :type redis_settings: dict
    :param db_settings: keyword arguments for ``MySQLdb.connect``. Ignored
        (overwritten) when ``db`` names a preset.
    :raises ValueError: if ``db`` is not a preset and no MySQL settings
        were supplied
    """
    if db == 'test':
        db_settings = {
            'host': 'localhost',
            'db': 'tweets_test',
            'user': '******',
            'passwd': environ.get('TWEETS_TESTDB_PASSWORD')
        }
        redis_settings = {'host': 'localhost', 'db': '0'}
    elif db == 'mbp2':
        db_settings = {
            'host': 'localhost',
            'db': 'twitter',
            'user': '******',
            'passwd': environ.get('TWEETS_DB_PASSWORD')
        }
        redis_settings = {'host': 'localhost', 'db': 2}
    elif not db_settings:
        raise ValueError("Must supply valid db name or settings")

    # Custom settings carry no implicit Redis target; previously this path
    # crashed with a NameError. Flush only when a target is known.
    if redis_settings is not None:
        cache = SafeRedis(**redis_settings)
        cache.flushdb()
        print("Flushed Redis cache")
    else:
        print("No Redis settings supplied; skipping cache flush")

    connection = MySQLdb.connect(**db_settings)
    try:
        cursor = connection.cursor()
        # Report the database name, not the connection object.
        print('Wiping {db_name}'.format(db_name=db_settings.get('db', db)))
        try:
            for table in ('cluster', 'hashtag', 'hashtag_to_cluster',
                          'hashtag_to_tweet', 'tag_word',
                          'tag_word_to_hashtag', 'url', 'url_to_tweet',
                          'user_to_tweet', 'user_to_url', 'word',
                          'word_to_tweet', 'twitter_user', 'tweet'):
                cursor.execute(
                    """DELETE FROM `{table}`;""".format(table=table))
                print("Wiped {table}".format(table=table))
        finally:
            # As before, commit whatever was deleted even if a later
            # DELETE failed; the exception still propagates.
            connection.commit()
    finally:
        connection.close()
コード例 #2
0
 def __init__(self, use_testdb=False, show_sql=True):
     """
     Open the MySQL connection, cursor and Redis cache for the Inserter.

     :param use_testdb: connect to the 'tweets_test' MySQL database and
         Redis db '0' instead of 'twitter' and Redis db '2'
     :type use_testdb: bool
     :param show_sql: stored unchanged on ``self.show_sql``
     :type show_sql: bool
     """
     if use_testdb:
         mysql_db, password_var, redis_db = (
             'tweets_test', 'TWEETS_TESTDB_PASSWORD', '0')
     else:
         mysql_db, password_var, redis_db = (
             'twitter', 'TWEETS_DB_PASSWORD', '2')
     mysql_kwargs = dict(host='localhost', db=mysql_db, user='******',
                         passwd=environ.get(password_var))
     redis_kwargs = dict(host='localhost', db=redis_db)

     self.show_sql = show_sql
     self.DB = MySQLdb.connect(**mysql_kwargs)
     self.cursor = self.DB.cursor()
     self.cache = SafeRedis(**redis_kwargs)

     # Set each 'pk_<table>' cache key to 0 unless it already holds a value.
     for table_name in ('cluster', 'hashtag', 'tag_word', 'url', 'word'):
         pk_key = 'pk_{table}'.format(table=table_name)
         if self.cache.get(pk_key) is not None:
             continue
         self.cache.set(pk_key, 0)

     self.escape_word = self.DB.escape
コード例 #3
0
ファイル: fix_urls.py プロジェクト: swizzard/tweet_stuff
# Addresses and Redis connection parameters used by this script.
ROUTER_IP = '24.186.113.22'
REDIS_HOST = '24.186.113.22'
REDIS_PORT = '6666'
REDIS_DB = 1

# Captures the host part of an http(s) URL, up to a 2-3 character suffix.
DOMAIN_PAT = re.compile(r'https?://([\w\d\.\-]+\.\w{2,3})')
# Captures the value quoted inside host='...'.
ERR_PAT = re.compile(r'host=\'([\w\d\.]+)\'')

# Use the first project directory that exists. Fail with an explicit message
# rather than an unbound `home_dir` NameError when none is present.
for _candidate in ("~/PycharmProjects/tweet_stuff", "~/tweet_stuff"):
    home_dir = os.path.expanduser(_candidate)
    if os.path.exists(home_dir):
        break
else:
    raise IOError(
        "tweet_stuff directory not found in ~/PycharmProjects or ~")
IN_DIR = os.path.join(home_dir, "extracted2")
OUT_DIR = os.path.join(home_dir, "fixed")

CONN = SafeRedis(host=REDIS_HOST, port=REDIS_PORT, db=REDIS_DB)


def resolve_redirects(url):
    print "Resolving {}".format(url)
    cached = CONN.get(url)
    if cached:
        return cached
    session = requests.session()
    try:
        with closing(session.head(url, timeout=300)) as req:
            r = req
    except (exceptions.RequestException, socket.error) as e:
        try:
            requests.head('http://{}'.format(ROUTER_IP)).close()
            CONN.set(url, url)