Example #1
0
 def watchlist(self):
     '''Watch a fixed list of user_id.

     Empties the target_users table, reloads it from "seed.lst", crawls
     that seed file, selects the distinct friend_id values of the target
     users, passes them to misc.write_to_files under the name
     'initial_friends', and finally enters self.twalerloop().

     Any exception raised by the database calls or by self.crawl()
     propagates to the caller after the database handle is released.
     '''
     db = misc.mysql_db(self.config['db_server'],
                        self.config['db_username'],
                        self.config['db_password'],
                        self.config['db_database'], self.logger)
     try:
         # Clean up database first
         db.execute('DELETE FROM target_users')
         # TODO still have problem
         stmt = ('LOAD DATA LOCAL INFILE "seed.lst" INTO TABLE target_users '
                 'FIELDS TERMINATED BY \"\\t\" LINES TERMINATED BY \"\\n\"')
         db.execute(stmt)
         # Get that list first
         self.crawl('seed.lst')
         # Get that list's friend second
         stmt = ('SELECT DISTINCT friend_id FROM friends, target_users '
                 'WHERE friends.user_id = target_users.user_id')
         db.execute(stmt)
         results = db.cursor.fetchall()
     finally:
         # Release the handle on every path, not only on success, so a
         # failing statement or crawl does not leak it.
         # NOTE(review): __del__ is invoked explicitly because no close()
         # method is visible from here; presumably it closes the
         # connection -- confirm against misc.mysql_db.
         db.__del__()
     misc.write_to_files(results, 'initial_friends',
                         self.config['seed_per_file'], 'utf')
     # Enter the generate-crawl-update loop
     self.twalerloop()
Example #2
0
 def __init__(self, config, logger):
     '''Store config and logger, create the seed dir, open the database.

     config is indexed by string keys ('dir_seeds', 'db_server',
     'db_username', 'db_password', 'db_database'); logger must provide
     an error() method.

     Failures while creating the directory or opening the database are
     reported and swallowed, so self.db may be left unset on return.
     '''
     self.config = config
     self.logger = logger
     dir_seeds = self.config['dir_seeds']
     try:
         if not os.path.exists(dir_seeds):
             os.makedirs(dir_seeds)
         self.db = misc.mysql_db(self.config['db_server'],
                 self.config['db_username'],
                 self.config['db_password'],
                 self.config['db_database'], self.logger)
     except Exception as e:
         # print_exc() reports the traceback of the exception being
         # handled; print_stack() would only show how this handler was
         # reached, which hides where the failure actually happened.
         traceback.print_exc()
         self.logger.error(str(e))