def test_mark_as_read(self):
    oth = Reddit(USER_AGENT)
    oth.login('PyApiTestUser3', '1111')
    msg = oth.user.get_unread(limit=1).next()  # pylint: disable-msg=E1101
    msg.mark_as_read()
    self.assertTrue(msg not in list(oth.user.get_unread(limit=5)))
def main():
    global device
    global graphname

    print(socket.gethostname())
    seed = 0

    if not download:
        mp.set_start_method('spawn', force=True)
    outputs = None

    if "OMPI_COMM_WORLD_RANK" in os.environ.keys():
        os.environ["RANK"] = os.environ["OMPI_COMM_WORLD_RANK"]

    # Initialize distributed environment with SLURM
    if "SLURM_PROCID" in os.environ.keys():
        os.environ["RANK"] = os.environ["SLURM_PROCID"]
    if "SLURM_NTASKS" in os.environ.keys():
        os.environ["WORLD_SIZE"] = os.environ["SLURM_NTASKS"]
    if "MASTER_ADDR" not in os.environ.keys():
        os.environ["MASTER_ADDR"] = "127.0.0.1"
        os.environ["MASTER_PORT"] = "1234"

    dist.init_process_group(backend='nccl')
    rank = dist.get_rank()
    size = dist.get_world_size()
    print("Processes: " + str(size))

    # device = torch.device('cpu')
    devid = rank_to_devid(rank, acc_per_rank)
    device = torch.device('cuda:{}'.format(devid))
    print(f"device: {device}")
    torch.cuda.set_device(device)
    curr_devid = torch.cuda.current_device()
    # print(f"curr_devid: {curr_devid}", flush=True)
    devcount = torch.cuda.device_count()

    if graphname == "Cora":
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', graphname)
        dataset = Planetoid(path, graphname, transform=T.NormalizeFeatures())
        data = dataset[0]
        data = data.to(device)
        data.x.requires_grad = True
        inputs = data.x.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
        edge_index = data.edge_index
        num_features = dataset.num_features
        num_classes = dataset.num_classes
    elif graphname == "Reddit":
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', graphname)
        dataset = Reddit(path, T.NormalizeFeatures())
        data = dataset[0]
        data = data.to(device)
        data.x.requires_grad = True
        inputs = data.x.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
        edge_index = data.edge_index
        num_features = dataset.num_features
        num_classes = dataset.num_classes
    elif graphname == 'Amazon':
        # path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', graphname)
        # edge_index = torch.load(path + "/processed/amazon_graph.pt")
        # edge_index = torch.load("/gpfs/alpine/bif115/scratch/alokt/Amazon/processed/amazon_graph_jsongz.pt")
        # edge_index = edge_index.t_()
        print(f"Loading coo...", flush=True)
        edge_index = torch.load("../data/Amazon/processed/data.pt")
        print(f"Done loading coo", flush=True)
        # n = 9430088
        n = 14249639
        num_features = 300
        num_classes = 24
        # mid_layer = 24
        inputs = torch.rand(n, num_features)
        data = Data()
        data.y = torch.rand(n).uniform_(0, num_classes - 1).long()
        data.train_mask = torch.ones(n).long()
        # edge_index = edge_index.to(device)
        print(f"edge_index.size: {edge_index.size()}", flush=True)
        print(f"edge_index: {edge_index}", flush=True)
        data = data.to(device)
        # inputs = inputs.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
    elif graphname == 'subgraph3':
        # path = "/gpfs/alpine/bif115/scratch/alokt/HipMCL/"
        # print(f"Loading coo...", flush=True)
        # edge_index = torch.load(path + "/processed/subgraph3_graph.pt")
        # print(f"Done loading coo", flush=True)
        print(f"Loading coo...", flush=True)
        edge_index = torch.load("../data/subgraph3/processed/data.pt")
        print(f"Done loading coo", flush=True)
        n = 8745542
        num_features = 128
        # mid_layer = 512
        # mid_layer = 64
        num_classes = 256
        inputs = torch.rand(n, num_features)
        data = Data()
        data.y = torch.rand(n).uniform_(0, num_classes - 1).long()
        data.train_mask = torch.ones(n).long()
        print(f"edge_index.size: {edge_index.size()}", flush=True)
        data = data.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)

    if download:
        exit()

    if normalization:
        adj_matrix, _ = add_remaining_self_loops(edge_index, num_nodes=inputs.size(0))
    else:
        adj_matrix = edge_index

    init_process(rank, size, inputs, adj_matrix, data, num_features,
                 num_classes, device, outputs, run)

    if outputs is not None:
        return outputs[0]
def run(self, host, port):
    self.reddit = Reddit(creds.key, creds.secret, creds.username,
                         creds.password, creds.redirect_uri)
    self.reddit.updateToken()
    self.reddit.testAccess()

    sleeptime = 0
    while True:
        if sleeptime > 10:
            time.sleep(10)
        elif sleeptime > 1:
            time.sleep(1)

        # Connect to host:port, get the fp
        fp = self.connect(host, port)

        # Send hostname of client over initially
        hostname = socket.getfqdn()
        fp.write(hostname + '\n')
        fp.flush()
        if debug:
            print 'Sent hostname'

        # Recv all the urls
        reqlist = []
        newline = False
        while True:
            line = fp.readline()
            line = line.strip()
            if line != '':
                reqlist.append(line.split(','))
            else:
                if newline == True:
                    break
                newline = True
        fp.flush()
        print host + ' >> ' + str(reqlist)

        # See if any urls were sent, close if zero
        if len(reqlist) == 0:
            if debug:
                print 'No requests'
            self.close()
            sleeptime += 1
            continue
        sleeptime = 0

        if debug:
            print 'Downloading requests'
        # Download all the urls otherwise
        self.download_data(reqlist)

        # targzip the data
        targz = self.targz()

        # Send the data
        targz_fp = open(targz, 'rb')
        targz_data = targz_fp.read()
        fp.write(targz_data)
        fp.flush()
        print host + ' << archive.tar.gz'

        self.close()
        self.cleanup()
@perms.owner()
async def lock(ctx: commands.Context, *reason: str):
    # TODO stick this into the db
    global lockdown_mode
    lockdown_mode = not lockdown_mode
    for server in db['server']:
        await bot.get_channel(server.get('archive_channel')).send(
            'Lockdown mode '
            + ('deactivated\n' if not lockdown_mode else 'activated.\n')
            + (('Reason: ' + ' '.join(reason)) if len(reason) else ''))


@bot.command(brief='Restarts the bot.')
@perms.owner()
async def restart(ctx: commands.Context):
    await ctx.send('Restarting...')
    try:
        await bot.close()
    except:
        pass
    finally:
        os.system('python main.py')


bot.add_cog(Reddit(bot, db))
bot.add_cog(Instagram(bot, db))
bot.run(db['settings'].find_one(name='token')['value'])
#!/usr/bin/env python
"""
reddit.py - Jenni Reddit Module
Author: cdsboy

A Jenni Module to recognize reddit links and print their titles
"""

from tsun import tsunsay
from reddit.objects import Submission
from reddit import Reddit
import re

r = Reddit(user_agent="Jenni Irc Bot")


@tsunsay()
def reddit_link_title(jenni, input):
    match = re.search(r'https?://[-a-zA-Z0-9.?$!%&/=_~#.,:;+]*', input)
    if match:
        try:
            info = Submission.get_info(r, match.group(0))
        except ValueError:
            return
        jenni.say('Reddit: %s' % info.title)

reddit_link_title.rule = r'(?u).*((?<!!)https?://www\.reddit\.com/r/[A-Za-z0-9]+/comments/)'
reddit_link_title.priority = 'high'


if __name__ == '__main__':
    print __doc__.strip()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: Navdevl
"""Telegram Bot that could fetch me what I wanted.."""
from telegram.ext import Updater, CommandHandler
import logging
from reddit import Reddit

# Enable logging
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)

logger = logging.getLogger(__name__)
reddit = Reddit()


def start(bot, update):
    update.message.reply_text(
        'Hey Dumbass! Use /set <seconds> to set an interval')


def push(bot, job):
    posts = reddit.get_latest_post()
    for subreddit, post in posts:
        message = "#{0}\n {1} \n {2}".format(subreddit, post.title, post.url)
        bot.send_message(job.context, text=message)


def set_timer(bot, update, args, job_queue, chat_data):
def main():
    args = argparse.ArgumentParser()
    args.add_argument('--nfl', action='store_true', dest='nfl',
                      help="Create table from ESPN's NFL schedule")
    args.add_argument('--nba', action='store_true', dest='nba',
                      help="Create table from ESPN's NBA schedule")
    args.add_argument('--ncaaf', action='store_true', dest='ncaaf',
                      help="Create table from ESPN's NCAA football schedule")
    args.add_argument('--scores', action='store_true', dest='score',
                      help="Include finished games in table")
    args.add_argument('--schedule', action='store_true', dest='schedule',
                      help="Include future games in table")
    args.add_argument(
        '--date', dest='date',
        help="Date of schedule to parse, NBA: yearmonthday eg. --nba --date 20171108, NFL: week, eg. --nfl --date 7")
    args.add_argument('--username', dest='username', default=None,
                      help="account username")
    args.add_argument('--password', dest='password', default=None,
                      help="account password")
    args.add_argument('--secret', dest='secret', default=None,
                      help="app secret")
    args.add_argument('--update', dest='subreddit', default=None,
                      help="subreddit to update")
    option = args.parse_args()

    schedule_url = nfl_url
    league_header = ""
    if option.nfl:
        league_header = nfl_header
        schedule_url = nfl_url
    elif option.nba:
        league_header = nba_header
        schedule_url = nba_url
    elif option.ncaaf:
        league_header = ncaaf_header
        schedule_url = ncaaf_url
    else:
        league_header = nfl_header

    if option.date is not None:
        if option.nfl or option.ncaaf:
            schedule_url += "/_/week/" + option.date
        elif option.nba:
            schedule_url += "/_/date/" + option.date

    html = obtainHTML(schedule_url)
    parser = ESPNParser()
    parser.feed(html)

    schedule_results = []
    if option.score:
        schedule_results.append(("score", "\n\n**Game Results**"))
    if option.schedule:
        schedule_results.append(("time", "\n\n**Upcoming Game Schedule**"))

    formatter = ScheduleFormatter()
    schedule = formatter.createRedditScheduleTable(parser, league_header,
                                                   schedule_results)

    subreddit = option.subreddit
    if subreddit is not None:
        if option.username is not None and option.password is not None and option.secret is not None:
            reddit = Reddit()
            reddit.username = option.username
            reddit.password = option.password
            reddit.client_secret = option.secret
            reddit.request_new_token()
            settings = reddit.obtain_sub_settings(subreddit)
            old_sidebar = ""
            try:
                old_sidebar = settings["description"]
            except:
                pass
            sidebar = formatter.updateSidebarSchedule(old_sidebar, schedule)
            if old_sidebar == sidebar:
                print subreddit, ": Sidebar up to date"
            else:
                print subreddit, ": Updating sidebar"
                settings["description"] = sidebar
                settings["sr"] = settings["subreddit_id"]
                settings["type"] = settings["subreddit_type"]
                settings["link_type"] = settings["content_options"]
                reddit.set_sub_settings(settings)
            reddit.revoke_token()
        else:
            print "Unable to update", option.subreddit, ": Username and/or password not given"
    else:
        print schedule
    exit(0)
def configure(self):
    self.r = Reddit('reddit_api test suite')
    self.sr = 'reddit_api_test'
    self.un = 'PyApiTestUser2'
def posts2csv(post_f, authors=None, subreddits=None, seen_posts=set(),
              verbose=True, limit=1000):
    reddit = Reddit(config.data_location)
    subreddits = [reddit.get_subreddit(s) for s in subreddits]
    authors = [reddit.get_user(a) for a in authors]
    subredditset = set()
    # subreddit info doesn't seem to have the "subreddit_id". To do: get that with r/subreddit/<name>/about
    # for now, use subreddit name as forum identifier
    csvp = csv.writer(post_f)
    csvp.writerow(
        "id,replyto,username,user_annotation_flairtext,annotation_over18,annotation_score,forum,discourse,title,when,dataset_file,post"
        .split(","))

    for subreddit in subreddits:
        print(subreddit.name)
        postids = set(subreddit.post_ids) - seen_posts
        for i, idd in enumerate(postids):
            post = subreddit.post(idd)
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if "selftext" not in post or post["selftext"] == "":
                continue  # Skip URL-only posts
            if "subreddit" not in post:
                print("No subreddit in post " + post["id"])
                continue
            if post["id"] in seen_posts:
                continue
            csvp.writerow([
                post["id"], None, post["author"], post["author_flair_text"],
                str(post["over_18"]), str(post["score"]), post["subreddit"],
                "Reddit", post["title"],
                datetime.fromtimestamp(post["created"], tz).isoformat(),
                "reddit",
                post.get("selftext", post["url"])
            ])
            limit -= 1
            if limit == 0:
                return

    for author in authors:
        print(author.name)
        postids = set(author.post_ids) - seen_posts
        for i, post in enumerate([author.post(id) for id in postids]):
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if "selftext" not in post or post["selftext"] == "":
                continue  # Skip URL-only posts
            if "subreddit" not in post:
                print("No subreddit in post " + post["id"])
                continue
            if post["id"] in seen_posts:
                continue
            csvp.writerow([
                post["id"], None, post["author"], post["author_flair_text"],
                str(post["over_18"]), str(post["score"]), post["subreddit"],
                "Reddit", post["title"],
                datetime.fromtimestamp(post["created"], tz).isoformat(),
                "reddit",
                post.get("selftext", post["url"])
            ])
            limit -= 1
            if limit == 0:
                return
}

proxy = os.environ.get("TELEGRAM_PROXY")
if proxy:
    request_kwargs["proxy_url"] = proxy
    logger.info(f"Run with proxy: {proxy}")
else:
    logger.warning("Running bot without proxy!")

bot = MQBot(
    token,
    request=Request(**request_kwargs),
    mqueue=MessageQueue(all_burst_limit=29, all_time_limit_ms=1017),
)
bot.db = db
bot.reddit = Reddit()
bot.admin = int(os.environ.get("TELEGRAM_ADMIN_ID", 0))

bot.tz_api = None
google_api_key = os.environ.get("GOOGLE_API_KEY")
if google_api_key:
    bot.tz_api = TimeZoneAPI(google_api_key)

bot.news_time = time(hour=8, minute=00)
bot.news_time = bot.news_time.hour * 60 + bot.news_time.minute  # get minutes

updater = Updater(bot=bot, use_context=True)
dp = updater.dispatcher
jobs = updater.job_queue

dp.add_handler(CommandHandler("start", handlers.start_handler))
dp.add_handler(CommandHandler("r", handlers.r_handler))
    skipped += 1
    print('SKIPPED: ', skipped)


if __name__ == '__main__':
    """
    Runs in a loop. Every two hours, collects data, then sleeps.
    """
    while True:
        print('Collecting data...')

        # Does the data file exist? If not, create it.
        # The data file contains a list of posts we've already seen.
        if os.path.isfile('data.pkl'):
            with open('data.pkl', 'rb') as f:
                dat = pickle.load(f)
        else:
            dat = Data()

        # Create the Reddit instance.
        reddit = Reddit().reddit

        # Retrieve the data.
        grab_posts(dat, reddit)

        # Write the data file to disk.
        with open('data.pkl', 'wb') as f:
            pickle.dump(dat, f)

        time.sleep(7200)
from reddit import Reddit
from kafka import KafkaProducer

timeout_in_sec = 30
brokers = ["broker:9092"]
topic = "twitter"

kafka_options = {
    'bootstrap_servers': brokers,
    'value_serializer': lambda v: v.encode('utf-8')
}
producer = KafkaProducer(**kafka_options)

for posts in Reddit().poll():
    for post_title in posts:
        producer.send(topic, post_title).get(timeout=timeout_in_sec)