Example #1
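    # Logs in as a second test account, marks its most recent unread message as
    # read, and asserts that the message no longer appears in the unread listing.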
    def test_mark_as_read(self):
        oth = Reddit(USER_AGENT)
        oth.login('PyApiTestUser3', '1111')
        msg = next(oth.user.get_unread(limit=1))  # pylint: disable-msg=E1101
        msg.mark_as_read()
        self.assertTrue(msg not in list(oth.user.get_unread(limit=5)))
Example #2
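# Driver for distributed GNN training: initializes torch.distributed with the
# NCCL backend from OpenMPI/SLURM environment variables, binds each rank to a
# GPU, loads the selected graph (Cora, Reddit, Amazon, or subgraph3), optionally
# adds remaining self-loops to the adjacency, and hands everything to
# init_process().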
def main():
    global device
    global graphname

    print(socket.gethostname())
    seed = 0

    if not download:
        mp.set_start_method('spawn', force=True)
        outputs = None
        if "OMPI_COMM_WORLD_RANK" in os.environ.keys():
            os.environ["RANK"] = os.environ["OMPI_COMM_WORLD_RANK"]

        # Initialize distributed environment with SLURM
        if "SLURM_PROCID" in os.environ.keys():
            os.environ["RANK"] = os.environ["SLURM_PROCID"]

        if "SLURM_NTASKS" in os.environ.keys():
            os.environ["WORLD_SIZE"] = os.environ["SLURM_NTASKS"]

        if "MASTER_ADDR" not in os.environ.keys():
            os.environ["MASTER_ADDR"] = "127.0.0.1"

        os.environ["MASTER_PORT"] = "1234"
        dist.init_process_group(backend='nccl')
        rank = dist.get_rank()
        size = dist.get_world_size()
        print("Processes: " + str(size))

        # device = torch.device('cpu')
        devid = rank_to_devid(rank, acc_per_rank)
        device = torch.device('cuda:{}'.format(devid))
        print(f"device: {device}")
        torch.cuda.set_device(device)
        curr_devid = torch.cuda.current_device()
        # print(f"curr_devid: {curr_devid}", flush=True)
        devcount = torch.cuda.device_count()

    if graphname == "Cora":
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        graphname)
        dataset = Planetoid(path, graphname, transform=T.NormalizeFeatures())
        data = dataset[0]
        data = data.to(device)
        data.x.requires_grad = True
        inputs = data.x.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
        edge_index = data.edge_index
        num_features = dataset.num_features
        num_classes = dataset.num_classes
    elif graphname == "Reddit":
        path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data',
                        graphname)
        dataset = Reddit(path, T.NormalizeFeatures())
        data = dataset[0]
        data = data.to(device)
        data.x.requires_grad = True
        inputs = data.x.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
        edge_index = data.edge_index
        num_features = dataset.num_features
        num_classes = dataset.num_classes
    elif graphname == 'Amazon':
        # path = osp.join(osp.dirname(osp.realpath(__file__)), '..', 'data', graphname)
        # edge_index = torch.load(path + "/processed/amazon_graph.pt")
        # edge_index = torch.load("/gpfs/alpine/bif115/scratch/alokt/Amazon/processed/amazon_graph_jsongz.pt")
        # edge_index = edge_index.t_()
        print(f"Loading coo...", flush=True)
        edge_index = torch.load("../data/Amazon/processed/data.pt")
        print(f"Done loading coo", flush=True)
        # n = 9430088
        n = 14249639
        num_features = 300
        num_classes = 24
        # mid_layer = 24
        inputs = torch.rand(n, num_features)
        data = Data()
        data.y = torch.rand(n).uniform_(0, num_classes - 1).long()
        data.train_mask = torch.ones(n).long()
        # edge_index = edge_index.to(device)
        print(f"edge_index.size: {edge_index.size()}", flush=True)
        print(f"edge_index: {edge_index}", flush=True)
        data = data.to(device)
        # inputs = inputs.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)
    elif graphname == 'subgraph3':
        # path = "/gpfs/alpine/bif115/scratch/alokt/HipMCL/"
        # print(f"Loading coo...", flush=True)
        # edge_index = torch.load(path + "/processed/subgraph3_graph.pt")
        # print(f"Done loading coo", flush=True)
        print(f"Loading coo...", flush=True)
        edge_index = torch.load("../data/subgraph3/processed/data.pt")
        print(f"Done loading coo", flush=True)
        n = 8745542
        num_features = 128
        # mid_layer = 512
        # mid_layer = 64
        num_classes = 256
        inputs = torch.rand(n, num_features)
        data = Data()
        data.y = torch.rand(n).uniform_(0, num_classes - 1).long()
        data.train_mask = torch.ones(n).long()
        print(f"edge_index.size: {edge_index.size()}", flush=True)
        data = data.to(device)
        inputs.requires_grad = True
        data.y = data.y.to(device)

    if download:
        exit()

    if normalization:
        adj_matrix, _ = add_remaining_self_loops(edge_index,
                                                 num_nodes=inputs.size(0))
    else:
        adj_matrix = edge_index

    init_process(rank, size, inputs, adj_matrix, data, num_features,
                 num_classes, device, outputs, run)

    if outputs is not None:
        return outputs[0]
Example #3
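    # Python 2 worker loop: authenticates with Reddit, connects to the
    # coordinator at host:port, sends its hostname, receives a comma-separated
    # request list, downloads the requested data, and streams the resulting
    # tar.gz archive back before cleaning up.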
    def run(self, host, port):
        self.reddit = Reddit(creds.key, creds.secret, creds.username,
                             creds.password, creds.redirect_uri)
        self.reddit.updateToken()
        self.reddit.testAccess()
        sleeptime = 0
        while True:
            if sleeptime > 10:
                time.sleep(10)
            elif sleeptime > 1:
                time.sleep(1)
            # Connect to host:port, get the fp
            fp = self.connect(host, port)

            # Send hostname of client over initially
            hostname = socket.getfqdn()
            fp.write(hostname + '\n')
            fp.flush()
            if debug:
                print 'Sent hostname'

            # Recv all the urls
            reqlist = []
            newline = False
            while True:
                line = fp.readline()
                line = line.strip()
                if line != '':
                    reqlist.append(line.split(','))
                else:
                    if newline == True:
                        break
                    newline = True
                fp.flush()

            print host + ' >> ' + str(reqlist)
            # See if any urls were sent, close if zero
            if len(reqlist) == 0:
                if debug:
                    print 'No requests'
                self.close()
                sleeptime += 1
                continue
            sleeptime = 0

            if debug:
                print 'Downloading requests'
            # Download all the urls otherwise
            self.download_data(reqlist)

            # targzip the data
            targz = self.targz()

            # Send the data
            targz_fp = open(targz, 'rb')
            targz_data = targz_fp.read()
            fp.write(targz_data)
            fp.flush()
            print host + ' << archive.tar.gz'
            self.close()
            self.cleanup()
Example #4
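# discord.py owner-only commands: `lock` toggles lockdown mode and announces the
# change in every server's archive channel; `restart` closes the bot and
# relaunches the script. The Reddit and Instagram cogs are registered before the
# bot starts.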
@perms.owner()
async def lock(ctx: commands.Context, *reason: str):
    # TODO stick this into the db
    global lockdown_mode

    lockdown_mode = not lockdown_mode
    for server in db['server']:
        await bot.get_channel(
            server.get('archive_channel')
        ).send('Lockdown mode ' +
               ('deactivated\n' if not lockdown_mode else 'activated.\n') +
               (('Reason:' + ' '.join(reason)) if len(reason) else ''))


@bot.command(brief='Restarts the bot.')
@perms.owner()
async def restart(ctx: commands.Context):
    await ctx.send('Restarting...')

    try:
        await bot.close()
    except:
        pass
    finally:
        os.system('python main.py')


bot.add_cog(Reddit(bot, db))
bot.add_cog(Instagram(bot, db))
bot.run(db['settings'].find_one(name='token')['value'])
Example #5
#!/usr/bin/env python
"""
reddit.py - Jenni Reddit Module
Author: cdsboy
A Jenni Module to recognize reddit links and print their titles
"""

from tsun import tsunsay
from reddit.objects import Submission
from reddit import Reddit
import re

r = Reddit(user_agent="Jenni Irc Bot")


@tsunsay()
def reddit_link_title(jenni, input):
    match = re.search(r'https?://[-a-zA-Z0-9.?$!%&/=_~#.,:;+]*', input)
    if match:
        try:
            info = Submission.get_info(r, match.group(0))
        except ValueError:
            return
        jenni.say('Reddit: %s' % info.title)


reddit_link_title.rule = r'(?u).*((?<!!)https?://www\.reddit\.com/r/[A-Za-z0-9]+/comments/)'
reddit_link_title.priority = 'high'

if __name__ == '__main__':
    print __doc__.strip()
Example #6
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# author: Navdevl
"""Telegram Bot that could fetch me what I wanted.."""

from telegram.ext import Updater, CommandHandler
import logging
from reddit import Reddit

# Enable logging
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO)

logger = logging.getLogger(__name__)
reddit = Reddit()


def start(bot, update):
    update.message.reply_text(
        'Hey Dumbass! Use /set <seconds> to set an interval')


def push(bot, job):
    posts = reddit.get_latest_post()
    for subreddit, post in posts:
        message = "#{0}\n {1} \n {2}".format(subreddit, post.title, post.url)
        bot.send_message(job.context, text=message)


def set_timer(bot, update, args, job_queue, chat_data):
Example #7
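# Command-line tool that scrapes an ESPN schedule (NFL, NBA, or NCAA football),
# formats it as a Reddit markdown table, and, when credentials are supplied,
# updates the target subreddit's sidebar through the Reddit API wrapper.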
def main():

    args = argparse.ArgumentParser()
    args.add_argument('--nfl',
                      action='store_true',
                      dest='nfl',
                      help="Create table from ESPN's NFL schedule")
    args.add_argument('--nba',
                      action='store_true',
                      dest='nba',
                      help="Create table from ESPN's NBA schedule")
    args.add_argument('--ncaaf',
                      action='store_true',
                      dest='ncaaf',
                      help="Create table from ESPN's NCAA football schedule")
    args.add_argument('--scores',
                      action='store_true',
                      dest='score',
                      help="Include finished games in table")
    args.add_argument('--schedule',
                      action='store_true',
                      dest='schedule',
                      help="Include future games in table")
    args.add_argument(
        '--date',
        dest='date',
        help=
        "Date of schedule to parse, NBA: yearmonthday eg. --nba --date 20171108, NFL: week, eg. --nfl --date 7"
    )
    args.add_argument('--username',
                      dest='username',
                      default=None,
                      help="accout username")
    args.add_argument('--password',
                      dest='password',
                      default=None,
                      help="account username")
    args.add_argument('--secret',
                      dest='secret',
                      default=None,
                      help="app secret")
    args.add_argument('--update',
                      dest='subreddit',
                      default=None,
                      help="subreddit to update")

    option = args.parse_args()

    schedule_url = nfl_url
    league_header = ""

    if option.nfl:
        league_header = nfl_header
        schedule_url = nfl_url

    elif option.nba:
        league_header = nba_header
        schedule_url = nba_url

    elif option.ncaaf:
        league_header = ncaaf_header
        schedule_url = ncaaf_url

    else:
        league_header = nfl_header

    if option.date is not None:
        if option.nfl or option.ncaaf:
            schedule_url += "/_/week/" + option.date
        elif option.nba:
            schedule_url += "/_/date/" + option.date

    html = obtainHTML(schedule_url)

    parser = ESPNParser()
    parser.feed(html)

    schedule_results = []

    if option.score:
        schedule_results.append(("score", "\n\n**Game Results**"))
    if option.schedule:
        schedule_results.append(("time", "\n\n**Upcoming Game Schedule**"))

    formatter = ScheduleFormatter()

    schedule = formatter.createRedditScheduleTable(parser, league_header,
                                                   schedule_results)
    subreddit = option.subreddit

    if subreddit is not None:
        if option.username is not None and option.password is not None and option.secret is not None:
            reddit = Reddit()
            reddit.username = option.username
            reddit.password = option.password
            reddit.client_secret = option.secret

            reddit.request_new_token()

            settings = reddit.obtain_sub_settings(subreddit)
            old_sidebar = ""

            try:
                old_sidebar = settings["description"]
            except:
                pass

            sidebar = formatter.updateSidebarSchedule(old_sidebar, schedule)

            if old_sidebar == sidebar:
                print subreddit, ": Sidebar up to date"
            else:
                print subreddit, ": Updating sidebar"

                settings["description"] = sidebar
                settings["sr"] = settings["subreddit_id"]
                settings["type"] = settings["subreddit_type"]
                settings["link_type"] = settings["content_options"]

                reddit.set_sub_settings(settings)

            reddit.revoke_token()

        else:
            print "Unable to update", option.subreddit, ": Username and/or password not given"

    else:
        print schedule

    exit(0)
Example #8
    def configure(self):
        self.r = Reddit('reddit_api test suite')
        self.sr = 'reddit_api_test'
        self.un = 'PyApiTestUser2'
Example #9
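# Exports Reddit posts to CSV: walks the given subreddits and authors, skips
# URL-only and already-seen posts, and writes one row per self post until the
# limit runs out.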
def posts2csv(post_f,
              authors=None,
              subreddits=None,
              seen_posts=set(),
              verbose=True,
              limit=1000):
    reddit = Reddit(config.data_location)

    subreddits = [reddit.get_subreddit(s) for s in subreddits]
    authors = [reddit.get_user(a) for a in authors]

    subredditset = set()

    # subreddit info doesn't seem to have the "subreddit_id".   To do : get that with r/subreddit/<name>/about
    # for now, use subreddit name as forum identifier
    csvp = csv.writer(post_f)
    csvp.writerow(
        "id,replyto,username,user_annotation_flairtext,annotation_over18,annotation_score,forum,discourse,title,when,dataset_file,post"
        .split(","))

    for subreddit in subreddits:
        print(subreddit.name)
        postids = set(subreddit.post_ids) - seen_posts
        for i, idd in enumerate(postids):
            post = subreddit.post(idd)
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if "selftext" not in post or post["selftext"] == "":
                continue  # Skip URL-only posts
            if "subreddit" not in post:
                print("No subreddit in post " + post["id"])
                continue
            if post["id"] in seen_posts: continue
            csvp.writerow([
                post["id"], None, post["author"], post["author_flair_text"],
                str(post["over_18"]),
                str(post["score"]), post["subreddit"], "Reddit", post["title"],
                datetime.fromtimestamp(post["created"], tz).isoformat(),
                "reddit",
                post.get("selftext", post["url"])
            ])
            limit -= 1
            if limit == 0: return

    for author in authors:
        print(author.name)
        postids = set(author.post_ids) - seen_posts
        for i, post in enumerate([author.post(id) for id in postids]):
            if i % 1000 == 999:
                print("post", i, "of", len(postids), limit, "to go")
            if "selftext" not in post or post["selftext"] == "":
                continue  # Skip URL-only posts
            if "subreddit" not in post:
                print("No subreddit in post " + post["id"])
                continue
            if post["id"] in seen_posts: continue
            csvp.writerow([
                post["id"], None, post["author"], post["author_flair_text"],
                str(post["over_18"]),
                str(post["score"]), post["subreddit"], "Reddit", post["title"],
                datetime.fromtimestamp(post["created"], tz).isoformat(),
                "reddit",
                post.get("selftext", post["url"])
            ])
            limit -= 1
            if limit == 0: return
Example #10
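# Telegram bot setup: builds an MQBot with optional proxy settings and a
# rate-limited message queue, attaches the database, Reddit client, and
# timezone helper, then registers the command handlers.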
    }
    proxy = os.environ.get("TELEGRAM_PROXY")
    if proxy:
        request_kwargs["proxy_url"] = proxy
        logger.info(f"Run with proxy: {proxy}")
    else:
        logger.warning("Running bot without proxy!")

    bot = MQBot(
        token,
        request=Request(**request_kwargs),
        mqueue=MessageQueue(all_burst_limit=29, all_time_limit_ms=1017),
    )

    bot.db = db
    bot.reddit = Reddit()
    bot.admin = int(os.environ.get("TELEGRAM_ADMIN_ID", 0))
    bot.tz_api = None
    google_api_key = os.environ.get("GOOGLE_API_KEY")
    if google_api_key:
        bot.tz_api = TimeZoneAPI(google_api_key)

    bot.news_time = time(hour=8, minute=00)
    bot.news_time = bot.news_time.hour * 60 + bot.news_time.minute  # get minutes

    updater = Updater(bot=bot, use_context=True)
    dp = updater.dispatcher
    jobs = updater.job_queue

    dp.add_handler(CommandHandler("start", handlers.start_handler))
    dp.add_handler(CommandHandler("r", handlers.r_handler))
                    skipped += 1

    print('SKIPPED: ', skipped)


if __name__ == '__main__':
    """
    Runs in a loop. Every two hours, collects data, then sleeps.
    """
    while True:
        print('Collecting data...')

        # Does the data file exist? If not, create it.
        # The data file contains a list of posts we've already seen.
        if os.path.isfile('data.pkl'):
            with open('data.pkl', 'rb') as f:
                dat = pickle.load(f)
        else:
            dat = Data()

        # Create the Reddit instance.
        reddit = Reddit().reddit

        # Retrieve the data.
        grab_posts(dat, reddit)

        # Write the data file to disk.
        with open('data.pkl', 'wb') as f:
            pickle.dump(dat, f)

        time.sleep(7200)
Example #12
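# Publishes Reddit post titles to Kafka: polls a Reddit wrapper for batches of
# titles and sends each one through KafkaProducer, blocking until the broker
# acknowledges delivery or the timeout expires.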
from reddit import Reddit
from kafka import KafkaProducer
timeout_in_sec = 30
brokers = ["broker:9092"]
topic = "twitter"
kafka_options = {
    'bootstrap_servers': brokers,
    'value_serializer': lambda v: v.encode('utf-8'),
}
producer = KafkaProducer(**kafka_options)
for posts in Reddit().poll():
    for post_title in posts:
        producer.send(topic, post_title).get(timeout=timeout_in_sec)