import praw
import requests
import json
from datetime import datetime
import time

### Replace these values with your credentials to connect to the API (https://praw.readthedocs.io/en/latest/getting_started/authentication.html)
reddit = praw.Reddit(client_id="", client_secret="", user_agent="")
###
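# Added sketch (not part of the original snippet): the same client can be built
# from environment variables so the secrets never sit in the source file. The
# variable names REDDIT_CLIENT_ID / REDDIT_CLIENT_SECRET / REDDIT_USER_AGENT
# below are only an example.
import os
if os.getenv("REDDIT_CLIENT_ID"):
    reddit = praw.Reddit(client_id=os.environ["REDDIT_CLIENT_ID"],
                         client_secret=os.environ["REDDIT_CLIENT_SECRET"],
                         user_agent=os.environ["REDDIT_USER_AGENT"])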


def scrape_posts(subreddit, start_time=None, end_time=None, interval=3600 * 6):
    '''With default args, downloads all posts from a subreddit'''
    error_wait_time = 1
    wait_time = 0.5
    retries = 10
    pushshift_max = 100
    all_posts = []
    if start_time is None:
        start_time = int(reddit.subreddit(subreddit).created_utc)
    if end_time is None:
        end_time = int(datetime.now().timestamp())

    with open(f'fetched_r_{subreddit}.json', 'w') as f:
        f.write("[")
        while start_time < end_time:
            end_interval = min(start_time + interval, end_time)
            url = f"https://api.pushshift.io/reddit/search/submission/?after={start_time}&before={end_interval}&subreddit={subreddit}&limit=1000&score=%3E0"
            for i in range(retries):
                r = requests.get(url)
                if r.status_code == 200:
                    # write this window's submissions and stop retrying
                    for post in r.json().get("data", []):
                        if all_posts:
                            f.write(",")
                        json.dump(post, f)
                        all_posts.append(post)
                    break
                # bad response: back off briefly before retrying
                time.sleep(error_wait_time)
            start_time = end_interval
            time.sleep(wait_time)  # small pause between windows
        f.write("]")
    return all_posts
import praw, time, datetime, json, os

reddit = praw.Reddit(client_id='', client_secret='', user_agent='')


def unix_to_datetime(unix_time):
    """ FUNCTION:
    Input = unix time, returns a date and hhmmss string, used for daily file names"""
    normal_time = datetime.datetime.fromtimestamp(
        int(unix_time)).strftime('%d_%m_%Y_%H')
    return normal_time


def what_month_time(time_str):
    '''Return a (month, hour) tuple parsed from a time string produced by unix_to_datetime()'''
    time_str = time_str.split('_')
    return int(time_str[1]), int(time_str[3])
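

# Quick usage sketch of the two helpers above (added for illustration; the
# exact hour depends on this machine's local timezone):
stamp = unix_to_datetime(1609459200)   # e.g. '01_01_2021_00' on a UTC machine
month, hour = what_month_time(stamp)   # -> (1, 0) for that same string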


class DailyRedditorData:
    '''A class for easier access of Redditor data.'''
    def __init__(self, username):
        self.username = username
        instance = reddit.redditor(username)
        self.ck = instance.comment_karma  # comment karma
        self.pk = instance.link_karma  # link or post karma
        self.pr = instance.is_gold  # boolean for if the user has premium
        self.mo = instance.is_mod  # boolean for if the user is a mod
        self.co = len(list(
            instance.comments.new(limit=None)))  # number of comments
        self.po = len(list(
            instance.submissions.new(limit=None)))  # number of posts
Example No. 3
def authenticate():
    print('Authenticating...')
    reddit = praw.Reddit('<bot_name>', user_agent='Test bot /u/<account_name>')
    print('Authenticated as {}'.format(reddit.user.me()))
    return reddit
Example No. 4
try:
    dir_path = os.path.dirname(os.path.realpath(__file__)) + "/www"
    enginerunning = False
    if "acestreamengine" in (p.name() for p in psutil.process_iter()):
        enginerunning = True
    if not enginerunning:  # start engine
        print("Start engine")
        #os.spawnl(os.P_WAIT, ["/snap/bin/acestreamplayer.engine", "--client-console", "--bind-all", "--live-cache-type", "disk", "--live-disk-cache-size", "1000000000"])
        proc1 = subprocess.Popen([
            "/snap/bin/acestreamplayer.engine", "--client-console",
            "--bind-all", "--live-cache-type", "disk",
            "--live-disk-cache-size", "1000000000"
        ])
        time.sleep(10)
    reddit = praw.Reddit(client_id='eSLpkm36H4FelA',
                         client_secret='NM50GW0wQZ63Wju_n-8lgP4N0LE',
                         user_agent='my user agent')

    matches = {}
    subreddit = reddit.subreddit('soccerstreams')
    for submission in reddit.subreddit('soccerstreams').new(
            limit=30):  # get the 30 newest posts
        if ((time.time() - submission.created_utc)) < (
                24 * 3600):  #only search posts last 24 hours
            if re.search(r'vs', submission.title, re.IGNORECASE):
                if re.search(searchterm, submission.title, re.IGNORECASE):
                    if submission.title not in matches:
                        matches[submission.title] = {}
                    for comment in submission.comments:
                        if re.search("acestream", comment.body):
                            acestreams = re.findall(r'acestream://([0-9a-z]+)',
def scrape():
    VISITED_FNAME = "visited_submissions"
    NUM_SUBMISSION_AT_TIME = 100  # how many submissions to process at a time
    visited_submissions = set()  #IDs of submissions we've already visited
    #NOTE: (submission, comment) would be more efficient
    #But then each time we ran program we'd have to check every comment
    #Given that activity probably dies off quickly, the slight increase
    #in thoroughness is probably not worth the loss of efficiency

    if os.path.isfile(VISITED_FNAME):
        visited_submissions = pickle.load(open(VISITED_FNAME, "rb"))

    reddit_obj = praw.Reddit(user_agent="Fake Images Scraper")

    psbattle = reddit_obj.get_subreddit(
        "photoshopbattles")  #subreddit of interest

    #TODO: logic may not be good
    #If you only get 10 submissions at a time and you've already looked at all 10,
    #nothing happens
    #Better: keep going through submissions until you find you haven't checked them
    #http://stackoverflow.com/questions/16263217/python-praw-wrapper-logic-problems
    img_count = 0
    count_submissions = len(
        visited_submissions)  #start counting wherever we left off
    original_num_submissions = len(
        visited_submissions)  #so we can count how many new submissions
    while len(visited_submissions
              ) - original_num_submissions < NUM_SUBMISSION_AT_TIME:
        first = psbattle.get_top(
            limit=1)  #psbattle.get_new(limit=5) #submissions in that subreddit
        submission = next(first, None)  #process one submission at a time

        if not submission:
            continue

        #step backward to most recent unvisited submission
        while submission.id in visited_submissions:
            submission = next(
                psbattle.get_new(limit=1,
                                 params={"after": submission.fullname}), None)
            if not submission:  #out of submissions
                break
        print "submission: ", submission

        if submission:
            print "submission id: ", submission.id
            visited_submissions.add(
                submission.id)  #make note we will have visited this submission
            #get all comments for now (note: may cause dataset imbalance?
            #also takes longer because more API calls)
            submission.replace_more_comments(limit=None, threshold=0)
            count_submissions += 1
            comments = submission.comments

            count_comments = 1
            for comment in comments:  #TODO figure out how to access roots directly
                try:
                    #if we've made it this far assume image is original
                    if comment.is_root:  #this is a top level content
                        links = find_links(
                            comment.body)  #get links (presumably of images)

                        #this link is valid so download image at this link
                        for link in links:
                            link = link.replace(
                                ")", ""
                            )  #because sometimes Imgur links have trailing )
                            save_image(link, count_submissions, submission.id,
                                       count_comments)
                            img_count += 1
                    count_comments += 1  #count comment if we were able to process it successfully
                except Exception as e:
                    print "Exception ", e, " caused by comment ", comment
            print("%d valid comments on submission %d. Now %d images total" \
              % (count_comments, count_submissions, img_count))
        time.sleep(2)  #comply with reddit policy
    print("%d images scraped" % img_count)
    pickle.dump(visited_submissions, open(VISITED_FNAME, "wb"))
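

# Added sketch (not part of the original): the TODO inside scrape() notes that
# grabbing a fixed number of top submissions can return nothing new once they
# have all been visited. With PRAW 4+ the listing generators page through
# "after" automatically, so the "keep going until we find unvisited ones" idea
# can look roughly like this (subreddit is a praw.models.Subreddit):
def next_unvisited_submissions(subreddit, visited_ids, batch=100):
    """Yield up to `batch` submissions whose IDs are not in visited_ids."""
    found = 0
    for submission in subreddit.new(limit=None):  # PRAW keeps requesting pages
        if submission.id in visited_ids:
            continue
        visited_ids.add(submission.id)
        found += 1
        yield submission
        if found >= batch:
            break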
Example No. 6
import json
import random
import praw
from youtube_search import YoutubeSearch
from discord.ext import commands

token = ''
bot = commands.Bot(command_prefix='!')
bot.remove_command('help')

reddit = praw.Reddit(client_id="",
                     client_secret="",
                     user_agent="discordMemeBot",
                     redirect_uri="http://127.0.0.1:65010/authorize_callback")


@bot.command(pass_context=True)
async def meme(ctx):
    for submission in reddit.subreddit("dankmemes").hot(limit=1):
        if not submission.stickied:
            await ctx.send(submission.url)


@bot.command(pass_context=True)
async def help(ctx):
    await ctx.send(
        "```!yt + запрос - скидывает видео с YouTube™ в чат \n!meme - скидывает случайны мем с Reddit™"
        "\n!music + запрос - скидывает плейлист Spotify™ по запросу```")


@bot.command(pass_context=True)
Example No. 7
import praw
import requests
import time

reddit = praw.Reddit(client_id='id',
                     client_secret='secret',
                     username='******',
                     password='******',
                     user_agent='cricket wc weather bot by /u/himanscience')

#print(reddit.read_only)
subreddit = reddit.subreddit('CricketShitpost')

keyphrase = '!cricketwcweather'

api_address = 'https://samples.openweathermap.org/data/2.5/weather?appid=daf6b9f4fd27d31ca3cf23cd8b37e24b&q='
country_code = ',GB'
city = [
    'Birmingham', 'Bristol', 'Cardiff', 'Chester-le-Street', 'Leeds', 'London',
    'Manchester', 'Nottingham', 'Southampton', 'Taunton'
]

for comment in subreddit.stream.comments(skip_existing=True):
    if keyphrase in comment.body:
        try:
            weath = ''
            for i in city:
                url = api_address + i + country_code
                json_data = requests.get(url).json()
                weath = weath + i + ": " + json_data['weather'][0][
                    'description'] + ', \n'
Example No. 8
cparser.read(config_file)

aparser = argparse.ArgumentParser(
    description=
    f'send a private message to a reddit user. authentication variables and default arguments sourced from {config_file}'
)
aparser.add_argument('body', help='body text of message')
aparser.add_argument(
    '-u',
    '--user',
    default=cparser.get('Message', 'default_user'),
    help=
    f'reddit username of recipient, default is {cparser.get("Message", "default_user")}'
)
aparser.add_argument(
    '-s',
    '--subject',
    default=cparser.get('Message', 'default_subject'),
    help=
    f'subject text of message, default is {cparser.get("Message", "default_subject")}'
)
args = aparser.parse_args()

reddit = praw.Reddit(client_id=cparser.get('Authentication', 'client_id'),
                     client_secret=cparser.get('Authentication',
                                               'client_secret'),
                     user_agent=cparser.get('Authentication', 'user_agent'),
                     username=cparser.get('Authentication', 'username'),
                     password=cparser.get('Authentication', 'password'))
reddit.redditor(args.user).message(args.subject, args.body)
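# Added note: newer PRAW releases expect keyword arguments here, so an
# equivalent (hedged) form of the call above is:
#     reddit.redditor(args.user).message(subject=args.subject, message=args.body)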
Example No. 9
"""
Created on 8 August 2017
@author: Rishabh Patil
"""

import praw
import sys
import operator
import json

with open("config.json", "r") as f:
    config = json.load(f)

reddit = praw.Reddit(client_id='',
                     client_secret='',
                     user_agent='',
                     username='',
                     password='')

print(reddit.user.me())

subreddit = reddit.subreddit('mbti')

print(subreddit.display_name)
print(subreddit.title)

with open("authors.json", "r") as f:
    authors = json.load(f)


def add_author(name, text_flair, css_flair):
Example No. 10
#!/usr/bin/python3

import praw
import sqlite3

from subreddits import subs_dict

import actions

reddit = praw.Reddit("modmail_bot")

# temporary - due to bug in getting all modmail messages from reddit API #
state_switch = ""
state_vers = 1


class Modmail_bot():
    def read_modmail(self):

        # temporary #
        global state_switch, state_vers
        if state_vers == 1:
            state_switch = "all"
        elif state_vers == 2:
            state_switch = "archived"
        elif state_vers == 3:
            state_switch = "mod"
        # temporary #

        for mail in reddit.subreddit("all").modmail.conversations(
                limit=30,
Example No. 11
import discord
from discord.ext import commands
import praw
import random
import os

r = praw.Reddit(client_id=os.getenv("CLIENT_ID"),
                client_secret=os.getenv("CLIENT_SECRET"),
                password=os.getenv("PASSWORD"),
                user_agent="QGdBOT",
                username="******")


class Meme(commands.Cog):
    def __init__(self, bot):
        self.bot = bot

    @commands.command()
    async def meme(self, ctx, *, topic=""):
        sr = r.subreddit(topic + "memes")
        ts = []
        t = sr.top(limit=30)

        for s in t:
            ts.append(s)

        rs = random.choice(ts)
        name = rs.title
        url = rs.url

        em = discord.Embed(title=name, color=discord.Colour.green())
Example No. 12
def initializeClass(client_id, client_secret, user_agent):
    return praw.Reddit(client_id=client_id,
                       client_secret=client_secret,
                       user_agent=user_agent)
Example No. 13
        'https://www.youtube.com/channel/UCqQo7ewe87aYAe7ub5UqXMw'
    ],
    [
        'FreeDawkins',
        'https://www.youtube.com/channel/UCEjOSbbaOfgnfRODEEMYlCw'
    ],
    ['DownToBuck', 'https://www.youtube.com/channel/UCNaGVvWvXaYI16vAxQUfq3g'],
    ['ESPN', 'https://www.youtube.com/user/ESPN'],
    ['CliveNBAParody', 'https://www.youtube.com/user/RealKingClive'],
    ['NBA on ESPN', 'https://www.youtube.com/user/TheNBAonESPN']
]

# getting a reddit instance by giving appropriate credentials
reddit = praw.Reddit(
    username=config.username,
    password=config.password,
    client_id=config.client_id,
    client_secret=config.client_secret,
    user_agent="script:rnba-boxscore-comment-bot:v1.0 (by /u/f1uk3r)")


def requestApi(url):
    req = requests.get(url)
    return req.json()


# appending a + sign in front of biggest lead and +/- stats
def appendPlusMinus(someStat):
    if someStat.isdigit():
        if int(someStat) > 0:
            return "+" + str(someStat)
        return str(someStat)
Example No. 14
import praw
import shutil
import random
import discord
import youtube_dl
from itertools import cycle
from discord.utils import get
from discord.ext import commands, tasks


client = commands.Bot(command_prefix = '.')
client.remove_command('help')

initial_role = 'Peasants'

reddit = praw.Reddit()


'''

rant space for john john


'''

# ----events----

@client.event
async def on_ready():
    await client.change_presence(status = discord.Status.online, activity = discord.Game('with my stand | .help | .music'))
    # change_status.start()
Example No. 15
        posts_replied_to = list(filter(None, posts_replied_to))

# Importing and naming secrets
identity = IDandSecret.DBClientID
secretpass = IDandSecret.DBClientSecret
botpassword = IDandSecret.DBMyPass
usernames = ['/u/UNLUCK3', '/u/DiscreetBot']

# The text the bot should reply with, built as a single string (the original
# comma-separated version produced a tuple, so only part of it was sent).
reply_text = ('Hey ' + ', '.join(usernames) +
              ' This guy got a knife inside Canada somehow! Do you want it?     ^^I\'m ^^a ^^bot.    '
              '  ^^PM ^^/u/UNLUCK3 ^^for ^^help, ^^or ^^to ^^add ^^yourself ')

# Getting oAuth2 Access
reddit = praw.Reddit(user_agent='CanadaKnifeBot (by /u/UNLUCK3)',
                     client_id=identity,
                     client_secret=secretpass,
                     username='******',
                     password=botpassword)

# Which subreddit the bot should crawl
subreddit = reddit.subreddit('DiscreetTest')


# Main part of the program
def mainloop():
    counter1 = 0  # Counts submissions in new that have been crawled
    for submission in subreddit.new(limit=5):  # Get the 5 newest submissions
        counter1 = counter1 + 1
        if submission.id not in posts_replied_to:  # what I want is for it to print them as they come in while running,
            # idk how to do that yet though...
            print("These posts are new: \n")
Example No. 16
    def __init__(self):
        self.r = praw.Reddit(user_agent=config.get('reddit', 'userAgent'))
        self.alreadyProcessed = []
Example No. 17
def GetWal():
    subreddit = settings['subreddit']
    if settings['select'] == 'top':
        random = False
    else:
        random = True

    r = praw.Reddit(user_agent='linux:wallies-from-reddit:v0.1 by u/prmsrswt')
    print('[I] Connected to Reddit')
    print('[I] fetching submissions')
    if random:
        if settings['search'] == '':
            print("[I] Aquiring random image.")
            image = get_random_image(r.subreddit(subreddit))
        else:
            print("[I] Aquiring random image with specified search term.")
            image = get_random_image_search(r.subreddit(subreddit))
    else:
        # Get top image link
        if settings['search'] == '':
            print("[I] Aquiring top image.")
            image = get_top_image(r.subreddit(subreddit))
        else:
            print("[I] Aquiring top image with given search parameter.")
            image = get_top_image_search(r.subreddit(subreddit))
    if "url" not in image:
        print("Error: No suitable images were found, please retry")

    # Get home directory and location where image will be saved
    # (default location for Ubuntu is used)
    home_dir = os.path.expanduser("~")
    save_location = "{home_dir}/{save_dir}/{subreddit}-{id}.{image_type}".format(
        home_dir=home_dir,
        save_dir=settings['save_dir'],
        subreddit=subreddit,
        id=image["id"],
        image_type=image['type'])
    if not os.path.isfile(save_location):
        print('[I] Downloading Image....')
        # Request image
        response = requests.get(image['url'], allow_redirects=False)

        # If image is available, proceed to save
        if response.status_code == requests.codes.ok:

            # Create folders if they don't exist
            dir = os.path.dirname(save_location)
            if not os.path.exists(dir):
                os.makedirs(dir)

            # Write to disk
            with open(save_location, "wb") as fo:
                for chunk in response.iter_content(4096):
                    fo.write(chunk)

            # Setting the wallpaper
            set_wallpaper(save_location)

        else:
            sys.exit(
                "Error: Image url is not available, the program is now exiting."
            )
    else:
        print('[I] Image Found on disk. Skipping Downloading.')
        set_wallpaper(save_location)
Example No. 18
                arr[k] = r[j]
                j += 1
            k += 1
        while i < len(l):
            arr[k] = l[i]
            i += 1
            k += 1
        while j < len(r):
            arr[k] = r[j]
            j += 1
            k += 1


reddit = praw.Reddit(user_agent=config['user_agent'],
                     client_id=config['client_id'],
                     client_secret=config['client_secret'],
                     username=config['username'],
                     password=config['password'])

try:
    links_file = open('links.txt', 'r')
    links = links_file.readlines()
    links_file.close()
except FileNotFoundError:
    exit('Error: "links.txt" file not found! Make sure the file exists.')
except IOError:
    exit('IOError while reading "links.txt"')

for l in links:
    try:
        submission = reddit.submission(url=l)
Example No. 19
import config
import praw
import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from scipy.interpolate import griddata
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

reddit = praw.Reddit(user_agent=config.my_agent,
                     client_id=config.my_id,
                     client_secret=config.my_secret,
                     username=config.my_username,
                     password=config.my_password)
analyzer = SentimentIntensityAnalyzer()


def main():
    subreddit_name = 'canada'
    sentiment_data = []
    score_data = []
    num_bins = 50
    avg_scores = []
    avg_sentiments = []
    df = pd.DataFrame()
    comments = []

    # fetch comments and get sentiments
    if not os.path.isfile(subreddit_name + '.pkl'):
Example No. 20
import praw
import prawcore

reddit = praw.Reddit(
    client_id="<client-id>",
    client_secret="<client-secret>",
    user_agent="python:subtester:0.1 (by /u/ACEDT)",
    user_agent="python:subtester:0.2 (by /u/ACEDT)",
)


def subTest(sub):
    exists = 0
    try:
        subtest = reddit.subreddit(sub)
        testvar = subtest.id
    except prawcore.exceptions.Redirect:
        exists = 1
    except prawcore.exceptions.NotFound:
        exists = 2
    except prawcore.exceptions.Forbidden:
        exists = 3
    return exists
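

# Usage sketch (added for illustration; the meaning of each code is an
# assumption inferred from the exception types handled in subTest above):
#   0 -> subreddit exists, 1 -> redirected (no such subreddit),
#   2 -> not found (e.g. banned), 3 -> forbidden (private/quarantined)
labels = {0: "exists", 1: "does not exist", 2: "not found", 3: "forbidden"}
print("redditdev:", labels[subTest("redditdev")])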


def readList(inFilePath):
    with open(inFilePath, 'r') as infile:
        subs = []
        for line in infile:
            line = line.replace("\n", "")
            line = line.replace("r/", "", 1)
Example No. 21
    modified_z_score = 0.6745 * diff / med_abs_deviation
    print("Median threshold is: ", med_abs_deviation)
    return modified_z_score > thresh


# reddit credential bullshit
my_client_id = "ChicYmXzPyjIMQ"
my_client_secret = "QW7E4td2L7iaOwi5-FOeyFMjcTE"
my_user_agent = "Baby Scraping Bot"
my_username = "******"
my_password = "******"

# gathers "reddit" for later use lol
reddit = praw.Reddit(user_agent=my_user_agent,
                     client_id=my_client_id,
                     client_secret=my_client_secret,
                     username=my_username,
                     password=my_password)

# get ID from user and save submission from ID
subID = input("What is the submission ID of the post you want to search?: ")
submission = reddit.submission(id=subID)
print("Submission title: ",
      submission.title)  # Output: the title of the submission

scoreList = []  # holds all score values
count = 0

# currently only iterates through the first 500 comments listed
# need to add functionality to "press" more comments.
submission.comments.replace_more(limit=0)
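
# Added sketch (not part of the original): .list() flattens the comment forest.
# Because replace_more(limit=0) above simply drops the "load more comments"
# stubs, this only covers comments from the first fetch; calling
# replace_more(limit=None) instead would resolve every stub before flattening.
all_comments = submission.comments.list()
print("comments loaded so far:", len(all_comments))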
Example No. 22
import praw
import re
import os

REPLIED_POST_LIST = "repliedPostList.txt"
SUBREDDIT_TARGET = "crumbumsandbox"

reddit = praw.Reddit("bot1")

#plain text file to store submission ids
print(os.path)
if not os.path.isfile(REPLIED_POST_LIST):
    repliedPostList = []
else:
    with open(REPLIED_POST_LIST, "r") as f:
        repliedPostList = f.read()
        repliedPostList = repliedPostList.split("\n")
        repliedPostList = list(filter(None, repliedPostList))

#load new posts in subreddit and reply

subreddit = reddit.subreddit(SUBREDDIT_TARGET)

for submission in subreddit.new(limit=5):
    if submission.id not in repliedPostList:
        if re.search("this is how we do it", submission.title, re.IGNORECASE):
            submission.reply("It's Friday Nieeeght")
            print("ReplyBot replying to post " + submission.id + ": " +
                  submission.title)
            repliedPostList.append(submission.id)
Example No. 23
    # the data types that this file will contain
    parameters = []
    parameters.append({'name': 'submission', 'maxlength': 10000})
    redditsubmissions = DataCollector(filename='redditsubmissions', parameters=parameters, overwrite=False,
                            checklength=True)


    parameters = []
    parameters.append({'name': 'comment', 'maxlength': 4000})
    redditcomments = DataCollector(filename='redditcomments', parameters=parameters, overwrite=False,
                                   checklength=True)

    # start the reddit instance
    reddit = praw.Reddit(client_id=reddit_credentials.CLIENT_ID,
                         client_secret=reddit_credentials.CLIENT_SECRET,
                         user_agent='agent')

    subreddit = reddit.subreddit('bitcoin')


    # reddit_collect_submissions(subreddit, redditsubmissions)
    # reddit_collect_comments(subreddit, redditcomments)

    submissionprocess = Process(target=reddit_collect_submissions, args=(subreddit, redditsubmissions,))
    commentprocess = Process(target=reddit_collect_comments, args=(subreddit, redditcomments,))

    submissionprocess.start()
    commentprocess.start()

Example No. 24
from urllib.request import urlopen, urlretrieve
from pgmagick import Geometry, Image
from imgurpython import ImgurClient
from prawoauth2 import PrawOAuth2Mini

# =============================================================================
# GLOBALS
# =============================================================================

# Reads the config file
config = configparser.ConfigParser()
config.read("EnhanceImageBot.cfg")

#Reddit info
user_agent = ("EnhanceImage v0.3 by /u/sprunth")
r = praw.Reddit(user_agent=user_agent)
CLIENT_ID = config.get("Reddit", "client_id")
CLIENT_SECRET = config.get("Reddit", "client_secret")
TOKEN = config.get('Reddit', 'token')
REFRESH_TOKEN = config.get('Reddit', 'refresh_token')
SCOPES = ['identity', 'submit']

imgur_client_id = config.get('Imgur', 'client_id')
imgur_client_secret = config.get('Imgur', 'client_secret')
imgur_access_token = config.get('Imgur', 'access_token')
imgur_refresh_token = config.get('Imgur', 'refresh_token')

imgur_client = ImgurClient(imgur_client_id, imgur_client_secret,
                           imgur_access_token, imgur_refresh_token)

already_done = set()
Example No. 25
sql = sqlite3.connect('sql.db')
print('Loaded SQL Database')
cur = sql.cursor()

cur.execute('CREATE TABLE IF NOT EXISTS oldposts(ID TEXT)')
print('Loaded Completed table')

sql.commit()

print("Logging in")

r = praw.Reddit(
    client_id='',
    client_secret='',
    password='',
    user_agent=
    'History info for /r/gameswap and /r/GameSale V.1.02 Creator - Lambawamba',
    username='')

subreddit = r.subreddit('')


def scanSub():
    print('Searching...')
    posts = subreddit.new(limit=MAXPOSTS)
    for post in posts:
        pid = post.id
        cur.execute('SELECT * FROM oldposts WHERE ID=?', [pid])
        if not cur.fetchone():
            cur.execute('INSERT INTO oldposts VALUES(?)', [pid])
Example No. 26
import praw
import operator
import nltk

nltk.download("stopwords")
nltk.download('punkt')
nltk.download("words")

reddit = praw.Reddit('bot1')
stop_word_list = """.,--,\'s,?,),(,:,\',\'re,",-,},{,!,...,\'\',\'ve,n\'t,%,``,#,],[,&,;,\'m,=,\'ll""".split(',')
all_stop_words = nltk.corpus.stopwords.words('english') + stop_word_list
all_stop_words.append(",")

subreddit = reddit.subreddit("learnprogramming")

for submission in subreddit.hot(limit=5):
    print("---" * 20)
    print(submission.title)
    print("FREQUENT WORDS: ")
    post = reddit.submission(id=submission.id)
    flattened_comments = list(post.comments)
    all_comments = ""
    for comment in flattened_comments:
        all_comments += comment.body
    comment_sentences = nltk.tokenize.sent_tokenize(all_comments)


    words = [word.lower() for sentence in comment_sentences for word in 
        nltk.tokenize.word_tokenize(sentence)]

    freq = nltk.FreqDist(words)
Example No. 27
import praw
import time
import datetime
import os
import requests

#this code uses inspiration from peoplma's subredditarchive
absolute_path = os.path.dirname(os.path.abspath(__file__))
archive_path = os.path.join(absolute_path, 'data')
reddit = praw.Reddit(client_id="RpE-AGxmVjApEw",
                     client_secret="Na8g9uUYGr2LnOELU6ycHNJbXPM",
                     user_agent="python:MDEArchiver:v0.1.0")
subreddit = reddit.subreddit('milliondollarextreme')

def main():
    #if archives already exist, update instead
    if os.path.isdir(archive_path):
        update()
    else:
        new_archive()
 
# the program goes into this function if the archive already exists; it uses the
# date stored in dates.txt as the start time
def update():
    infile = os.path.join(archive_path, 'dates.txt')
    start = None
    
    with open(infile) as dates:
        start = int(dates.readline())
    
    end = int(time.time())
def initDankBank():
    r = praw.Reddit(user_agent = 'my_dank_application')
    submissions = r.get_subreddit(SUBREDDIT).get_top_from_month(limit = NUM_POSTS)
    normie = r.get_subreddit(NORMIE).get_top_from_month(limit = NUM_POSTS)
    makeBank(submissions, normie)
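
# Added sketch (not part of the original): the function above uses the old
# PRAW 3 API (get_subreddit / get_top_from_month). A hedged PRAW 4+ equivalent,
# assuming SUBREDDIT, NORMIE, NUM_POSTS and makeBank are defined elsewhere in
# the surrounding script:
def initDankBankModern():
    r = praw.Reddit(user_agent='my_dank_application')
    submissions = r.subreddit(SUBREDDIT).top(time_filter='month', limit=NUM_POSTS)
    normie = r.subreddit(NORMIE).top(time_filter='month', limit=NUM_POSTS)
    makeBank(submissions, normie)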
Example No. 29
    async def reddit(self, ctx, url_or_id):
        """Displays a formatted reddit post

		Note that this will only get nsfw posts if you call this in an nsfw channel"""
        if settings.reddit is None:
            raise UserError(
                "This MangoByte has not been configured to get reddit submissions. Gotta add your info to `settings.json`"
            )

        await ctx.channel.trigger_typing()

        reddit = praw.Reddit(client_id=settings.reddit["client_id"],
                             client_secret=settings.reddit["client_secret"],
                             user_agent=settings.reddit["user_agent"])

        try:
            if re.search(r"(redd\.it|reddit.com)", url_or_id):
                if not re.search(r"https?://", url_or_id):
                    url_or_id = "http://" + url_or_id
                submission = reddit.submission(url=url_or_id)
            else:
                submission = reddit.submission(id=url_or_id)
            description = submission.selftext
        except:
            raise UserError("Couldn't properly find that reddit submission")

        if submission.over_18:
            if (isinstance(ctx.channel,
                           discord.DMChannel)) or (not ctx.channel.is_nsfw()):
                raise UserError(
                    "That is an NSFW post, so I can't link it in this non-nsfw channel."
                )

        character_limit = 600
        # convert between markdown types
        description = re.sub(r"\n(?:\*|-) (.*)", r"\n• \1", description)
        description = re.sub(r"(?:^|\n)#+([^#\n]+)\n", r"\n__**\1**__ \n",
                             description)
        description = re.sub(r"\n+---\n+", r"\n\n", description)
        description = re.sub(r"&nbsp;", r" ", description)

        description = html.unescape(description)

        if len(description) > character_limit:
            description = f"{description[0:character_limit]}...\n[Read More]({submission.shortlink})"

        embed = discord.Embed(description=description,
                              color=discord.Color(int("ff4500", 16)))
        embed.set_footer(
            text=f"/r/{submission.subreddit}",
            icon_url=
            "https://images-na.ssl-images-amazon.com/images/I/418PuxYS63L.png")

        embed.title = submission.title
        embed.url = submission.shortlink

        url_ext = submission.url.split(".")[-1]

        if url_ext in ["gifv", "gif", "png", "jpg", "jpeg"]:
            image_url = submission.url
            if url_ext == "gifv":
                image_url = image_url.replace(".gifv", ".gif")
            embed.set_image(url=image_url)

        await ctx.send(embed=embed)
Example No. 30
import httpx
import praw

from aiocache import cached, Cache
from pyppeteer import launch
from starlette.applications import Starlette
from starlette.responses import HTMLResponse
from starlette.staticfiles import StaticFiles
from starlette.templating import Jinja2Templates

app = Starlette(debug=True)
app.mount("/static", StaticFiles(directory="static"), name="static")
cpath = "/usr/bin/chromium-browser"
client = httpx.AsyncClient()
log = logging.getLogger()
reddit = praw.Reddit(user_agent="EFT - v1.0.0")
sub = reddit.subreddit(os.getenv("subreddit"))
templates = Jinja2Templates(directory="templates")


async def generate_and_upload_images(refresh_interval=300):
    args = ["--no-sandbox", "--disable-setuid-sandbox"]
    chrome = await launch(args=args, headless=True, executablePath=cpath, autoClose=False)
    page = await chrome.newPage()
    while True:  # TODO: only generate images if any price changed since last update
        resp = await page.goto("http://localhost")
        await page.setViewport({"height": 800, "width": 1680})
        clip1 = {"x": 0, "y": 0, "height": 19, "width": 1680}
        clip2 = {"x": 0, "y": 19, "height": 19, "width": 1680}
        if resp.status != 200:
            log.warn(f'Error, ticker not generated. Wait {refresh_interval} seconds')