Ejemplo n.º 1
0
def build_post(new_users, number):
    """Build the title and markdown body for a "User re-add" post.

    new_users: iterable of usernames being re-added.
    number: membership number assigned to the first user; subsequent
        users get consecutive numbers.
    Returns (title, body). As a side effect, bumps and persists the
    "re-add count" stat when config.title_number is enabled.
    """
    title = "User re-add"
    if config.title_date:
        title = "{} - {}".format(helpers.date_string(), title)
    if config.title_number:
        # Persist the incremented counter before using it in the title.
        stats = helpers.load_data("stats")
        stats["re-add count"] += 1
        helpers.write_data("stats", stats)
        title += " #{}".format(stats["re-add count"])

    lines = [
        r"- \#{} /u/{}".format(number + offset, user)
        for offset, user in enumerate(new_users)
    ]

    if config.stats_section:
        added = len(new_users)
        cap = number + added - 1  # highest member number handed out
        lines.append(
            "\n# Info:\n\n- 0 users kicked\n- {} users added\n- Membercap: {} (+{})".format(
                added, cap, added
            )
        )

    # Two trailing spaces force a markdown line break between entries.
    return title, "  \n".join(lines)
Ejemplo n.º 2
0
def replace(old_un, new_un):
    """Replace old_un with new_un in the stored user list and on the subreddit.

    Swaps the name in the persisted user_list, re-flairs/removes the old
    account, adds and flairs the new one, and carries over participation
    status. In testing mode only prints what would have happened.
    """
    users = helpers.load_data("user_list")
    users[users.index(old_un)] = new_un
    helpers.write_data("user_list", users)

    reddit = helpers.initialize_reddit()
    if config.testing:
        print(
            "Flaired and removed /u/{}; Flaired and added /u/{}".format(old_un, new_un)
        )
    else:
        subreddit = reddit.subreddit(config.target_subreddit)
        try:
            subreddit.flair.set(
                redditor=old_un, text="Moved to /u/{}".format(new_un)
            )
            subreddit.contributor.remove(old_un)
        except (praw.exceptions.PRAWException, prawcore.PrawcoreException):
            # The old account was most likely deleted; nothing to flair/remove.
            pass
        # The new user keeps the slot (and number) of the one they replace.
        main.flair_users(
            [new_un], reddit, config.flair_normal, number_adjustment=users.index(new_un)
        )
        main.add_users([new_un], reddit)
        # Carry participation credit over to the new account.
        participated = set(helpers.load_data("participated"))
        if old_un in participated:
            participated.add(new_un)
            helpers.write_data("participated", list(participated))

    if config.update_sidebar:
        updates.update_sidebar(users)
Ejemplo n.º 3
0
def main():
    """Daily run: record new participation and warn about inactive regulars.

    Merges participants since the last full run into the persisted
    "participated" set, prints any regulars who have not participated,
    prints the participant count (mods excluded), and updates the
    last_daily_run timestamp.
    """
    if config.forward_user:
        forward_messages.forward(config.forward_user)

    reddit = helpers.initialize_reddit()
    participated = set(helpers.load_data("participated"))
    stats = helpers.load_data("stats")

    # Fold in everyone who participated since the last full run, then persist.
    participated.update(get_participants(reddit, stats["last_full_run"]))
    helpers.write_data("participated", list(participated))

    # Regulars who have not shown up this week.
    warn = [regular for regular in config.regulars_list
            if regular not in participated]

    if warn:
        # join() replaces the original manual separator bookkeeping.
        print("These regulars haven't participated this week: " + ", ".join(warn))

    print("Amount of participants this week:",
          len(participated - set(config.mods_list)))

    stats["last_daily_run"] = (
        time.time() - 60)  # to cover accidental gaps due to execution time
    helpers.write_data("stats", stats)
Ejemplo n.º 4
0
def new_sub():
    """Bootstrap a fresh subreddit from the persisted user list.

    Verifies bot permissions, flairs and adds every stored member,
    optionally refreshes title/sidebar, and resets participation data.
    """
    reddit = helpers.initialize_reddit()
    main.check_permissions(reddit)
    members = helpers.load_data("user_list")
    main.flair_users(members, reddit, config.flair_normal)
    if config.change_title:
        updates.change_title()
    if config.update_sidebar:
        updates.update_sidebar(members)
    main.add_users(members, reddit)
    # Nobody has participated yet on a brand-new sub.
    helpers.write_data("participated", [])
Ejemplo n.º 5
0
def main():
    """Drop a hand-picked set of entries from the saved potential_adds data."""
    # Falls back to an empty users/urls mapping if the data file is missing.
    potential_adds = helpers.load_data("potential_adds", {
        "users": [],
        "urls": []
    })
    # Work from a frozen copy so removals below don't shift the indices.
    potential_adds_copy = deepcopy(potential_adds)
    # Hand-picked positions to drop; the i - 2 below converts them to
    # 0-based list indices — presumably these came from a human-readable,
    # offset listing. TODO confirm the offset against that listing.
    users_to_remove = [
        3, 4, 5, 8, 9, 11, 12, 13, 14, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34,
        35, 36, 37, 38, 39, 40, 41, 44, 49, 55, 56, 57, 58, 59, 60, 64
    ]
    for i in users_to_remove:
        # NOTE(review): `+=` with a string extends a list character-by-character
        # (or concatenates with no separator if the blacklist is a string).
        # Looks suspicious — confirm the intended type of redditor_blacklist
        # and whether this should be .append(...).
        config.redditor_blacklist += potential_adds_copy["users"][i - 2]
        potential_adds["users"].remove(potential_adds_copy["users"][i - 2])
        potential_adds["urls"].remove(potential_adds_copy["urls"][i - 2])
    helpers.write_data("potential_adds", potential_adds)
    print("Amount remaining: " + str(len(potential_adds["users"])))
Ejemplo n.º 6
0
def acquire():
    """Fetch candidate users and stash them in the potential_adds data file.

    Pulls up to user_amount fresh users (excluding anyone already on the
    stored user list), saves them with their entry URLs, and writes a
    " u/name u/name ..." summary line to the formatted_users text file.
    """
    reddit = initialize_reddit()
    user_list = load_data("user_list")
    user_amount = 65  # how many candidates to gather per run

    potential_adds = {"users": [], "urls": []}
    new_users, new_user_urls = get_new_users(reddit, user_amount, user_list)
    print(new_users, new_user_urls)
    try:
        # Pair each user with its URL instead of indexing both lists by
        # position; slicing caps the count without relying on an IndexError
        # (which the old loop provoked when fewer users came back).
        for user, url in zip(new_users[:user_amount],
                             new_user_urls[:user_amount]):
            potential_adds["users"].append(user)
            potential_adds["urls"].append(url)
    except Exception as exc:
        # you probably deleted potential_adds.json, it needs to be
        # {"users": [], "urls": []}
        print(vars(exc))
    formatted_urls = "".join(" u/" + user for user in potential_adds["users"])
    write_data("potential_adds", potential_adds)
    helpers.write_data_txt("formatted_users", formatted_urls)
Ejemplo n.º 7
0
def main():
    """Build and publish the main log post from saved potential_adds."""
    user_list = helpers.load_data("user_list")
    reddit = helpers.initialize_reddit()
    stats = helpers.load_data("stats")

    # A non-empty user list means the community is already live; refuse to
    # run unless explicitly overridden on the command line.
    if user_list and ("--ignore-active-community" not in sys.argv):
        msg = "Userlist is non-empty. Exiting. Call with --ignore-active-community to run anyway"
        helpers.write_log_trash("Failed {}".format(helpers.date_string()), msg)
        raise ActiveCommunity(msg)

    # NOTE(review): this unpacks whatever load_data returns into two names —
    # confirm "potential_adds" is stored as a 2-item sequence here.
    new_users, new_user_urls = helpers.load_data("potential_adds")
    helpers.write_log_trash("New users {}".format(helpers.date_string()),
                            new_users)

    post_text_items = [daddy.build_new_text(new_users, 1), "\n"]

    if config.entry_comments:
        gist_url = daddy.build_and_post_gist(new_users, new_user_urls)
        post_text_items.append("[Comments for entry]({})".format(gist_url))
    if config.stats_section:
        diff = len(new_users)
        change = "+{}".format(diff) if diff >= 0 else str(diff)
        post_text_items.extend([
            "# Info:\n\n",
            "- {} users added".format(diff),
            "- Membercap: {} ({})".format(len(new_users), change),
        ])

    post_text = "\n".join(post_text_items)

    title = config.main_log_title
    if config.title_date:
        title = "{} - {}".format(helpers.date_string(), title)
    if config.title_number:
        stats["log_count"] += 1
        title += " #{}".format(stats["log_count"])

    daddy.make_post(title, post_text, reddit)

    if config.change_title:
        updates.change_title()

    daddy.add_users(new_users, reddit)
    daddy.flair_users(new_users, reddit, config.flair_new)

    if config.update_sidebar:
        updates.update_sidebar(new_users)

    # Persist the new roster and reset participation tracking.
    stats["last_full_run"] = time.time()
    helpers.write_data("stats", stats)
    helpers.write_data("user_list", new_users)
    helpers.write_data("participated", [])
Ejemplo n.º 8
0
# (fragment: `text` with its 'p'/'h1'/'h3' buckets and `article` are set up
# above this chunk by the BeautifulSoup pass)
text['pre'] = []
text['imgsrc'] = []
# Bucket the text of each top-level tag by its tag name.
for tag in article.contents:
  # multiple if statements here to make it easier to read
  if tag is not None and tag.name is not None:
    if tag.name == "p":
      text['p'].append(tag.text)
    elif tag.name == 'h1':
      text['h1'].append(tag.text)
    elif tag.name == 'h3':
      text['h3'].append(tag.text)
    elif tag.name == 'pre':
      text['pre'].append(tag.text)
# Image sources are collected from the whole article, not just the top level.
for tag in article.findAll('img'):
  text['imgsrc'].append(tag['src'])
helpers.write_data('bs', text)

##
## LXML — the same extraction redone with lxml.html
##
import lxml.html
page = lxml.html.fromstring(page_string)
post = page.find_class('entry-content')[0] #0 since only one tag with that class

# Fresh buckets for the lxml pass.
text = {}
text['p'] = []
text['h1'] = []
text['h3'] = []
text['pre'] = []
text['imgsrc'] = []
# text_content() is needed to get all of the text within the tag, not just on the top level
Ejemplo n.º 9
0
def main():
    """Weekly full run: prune non-participants, add new users, post the log.

    Refuses to run twice within 23 hours (unless --override_time is given),
    removes members who did not participate, recycles previously saved
    candidate users, publishes the log post, and persists the new roster.
    """
    if config.delay:
        # Spread start times out so runs don't happen like clockwork.
        time.sleep(random.randrange(0, config.max_delay * 60))

    daily.main()

    reddit = (helpers.initialize_reddit()
              )  # will exit if Reddit isn't properly initialized
    check_permissions(
        reddit
    )  # will check if bot has all needed permissions; exit on failure
    participated = set(helpers.load_data("participated"))
    stats = helpers.load_data("stats")
    user_list = helpers.load_data("user_list")
    helpers.write_log_trash("User list {}".format(helpers.date_string()),
                            user_list)

    # Guard against accidental double runs.
    if stats["last_full_run"] + 23 * 60 * 60 > time.time():
        if "--override_time" not in sys.argv:
            msg = 'Less than 23 hours since last run. Exiting. Run with "--override_time" as an option to disregard'
            print(msg)
            helpers.write_log_trash("Failed {}".format(helpers.date_string()),
                                    msg)
            sys.exit(1)

    updated_list, not_participated = segregate_users(user_list, participated)
    helpers.write_log_trash(
        "Not participated {}".format(helpers.date_string()), not_participated)

    flair_and_remove(not_participated, reddit)
    flair_users(updated_list, reddit, config.flair_normal)

    # Recycle previously saved candidates that are still valid, not already
    # picked, and not already on the roster. zip pairs users with their URLs
    # (replacing the old index-based loop over the two parallel lists).
    saved_users, saved_urls = check_saved_users()
    valid_users = []
    valid_urls = []
    for user, url in zip(saved_users, saved_urls):
        if (valid_user(user, reddit)
                and user not in valid_users
                and user not in updated_list):
            valid_users.append(user)
            valid_urls.append(url)

    total_needed_users = len(valid_users)
    # NOTE(review): hard-coded to fetch 0 fresh users, so only the saved
    # candidates above are ever added — confirm this is intentional.
    num_still_needed_users = 0
    new_users, new_user_urls = get_new_users(reddit, num_still_needed_users,
                                             updated_list)
    new_users = valid_users + new_users
    new_user_urls = valid_urls + new_user_urls
    new_users = new_users[:total_needed_users]
    new_user_urls = new_user_urls[:total_needed_users]

    helpers.write_log_trash("New users {}".format(helpers.date_string()),
                            new_users)

    post_text_lines = [
        build_removed_text(user_list, not_participated),
        "\n",
        build_new_text(new_users,
                       len(updated_list) + 1),
        "\n",
    ]

    if config.entry_comments:
        try:
            post_text_lines.append("\n[Comments for entry]({})".format(
                build_and_post_gist(new_users, new_user_urls)))
        except Exception:
            # Gist creation is best-effort; the post still goes out without it.
            pass
    if config.stats_section:
        post_text_lines.append("\n# Info:\n")
        post_text_lines.append("- {} users kicked".format(
            len(not_participated)))
        post_text_lines.append("- {} users added".format(len(new_users)))
        diff = len(new_users) - len(not_participated)
        change = "+{}".format(diff) if diff >= 0 else str(diff)
        post_text_lines.append("- Membercap: {} ({})".format(
            (len(updated_list) + len(new_users)), change))

    post_text = "\n".join(post_text_lines)

    title = config.main_log_title
    if config.title_date:
        title = helpers.date_string() + " - " + title
    if config.title_number:
        stats["log_count"] += 1
        title += " #{}".format(stats["log_count"])

    make_post(title, post_text, reddit)

    if config.change_title:
        updates.change_title()

    add_users(new_users, reddit)
    flair_users(new_users,
                reddit,
                config.flair_new,
                number_adjustment=len(updated_list))

    # Persist the new roster and reset participation for the coming week.
    updated_list_copy = updated_list[:]
    updated_list_copy.extend(new_users)
    if config.update_sidebar:
        updates.update_sidebar(updated_list_copy)

    stats["last_full_run"] = time.time()
    helpers.write_data("stats", stats)
    helpers.write_data("user_list", updated_list_copy)
    helpers.write_data("participated", [])
Ejemplo n.º 10
0
    # (fragment: this runs inside a per-article loop begun above this chunk;
    # `src_article`, `dst_article`, `last_sync`, `settings`, `api` come from there)
    url = '{}/{}/articles/{}/votes.json'.format(settings['src_root'],
                                                settings['locale'],
                                                src_article)
    votes = api.get_resource_list(url)
    if not votes:
        print('- no votes found')
        continue
    # Re-create only the votes cast since the last sync.
    for vote in votes:
        if last_sync < arrow.get(vote['created_at']):
            print('- adding vote {} to article {}'.format(
                vote['id'], dst_article))
            # Votes with value -1 go to the "down" endpoint, everything
            # else to "up".
            if vote['value'] == -1:
                url = '{}/articles/{}/down.json'.format(
                    settings['dst_root'], dst_article)
            else:
                url = '{}/articles/{}/up.json'.format(settings['dst_root'],
                                                      dst_article)
            payload = {
                'vote': {
                    'user_id': vote['user_id'],
                    'created_at': vote['created_at']
                }
            }
            response = api.post_resource(url, payload, status=200)
            if response is False:
                print('Skipping vote {}'.format(vote['id']))

# Record the sync time so the next run only picks up newer votes.
utc = arrow.utcnow()
sync_dates['article_votes'] = utc.format()
write_data(sync_dates, 'sync_dates')
Ejemplo n.º 11
0
def insert_users_to_userlist(new_users):
    """Append new_users to the persisted user list and save it."""
    current = helpers.load_data("user_list")
    helpers.write_data("user_list", current + list(new_users))
Ejemplo n.º 12
0
            # (fragment: inside a per-article loop/branch begun above this chunk;
            # this branch creates the article in the destination section)
            src_article['author_id'] = verify_author(src_article['author_id'],
                                                     settings['team_user'])
            url = '{}/{}/sections/{}/articles.json'.format(
                settings['dst_root'], settings['locale'], dst_section)
            payload = package_article(src_article)
            new_article = api.post_resource(url, payload)
            if new_article is False:
                print('Skipping article {}'.format(src_article['id']))
                continue
            # Remember the destination id so later passes can find it.
            article_map[str(src_article['id'])] = new_article['id']
            continue
        # Article already exists: push an update if it was edited since the
        # last sync, otherwise leave it alone.
        if last_sync < arrow.get(src_article['edited_at']):
            print('- updating article {} in destination section {}'.format(
                src_article['id'], dst_section))
            dst_article = article_map[str(src_article['id'])]
            url = '{}/articles/{}/translations/{}.json'.format(
                settings['dst_root'], dst_article, settings['locale'])
            payload = package_article(src_article, put=True)
            response = api.put_resource(url, payload)
            if response is False:
                print('Skipping article {}'.format(src_article['id']))
            continue
        print('- article {} is up-to-date in destination section {}'.format(
            src_article['id'], dst_section))

# Persist sync bookkeeping for the next run.
utc = arrow.utcnow()
sync_dates['articles'] = utc.format()
write_data(sync_dates, 'sync_dates')
write_data(article_map, 'article_map')
write_js_redirects(article_map)
Ejemplo n.º 13
0
import helpers

from selenium import webdriver
from selenium.webdriver.common.keys import Keys

url = 'https://bigishdata.com/2017/05/11/general-tips-for-web-scraping-with-python/'

# NOTE(review): PhantomJS support is deprecated in Selenium; a headless
# Chrome/Firefox driver is the modern replacement.
driver = webdriver.PhantomJS()
try:
    driver.get(url)
    elem = driver.find_element_by_class_name('entry-content')

    # Collect the text of every occurrence of each desired tag.
    text = {}
    desired_tags = (u'p', u'h1', u'h3', u'pre')
    for tag in desired_tags:
        tags = elem.find_elements_by_tag_name(tag)
        text[tag] = []
        for data in tags:
            text[tag].append(data.text)
finally:
    # The browser process keeps running unless explicitly shut down —
    # the original script leaked it.
    driver.quit()

helpers.write_data('selenium', text)
Ejemplo n.º 14
0
                                                      src_article)
    attachments = api.get_resource_list(url,
                                        list_name='article_attachments',
                                        paginate=False)
    if not attachments:
        print('- no attachments found')
        continue
    for src_attachment in attachments:
        if last_sync < arrow.get(src_attachment['created_at']):
            print('- adding new attachment {} to article {}'.format(
                src_attachment['file_name'], dst_article))
            print(src_attachment)
            url = '{}/articles/{}/attachments.json'.format(
                settings['dst_root'], dst_article)
            new_attachment = api.post_attachment(url, src_attachment)
            if new_attachment is False:
                print('Skipping attachment {}'.format(
                    src_attachment['file_name']))
                continue
            attachment_map[str(src_attachment['id'])] = new_attachment['id']
            attachment_article_map[str(src_attachment['id'])] = src_article
            continue
        print('- attachment {} is up to date'.format(
            src_attachment['file_name']))

utc = arrow.utcnow()
sync_dates['attachments'] = utc.format()
write_data(sync_dates, 'sync_dates')
write_data(attachment_map, 'attachment_map')
write_data(attachment_article_map, 'attachment_article_map')
Ejemplo n.º 15
0
 def parse(self, response):
     """Extract the words for each desired tag, persist them, yield the dict.

     NOTE(review): dropped the unused local
     `selector = Selector(response=response)` — it was never referenced;
     words_from_tags works from the response directly.
     """
     for tag in self.desired_tags:
         self.text[tag] = self.words_from_tags(tag, response)
     helpers.write_data('scrapy', self.text)
     yield self.text  # how scrapy returns the json object you created
Ejemplo n.º 16
0
            # (fragment: inside a per-comment loop/branch begun above this chunk;
            # this branch creates the comment on the destination article)
            print('- adding new comment {} to article {}'.format(
                src_comment['id'], dst_article))
            url = '{}/articles/{}/comments.json'.format(
                settings['dst_root'], dst_article)
            payload = package_comment(src_comment)
            new_comment = api.post_resource(url, payload)
            if new_comment is False:
                print('Skipping comment {}'.format(src_comment['id']))
                continue
            # Remember the destination ids so later passes can find them.
            comment_map[str(src_comment['id'])] = new_comment['id']
            comment_article_map[str(src_comment['id'])] = src_article
            continue
        # Comment already exists: push an update only if it changed since
        # the last sync.
        if last_sync < arrow.get(src_comment['updated_at']):
            print('- updating comment {} in article {}'.format(
                src_comment['id'], dst_article))
            dst_comment = comment_map[str(src_comment['id'])]
            url = '{}/articles/{}/comments/{}.json'.format(
                settings['dst_root'], dst_article, dst_comment)
            payload = package_comment(src_comment, put=True)
            response = api.put_resource(url, payload)
            if response is False:
                print('Skipping comment {}'.format(src_comment['id']))
            continue
        print('- comment {} is up to date'.format(src_comment['id']))

# Persist sync bookkeeping for the next run.
utc = arrow.utcnow()
sync_dates['comments'] = utc.format()
write_data(sync_dates, 'sync_dates')
write_data(comment_map, 'comment_map')
write_data(comment_article_map, 'comment_article_map')