Example #1
def reddit_comment_update(appcfg, update_length=604800):
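    # Assumed context, not shown in this snippet: Comment is a peewee model,
    # r is an authenticated praw.Reddit instance, chunks() batches a list
    # (see the sketch after this example), and RequestException presumably
    # comes from prawcore.exceptions.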
    print('     ---UPDATING COMMENTS WITH DATA FROM THE REDDIT API')
    # Comments whose snapshot was taken less than update_length seconds
    # (default: one week) after creation may still change, so refresh them.
    needs_update = Comment.select().where(
        (Comment.retrieved_on - Comment.created_utc) < update_length)
    totalnumber = needs_update.count()
    needs_update_list = list()
    print(
        '         ---Building Task List.  This could take a while for large subreddits'
    )

    with tqdm(total=totalnumber, ncols=100, dynamic_ncols=False) as nbar:
        for dbcomment in needs_update:
            fullname = "t1_{}".format(dbcomment.comment_id)
            needs_update_list.append(fullname)
            nbar.update(1)
    # The Reddit API caps /api/info lookups at 100 fullnames, so batch them.
    needs_update_list = list(chunks(needs_update_list, 100))
    print(
        '         ---Accessing data from Reddit API and entering into database'
    )
    with tqdm(total=totalnumber, ncols=100, dynamic_ncols=False) as pbar:
        for nlist in needs_update_list:
            try:
                rd_comments = list(r.info(nlist))
            except RequestException:
                print("Connection Error to Reddit API. Exiting...")
                # quit()
                return
            with appcfg.database.atomic():
                for rdcomment in rd_comments:
                    updatedtime = arrow.now().timestamp
                    # Author removed and body wiped: the comment was deleted.
                    if rdcomment.author is None and rdcomment.body == '[deleted]':
                        Comment.update(
                            score=rdcomment.score,
                            retrieved_on=updatedtime,
                            deleted=True).where(
                                Comment.comment_id == rdcomment.id).execute()
                        """
                    elif rdcomment.body == '[deleted]':
                        Comment.update(score=rdcomment.score,
                                       retrieved_on=updatedtime,
                                       deleted=False).where(Comment.comment_id == rdcomment.id).execute()
                    elif rdcomment.author is None:
                        Comment.update(score=rdcomment.score,
                                       # body=rdcomment.body_html,
                                       retrieved_on=updatedtime,
                                       deleted=True).where(Comment.comment_id == rdcomment.id).execute()
                        """
                    else:
                        Comment.update(
                            score=rdcomment.score,
                            # body=rdcomment.body_html,
                            retrieved_on=updatedtime,
                            deleted=False).where(
                                Comment.comment_id == rdcomment.id).execute()
                    pbar.update(1)
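Example #1 depends on a chunks() helper that is not shown above. A minimal sketch of what the call site implies (split a list into fixed-size batches); the name and signature come from the code, the body is an assumption:

def chunks(seq, size):
    # Yield successive slices of seq, each at most `size` items long.
    for i in range(0, len(seq), size):
        yield seq[i:i + size]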
Example #2
def process_comment_urls(udb, ulimit=100000, number_of_processes=4):
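    # Assumed context, not shown in this snippet: Comment, Url and
    # CommentLinks are peewee models, Queue and Process come from
    # multiprocessing, SQLError is presumably the database driver's error
    # class, and url_worker is sketched after this example.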
    print('---EXTRACTING COMMENT URLS')
    totalcompleted = 0
    if ulimit == 0:
        ulimit = None
    total_to_process = Comment.select().where(
        Comment.number_urls.is_null()).count()
    if ulimit is not None and total_to_process > ulimit:
        total_to_process = ulimit
    with tqdm(total=total_to_process) as pbar:
        while totalcompleted < total_to_process:
            with udb.atomic():
                queue_tasks = [(comment.id, comment.body)
                               for comment in Comment.select().where(
                                   Comment.number_urls.is_null()).limit(ulimit)
                               ]
            # Create queues
            task_queue = Queue()
            done_queue = Queue()

            # Submit tasks
            for task in queue_tasks:
                task_queue.put(task)

            # Start worker processes
            for i in range(number_of_processes):
                Process(target=url_worker,
                        args=(task_queue, done_queue)).start()

            for i in range(len(queue_tasks)):
                comment_id, url_set = done_queue.get()
                try:
                    with udb.atomic():
                        Comment.update(number_urls=len(url_set)).where(
                            Comment.id == comment_id).execute()
                        for link in url_set:
                            url_row, _created = Url.get_or_create(link=link)
                            try:
                                CommentLinks.insert(
                                    comment=comment_id,
                                    url=url_row.id).on_conflict_ignore().execute()
                            except SQLError:
                                print(comment_id, url_row.id)
                                raise
                                raise
                except KeyboardInterrupt:
                    quit()

                pbar.update(1)
                totalcompleted += 1

            # Tell child processes to stop
            for i in range(number_of_processes):
                task_queue.put('STOP')
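The url_worker target started above is defined elsewhere in the module. A minimal sketch under the assumptions visible at the call site: each worker drains (comment_id, body) tasks until the 'STOP' sentinel, extracts URLs from the body, and puts (comment_id, url_set) on the done queue. The regex here is illustrative, not the original extraction logic:

import re

URL_RE = re.compile(r'https?://[^\s)\]>"\']+')

def url_worker(task_queue, done_queue):
    # iter() with a sentinel ends the loop once 'STOP' is received.
    for comment_id, body in iter(task_queue.get, 'STOP'):
        url_set = set(URL_RE.findall(body or ''))
        done_queue.put((comment_id, url_set))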
    """
Example #3
def process_comments(appcfg):
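    # Assumed context, not shown in this snippet: Comment and Author are
    # peewee models, fn comes from peewee, arrow and os are imported at
    # module level, and get_push_comments is sketched after this example.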
    # Get newest comments with two week overlap
    print('   PROCESSING NEWEST PUSHSHIFT.IO COMMENTS FOR', appcfg.subreddit)

    try:
        newest_utc = int(
            Comment.select(fn.MAX(Comment.created_utc)).scalar().timestamp())
    except (TypeError, AttributeError):
        newest_utc = None
    if newest_utc is not None:
        oldestdate = newest_utc  # two-week overlap (- 1209600 s) disabled in the original
    else:
        oldestdate = appcfg.oldestdate

    try:
        comment_id_set = get_push_comments(appcfg, appcfg.newestdate,
                                           oldestdate)
    except (ConnectionError, SSLError, ChunkedEncodingError):
        comment_id_set = None
        print("     Connection Error for Pushshift API.  Quitting...")
        # quit()
        return comment_id_set

    # Get oldest comments in case progress was interrupted, with two week overlap
    try:
        oldest_utc = int(
            Comment.select(fn.MIN(Comment.created_utc)).scalar().timestamp())
    except (TypeError, AttributeError):
        oldest_utc = None
    if oldest_utc is not None:
        newestdate = oldest_utc  # two-week overlap (+ 1209600 s) disabled in the original
    else:
        newestdate = appcfg.newestdate
    print('   PROCESSING OLDEST PUSHSHIFT.IO COMMENTS FOR', appcfg.subreddit)

    try:
        old_comment_id_set = get_push_comments(appcfg, newestdate,
                                               appcfg.oldestdate)
    except (ConnectionError, SSLError, ChunkedEncodingError):
        old_comment_id_set = None
        print("     Connection Error for Pushshift API.  Quitting...")
        # quit()
        return old_comment_id_set
    comment_id_set |= old_comment_id_set
    filedate = arrow.now().timestamp
    basedir = "/rpa" if os.environ.get('DOCKER', '0') == '1' else '.'
    coutput_file_path = "{basedir}/{subreddit}_comments_{timestamp}.txt".format(
        basedir=basedir, subreddit=appcfg.subreddit, timestamp=filedate)

    # with open(coutput_file_path, 'w', encoding='UTF-8') as comment_file:
    #     comment_file.writelines(comment_id_set)
    print("     Total comments submitted to", appcfg.subreddit, "in set:",
          len(comment_id_set))
    deleted = Author.get_or_none(name='[deleted]')
    if deleted is not None:
        cupdatet = Comment.update(deleted=True).where(
            (Comment.author == deleted.id)
            # peewee expressions must be combined with |, not Python's `or`,
            # which would silently drop the second condition.
            & (Comment.deleted.is_null() | (Comment.deleted == 0))).execute()
        print(
            '     Updated deleted field in comments.  Set deleted = True for',
            cupdatet, 'records.')
        cupdatef = Comment.update(
            deleted=False).where((Comment.author != deleted.id)
                                 & (Comment.deleted.is_null())).execute()
        print(
            '     Updated deleted field in comments.  Set deleted = False for',
            cupdatef, 'records.')
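get_push_comments is not shown in this example. A minimal sketch of the retrieval side, assuming it pages backwards through Pushshift's comment search endpoint between the two timestamps and returns the set of ids it saw; the real function presumably also writes each comment into the database, which is omitted here:

import requests

PUSHSHIFT_URL = 'https://api.pushshift.io/reddit/search/comment/'

def get_push_comments(appcfg, newestdate, oldestdate):
    comment_id_set = set()
    before = newestdate
    while True:
        resp = requests.get(PUSHSHIFT_URL, params={
            'subreddit': appcfg.subreddit,
            'before': before,
            'after': oldestdate,
            'size': 500,
            'sort': 'desc',
        })
        resp.raise_for_status()
        batch = resp.json().get('data', [])
        if not batch:
            break
        for comment in batch:
            comment_id_set.add(comment['id'])
        # Page backwards from the oldest comment seen so far.
        before = batch[-1]['created_utc']
    return comment_id_set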