Exemple #1
0
def _save_question(question, fetch_index):
    """Store one snapshot of a question and link it to its known tags.

    Args:
        question: Mapping describing a single question (a record from the
            Stack Exchange API).  Its fields are copied onto the snapshot.
        fetch_index: Value stored in the snapshot's ``fetch_index`` column.

    A ``QuestionSnapshotTag`` row is created for every tag name in
    ``question['tags']`` that has a matching ``Tag`` row.  Tag lookups,
    including misses, are remembered in the module-level ``tag_cache``.
    """

    def to_local_datetime(epoch_seconds):
        # The API reports dates as Unix epoch seconds.  They are turned into
        # datetime objects in this server's _local time_ (not UTC) on
        # purpose: the creation dates of the stored records are local too.
        return datetime.datetime.fromtimestamp(epoch_seconds)

    # Some records come back without an owner ID; store None for those.
    owner_id = None
    if 'owner' in question:
        owner = question['owner']
        if 'user_id' in owner:
            owner_id = owner['user_id']

    # Assemble the column values for the snapshot row.
    row = {
        'fetch_index': fetch_index,
        'question_id': question['question_id'],
        'owner_id': owner_id,
    }
    copied_as_is = (
        'comment_count',
        'delete_vote_count',
        'reopen_vote_count',
        'close_vote_count',
        'is_answered',
        'view_count',
        'favorite_count',
        'down_vote_count',
        'up_vote_count',
        'answer_count',
        'score',
    )
    for field_name in copied_as_is:
        row[field_name] = question[field_name]
    row['last_activity_date'] = to_local_datetime(
        question['last_activity_date'])
    row['creation_date'] = to_local_datetime(question['creation_date'])
    row['title'] = question['title']
    row['body'] = question['body']

    snapshot = QuestionSnapshot.create(**row)

    # Link the snapshot to each of its tags that exists in the Tag table.
    for name in question['tags']:

        if name not in tag_cache:
            # First time this tag name is seen: look it up once and remember
            # the outcome, caching None when there is no such tag.
            try:
                found = Tag.get(tag_name=name)
            except Tag.DoesNotExist:
                found = None
            tag_cache[name] = found

        tag = tag_cache[name]
        if tag is None:
            continue

        QuestionSnapshotTag.create(question_snapshot_id=snapshot.id,
                                   tag_id=tag.id)
def main(tags, *args, **kwargs):
    """Fetch questions for every tag listed in a file under a new fetch index.

    Args:
        tags: Path of a text file holding one tag name per line.
            Surrounding whitespace is stripped and blank lines are skipped.
        *args: Accepted but not used.
        **kwargs: Accepted but not used.
    """

    # Create a new fetch index: one more than the largest index stored so
    # far, counting from 0 when the query yields nothing.
    last_fetch_index = QuestionSnapshot.select(
        fn.Max(QuestionSnapshot.fetch_index)).scalar() or 0
    fetch_index = last_fetch_index + 1

    # Read the whole list up front so the file is closed before fetching.
    # Blank lines are dropped; otherwise they would be fetched as the empty
    # tag name ''.
    with open(tags) as tag_file:
        stripped_lines = (line.strip() for line in tag_file)
        tag_list = [name for name in stripped_lines if name]

    for tag in tag_list:
        fetch_questions_for_tag(tag, fetch_index)
Exemple #3
0
def main(tags, *args, **kwargs):
    """Run one fetch pass over all tags named in the file at ``tags``.

    Args:
        tags: Path to a text file with one tag name on each line.  Each line
            is stripped of surrounding whitespace; empty lines are ignored.
        *args: Accepted but not used.
        **kwargs: Accepted but not used.
    """

    # Create a new fetch index.  It is the highest fetch index already
    # stored plus one; when the query yields nothing the count starts at 1.
    last_fetch_index = QuestionSnapshot.select(
        fn.Max(QuestionSnapshot.fetch_index)).scalar() or 0
    fetch_index = last_fetch_index + 1

    tag_list = []
    with open(tags) as tag_file:
        for line in tag_file:
            tag_name = line.strip()
            # A blank line is not a tag; without this check it would be
            # passed on as the empty tag name ''.
            if tag_name:
                tag_list.append(tag_name)

    # The file is closed by now; fetch each tag under the same index.
    for tag in tag_list:
        fetch_questions_for_tag(tag, fetch_index)
def _save_question(question, fetch_index):
    """Persist a snapshot of ``question`` and attach its known tags.

    Args:
        question: Mapping with the fields of one question record returned by
            the Stack Exchange API.
        fetch_index: Stored as the snapshot's ``fetch_index``.

    One ``QuestionSnapshot`` row is created, plus one
    ``QuestionSnapshotTag`` row per entry of ``question['tags']`` for which a
    ``Tag`` row exists.  The result of every tag lookup (the ``Tag`` or
    None) is kept in the module-level ``tag_cache``.
    """

    # It seems that the ID of the owner is missing from some records, so
    # check for it before reading it.
    owner_id_present = 'owner' in question and 'user_id' in question['owner']
    if owner_id_present:
        owner_id = question['owner']['user_id']
    else:
        owner_id = None

    def as_datetime(timestamp):
        # Dates arrive from the API as Unix epoch time.  The conversion
        # yields _local times_ on this server rather than UTC, chosen so
        # they line up with the local creation dates of the stored records.
        return datetime.datetime.fromtimestamp(timestamp)

    # Column values for the snapshot of this question.
    snapshot_values = {
        'fetch_index': fetch_index,
        'question_id': question['question_id'],
        'owner_id': owner_id,
        'comment_count': question['comment_count'],
        'delete_vote_count': question['delete_vote_count'],
        'reopen_vote_count': question['reopen_vote_count'],
        'close_vote_count': question['close_vote_count'],
        'is_answered': question['is_answered'],
        'view_count': question['view_count'],
        'favorite_count': question['favorite_count'],
        'down_vote_count': question['down_vote_count'],
        'up_vote_count': question['up_vote_count'],
        'answer_count': question['answer_count'],
        'score': question['score'],
        'last_activity_date': as_datetime(question['last_activity_date']),
        'creation_date': as_datetime(question['creation_date']),
        'title': question['title'],
        'body': question['body'],
    }
    snapshot = QuestionSnapshot.create(**snapshot_values)

    # Link this snapshot to all tags related to it.
    for label in question['tags']:

        if label not in tag_cache:
            # Not looked up yet: query once and cache the result, storing
            # None for a tag name that has no Tag row.
            try:
                tag = Tag.get(tag_name=label)
            except Tag.DoesNotExist:
                tag = None
            tag_cache[label] = tag
        else:
            tag = tag_cache[label]

        # Tag names without a Tag row are skipped.
        if tag is not None:
            QuestionSnapshotTag.create(
                question_snapshot_id=snapshot.id,
                tag_id=tag.id,
            )