def _get_revisions(pageid=None, base_revision=None, chunk_size=CHUNK_SIZE, continuous=False):
    """
    Fetch revisions of a page through ``WikiFetch`` and report how many came back.

    Every argument is forwarded unchanged to
    ``WikiFetch.fetch_revisions_for_page``; the exact semantics are defined
    there, so the descriptions below only state how each value is passed on.

    :param pageid: Forwarded as ``pageid``; the page whose revisions are requested.
    :param base_revision: Forwarded as ``start_rev`` (presumably the revision
        to start fetching from -- confirm against ``WikiFetch``).
    :param chunk_size: Forwarded as ``chunk_size``; defaults to the
        module-level ``CHUNK_SIZE``.
    :param continuous: Forwarded as ``continuous``; defaults to ``False``.
    :return: Whatever ``fetch_revisions_for_page`` returns. It must support
        ``len()`` because its length is printed before it is returned.
    """

    fetcher = WikiFetch()
    revisions = fetcher.fetch_revisions_for_page(
        pageid=pageid, start_rev=base_revision, chunk_size=chunk_size, continuous=continuous
    )
    # Single-argument call form: prints the same text on Python 2 and also
    # parses on Python 3 (the bare ``print`` statement does not).
    print("Length of revisions at this level {}".format(len(revisions)))

    return revisions
# Example no. 2
# 0
def generate():
    """
    Poll Wikipedia for recent changes and register analysis work for each page.

    Per the original author's notes, this function is called continuously
    through a job. For every page reported by ``WikiFetch.get_recent_changes()``
    it:

        - creates or updates the page's row in ``db.wikipages`` with its page
          id, title and last known revision;
        - fetches the available analysis types and calls ``create_analysis``
          once per type for that page;
        - collects the ``'value'`` of every response whose ``'status'`` is
          ``"SUCCESS"`` into ``generated_entries``.

    What ``create_analysis`` writes (the original notes mention an active
    status and a ``last_annotated`` revision that is 0 for new entries) is
    decided in that function, which is not part of this block.

    :return: ``locals()`` -- a dict of every local name bound in this call.
        ``generated_entries`` and ``pages_with_changes`` are always present;
        the per-page names (``k``, ``v``, ``pageid``, ``title``,
        ``last_known_rev``, ``query``, ``page_id``, ``analysis_types``, and,
        when at least one analysis type exists, ``analysis`` and
        ``analysis_entry``) are present only if at least one page was
        processed and hold the values from the last iteration.
    """

    # NOTE: local names are part of the return value (``locals()``), so they
    # must not be renamed or removed without checking the callers/views.
    generated_entries = set()  # Analysis entries generated in this call

    # Get pages with changes.
    # Comes in as a dict of pages with recent changes, keyed by page id.
    pages_with_changes = WikiFetch.get_recent_changes()

    # Open each changed page for every analysis by creating or updating its
    # analysis entry. ``items()`` iterates the same pairs as ``iteritems()``
    # on Python 2 and also exists on Python 3.
    for k, v in pages_with_changes.items():
        # Extract page ID, title and latest revision from Wikipedia.
        # Language is not available in this call.
        pageid = k
        title = v['title']
        last_known_rev = v['last_known_rev']

        # Update the page table with this page's entry.
        query = (db.wikipages.pageid == pageid)
        # ``page_id`` is not read again below; it stays bound because it is
        # exposed through ``locals()``.
        page_id = db.wikipages.update_or_insert(query,
                                                pageid=pageid,
                                                last_known_rev=last_known_rev,
                                                title=title)

        # Get available analysis types.
        # NOTE(review): this lookup runs once per page; it looks
        # loop-invariant, but hoisting it would change the returned
        # ``locals()`` when there are no pages -- confirm before moving.
        analysis_types = _get_analysis_type_from_db()
        print(analysis_types)
        # For each available analysis, make an analysis requirement.
        for analysis in analysis_types.keys():

            # Call the create analysis function for writing the entry in the DB.
            analysis_entry = create_analysis(pageid=pageid,
                                             analysis_type=analysis)
            # Call form of print: same output on Python 2, valid on Python 3.
            print(analysis_entry)
            if analysis_entry['status'] == "SUCCESS":
                # An entry has been created: add it to the set of generated entries.
                generated_entries.add(analysis_entry['value'])
            else:
                # TODO: Based on response, log the error or success of entries.
                pass

    return locals()
# Example no. 3
# 0
def _get_revisions(pageid=None,
                   base_revision=None,
                   chunk_size=CHUNK_SIZE,
                   continuous=False):
    """
    Fetch revisions of a page through ``WikiFetch`` and report how many came back.

    Every argument is forwarded unchanged to
    ``WikiFetch.fetch_revisions_for_page``; the exact semantics are defined
    there, so the descriptions below only state how each value is passed on.

    :param pageid: Forwarded as ``pageid``; the page whose revisions are requested.
    :param base_revision: Forwarded as ``start_rev`` (presumably the revision
        to start fetching from -- confirm against ``WikiFetch``).
    :param chunk_size: Forwarded as ``chunk_size``; defaults to the
        module-level ``CHUNK_SIZE``.
    :param continuous: Forwarded as ``continuous``; defaults to ``False``.
    :return: Whatever ``fetch_revisions_for_page`` returns. It must support
        ``len()`` because its length is printed before it is returned.
    """

    fetcher = WikiFetch()
    revisions = fetcher.fetch_revisions_for_page(pageid=pageid,
                                                 start_rev=base_revision,
                                                 chunk_size=chunk_size,
                                                 continuous=continuous)
    # Single-argument call form: prints the same text on Python 2 and also
    # parses on Python 3 (the bare ``print`` statement does not).
    print("Length of revisions at this level {}".format(len(revisions)))

    return revisions
def generate():
    """
    Poll Wikipedia for recent changes and register analysis work for each page.

    Per the original author's notes, this function is called continuously
    through a job. For every page reported by ``WikiFetch.get_recent_changes()``
    it:

        - creates or updates the page's row in ``db.wikipages`` with its page
          id, title and last known revision;
        - fetches the available analysis types and calls ``create_analysis``
          once per type for that page;
        - collects the ``"value"`` of every response whose ``"status"`` is
          ``"SUCCESS"`` into ``generated_entries``.

    What ``create_analysis`` writes (the original notes mention an active
    status and a ``last_annotated`` revision that is 0 for new entries) is
    decided in that function, which is not part of this block.

    :return: ``locals()`` -- a dict of every local name bound in this call.
        ``generated_entries`` and ``pages_with_changes`` are always present;
        the per-page names (``k``, ``v``, ``pageid``, ``title``,
        ``last_known_rev``, ``query``, ``page_id``, ``analysis_types``, and,
        when at least one analysis type exists, ``analysis`` and
        ``analysis_entry``) are present only if at least one page was
        processed and hold the values from the last iteration.
    """

    # NOTE: local names are part of the return value (``locals()``), so they
    # must not be renamed or removed without checking the callers/views.
    generated_entries = set()  # Analysis entries generated in this call

    # Get pages with changes.
    # Comes in as a dict of pages with recent changes, keyed by page id.
    pages_with_changes = WikiFetch.get_recent_changes()

    # Open each changed page for every analysis by creating or updating its
    # analysis entry. ``items()`` iterates the same pairs as ``iteritems()``
    # on Python 2 and also exists on Python 3.
    for k, v in pages_with_changes.items():
        # Extract page ID, title and latest revision from Wikipedia.
        # Language is not available in this call.
        pageid = k
        title = v["title"]
        last_known_rev = v["last_known_rev"]

        # Update the page table with this page's entry.
        query = db.wikipages.pageid == pageid
        # ``page_id`` is not read again below; it stays bound because it is
        # exposed through ``locals()``.
        page_id = db.wikipages.update_or_insert(query, pageid=pageid, last_known_rev=last_known_rev, title=title)

        # Get available analysis types.
        # NOTE(review): this lookup runs once per page; it looks
        # loop-invariant, but hoisting it would change the returned
        # ``locals()`` when there are no pages -- confirm before moving.
        analysis_types = _get_analysis_type_from_db()
        print(analysis_types)
        # For each available analysis, make an analysis requirement.
        for analysis in analysis_types.keys():

            # Call the create analysis function for writing the entry in the DB.
            analysis_entry = create_analysis(pageid=pageid, analysis_type=analysis)
            # Call form of print: same output on Python 2, valid on Python 3.
            print(analysis_entry)
            if analysis_entry["status"] == "SUCCESS":
                # An entry has been created: add it to the set of generated entries.
                generated_entries.add(analysis_entry["value"])
            else:
                # TODO: Based on response, log the error or success of entries.
                pass

    return locals()