Beispiel #1
0
def get_source_if_matches(source_url, source, state, conditions=[(1, 0, 0)], fresh=False):
    """
        Look up or fetch the page behind a source URL and report the outcome.

        Builds a result dict holding 'url', 'source' and 'state', then adds
        either a 'skip' entry (nothing new to do) or an 'error' entry
        (fetching or page creation failed), each carrying a 'text' message.

        source_url -- URL to look up through Page.get_url and, when no page
                      is stored yet, to fetch through WebPage.
        source     -- passed on to Page.from_web_page and echoed in the result.
        state      -- echoed in the result; not otherwise used in this listing.
        conditions -- presumably a list of (min_candidates,
                      min_constituencies, min_parties) tuples; it is not
                      read anywhere in this listing -- TODO confirm.
        fresh      -- when false, an already stored page is reported as a skip.

        NOTE(review): the default for conditions is a mutable list shared
        between calls; that is only safe while no code mutates it.
    """
    
    result = {
        'url': source_url,
        'source': source,
        'state': state
    }

    # Ask the Page store whether this URL has been saved before.
    page = Page.get_url(source_url)

    if page is not None:
        print "Page already exists."

        # With fresh=True nothing is recorded here and the stored page is
        # kept in `page`.
        if not fresh:
            result['skip'] = {
                'text': 'Page already exists.'
            }

    else:
        print "Page doesn't exist"

        web_page = WebPage(source_url)

        try:
            web_page.fetch()
        except WebPage.FailedToFetch, e:
            result['error'] = {
                'type': 'WebPage.FailedToFetch',
                'text': str(e),
            }

        # NOTE(review): a failed fetch does not stop the function; the
        # is_local check below still runs and may add 'skip' next to 'error'
        # or attempt to build a Page from the unfetched web_page -- confirm
        # that this fall-through is intended.
        if web_page.is_local:
            result['skip'] = {
                'text': 'Already in cache',
            }
        else:
            try:
                page = Page.from_web_page(web_page, source)
                page.save()
            except Page.FetchError, e:
                print "FAILED", e
                # Overwrites any 'error' entry recorded by the fetch above.
                result['error'] = {
                    'type': 'Page.FetchError',
                    'text': str(e),
                }
Beispiel #2
0
def get_source_if_matches(source_url,
                          source,
                          state,
                          conditions=[(1, 0, 0)],
                          fresh=False):
    """
        Look up or fetch the page behind a source URL and report the outcome.

        Builds a result dict holding 'url', 'source' and 'state', then adds
        either a 'skip' entry (nothing new to do) or an 'error' entry
        (fetching or page creation failed), each carrying a 'text' message.

        source_url -- URL to look up through Page.get_url and, when no page
                      is stored yet, to fetch through WebPage.
        source     -- passed on to Page.from_web_page and echoed in the result.
        state      -- echoed in the result; not otherwise used in this listing.
        conditions -- presumably a list of (min_candidates,
                      min_constituencies, min_parties) tuples; it is not
                      read anywhere in this listing -- TODO confirm.
        fresh      -- when false, an already stored page is reported as a skip.

        NOTE(review): the default for conditions is a mutable list shared
        between calls; that is only safe while no code mutates it.
    """

    result = {'url': source_url, 'source': source, 'state': state}

    # Ask the Page store whether this URL has been saved before.
    page = Page.get_url(source_url)

    if page is not None:
        print "Page already exists."

        # With fresh=True nothing is recorded here and the stored page is
        # kept in `page`.
        if not fresh:
            result['skip'] = {'text': 'Page already exists.'}

    else:
        print "Page doesn't exist"

        web_page = WebPage(source_url)

        try:
            web_page.fetch()
        except WebPage.FailedToFetch, e:
            result['error'] = {
                'type': 'WebPage.FailedToFetch',
                'text': str(e),
            }

        # NOTE(review): a failed fetch does not stop the function; the
        # is_local check below still runs and may add 'skip' next to 'error'
        # or attempt to build a Page from the unfetched web_page -- confirm
        # that this fall-through is intended.
        if web_page.is_local:
            result['skip'] = {
                'text': 'Already in cache',
            }
        else:
            try:
                page = Page.from_web_page(web_page, source)
                page.save()
            except Page.FetchError, e:
                print "FAILED", e
                # Overwrites any 'error' entry recorded by the fetch above.
                result['error'] = {
                    'type': 'Page.FetchError',
                    'text': str(e),
                }
Beispiel #3
0
        print >>sys.stderr, datetime.now(), result

    return result


def get_source(source_url, source, state):
    """
        Get a source and save it, no matter what.
    """
    
    web_page = WebPage(source_url)

    try:
        web_page.fetch()
    except WebPage.FailedToFetch, e:
        print "FAILED", e
        return None

    page = Page.from_web_page(web_page, source)
    page.save()

    new, article = get_or_create_doc([page])

    article.process() 

    article.state = state
    article.save()

    return article

Beispiel #4
0
    if 'error' in result:
        print >> sys.stderr, datetime.now(), result

    return result


def get_source(source_url, source, state):
    """
        Get a source and save it, no matter what.
    """

    web_page = WebPage(source_url)

    try:
        web_page.fetch()
    except WebPage.FailedToFetch, e:
        print "FAILED", e
        return None

    page = Page.from_web_page(web_page, source)
    page.save()

    new, article = get_or_create_doc([page])

    article.process()

    article.state = state
    article.save()

    return article