Esempio n. 1
0
def main():
    """Fetch the page named on the command line and write it to disk.

    Steps, in order: configure logging through ``lib.helper`` when
    ``settings.DEBUG`` is set, build an ``Html`` object from the ``url``
    command-line argument, add it to ``db``, feed its markup to
    ``MyHTMLParser``, log the URLs currently held in ``db``, and finally
    pass every object returned by ``db.get_all()`` to ``write_to_filesystem``.
    """
    import logging
    import settings

    if settings.DEBUG:
        from lib import helper
        helper.config_logging()

    from lib.html import Html
    from lib.myhtmlparser import MyHTMLParser
    from lib.file import write_to_filesystem
    from lib import db

    # fetching cli args
    arguments = parse_arguments()

    # creating Html object
    html = Html.from_url_string(url_string=arguments.url)

    # db is the storage of all html objects (url plus html content);
    # adding to db, which keeps urls unique
    db.add(html)

    # html parser used to find urls, i.e. <a href=''>
    html_parser = MyHTMLParser()
    html_parser.feed(html.html)

    # for debugging; arguments are passed lazily so the message is only
    # formatted when the DEBUG level is actually enabled
    urls_in_db = db.get_all_url_strings()
    logging.debug('Url count in db: %d', len(urls_in_db))
    logging.debug('Urls in db are: %s', urls_in_db)

    # after fetching, writing to files (plain loop: the calls are made for
    # their side effect, so no result list is needed)
    for obj in db.get_all():
        write_to_filesystem(html_obj=obj)
def add_user():
    """Store a user built from the submitted form, then redirect to ``/``.

    Reads ``customerId``, ``inputName``, ``inputEmail``, ``inputAmount``,
    ``inputMonth`` and ``Charm`` from ``request.form``.

    Returns:
        The result of ``redirect('/')`` after the record has been passed to
        ``db.add`` and a success message flashed, or the result of
        ``not_found()`` when name, email or amount is empty or the request
        method is not POST.
    """
    # NOTE(review): the form is indexed before the method check, so a missing
    # field fails here rather than reaching not_found() -- confirm intended.
    form = request.form
    customer_id = form['customerId']
    name = form['inputName']
    email = form['inputEmail']
    amount = form['inputAmount']
    month = form['inputMonth']
    charm = form['Charm']

    # validate the received values
    if not (name and email and amount and request.method == 'POST'):
        return not_found()

    # save details; the return value of db.add is not needed here
    db.add({
        '_id': customer_id,
        'name': name,
        'email': email,
        'amount': amount,
        'month': month,
        'charm': charm,
    })
    flash('User added successfully!')
    return redirect('/')
Esempio n. 3
0
def handle_message_with_entities(message):
    """Record every URL carried by *message* and react to frequent ones.

    For each entry in ``message['entities']['urls']`` the ``expanded_url`` is
    canonicalized with ``urlwork.canonicalize`` and passed to ``db.add``
    together with ``message['user']['id']`` and the result of
    ``make_tweet_url(message)``. When the count returned by ``db.add``
    reaches the ``THRESHOLD`` environment variable (default 5),
    ``handle_thresholded_url`` is called with the canonical URL.

    A URL whose canonicalization raises is logged and skipped; the remaining
    URLs are still processed.

    Raises:
        AssertionError: if ``message['entities']`` is falsy (the check is an
            ``assert``, so it is skipped when Python runs with ``-O``).
    """
    assert message['entities']
    for url_info in message['entities']['urls']:
        url = url_info['expanded_url']
        log.info('Found URL: %s', url)
        try:
            canonical_url = urlwork.canonicalize(url)
        except requests.TooManyRedirects:
            log.error('Too many redirects: %s', url)
        # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
        # Python 2-only "except X, e" form.
        except Exception as e:
            log.exception('Canonicalization error: %s', e)
            log.error('URL info: %r', url_info)
        else:
            if canonical_url != url:
                log.info('=> %s', canonical_url)

            source = message['user']['id']
            source_url = make_tweet_url(message)
            count = db.add(canonical_url, source, source_url)

            if count >= int(os.environ.get('THRESHOLD', 5)):
                log.info('URL %s seen %d times!', canonical_url, count)
                handle_thresholded_url(canonical_url)
def handle_message_with_entities(message):
    """Record every URL carried by *message* and react to frequent ones.

    For each entry in ``message['entities']['urls']`` the ``expanded_url`` is
    canonicalized with ``urlwork.canonicalize`` and passed to ``db.add``
    together with ``message['user']['id']`` and the result of
    ``make_tweet_url(message)``. When the count returned by ``db.add``
    reaches the ``THRESHOLD`` environment variable (default 5),
    ``handle_thresholded_url`` is called with the canonical URL.

    A URL whose canonicalization raises is logged and skipped; the remaining
    URLs are still processed.

    Raises:
        AssertionError: if ``message['entities']`` is falsy (the check is an
            ``assert``, so it is skipped when Python runs with ``-O``).
    """
    assert message['entities']
    for url_info in message['entities']['urls']:
        url = url_info['expanded_url']
        log.info('Found URL: %s', url)
        try:
            canonical_url = urlwork.canonicalize(url)
        except requests.TooManyRedirects:
            log.error('Too many redirects: %s', url)
        # "except X as e" is valid on Python 2.6+ and Python 3, unlike the
        # Python 2-only "except X, e" form.
        except Exception as e:
            log.exception('Canonicalization error: %s', e)
            log.error('URL info: %r', url_info)
        else:
            if canonical_url != url:
                log.info('=> %s', canonical_url)

            source = message['user']['id']
            source_url = make_tweet_url(message)
            count = db.add(canonical_url, source, source_url)

            if count >= int(os.environ.get('THRESHOLD', 5)):
                log.info('URL %s seen %d times!', canonical_url, count)
                handle_thresholded_url(canonical_url)