def main():
    """Fetch the page named on the command line and write stored pages to disk.

    Steps, in order:
      1. Configure logging through ``lib.helper`` when ``settings.DEBUG`` is set.
      2. Parse the CLI arguments and build an ``Html`` object from the URL.
      3. Add that object to ``db``.
      4. Feed the page markup to ``MyHTMLParser``.
      5. Log the URLs currently held in ``db`` (debug level only).
      6. Write every object stored in ``db`` to the filesystem.
    """
    import logging
    import settings
    if settings.DEBUG:
        from lib import helper
        helper.config_logging()
    from lib.html import Html
    from lib.myhtmlparser import MyHTMLParser
    from lib.file import write_to_filesystem
    from lib import db

    # Fetch the CLI arguments.
    arguments = parse_arguments()

    # Create the Html object for the requested URL.
    html = Html.from_url_string(url_string=arguments.url)

    # db stores all html objects (URL plus html content); it is expected to
    # keep URLs unique.
    db.add(html)

    # HTML parser used to find URLs, i.e. <a href=''>.
    html_parser = MyHTMLParser()
    html_parser.feed(html.html)

    # For debugging. Arguments are passed lazily so the messages are only
    # formatted when debug logging is actually enabled.
    urls_in_db = db.get_all_url_strings()
    logging.debug('Url count in db: %d', len(urls_in_db))
    logging.debug('Urls in db are: %s', urls_in_db)

    # After fetching, write everything to files. A plain loop is used because
    # the calls are made for their side effect only; no result list is needed.
    for obj in db.get_all():
        write_to_filesystem(html_obj=obj)
def add_user():
    """Store a new user record built from the submitted form fields.

    Reads ``customerId``, ``inputName``, ``inputEmail``, ``inputAmount``,
    ``inputMonth`` and ``Charm`` from ``request.form``. When name, email and
    amount are all non-empty and the request method is POST, the record is
    passed to ``db.add``, a success message is flashed and the client is
    redirected to ``/``. Otherwise the result of ``not_found()`` is returned.
    """
    # NOTE(review): all fields are read before the method check below, so if
    # ``request.form`` raises on a missing key (as Flask's form mapping does)
    # such a request fails here and never reaches ``not_found()`` -- confirm
    # that this is intended.
    customer_id = request.form['customerId']
    name = request.form['inputName']
    email = request.form['inputEmail']
    amount = request.form['inputAmount']
    month = request.form['inputMonth']
    charm = request.form['Charm']

    # Validate the received values.
    if not (name and email and amount and request.method == 'POST'):
        return not_found()

    # Save details. The return value of db.add is not needed, so it is no
    # longer bound to a local (the old name ``id`` also shadowed the builtin).
    db.add({
        '_id': customer_id,
        'name': name,
        'email': email,
        'amount': amount,
        'month': month,
        'charm': charm,
    })
    flash('User added successfully!')
    return redirect('/')
def handle_message_with_entities(message):
    """Record each URL found in *message* and react to frequently seen ones.

    For every entry in ``message['entities']['urls']`` the expanded URL is
    canonicalized with ``urlwork.canonicalize`` and stored through ``db.add``
    together with the id of the posting user and the URL built by
    ``make_tweet_url``. When the value returned by ``db.add`` reaches the
    ``THRESHOLD`` environment variable (default 5),
    ``handle_thresholded_url`` is called for that canonical URL.

    URLs that fail to canonicalize are logged and skipped; they do not stop
    the remaining URLs from being processed.

    Args:
        message: mapping with an ``entities`` entry (containing ``urls``) and
            a ``user`` entry (containing ``id``).

    Raises:
        AssertionError: if ``message['entities']`` is empty.
    """
    # Kept as an assert so callers see the same exception type as before;
    # note that asserts are skipped when Python runs with -O.
    assert message['entities']

    for url_info in message['entities']['urls']:
        url = url_info['expanded_url']
        log.info('Found URL: %s', url)
        try:
            canonical_url = urlwork.canonicalize(url)
        except requests.TooManyRedirects:
            log.error('Too many redirects: %s', url)
        except Exception as e:
            # "except X as e" is valid on Python 2.6+ and Python 3, unlike
            # the old comma form. Broad on purpose: one bad URL must not
            # abort handling of the others.
            log.exception('Canonicalization error: %s', e)
            log.error('URL info: %r', url_info)
        else:
            if canonical_url != url:
                log.info('=> %s', canonical_url)
            source = message['user']['id']
            source_url = make_tweet_url(message)
            count = db.add(canonical_url, source, source_url)
            # The environment variable is a string, hence the int() cast.
            if count >= int(os.environ.get('THRESHOLD', 5)):
                log.info('URL %s seen %d times!', canonical_url, count)
                handle_thresholded_url(canonical_url)