def bookmarklet_follow(request):
    """Load the bookmarked page, process it and redirect to the new topic.

    The page URI is read from the ``u`` query parameter and its title from
    the ``t`` query parameter. The page is downloaded, wrapped in a
    ``DiscoveredResource`` and sent to the processing service; a topic is
    then created from the processed resource's title, terms and entities.

    Returns the result of ``redirect`` to the ``topics_show`` view for the
    created topic.

    Raises whatever ``request.GET[...]`` raises when ``u`` or ``t`` is
    missing, and whatever the opener raises when the page cannot be loaded.
    """
    uri = request.GET["u"]
    title = request.GET["t"]

    # url opener, with the redirect limit taken from the settings
    redirect_handler = HTTPRedirectHandler()
    redirect_handler.max_redirections = settings.CONFIG["url"]["max_redirections"]
    opener = urllib2.build_opener(redirect_handler)

    # web page loading
    # NOTE(review): ``uri`` comes straight from the query string and is
    # fetched server-side. urllib2.build_opener installs file:// and ftp://
    # handlers by default, so consider restricting the scheme to http/https.
    handle = opener.open(uri)
    try:
        encoding = detect_header_encoding(handle.headers.dict)
        content = decode_html(handle.read(), encoding)
    finally:
        # always release the connection, even if reading or decoding fails
        handle.close()

    # build a resource
    discovered_resource = DiscoveredResource()
    discovered_resource.uri = uri
    discovered_resource.title = title
    discovered_resource.content = content

    # process the discovered resource
    resource = processing_service_client.process(discovered_resource)
    topic = topic_manager.create_from_features(
        resource.title, resource.terms, resource.entities)

    return redirect("topic_tracking_web.demo.views.topics_show", topic._id)
pass # remove resource from collection self._resources_collection.remove_model(resource)


if __name__ == '__main__':
    # configuration file: its path is the first command-line argument
    if len(sys.argv) < 2:
        sys.exit('usage: %s <config_file>' % sys.argv[0])
    config_file = sys.argv[1]

    # NOTE(review): yaml.load can construct arbitrary Python objects; if the
    # configuration file may ever come from an untrusted source, switch to
    # yaml.safe_load.
    # The ``with`` block closes the file handle once parsing is done.
    with open(config_file, 'r') as config_stream:
        config = yaml.load(config_stream)

    # logging
    logging.config.dictConfig(config['logging'])
    logger = logging.getLogger()

    # MongoDB
    mcm = mongo_from_config(config['mongo'])
    database = config['mongo']['databases']['discovery']
    resources_collection = mcm.get_collection(
        database, 'resources', DiscoveredResource)

    # url opener, with the redirect limit taken from the configuration
    redirect_handler = HTTPRedirectHandler()
    redirect_handler.max_redirections = config['url']['max_redirections']
    opener = urllib2.build_opener(redirect_handler)

    # load pages to the queue
    loader = WebPageLoader(resources_collection, opener, config)
    loader.start()