Example #1
0
def bookmarklet_follow(request):
    """Create a topic from the page a bookmarklet points at, then redirect.

    The page address is read from the ``u`` query parameter and its title
    from the ``t`` query parameter. The page is downloaded, wrapped in a
    ``DiscoveredResource``, passed to ``processing_service_client.process``
    and a topic is created from the processed resource's title, terms and
    entities.

    Both parameters are read with plain subscription on ``request.GET``, so
    a request lacking either one fails on that lookup.

    Returns the result of ``redirect`` to the ``topics_show`` view, called
    with the new topic's ``_id``.
    """
    uri = request.GET["u"]
    title = request.GET["t"]

    # url opener with a configurable cap on followed redirects
    redirect_handler = HTTPRedirectHandler()
    redirect_handler.max_redirections = settings.CONFIG["url"]["max_redirections"]
    opener = urllib2.build_opener(redirect_handler)

    # web page loading
    # NOTE(review): ``uri`` comes straight from the query string and is
    # fetched server-side without validation; consider restricting the
    # allowed schemes/hosts.
    handle = opener.open(uri)
    try:
        encoding = detect_header_encoding(handle.headers.dict)
        content = decode_html(handle.read(), encoding)
    finally:
        # release the connection even when reading or decoding fails
        handle.close()

    # build a resource
    discovered_resource = DiscoveredResource()
    discovered_resource.uri = uri
    discovered_resource.title = title
    discovered_resource.content = content

    # process the discovered resource
    resource = processing_service_client.process(discovered_resource)

    topic = topic_manager.create_from_features(resource.title, resource.terms, resource.entities)

    return redirect("topic_tracking_web.demo.views.topics_show", topic._id)
            pass

        # remove resource from collection
        self._resources_collection.remove_model(resource)


if __name__ == '__main__':

    # configuration file: its path is the first command-line argument
    config_file = sys.argv[1]
    # NOTE(review): yaml.load can construct arbitrary Python objects from
    # tagged input; if the configuration needs only plain YAML types,
    # consider yaml.safe_load.
    # The context manager closes the file once it has been parsed.
    with open(config_file, 'r') as config_stream:
        config = yaml.load(config_stream)

    # logging
    logging.config.dictConfig(config['logging'])
    logger = logging.getLogger()

    # MongoDB: collection of discovered resources
    mcm = mongo_from_config(config['mongo'])
    database = config['mongo']['databases']['discovery']
    resources_collection = mcm.get_collection(database, 'resources', DiscoveredResource)

    # url opener with a configurable cap on followed redirects
    redirect_handler = HTTPRedirectHandler()
    redirect_handler.max_redirections = config['url']['max_redirections']
    opener = urllib2.build_opener(redirect_handler)

    # load pages to the queue
    loader = WebPageLoader(resources_collection, opener, config)
    loader.start()