def update_user_consumer(channel, method, header, body):
    """Update a user's information.

    This takes the meta-information passed through the message queue and
    updates the information in the Mongo store.

    The JSON object in ``body`` must carry an ``original_username`` key that
    identifies the record to modify.  Every other key whose value is not null
    is written to that record with ``$set``; a ``username`` key is dropped
    (see the TODO below).  The message is acknowledged once processing is
    finished.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery metadata; supplies ``delivery_tag``.
    :param header: message properties (unused).
    :param body: JSON-encoded object describing the changes.
    :raises KeyError: if the payload has no ``original_username``.

    """

    changes = {k: v for k, v in json.loads(body).items() if v is not None}

    # TODO Stop the silent dropping of username changes:
    # The default keeps a payload without (or with a null) username from
    # raising KeyError before the message can be acknowledged.
    changes.pop("username", None)

    original_username = changes.pop("original_username")

    # An empty ``$set`` document is rejected by some MongoDB releases, which
    # would leave the message unacknowledged; only write when there is
    # something to set.
    if changes:
        get_collection("users").update(
            {"username": original_username},
            {"$set": changes},
            upsert=True,
        )

    channel.basic_ack(delivery_tag=method.delivery_tag)
def password_change_consumer(channel, method, header, body):
    """Change the password -- completing the bottom half of verification.

    This takes the meta-information passed through a message queue and
    performs the following operations:

    * create password hash
    * store password hash

    The hash is the MD5 hex digest of ``username:realm:password`` where the
    realm is ``information.AUTHENTICATION_REALM``.  It is stored in the
    ``hash`` field of the user's record (upserting on ``username``) and the
    message is then acknowledged.

    .. note::
        The verification token is not removed by this function.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery metadata; supplies ``delivery_tag``.
    :param header: message properties (unused).
    :param body: JSON-encoded object with ``username`` and ``password`` keys.
    :raises KeyError: if ``username`` or ``password`` is missing.

    """

    user = json.loads(body)

    # Format as text and encode explicitly so that non-ASCII usernames and
    # passwords hash instead of failing on an implicit ASCII conversion.
    # For pure-ASCII input the digest is unchanged.
    material = u"{0}:{1}:{2}".format(
        user["username"],
        information.AUTHENTICATION_REALM,
        user["password"],
    )
    h = hashlib.md5(material.encode("utf-8")).hexdigest()

    # The hash is deliberately not logged: login() uses the stored value
    # directly to verify digest responses, so it is as sensitive as the
    # password itself.

    get_collection("users").update(
        {"username": user["username"]},
        {"$set": {"hash": h}},
        upsert=True,
    )

    channel.basic_ack(delivery_tag=method.delivery_tag)
def delete(self, username):
    """Delete an User.

    .. note::
        This is an authenticated action that requires an access token from
        the user's token property.

    Request
    -------

    ::

        DELETE /alunduil
        X-Auth-Token: 6e585a2d-438d-4a33-856a-8a7c086421ee

    Response
    --------

    ::

        HTTP/1.0 200 OK

    Possible Errors
    ---------------

    :401: Unauthorized -- Requested a user be deleted that isn't associated
          with the passed token.

    """

    # Resolve which user (if any) the presented token was issued to.
    token_store = get_keyspace("tokens")
    presented_token = request.headers.get("X-Auth-Token")
    token_owner = token_store.get(presented_token)

    if token_owner != username:
        # TODO Redirect to token URL?
        raise UnauthorizedError(username=username)

    # TODO Submit queued job and not write from this API?
    users = get_collection("users")
    users.remove({"username": username})

    return ""
def create_user_consumer(channel, method, header, body):
    """Create a user -- completing the bottom half of user creation.

    This takes the meta-information passed through the message queue and
    performs the following operations:

    * Add User to DataStore
    * Reset the user's password

    The password reset is delegated to ``password_email_consumer``, which
    also acknowledges the message.  If that step raises ``RuntimeError`` the
    freshly inserted user is removed again and the message is left
    unacknowledged.

    A request for a user that already exists is acknowledged and otherwise
    ignored: no email is sent and the existing record is left untouched.

    :param channel: channel the message arrived on.
    :param method: delivery metadata; supplies ``delivery_tag``.
    :param header: message properties; forwarded unchanged.
    :param body: JSON-encoded user document (must contain ``username``).

    """

    user = json.loads(body)

    try:
        get_collection("users").insert(user)  # TODO Fix race condition of multiple sign-ups.
    except pymongo.errors.DuplicateKeyError as e:
        logger.exception(e)
        logger.error("Duplicate user request ignored!")
        # Stop here.  Falling through would run the rollback below against a
        # record this request did not create, deleting the pre-existing user
        # whenever the email step fails.
        channel.basic_ack(delivery_tag=method.delivery_tag)
        return

    try:
        password_email_consumer(channel, method, header, body)
    except RuntimeError as e:
        logger.exception(e)
        # Roll back the insert made above so the sign-up can be retried.
        get_collection("users").remove({"username": user["username"]})
def get(self, username):
    """Retrieve an User's information.

    Request
    -------

    ::

        GET /alunduil

    Response
    --------

    ::

        HTTP/1.0 200 OK

        {
          "username": "******",
          "name": "Alex Brandt",
          "email": "*****@*****.**"
        }

    The stored password hash and the internal ``_id`` are never included in
    the response.

    Possible Errors
    ---------------

    :404: Not Found -- No user with the requested username exists.

    """

    # Project out the fields that must not (hash) or cannot (_id is not JSON
    # serialisable by the json module) be sent to the client.
    user = get_collection("users").find_one(
        {"username": username},
        {"hash": 0, "_id": 0},
    )

    # TODO Should this be an authenticated action?

    logger.debug("user: %s", user)

    if user is None:
        abort(404)

    # TODO Set JSON mimetype?

    # Serialise the document itself so the body is the JSON object shown
    # above rather than a JSON string wrapping the dict's Python repr.
    return json.dumps(user)
def update_references_consumer(channel, method, header, body):
    """Update the references to and from the article specified.

    The intent is to take the UUID of the article being added, search it for
    links to other articles (updating the remote references) and then find
    the articles that point back at this one, completing the citation
    circle.  Two types of notations are involved:

    * citations
    * references

    None of that is implemented yet: at present the function decodes the
    message, looks the article up in the ``articles`` collection and
    acknowledges the message.

    .. note::
        This is an easy area for improvement with algorithms on search; the
        planned process is extremely heavy once there is more than a trivial
        number of articles in the data store.

    """

    payload = json.loads(body)

    logger.debug("article: %s", payload)

    # Fetched for the algorithm sketched below; not used yet.
    article = get_collection("articles").find_one({"_id": payload["_id"]})

    # TODO Implement the following general algorithm:
    # Retrieve the raw HTML
    # Find all links to other cataloged articles
    # Update those articles with a citation reference to this article
    # Update this article with a reference reference to the remote articles
    # Search all bodies of all articles for links to this article
    # Update those articles with a reference reference to this article
    # Update this article with a citation reference to the remote articles.

    # TODO Automatic indexing of articles:
    #article["tags"] += extract_keywords(soup)

    channel.basic_ack(delivery_tag=method.delivery_tag)
def password_email_consumer(channel, method, header, body):
    """Send a user the link to reset their password.

    Using the username carried in the queued message, this loads the user's
    record, generates a fresh verification token and then:

    * Records the token in the ``verifications`` store, mapped to the
      username, with a six hour expiry
    * Emails the verification to the user via ``send_user_email``

    The message is acknowledged afterwards.

    """

    requested = json.loads(body)

    # Work from the stored record rather than the queued payload.
    users = get_collection("users")
    account = users.find_one({"username": requested["username"]})

    token = uuid.uuid4()

    get_keyspace("verifications").setex(
        token,
        account["username"],
        datetime.timedelta(hours=6),
    )

    send_user_email(account, token)

    channel.basic_ack(delivery_tag=method.delivery_tag)
def login(username):
    """Get an authorized token for subsequent API calls.

    This is the login method and must be called to get the token required
    for all calls noting that they require the X-Auth-Token header.

    A password is required, and digest authentication is used to improve
    security.  As a consequence a client cannot simply pass a username and
    password and receive a token; the token request takes two invocations:

    1. Returns the HTTP Digest parameters
    2. Returns the X-Auth-Token value

    Challenge-Response
    ------------------

    A challenge is sent every time the API returns a 401 Unauthorized.  This
    is the first step in getting a token.

    Response (Challenge)
    ''''''''''''''''''''

    ::

        401 Unauthorized
        Location: /alunduil/token
        WWW-Authenticate: Digest realm="margarine.api",
          qop="auth",
          nonce="0cc175b9c0f1b6a831c399e269772661",
          opaque="92eb5ffee6ae2fec3ad71c777531578f"

    Request (Client Authentication Response)
    ''''''''''''''''''''''''''''''''''''''''

    ::

        GET /alunduil/token HTTP/1.1
        Host: www.example.com
        Authorization: Digest username="******",
          realm="margarine.api",
          nonce="0cc175b9c0f1b6a831c399e269772661",
          uri="/v1/users/alunduil/token",
          qop=auth,
          nc=00000001,
          cnonce="4a8a08f09d37b73795649038408b5f33",
          response="2370039ff8a9fb83b4293210b5fb53e3",
          opaque="92eb5ffee6ae2fec3ad71c777531578f"

    Response (Token)
    ''''''''''''''''

    ::

        HTTP/1.1 200 OK

        0b4fb639-edd1-44fe-b757-589a099097a5

    The issued token is stored in the ``tokens`` keyspace, mapped to the
    username, with a six hour expiry.

    """

    logger.info("Checking authentication!")

    credentials = request.authorization

    # No credentials at all, or credentials not answering our challenge.
    if credentials is None:
        raise UnauthorizedError(username=username)
    if credentials.opaque != Parameters()["api.uuid"]:
        raise UnauthorizedError(username=username)

    user = get_collection("users").find_one({"username": username})

    logger.debug("user: %s", user)

    if user is None:
        abort(404)

    # The stored hash is used as-is for the first digest component.
    h1 = user["hash"]

    request_template = "{request.method}:{request.path}"
    h2 = hashlib.md5(request_template.format(request=request)).hexdigest()

    response_template = "{h1}:{a.nonce}:{a.nc}:{a.cnonce}:{a.qop}:{h2}"
    expected = hashlib.md5(
        response_template.format(h1=h1, a=request.authorization, h2=h2)
    ).hexdigest()

    logger.debug("response: %s", request.authorization.response)
    logger.debug("h3: %s", expected)

    if request.authorization.response != expected:
        raise UnauthorizedError(username=username)

    issued = str(uuid.uuid4())

    get_keyspace("tokens").setex(issued, username, datetime.timedelta(hours=6))

    return issued
def put(self, username):
    """Create an User or modify an existing User.

    Create an User
    ==============

    To create a new user in the system, perform a PUT on the particular
    user's URL that want created with any parameters (required and optional)
    specified in the form data.

    Request
    -------

    ::

        PUT /alunduil
        Content-Type: application/x-www-form-urlencoded

        email=alunduil%40alunduil.com
        name=Alex%20Brandt

    Response
    --------

    ::

        HTTP/1.0 202 Accepted

    Modify an User
    ==============

    This method can also be used to modify an existing user -- not just for
    creating new users.

    Request
    -------

    ::

        PUT /alunduil
        Content-Type: application/x-www-form-urlencoded
        X-Auth-Token: 6e585a2d-438d-4a33-856a-8a7c086421ee

        email=alunduil%40alunduil.com

    Response
    --------

    ::

        HTTP/1.0 200 OK

    .. note::
        As implemented, both creation and modification are queued and this
        method answers ``202`` with an empty body in either case.

    Possible Errors
    ===============

    :400: Bad Request -- A required option was not passed or is improperly
          formatted
    :401: Unauthorized -- An attempt to create an existing user was detected

    The following are also used when updating a user:

    :409: Conflict -- The new username requested is already in use.

    """

    user = get_collection("users").find_one({"username": username})

    logger.debug("user: %s", user)

    message_properties = pika.BasicProperties()
    message_properties.content_type = "application/json"
    message_properties.durable = False

    message = {
        "username": request.form.get("username", username),
        "email": request.form.get("email"),
        "name": request.form.get("name"),
    }

    # An existing record turns the request into an update, which must be
    # authorised by a token issued to that same user.
    routing_key = "users.create"

    if user is not None:
        routing_key = "users.update"

        message["original_username"] = username

        logger.debug("X-Auth-Token: %s", request.headers.get("X-Auth-Token"))

        if get_keyspace("tokens").get(request.headers.get("X-Auth-Token")) != username:
            # TODO Redirect to token URL?
            raise UnauthorizedError(username=username)

    # An email address is only mandatory when creating a user.
    if message["email"] is None and routing_key == "users.create":
        abort(400)

    message = json.dumps(message)

    channel = get_channel()

    # Release the channel even when declaring the exchange or publishing
    # fails, so a failed request does not leak it.
    try:
        channel.exchange_declare(exchange="margarine.users.topic", type="topic", auto_delete=False)
        channel.basic_publish(
            body=message,
            exchange="margarine.users.topic",
            properties=message_properties,
            routing_key=routing_key
        )
    finally:
        channel.close()

    return "", 202
def article(article_id):
    """Retrieve a sanitized article.

    Request
    -------

    ::

        GET /44d85795-248d-5899-b8ca-ac2bd8233755

    Response
    --------

    .. note::
        The following is formatted for readability and does not match the
        actual response from the API.  Also, the body parameter has been
        shortened to fit this example more concisely.

    ::

        HTTP/1.0 200 Ok

        {
          "body": "...Singularity, an Alternative Openstack Guest Agent | Hackery &c...",
          "url": "http://blog.alunduil.com/posts/singularity-an-alternative-openstack-guest-agent.html",
          "created_at": {"$date": 1374007667571},
          "etag": "6e2f69536ca15cc18260bffe7583b849",
          "_id": "03db19bb92205b4fb5fc3c4c0e4b1279",
          "parsed_at": {"$date": 1374008521414},
          "size": 9964
        }

    For ``HEAD`` requests the stored text is not fetched and ``body`` is
    omitted.  The ``text_container_name`` and ``text_object_name`` fields of
    the stored document are never part of the response.

    Possible Errors
    ---------------

    :404: Not Found -- ``article_id`` is not a valid UUID, no such article
          exists, or the article has not been sanitized yet.

    """

    # A malformed identifier cannot name any article; answer 404 rather than
    # letting the ValueError surface as a server error.
    try:
        _id = uuid.UUID(article_id).hex
    except ValueError:
        abort(404)

    article = get_collection("articles").find_one({"_id": _id})

    logger.debug("article: %s", article)

    if article is None or "etag" not in article:
        # 404 not only if the object doesn't exist but also if we haven't
        # sanitized the body yet.
        abort(404)

    # Storage coordinates are internal; strip them from the response.
    container_name = article.pop("text_container_name")
    object_name = article.pop("text_object_name")

    logger.debug("article: %s", article)

    # TODO Catch connection issues and return Temporarily Unavailable.
    if request.method != "HEAD":
        data = get_container(container_name).get_object(object_name).fetch()

        logger.debug("type(data): %s", type(data))
        logger.debug("len(data): %s", len(data))

        article["body"] = data

    response = make_response(json.dumps(article, default=json_util.default), 200)

    response.mimetype = "application/json"
    response.headers["Access-Control-Allow-Origin"] = Parameters()["server.domain"]

    return response
def create_article_consumer(channel, method, header, body):
    """Create an article -- completing the bottom half of article creation.

    This takes the UUID of the article to create and the URL passed through
    the message queue, records them in the ``articles`` collection and hands
    the article on for further processing.

    This process should be idempotent and therefore not have any ill-effect
    if invoked multiple times (i.e. POSTed by multiple users).

    Performs the following specific actions:

    * Sets ``created_at`` when the stored article does not have one yet.
    * Writes only those fields that are new or differ from the stored
      article (upserting on ``_id``); nothing is written when there is no
      difference.
    * Publishes ``{"_id": ...}`` to the ``margarine.articles.create`` fanout
      exchange so downstream jobs can run in parallel.
    * Acknowledges the message.

    .. note::
        The article will need a pre-allocated space in MongoDB for
        performance reasons.  The way we start with the bare necessities and
        then add information at a later time means we may get fragmentation
        in the data stored if we don't pre-allocate enough space.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery metadata; supplies ``delivery_tag``.
    :param header: message properties (unused).
    :param body: JSON-encoded article; must contain ``_id``.

    """

    article = json.loads(body)

    logger.debug("article: %s", article)

    _id = article.pop("_id")

    articles = get_collection("articles")

    existing = articles.find_one({"_id": _id})

    logger.debug("Found: %s", existing)

    if existing is None or "created_at" not in existing:
        article["created_at"] = datetime.datetime.now()

    # Keep only what actually changes relative to the stored document.
    if existing is not None:
        article = {
            k: v for k, v in article.items()
            if k not in existing or v != existing[k]
        }

    logger.debug("article: %s", article)
    logger.debug("_id: %s", _id)

    # A repeated submission leaves nothing to change, and an empty ``$set``
    # is rejected by some MongoDB releases -- skip the write in that case so
    # re-processing stays harmless.
    if article:
        articles.update({"_id": _id}, {"$set": article}, upsert=True)

    message_properties = pika.BasicProperties()
    message_properties.content_type = "application/json"
    message_properties.durable = False

    message = json.dumps({"_id": _id})

    publisher = get_channel()

    # Release the publishing channel even when the publish fails.
    try:
        publisher.exchange_declare(exchange="margarine.articles.create", type="fanout", auto_delete=False)
        publisher.basic_publish(
            body=message,
            exchange="margarine.articles.create",
            properties=message_properties,
            routing_key="articles.create"
        )
    finally:
        publisher.close()

    channel.basic_ack(delivery_tag=method.delivery_tag)
def sanitize_html_consumer(channel, method, header, body):
    """Download and sanitize the HTML for the given article.

    The HTML should be simplified as much as possible without modifying the
    feel of the structure to someone reading the content of the body of the
    document.

    The article's URL is first probed with a ``HEAD`` request.  When the
    returned ETag differs from the stored one -- or the server supplies no
    ETag at all, so a change cannot be ruled out -- the page is downloaded,
    reduced to its text, uploaded to the object store and the article's
    ``etag``, ``parsed_at``, ``size``, ``text_container_name`` and
    ``text_object_name`` fields are updated.  The message is acknowledged in
    either case.

    .. note::
        Analysis will be necessary that shows the statistics on sanitized
        HTML size for a determination as to whether we can store it inline
        in Mongo or out of band in an object store like Rackspace Cloud
        Files.

    The decisions and algorithms used for streamlining the HTML are not
    proprietary in any way and can be used and modified under the terms of
    this file's licensing but more importantly can be improved or modified
    if imperfections are found.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery metadata; supplies ``delivery_tag``.
    :param header: message properties (unused).
    :param body: JSON-encoded object with the article's ``_id``.

    """

    _id = json.loads(body)["_id"]

    logger.debug("article._id: %s", _id)

    articles = get_collection("articles")

    # ``_id`` is excluded so the document can be passed back through $set.
    article = articles.find_one({"_id": _id}, {"_id": 0})

    head_request = urllib2.Request(article["url"])
    head_request.get_method = lambda: "HEAD"

    response = urllib2.urlopen(head_request)
    try:
        logger.debug("response: %s", response)
        logger.debug("response.info(): %s", response.info())
        logger.debug("response.info().__class__: %s", response.info().__class__)

        etag = response.info().getheader("etag")
    finally:
        # Do not hold the connection open past reading the headers.
        response.close()

    # TODO Check Last-Modified?
    # TODO Use expires to set the next poll?
    # TODO Respect Cache-Control?
    # TODO Other header considerations.
    # TODO Use Content-Type to set encoding?

    # With no ETag from the server, a never-parsed article would compare
    # None to None and be skipped forever; treat a missing ETag as changed.
    if etag is None or article.get("etag") != etag:
        logger.info("Parsing full HTML of %s", article["url"])

        article["etag"] = etag

        response = urllib2.urlopen(article["url"])
        try:
            soup = bs4.BeautifulSoup(response.read())
        finally:
            response.close()

        # TODO Use this when more is required:
        #html = sanitize(soup)
        html = soup.get_text()

        article["parsed_at"] = datetime.datetime.now()

        # NOTE: sys.getsizeof reports the in-memory size of the string
        # object, not the length of the text.
        size = sys.getsizeof(html)

        logger.debug("HTML Size: %s B", size)

        article["size"] = size

        # The first UUID group names the container, the rest the object.
        container_part, object_part = str(uuid.UUID(_id)).split("-", 1)

        article["text_container_name"] = "margarine-" + container_part
        article["text_object_name"] = object_part

        logger.info("Uploading text to cloudfiles")

        get_container(article["text_container_name"]).store_object(
            article["text_object_name"], html, content_type="text/html")

        logger.info("Uploaded text to cloudfiles")

        articles.update({"_id": _id}, {"$set": article}, upsert=True)

        logger.info("finished processing article: %s", article["url"])

    channel.basic_ack(delivery_tag=method.delivery_tag)