Beispiel #1
0
def update_user_consumer(channel, method, header, body):
    """Apply a queued user update to the Mongo ``users`` collection.

    The message body is a JSON object.  Fields whose value is null are
    discarded so they never overwrite stored values.  The remaining fields are
    written with ``$set`` to the document whose ``username`` equals the
    message's ``original_username``.  The update is an upsert, so the document
    is created when no match exists.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery information; ``method.delivery_tag`` identifies
                   the message to acknowledge.
    :param header: message properties (unused).
    :param body: JSON-encoded object that must carry ``original_username``.

    :raises KeyError: if ``original_username`` is missing or null.

    """

    changes = dict((key, value) for key, value in json.loads(body).iteritems() if value is not None)

    # TODO Stop the silent dropping of username changes:
    # A null username was already filtered out above, so tolerate its absence
    # rather than failing with KeyError and leaving the message unacknowledged.
    changes.pop("username", None)

    original_username = changes.pop("original_username")

    get_collection("users").update({ "username": original_username }, { "$set": changes }, upsert = True)

    channel.basic_ack(delivery_tag = method.delivery_tag)
Beispiel #2
0
def password_change_consumer(channel, method, header, body):
    """Change the password—completing the bottom half of verification.

    This takes the meta-information passed through a message queue and performs
    the following operations:

    * create password hash (MD5 of ``username:realm:password``)
    * store password hash on the user's document (upsert)

    .. note::
        Removing the verification token is not done by this function.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery information; ``method.delivery_tag`` identifies
                   the message to acknowledge.
    :param header: message properties (unused).
    :param body: JSON-encoded object with ``username`` and ``password``.

    :raises KeyError: if ``username`` or ``password`` is missing.

    """

    user = json.loads(body)

    # json.loads yields text, so build the digest input as text and encode it
    # explicitly; formatting into a byte string fails on non-ASCII characters.
    credentials = u"{0}:{1}:{2}".format(user["username"], information.AUTHENTICATION_REALM, user["password"])

    h = hashlib.md5(credentials.encode("utf-8")).hexdigest()

    # The hash is password-equivalent for digest authentication, so it is
    # deliberately kept out of the logs.
    logger.debug("storing password hash for user: %s", user["username"])

    get_collection("users").update({ "username": user["username"] }, { "$set": { "hash": h } }, upsert = True)

    channel.basic_ack(delivery_tag = method.delivery_tag)
Beispiel #3
0
    def delete(self, username):
        """Remove a user's document from the data store.

        .. note::
            This is an authenticated action: the ``X-Auth-Token`` header must
            hold a token that the token store maps to ``username``.

        Request
        -------

        ::

            DELETE /alunduil
            X-Auth-Token: 6e585a2d-438d-4a33-856a-8a7c086421ee

        Response
        --------

        ::

            HTTP/1.0 200 OK

        Possible Errors
        ---------------

        :401: Unauthorized—The passed token is not associated with the user
              whose deletion was requested.

        """

        tokens = get_keyspace("tokens")
        token_owner = tokens.get(request.headers.get("X-Auth-Token"))

        if token_owner == username:
            # TODO Submit queued job and not write from this API?
            get_collection("users").remove({"username": username})

            return ""

        # TODO Redirect to token URL?
        raise UnauthorizedError(username=username)
Beispiel #4
0
def create_user_consumer(channel, method, header, body):
    """Create a user—completing the bottom half of user creation.

    This takes the meta-information passed through the message queue and
    performs the following operations:

    * Add User to DataStore
    * Reset the user's password (by delegating to password_email_consumer,
      which also acknowledges the message)

    A duplicate user is logged and the password email is still attempted.  If
    the email step raises RuntimeError, the user document is removed again,
    but only when it was inserted by this call.  The message is not
    acknowledged in that case.

    :param channel: channel the message arrived on.
    :param method: delivery information for the message.
    :param header: message properties.
    :param body: JSON-encoded user document; must carry ``username``.

    """

    user = json.loads(body)

    try:
        get_collection("users").insert(user) # TODO Fix race condition of multiple sign-ups.
    except pymongo.errors.DuplicateKeyError as e:
        created = False

        logger.exception(e)
        logger.error("Duplicate user request ignored!")
    else:
        created = True

    try:
        password_email_consumer(channel, method, header, body)
    except RuntimeError as e:
        logger.exception(e)

        # Only undo our own insert; a duplicate request must never delete the
        # account that already existed.
        if created:
            get_collection("users").remove({ "username": user["username"] })
Beispiel #5
0
    def get(self, username):
        """Retrieve a User's information.

        Request
        -------

        ::

            GET /alunduil

        Response
        --------

        The stored document is returned as a JSON object without the password
        hash and without the internal ``_id``.

        ::

            HTTP/1.0 200 OK

            {
              "username": "******",
              "name": "Alex Brandt",
              "email": "*****@*****.**"
            }

        Possible Errors
        ---------------

        :404: Not Found—No user with the requested username exists.

        """

        # Exclude the password hash and the ObjectId: neither belongs in the
        # response and the latter is not JSON serializable.
        user = get_collection("users").find_one({"username": username}, {"hash": 0, "_id": 0})

        # TODO Should this be an authenticated action?

        logger.debug("user: %s", user)

        if user is None:
            abort(404)

        # TODO Set JSON mimetype?

        # Serialize the document itself; dumping its unicode() form produced a
        # JSON string holding a Python repr rather than the documented object.
        # default=str is a fallback for any stored value json cannot encode.
        return json.dumps(user, default=str)
Beispiel #6
0
def update_references_consumer(channel, method, header, body):
    """Update the citations and references around the specified article.

    The message body is a JSON object carrying the ``_id`` of the article
    that was added.  The intended behaviour is to scan that article for links
    to other cataloged articles and to scan all other articles for links back
    to it, recording both directions:

    * citations
    * references

    Both notations are meant to be updated on the article in question and on
    the remote article for every link found, which becomes very expensive
    once the data store holds more than a trivial number of articles.

    .. note::
        The link analysis is not implemented yet (see the TODO below); at
        present the article is only looked up and the message acknowledged.

    .. note::
        This is an easy area for improvement with algorithms on search.

    """

    requested = json.loads(body)

    logger.debug("article: %s", requested)

    articles = get_collection("articles")
    article = articles.find_one({ "_id": requested["_id"] })

    # TODO Implement the following general algorithm:
    # Retrieve the raw HTML
    # Find all links to other cataloged articles
    # Update those articles with a citation reference to this article
    # Update this article with a reference reference to the remote articles
    # Search all bodies of all articles for links to this article
    # Update those articles with a reference reference to this article
    # Update this article with a citation reference to the remote articles.

    # TODO Automatic indexing of articles:
    #article["tags"] += extract_keywords(soup)

    channel.basic_ack(delivery_tag = method.delivery_tag)
Beispiel #7
0
def password_email_consumer(channel, method, header, body):
    """Send a user the link to reset their password.

    This takes the information passed through the queue and creates a
    verification token to email to the user for resetting their password.

    * Record the verification token in the ``verifications`` keyspace (it
      maps to the username and expires after six hours)
    * Email the verification to the User

    A request for a username that has no stored document cannot be fulfilled;
    it is logged and acknowledged so it is not redelivered.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery information; ``method.delivery_tag`` identifies
                   the message to acknowledge.
    :param header: message properties (unused).
    :param body: JSON-encoded object that must carry ``username``.

    """

    requested = json.loads(body)

    user = get_collection("users").find_one({ "username": requested["username"] })

    if user is None:
        logger.error("Password email requested for unknown user: %s", requested["username"])

        channel.basic_ack(delivery_tag = method.delivery_tag)

        return

    verification = uuid.uuid4()

    # Store the key as text, the same way login stores its tokens.
    get_keyspace("verifications").setex(str(verification), user["username"], datetime.timedelta(hours = 6))

    send_user_email(user, verification)

    channel.basic_ack(delivery_tag = method.delivery_tag)
Beispiel #8
0
def login(username):
    """Get an authorized token for subsequent API calls.

    This is the login method and must be called to get the token required for
    all calls making a note that they require the X-Auth-Token header.

    This call does require a password to be provided (digest authentication is
    used to improve security).  This also means that one cannot simply pass in
    their username and password and get the resulting token.  This token
    request requires two invocations:

    1. Returns the HTTP Digest parameters
    2. Returns the X-Auth-Token value

    The issued token is stored in the ``tokens`` keyspace, mapped to the
    username, and expires after six hours.

    Challenge-Response
    ------------------

    A challenge is sent every time the API returns a 401 Unauthorized.  This
    is the first step in getting a token.

    Response (Challenge)
    ''''''''''''''''''''

    ::

        401 Unauthorized
        Location: /alunduil/token
        WWW-Authenticate: Digest realm="margarine.api",
          qop="auth",
          nonce="0cc175b9c0f1b6a831c399e269772661",
          opaque="92eb5ffee6ae2fec3ad71c777531578f"

    Request (Client Authentication Response)
    ''''''''''''''''''''''''''''''''''''''''

    ::

        GET /alunduil/token HTTP/1.1
        Host: www.example.com
        Authorization: Digest username="******",
          realm="margarine.api",
          nonce="0cc175b9c0f1b6a831c399e269772661",
          uri="/v1/users/alunduil/token",
          qop=auth,
          nc=00000001,
          cnonce="4a8a08f09d37b73795649038408b5f33",
          response="2370039ff8a9fb83b4293210b5fb53e3",
          opaque="92eb5ffee6ae2fec3ad71c777531578f"

    Response (Token)
    ''''''''''''''''

    ::

        HTTP/1.1 200 OK

        0b4fb639-edd1-44fe-b757-589a099097a5

    Possible Errors
    ---------------

    :401: Unauthorized—No digest credentials were sent, the opaque value is
          wrong, the user has no password hash yet, or the digest response
          does not match.
    :404: Not Found—No user with the requested username exists.

    """

    logger.info("Checking authentication!")

    authorization = request.authorization

    if authorization is None or authorization.opaque != Parameters()["api.uuid"]:
        raise UnauthorizedError(username=username)

    user = get_collection("users").find_one({"username": username})

    # Log only whether the user exists; the document carries the password
    # hash, which is password-equivalent for digest authentication.
    logger.debug("user found: %s", user is not None)

    if user is None:
        abort(404)

    h1 = user.get("hash")

    if h1 is None:
        # A user without a stored hash has not set a password yet and so
        # cannot authenticate; challenge instead of failing with KeyError.
        raise UnauthorizedError(username=username)

    # Build the digest inputs as text and encode explicitly so non-ASCII
    # values do not break the formatting.
    _ = u"{request.method}:{request.path}"
    h2 = hashlib.md5(_.format(request=request).encode("utf-8")).hexdigest()

    _ = u"{h1}:{a.nonce}:{a.nc}:{a.cnonce}:{a.qop}:{h2}"
    h3 = hashlib.md5(_.format(h1=h1, a=authorization, h2=h2).encode("utf-8")).hexdigest()

    if authorization.response != h3:
        logger.debug("digest response mismatch for user: %s", username)

        raise UnauthorizedError(username=username)

    token = str(uuid.uuid4())

    get_keyspace("tokens").setex(token, username, datetime.timedelta(hours=6))

    return token
Beispiel #9
0
    def put(self, username):
        """Create a User or modify an existing User.

        The request is not applied directly: a message is published to the
        ``margarine.users.topic`` exchange with the routing key
        ``users.create`` (no stored user with this username) or
        ``users.update`` (the user already exists).

        Create a User
        =============

        To create a new user in the system, perform a PUT on the URL of the
        user to be created with any parameters (required and optional)
        specified in the form data.  ``email`` is required when creating.

        Request
        -------

        ::

            PUT /alunduil
            Content-Type: application/x-www-form-urlencoded

            email=alunduil%40alunduil.com
            name=Alex%20Brandt

        Response
        --------

        ::

            HTTP/1.0 202 Accepted

        Modify a User
        =============

        This method can also be used to modify an existing user—not just for
        creating new users.  Modifying requires an ``X-Auth-Token`` that the
        token store maps to the user.

        Request
        -------

        ::

            PUT /alunduil
            Content-Type: application/x-www-form-urlencoded
            X-Auth-Token: 6e585a2d-438d-4a33-856a-8a7c086421ee

            email=alunduil%40alunduil.com

        Response
        --------

        The update is queued as well, so the response is also 202.

        ::

            HTTP/1.0 202 Accepted

        Possible Errors
        ===============

        :400: Bad Request—A required option was not passed or is improperly
              formatted
        :401: Unauthorized—An attempt to create an existing user was detected

        The following are also used when updating a user:

        :409: Conflict—The new username requested is already in use.  (This
              method itself never returns it.)

        """

        user = get_collection("users").find_one({"username": username})

        # Log only existence; the stored document carries the password hash.
        logger.debug("user exists: %s", user is not None)

        message_properties = pika.BasicProperties()
        message_properties.content_type = "application/json"
        message_properties.durable = False

        message = {
            "username": request.form.get("username", username),
            "email": request.form.get("email"),
            "name": request.form.get("name"),
        }

        routing_key = "users.create"

        if user is not None:
            routing_key = "users.update"

            message["original_username"] = username

            logger.debug("X-Auth-Token: %s", request.headers.get("X-Auth-Token"))

            if get_keyspace("tokens").get(request.headers.get("X-Auth-Token")) != username:
                # TODO Redirect to token URL?
                raise UnauthorizedError(username=username)

        if message["email"] is None and routing_key == "users.create":
            abort(400)

        message = json.dumps(message)

        channel = get_channel()

        # Always release the channel, even when declaring or publishing fails.
        try:
            channel.exchange_declare(exchange="margarine.users.topic", type="topic", auto_delete=False)
            channel.basic_publish(
                body=message, exchange="margarine.users.topic", properties=message_properties, routing_key=routing_key
            )
        finally:
            channel.close()

        return "", 202
Beispiel #10
0
def article(article_id):
    """Retrieve a sanitized article.

    The article's meta-information comes from the ``articles`` collection.
    For every method except HEAD the sanitized text is fetched from the
    object store and returned as ``body``.

    Request
    -------

    ::

        GET /44d85795-248d-5899-b8ca-ac2bd8233755

    Response
    --------

    .. note::
        The following is formatted for readability and does not match the
        actual response from the API.  Also, the body parameter has been
        shortened to fit this example more concisely.

    ::

        HTTP/1.0 200 Ok

        {
          "body": "…Singularity, an Alternative Openstack Guest Agent | Hackery &c…
          "url": "http://blog.alunduil.com/posts/singularity-an-alternative-openstack-guest-agent.html",
          "created_at": {"$date": 1374007667571},
          "etag": "6e2f69536ca15cc18260bffe7583b849",
          "_id": "03db19bb92205b4fb5fc3c4c0e4b1279",
          "parsed_at": {"$date": 1374008521414},
          "size": 9964
        }

    Possible Errors
    ---------------

    :404: Not Found—The identifier is not a valid UUID, the article does not
          exist, or its body has not been sanitized yet.

    """

    try:
        article_key = uuid.UUID(article_id).hex
    except ValueError:
        # A malformed identifier cannot name any article.
        abort(404)

    article = get_collection("articles").find_one({ "_id": article_key })

    logger.debug("article: %s", article)

    if article is None or "etag" not in article:
        # 404 not only if the object doesn't exist but also if we haven't
        # sanitized the body yet.
        abort(404)

    # The storage location is internal and is stripped from the response.
    container_name, object_name = article.pop("text_container_name"), article.pop("text_object_name")

    logger.debug("article: %s", article)

    # TODO Catch connection issues and return Temporarily Unavailable.
    if request.method != "HEAD":
        data = get_container(container_name).get_object(object_name).fetch()

        logger.debug("type(data): %s", type(data))
        logger.debug("len(data): %s", len(data))

        article["body"] = data

    response = make_response(json.dumps(article, default = json_util.default), 200)

    response.mimetype = "application/json"

    response.headers["Access-Control-Allow-Origin"] = Parameters()["server.domain"]

    return response
Beispiel #11
0
def create_article_consumer(channel, method, header, body):
    """Create an article—completing the bottom half of article creation.

    This takes the UUID of the article to create and the URL passed through the
    message queue, stores the meta-information, and publishes the article's
    ``_id`` to the ``margarine.articles.create`` fanout exchange.

    This process should be idempotent and therefore not have any ill-effect if
    invoked multiple times (i.e. POSTed by multiple users).

    Performs the following specific actions:

    * Sets ``created_at`` when the stored article does not have it yet.
    * Writes only the fields that are new or differ from the stored article;
      when nothing changed, no write is issued at all.

    The following actions should be performed in parallel by a fanout:

    * Submits a reference job to update automatic notations in this and others.
    * Submits the sanitization request for the HTML body.

    .. note::
        The article will need a pre-allocated space in MongoDB for performance
        reasons.  The way we start with the bare necessities and then add
        information at a later time means we may get fragmentation in the data
        stored if we don't pre-allocate enough space.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery information; ``method.delivery_tag`` identifies
                   the message to acknowledge.
    :param header: message properties (unused).
    :param body: JSON-encoded article that must carry ``_id``.

    """

    article = json.loads(body)

    logger.debug("article: %s", article)

    _id = article.pop("_id")

    articles = get_collection("articles")

    existing = articles.find_one({ "_id": _id })

    logger.debug("Found: %s", existing)

    if existing is None or "created_at" not in existing:
        article["created_at"] = datetime.datetime.now()

    if existing is not None:
        # Keep only the fields that are new or changed.
        article = dict((k, v) for k, v in article.iteritems() if k not in existing or v != existing[k])

    logger.debug("article: %s", article)
    logger.debug("_id: %s", _id)

    # A repeated, identical submission leaves nothing to write.  Skip the
    # update then, because an empty $set is rejected by some MongoDB versions.
    if article:
        articles.update({ "_id": _id }, { "$set": article }, upsert = True)

    message_properties = pika.BasicProperties()
    message_properties.content_type = "application/json"
    message_properties.durable = False

    message = json.dumps({ "_id": _id })

    publisher = get_channel()

    # Always release the publishing channel, even when publishing fails.
    try:
        publisher.exchange_declare(exchange = "margarine.articles.create", type = "fanout", auto_delete = False)
        publisher.basic_publish(body = message, exchange = "margarine.articles.create", properties = message_properties, routing_key = "articles.create")
    finally:
        publisher.close()

    channel.basic_ack(delivery_tag = method.delivery_tag)
Beispiel #12
0
def sanitize_html_consumer(channel, method, header, body):
    """Download and sanitize the HTML for the given article.

    The HTML should be simplified as much as possible without modifying the
    feel of the structure to someone reading the content of the body of the
    document.  At present the sanitized form is the plain text of the page.

    The article's URL is probed with a HEAD request first.  The page is only
    downloaded and processed when the server's ETag differs from the stored
    one, or when the server sends no ETag at all (in which case a change
    cannot be ruled out).  The text is uploaded to the object store and the
    article's meta-information (etag, parsed_at, size, storage location) is
    updated.

    .. note::
        Analysis will be necessary that shows the statistics on sanitized HTML
        size for a determination as to whether we can store it inline in Mongo
        or out of band in an object store like Rackspace Cloud Files.

    The decisions and algorithms used for streamlining the HTML are not
    proprietary in any way and can be used and modified under the terms of this
    file's licensing but more importantly can be improved or modified if
    imperfections are found.

    :param channel: channel the message arrived on; used to acknowledge it.
    :param method: delivery information; ``method.delivery_tag`` identifies
                   the message to acknowledge.
    :param header: message properties (unused).
    :param body: JSON-encoded object that must carry the article's ``_id``.

    """

    _id = json.loads(body)["_id"]

    logger.debug("article._id: %s", _id)

    articles = get_collection("articles")

    article = articles.find_one({ "_id": _id }, { "_id": 0 })

    if article is None:
        # Without a stored article there is no URL to fetch; the job can
        # never succeed, so acknowledge it instead of failing on every retry.
        logger.error("Sanitization requested for unknown article: %s", _id)

        channel.basic_ack(delivery_tag = method.delivery_tag)

        return

    head_request = urllib2.Request(article["url"])
    head_request.get_method = lambda: "HEAD"

    response = urllib2.urlopen(head_request)

    try:
        logger.debug("response: %s", response)
        logger.debug("response.info(): %s", response.info())
        logger.debug("response.info().__class__: %s", response.info().__class__)

        etag = response.info().getheader("etag")
    finally:
        response.close()

    # TODO Check Last-Modified?
    # TODO Use expires to set the next poll?
    # TODO Respect Cache-Control?
    # TODO Other header considerations.
    # TODO Use Content-Type to set encoding?

    # A missing ETag must not be mistaken for "unchanged": a new article has
    # no stored etag either, and would otherwise never be parsed.
    if etag is None or article.get("etag") != etag:
        logger.info("Parsing full HTML of %s", article["url"])

        article["etag"] = etag

        response = urllib2.urlopen(article["url"])

        try:
            soup = bs4.BeautifulSoup(response.read())
        finally:
            response.close()

        # TODO Use this when more is required:
        #html = sanitize(soup)
        html = soup.get_text()

        article["parsed_at"] = datetime.datetime.now()

        # NOTE: sys.getsizeof reports the in-memory size of the string object,
        # not the number of characters or encoded bytes.
        logger.debug("HTML Size: %s B", sys.getsizeof(html))
        article["size"] = sys.getsizeof(html)

        # The first UUID group names the container, the remainder the object.
        container_part, object_part = str(uuid.UUID(_id)).split("-", 1)

        article["text_container_name"] = "margarine-" + container_part
        article["text_object_name"] = object_part

        logger.info("Uploading text to cloudfiles")

        get_container(article["text_container_name"]).store_object(article["text_object_name"], html, content_type = "text/html")

        logger.info("Uploaded text to cloudfiles")

        articles.update({ "_id": _id }, { "$set": article }, upsert = True)

    logger.info("finished processing article: %s", article["url"])

    channel.basic_ack(delivery_tag = method.delivery_tag)