コード例 #1
0
ファイル: parse.py プロジェクト: rlisagor/mls
def main(argv):
    """Run the parser worker loop: poll the parse queue and store listings.

    Each queue message body is JSON and is read for the keys ``mls``,
    ``date`` and ``key``. For every message the loop:

    1. deletes the message and moves on if ``aws.mls_exists`` already
       reports that MLS number for that date,
    2. otherwise fetches the HTML stored under ``key`` in ``bucket`` and
       parses it with ``realtylink.Listing``,
    3. creates a new item in ``mls_domain`` if the MLS number has none yet,
       appends the ``(price, date)`` pair to its ``prices`` attribute and
       saves the item,
    4. deletes the message from the queue.

    When no message is available the loop waits via ``sleeper.sleep()``.

    Args:
        argv: Command-line arguments; not used by this function.

    Returns:
        Never returns normally; the loop runs until the process is stopped
        or an exception propagates.
    """
    log.info("Starting parser")
    sleeper = aws.Sleeper(5)

    # Listing attributes copied unchanged onto a newly created item, in the
    # order they are written.
    copied_fields = (
        "area",
        "type",
        "bedrooms",
        "bathrooms",
        "age",
        "maintenance_fee",
        "features",
        "address",
        "region",
        "city",
        "unit",
    )

    # Loop indefinitely, waiting for messages.
    # If a message is available, grab the data to parse out of S3.
    while True:
        m = parse_queue.read(visibility_timeout=10)
        if m is None:
            log.info("Sleeping")
            sleeper.sleep()
            continue

        sleeper.reset()
        message_data = simplejson.loads(m.get_body())
        mls = message_data["mls"]
        date = message_data["date"]

        log.info("Processing %s with timestamp %s", mls, date)
        if aws.mls_exists(mls_domain, mls, date):
            log.info("already exists")
            # Remove the redundant message; otherwise it becomes visible
            # again once the visibility timeout expires and is re-read
            # over and over without ever leaving the queue.
            parse_queue.delete_message(m)
            continue

        listing_key = bucket.get_key(message_data["key"])
        listing_html = listing_key.get_contents_as_string()
        # Parse it
        listing = realtylink.Listing(mls, listing_html)

        # TODO: Make this more efficient by using the result from above
        listing_item = aws.mls_exists(mls_domain, mls)
        if not listing_item:
            # And insert it into SimpleDB
            # NOTE(review): the item name comes from the built-in hash(),
            # whose value for strings is not guaranteed to be the same on
            # every interpreter build -- confirm this is acceptable.
            listing_item = mls_domain.new_item(hash(mls))
            listing_item["mls"] = listing.mls
            # Only the first 1023 characters of the description are stored.
            listing_item["description"] = listing.description[:1023]
            for field in copied_fields:
                listing_item[field] = getattr(listing, field)
            # NOTE(review): last_seen is only written when the item is
            # created; confirm whether it should also be refreshed for
            # items that already exist.
            listing_item["last_seen"] = aws.get_iso_timestamp()
            if "first_seen" not in listing_item:
                listing_item["first_seen"] = aws.get_iso_timestamp()

        listing_item.add_value("prices", (listing.price, date))
        log.debug(listing_item)
        # Don't save it or delete the message while debugging
        listing_item.save()

        parse_queue.delete_message(m)
コード例 #2
0
ファイル: reprocess.py プロジェクト: rlisagor/mls
def main(argv):
    """Queue a parse request for every stored listing not yet recorded.

    Iterates over all keys returned by ``bucket.list()``. Each key name is
    split on "/" into an MLS number and a file name, and the part of the
    file name before the first "." is taken as the date. If
    ``aws.mls_exists`` reports nothing for that MLS number and date, a JSON
    message with the keys ``mls``, ``key``, ``bucket`` and ``date`` is
    written to ``parse_queue``.

    Args:
        argv: Command-line arguments; not used by this function.

    Raises:
        ValueError: If a key name does not split on "/" into exactly two
            parts.
    """
    for key in bucket.list():
        mls, datestr = key.key.split("/")
        date = datestr.split(".")[0]
        # Single-argument parenthesised print: same output as the print
        # statement on Python 2, and also valid syntax on Python 3.
        print("%s %s" % (mls, date))

        if not aws.mls_exists(mls_domain, mls, date):
            print("Need to parse %s" % mls)
            msg_body = simplejson.dumps({"mls":mls, "key":key.name, "bucket":bucket.name, "date":date})
            request_msg = parse_queue.new_message(msg_body)
            parse_queue.write(request_msg)