def calc_current_period(period_size):

    now = dateHelpers.datetime_in_utc()

    if period_size == "s":
        period_start = now.replace(microsecond=0)
        period_end   = period_start + ONE_SECOND - ONE_MICROSECOND
    elif period_size == "m":
        period_start = now.replace(second=0, microsecond=0)
        period_end   = period_start + ONE_MINUTE - ONE_MICROSECOND
    elif period_size == "h":
        period_start = now.replace(minute=0, second=0, microsecond=0)
        period_end   = period_start + ONE_HOUR - ONE_MICROSECOND
    elif period_size == "d":
        period_start = now.replace(hour=0, minute=0, second=0,
                                   microsecond=0)
        period_end   = period_start + ONE_DAY - ONE_MICROSECOND
    elif period_size == "w":
        period_start = now.replace(hour=0, minute=0, second=0,
                                   microsecond=0)
        # Rewind to the start of the week (Monday, weekday() == 0).
        while period_start.weekday() > 0:
            period_start = period_start - ONE_DAY
        period_end = period_start + ONE_WEEK - ONE_MICROSECOND
    else:
        raise RuntimeError("Invalid time period: " + repr(period_size))

    return (period_start, period_end)
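
# A minimal usage sketch (assuming ONE_SECOND, ONE_MINUTE, ONE_HOUR, ONE_DAY,
# ONE_WEEK and ONE_MICROSECOND are datetime.timedelta constants, and that
# dateHelpers.datetime_in_utc() returns the current UTC datetime):
#
#     start, end = calc_current_period("h")
#     # Called at 2012-10-22 11:56:03 UTC, this yields:
#     #   start == datetime(2012, 10, 22, 11, 0, 0)
#     #   end   == datetime(2012, 10, 22, 11, 59, 59, 999999)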
    def _setup(self):
        """ Setup the data reducer.

            This is called the first time the add() method is called.  We set
            up the internal data structures used by the data reducer.

            Note that if something is wrong (eg, no time period was specified),
            we raise a suitable RuntimeError.
        """
        if self._start_time == None or self._end_time == None:
            raise RuntimeError("DataReducer.set_period() not called!")

        # Calculate the total number of seconds in the reporting time period.

        start_secs   = datetime_to_seconds(self._start_time)
        end_secs     = datetime_to_seconds(self._end_time)
        tot_num_secs = end_secs - start_secs + 1

        # Calculate how many buckets we need, and how many seconds are covered
        # by each bucket.

        num_buckets     = tot_num_secs # initially.
        secs_per_bucket = 1

        while num_buckets > self._max_num_data_points:
            secs_per_bucket = secs_per_bucket + 1
            num_buckets     = int(math.ceil(tot_num_secs /
                                            float(secs_per_bucket)))

        # Set up the mapping from seconds to bucket indexes.

        for cur_secs in range(start_secs, end_secs+1):
            bucket_index = int(float(cur_secs-start_secs) /
                               float(secs_per_bucket))
            self._seconds_to_bucket[cur_secs] = bucket_index

        # Finally, set up the list of buckets.

        for bucket_num in range(num_buckets):
            bucket_start_secs = start_secs + bucket_num * secs_per_bucket
            bucket_end_secs   = bucket_start_secs + secs_per_bucket - 1

            bucket_start = dateHelpers.datetime_in_utc(bucket_start_secs)
            bucket_end   = dateHelpers.datetime_in_utc(bucket_end_secs)

            self._buckets.append({'start'  : bucket_start,
                                  'end'    : bucket_end,
                                  'values' : []})
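
# A rough, self-contained illustration of the bucket sizing used in _setup()
# above (for illustration only; not part of the class):

import math

def demo_bucket_sizing(tot_num_secs, max_num_data_points):
    # Mirror the sizing loop: grow the bucket width until the bucket count
    # fits within the allowed number of data points.
    secs_per_bucket = 1
    num_buckets = tot_num_secs
    while num_buckets > max_num_data_points:
        secs_per_bucket = secs_per_bucket + 1
        num_buckets = int(math.ceil(tot_num_secs / float(secs_per_bucket)))
    return (num_buckets, secs_per_bucket)

# demo_bucket_sizing(3600, 500) == (450, 8): a one-hour reporting period
# capped at 500 data points ends up with 8-second buckets.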
def poll(request):
    """
    Respond to the "/api/latest/poll" URL.

    We poll postings from given timestamp and rpp arguments.
    """

    now = time.time()

    if request.method != "GET":
        return HttpResponseNotAllowed(["GET"])

    # Available GET arguments.
    rpp = None
    timestamp = None

    # Extract timestamp parameter.

    if "timestamp" in request.GET:

        # Try to extract the timestamp value from the request.
        match_obj = re.match(TIMESTAMP_VALIDATE_PATTERN, request.GET['timestamp'])

        try:
            timestamp = int(match_obj.group(1))
        except (ValueError, AttributeError):
            return HttpResponse(json.dumps(
                                        {'success': False,
                                         'error': "Invalid 'timestamp' value"}),
                                mimetype="application/json")

        # Validate the timestamp: it can't be in the future.
        if timestamp > now:
            return HttpResponse(json.dumps(
                                    {'success': False,
                                     'error': "'timestamp' value is out of bounds: %d" % timestamp}),
                                mimetype="application/json")
    else:
        timestamp = int(now - DEFAULT_TIMESTAMP_DIFF)

    # Extract rpp parameter.

    if "rpp" in request.GET:
        try:
            rpp = int(request.GET['rpp'])
        except ValueError:
            return HttpResponse(json.dumps(
                                    {'success': False,
                                     'error': "Invalid 'rpp' value."}),
                                mimetype="application/json")

        if rpp < 1 or rpp > MAX_POSTINGS_NUMBER:
            return HttpResponse(json.dumps(
                                    {'success': False,
                                     'error': "'rpp' value out of range"}),
                                mimetype="application/json")

    else:
        rpp = DEFAULT_POSTINGS_NUMBER

    # Construct a search query based on the supplied parameters.

    query = Posting.objects.all()

    timestamp = datetime_in_utc(timestamp)
    now = datetime_in_utc(now)

    query = query.filter(timestamp__gte=timestamp, timestamp__lte=now)
    query = query.order_by("-timestamp")
    query = query[:rpp]

    # Testing: If the caller provided a "return_sql" parameter, return the raw
    # SQL statement rather than running it.

    # sql = str(query.query)
    # if (request.GET.get("return_sql") == "1"):
        # return HttpResponse(sql)

    # Before running the query, set a timeout so we don't hang if the query
    # takes too long.

    cursor = connection.cursor()
    cursor.execute("SET STATEMENT_TIMEOUT=%s" % settings.QUERY_TIMEOUT)

    # Process the search query, and assemble our search results.

    found_postings = []

    # For reference, here is a sample of the posting data we assemble,
    # adapted from an example posting in the source API's JSON format:
    #
    #   {
    #     "id": 69046547,
    #     "categoryName": "Autos",
    #     "categoryClassName": "Vehicles",
    #     "postingTimestamp": "2012-10-22T11:56:00Z",
    #     "source": "CRAIG",
    #     "heading": "2010 GMC Acadia AWD 4dr SLT2",
    #     "hasImage": true,
    #     "indexed": "2012-10-22T12:07:41Z",
    #     "annotations": {
    #       "source_account": "*****@*****.**",
    #       "source_neighborhood": "clarksville, md",
    #       "phone": "3356154294",
    #       "source_loc": "baltimore",
    #       "year": "2012",
    #       "source_continent": "usa",
    #       "make": "gmc",
    #       "source_cat": "sss",
    #       "source_state": "maryland",
    #       "model": "acadia",
    #       "source_subcat": "cta|ctd"
    #     },
    #     "location": {
    #       "cityCode": "USA-WAS-BAT",
    #       "countryCode": "USA",
    #       "zipCode": "USA-21202",
    #       "countyCode": "USA-MD-BAC",
    #       "localityCode": "USA-WAS-DOB",
    #       "metroCode": "USA-WAS",
    #       "longitude": -76.61219,
    #       "regionCode": null,
    #       "stateCode": "USA-MD",
    #       "latitude": 39.29038,
    #       "accuracy": 4
    #     }
    #   }

    try:
        for posting in query:
            found_posting = {}
            found_posting['id'] = posting.id
            found_posting['category'] = posting.category.name
            found_posting['category_group'] = posting.category_group.name
            found_posting['source'] = posting.source.code
            found_posting['heading'] = posting.heading
            found_posting['timestamp'] = str(posting.timestamp)
            found_posting['has_image'] = posting.has_image
            # found_posting['external_id'] = posting.external_id
            # found_posting['external_url'] = posting.external_url

            annotations = {}
            for posting_annotation in posting.postingannotation_set.all():
                s = posting_annotation.annotation.annotation
                key,value = s.split(":", 1)
                annotations[key] = value
            found_posting['annotations'] = annotations

            loc = {}
            if posting.location_latitude is not None:
                loc['latitude'] = posting.location_latitude
            if posting.location_longitude is not None:
                loc['longitude'] = posting.location_longitude
            if posting.location_accuracy is not None:
                loc['accuracy'] = posting.location_accuracy
            if posting.location_country is not None:
                loc['country'] = posting.location_country.code
            if posting.location_state is not None:
                loc['state'] = posting.location_state.code
            if posting.location_metro is not None:
                loc['metro'] = posting.location_metro.code
            if posting.location_region is not None:
                loc['region'] = posting.location_region.code
            if posting.location_county is not None:
                loc['county'] = posting.location_county.code
            if posting.location_city is not None:
                loc['city'] = posting.location_city.code
            if posting.location_locality is not None:
                loc['locality'] = posting.location_locality.code
            if posting.location_zipcode is not None:
                loc['zipcode'] = posting.location_zipcode.code
            found_posting['location'] = loc

            # status = {}
            # status['offered'] = posting.status_offered
            # status['lost']    = posting.status_lost
            # status['stolen']  = posting.status_stolen
            # status['found']   = posting.status_found
            # status['deleted'] = posting.status_deleted
            # found_posting['status'] = status

            found_postings.append(found_posting)
    except DatabaseError as exc:
        transaction.rollback()  # Let the database keep working.

        # if "statement timeout" in str(exc):
        if re.search(DB_TIMEOUT_PATTERN, str(exc)):

            sql = str(query.query)
            # The query timed out. Tell the user the bad news.
            logger.debug("DATABASE TIMEOUT, query=" + sql)
            # eventRecorder.record("POLLING_API", "QUERY_TIMED_OUT", text=sql)
            transaction.commit()
            return HttpResponse(json.dumps({'success': False,
                                            'error': "Database timeout"}),
                                mimetype="application/json")
        else:
            return HttpResponse(json.dumps({'success': False,
                                            'error': "Database error"}),
                                mimetype="application/json")
def record(source, type, primary_value=None, secondary_value=None, text=None):
    """ Record the occurrence of an event.

        The parameters are as follows:

            'source'

                A string indicating the source of this event.

            'type'

                A string indicating the type of event.

            'primary_value'

                An integer giving the primary value for this event, if any.

            'secondary_value'

                An integer giving the secondary value for this event, if any.

            'text'

                Some optional text to associate with this event, if any.

        We create a new event with the given values and add it to the database.
        Upon completion, we return None if the event was successfully added, or
        an appropriate error message if something went wrong.
    """
    # Check that the parameters are correct.

    if source not in ["POSTING_API",
                      "SEARCH_API",
                      # "POLLING_API",
                      "SUMMARIZER_API"]:
        return "Unknown source: " + repr(source)

    if type == "POSTINGS_QUEUED":
        if primary_value == None:
            return "Missing required primary value"
        if secondary_value == None:
            return "Missing required secondary value"
        if text != None:
            return "This event type doesn't take any text"
    elif type == "POSTINGS_DEQUEUED":
        if primary_value == None:
            return "Missing required primary value"
        if secondary_value != None:
            return "This event type doesn't take a secondary value"
        if text != None:
            return "This event type doesn't take any text"
    elif type == "POSTINGS_PROCESSED":
        if primary_value == None:
            return "Missing required primary value"
        if secondary_value == None:
            return "Missing required secondary value"
        if text != None:
            return "This event type doesn't take any text"
    elif type == "SEARCH_REQUESTS":
        if primary_value == None:
            return "Missing required primary value"
        if secondary_value == None:
            return "Missing required secondary value"
    elif type == "SUMMARY_REQUESTS":
        if primary_value == None:
            return "Missing required primary value"
        if secondary_value == None:
            return "Missing required secondary value"
        if text != None:
            return "This event type doesn't take any text"
    elif type == "QUERY_TIMED_OUT":
        if primary_value != None:
            return "This event type doesn't take a primary value"
        if secondary_value != None:
            return "This event type doesn't take a secondary value"
        if text == None:
            return "This event type requires a text value"
    else:
        return "Unknown event type: " + type

    # Translate our event source and type into EventSource and EventType
    # objects, creating new records as required.

    event_source,created = EventSource.objects.get_or_create(source=source)
    event_type,created   = EventType.objects.get_or_create(type=type)

    # Create the new Event object.

    event = Event()
    event.timestamp       = dateHelpers.datetime_in_utc()
    event.source          = event_source
    event.type            = event_type
    event.primary_value   = primary_value
    event.secondary_value = secondary_value
    event.text            = text
    event.save()

    # That's all, folks!

    logger.debug("Received event %s from %s, values = %s"
                 % (type, source, str([primary_value, secondary_value, text])))

    return None
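
# A minimal usage sketch for record() (the values shown are invented):
#
#     error = record("SEARCH_API", "SEARCH_REQUESTS",
#                    primary_value=1, secondary_value=250)
#     if error is not None:
#         logger.error("Unable to record event: " + error)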
    def handle(self, *args, **kwargs):
        if len(args) > 0:
            raise CommandError("This command doesn't take any parameters.")

        # Get the "POSTINGS_QUEUED" and "POSTINGS_DEQUEUED" event types.  We'll
        # need these for our various database queries.

        try:
            postings_queued_event = EventType.objects.get(
                                                    type="POSTINGS_QUEUED")
        except EventType.DoesNotExist:
            postings_queued_event = None

        try:
            postings_dequeued_event = EventType.objects.get(
                                                    type="POSTINGS_DEQUEUED")
        except EventType.DoesNotExist:
            postings_dequeued_event = None

        # Get the total number of postings which have been queued.

        if postings_queued_event != None:
            query = Event.objects.filter(type=postings_queued_event)
            num_postings_added = \
                query.aggregate(Sum("primary_value"))['primary_value__sum']
            if num_postings_added == None: num_postings_added = 0
        else:
            num_postings_added = 0

        # Get the total number of postings which have been dequeued.

        if postings_dequeued_event != None:
            query = Event.objects.filter(type=postings_dequeued_event)
            num_postings_removed = \
                query.aggregate(Sum("primary_value"))['primary_value__sum']
            if num_postings_removed == None: num_postings_removed = 0
        else:
            num_postings_removed = 0

        # Calculate the number of left-over postings.

        postings_to_remove = num_postings_added - num_postings_removed

        if postings_to_remove == 0:
            raise CommandError("There are no postings to remove!")

        # Finally, add a new "POSTINGS_DEQUEUED" event to reset the number of
        # postings back to zero.

        if postings_dequeued_event == None:
            postings_dequeued_event = EventType()
            postings_dequeued_event.type = "POSTINGS_DEQUEUED"
            postings_dequeued_event.save()

        posting_api_source = EventSource.objects.get_or_create(
                                                source="POSTING_API")[0]

        event = Event()
        event.timestamp     = dateHelpers.datetime_in_utc()
        event.type          = postings_dequeued_event
        event.source        = posting_api_source
        event.primary_value = postings_to_remove
        event.save()
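
# For reference, the aggregate(Sum(...)) calls used in handle() above return
# a dictionary keyed by '<field>__sum', which is why the result is indexed
# before the None check; e.g.:
#
#     Event.objects.filter(type=postings_queued_event) \
#                  .aggregate(Sum("primary_value"))
#     # -> {'primary_value__sum': 12345}, or {'primary_value__sum': None}
#     #    when no matching rows exist.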
def build_search_query(criteria):
    """ Build and return a QuerySet object based on the given search criteria.

        We construct a search query that will search against the given set of
        search criteria.

        Upon completion, we return a (success, result) tuple, where 'success'
        is True if and only if we could build a search query out of the given
        search criteria.  If 'success' is True, 'result' will be the QuerySet
        object we created.  Otherwise, 'result' will be a string explaining why
        we couldn't construct the query set.
    """
    query = Posting.objects.all() # initially.

    # Append locations filters.

    if "country" in criteria:
        try:
            country = Location.objects.get(code=criteria['country'],
                                           level=Location.LEVEL_COUNTRY)
        except Location.DoesNotExist:
            return (False, "Unknown country: " + criteria['country'])
        query = query.filter(location_country=country)

    if "state" in criteria:
        try:
            state = Location.objects.get(code=criteria['state'],
                                         level=Location.LEVEL_STATE)
        except Location.DoesNotExist:
            return (False, "Unknown state: " + criteria['state'])
        query = query.filter(location_state=state)

    if "metro" in criteria:
        try:
            metro = Location.objects.get(code=criteria['metro'],
                                         level=Location.LEVEL_METRO)
        except Location.DoesNotExist:
            return (False, "Unknown metro: " + criteria['metro'])
        query = query.filter(location_metro=metro)

    if "region" in criteria:
        try:
            region = Location.objects.get(code=criteria['region'],
                                          level=Location.LEVEL_REGION)
        except Location.DoesNotExist:
            return (False, "Unknown region: " + criteria['region'])
        query = query.filter(location_region=region)

    if "county" in criteria:
        try:
            county = Location.objects.get(code=criteria['county'],
                                          level=Location.LEVEL_COUNTY)
        except Location.DoesNotExist:
            return (False, "Unknown county: " + criteria['county'])
        query = query.filter(location_county=county)

    if "city" in criteria:
        try:
            city = Location.objects.get(code=criteria['city'],
                                        level=Location.LEVEL_CITY)
        except Location.DoesNotExist:
            return (False, "Unknown city: " + criteria['city'])
        query = query.filter(location_city=city)

    if "locality" in criteria:
        try:
            locality = Location.objects.get(code=criteria['locality'],
                                            level=Location.LEVEL_LOCALITY)
        except Location.DoesNotExist:
            return (False, "Unknown locality: " + criteria['locality'])
        query = query.filter(location_locality=locality)

    if "zipcode" in criteria:
        try:
            zipcode = Location.objects.get(code=criteria['zipcode'],
                                           level=Location.LEVEL_ZIPCODE)
        except Location.DoesNotExist:
            return (False, "Unknown zipcode: " + criteria['zipcode'])
        query = query.filter(location_zipcode=zipcode)

    # Append other filters.

    if "category_group" in criteria:
        try:
            group = CategoryGroup.objects.get(code=criteria['category_group'])
        except CategoryGroup.DoesNotExist:
            return (False,
                    "Unknown category group: " + criteria['category_group'])
        query = query.filter(category_group=group)

    if "category" in criteria:
        try:
            category = Category.objects.get(code=criteria['category'])
        except Category.DoesNotExist:
            return (False, "Unknown category: " + criteria['category'])
        query = query.filter(category=category)

    if "source" in criteria:
        try:
            source = Source.objects.get(code=criteria['source'])
        except Source.DoesNotExist:
            return (False, "Unknown source: " + criteria['source'])
        query = query.filter(source=source)

    if "external_id" in criteria:
        query = query.filter(external_id=criteria['external_id'])

    if "heading" in criteria:
        query = add_heading_search(query, criteria['heading'])

    if "body" in criteria:
        query = add_body_search(query, criteria['body'])

    if "text" in criteria:
        query = add_heading_or_body_search(query, criteria['text'])

    if "timestamp" in criteria:
        if ".." not in criteria['timestamp']:
            return (False,
                    "Invalid timestamp criteria: " + criteria['timestamp'])
        s1,s2 = criteria['timestamp'].split("..", 1)
        try:
            min_timestamp = int(s1)
        except ValueError:
            return (False, "Invalid timestamp value: " + s1)
        try:
            max_timestamp = int(s2)
        except ValueError:
            return (False, "Invalid timestamp value: " + s2)
        min_timestamp = dateHelpers.datetime_in_utc(min_timestamp)
        max_timestamp = dateHelpers.datetime_in_utc(max_timestamp)
        query = query.filter(timestamp__gte=min_timestamp,
                             timestamp__lte=max_timestamp)

    if "price" in criteria:
        if ".." not in criteria['price']:
            return (False, "Invalid price criteria: " + criteria['price'])
        s1,s2 = criteria['price'].split("..", 1)
        if s1 != "":
            try:
                min_price = float(s1)
            except ValueError:
                return (False, "Invalid price value: " + s1)
            query = query.filter(price__gte=min_price)
        if s2 != "":
            try:
                max_price = float(s2)
            except ValueError:
                return (False, "Invalid price value: " + s2)
            query = query.filter(price__lte=max_price)

    if "id" in criteria:
        if ".." in criteria['id']:
            s1,s2 = criteria['id'].split("..", 1)
            try:
                min_id = int(s1)
            except ValueError:
                return (False, "Invalid id value: " + s1)
            try:
                max_id = int(s2)
            except ValueError:
                return (False, "Invalid id value: " + s2)
            query = query.filter(id__gte=min_id, id__lte=max_id)
        else:
            try:
                id = int(criteria['id'])
            except ValueError:
                return (False, "Invalid id value: " + criteria['id'])
            query = query.filter(id=id)

    if "currency" in criteria:
        query = query.filter(currency=criteria['currency'])

    if "annotations" in criteria:
        success,result = annotationParser.parse(criteria['annotations'])
        if not success:
            return (False, result)
        else:
            query = query.filter(result)

    if "status" in criteria:
        if criteria['status'] == "offered":
            query = query.filter(status_offered=True)
        if criteria['status'] == "wanted":
            query = query.filter(status_wanted=True)
        elif criteria['status'] == "lost":
            query = query.filter(status_lost=True)
        elif criteria['status'] == "stolen":
            query = query.filter(status_stolen=True)
        elif criteria['status'] == "found":
            query = query.filter(status_found=True)
        elif criteria['status'] == "deleted":
            query = query.filter(status_deleted=True)
        else:
            return (False, "Invalid status criteria: " + criteria['status'])

    if "has_image" in criteria:
        if criteria['has_image'] == "1":
            query = query.filter(has_image=True)
        elif criteria['has_image'] == "0":
            query = query.filter(has_image=False)
        else:
            return (False,
                    "Invalid has_image criteria: " + criteria['has_image'])

    include_deleted = False # initially.
    only_deleted    = False

    if "include_deleted" in criteria:
        if criteria['include_deleted'] == "1":
            include_deleted = True

    if "only_deleted" in criteria:
        if criteria['only_deleted'] == "1":
            only_deleted = True

    if not include_deleted and not only_deleted:
        query = query.filter(status_deleted=False)
    elif only_deleted:
        query = query.filter(status_deleted=True)

    return (True, query)
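
# A minimal usage sketch for build_search_query() (the criteria values are
# invented):
#
#     success, result = build_search_query({'country'   : 'USA',
#                                           'price'     : '100..500',
#                                           'has_image' : '1'})
#     if success:
#         for posting in result[:10]:
#             print posting.heading
#     else:
#         print "Bad criteria: " + result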
def parse_field(src_dict, src_key, dst_dict, dst_key, remaining_fields,
                required=False, coerce_to_type="string", foreign_key=None,
                min_value=None, max_value=None):
    """ Parse a single field.

        The parameters are as follows:

            'src_dict'

                A dictionary holding the raw (unparsed) data.

            'src_key'

                The name of the field in the source dictionary.

            'dst_dict'

                A dictionary which will hold the parsed version of the value.

            'dst_key'

                The name of the field in the destination dictionary in which to
                store this value.

            'remaining_fields'

                A set containing the names of the fields we haven't processed
                yet.

            'required'

                Is this field required? 

            'coerce_to_type'

                Coerce the supplied value to the given type of data, if
                possible.  The following data types are currently supported:

                    "string"
                    "integer"
                    "float"
                    "decimal"
                    "boolean"
                    "datetime"

            'foreign_key'

                If supplied, this is a dictionary containing foreign key values
                to use for this field.  Each dictionary entry maps a foreign
                key value to its associated record ID.  Note that for foreign
                keys, we append "_id" to the end of 'dst_key' so that we set
                the internal record ID of the foreign key field directly.

            'min_value'

                If supplied, the field must be greater than or equal to this
                value.

            'max_value'

                If supplied, the field must be less than or equal to this
                value.

        We do the following to process the field:

            * If 'src_key' doesn't exist in 'src_dict' and 'required' is True,
              we raise a ParsingException with the appropriate error message.

            * If 'foreign_key' is not None, we see if the given field value
              (converted to uppercase) is in the supplied dictionary.  If
              so, we store the associated record ID into:

                  dst_dict[dst_key + "_id"]

              If not, we raise a ParsingException with an appropriate error
              message.

            * If the supplied value can't be coerced to the given data type, we
              raise a ParsingException with the appropriate error message.

            * If the supplied value isn't within the given min_value and
              max_value range, we raise a ParsingException with an appropriate
              error message.

            * Otherwise, the supplied value will be copied from the source
              dictionary to the destination dictionary.

            * If the supplied source value was copied across (either directly,
              or via a translation table), we remove the source field from
              'remaining_fields'.

        Note that no value is returned by this function; either an exception is
        raised or the function completes silently.
    """
    try:
        src_value = src_dict[src_key]
    except KeyError:
        src_value = None

    if src_value == None:
        if required:
            raise ParsingException("Missing required '%s' field" % src_key)
        else:
            return # Nothing else to do.

    if foreign_key != None:
        if isinstance(src_value, basestring):
            try:
                record_id = foreign_key[src_value.upper()]
            except KeyError:
                raise ParsingException("Unknown %s value: '%s'" % (src_key,
                                                                   src_value))
            dst_dict[dst_key + "_id"] = record_id
            remaining_fields.remove(src_key)
            return
        else:
            raise ParsingException(src_key + " must be a string")

    if coerce_to_type == "string":
        try:
            src_value = str(src_value)
        except ValueError:
            raise ParsingException("Unable to convert " + src_key +
                                   " to a string")
    elif coerce_to_type == "integer":
        try:
            src_value = int(src_value)
        except ValueError:
            raise ParsingException("Unable to convert " + src_key +
                                   " to an integer")
    elif coerce_to_type == "float":
        try:
            src_value = float(src_value)
        except ValueError:
            raise ParsingException("Unable to convert " + src_key +
                                   " to a floating point number")
    elif coerce_to_type == "decimal":
        try:
            src_value = decimal.Decimal(str(src_value))
        except decimal.InvalidOperation:
            raise ParsingException("Unable to convert " + src_key +
                                   " to a decimal value")
    elif coerce_to_type == "boolean":
        try:
            src_value = bool(src_value)
        except ValueError:
            raise ParsingException("Unable to convert " + src_key +
                                   " to a boolean")
    elif coerce_to_type == "datetime":
        src_value = dateHelpers.datetime_in_utc(src_value)
        if src_value == None:
            raise ParsingException("Unable to convert " + src_key +
                                   " to a datetime")
    else:
        raise RuntimeError("Unknown coerce_to_type: " + repr(coerce_to_type))

    if min_value != None:
        if src_value < min_value:
            raise ParsingException(src_key + " can't be less than " +
                                   str(min_value))

    if max_value != None:
        if src_value > max_value:
            raise ParsingException(src_key + " can't be more than " +
                                   str(max_value))

    dst_dict[dst_key] = src_value
    remaining_fields.remove(src_key)
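
# A minimal usage sketch for parse_field(), using plain dictionaries:
#
#     src       = {"price": "9.99"}
#     dst       = {}
#     remaining = set(src.keys())
#
#     parse_field(src, "price", dst, "price", remaining,
#                 coerce_to_type="float")
#
#     # dst == {"price": 9.99} and remaining == set(); a bad value such as
#     # {"price": "cheap"} would raise a ParsingException instead.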
def poll(request):
    """ Respond to the "/poll" URL.

        We return a list of the postings with the given timestamp
        We poll postings from given timestamp and rpp arguments.
    """

    now = time.time()

    if request.method != "GET":
        return HttpResponseNotAllowed(["GET"])

    # avail. GET arguments
    rpp = None
    timestamp = None

    # Extract timestamp parameter.

    if "timestamp" in request.GET:
        try:
            timestamp = int(request.GET['timestamp'])
        except (ValueError, AttributeError):
            return HttpResponse(json.dumps(
                                  {'success' : False,
                                   'error'   : "Invalid 'timestamp' value"}),
                                mimetype="application/json")
    else:
        timestamp = int(now - 24*60*60) # 1 day ago.

    # Extract rpp parameter.

    if "rpp" in request.GET:
        try:
            rpp = int(request.GET['rpp'])
        except ValueError:
            return HttpResponse(json.dumps(
                                    {'success': False,
                                     'error': "Invalid 'rpp' value."}),
                                mimetype="application/json")

        if rpp < 1 or rpp > 1000:
            return HttpResponse(json.dumps(
                                    {'success': False,
                                     'error': "'rpp' value out of range"}),
                                mimetype="application/json")

    else:
        rpp = 1000

    # Construct a search query based on the supplied parameters.

    timestamp = dateHelpers.datetime_in_utc(timestamp)

    query = Posting.objects.filter(updated_at__gte=timestamp)
    query = query.order_by("-updated_at")
    query = query[:rpp]

    # Before running the query, set a timeout so we don't hang if the query
    # takes too long.

    cursor = connection.cursor()
    cursor.execute("SET STATEMENT_TIMEOUT=%s" % settings.QUERY_TIMEOUT)

    # Process the search query, and assemble our search results.

    found_postings = []

    try:
        for posting in query:
            found_posting = {}
            _p_resolve_base(found_posting, posting)
            _p_resolve_locs(found_posting, posting)
            _p_resolve_annotaions(found_posting, posting)

            found_postings.append(found_posting)
    except DatabaseError as e:
        transaction.rollback()  # Let the database keep working.

        if "statement timeout" in str(e):
            # The query timed out. Tell the user the bad news.
            sql = str(query.query)
            logger.debug("DATABASE TIMEOUT, query=" + sql)
            # eventRecorder.record("POLLING_API", "QUERY_TIMED_OUT", text=sql)
            transaction.commit()
            return HttpResponse(json.dumps({'success': False,
                                            'error': "Database timeout"}),
                                mimetype="application/json")
        else:
            return HttpResponse(json.dumps({'success': False,
                                            'error': "Database error"}),
                                mimetype="application/json")
    transaction.commit()
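
    # Assumed success response (the original snippet is truncated here); the
    # 'postings' key name is illustrative.
    return HttpResponse(json.dumps({'success': True,
                                    'postings': found_postings}),
                        mimetype="application/json")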

    # Save the postings into the database.  Note that we simply process the
    # postings one at a time, using a transaction to periodically commit
    # changes.  This may not be the fastest way to do it, but given the fact
    # that we have to update existing postings, we may not have a choice...

    try:
        num_postings_in_transaction = 0

        for src in parsed_postings:

            # Add or update the posting itself.

            src['posting']['inserted'] = dateHelpers.datetime_in_utc()

            posting,created = Posting.objects.get_or_create(
                                source_id=src['posting']['source_id'],
                                external_id=src['posting']['external_id'],
                                defaults=src['posting'])

            if not created:
                # We have an existing posting -> update it with the
                # newly-supplied values.
                for key,value in src['posting'].items():
                    if key not in ["source_id", "external_id"]:
                        setattr(posting, key, value)
                posting.save()

def check_raw_postings(raw_postings):
    """ Check that the given raw postings are acceptable.

        'raw_postings' should be a list of raw postings, where each raw posting
        is represented by a dictionary.

        Upon completion, we return a list of (success, result) tuples, one for
        each of the raw postings, where 'success' is a boolean indicating
        whether or not that posting was acceptable, and 'result' is either the
        parsed form of that posting, as described above, or a string containing
        a suitable error message explaining why that posting was not
        acceptable.
    """
    source_codes    = get_source_codes()
    category_codes  = get_category_codes()
    category_groups = get_category_groups()
    country_codes   = None # Loaded into memory as required.
    state_codes     = None # ditto.
    metro_codes     = None # ditto.
    region_codes    = None # ditto.
    county_codes    = None # ditto.
    city_codes      = None # ditto.
    locality_codes  = None # ditto.
    zip_codes       = None # ditto.

    results = []
    for raw_posting in raw_postings:
        try:
            posting     = {}
            annotations = []
            images      = []

            if not isinstance(raw_posting, dict):
                raise ParsingException("Posting must be an object or " +
                                       "dictionary")

            remaining_fields = set(raw_posting.keys())

            parse_field(raw_posting, "account_id", posting, "account_id",
                        remaining_fields, coerce_to_type="string")

            parse_field(raw_posting, "source", posting, "source",
                        remaining_fields, required=True,
                        foreign_key=source_codes)

            parse_field(raw_posting, "category", posting, "category",
                        remaining_fields, foreign_key=category_codes)

            if "category" in raw_posting:
                posting['category_group_id'] = \
                    category_groups[raw_posting['category'].upper()]

            if "location" in raw_posting:
                raw_loc = raw_posting['location']
                remaining_fields.remove("location")

                remaining_loc_fields = set(raw_loc.keys())

                parse_field(raw_loc, "lat", posting, "location_latitude",
                            remaining_loc_fields, coerce_to_type="decimal",
                            min_value=-90, max_value=+90)

                parse_field(raw_loc, "long", posting, "location_longitude",
                            remaining_loc_fields, coerce_to_type="decimal",
                            min_value=-180, max_value=+180)

                parse_field(raw_loc, "accuracy", posting, "location_accuracy",
                            remaining_loc_fields, coerce_to_type="integer")

                if "bounds" in raw_loc:
                    # Manually copy across the bounds array.
                    posting['location_bounds'] = raw_loc['bounds']
                    remaining_loc_fields.remove("bounds")

                if "country" in raw_loc:
                    if country_codes == None:
                        country_codes = get_country_codes()

                    parse_field(raw_loc, "country", posting, "location_country",
                                remaining_loc_fields, foreign_key=country_codes)

                if "state" in raw_loc:
                    if state_codes == None:
                        state_codes = get_state_codes()

                    parse_field(raw_loc, "state", posting, "location_state",
                                remaining_loc_fields, foreign_key=state_codes)

                if "metro" in raw_loc:
                    if metro_codes == None:
                        metro_codes = get_metro_codes()

                    parse_field(raw_loc, "metro", posting, "location_metro",
                                remaining_loc_fields, foreign_key=metro_codes)

                if "region" in raw_loc:
                    if region_codes == None:
                        region_codes = get_region_codes()

                    parse_field(raw_loc, "region", posting, "location_region",
                                remaining_loc_fields, foreign_key=region_codes)

                if "county" in raw_loc:
                    if county_codes == None:
                        county_codes = get_county_codes()

                    parse_field(raw_loc, "county", posting, "location_county",
                                remaining_loc_fields, foreign_key=county_codes)

                if "city" in raw_loc:
                    if city_codes == None:
                        city_codes = get_city_codes()

                    parse_field(raw_loc, "city", posting, "location_city",
                                remaining_loc_fields, foreign_key=city_codes)

                if "locality" in raw_loc:
                    if locality_codes == None:
                        locality_codes = get_locality_codes()

                    parse_field(raw_loc, "locality", posting,
                                "location_locality", remaining_loc_fields,
                                foreign_key=locality_codes)

                if "zipcode" in raw_loc:
                    if zip_codes == None:
                        zip_codes = get_zip_codes()

                    parse_field(raw_loc, "zipcode", posting,
                                "location_zipcode", remaining_loc_fields,
                                foreign_key=zip_codes)

                if remaining_loc_fields:
                    raise ParsingException("Unexpected location field(s): " +
                                           ", ".join(remaining_loc_fields))

            parse_field(raw_posting, "external_id", posting, "external_id",
                        remaining_fields, required=True,
                        coerce_to_type="string")

            parse_field(raw_posting, "external_url", posting, "external_url",
                        remaining_fields, coerce_to_type="string")

            parse_field(raw_posting, "heading", posting, "heading",
                        remaining_fields, coerce_to_type="string")

            parse_field(raw_posting, "body", posting, "body",
                        remaining_fields, coerce_to_type="string")

            parse_field(raw_posting, "html", posting, "html",
                        remaining_fields, coerce_to_type="string")

            parse_field(raw_posting, "timestamp", posting, "timestamp",
                        remaining_fields, coerce_to_type="datetime")

            if "expires" in raw_posting:
                parse_field(raw_posting, "expires", posting, "expires",
                            remaining_fields, coerce_to_type="datetime")
            else:
                posting['expires'] = dateHelpers.datetime_in_utc() \
                                   + datetime.timedelta(days=7)

            parse_field(raw_posting, "language", posting, "language",
                        remaining_fields, coerce_to_type="string")

            parse_field(raw_posting, "price", posting, "price",
                        remaining_fields, coerce_to_type="float")

            parse_field(raw_posting, "currency", posting, "currency",
                        remaining_fields, coerce_to_type="string")

            if "images" in raw_posting:
                raw_images = raw_posting['images']
                remaining_fields.remove("images")

                if not isinstance(raw_images, (list, tuple)):
                    raise ParsingException("images must be an array")

                for raw_image in raw_images:
                    remaining_image_fields = set(raw_image.keys())

                    image = {}

                    parse_field(raw_image, "full", image, "full_url",
                                remaining_image_fields,
                                coerce_to_type="string")

                    parse_field(raw_image, "full_width", image, "full_width",
                                remaining_image_fields,
                                coerce_to_type="integer")

                    parse_field(raw_image, "full_height", image, "full_height",
                                remaining_image_fields,
                                coerce_to_type="integer")

                    parse_field(raw_image, "thumbnail", image, "thumbnail_url",
                                remaining_image_fields,
                                coerce_to_type="string")

                    parse_field(raw_image, "thumbnail_width",
                                image, "thumbnail_width",
                                remaining_image_fields,
                                coerce_to_type="integer")

                    parse_field(raw_image, "thumbnail_height",
                                image, "thumbnail_height",
                                remaining_image_fields,
                                coerce_to_type="integer")

                    if remaining_image_fields:
                        raise ParsingException("Unexpected image field(s): " +
                                            ", ".join(remaining_image_fields))

                    images.append(image)

            if len(images) > 0:
                posting['has_image'] = True
            else:
                posting['has_image'] = False

            if "annotations" in raw_posting:
                raw_annotations = raw_posting['annotations']
                remaining_fields.remove("annotations")

                for key,value in raw_annotations.items():
                    if value == None: continue

                    if not isinstance(key, basestring):
                        raise ParsingException("Annotation keys must be " +
                                               "strings")

                    if not isinstance(value, basestring):
                        raise ParsingException("Annotation values must be " +
                                               "strings")

                    annotations.append(key + ":" + value)

            if "status" in raw_posting:
                raw_status = raw_posting['status']
                remaining_fields.remove("status")

                remaining_status_fields = set(raw_status.keys())

                parse_field(raw_status, "offered", posting, "status_offered",
                            remaining_status_fields, coerce_to_type="boolean")

                parse_field(raw_status, "wanted", posting, "status_wanted",
                            remaining_status_fields, coerce_to_type="boolean")

                parse_field(raw_status, "lost", posting, "status_lost",
                            remaining_status_fields, coerce_to_type="boolean")

                parse_field(raw_status, "stolen", posting, "status_stolen",
                            remaining_status_fields, coerce_to_type="boolean")

                parse_field(raw_status, "found", posting, "status_found",
                            remaining_status_fields, coerce_to_type="boolean")

                parse_field(raw_status, "deleted", posting, "status_deleted",
                            remaining_status_fields, coerce_to_type="boolean")

                if remaining_status_fields:
                    raise ParsingException("Unexpected status field(s): " +
                                        ", ".join(remaining_status_fields))

            parse_field(raw_posting, "immortal", posting, "immortal",
                        remaining_fields, coerce_to_type="boolean")

            if remaining_fields:
                raise ParsingException("Unexpected field(s): " +
                                    ", ".join(remaining_fields))
        except ParsingException as e:
            results.append((False, e.err_msg))
            continue

        parsed_posting = {'posting'     : posting,
                          'annotations' : annotations,
                          'images'      : images}

        results.append((True, parsed_posting))
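
# A minimal usage sketch for check_raw_postings() (the field values are
# invented; 'CRAIG' stands in for a real source code and 'VEHI' for a real
# category code):
#
#     raw = [{'source'      : 'CRAIG',
#             'category'    : 'VEHI',
#             'external_id' : '69046547',
#             'heading'     : '2010 GMC Acadia AWD 4dr SLT2'}]
#
#     for success, result in check_raw_postings(raw):
#         if success:
#             save_parsed_posting(result)  # hypothetical save helper
#         else:
#             print "Rejected posting: " + result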