Example #1
    def enqueue(self, items, batch=None, pipe=None):
        """
        Put items into the queue.

        The items will be pushed into Redis as part of a single (given)
        pipe in batches corresponding to the given batch argument.
        """
        if batch is None:
            batch = self.batch

        if batch == 0:
            batch = len(items)

        if self.json:
            # simplejson.dumps returns Unicode strings
            items = [
                simplejson.dumps(item, encoding='utf-8').encode('utf-8')
                for item in items
            ]

        if self.compress:
            items = [util.encode_gzip(item, encoding=None) for item in items]

        if pipe is not None:
            self._push(pipe, items, batch)
        else:
            with redis_pipeline(self.redis_client) as pipe:
                self._push(pipe, items, batch)
Example #2
    def enqueue(self, items, batch=None, pipe=None):
        """
        Put items into the queue.

        The items will be pushed into Redis as part of a single (given)
        pipe in batches corresponding to the given batch argument.
        """
        if batch is None:
            batch = self.batch

        if batch == 0:
            batch = len(items)

        if self.json:
            # simplejson.dumps returns Unicode strings
            items = [simplejson.dumps(item, encoding='utf-8').encode('utf-8')
                     for item in items]

        if self.compress:
            items = [util.encode_gzip(item, encoding=None) for item in items]

        if pipe is not None:
            self._push(pipe, items, batch)
        else:
            with redis_pipeline(self.redis_client) as pipe:
                self._push(pipe, items, batch)
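Both variants above are the same method; only the wrapping of the list comprehension differs. Callers can either let enqueue open and execute its own pipeline or pass in a shared one so several pushes are sent to Redis together. A minimal sketch of the two call styles, assuming hypothetical `queue`, `other_queue`, and `redis_client` objects:

# Sketch only: `queue`, `other_queue`, and `redis_client` are assumed names.
items = [{'id': 1}, {'id': 2}, {'id': 3}]

# Simple case: enqueue opens, fills, and executes its own pipeline.
queue.enqueue(items)

# Shared case: several enqueue calls reuse one pipeline, which is
# executed once when the context manager exits.
with redis_pipeline(redis_client) as pipe:
    queue.enqueue(items, pipe=pipe)
    other_queue.enqueue(items, batch=2, pipe=pipe)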
Example #3
def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(task, session, pipe,
                             cell_type=datatype)(filename=filename)
Example #4
    def _enqueue(self, items, queue_key, batch=100, expire=False, pipe=None):
        data = [str(internal_dumps(item)) for item in items]
        if pipe is not None:
            self._push(pipe, data, queue_key, batch=batch, expire=expire)
        else:
            with redis_pipeline(self.redis_client) as pipe:
                self._push(pipe, data, queue_key, batch=batch, expire=expire)
Example #5
def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                task, cell_type=datatype)(pipe, session, filename=filename)
Example #6
def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                None, session, pipe,
                cell_type=datatype,
                update_area_task=update_area)(filename=filename)
Example #7
    def redis_pipeline(self, execute=True):
        """
        Returns a Redis pipeline usable as a context manager.

        :param execute: Should the pipeline be executed or aborted at the end?
        :type execute: bool
        """
        return redis_pipeline(self.redis_client, execute=execute)
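A short usage sketch for this wrapper, assuming `obj` is an instance of the class above; per the docstring, passing execute=False aborts the pipeline instead of executing it, which is handy in tests:

# Sketch only: `obj` is a hypothetical instance exposing redis_pipeline().
with obj.redis_pipeline() as pipe:
    pipe.lpush('some_queue', b'payload')  # sent when the block exits

with obj.redis_pipeline(execute=False) as pipe:
    pipe.lpush('some_queue', b'payload')  # aborted, nothing reaches Redis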
Example #8
    def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
        task = FakeTask(self.celery_app)
        with self.get_csv(lo=lo, hi=hi, time=time) as path:
            with redis_pipeline(self.redis_client) as pipe:
                ImportLocal(task, self.session, pipe,
                            cell_type=cell_type)(filename=path)
        if cell_type == 'ocid':
            update_cellarea_ocid.delay().get()
        else:
            update_cellarea.delay().get()
Example #9
    def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
        task = FakeTask(self.celery_app)
        with self.get_csv(lo=lo, hi=hi, time=time) as path:
            with redis_pipeline(self.redis_client) as pipe:
                ImportLocal(task, pipe, cell_type=cell_type)(
                    self.session, filename=path)
        if cell_type == 'ocid':
            update_cellarea_ocid.delay().get()
        else:
            update_cellarea.delay().get()
Example #10
    def _enqueue(self, items, queue_key, batch=100, pipe=None, json=True):
        if json:
            data = [str(internal_dumps(item)) for item in items]
        else:
            # make a copy, since _push is modifying the list in-place
            data = list(items)
        if pipe is not None:
            self._push(pipe, data, queue_key, batch=batch)
        else:
            with redis_pipeline(self.redis_client) as pipe:
                self._push(pipe, data, queue_key, batch=batch)
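The `list(items)` copy in the non-JSON branch matters because, per the comment, `_push` modifies the list it receives in place; without the copy the caller's list would be mutated as a side effect. A tiny illustration of that aliasing pitfall with generic code (not the actual `_push`):

# Illustration only; consume_in_place is a stand-in, not the real _push.
def consume_in_place(data, batch=2):
    while data:
        chunk, data[:batch] = data[:batch], []  # drain a batch off the front
        # ... chunk would be pushed to Redis here ...

items = ['a', 'b', 'c']
consume_in_place(list(items))  # copy passed in, caller's list is untouched
consume_in_place(items)        # same list passed in, it ends up empty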
Example #11
    def import_csv(self, celery, redis, session, cell,
                   lo=1, hi=10, time=1408604686, cell_type='ocid'):
        task = FakeTask(celery)
        with self.get_csv(cell, lo=lo, hi=hi, time=time) as path:
            with redis_pipeline(redis) as pipe:
                ImportLocal(task, cell_type=cell_type)(
                    pipe, session, filename=path)
        if cell_type == 'ocid':
            update_cellarea_ocid.delay().get()
        else:
            update_cellarea.delay().get()
Example #12
    def add_counter(self, stat_key, time, value):
        stat_counter = StatCounter(stat_key, time)
        with redis_pipeline(self.redis_client) as pipe:
            stat_counter.incr(pipe, value)
Example #13
    def add_counter(self, stat_key, time, value):
        stat_counter = StatCounter(stat_key, time)
        with redis_pipeline(self.redis_client) as pipe:
            stat_counter.incr(pipe, value)
Example #14
    def add_counter(self, redis, stat_key, time, value):
        stat_counter = StatCounter(stat_key, time)
        with redis_pipeline(redis) as pipe:
            stat_counter.incr(pipe, value)
Example #15
    def redis_pipeline(self, execute=True):
        # returns a context manager
        return redis_pipeline(self.redis_client, execute=execute)
Example #16
    def add_counter(self, redis, stat_key, time, value):
        stat_counter = StatCounter(stat_key, time)
        with redis_pipeline(redis) as pipe:
            stat_counter.incr(pipe, value)
Example #17
    def import_csv(self, lo=1, hi=10, time=1408604686):
        with self.get_csv(lo=lo, hi=hi, time=time) as path:
            with redis_pipeline(self.redis_client) as pipe:
                ImportLocal(
                    None, self.session, pipe,
                    update_area_task=update_area)(filename=path)
Example #18
def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    csv_content = peekable(reader(file_handle))
    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}

    counts = defaultdict(Counter)
    areas = set()
    areas_total = 0
    total = 0

    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        try:
            radio = radio_type[row[0]]
        except KeyError:
            raise InvalidCSV("Unknown radio type in row: %s" % row)

        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radiuses exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError) as e:
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV("first row %s is invalid: %s" % (row, e))
            else:
                LOGGER.warning("row %s is invalid: %s", row, e)
                continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (session.query(shard_type).filter(
            shard_type.cellid == shard.cellid).options(
                load_only("modified")).one_or_none())

        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record
            operation = "new"
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)

        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)

        if areas and (len(areas) % 1000 == 0):
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            with redis_pipeline(redis_client) as pipe:
                cellarea_queue.enqueue(list(areas), pipe=pipe)
            update_cellarea.delay()
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(areas), pipe=pipe)
        update_cellarea.delay()

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:", total,
                "" if total == 1 else "s")
    for radio_type, op_counts in sorted(counts.items()):
        LOGGER.info(
            "  %s: %d new, %d updated, %d already loaded",
            radio_type,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info("  %d station area%s updated", areas_total,
                    "" if areas_total == 1 else "s")