def enqueue(self, items, batch=None, pipe=None):
    """
    Put items into the queue.

    The items will be pushed into Redis as part of a single (given)
    pipe in batches corresponding to the given batch argument.
    """
    if batch is None:
        batch = self.batch
    if batch == 0:
        batch = len(items)
    if self.json:
        # simplejson.dumps returns Unicode strings
        items = [
            simplejson.dumps(item, encoding='utf-8').encode('utf-8')
            for item in items
        ]
    if self.compress:
        items = [util.encode_gzip(item, encoding=None) for item in items]
    if pipe is not None:
        self._push(pipe, items, batch)
    else:
        with redis_pipeline(self.redis_client) as pipe:
            self._push(pipe, items, batch)

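# A minimal usage sketch for enqueue() above, assuming a DataQueue-style
# instance named "queue" with json/compress settings and a redis_client
# attribute; the item payloads are illustrative only.
def _example_enqueue(queue):
    items = [{'mcc': 262, 'mnc': 1}, {'mcc': 262, 'mnc': 2}]
    # Simplest form: enqueue() opens and executes its own pipeline.
    queue.enqueue(items)
    # Shared form: several enqueue() calls ride on one pipeline, so all
    # pushes reach Redis in a single execute() at the end of the block.
    with redis_pipeline(queue.redis_client) as pipe:
        queue.enqueue(items, batch=1, pipe=pipe)
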
def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    """Import cell data from a local export file (manual script helper)."""
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(task, session, pipe,
                             cell_type=datatype)(filename=filename)

def _enqueue(self, items, queue_key, batch=100, expire=False, pipe=None):
    """Serialize items and push them onto the given queue key in batches."""
    data = [str(internal_dumps(item)) for item in items]
    if pipe is not None:
        self._push(pipe, data, queue_key, batch=batch, expire=expire)
    else:
        with redis_pipeline(self.redis_client) as pipe:
            self._push(pipe, data, queue_key, batch=batch, expire=expire)

def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    """Import cell data from a local export file (manual script helper)."""
    celery_app.data_queues = configure_data(redis_client)
    task = FakeTask(celery_app)
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                task, cell_type=datatype)(pipe, session, filename=filename)

def load_file(db, redis_client, datatype, filename):  # pragma: no cover
    """Import cell data from a local export file (manual script helper)."""
    with redis_pipeline(redis_client) as pipe:
        with db_worker_session(db) as session:
            ocid.ImportLocal(
                None, session, pipe, cell_type=datatype,
                update_area_task=update_area)(filename=filename)

def redis_pipeline(self, execute=True):
    """
    Returns a Redis pipeline usable as a context manager.

    :param execute: Should the pipeline be executed or aborted at the end?
    :type execute: bool
    """
    return redis_pipeline(self.redis_client, execute=execute)

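# A sketch of the execute flag above: with execute=True (the default) the
# buffered commands are sent when the context exits; with execute=False the
# pipeline is aborted instead, which is handy on error paths or in tests.
# "client" stands in for any configured Redis client (assumption).
def _example_execute_flag(client):
    with redis_pipeline(client) as pipe:
        pipe.lpush('example_queue', b'item')  # sent to Redis on exit
    with redis_pipeline(client, execute=False) as pipe:
        pipe.lpush('example_queue', b'item')  # buffered, then dropped
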
def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
    """Test helper: import a generated CSV, then run the area update task."""
    task = FakeTask(self.celery_app)
    with self.get_csv(lo=lo, hi=hi, time=time) as path:
        with redis_pipeline(self.redis_client) as pipe:
            ImportLocal(task, self.session, pipe,
                        cell_type=cell_type)(filename=path)
    if cell_type == 'ocid':
        update_cellarea_ocid.delay().get()
    else:
        update_cellarea.delay().get()

def import_csv(self, lo=1, hi=10, time=1408604686, cell_type='ocid'):
    """Test helper: import a generated CSV, then run the area update task."""
    task = FakeTask(self.celery_app)
    with self.get_csv(lo=lo, hi=hi, time=time) as path:
        with redis_pipeline(self.redis_client) as pipe:
            ImportLocal(task, pipe, cell_type=cell_type)(
                self.session, filename=path)
    if cell_type == 'ocid':
        update_cellarea_ocid.delay().get()
    else:
        update_cellarea.delay().get()

def _enqueue(self, items, queue_key, batch=100, pipe=None, json=True):
    """Push items onto the queue key, JSON-encoded unless json=False."""
    if json:
        data = [str(internal_dumps(item)) for item in items]
    else:
        # make a copy, since _push is modifying the list in-place
        data = list(items)
    if pipe is not None:
        self._push(pipe, data, queue_key, batch=batch)
    else:
        with redis_pipeline(self.redis_client) as pipe:
            self._push(pipe, data, queue_key, batch=batch)

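# Why the list(items) copy above matters: _push is described as modifying
# its list argument in place, so handing over the caller's list directly
# with json=False would empty it as a side effect. A toy model of such an
# in-place consumer (illustrative, not the real _push signature):
def _consume_in_place(data, batch):
    while data:
        chunk = data[:batch]
        del data[:batch]
        # a real implementation would pipe.lpush(key, *chunk) here
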
def import_csv(self, celery, redis, session, cell,
               lo=1, hi=10, time=1408604686, cell_type='ocid'):
    """Test helper: import a generated CSV, then run the area update task."""
    task = FakeTask(celery)
    with self.get_csv(cell, lo=lo, hi=hi, time=time) as path:
        with redis_pipeline(redis) as pipe:
            ImportLocal(task, cell_type=cell_type)(
                pipe, session, filename=path)
    if cell_type == 'ocid':
        update_cellarea_ocid.delay().get()
    else:
        update_cellarea.delay().get()

def add_counter(self, stat_key, time, value):
    """Test helper: increment a stat counter inside its own pipeline."""
    stat_counter = StatCounter(stat_key, time)
    with redis_pipeline(self.redis_client) as pipe:
        stat_counter.incr(pipe, value)

def add_counter(self, redis, stat_key, time, value):
    """Test helper: increment a stat counter inside its own pipeline."""
    stat_counter = StatCounter(stat_key, time)
    with redis_pipeline(redis) as pipe:
        stat_counter.incr(pipe, value)

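# Usage sketch for the add_counter() helpers above; "helper" is an instance
# providing them, and the stat key is a placeholder (the real code may use
# a StatKey enum member rather than a string).
def _example_add_counter(helper, redis_client):
    from datetime import date
    helper.add_counter(redis_client, 'cell', date(2014, 8, 21), 13)
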
def redis_pipeline(self, execute=True):
    # returns a context manager
    return redis_pipeline(self.redis_client, execute=execute)

def import_csv(self, lo=1, hi=10, time=1408604686):
    """Test helper: import a generated CSV export."""
    with self.get_csv(lo=lo, hi=hi, time=time) as path:
        with redis_pipeline(self.redis_client) as pipe:
            ImportLocal(
                None, self.session, pipe,
                update_area_task=update_area)(filename=path)

def read_stations_from_csv(session, file_handle, redis_client, cellarea_queue):
    """
    Read stations from a public cell export CSV.

    :arg session: a database session
    :arg file_handle: an open file handle for the CSV data
    :arg redis_client: a Redis client
    :arg cellarea_queue: the DataQueue for updating cellarea IDs
    """
    # Avoid circular imports
    from ichnaea.data.tasks import update_cellarea, update_statregion

    csv_content = peekable(reader(file_handle))

    # UMTS was the original name for WCDMA stations
    radio_type = {"UMTS": "wcdma", "GSM": "gsm", "LTE": "lte", "": "Unknown"}

    counts = defaultdict(Counter)
    areas = set()
    areas_total = 0
    total = 0

    if not csv_content:
        LOGGER.warning("Nothing to process.")
        return

    first_row = csv_content.peek()
    if first_row == _FIELD_NAMES:
        # Skip the first row because it's a header row
        next(csv_content)
    else:
        LOGGER.warning("Expected header row, got data: %s", first_row)

    for row in csv_content:
        try:
            radio = radio_type[row[0]]
        except KeyError:
            raise InvalidCSV("Unknown radio type in row: %s" % row)

        if radio == "Unknown":
            LOGGER.warning("Skipping unknown radio: %s", row)
            continue

        try:
            data = {
                "radio": radio,
                "mcc": int(row[1]),
                "mnc": int(row[2]),
                "lac": int(row[3]),
                "cid": int(row[4]),
                "psc": int(row[5]) if row[5] else 0,
                "lon": float(row[6]),
                "lat": float(row[7]),
                # Some exported radiuses exceed the max and fail validation
                "radius": min(int(row[8]), CELL_MAX_RADIUS),
                "samples": int(row[9]),
                # row[10] is "changable", always 1 and not imported
                "created": datetime.fromtimestamp(int(row[11]), UTC),
                "modified": datetime.fromtimestamp(int(row[12]), UTC),
            }
            shard = CellShard.create(_raise_invalid=True, **data)
        except (colander.Invalid, ValueError) as e:
            if total == 0:
                # If the first row is invalid, it's likely the rest of the
                # file is, too--drop out here.
                raise InvalidCSV("first row %s is invalid: %s" % (row, e))
            else:
                LOGGER.warning("row %s is invalid: %s", row, e)
                continue

        # Is this station in the database?
        shard_type = shard.__class__
        existing = (
            session.query(shard_type)
            .filter(shard_type.cellid == shard.cellid)
            .options(load_only("modified"))
            .one_or_none()
        )

        if existing:
            if existing.modified < data["modified"]:
                # Update existing station with new data
                operation = "updated"
                existing.psc = shard.psc
                existing.lon = shard.lon
                existing.lat = shard.lat
                existing.radius = shard.radius
                existing.samples = shard.samples
                existing.created = shard.created
                existing.modified = shard.modified
            else:
                # Do nothing to existing station record
                operation = "found"
        else:
            # Add a new station record
            operation = "new"
            shard.min_lat = shard.lat
            shard.max_lat = shard.lat
            shard.min_lon = shard.lon
            shard.max_lon = shard.lon
            session.add(shard)
        counts[data["radio"]][operation] += 1

        # Process the cell area?
        if operation in {"new", "updated"}:
            areas.add(area_id(shard))

        # Process a chunk of stations, report on progress
        total += 1
        if total % 1000 == 0:
            session.commit()
            LOGGER.info("Processed %d stations", total)

        if areas and (len(areas) % 1000 == 0):
            session.commit()
            areas_total += len(areas)
            LOGGER.info("Processed %d station areas", areas_total)
            with redis_pipeline(redis_client) as pipe:
                cellarea_queue.enqueue(list(areas), pipe=pipe)
            update_cellarea.delay()
            areas = set()

    # Commit remaining station data
    session.commit()

    # Update the remaining cell areas
    if areas:
        areas_total += len(areas)
        with redis_pipeline(redis_client) as pipe:
            cellarea_queue.enqueue(list(areas), pipe=pipe)
        update_cellarea.delay()

    # Now that we've updated all the cell areas, we need to update the
    # statregion
    update_statregion.delay()

    # Summarize results
    LOGGER.info("Complete, processed %d station%s:",
                total, "" if total == 1 else "s")
    for radio_type, op_counts in sorted(counts.items()):
        LOGGER.info(
            " %s: %d new, %d updated, %d already loaded",
            radio_type,
            op_counts["new"],
            op_counts["updated"],
            op_counts["found"],
        )
    if areas_total:
        LOGGER.info(" %d station area%s updated",
                    areas_total, "" if areas_total == 1 else "s")
