def prototest():
    """Round-trip two sample records through the ``UserApps`` protobuf message.

    Each tab-separated sample line is parsed, packed with
    ``SerializeToString`` and restored with ``ParseFromString``; the restored
    message must compare equal to the one that was packed.
    """
    sample = (
        'idfa\t1rfw452y52g2gq4g\t55.55\t42.42\t1423,43,567,3,7,23\n'
        'gaid\t7rfw452y52g2gq4g\t55.55\t42.42\t7423,424'
    )
    for record in sample.splitlines():
        # Unpacking into five names also enforces the five-column layout.
        dev_type, dev_id, raw_lat, raw_lon, raw_apps = record.strip().split('\t')
        app_ids = [int(token) for token in raw_apps.split(',') if token.isdigit()]

        original = appsinstalled_pb2.UserApps()
        original.lat = float(raw_lat)
        original.lon = float(raw_lon)
        original.apps.extend(app_ids)

        restored = appsinstalled_pb2.UserApps()
        restored.ParseFromString(original.SerializeToString())
        assert original == restored
def insert_appsinstalled(memc, appsinstalled, dry_run=False):
    """Pack one record as a ``UserApps`` protobuf and store it through ``memc``.

    The value is written under the key ``"<dev_type>:<dev_id>"``. A ``set``
    that returns 0 is repeated after a 0.2 second pause, for at most five
    attempts in total. With ``dry_run`` the record is only logged at debug
    level and nothing is written.

    Returns:
        True after a dry run or when the last ``set`` result is non-zero;
        False when the attempts are used up or an exception was raised
        (the exception is logged).
    """
    max_attempts = 5
    pause = 0.2

    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    packed = message.SerializeToString()

    try:
        if dry_run:
            logging.debug("%s - %s -> %s" % (memc.servers[0], key, str(message).replace("\n", " ")))
            return True

        attempt = 1
        outcome = memc.set(key, packed)
        while outcome == 0 and attempt < max_attempts:
            time.sleep(pause)
            attempt += 1
            outcome = memc.set(key, packed)
        return outcome != 0
    except Exception as e:
        logging.exception("Cannot write to memc %s: %s" % (memc.servers[0], e))
        return False
def insert_appsinstalled(memc_pool, memc_addr, appsinstalled, dry_run=False):
    """Store one record in memcache through a pooled client, with backoff.

    The record is packed as a ``UserApps`` protobuf and written under the key
    ``"<dev_type>:<dev_id>"``.

    Args:
        memc_pool: Queue-like pool of clients; one is taken with
            ``get(timeout=0.1)`` and handed back with ``put``.
        memc_addr: Server address, used to create a fresh client when the
            pool is empty and in log messages.
        appsinstalled: Record exposing ``dev_type``, ``dev_id``, ``lat``,
            ``lon`` and ``apps``.
        dry_run: When true, only log the record at debug level.

    Returns:
        True after a dry run; otherwise the result of the last ``set``
        attempt (False when ``config['MEMC_MAX_RETRIES']`` allows no
        attempt). False when an exception was raised (it is logged).
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = f'{appsinstalled.dev_type}:{appsinstalled.dev_id}'
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()
    try:
        if dry_run:
            logging.debug('{} - {} -> {}'.format(memc_addr, key, str(ua).replace('\n', ' ')))
            return True

        # NOTE(review): relies on a module-level name ``Queue`` that exposes
        # ``Empty`` - confirm against the file's imports.
        try:
            memc = memc_pool.get(timeout=0.1)
        except Queue.Empty:
            memc = memcache.Client([memc_addr], socket_timeout=config['MEMC_TIMEOUT'])

        ok = False
        max_retries = config['MEMC_MAX_RETRIES']
        try:
            for n in range(max_retries):
                ok = memc.set(key, packed)
                if ok:
                    break
                # Back off exponentially, but do not sleep once no attempt is left.
                if n + 1 < max_retries:
                    time.sleep(config['MEMC_BACKOFF_FACTOR'] * (2 ** n))
        finally:
            # Hand the client back even when ``set`` raised, so the pool does
            # not lose a client on every failure.
            memc_pool.put(memc)
        return ok
    except Exception as e:
        logging.exception(f'Cannot write to memc {memc_addr}: {e}')
        return False
def insert_appsinstalled(memc_client, appsinstalled, dry_run=False):
    """Store one record in memcache, retrying a failed ``set``.

    The record is packed as a ``UserApps`` protobuf and written under the key
    ``"<dev_type>:<dev_id>"``. Up to ``MEMCACHE_MAX_RETRIES`` attempts are
    made, sleeping ``MEMCACHE_RETRY_TIMEOUT`` before every attempt except the
    first.

    Args:
        memc_client: Client exposing ``servers`` (each with an ``address``
            pair) and ``set(key, value)``.
        appsinstalled: Record exposing ``dev_type``, ``dev_id``, ``lat``,
            ``lon`` and ``apps``.
        dry_run: When true, only log the record at debug level.

    Returns:
        True after a dry run or when an attempt succeeded; False when every
        attempt failed or an exception was raised (it is logged).
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()
    memc_addr = ', '.join('{}:{}'.format(s.address[0], s.address[1]) for s in memc_client.servers)
    success = False
    try:
        if dry_run:
            logging.debug("[%s] %s -> %s" % (memc_addr, key, str(ua).replace("\n", " ")))
            success = True
        else:
            # ``range`` instead of ``xrange``: valid on both Python 2 and 3.
            for i in range(MEMCACHE_MAX_RETRIES):
                if i != 0:
                    time.sleep(MEMCACHE_RETRY_TIMEOUT)
                if memc_client.set(key, packed):
                    success = True
                    break
    except Exception as e:
        logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
    # The outcome was previously computed but never handed to the caller.
    return success
def insert_appsinstalled_process(child_conn_to_packed, parent_conns, dry_run=False):
    """Receive records from a connection, pack them and forward them onward.

    Polls ``child_conn_to_packed`` with ``TIMEOUT`` until the string
    ``SENTINEL`` arrives. Every other task is an ``(appsinstalled, memc_addr)``
    pair: the record is packed as a ``UserApps`` protobuf and the tuple
    ``(key, packed, memc_addr)`` is sent on the next connection produced by
    ``get_next_conn(parent_conns)``. With ``dry_run`` the record is only
    logged at debug level. Exceptions raised while logging or sending are
    logged and the loop continues.
    """
    handled = 0
    conn_source = get_next_conn(parent_conns)
    while True:
        # Nothing arrived within TIMEOUT: poll again.
        if not child_conn_to_packed.poll(TIMEOUT):
            continue

        task = child_conn_to_packed.recv()
        if isinstance(task, str) and task == SENTINEL:
            logging.info(f"insert_appsinstalled_process {os.getpid()} END, {handled} proceesed")
            break

        handled += 1
        if handled % 10000 == 0:
            logging.info(f"Insert {handled} lines {os.getpid()}")

        appsinstalled, memc_addr = task
        message = appsinstalled_pb2.UserApps()
        message.lat = appsinstalled.lat
        message.lon = appsinstalled.lon
        key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
        message.apps.extend(appsinstalled.apps)
        packed = message.SerializeToString()

        try:
            if dry_run:
                logging.debug("%s - %s -> %s" % (memc_addr, key, str(message).replace("\n", " ")))
            else:
                target_conn = next(conn_source)
                target_conn.send((key, packed, memc_addr))
        except Exception as e:
            logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
def insert_appsinstalled(memc, appsinstalleds_group, dry_run=False, timeout=3, retry_connection=3):
    """Pack a group of records and store them with ``set_multi``.

    Every record is packed as a ``UserApps`` protobuf under the key
    ``"<dev_type>:<dev_id>"``; records sharing a key overwrite each other.

    Args:
        memc: Client exposing ``servers`` and ``set_multi(mapping)``.
        appsinstalleds_group: Iterable of records exposing ``dev_type``,
            ``dev_id``, ``lat``, ``lon`` and ``apps``.
        dry_run: When true, only log the keys at debug level.
        timeout: Accepted for compatibility; not used by this function.
        retry_connection: Number of additional ``set_multi`` attempts made
            while keys are still reported as not stored.

    Returns:
        True after a dry run or when every key was stored; False when keys
        remain unstored after the retries or an exception was raised (it is
        logged).
    """
    memc_addr = str(memc.servers[0])
    packeds = {}
    for appsinstalled in appsinstalleds_group:
        ua = appsinstalled_pb2.UserApps()
        ua.lat = appsinstalled.lat
        ua.lon = appsinstalled.lon
        key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
        ua.apps.extend(appsinstalled.apps)
        packeds[key] = ua.SerializeToString()
    try:
        if dry_run:
            logging.debug("send nex group:")
            for key in packeds:
                logging.debug("%s - %s" % (memc_addr, key))
            return True

        # set_multi (python-memcached) returns the keys it could NOT store,
        # so an empty result means success and a non-empty one means failure.
        failed_keys = memc.set_multi(packeds)
        attempts_left = retry_connection
        while failed_keys and attempts_left > 0:
            logging.info(f"set failed. {attempts_left} attempts left")
            # Re-send the whole batch: sets are idempotent, and this does not
            # depend on the exact form in which failed keys are reported.
            failed_keys = memc.set_multi(packeds)
            attempts_left -= 1
        if failed_keys:
            logging.info(f"set failed. for {memc_addr}")
            return False
        return True
    except Exception as e:
        logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
        return False
def insert_appsinstalled(memc_addr, appsinstalled, dry_run=False):
    """Store one record through the client registered for its device type.

    The record is packed as a ``UserApps`` protobuf and written under the key
    ``"<dev_type>:<dev_id>"`` using ``mc[appsinstalled.dev_type]``. A failed
    attempt (an exception, or a falsy ``set`` result) is retried after a
    5 second pause; six attempts are made in total.

    Args:
        memc_addr: Server address, used only in log messages.
        appsinstalled: Record exposing ``dev_type``, ``dev_id``, ``lat``,
            ``lon`` and ``apps``.
        dry_run: When true, only log the record at debug level.

    Returns:
        True after a dry run or a successful write; False when every attempt
        failed (the failure is logged).
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()
    if dry_run:
        logging.debug("%s - %s -> %s" % (memc_addr, key, str(ua).replace("\n", " ")))
        return True

    max_retries = 5
    retry_delay = 5
    for attempt in range(max_retries + 1):
        if attempt:
            sleep(retry_delay)
        try:
            stored = mc[appsinstalled.dev_type].set(key, packed)
        except Exception as e:
            if attempt == max_retries:
                logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
                return False
            continue
        # A ``set`` that reports failure without raising must not count as a
        # successful write.
        # NOTE(review): assumes the client returns a truthy value on success,
        # as the other writers in this file do - confirm for ``mc``.
        if stored:
            return True
        if attempt == max_retries:
            logging.error("Cannot write to memc %s: set() reported failure" % memc_addr)
            return False
    return False
def buf_appsinstalled(appsinstalled):
    """Return ``(key, message)`` for a record, without serializing it.

    ``key`` is ``"<dev_type>:<dev_id>"`` and ``message`` is a ``UserApps``
    protobuf filled with the record's ``lat``, ``lon`` and ``apps``.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = '{}:{}'.format(appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    return cache_key, message
def insert_appsinstalled(queue_in, queue_out, device_memc, processed, errors, dry_run=False):
    """Pack records taken from ``queue_in`` and pass them on via ``queue_out``.

    Runs until the string ``SENTINEL`` is read. Each record is packed as a
    ``UserApps`` protobuf and ``(key, packed, memc_addr)`` is put on
    ``queue_out``, where ``memc_addr`` is looked up in ``device_memc`` by the
    record's ``dev_type``. With ``dry_run`` the record is only logged at
    debug level.

    ``processed.value`` is incremented for every record handled;
    ``errors.value`` is incremented when logging or queueing raises (the
    exception is logged).
    """
    seen = 0
    while True:
        record = queue_in.get()
        if isinstance(record, str) and record == SENTINEL:
            return

        seen += 1
        if seen % LOG_EVERY == 0:
            logging.info(f"{os.getpid()} - Insert {seen}")

        memc_addr = device_memc.get(record.dev_type)
        message = appsinstalled_pb2.UserApps()
        message.lat = record.lat
        message.lon = record.lon
        key = "%s:%s" % (record.dev_type, record.dev_id)
        message.apps.extend(record.apps)
        packed = message.SerializeToString()

        try:
            if not dry_run:
                queue_out.put((key, packed, memc_addr))
            else:
                logging.debug("%s - %s -> %s" % (memc_addr, key, str(message).replace("\n", " ")))
        except Exception as e:
            logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
            errors.value += 1
        processed.value += 1
def value_in_memcache_test(opts):
    """Load a two-record sample via ``main`` and check the stored values.

    The sample is written to ``test.tsv.gz``, ``opts.pattern`` is pointed at
    that file and ``main(opts)`` is run. Every sample record is then packed
    locally and compared with the value read back from the memcache address
    configured in ``opts`` for the record's device type.
    """
    sample = (
        "idfa\t1rfw452y52g2gq4g\t55.55\t42.42\t1423,43,567,3,7,23\n"
        "gaid\t7rfw452y52g2gq4g\t55.55\t42.42\t7423,424"
    )
    fn = "test.tsv.gz"
    with gzip.open(fn, mode="w") as archive:
        archive.write(sample.encode("utf8"))

    device_memc = {
        "idfa": opts.idfa,
        "gaid": opts.gaid,
        "adid": opts.adid,
        "dvid": opts.dvid,
    }
    opts.pattern = fn
    main(opts)

    for record in sample.splitlines():
        dev_type, dev_id, raw_lat, raw_lon, raw_apps = record.strip().split("\t")
        app_ids = [int(token) for token in raw_apps.split(",") if token.isdigit()]

        expected = appsinstalled_pb2.UserApps()
        expected.lat = float(raw_lat)
        expected.lon = float(raw_lon)
        expected.apps.extend(app_ids)
        expected_packed = expected.SerializeToString()

        client = get_memcache(device_memc[dev_type])
        stored = client.get(f"{dev_type}:{dev_id}")
        assert stored == expected_packed
def parse_value(line):
    """Parse ``line`` with ``parse_appsinstalled`` and return it packed.

    The parsed record's ``lat``, ``lon`` and ``apps`` are copied into a
    ``UserApps`` protobuf, whose serialized form is returned.
    """
    record = parse_appsinstalled(line)
    message = appsinstalled_pb2.UserApps()
    message.lat = record.lat
    message.lon = record.lon
    message.apps.extend(record.apps)
    packed = message.SerializeToString()
    return packed
def insert_appsinstalled(memc_pool, memc_addr, appsinstalled, dry_run=False):
    """Writes installed applications to memcache.

    The record is packed as a ``UserApps`` protobuf and written under the key
    ``"<dev_type>:<dev_id>"``. Up to three ``set`` attempts are made, with a
    0.5 second pause between attempts.

    Args:
        memc_pool: Queue-like pool of clients; one is taken with
            ``get(timeout=0.1)`` and handed back with ``put``.
        memc_addr: Server address, used to create a fresh client when the
            pool is empty and in log messages.
        appsinstalled: Record exposing ``dev_type``, ``dev_id``, ``lat``,
            ``lon`` and ``apps``.
        dry_run: When true, only log the record at debug level.

    Returns:
        True after a dry run or a successful write; False when every attempt
        failed or an exception was raised (it is logged).
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()
    try:
        if dry_run:
            logging.debug("%s - %s -> %s" % (memc_addr, key, str(ua).replace("\n", " ")))
            return True

        try:
            memc = memc_pool.get(timeout=0.1)
        except Queue.Empty:
            memc = memcache.Client([memc_addr], socket_timeout=3.0)

        ok = False
        attempts = 3
        try:
            for n in range(attempts):
                ok = memc.set(key, packed)
                if ok:
                    break
                # No point in pausing once no attempt is left.
                if n + 1 < attempts:
                    sleep(0.5)
        finally:
            # Hand the client back even when ``set`` raised, so the pool does
            # not lose a client on every failure.
            memc_pool.put(memc)
        # The outcome was previously computed but never handed to the caller.
        return bool(ok)
    except Exception as e:  # ``as`` form is valid on Python 2.6+ and 3
        logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
        return False
def listener(q, options):
    """Consume items from ``q`` and write each one to memcache.

    Runs until ``TERMINATE`` is read from the queue. Every other item is a
    mapping with ``memc_addr``, ``appsinstalled`` and ``dry``; the record is
    packed as a ``UserApps`` protobuf and stored with a newly created
    ``memcache.Client``. When ``dry`` is set the record is only logged at
    debug level. ``options`` is not used.

    Returns:
        None after ``TERMINATE``; False as soon as writing or logging an
        item raises (the exception is logged and the loop ends).
    """
    started_at = time.time()
    while True:
        item = q.get()
        logging.info('LISTENER EXECUTION START {}'.format(started_at))
        if item == TERMINATE:
            break

        memc_addr = item.get('memc_addr')
        record = item.get('appsinstalled')
        dry = item.get('dry')

        message = appsinstalled_pb2.UserApps()
        message.lat = record.lat
        message.lon = record.lon
        key = "%s:%s" % (record.dev_type, record.dev_id)
        message.apps.extend(record.apps)
        packed = message.SerializeToString()

        try:
            if dry:
                logging.debug("%s - %s -> %s" % (memc_addr, key, str(message).replace("\n", " ")))
            else:
                client = memcache.Client([memc_addr], socket_timeout=DEFAULT_TIMEOUT)
                client.set(key, packed)
                # Elapsed time is measured from listener start, not per item.
                elapsed = time.time() - started_at
                logging.info('LISTENER EXECUTION END {}'.format(timedelta(seconds=elapsed)))
        except Exception as e:
            logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
            return False
def create_data_for_memcached(appsinstalled):
    """Return ``{'key': ..., 'ua': ...}`` for a record.

    ``key`` is ``"<dev_type>:<dev_id>"``; ``ua`` is an unserialized
    ``UserApps`` protobuf carrying the record's ``lat``, ``lon`` and ``apps``.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    result = {'key': cache_key, 'ua': message}
    return result
def serialize_data(appsinstalled):
    """Return ``(packed, message)`` for a record.

    ``message`` is a ``UserApps`` protobuf carrying the record's ``lat``,
    ``lon`` and ``apps``; ``packed`` is its serialized form.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    message.apps.extend(appsinstalled.apps)
    return message.SerializeToString(), message
def run(self):
    """Consume batches of raw lines, pack them and forward them per device type.

    Each batch read from ``self.input_queue`` is parsed line by line with
    ``parse_appsinstalled``. Valid records are packed as ``UserApps``
    protobufs and buffered per device type; after the batch, every non-empty
    buffer is put (as a copy) on the matching queue in ``self.queue_by_dev``.
    An empty batch ends the loop: the error rate is logged and the method
    returns. ``self.errors`` counts unparsable lines and unknown device
    types; ``self.processed`` counts buffered records.
    """
    logging.debug('Processor started')
    buffered_lines = {}
    for k in self.queue_by_dev:
        buffered_lines[k] = {}  # dict key->val for memcache
    while True:
        lines = self.input_queue.get()
        # Set the event while fewer than 100 items are waiting in the input
        # queue, clear it otherwise.
        # NOTE(review): presumably back-pressure for the producer - confirm
        # against whoever waits on self.ev.
        if self.input_queue.qsize() < 100:
            self.ev.set()
        else:
            self.ev.clear()
        # finished?
        if len(lines) == 0:
            err_rate = (float(self.errors) / self.processed) if self.processed > 0 else 0
            if err_rate < NORMAL_ERR_RATE:
                logging.info(
                    "Acceptable error rate (%s). Successfull load %s records" % (err_rate, self.processed))
            else:
                logging.error("High error rate (%s > %s). Failed load" % (err_rate, NORMAL_ERR_RATE))
            logging.debug('Processor finished')
            # NOTE(review): task_done() is not called for this final empty
            # batch - confirm nothing joins on input_queue.
            return
        # converting and sorting lines by dev
        for line in lines:
            line = line.strip()
            if not line:
                continue
            appsinstalled = parse_appsinstalled(line)
            if not appsinstalled:
                self.errors += 1
                continue
            if appsinstalled.dev_type not in self.queue_by_dev:
                self.errors += 1
                logging.error("Unknow device type: %s" % appsinstalled.dev_type)
            else:
                ua = appsinstalled_pb2.UserApps()
                ua.lat = appsinstalled.lat
                ua.lon = appsinstalled.lon
                ua.apps.extend(appsinstalled.apps)
                # NOTE(review): bytes %-formatting with %s needs bytes-like
                # arguments on Python 3 - confirm the type of dev_type/dev_id.
                key = b"%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
                packed = ua.SerializeToString()
                # A later record with the same key replaces the earlier one.
                buffered_lines[appsinstalled.dev_type][key] = packed
                self.processed += 1
        self.input_queue.task_done()
        # sending data to CacheWriters
        for dt, dd in buffered_lines.items():
            if len(dd) > 0:
                # Send a copy, because the buffer is cleared and reused.
                self.queue_by_dev[dt].put(dict(dd))
                dd.clear()
def get_appsinstalled_as_key_value(appsinstalled):
    """Return ``(key, packed)`` for a record.

    ``key`` is ``"<dev_type>:<dev_id>"``; ``packed`` is the serialized
    ``UserApps`` protobuf built from the record's ``lat``, ``lon`` and
    ``apps``.
    """
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    message.apps.extend(appsinstalled.apps)
    return cache_key, message.SerializeToString()
def prepare_appsinstalled(appsinstalled):
    """Build the memcache entry for a record as a ``(key, packed)`` tuple.

    The key has the form ``"<dev_type>:<dev_id>"``; the value is a serialized
    ``UserApps`` protobuf holding ``lat``, ``lon`` and ``apps``.
    """
    user_apps = appsinstalled_pb2.UserApps()
    user_apps.lat = appsinstalled.lat
    user_apps.lon = appsinstalled.lon
    entry_key = '%s:%s' % (appsinstalled.dev_type, appsinstalled.dev_id)
    user_apps.apps.extend(appsinstalled.apps)
    entry_value = user_apps.SerializeToString()
    return entry_key, entry_value
def serialize(self, appsinstalled):
    """Return ``{'key': ..., 'data': ...}`` for a record.

    ``key`` is ``"<dev_type>:<dev_id>"``; ``data`` is the serialized
    ``UserApps`` protobuf built from the record's ``lat``, ``lon`` and
    ``apps``.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    return {
        'key': cache_key,
        'data': message.SerializeToString(),
    }
def get_packed(appsinstalled):
    """Return a one-entry dict mapping the record's key to its packed value.

    The key is ``"<dev_type>:<dev_id>"``; the value is the serialized
    ``UserApps`` protobuf built from ``lat``, ``lon`` and ``apps``.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    entry = {}
    entry[cache_key] = message.SerializeToString()
    return entry
def serialization_data(appsinstalled):
    """Return ``Serialized_data(ua, key, packed)`` for a record.

    ``ua`` is the ``UserApps`` protobuf built from ``lat``, ``lon`` and
    ``apps``, ``key`` is ``"<dev_type>:<dev_id>"`` and ``packed`` is the
    serialized message.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    payload = message.SerializeToString()
    return Serialized_data(message, cache_key, payload)
def memc_serialyzer(self, appsinstalled):
    """Return ``(key, packed)`` for a record.

    ``key`` is ``"<dev_type>:<dev_id>"``; ``packed`` is the serialized
    ``UserApps`` protobuf carrying the record's ``lat``, ``lon`` and ``apps``.
    """
    user_apps = appsinstalled_pb2.UserApps()
    user_apps.lat = appsinstalled.lat
    user_apps.lon = appsinstalled.lon
    cache_key = "{}:{}".format(appsinstalled.dev_type, appsinstalled.dev_id)
    user_apps.apps.extend(appsinstalled.apps)
    return cache_key, user_apps.SerializeToString()
def _construct_protobuf(self, appsinstalled):
    """Build the ``(key, packed)`` pair stored for a record.

    The key is ``"<dev_type>:<dev_id>"``; the packed value is a serialized
    ``UserApps`` protobuf with the record's ``lat``, ``lon`` and ``apps``.
    """
    proto = appsinstalled_pb2.UserApps()
    proto.lat = appsinstalled.lat
    proto.lon = appsinstalled.lon
    record_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    proto.apps.extend(appsinstalled.apps)
    serialized = proto.SerializeToString()
    return record_key, serialized
def serialised_line(line) -> tuple:
    """Serialize a parsed record into a ``(key, packed)`` pair.

    Despite its name, ``line`` is not a string: it must be an object exposing
    ``dev_type``, ``dev_id``, ``lat``, ``lon`` and ``apps``. The former
    ``str`` annotation contradicted that use and has been removed.

    Args:
        line: Parsed record with the attributes listed above.

    Returns:
        ``(key, packed)``, where ``key`` is ``"<dev_type>:<dev_id>"`` and
        ``packed`` is the serialized ``UserApps`` protobuf.
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = line.lat
    ua.lon = line.lon
    key = "%s:%s" % (line.dev_type, line.dev_id)
    ua.apps.extend(line.apps)
    packed = ua.SerializeToString()
    return key, packed
def serialize_installed_apps(apps_installed: AppsInstalled) -> Tuple[str, bytes]:
    """Serialize a record into its memcache key and packed protobuf value.

    Args:
        apps_installed: Record exposing ``dev_type``, ``dev_id``, ``lat``,
            ``lon`` and ``apps``.

    Returns:
        ``(key, packed)``, where ``key`` is ``"<dev_type>:<dev_id>"`` and
        ``packed`` is the serialized ``UserApps`` message. The packed value
        is ``bytes``, not ``str``, which is what ``SerializeToString``
        returns on Python 3.
    """
    user_apps = appsinstalled_pb2.UserApps()
    user_apps.lat = apps_installed.lat
    user_apps.lon = apps_installed.lon
    key = f"{apps_installed.dev_type}:{apps_installed.dev_id}"
    user_apps.apps.extend(apps_installed.apps)
    packed_user_apps = user_apps.SerializeToString()
    return key, packed_user_apps
def serialize_appsinstalled(appsinstalled):
    """Serialize a record into ``(key, packed)``.

    ``key`` is ``"<dev_type>:<dev_id>"``; ``packed`` is the serialized
    ``UserApps`` protobuf filled with ``lat``, ``lon`` and ``apps``.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    return cache_key, message.SerializeToString()
def apps_list_from_dict(self, chunk: List) -> Dict:
    """Map every record in ``chunk`` to its packed protobuf value.

    Returns a dict keyed by ``"<dev_type>:<dev_id>"`` whose values are
    serialized ``UserApps`` messages. When several records share a key, the
    last one wins.
    """
    def _to_pair(record):
        # Build one (key, packed) pair for a single record.
        record_key = "%s:%s" % (record.dev_type, record.dev_id)
        message = appsinstalled_pb2.UserApps()
        message.lat = record.lat
        message.lon = record.lon
        message.apps.extend(record.apps)
        return record_key, message.SerializeToString()

    return dict(_to_pair(record) for record in chunk)
def bufferize_appsinstalled(memc_client, appsinstalled):
    """Pack a record and add it to the per-client buffer.

    Updates three module-level mappings keyed by ``memc_client``: the
    buffered size grows by the length of the packed value, the buffer gets
    the ``key -> packed`` entry and the processed counter grows by one.

    Returns:
        The buffered size recorded for ``memc_client`` after the update.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    payload = message.SerializeToString()

    memc_clients_buff_size[memc_client] += len(payload)
    new_entry = {cache_key: payload}
    memc_clients_buff[memc_client].update(new_entry)
    memc_clients_processed_apps[memc_client] += 1
    return memc_clients_buff_size[memc_client]
def get_packed(recordline):
    """Parse a tab-separated bytes record and pack it as a protobuf.

    The record holds five columns: device type, device id, latitude,
    longitude and a comma-separated list of app ids. Entries of that list
    that are not all digits are skipped.

    Returns:
        ``(dev_type, key, packed)``, where ``key`` is
        ``b"<dev_type>:<dev_id>"`` and ``packed`` is the serialized
        ``UserApps`` message.
    """
    columns = recordline.strip().split(b"\t")
    # Unpacking raises ValueError unless there are exactly five columns.
    dev_type, dev_id, raw_lat, raw_lon, raw_apps = columns
    app_ids = [int(token) for token in raw_apps.split(b",") if token.isdigit()]
    latitude, longitude = float(raw_lat), float(raw_lon)

    message = appsinstalled_pb2.UserApps()
    message.lat = latitude
    message.lon = longitude
    message.apps.extend(app_ids)
    payload = message.SerializeToString()
    record_key = b'%s:%s' % (dev_type, dev_id)
    return dev_type, record_key, payload
def run(self):
    """Pack records from ``self.in_queue`` and queue them for the cache writer.

    Runs until the string ``SENTINEL`` is read. Each record is packed as a
    ``UserApps`` protobuf and ``(key, packed, memc_addr)`` is put on
    ``self.to_cache_queue``; with ``self.dry_run`` the record is only logged
    at debug level. A falsy item, an unknown device type or an exception
    while logging/queueing counts in ``self.errors``; everything else counts
    in ``self.processed``. ``task_done`` is called once for every item taken.
    """
    taken = 0
    while True:
        appsinstalled = self.in_queue.get()
        if isinstance(appsinstalled, str) and appsinstalled == SENTINEL:
            self.in_queue.task_done()
            logging.info(f"InsertAppsInstalledWorker {self.name} END")
            return

        taken += 1
        if taken % 100000 == 0:
            logging.info(f"Insert {taken} apps {self.name}")

        succeeded = False
        if appsinstalled:
            memc_addr = self.device_memc.get(appsinstalled.dev_type)
            if not memc_addr:
                logging.error("Unknow device type: %s" % appsinstalled.dev_type)
            else:
                message = appsinstalled_pb2.UserApps()
                message.lat = appsinstalled.lat
                message.lon = appsinstalled.lon
                key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
                message.apps.extend(appsinstalled.apps)
                packed = message.SerializeToString()
                try:
                    if self.dry_run:
                        logging.debug("%s - %s -> %s" % (memc_addr, key, str(message).replace("\n", " ")))
                    else:
                        self.to_cache_queue.put((key, packed, memc_addr))
                except Exception as e:
                    logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
                else:
                    succeeded = True

        # Count the outcome first, then acknowledge the item exactly once.
        if succeeded:
            self.processed += 1
        else:
            self.errors += 1
        self.in_queue.task_done()