Esempio n. 1
0
def prototest():
    """Round-trip two sample records through the ``UserApps`` protobuf message.

    Each tab-separated sample line is parsed, packed into a message,
    serialized, parsed back into a fresh message and compared with the
    message it came from.
    """
    sample = 'idfa\t1rfw452y52g2gq4g\t55.55\t42.42\t1423,43,567,3,7,23\ngaid\t7rfw452y52g2gq4g\t55.55\t42.42\t7423,424'
    for record in sample.splitlines():
        fields = record.strip().split('\t')
        dev_type, dev_id, lat, lon, raw_apps = fields
        # Non-numeric app tokens are dropped rather than treated as errors.
        app_ids = [int(token) for token in raw_apps.split(',') if token.isdigit()]
        lat, lon = float(lat), float(lon)

        original = appsinstalled_pb2.UserApps()
        original.lat = lat
        original.lon = lon
        original.apps.extend(app_ids)

        restored = appsinstalled_pb2.UserApps()
        restored.ParseFromString(original.SerializeToString())
        assert original == restored
Esempio n. 2
0
def insert_appsinstalled(memc, appsinstalled, dry_run=False):
    """Store one record in memcache, retrying a failed ``set`` a few times.

    The record is packed into a ``UserApps`` message and written under the key
    ``"<dev_type>:<dev_id>"``. A ``set`` result equal to 0 is retried after a
    0.2 s pause, up to 5 attempts in total.

    Returns True in dry-run mode (the record is only logged), otherwise
    whether the last ``set`` result differs from 0. Any exception is logged
    and reported as False.
    """
    max_attempts = 5
    pause = 0.2

    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()
    # @TODO persistent connection
    # @TODO retry and timeouts!
    try:
        if dry_run:
            logging.debug("%s - %s -> %s" %
                          (memc.servers[0], key, str(ua).replace("\n", " ")))
            return True

        result = memc.set(key, packed)
        attempt = 1
        while result == 0 and attempt < max_attempts:
            time.sleep(pause)
            attempt += 1
            result = memc.set(key, packed)
        return result != 0
    except Exception as e:
        logging.exception("Cannot write to memc %s: %s" % (memc.servers[0], e))
        return False
Esempio n. 3
0
def insert_appsinstalled(memc_pool, memc_addr, appsinstalled, dry_run=False):
    """Write one record to memcache using a pooled client, with backoff.

    The record is packed into a ``UserApps`` message and stored under the key
    ``"<dev_type>:<dev_id>"``. A client is taken from ``memc_pool``; if the
    pool yields nothing within 0.1 s a new client for ``memc_addr`` is created.
    Failed ``set`` calls are retried up to ``config['MEMC_MAX_RETRIES']`` times
    with an exponentially growing pause based on
    ``config['MEMC_BACKOFF_FACTOR']``.

    Returns True in dry-run mode, otherwise the result of the last ``set``.
    Any exception is logged and reported as False.
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = f'{appsinstalled.dev_type}:{appsinstalled.dev_id}'
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()
    try:
        if dry_run:
            logging.debug('{} - {} -> {}'.format(memc_addr, key,
                                                 str(ua).replace('\n', ' ')))
            return True

        try:
            memc = memc_pool.get(timeout=0.1)
        except Queue.Empty:
            memc = memcache.Client([memc_addr],
                                   socket_timeout=config['MEMC_TIMEOUT'])
        try:
            ok = False
            max_retries = config['MEMC_MAX_RETRIES']
            for n in range(max_retries):
                ok = memc.set(key, packed)
                if ok:
                    break
                # No point in backing off after the final attempt.
                if n < max_retries - 1:
                    time.sleep(config['MEMC_BACKOFF_FACTOR'] * (2**n))
            return ok
        finally:
            # Hand the client back even when set() raised, so the pool does
            # not shrink by one client per failure.
            memc_pool.put(memc)
    except Exception as e:
        logging.exception(f'Cannot write to memc {memc_addr}: {e}')
        return False
Esempio n. 4
0
def insert_appsinstalled(memc_client, appsinstalled, dry_run=False):
    """Store one record in memcache, retrying a failed ``set``.

    The record is packed into a ``UserApps`` message and written under the key
    ``"<dev_type>:<dev_id>"``. ``set`` is attempted up to
    ``MEMCACHE_MAX_RETRIES`` times, sleeping ``MEMCACHE_RETRY_TIMEOUT`` before
    every attempt except the first.

    Returns True when the record was stored (or only logged in dry-run mode),
    False when every attempt failed or an exception was raised. Exceptions
    are logged, not propagated.
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()

    # Human-readable list of every server behind the client, used in logs only.
    memc_addr = ', '.join(['{}:{}'.format(s.address[0], s.address[1]) for s in memc_client.servers])
    success = False
    try:
        if dry_run:
            logging.debug("[%s] %s -> %s" % (memc_addr, key, str(ua).replace("\n", " ")))
            success = True
        else:
            for i in xrange(MEMCACHE_MAX_RETRIES):
                if i != 0:
                    time.sleep(MEMCACHE_RETRY_TIMEOUT)

                if memc_client.set(key, packed):
                    success = True
                    break
    except Exception as e:
        logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
    # The outcome was previously computed but never handed back to the caller.
    return success
def insert_appsinstalled_process(child_conn_to_packed, parent_conns, dry_run=False):
    """Worker loop: receive records, serialize them and forward to a cache writer.

    Tasks are read from ``child_conn_to_packed`` (polled with ``TIMEOUT``).
    Each task is an ``(appsinstalled, memc_addr)`` pair; it is packed into a
    ``UserApps`` message and sent as ``(key, packed, memc_addr)`` through the
    next connection produced by ``get_next_conn(parent_conns)``. In dry-run
    mode the record is only logged. The loop ends when ``SENTINEL`` arrives.
    """
    handled = 0
    conn_iter = get_next_conn(parent_conns)
    while True:
        if not child_conn_to_packed.poll(TIMEOUT):
            # Nothing arrived within the timeout; poll again.
            continue

        message = child_conn_to_packed.recv()
        if isinstance(message, str) and message == SENTINEL:
            logging.info(f"insert_appsinstalled_process {os.getpid()} END, {handled} proceesed")
            break

        handled += 1
        if handled % 10000 == 0:
            logging.info(f"Insert {handled} lines {os.getpid()}")

        appsinstalled, memc_addr = message
        ua = appsinstalled_pb2.UserApps()
        ua.lat = appsinstalled.lat
        ua.lon = appsinstalled.lon
        key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
        ua.apps.extend(appsinstalled.apps)
        packed = ua.SerializeToString()
        # @TODO persistent connection
        # @TODO retry and timeouts!
        try:
            if dry_run:
                logging.debug("%s - %s -> %s" % (memc_addr, key, str(ua).replace("\n", " ")))
            else:
                parent_conn_to_cache = next(conn_iter)
                parent_conn_to_cache.send((key, packed, memc_addr))
        except Exception as e:
            logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
Esempio n. 6
0
 def insert_appsinstalled(memc,
                          appsinstalleds_group,
                          dry_run=False,
                          timeout=3,
                          retry_connection=3):
     """Write a group of records to memcache with a single ``set_multi`` call.

     Every record is packed into a ``UserApps`` message and stored under the
     key ``"<dev_type>:<dev_id>"``. ``set_multi`` reports the keys that could
     NOT be stored, so an empty result means success; a non-empty result is
     retried up to ``retry_connection`` more times.

     ``timeout`` is accepted for interface compatibility but is not used here.

     Returns True when every key was stored (or in dry-run mode, where keys
     are only logged), False when keys are still failing after the retries
     or when an exception was raised (the exception is logged).
     """
     memc_addr = str(memc.servers[0])
     packeds = {}
     for appsinstalled in appsinstalleds_group:
         ua = appsinstalled_pb2.UserApps()
         ua.lat = appsinstalled.lat
         ua.lon = appsinstalled.lon
         key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
         ua.apps.extend(appsinstalled.apps)
         packeds[key] = ua.SerializeToString()
     try:
         if dry_run:
             logging.debug("send nex group:")
             for key in packeds:
                 logging.debug("%s - %s" % (memc_addr, key))
             return True

         # set_multi returns the list of keys that failed to be stored.
         failed_keys = memc.set_multi(packeds)
         attempts_left = retry_connection
         while failed_keys and attempts_left > 0:
             logging.info(
                 f"set failed. {attempts_left} attempts left")
             failed_keys = memc.set_multi(packeds)
             attempts_left -= 1
         if failed_keys:
             logging.info(f"set failed. for {memc_addr}")
         return not failed_keys
     except Exception as e:
         logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
         return False
Esempio n. 7
0
def insert_appsinstalled(memc_addr, appsinstalled, dry_run=False):
    """Store one record through the module-level ``mc`` client mapping.

    The record is packed into a ``UserApps`` message and written under the key
    ``"<dev_type>:<dev_id>"`` using the client registered for the record's
    device type. In dry-run mode the record is only logged.

    Only exceptions count as failures here: a ``set`` that raises is retried
    after a 5 s pause; once the retry counter exceeds 5 the exception is
    logged and False is returned. Returns True otherwise.
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()

    if dry_run:
        logging.debug("%s - %s -> %s" %
                      (memc_addr, key, str(ua).replace("\n", " ")))
        return True

    # retry and timeouts
    attempt = 1
    while True:
        try:
            mc[appsinstalled.dev_type].set(key, packed)
        except Exception as e:
            if attempt > 5:
                logging.exception("Cannot write to memc %s: %s" %
                                  (memc_addr, e))
                return False
            attempt += 1
            sleep(5)
            continue
        return True
Esempio n. 8
0
def buf_appsinstalled(appsinstalled):
    """Build the cache key and the ``UserApps`` message for one record.

    Returns a ``(key, message)`` tuple; ``key`` is ``"<dev_type>:<dev_id>"``
    and the message is returned without being serialized.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = '{}:{}'.format(appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    return cache_key, message
Esempio n. 9
0
def insert_appsinstalled(queue_in, queue_out, device_memc, processed, errors, dry_run=False):
    """Worker loop: serialize records from ``queue_in`` and pass them on.

    Each record is packed into a ``UserApps`` message and put on ``queue_out``
    as ``(key, packed, memc_addr)``, where ``memc_addr`` is looked up in
    ``device_memc`` by device type. In dry-run mode the record is only logged.
    ``processed.value`` is incremented for every record and ``errors.value``
    for every record whose hand-off raised. The loop stops on ``SENTINEL``.
    """
    count = 0
    while True:
        appsinstalled = queue_in.get()
        if isinstance(appsinstalled, str) and appsinstalled == SENTINEL:
            break

        count += 1
        if count % LOG_EVERY == 0:
            logging.info(f"{os.getpid()} - Insert {count}")

        memc_addr = device_memc.get(appsinstalled.dev_type)

        ua = appsinstalled_pb2.UserApps()
        ua.lat = appsinstalled.lat
        ua.lon = appsinstalled.lon
        key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
        ua.apps.extend(appsinstalled.apps)
        packed = ua.SerializeToString()
        try:
            if not dry_run:
                queue_out.put((key, packed, memc_addr))
            else:
                logging.debug("%s - %s -> %s" % (memc_addr, key, str(ua).replace("\n", " ")))
        except Exception as e:
            logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
            errors.value += 1
        # Counted whether or not the hand-off succeeded.
        processed.value += 1
Esempio n. 10
0
def value_in_memcache_test(opts):
    """End-to-end check: load a sample file via ``main`` and read it back.

    Writes two sample records to ``test.tsv.gz``, points ``opts.pattern`` at
    that file, runs ``main(opts)`` and then asserts that the value stored in
    memcache under ``"<dev_type>:<dev_id>"`` equals the serialized
    ``UserApps`` message built from the same line.
    """
    sample = "idfa\t1rfw452y52g2gq4g\t55.55\t42.42\t1423,43,567,3,7,23\ngaid\t7rfw452y52g2gq4g\t55.55\t42.42\t7423,424"
    fn = "test.tsv.gz"
    with gzip.open(fn, mode="w") as archive:
        archive.write(sample.encode("utf8"))

    # Device type -> memcache address, taken from the parsed options.
    device_memc = {
        dev: getattr(opts, dev) for dev in ("idfa", "gaid", "adid", "dvid")
    }

    opts.pattern = fn
    main(opts)

    for record in sample.splitlines():
        dev_type, dev_id, lat, lon, raw_apps = record.strip().split("\t")
        app_ids = [int(token) for token in raw_apps.split(",") if token.isdigit()]
        lat, lon = float(lat), float(lon)

        expected = appsinstalled_pb2.UserApps()
        expected.lat = lat
        expected.lon = lon
        expected.apps.extend(app_ids)
        packed = expected.SerializeToString()

        client = get_memcache(device_memc[dev_type])
        stored = client.get(f"{dev_type}:{dev_id}")
        assert stored == packed
Esempio n. 11
0
 def parse_value(line):
     """Parse a raw line and return it serialized as a ``UserApps`` message.

     The line is parsed with ``parse_appsinstalled``; the coordinates and app
     list of the result are copied into the message.
     """
     record = parse_appsinstalled(line)
     message = appsinstalled_pb2.UserApps()
     message.lat = record.lat
     message.lon = record.lon
     message.apps.extend(record.apps)
     serialized = message.SerializeToString()
     return serialized
Esempio n. 12
0
def insert_appsinstalled(memc_pool, memc_addr, appsinstalled, dry_run=False):
    """Writes installed applications to memcache.

    The record is packed into a ``UserApps`` message and stored under the key
    ``"<dev_type>:<dev_id>"``. A client is taken from ``memc_pool``; if the
    pool yields nothing within 0.1 s a new client for ``memc_addr`` is created.
    A failed ``set`` is retried up to 3 times with a 0.5 s pause.

    Returns True when the record was stored (or only logged in dry-run mode),
    False when all attempts failed or an exception was raised. Exceptions
    are logged, not propagated.
    """
    ua = appsinstalled_pb2.UserApps()
    ua.lat = appsinstalled.lat
    ua.lon = appsinstalled.lon
    key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    ua.apps.extend(appsinstalled.apps)
    packed = ua.SerializeToString()
    try:
        if dry_run:
            logging.debug("%s - %s -> %s" %
                          (memc_addr, key, str(ua).replace("\n", " ")))
            return True

        try:
            memc = memc_pool.get(timeout=0.1)
        except Queue.Empty:
            memc = memcache.Client([memc_addr], socket_timeout=3.0)
        try:
            ok = False
            for n in range(3):
                ok = memc.set(key, packed)
                if ok:
                    break
                sleep(0.5)
        finally:
            # Return the client even when set() raised, otherwise the pool
            # loses one client per failure.
            memc_pool.put(memc)
        # Report the outcome; previously a failed write looked like success.
        return bool(ok)
    except Exception as e:
        logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
        return False
Esempio n. 13
0
def listener(q, options):
    """Consume queue items and write each record to memcache.

    Every item is expected to offer ``get`` for the keys ``'memc_addr'``,
    ``'appsinstalled'`` and ``'dry'``. The record is packed into a
    ``UserApps`` message and stored under ``"<dev_type>:<dev_id>"`` using a
    client created for that item; when ``dry`` is truthy the record is only
    logged. The loop ends when ``TERMINATE`` is received. On any exception the
    error is logged and the function returns False, ending the loop.

    ``options`` is not used. The logged duration is measured from the moment
    the listener started, not per item.
    """
    start_time = time.time()

    while True:
        item = q.get()
        logging.info('LISTENER EXECUTION START {}'.format(start_time))
        if item == TERMINATE:
            break

        memc_addr = item.get('memc_addr')
        appsinstalled = item.get('appsinstalled')
        dry = item.get('dry')

        ua = appsinstalled_pb2.UserApps()
        ua.lat = appsinstalled.lat
        ua.lon = appsinstalled.lon
        key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
        ua.apps.extend(appsinstalled.apps)
        packed = ua.SerializeToString()

        try:
            if dry:
                logging.debug("%s - %s -> %s" %
                              (memc_addr, key, str(ua).replace("\n", " ")))
            else:
                client = memcache.Client([memc_addr],
                                         socket_timeout=DEFAULT_TIMEOUT)
                client.set(key, packed)
                elapsed = time.time() - start_time
                logging.info('LISTENER EXECUTION END {}'.format(
                    timedelta(seconds=elapsed)))
        except Exception as e:
            logging.exception("Cannot write to memc %s: %s" % (memc_addr, e))
            return False
def create_data_for_memcached(appsinstalled):
    """Build the cache key and ``UserApps`` message for one record.

    Returns a dict with ``'key'`` (``"<dev_type>:<dev_id>"``) and ``'ua'``
    (the populated, not yet serialized message).
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    return dict(key=cache_key, ua=message)
Esempio n. 15
0
def serialize_data(appsinstalled):
    """Pack a record into a ``UserApps`` message.

    Returns ``(packed, message)``: the serialized form followed by the
    message object it was produced from.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    message.apps.extend(appsinstalled.apps)
    return message.SerializeToString(), message
Esempio n. 16
0
    def run(self):
        """Consume batches of raw lines, serialize them and fan out per device.

        Each batch taken from ``self.input_queue`` is parsed line by line;
        valid records are packed into ``UserApps`` messages, grouped by device
        type and handed to the matching queue in ``self.queue_by_dev`` as a
        ``{key: packed}`` dict. An empty batch ends the loop after the error
        rate has been logged.
        """
        logging.debug('Processor started')
        # One key->value buffer per known device type.
        buffered_lines = {}
        for k in self.queue_by_dev:
            buffered_lines[k] = {}  # dict key->val for memcache

        while True:
            lines = self.input_queue.get()
            # The event is set while the input queue holds fewer than 100
            # batches and cleared otherwise.
            # NOTE(review): presumably a back-pressure signal for the
            # producer -- confirm against the code that waits on self.ev.
            if self.input_queue.qsize() < 100:
                self.ev.set()
            else:
                self.ev.clear()

            # finished?
            if len(lines) == 0:
                err_rate = (float(self.errors) /
                            self.processed) if self.processed > 0 else 0
                if err_rate < NORMAL_ERR_RATE:
                    logging.info(
                        "Acceptable error rate (%s). Successfull load %s records"
                        % (err_rate, self.processed))
                else:
                    logging.error("High error rate (%s > %s). Failed load" %
                                  (err_rate, NORMAL_ERR_RATE))
                logging.debug('Processor finished')
                # NOTE(review): this path returns without calling task_done()
                # for the terminating batch -- verify nothing joins the queue.
                return

            # converting and sorting lines by dev
            for line in lines:
                line = line.strip()
                if not line:
                    continue
                appsinstalled = parse_appsinstalled(line)
                if not appsinstalled:
                    self.errors += 1
                    continue

                if appsinstalled.dev_type not in self.queue_by_dev:
                    self.errors += 1
                    logging.error("Unknow device type: %s" %
                                  appsinstalled.dev_type)
                else:
                    ua = appsinstalled_pb2.UserApps()
                    ua.lat = appsinstalled.lat
                    ua.lon = appsinstalled.lon
                    ua.apps.extend(appsinstalled.apps)
                    # NOTE(review): bytes %-formatting only accepts bytes-like
                    # arguments on Python 3; if dev_type/dev_id are str this
                    # raises TypeError there -- confirm the target interpreter.
                    key = b"%s:%s" % (appsinstalled.dev_type,
                                      appsinstalled.dev_id)
                    packed = ua.SerializeToString()
                    # A repeated key within a batch overwrites the earlier value.
                    buffered_lines[appsinstalled.dev_type][key] = packed
                    self.processed += 1

            self.input_queue.task_done()

            # sending data to CacheWriters
            for dt, dd in buffered_lines.items():
                if len(dd) > 0:
                    # Send a copy, then empty the buffer for the next batch.
                    self.queue_by_dev[dt].put(dict(dd))
                    dd.clear()
Esempio n. 17
0
def get_appsinstalled_as_key_value(appsinstalled):
    """Return ``(key, packed)`` for one record.

    ``key`` is ``"<dev_type>:<dev_id>"``; ``packed`` is the record's
    coordinates and app list serialized as a ``UserApps`` message.
    """
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    message.apps.extend(appsinstalled.apps)
    return cache_key, message.SerializeToString()
Esempio n. 18
0
def prepare_appsinstalled(appsinstalled):
    """Turn a record into the ``(key, packed)`` pair written to the cache.

    ``key`` is ``"<dev_type>:<dev_id>"`` and ``packed`` is the serialized
    ``UserApps`` message holding the record's lat, lon and apps.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = '%s:%s' % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    payload = message.SerializeToString()
    return cache_key, payload
Esempio n. 19
0
 def serialize(self, appsinstalled):
     """Serialize one record for the cache.

     Returns a dict with ``'key'`` (``"<dev_type>:<dev_id>"``) and ``'data'``
     (the serialized ``UserApps`` message).
     """
     message = appsinstalled_pb2.UserApps()
     message.lat = appsinstalled.lat
     message.lon = appsinstalled.lon
     cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
     message.apps.extend(appsinstalled.apps)
     payload = message.SerializeToString()
     return dict(key=cache_key, data=payload)
Esempio n. 20
0
def get_packed(appsinstalled):
    """Return a one-entry mapping ``{key: packed}`` for the record.

    ``key`` is ``"<dev_type>:<dev_id>"``; ``packed`` is the serialized
    ``UserApps`` message built from the record.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    result = {}
    result[cache_key] = message.SerializeToString()
    return result
Esempio n. 21
0
def serialization_data(appsinstalled):
    """Pack a record and bundle the results in a ``Serialized_data``.

    The bundle is constructed positionally from the ``UserApps`` message, the
    key ``"<dev_type>:<dev_id>"`` and the serialized message.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message.apps.extend(appsinstalled.apps)
    payload = message.SerializeToString()
    return Serialized_data(message, cache_key, payload)
Esempio n. 22
0
 def memc_serialyzer(self, appsinstalled):
     """Return ``(key, packed)`` for one record.

     ``key`` is ``"<dev_type>:<dev_id>"``; ``packed`` is the serialized
     ``UserApps`` message carrying the record's lat, lon and apps.
     """
     message = appsinstalled_pb2.UserApps()
     message.lat = appsinstalled.lat
     message.lon = appsinstalled.lon
     cache_key = "{}:{}".format(appsinstalled.dev_type, appsinstalled.dev_id)
     message.apps.extend(appsinstalled.apps)
     return cache_key, message.SerializeToString()
Esempio n. 23
0
 def _construct_protobuf(self, appsinstalled):
     """Build the serialized ``UserApps`` message and its cache key.

     Returns the tuple ``(key, packed)`` with ``key`` formatted as
     ``"<dev_type>:<dev_id>"``.
     """
     message = appsinstalled_pb2.UserApps()
     message.lat = appsinstalled.lat
     message.lon = appsinstalled.lon
     cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
     message.apps.extend(appsinstalled.apps)
     payload = message.SerializeToString()
     return cache_key, payload
Esempio n. 24
0
def serialised_line(line:str) -> tuple:
    """Serialize one parsed record into a ``(key, packed)`` tuple.

    ``key`` is ``"<dev_type>:<dev_id>"`` and ``packed`` is the serialized
    ``UserApps`` message.

    NOTE(review): ``line`` is annotated as ``str`` but is read through the
    attributes ``lat``, ``lon``, ``dev_type``, ``dev_id`` and ``apps`` --
    confirm the real argument type with the caller.
    """
    message = appsinstalled_pb2.UserApps()
    message.lat = line.lat
    message.lon = line.lon
    cache_key = "%s:%s" % (line.dev_type, line.dev_id)
    message.apps.extend(line.apps)
    return cache_key, message.SerializeToString()
Esempio n. 25
0
def serialize_installed_apps(apps_installed: AppsInstalled) -> Tuple[str, str]:
    """Return the cache key and serialized ``UserApps`` message for a record.

    The key is ``"<dev_type>:<dev_id>"``.

    NOTE(review): the second element is whatever ``SerializeToString``
    returns; confirm that the ``str`` annotation matches it.
    """
    key = f"{apps_installed.dev_type}:{apps_installed.dev_id}"

    message = appsinstalled_pb2.UserApps()
    message.lat = apps_installed.lat
    message.lon = apps_installed.lon
    message.apps.extend(apps_installed.apps)

    return key, message.SerializeToString()
Esempio n. 26
0
def serialize_appsinstalled(appsinstalled):
    """
    Pack a record into a ``UserApps`` protobuf message.

    Returns ``(key, packed)`` where ``key`` is ``"<dev_type>:<dev_id>"``.
    """
    cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
    message = appsinstalled_pb2.UserApps()
    message.lat = appsinstalled.lat
    message.lon = appsinstalled.lon
    message.apps.extend(appsinstalled.apps)
    payload = message.SerializeToString()
    return cache_key, payload
Esempio n. 27
0
 def apps_list_from_dict(self, chunk: List) -> Dict:
     """Serialize every record in ``chunk`` into a ``{key: packed}`` dict.

     Keys are ``"<dev_type>:<dev_id>"``; values are serialized ``UserApps``
     messages. A key that occurs more than once keeps the last value.
     """
     serialized = {}
     for record in chunk:
         message = appsinstalled_pb2.UserApps()
         cache_key = "%s:%s" % (record.dev_type, record.dev_id)
         message.lat = record.lat
         message.lon = record.lon
         message.apps.extend(record.apps)
         serialized[cache_key] = message.SerializeToString()
     return serialized
Esempio n. 28
0
 def bufferize_appsinstalled(memc_client, appsinstalled):
     """Add one serialized record to the per-client write buffer.

     The record is packed into a ``UserApps`` message and stored in
     ``memc_clients_buff[memc_client]`` under ``"<dev_type>:<dev_id>"``. The
     client's buffered byte count and processed-record counter are updated.

     Returns the client's buffered size after the addition.
     """
     message = appsinstalled_pb2.UserApps()
     message.lat = appsinstalled.lat
     message.lon = appsinstalled.lon
     cache_key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
     message.apps.extend(appsinstalled.apps)
     payload = message.SerializeToString()

     memc_clients_buff_size[memc_client] += len(payload)
     memc_clients_buff[memc_client].update({cache_key: payload})
     memc_clients_processed_apps[memc_client] += 1

     buffered_size = memc_clients_buff_size[memc_client]
     return buffered_size
Esempio n. 29
0
 def get_packed(recordline):
     """Parse a tab-separated bytes record and serialize it.

     The record must have exactly five fields: device type, device id,
     latitude, longitude and a comma-separated app list. Non-numeric app
     tokens are skipped.

     Returns ``(dev_type, key, packed)`` where ``key`` is
     ``b"<dev_type>:<dev_id>"`` and ``packed`` is the serialized ``UserApps``
     message.
     """
     fields = recordline.strip().split(b"\t")
     dev_type, dev_id, lat, lon, raw_apps = fields
     app_ids = [int(token) for token in raw_apps.split(b",") if token.isdigit()]
     lat, lon = float(lat), float(lon)

     message = appsinstalled_pb2.UserApps()
     message.lat = lat
     message.lon = lon
     message.apps.extend(app_ids)
     payload = message.SerializeToString()

     cache_key = b'%s:%s' % (dev_type, dev_id)
     return dev_type, cache_key, payload
    def run(self):
        """Worker loop: serialize records from ``in_queue`` for the cache writer.

        Every record is packed into a ``UserApps`` message and put on
        ``self.to_cache_queue`` as ``(key, packed, memc_addr)``; in dry-run
        mode it is only logged. Falsy records, unknown device types and
        failed hand-offs increment ``self.errors``; successful ones increment
        ``self.processed``. ``task_done`` is called once per item, and the
        loop stops when ``SENTINEL`` is received.
        """
        seen = 0
        while True:
            appsinstalled = self.in_queue.get()

            if isinstance(appsinstalled, str) and appsinstalled == SENTINEL:
                self.in_queue.task_done()
                logging.info(f"InsertAppsInstalledWorker {self.name} END")
                break

            seen += 1
            if seen % 100000 == 0:
                logging.info(f"Insert {seen} apps {self.name}")

            if not appsinstalled:
                self.errors += 1
                self.in_queue.task_done()
                continue

            memc_addr = self.device_memc.get(appsinstalled.dev_type)
            if not memc_addr:
                logging.error("Unknow device type: %s" %
                              appsinstalled.dev_type)
                self.errors += 1
                self.in_queue.task_done()
                continue

            ua = appsinstalled_pb2.UserApps()
            ua.lat = appsinstalled.lat
            ua.lon = appsinstalled.lon
            key = "%s:%s" % (appsinstalled.dev_type, appsinstalled.dev_id)
            ua.apps.extend(appsinstalled.apps)
            packed = ua.SerializeToString()
            # @TODO persistent connection
            # @TODO retry and timeouts!
            try:
                if self.dry_run:
                    logging.debug("%s - %s -> %s" %
                                  (memc_addr, key, str(ua).replace("\n", " ")))
                else:
                    self.to_cache_queue.put((key, packed, memc_addr))
            except Exception as e:
                logging.exception("Cannot write to memc %s: %s" %
                                  (memc_addr, e))
                self.errors += 1
            else:
                self.processed += 1
            # Reached on both the failure and the success path.
            self.in_queue.task_done()