Example #1
def _run(topdir, name, seed, device, command):
   """ Run the attack experiments with the given named parameters.
   Args:
     topdir  Parent result directory
     name    Experiment unique name
     seed    Experiment seed
     device  Device on which to run the experiments
     command Command to run
   """
   # Add seed to name
   name = "%s-%d" % (name, seed)
   # Process experiment
   with tools.Context(name, "info"):
     finaldir = topdir / name
     # Check whether the experiment was already successful
     if finaldir.exists():
       tools.info("Experiment already processed.")
       return
     # Move-make the pending result directory
     resdir = move_directory(topdir / f"{name}.pending")
     resdir.mkdir(mode=0o755, parents=True)
     # Build the command
     args = command.build(seed, device, resdir)
     # Launch the experiment and write the standard output/error
     tools.trace((" ").join(shlex.quote(arg) for arg in args))
     cmd_res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
     if cmd_res.returncode == 0:
       tools.info("Experiment successful")
     else:
       tools.warning("Experiment failed")
       finaldir = topdir / f"{name}.failed"
       move_directory(finaldir)
     resdir.rename(finaldir)
     (finaldir / "stdout.log").write_bytes(cmd_res.stdout)
     (finaldir / "stderr.log").write_bytes(cmd_res.stderr)
Example #2
def attach_and_clean_iris(db, filename):
    info('Attaching French IRIS to their region')
    processed = 0
    for zone in db.find({'level': iris.id}):
        candidates_ids = [p for p in zone['parents'] if p.startswith(town.id)]
        if len(candidates_ids) < 1:
            warning('No parent candidate found for: {0}'.format(zone['_id']))
            continue
        town_id = candidates_ids[0]
        town_zone = db.find_one({'_id': town_id})
        if not town_zone:
            warning('Town {0} not found'.format(town_id))
            continue
        if zone.get('_type') == 'Z':
            name = town_zone['name']
        else:
            name = ''.join((town_zone['name'], ' (', zone['name'], ')'))
        ops = {
            '$addToSet': {'parents': {'$each': town_zone['parents']}},
            '$set': {'name': name},
            '$unset': {'_town': 1, '_type': 1}
        }
        if db.find_one_and_update({'_id': zone['_id']}, ops):
            processed += 1
    success('Attached {0} french IRIS to their parents'.format(processed))
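
For reference, a minimal self-contained sketch of the same pymongo update pattern ($addToSet with $each, $set, $unset) against a hypothetical local collection; database, collection and zone identifiers are illustrative only:

from pymongo import MongoClient

zones = MongoClient().geozones.zones  # hypothetical database/collection names
ops = {
    '$addToSet': {'parents': {'$each': ['fr/region/84', 'fr/county/69']}},
    '$set': {'name': 'Lyon (example IRIS)'},
    '$unset': {'_town': 1, '_type': 1},
}
# find_one_and_update returns the matched document (or None), which is why the
# loop above can use its result directly to count processed zones.
zones.find_one_and_update({'_id': 'fr/iris/693830000'}, ops)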
Example #3
    def process_dataset(self, workdir, db, url, extractor):
        '''Extract territories from a given file for a given level with a given extractor function'''
        loaded = 0
        filename = join(workdir, basename(url))

        with fiona.open('/', vfs='zip://{0}'.format(filename), encoding='utf8') as collection:
            info('Extracting {0} elements from {1} ({2} {3})'.format(
                len(collection), basename(filename), collection.driver, to_string(collection.crs)
            ))

            for polygon in collection:
                try:
                    zone = extractor(polygon)
                    if not zone:
                        continue
                    zone['keys'] = dict((k, v) for k, v in zone.get('keys', {}).items() if v is not None)
                    geom = shape(polygon['geometry'])
                    if extractor.simplify:
                        geom = geom.simplify(extractor.simplify)
                    if geom.geom_type == 'Polygon':
                        geom = MultiPolygon([geom])
                    elif geom.geom_type != 'MultiPolygon':
                        warning('Unsupported geometry type "{0}" for "{1}"'.format(geom.geom_type, zone['name']))
                        continue
                    zoneid = '/'.join((self.id, zone['code']))
                    zone.update(_id=zoneid, level=self.id, geom=geom.__geo_interface__)
                    db.find_one_and_replace({'_id': zoneid}, zone, upsert=True)
                    loaded += 1
                except Exception as e:
                    error('Error extracting polygon {0}: {1}', polygon['properties'], str(e))

        info('Loaded {0} zones for level {1} from file {2}'.format(loaded, self.id, filename))
        return loaded
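
The vfs keyword used above is deprecated in recent Fiona releases; a minimal sketch of the equivalent read through a zip:// path (the archive name is illustrative) follows:

import fiona
from shapely.geometry import shape

# Open a shapefile stored inside a ZIP archive directly via a zip:// path.
with fiona.open('zip://downloads/communes.zip', encoding='utf8') as collection:
    print(collection.driver, len(collection), collection.crs)
    for feature in collection:
        geom = shape(feature['geometry'])
        print(feature['properties'], geom.geom_type)
        break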
Example #4
    def build_aggregates(self, db):
        processed = 0
        for code, name, zones, properties in self.aggregates:
            info('Building aggregate "{0}" (level={1}, code={2})'.format(name, self.id, code))
            zone = self.build_aggregate(code, name, zones, properties, db)
            db.find_one_and_replace({'_id': zone['_id']}, zone, upsert=True)
            processed += 1
        return processed
Example #5
def process_insee_cog(db, filename):
    '''Use information from the INSEE COG to attach parents.
    http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement.asp
    '''
    info('Processing INSEE COG')
    processed = 0
    counties = {}
    districts = {}
    with ZipFile(filename) as cogzip:
        with cogzip.open('comsimp2016.txt') as tsvfile:
            tsvio = io.TextIOWrapper(tsvfile, encoding='cp1252')
            reader = csv.DictReader(tsvio, delimiter='\t')
            for row in reader:
                # Lower everything, from 2B to 2b for instance.
                region_code = row['REG'].lower()
                county_code = row['DEP'].lower()
                district_code = row['AR'].lower()
                town_code = row['COM'].lower()
                insee_code = ''.join((county_code, town_code))

                region_id = 'fr/region/{0}'.format(region_code)
                county_id = 'fr/county/{0}'.format(county_code)

                parents = [region_id, county_id]

                if county_id not in counties:
                    counties[county_id] = region_id

                if district_code:
                    district_code = ''.join((county_code, district_code))
                    district_id = 'fr/district/{0}'.format(district_code)
                    parents.append(district_id)
                    if district_id not in districts:
                        districts[district_id] = [region_id, county_id]

                if db.find_one_and_update(
                        {'level': town.id, 'code': insee_code},
                        {'$addToSet': {'parents': {'$each': parents}}}):
                    processed += 1
    success('Attached {0} french towns to their parents'.format(processed))

    processed = 0
    for district_id, parents in districts.items():
        if db.find_one_and_update(
                {'_id': district_id},
                {'$addToSet': {
                    'parents': {'$each': parents},
                }}):
            processed += 1
    success('Attached {0} french districts to their parents'.format(processed))

    processed = 0
    for county_id, parent in counties.items():
        if db.find_one_and_update(
                {'_id': county_id},
                {'$addToSet': {'parents': parent}}):
            processed += 1
    success('Attached {0} french counties to their parents'.format(processed))
Example #6
    def build_aggregates(self, db):
        processed = 0
        for code, name, zones, properties in self.aggregates:
            info('Building aggregate "{0}" (level={1}, code={2})'.format(
                name, self.id, code))
            zone = self.build_aggregate(code, name, zones, properties, db)
            db.find_one_and_replace({'_id': zone['_id']}, zone, upsert=True)
            processed += 1
        return processed
def extract_features_old(X_list, model, len_sample=5, sr=SAMPLING_RATE):
    X = get_fixed_array(X_list, len_sample=len_sample, sr=sr)
    info("note: exft get fix done.")
    X = model.predict(X)
    info("note: exft model predict done.")
    X = X.transpose(0, 2, 1, 3)
    # squeeze.
    X = np.squeeze(X)
    # info("note: exft model transpose and squeeze done.")
    return X
Example #8
    def process_dataset(self, workdir, db, url, extractor):
        '''Extract territories from a given file for a given level with a given extractor function'''
        loaded = 0
        filename = join(workdir, basename(url))

        # Identify the shapefile to avoid multiple file error on GDAL 2

        with ZipFile(filename) as z:
            candidates = [n for n in z.namelist() if n.endswith('.shp')]
            if len(candidates) != 1:
                raise ValueError(
                    'Unable to find a unique shapefile in {0}'.format(
                        filename))
            shp = candidates[0]

        with fiona.open('/{0}'.format(shp),
                        vfs='zip://{0}'.format(filename),
                        encoding='utf8') as collection:
            info('Extracting {0} elements from {1} ({2} {3})'.format(
                len(collection), basename(filename), collection.driver,
                to_string(collection.crs)))

            for polygon in collection:
                try:
                    zone = extractor(polygon)
                    if not zone:
                        continue
                    zone['keys'] = dict(
                        (k, v) for k, v in zone.get('keys', {}).items()
                        if v is not None)
                    geom = shape(polygon['geometry'])
                    if extractor.simplify:
                        geom = geom.simplify(extractor.simplify)
                    if geom.geom_type == 'Polygon':
                        geom = MultiPolygon([geom])
                    elif geom.geom_type != 'MultiPolygon':
                        warning(
                            'Unsupported geometry type "{0}" for "{1}"'.format(
                                geom.geom_type, zone['name']))
                        continue
                    zoneid = '/'.join((self.id, zone['code']))
                    zone.update(_id=zoneid,
                                level=self.id,
                                geom=geom.__geo_interface__)
                    db.find_one_and_replace({'_id': zoneid}, zone, upsert=True)
                    loaded += 1
                except Exception as e:
                    error('Error extracting polygon {0}: {1}',
                          polygon['properties'], str(e))

        info('Loaded {0} zones for level {1} from file {2}'.format(
            loaded, self.id, filename))
        return loaded
    def _run(name, seed, device, params):
        """ Run the attack experiments with the given named parameters.
    Args:
      name   Experiment unique name
      seed   Experiment seed
      device Device on which to run the experiments
      params Named parameters
    """
        # Add seed to name
        name = "%s-%d" % (name, seed)
        # Process experiment
        with tools.Context(name, "info"):
            # Build and set the result directory
            result_dir = args.data_directory / name
            if result_dir.exists():
                tools.info("Experiment already processed.")
                return
            result_dir.mkdir(mode=0o755, parents=True)
            # Add the missing options
            params["seed"] = str(seed)
            params["device"] = device
            params["result-directory"] = str(result_dir)

            # Launch the experiment and write the standard output/error
            def is_multi_param(param):
                return any(isinstance(param, typ) for typ in (list, tuple))

            def param_to_str(param):
                if is_multi_param(param):
                    return (" ").join(shlex.quote(str(val)) for val in param)
                return shlex.quote(str(param))

            tools.trace("python3 -OO attack.py %s" %
                        (" ").join("--%s %s" % (key, param_to_str(val))
                                   for key, val in params.items()))
            command = ["python3", "-OO", "attack.py"]
            for key, val in params.items():
                command.append("--%s" % (key, ))
                if is_multi_param(val):
                    for subval in val:
                        command.append(str(subval))
                else:
                    command.append(str(val))
            cmd_res = subprocess.run(command,
                                     stdout=subprocess.PIPE,
                                     stderr=subprocess.PIPE)
            if cmd_res.returncode == 0:
                tools.info("Experiment successful")
            else:
                tools.warning("Experiment failed")
            (result_dir / "stdout.log").write_bytes(cmd_res.stdout)
            (result_dir / "stderr.log").write_bytes(cmd_res.stderr)
Example #10
def add_ue_to_parents(db, filename):
    info('Adding European Union to countries parents')
    result = db.update_many(
        {
            'level': country.id,
            'code': {
                '$in': UE_COUNTRIES
            }
        }, {'$addToSet': {
            'parents': 'country-group/ue'
        }})
    success('Added European Union as parent to {0} countries'.format(
        result.modified_count))
def extract_features(X_list,
                     len_sample=5,
                     sr=SAMPLING_RATE,
                     use_power_db=False):
    X = get_fixed_array(X_list, len_sample=len_sample, sr=sr)
    info("note: exft get fix done.")
    # X = model.predict(X)
    X = kapre_extractor.predict(X)
    info(
        "note: exft model kapre_extractor predict done, kapre predict shape={}"
        .format(X.shape))

    # basic: (147, 30, 125, 1) to (147, 125, 30, 1) to (147, 125, 30)
    # X = X.transpose(0, 2, 1, 3)
    # X = np.squeeze(X)

    # basic + powertodb. squeeze->powertodb->transpose
    # squeeze.
    X = np.squeeze(X)
    info("note: exft model transpose and squeeze done, shape={}".format(
        X.shape))
    if use_power_db:
        X = np.asarray([librosa.power_to_db(r) for r in X])
        info("note: exft model kapre_extractor power_to_db done.")

    X = X.transpose(0, 2, 1)
    # info("note: X transpose shape={}".format(X.shape))
    return X
def lr_preprocess_update(x):
    x = [sample[0:MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE] for sample in x]
    info(
        "note: lr_preprocess update, x type={}, len={}, x[0] shape={}, ele_type={}, value={}, x[-1] shape={}"
        .format(type(x), len(x), x[0].shape, type(x[0][0]), x[0][0],
                x[-1].shape))
    t1 = time.time()
    if FEET_MODE == "LIBROSA":
        x_mel = extract_melspectrogram_parallel(x,
                                                n_mels=30,
                                                use_power_db=True)
    elif FEET_MODE == "KAPRE":
        # x_mel = extract_features(x, model=kapre_extractor)
        x_mel = extract_features(x)

    # x_contrast = extract_bandwidth_parallel(x)
    t2 = time.time()
    info_log = list()
    x_feas = []
    for i in range(len(x_mel)):
        mel = np.mean(x_mel[i], axis=0).reshape(-1)
        mel_std = np.std(x_mel[i], axis=0).reshape(-1)
        # contrast = np.mean(x_contrast[i], axis=0).reshape(-1)
        # contrast_std = np.std(x_contrast[i], axis=0).reshape(-1)
        # contrast, contrast_std
        fea_item = np.concatenate([mel, mel_std], axis=-1)
        x_feas.append(fea_item)
        if i < 1:
            info_log.append("i={}, x_mel type={}, shape={}".format(
                i, type(x_mel[i]), x_mel[i].shape))
            info_log.append("i={}, mel type={}, shape={}".format(
                i, type(mel), mel.shape))
            info_log.append("i={}, mel_std type={}, shape={}".format(
                i, type(mel_std), mel_std.shape))
            info_log.append("i={}, fea_item type={}, shape={}".format(
                i, type(fea_item), fea_item.shape))

    x_feas = np.asarray(x_feas)
    scaler = StandardScaler()
    X = scaler.fit_transform(x_feas[:, :])

    info_log.append(
        "FEET_MODE = {}, x_mel type={}, shape={}, cost_time={}s".format(
            FEET_MODE, type(x_mel), x_mel.shape, round(t2 - t1, 3)))
    info_log.append("x_feas type={}, shape={}".format(type(x_feas),
                                                      x_feas.shape))
    info_log.append("X type={}, shape={}".format(type(X), X.shape))
    info(json.dumps(info_log, indent=4))
    return X
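
A compact, self-contained sketch of the same feature recipe (mel-spectrogram, power to dB, per-band mean/std pooling, standard scaling) using librosa only; the sample rate and mel-band count are assumed values:

import numpy as np
import librosa
from sklearn.preprocessing import StandardScaler

def mel_mean_std_features(waveforms, sr=16000, n_mels=30):
    feats = []
    for y in waveforms:
        mel = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
        mel_db = librosa.power_to_db(mel).T  # (frames, n_mels)
        feats.append(np.concatenate([mel_db.mean(axis=0), mel_db.std(axis=0)]))
    return StandardScaler().fit_transform(np.asarray(feats))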
Example #13
def process_postal_codes(db, filename):
    '''
    Extract postal codes from https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/
    '''
    info('Processing french postal codes')
    processed = 0
    with open(filename, encoding='cp1252') as csvfile:
        reader = csv.reader(csvfile, delimiter=';')
        # skip header
        next(reader, None)
        for insee, _, postal, _, _ in reader:
            ops = {'$addToSet': {'keys.postal': postal}}
            if db.find_one_and_update({'level': town.id, 'code': insee}, ops):
                processed += 1
    success('Processed {0} french postal codes'.format(processed))
Example #14
def compute_district_population(db, filename):
    info('Computing french district population by aggregation')
    processed = 0
    pipeline = [
        {'$match': {'level': town.id}},
        {'$unwind': '$parents'},
        {'$match': {'parents': {'$regex': district.id}}},
        {'$group': {'_id': '$parents', 'population': {'$sum': '$population'}}}
    ]
    for result in db.aggregate(pipeline):
        if result.get('population'):
            if db.find_one_and_update(
                    {'_id': result['_id']},
                    {'$set': {'population': result['population']}}):
                processed += 1
    success('Computed population for {0} french districts'.format(processed))
Example #15
def summary_thread(coord, mngr, sess, path, rstrd):
    """ Summary thread entry point.
  Args:
    coord Coordinator to use
    mngr  Graph manager to use
    sess  Session to use
    path  Path to the manager to use
    rstrd Whether the model was just restored from a checkpoint
  """
    global args
    delta = args.summary_delta
    period = args.summary_period
    if delta < 0 and period < 0:  # Effectively disabled
        tools.info("Summary saving is effectively disabled")
        return
    if mngr.summary_tn is None:
        tools.warning("No summary to save")
        return
    if rstrd:
        last_step = sess.run(mngr.step)
        last_time = time.time()
    else:
        last_step = -delta
        last_time = -period
    # Save summaries
    with mngr.graph.as_default():
        with tf.summary.FileWriter(args.summary_dir,
                                   graph=mngr.graph) as writer:
            writer.add_session_log(tf.SessionLog(status=tf.SessionLog.START),
                                   sess.run(mngr.step))
            while True:
                time.sleep(config.thread_idle_delay)
                step = sess.run(mngr.step)
                now = time.time()
                stop = coord.should_stop()
                if stop or (delta >= 0 and step - last_step >= delta) or (
                        period >= 0. and now - last_time >= period):
                    writer.add_summary(sess.run(mngr.summary_tn), step)
                    tools.info("Summaries saved (took " +
                               repr(time.time() - now) + " s)")
                    last_step = sess.run(mngr.step)
                    last_time = time.time()
                    if stop:
                        break
            writer.add_session_log(tf.SessionLog(status=tf.SessionLog.STOP),
                                   step)
Example #16
def attach_canton_parents(db, filename):
    info('Attaching French Canton to their parents')
    canton_processed = 0
    for zone in db.find({'level': canton.id}):
        candidates_ids = [p for p in zone['parents'] if p.startswith(county.id)]
        if len(candidates_ids) < 1:
            warning('No parent candidate found for: {0}'.format(zone['_id']))
            continue
        county_id = candidates_ids[0]
        county_zone = db.find_one({'_id': county_id})
        ops = {
            '$addToSet': {'parents': {'$each': county_zone['parents']}},
            '$unset': {'_dep': 1}
        }
        if db.find_one_and_update({'_id': zone['_id']}, ops):
            canton_processed += 1

    success('Attached {0} french cantons to their parents'.format(canton_processed))
Example #17
def attach_epci(db, filename):
    '''
    Attach EPCI towns to their EPCI from http://www.collectivites-locales.gouv.fr/liste-et-composition-2015
    '''
    info('Processing EPCI town list')
    processed = 0
    # epci_region = {}
    with open(filename, encoding='cp1252') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=';')
        for row in reader:
            siren = row['siren_epci']
            insee = row['insee'].lower()
            # region = row['region']
            # epci_region[siren] = region
            epci_id = 'fr/epci/{0}'.format(siren)
            if db.find_one_and_update(
                    {'level': town.id, 'code': insee},
                    {'$addToSet': {'parents': epci_id}}):
                processed += 1
    success('Attached {0} french towns to their EPCI'.format(processed))
Example #18
def town_with_districts(db, filename):
    info('Attaching Paris town districts')
    paris = db.find_one({'_id': 'fr/town/75056'})
    parents = paris['parents']
    parents.append(paris['_id'])
    result = db.update_many(
        {'_id': {'$in': PARIS_DISTRICTS}},
        {'$addToSet': {'parents': {'$each': parents}}})
    success('Attached {0} districts to Paris'.format(result.modified_count))

    info('Attaching Marseille town districts')
    marseille = db.find_one({'_id': 'fr/town/13055'})
    parents = marseille['parents']
    parents.append(marseille['_id'])
    result = db.update_many(
        {'_id': {'$in': MARSEILLE_DISTRICTS}},
        {'$addToSet': {'parents': {'$each': parents}}})
    success('Attached {0} districts to Marseille'.format(result.modified_count))

    info('Attaching Lyon town districts')
    lyon = db.find_one({'_id': 'fr/town/69123'})
    parents = lyon['parents']
    parents.append(lyon['_id'])
    result = db.update_many(
        {'_id': {'$in': LYON_DISTRICTS}},
        {'$addToSet': {'parents': {'$each': parents}}})
    success('Attached {0} districts to Lyon'.format(result.modified_count))
Example #19
def compute_county_area_and_population(db, filename):
    info('Computing french counties areas and population by aggregation')
    processed = 0
    pipeline = [
        {'$match': {'level': town.id}},
        {'$unwind': '$parents'},
        {'$match': {'parents': {'$regex': county.id}}},
        {'$group': {
            '_id': '$parents',
            'area': {'$sum': '$area'},
            'population': {'$sum': '$population'}
        }}
    ]
    for result in db.aggregate(pipeline):
        if db.find_one_and_update(
                {'_id': result['_id']},
                {'$set': {
                    'area': result['area'],
                    'population': result['population']
                }}):
            processed += 1
    success('Computed area and population for {0} french counties'.format(processed))
Example #20
def checkpoint_thread(coord, mngr, sess, chck, rstrd):
    """ Checkpoint thread entry point.
  Args:
    coord Coordinator to use
    mngr  Graph manager to use
    sess  Session to use
    chck  Checkpoint manager to use
    rstrd Whether the model was just restored from a checkpoint
  """
    # Check arguments
    global args
    delta = args.checkpoint_delta
    period = args.checkpoint_period
    if delta < 0 and period < 0:  # Effectively disabled
        tools.info("Checkpoint saving is effectively disabled")
        return
    if rstrd:
        last_step = sess.run(mngr.step)
        last_time = time.time()
    else:
        last_step = -delta
        last_time = -period
    # Save checkpoints
    with mngr.graph.as_default():
        while True:
            time.sleep(config.thread_idle_delay)
            step = sess.run(mngr.step)
            now = time.time()
            stop = coord.should_stop()
            if stop or (delta >= 0 and step - last_step >= delta) or (
                    period >= 0. and now - last_time >= period):
                chck.save(sess, step)
                tools.info("Checkpoint saved (took " +
                           repr(time.time() - now) + " s)")
                last_step = sess.run(mngr.step)
                last_time = time.time()
                if stop:
                    break
Example #21
def fetch_missing_data_from_dbpedia(db, filename):
    info('Fetching DBPedia data')
    processed = 0
    for zone in db.find({
            'wikipedia': {'$exists': True, '$ne': None},
            '$or': [
                {'population': None},
                {'population': {'$exists': False}},
                {'area': None},
                {'area': {'$exists': False}},
            ]
            }, no_cursor_timeout=True):

        dbpedia = DBPedia(zone['wikipedia'])
        metadata = {
            'dbpedia': dbpedia.resource_url,
        }
        metadata.update(dbpedia.fetch_population_or_area())
        metadata.update(dbpedia.fetch_flag_or_blazon())
        if db.find_one_and_update({'_id': zone['_id']},
                                  {'$set': metadata}):
            processed += 1
    success('Fetched DBPedia data for {0} zones'.format(processed))
Example #22
def download(ctx):
    '''Download sources datasets'''
    title('Downloading required datasets')
    if not exists(DL_DIR):
        os.makedirs(DL_DIR)

    urls = (level.urls for level in ctx.obj['levels'] if level.urls)
    urls = set([url for lst in urls for url in lst])
    for url in urls:
        info('Dealing with {0}'.format(url))
        filename, size = extract_meta_from_headers(url)
        info('Downloading {0}'.format(filename))
        with click.progressbar(length=size) as bar:
            def reporthook(blocknum, blocksize, totalsize):
                read = blocknum * blocksize
                if read <= 0:
                    return
                if read > totalsize:
                    bar.update(size)
                else:
                    bar.update(read)

            urlretrieve(url, join(DL_DIR, filename), reporthook=reporthook)
Example #23
def download(ctx):
    '''Download sources datasets'''
    title('Downloading required datasets')
    if not exists(DL_DIR):
        os.makedirs(DL_DIR)

    urls = (level.urls for level in ctx.obj['levels'] if level.urls)
    urls = set([url for lst in urls for url in lst])
    for url in urls:
        info('Dealing with {0}'.format(url))
        filename, size = extract_meta_from_headers(url)
        info('Downloading {0}'.format(filename))
        with click.progressbar(length=size) as bar:

            def reporthook(blocknum, blocksize, totalsize):
                read = blocknum * blocksize
                if read <= 0:
                    return
                if read > totalsize:
                    bar.update(size)
                else:
                    bar.update(read)

            urlretrieve(url, join(DL_DIR, filename), reporthook=reporthook)
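
Note that urlretrieve reports cumulative progress (block number times block size), while click's bar.update() advances the bar by an increment. A self-contained, hypothetical variant of the download step that converts the cumulative count into deltas could look like this:

from urllib.request import urlretrieve
import click

def download_with_progress(url, dest, size):
    """Download url to dest while showing a progress bar of size bytes."""
    with click.progressbar(length=size) as bar:
        state = {'done': 0}

        def reporthook(blocknum, blocksize, totalsize):
            read = min(blocknum * blocksize, size)
            if read > state['done']:
                # bar.update() adds to the current position, so pass only
                # the newly downloaded portion.
                bar.update(read - state['done'])
                state['done'] = read

        urlretrieve(url, dest, reporthook=reporthook)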
def extract_for_one_sample(tuple, extract, use_power_db=False, **kwargs):
    data, idx = tuple
    r = extract(data, **kwargs)
    info("note: feee=librosa, extract r shape={}".format(r.shape))
    # for melspectrogram
    if use_power_db:
        r = librosa.power_to_db(r)

    info("note: feee=librosa, after power_to_db r shape={}".format(r.shape))
    r = r.transpose()
    info("note: feee=librosa, after transpose r shape={}".format(r.shape))
    return r, idx
Example #25
def load(ctx, drop):
    '''Load zones from a folder of zip files containing shapefiles'''
    title('Extracting zones from datasets')
    zones = DB()

    if drop:
        info('Drop existing collection')
        zones.drop()

    with ok('Creating index (level,code)'):
        zones.create_index([('level', ASCENDING), ('code', ASCENDING)])
    info('Creating index (level,keys)')
    zones.create_index([('level', ASCENDING), ('keys', ASCENDING)])
    info('Creating index (parents)')
    zones.create_index('parents')

    total = 0

    for level in ctx.obj['levels']:
        info('Processing level "{0}"'.format(level.id))
        total += level.load(DL_DIR, zones)

    success('Done: Loaded {0} zones'.format(total))
Example #26
def load(ctx, drop):
    '''Load zones from a folder of zip files containing shapefiles'''
    title('Extracting zones from datasets')
    zones = DB()

    if drop:
        info('Drop existing collection')
        zones.drop()

    with ok('Creating index (level,code)'):
        zones.create_index([('level', ASCENDING), ('code', ASCENDING)])
    info('Creating index (level,keys)')
    zones.create_index([('level', ASCENDING), ('keys', ASCENDING)])
    info('Creating index (parents)')
    zones.create_index('parents')

    total = 0

    for level in ctx.obj['levels']:
        info('Processing level "{0}"'.format(level.id))
        total += level.load(DL_DIR, zones)

    success('Done: Loaded {0} zones'.format(total))
Example #27
def load(ctx, drop):
    """Load zones from a folder of zip files containing shapefiles"""
    title("Extracting zones from datasets")
    zones = DB()

    if drop:
        info("Drop existing collection")
        zones.drop()

    with ok("Creating index (level,code)"):
        zones.create_index([("level", ASCENDING), ("code", ASCENDING)])
    info("Creating index (level,keys)")
    zones.create_index([("level", ASCENDING), ("keys", ASCENDING)])
    info("Creating index (parents)")
    zones.create_index("parents")

    total = 0

    for level in ctx.obj["levels"]:
        info('Processing level "{0}"'.format(level.id))
        total += level.load(DL_DIR, zones)

    success("Done: Loaded {0} zones".format(total))
Example #28
def compute_town_with_districts_population(db, filename):
    info('Computing Paris town districts population')
    districts = db.find({'_id': {'$in': PARIS_DISTRICTS}})
    population = sum(district['population'] for district in districts)
    db.find_one_and_update(
        {'_id': 'fr/town/75056'},
        {'$set': {'population': population}})
    success('Computed population for Paris')

    info('Computing Marseille town districts population')
    districts = db.find({'_id': {'$in': MARSEILLE_DISTRICTS}})
    population = sum(district['population'] for district in districts)
    db.find_one_and_update(
        {'_id': 'fr/town/13055'},
        {'$set': {'population': population}})
    success('Computed population for Marseille')

    info('Computing Lyon town districts population')
    districts = db.find({'_id': {'$in': LYON_DISTRICTS}})
    population = sum(district['population'] for district in districts)
    db.find_one_and_update(
        {'_id': 'fr/town/69123'},
        {'$set': {'population': population}})
    success('Computed population for Lyon')
Example #29
def attach_counties_to_subcountries(db, filename):
    info('Attaching French Metropolitan counties')
    ids = ['fr/county/{0}' .format(c) for c in FR_METRO_COUNTIES]
    result = db.update_many(
        {'$or': [{'_id': {'$in': ids}}, {'parents': {'$in': ids}}]},
        {'$addToSet': {'parents': 'country-subset/fr/metro'}}
    )
    success('Attached {0} French Metropolitan children'.format(result.modified_count))

    info('Attaching French DOM counties')
    ids = ['fr/county/{0}' .format(c) for c in FR_DOM_COUNTIES]
    result = db.update_many(
        {'$or': [{'_id': {'$in': ids}}, {'parents': {'$in': ids}}]},
        {'$addToSet': {'parents': 'country-subset/fr/dom'}}
    )
    success('Attached {0} French DOM children'.format(result.modified_count))

    info('Attaching French DOM/TOM counties')
    ids = ['fr/county/{0}' .format(c) for c in FR_DOMTOM_COUNTIES]
    result = db.update_many(
        {'$or': [{'_id': {'$in': ids}}, {'parents': {'$in': ids}}]},
        {'$addToSet': {'parents': 'country-subset/fr/domtom'}}
    )
    success('Attached {0} French DOM/TOM children'.format(result.modified_count))
Example #30
def get_phishing(root, url):
  """ Lazy-load the phishing dataset.
  Args:
    root Dataset cache root directory
    url  URL to fetch raw dataset from, if not already in cache (None for no download)
  Returns:
    Input tensor,
    Label tensor
  """
  global raw_phishing
  const_filename = "phishing.pt"
  const_features = 68
  const_datatype = torch.float32
  # Fast path: return loaded dataset
  if raw_phishing is not None:
    return raw_phishing
  # Make dataset path
  dataset_file = root / const_filename
  # Fast path: pre-processed dataset already locally available
  if dataset_file.exists():
    with dataset_file.open("rb") as fd:
      # Load, lazy-store and return dataset
      dataset = torch.load(fd)
      raw_phishing = dataset
      return dataset
  elif url is None:
    raise RuntimeError("Phishing dataset not in cache and download disabled")
  # Download dataset
  tools.info("Downloading dataset...", end="", flush=True)
  try:
    response = requests.get(url)
  except Exception as err:
    tools.warning(" fail.")
    raise RuntimeError(f"Unable to get dataset (at {url}): {err}")
  tools.info(" done.")
  if response.status_code != 200:
    raise RuntimeError(f"Unable to fetch raw dataset (at {url}): GET status code {response.status_code}")
  # Pre-process dataset
  tools.info("Pre-processing dataset...", end="", flush=True)
  entries = response.text.strip().split("\n")
  inputs = torch.zeros(len(entries), const_features, dtype=const_datatype)
  labels = torch.empty(len(entries), dtype=const_datatype)
  for index, entry in enumerate(entries):
    entry = entry.split(" ")
    # Set label
    labels[index] = 1 if entry[0] == "1" else 0
    # Set input
    line = inputs[index]
    for pos, setter in enumerate(entry[1:]):
      try:
        offset, value = setter.split(":")
        line[int(offset) - 1] = float(value)
      except Exception as err:
        tools.warning(" fail.")
        raise RuntimeError(f"Unable to parse dataset (line {index + 1}, position {pos + 1}): {err}")
  labels.unsqueeze_(1)
  tools.info(" done.")
  # (Try to) save pre-processed dataset
  try:
    with dataset_file.open("wb") as fd:
      torch.save((inputs, labels), fd)
  except Exception as err:
    tools.warning(f"Unable to save pre-processed dataset: {err}")
  # Lazy-store and return dataset
  dataset = (inputs, labels)
  raw_phishing = dataset
  return dataset
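
Hypothetical usage of the loader above; the cache directory is illustrative and the URL is a placeholder for wherever the raw LIBSVM-format file ("label index:value" lines, 68 features) is mirrored:

import pathlib

PHISHING_URL = "https://example.org/phishing.libsvm"  # placeholder, not a real mirror
inputs, labels = get_phishing(pathlib.Path("dataset-cache"), PHISHING_URL)
print(inputs.shape, labels.shape)  # expected: (N, 68) and (N, 1)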
Example #31
def add_ue_to_parents(db, filename):
    info('Adding European Union to countries parents')
    result = db.update_many({'level': country.id, 'code': {'$in': UE_COUNTRIES}},
        {'$addToSet': {'parents': 'country-group/ue'}})
    success('Added European Union as parent to {0} countries'.format(result.modified_count))
Example #32
def build_and_load_one(path, deps=[]):
     """ Check if the given directory is a module to build and load, and if yes recursively build and load its dependencies before it.
 Args:
   path Given directory path
   deps Dependent module paths
 Returns:
   True on success, False on failure, None if not a module
 """
     nonlocal done_modules
     nonlocal fail_modules
     with tools.Context(path.name, "info"):
         ident = path.name[:3]
         if ident in ident_to_is_python.keys():
             # Is a module directory
             if len(path.name) <= 3 or path.name[3] == "_":
                 tools.warning("Skipped invalid module directory name " +
                               repr(path.name))
                 return None
             if not path.exists():
                 tools.warning("Unable to build and load " +
                               repr(str(path.name)) +
                               ": module does not exist")
                 fail_modules.append(path)  # Mark as failed
                 return False
             is_python_module = ident_to_is_python[ident]
             # Check if already built and loaded, or failed
             if path in done_modules:
                 if len(deps) == 0 and debug_mode:
                     tools.info("Already built and loaded " +
                                repr(str(path.name)))
                 return True
             if path in fail_modules:
                 if len(deps) == 0:
                     tools.warning("Was unable to build and load " +
                                   repr(str(path.name)))
                 return False
             # Check for dependency cycle (disallowed as they may mess with the linker)
             if path in deps:
                 tools.warning("Unable to build and load " +
                               repr(str(path.name)) +
                               ": dependency cycle found")
                 fail_modules.append(path)  # Mark as failed
                 return False
             # Build and load dependencies
             this_ldflags = list(extra_ldflags)
             depsfile = path / dependencies_file
             if depsfile.exists():
                 for modname in depsfile.read_text().splitlines():
                     res = build_and_load_one(base_directory / modname,
                                              deps + [path])
                     if res == False:  # Unable to build a dependency
                         if len(deps) == 0:
                             tools.warning("Unable to build and load " +
                                           repr(str(path.name)) +
                                           ": dependency " + repr(modname) +
                                           " build and load failed")
                         fail_modules.append(path)  # Mark as failed
                         return False
                     elif res == True:  # Module and its sub-dependencies was/were built and loaded successfully
                         this_ldflags.append("-Wl,--library=:" + str(
                             (base_directory / modname /
                              (modname + ".so")).resolve()))
             # List sources
             sources = []
             for subpath in path.iterdir():
                 if subpath.is_file() and ("").join(
                         subpath.suffixes) in source_suffixes:
                     sources.append(str(subpath))
             # Build and load this module
             try:
                 res = torch.utils.cpp_extension.load(
                     name=path.name,
                     sources=sources,
                     extra_cflags=extra_cflags,
                     extra_cuda_cflags=extra_cuda_cflags,
                     extra_ldflags=this_ldflags,
                     extra_include_paths=extra_include_paths,
                     build_directory=str(path),
                     verbose=debug_mode,
                     is_python_module=is_python_module)
                 if is_python_module:
                     glob[path.name[3:]] = res
             except Exception as err:
                 tools.warning("Unable to build and load " +
                               repr(str(path.name)) + ": " + str(err))
                 fail_modules.append(path)  # Mark as failed
                 return False
             done_modules.append(path)  # Mark as built and loaded
             return True
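
For context, a minimal, hypothetical illustration of the JIT build call at the core of the function above (module and source names are made up); torch.utils.cpp_extension.load compiles the listed sources and, with is_python_module=True, imports and returns the resulting extension:

import torch.utils.cpp_extension as cppext

ext = cppext.load(
    name="py_example",  # hypothetical module name
    sources=["py_example/binding.cpp", "py_example/kernel.cu"],
    extra_cflags=["-O3"],
    verbose=True,
    is_python_module=True)
print(ext)  # the freshly built and imported extension module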
Example #33
    }
    if not noopts:
        ticketdata['issue']['fields']['labels'] = TLABELS
        ticketdata['issue']['fields']['components'] = TCOMP
    return ticketdata

# Ticket Object Without Labels and Components
ticket2 = JiraTicket(generate_data(True))
# Ticket Object With Labels and Components
ticket = JiraTicket(generate_data())

# Tests
fails = 0
passes = 0

tools.info("Starting JIRAPy Tests")
tools.sep()

# Check Ticket Summary
if ticket.summary == TSUM:
    tools.info(tools.padstat("TEST Ticket Summary", "SUCCESS"))
    passes += 1
else:
    tools.error(tools.padstat("TEST Ticket Summary", "FAILED"))
    fails += 1

# Check Ticket Description
if ticket.description == TDESC:
    tools.info(tools.padstat("TEST Ticket Description", "SUCCESS"))
    passes += 1
else:
Example #34
import datetime as date  # referenced below as date.datetime.now()
import currency          # project-local module providing periodData()
import tools
import graphics

#week = ["Segunda", "Terca", "Quarta", "Quinta", "Sexta"]
#month = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31" ]
#year = ["Janeiro", "Fevereiro", "Marco", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"]

date_full = date.datetime.now().strftime("%Y-%m-%d")
date_day = date.datetime.now().strftime("%d")
date_month = date.datetime.now().strftime("%m")
date_year = date.datetime.now().strftime("%Y")

filename = "7"
compare_from = "BRL"
compare_to = "EUR"
start_date = "2019-01-01"
#end_date = date_full
end_date = "2019-10-11"

responseJSON = currency.periodData(compare_from, start_date, date_full)
rates = responseJSON['rates']

#generate_graph("1", rates, "BRL", "CAD")
graphics.generate('Images/' + filename, rates, compare_from, compare_to)

dados = tools.info(rates, compare_from, compare_to)

print("Minimo: ", dados[0], '\nMedia: ', dados[1], '\nMaximo: ', dados[2])

tools.fibonacci_graph(dados[0], dados[2], 200)
Example #35
# -----------------------------------------------------------------------------
# From Numpy to Python
# Copyright (2017) Nicolas P. Rougier - BSD license
# More information at https://github.com/rougier/numpy-book
# -----------------------------------------------------------------------------
import numpy as np
from tools import info

if __name__ == '__main__':
    Z = np.arange(4 * 4).reshape(4, 4)

    Z = np.array(Z, order='C')
    info(Z)

    Z = np.array(Z, order='F')
    info(Z)
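
A small companion check (assumed, not part of the original snippet): the memory layout chosen above can be verified through the array flags:

import numpy as np

Z = np.arange(4 * 4).reshape(4, 4)
print(np.ascontiguousarray(Z).flags['C_CONTIGUOUS'])  # True (row-major)
print(np.asfortranarray(Z).flags['F_CONTIGUOUS'])     # True (column-major)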
from multiprocessing.pool import ThreadPool, Pool
import json

import librosa
import numpy as np
from tensorflow.python.keras.preprocessing import sequence

from CONSTANT import NUM_MFCC, FFT_DURATION, HOP_DURATION
import tools

print("tools file={}".format(tools.__file__))
from tools import timeit, log, info
import time
import tensorflow

info("tensorflow version = {}".format(tensorflow.__version__))


def ohe2cat(label):
    return np.argmax(label, axis=1)


@timeit
def get_max_length(x, ratio=0.95):
    """
    Get the maximum length that covers 95% of the data.
    """
    lens = [len(_) for _ in x]
    max_len = max(lens)
    min_len = min(lens)
    lens.sort()
Example #37
                     args.learning_rate,
                     args.learning_rate_args,
                     head="")
    tools.print_args("optimizer", args.optimizer, args.optimizer_args, head="")
    tools.print_args("attack", args.attack, args.attack_args, head="")

if exit_pending:
    exit(0)
# ---------------------------------------------------------------------------- #
# Cluster management
tools.success("Cluster analysis and allocation phase...")

with tools.Context("cluster", "info"):
    # Cluster manager instantiation
    if args.server:  # Assume the role of the parameter server, which allows the use of 'tf.py_func'
        tools.info("Acting as node " + args.ps_job_name + ":0 in the cluster")
        if args.MPI:
            proto = 'grpc+mpi'
            print(
                "Using MPI..........................................................."
            )
            sys.stdout.flush()
        else:
            proto = 'grpc'
        args.client = tf.train.Server(tf.train.ClusterSpec(args.server),
                                      job_name=args.ps_job_name,
                                      task_index=0,
                                      start=True,
                                      protocol=proto).target
    cluster_mgr = cluster.Manager(args.client,
                                  devs=device_prefs,
Example #38
def evaluation_thread(coord, mngr, sess, path, first):
    """ Evaluation thread entry point.
  Args:
    coord Coordinator to use
    mngr  Graph manager to use
    sess  Session to use
    path  Path to the storage file
    first Event notifying first evaluation is complete
  """
    # Check arguments
    global args
    delta = args.evaluation_delta
    period = args.evaluation_period
    if delta < 0 and period < 0:  # Effectively disabled
        tools.info("Evaluation is effectively disabled")
        first.set()
        return
    last_step = -delta
    last_time = -period
    # Open file (if parent exists)
    if path:
        path = pathlib.Path(path)
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            fd = path.open("a")
        except Exception:
            fd = None
    else:
        fd = None
    # Evaluate (and save) accuracy
    with mngr.graph.as_default():
        while True:
            time.sleep(config.thread_idle_delay)
            step = sess.run(mngr.step)
            now = time.time()
            stop = coord.should_stop()
            if stop or (delta >= 0 and step - last_step >= delta) or (
                    period >= 0. and now - last_time >= period):
                accuracies = sess.run(mngr.eval_tns)
                if fd is not None:
                    line = str(now) + "\t" + str(step)
                    for key, val in accuracies.items():
                        line += "\t" + key + ":" + str(val)
                    fd.write(line + os.linesep)
                    fd.flush()
                line = ""
                for key, val in accuracies.items():
                    if len(line) > 0:
                        line += ", "
                    line += key + " = " + str(val)
                tools.info(" Step " + str(step) + ": " + line + " (took " +
                           repr(time.time() - now) + " s)")
                if first is not None:
                    first.set()
                    first = None
                last_step = sess.run(mngr.step)
                last_time = time.time()
                if stop:
                    break
    # Close file (if any)
    if fd is not None:
        fd.close()
Example #39
def __init__(self,
             experiment,
             aggregator,
             dev_tuples,
             optimizer,
             optimizer_args,
             learning_rate,
             learning_rate_args,
             regularizations=(-1., -1.),
             trace=False):
     """ Full graph (training + evaluation) constructor.
 Args:
   experiment         Experiment instance to use
   aggregator         Aggregator instance to use
   dev_tuples         Tuple of devices (i.e. tuples of strings (job name, task ID, device type, device ID)) for (parameter server, each workers' inference/loss/gradient computation, evaluator)
   optimizer          Optimizer name to use
   optimizer_args     Additional optimizer key-value arguments
   learning_rate      Learning rate name to use
   learning_rate_args Additional learning rate key-value arguments
   regularizations    Pair of (l1, l2) regularization values, non-positive values for no regularization
   trace              Whether to add trace prints for every important step of the computations
 """
     # Tuple extraction and device name reconstruction
     ps_tuple, wk_tuples, ev_tuple = dev_tuples
     ps_device = tools.device_from_tuple(*ps_tuple)
     wk_jobs = {}  # Map job -> taskid -> list of pairs of (devtype, devid)
     for job, taskid, devtype, devid in wk_tuples:
         if job in wk_jobs:
             wk_tasks = wk_jobs[job]
             if taskid in wk_tasks:
                 wk_tasks[taskid].append((devtype, devid))
             else:
                 wk_tasks[taskid] = [(devtype, devid)]
         else:
             wk_jobs[job] = {taskid: [(devtype, devid)]}
     # Graph building
     graph = tf.Graph()
     with graph.as_default():
         with tf.name_scope("ps/"):
             with tf.device(ps_device):
                 # Instantiate global step counter, optimizer and learning rate
                 global_step = tf.train.create_global_step()
                 learning_rate = build(learning_rates,
                                       "learning rate decay",
                                       learning_rate,
                                       learning_rate_args,
                                       global_step=global_step)
                 optimizer = build(optimizers,
                                   "optimizer",
                                   optimizer,
                                   optimizer_args,
                                   learning_rate=learning_rate)
                 tf.summary.scalar("learning_rate", learning_rate)
                 # Create workers' gradient computation
                  totlosses = []  # List of losses, for summary (and printing) only
                  gradients = []  # List of gradients, one per non-Byzantine worker
                 flatmap = None  # Flat map used to flatten the gradients coherently
                 with tf.name_scope("workers/"):
                     for job, wk_tasks in wk_jobs.items():
                         for taskid, models in wk_tasks.items():
                             device_dataset = tools.device_from_tuple(
                                 job, taskid, "CPU", "*")
                             device_models = [
                                 replica_device_setter(
                                     ps_device,
                                     tools.device_from_tuple(
                                         job, taskid, devtype, devid))
                                 for devtype, devid in models
                             ]
                             # Compute losses
                             losses = experiment.losses(device_dataset,
                                                        device_models,
                                                        trace=trace)
                             totlosses += losses
                             # Compute gradients
                             for i in range(len(device_models)):
                                 with tf.device(device_models[i]):
                                     loss = losses[i]
                                     for norm in [1, 2]:
                                         strength = regularizations[
                                             norm -
                                             1]  # 'norm - 1' is just a basic numbering trick...
                                         if strength > 0.:
                                             loss = loss + strength * regularization(
                                                 norm)
                                     if trace:
                                         loss = tools.trace_graph(
                                             loss, "Worker " +
                                             str(len(gradients)) +
                                             ": loss computation")
                                     grad_vars = optimizer.compute_gradients(
                                         loss)
                                     if flatmap is None:
                                         gradient, flatmap = flatten(
                                             grad_vars)
                                     else:
                                         gradient = flatten(
                                             grad_vars, flatmap)
                                     if trace:
                                         gradient = tools.trace_graph(
                                             gradient, "Worker " +
                                             str(len(gradients)) +
                                             ": gradient computation")
                                     gradients.append(gradient)
                 total_loss = tf.add_n(totlosses, name="total_loss")
                 tools.info(
                     "Created workers' dataset, inference, loss and gradient computation nodes"
                 )
                 # Aggregate and apply the workers' gradients
                 with tf.name_scope("GAR"):
                     time1 = time.time()
                     aggregated = aggregator.aggregate(gradients)
                     time2 = time.time()
                     #print("ms=$$$$$$$$$$$$$$$$$$$$$$",(time2-time1)*1000)
                     if trace:
                         aggregated = tools.trace_graph(
                             aggregated,
                             "Master: aggregated gradient computation")
                 apply_op = optimizer.apply_gradients(
                     inflate(aggregated, mapflat(flatmap)),
                     global_step=global_step)
                 if trace:
                     apply_op = tools.trace_graph(
                         apply_op,
                         "Master: aggregated gradient application")
                 tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, apply_op)
                 tools.info(
                     "Created parameter server's gradient aggregation and application nodes"
                 )
                 # Create accuracy computation
                 with tf.name_scope("eval/"):
                     device_dataset = tools.device_from_tuple(
                         ev_tuple[0], ev_tuple[1], "CPU", "*")
                     device_model = tools.device_from_tuple(*ev_tuple)
                     accuracy_tns = experiment.accuracy(
                         device_dataset,
                         [replica_device_setter(ps_device, device_model)],
                         trace=trace)
                 for key, val in accuracy_tns.items():
                     tf.add_to_collection(
                         tf.GraphKeys.SUMMARIES,
                         tf.summary.scalar("eval-" + key, val))
                 tools.info(
                     "Created evaluator's dataset, inference and accuracy computation nodes"
                 )
                 # Global summary protocol buffer
                 summary_tn = tf.summary.merge(
                     list(set(tf.get_collection(tf.GraphKeys.SUMMARIES))))
                 # Full initialization operation
                 rsrc_init_ops = []
                 for resource in tf.get_collection(tf.GraphKeys.RESOURCES):
                     rsrc_init_ops.append(resource.initializer)
                 for resource in tf.get_collection(
                         tf.GraphKeys.LOCAL_RESOURCES):
                     rsrc_init_ops.append(resource.initializer)
                 init_op = tf.group(
                     tf.variables_initializer(tf.global_variables() +
                                              tf.local_variables()),
                     tf.tables_initializer(), *rsrc_init_ops)
                 # Build the training operation
                 with tf.control_dependencies(
                         tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                     train_tn = tf.identity(total_loss, name="train_tn")
     # Finalization
     self.graph = graph
     self.step = global_step
     self.rate = learning_rate
     self.optimizer = optimizer
     self.total_loss = total_loss
     self.summary_tn = summary_tn
     self.init_op = init_op
     self.train_tn = train_tn
     self.eval_tns = accuracy_tns