def _run(topdir, name, seed, device, command):
    """ Run the attack experiments with the given named parameters.
    Args:
        topdir  Parent result directory
        name    Experiment unique name
        seed    Experiment seed
        device  Device on which to run the experiments
        command Command to run
    """
    # Add seed to name
    name = "%s-%d" % (name, seed)
    # Process experiment
    with tools.Context(name, "info"):
        finaldir = topdir / name
        # Check whether the experiment was already successful
        if finaldir.exists():
            tools.info("Experiment already processed.")
            return
        # Move-make the pending result directory
        resdir = move_directory(topdir / f"{name}.pending")
        resdir.mkdir(mode=0o755, parents=True)
        # Build the command
        args = command.build(seed, device, resdir)
        # Launch the experiment and write the standard output/error
        tools.trace((" ").join(shlex.quote(arg) for arg in args))
        cmd_res = subprocess.run(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if cmd_res.returncode == 0:
            tools.info("Experiment successful")
        else:
            tools.warning("Experiment failed")
            finaldir = topdir / f"{name}.failed"
            move_directory(finaldir)
        resdir.rename(finaldir)
        (finaldir / "stdout.log").write_bytes(cmd_res.stdout)
        (finaldir / "stderr.log").write_bytes(cmd_res.stderr)
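# Hedged sketch, not the original implementation: `_run` above relies on an
# undefined `move_directory` helper, both to set aside any stale ".pending"/
# ".failed" directory and to return the path it was given. A minimal version
# consistent with that usage could look like this (the timestamp-suffix backup
# behavior is an assumption for illustration only).
import pathlib
import time

def move_directory(path: pathlib.Path) -> pathlib.Path:
    """If 'path' already exists, rename it out of the way; return 'path'."""
    if path.exists():
        backup = path.with_name(f"{path.name}.{int(time.time())}")
        path.rename(backup)
    return path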
def attach_and_clean_iris(db, filename):
    info('Attaching French IRIS to their region')
    processed = 0
    for zone in db.find({'level': iris.id}):
        candidates_ids = [p for p in zone['parents'] if p.startswith(town.id)]
        if len(candidates_ids) < 1:
            warning('No parent candidate found for: {0}'.format(zone['_id']))
            continue
        town_id = candidates_ids[0]
        town_zone = db.find_one({'_id': town_id})
        if not town_zone:
            warning('Town {0} not found'.format(town_id))
            continue
        if zone.get('_type') == 'Z':
            name = town_zone['name']
        else:
            name = ''.join((town_zone['name'], ' (', zone['name'], ')'))
        ops = {
            '$addToSet': {'parents': {'$each': town_zone['parents']}},
            '$set': {'name': name},
            '$unset': {'_town': 1, '_type': 1}
        }
        if db.find_one_and_update({'_id': zone['_id']}, ops):
            processed += 1
    success('Attached {0} french IRIS to their parents'.format(processed))
def process_dataset(self, workdir, db, url, extractor):
    '''Extract territories from a given file for a given level
    with a given extractor function'''
    loaded = 0
    filename = join(workdir, basename(url))
    with fiona.open('/', vfs='zip://{0}'.format(filename), encoding='utf8') as collection:
        info('Extracting {0} elements from {1} ({2} {3})'.format(
            len(collection), basename(filename), collection.driver, to_string(collection.crs)
        ))
        for polygon in collection:
            try:
                zone = extractor(polygon)
                if not zone:
                    continue
                zone['keys'] = dict((k, v) for k, v in zone.get('keys', {}).items() if v is not None)
                geom = shape(polygon['geometry'])
                if extractor.simplify:
                    geom = geom.simplify(extractor.simplify)
                if geom.geom_type == 'Polygon':
                    geom = MultiPolygon([geom])
                elif geom.geom_type != 'MultiPolygon':
                    warning('Unsupported geometry type "{0}" for "{1}"'.format(geom.geom_type, zone['name']))
                    continue
                zoneid = '/'.join((self.id, zone['code']))
                zone.update(_id=zoneid, level=self.id, geom=geom.__geo_interface__)
                db.find_one_and_replace({'_id': zoneid}, zone, upsert=True)
                loaded += 1
            except Exception as e:
                error('Error extracting polygon {0}: {1}', polygon['properties'], str(e))
    info('Loaded {0} zones for level {1} from file {2}'.format(loaded, self.id, filename))
    return loaded
def build_aggregates(self, db):
    processed = 0
    for code, name, zones, properties in self.aggregates:
        info('Building aggregate "{0}" (level={1}, code={2})'.format(name, self.id, code))
        zone = self.build_aggregate(code, name, zones, properties, db)
        db.find_one_and_replace({'_id': zone['_id']}, zone, upsert=True)
        processed += 1
    return processed
def process_insee_cog(db, filename):
    '''Use information from INSEE COG to attach parents.
    http://www.insee.fr/fr/methodes/nomenclatures/cog/telechargement.asp
    '''
    info('Processing INSEE COG')
    processed = 0
    counties = {}
    districts = {}
    with ZipFile(filename) as cogzip:
        with cogzip.open('comsimp2016.txt') as tsvfile:
            tsvio = io.TextIOWrapper(tsvfile, encoding='cp1252')
            reader = csv.DictReader(tsvio, delimiter='\t')
            for row in reader:
                # Lower everything, from 2B to 2b for instance.
                region_code = row['REG'].lower()
                county_code = row['DEP'].lower()
                district_code = row['AR'].lower()
                town_code = row['COM'].lower()
                insee_code = ''.join((county_code, town_code))
                region_id = 'fr/region/{0}'.format(region_code)
                county_id = 'fr/county/{0}'.format(county_code)
                parents = [region_id, county_id]
                if county_id not in counties:
                    counties[county_id] = region_id
                if district_code:
                    district_code = ''.join((county_code, district_code))
                    district_id = 'fr/district/{0}'.format(district_code)
                    parents.append(district_id)
                    if district_id not in districts:
                        districts[district_id] = [region_id, county_id]
                if db.find_one_and_update(
                        {'level': town.id, 'code': insee_code},
                        {'$addToSet': {'parents': {'$each': parents}}}):
                    processed += 1
    success('Attached {0} french towns to their parents'.format(processed))
    processed = 0
    for district_id, parents in districts.items():
        if db.find_one_and_update(
                {'_id': district_id},
                {'$addToSet': {'parents': {'$each': parents}}}):
            processed += 1
    success('Attached {0} french districts to their parents'.format(processed))
    processed = 0
    for county_id, parent in counties.items():
        if db.find_one_and_update(
                {'_id': county_id},
                {'$addToSet': {'parents': parent}}):
            processed += 1
    success('Attached {0} french counties to their parents'.format(processed))
def extract_features_old(X_list, model, len_sample=5, sr=SAMPLING_RATE):
    X = get_fixed_array(X_list, len_sample=len_sample, sr=sr)
    info("note: exft get fix done.")
    X = model.predict(X)
    info("note: exft model predict done.")
    X = X.transpose(0, 2, 1, 3)
    # squeeze.
    X = np.squeeze(X)
    # info("note: exft model transpose and squeeze done.")
    return X
def process_dataset(self, workdir, db, url, extractor):
    '''Extract territories from a given file for a given level
    with a given extractor function'''
    loaded = 0
    filename = join(workdir, basename(url))
    # Identify the shapefile to avoid multiple file error on GDAL 2
    with ZipFile(filename) as z:
        candidates = [n for n in z.namelist() if n.endswith('.shp')]
        if len(candidates) != 1:
            raise ValueError(
                'Unable to find a unique shapefile into {0}'.format(filename))
        shp = candidates[0]
    with fiona.open('/{0}'.format(shp), vfs='zip://{0}'.format(filename),
                    encoding='utf8') as collection:
        info('Extracting {0} elements from {1} ({2} {3})'.format(
            len(collection), basename(filename), collection.driver,
            to_string(collection.crs)))
        for polygon in collection:
            try:
                zone = extractor(polygon)
                if not zone:
                    continue
                zone['keys'] = dict(
                    (k, v) for k, v in zone.get('keys', {}).items()
                    if v is not None)
                geom = shape(polygon['geometry'])
                if extractor.simplify:
                    geom = geom.simplify(extractor.simplify)
                if geom.geom_type == 'Polygon':
                    geom = MultiPolygon([geom])
                elif geom.geom_type != 'MultiPolygon':
                    warning('Unsupported geometry type "{0}" for "{1}"'.format(
                        geom.geom_type, zone['name']))
                    continue
                zoneid = '/'.join((self.id, zone['code']))
                zone.update(_id=zoneid, level=self.id,
                            geom=geom.__geo_interface__)
                db.find_one_and_replace({'_id': zoneid}, zone, upsert=True)
                loaded += 1
            except Exception as e:
                error('Error extracting polygon {0}: {1}',
                      polygon['properties'], str(e))
    info('Loaded {0} zones for level {1} from file {2}'.format(
        loaded, self.id, filename))
    return loaded
def _run(name, seed, device, params):
    """ Run the attack experiments with the given named parameters.
    Args:
        name   Experiment unique name
        seed   Experiment seed
        device Device on which to run the experiments
        params Named parameters
    """
    # Add seed to name
    name = "%s-%d" % (name, seed)
    # Process experiment
    with tools.Context(name, "info"):
        # Build and set the result directory
        result_dir = args.data_directory / name
        if result_dir.exists():
            tools.info("Experiment already processed.")
            return
        result_dir.mkdir(mode=0o755, parents=True)
        # Add the missing options
        params["seed"] = str(seed)
        params["device"] = device
        params["result-directory"] = str(result_dir)
        # Launch the experiment and write the standard output/error
        def is_multi_param(param):
            return any(isinstance(param, typ) for typ in (list, tuple))
        def param_to_str(param):
            if is_multi_param(param):
                return (" ").join(shlex.quote(str(val)) for val in param)
            return shlex.quote(str(param))
        tools.trace("python3 -OO attack.py %s" % (" ").join(
            "--%s %s" % (key, param_to_str(val)) for key, val in params.items()))
        command = ["python3", "-OO", "attack.py"]
        for key, val in params.items():
            command.append("--%s" % (key,))
            if is_multi_param(val):
                for subval in val:
                    command.append(str(subval))
            else:
                command.append(str(val))
        cmd_res = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if cmd_res.returncode == 0:
            tools.info("Experiment successful")
        else:
            tools.warning("Experiment failed")
        (result_dir / "stdout.log").write_bytes(cmd_res.stdout)
        (result_dir / "stderr.log").write_bytes(cmd_res.stderr)
def add_ue_to_parents(db, filename):
    info('Adding European Union to countries parents')
    result = db.update_many(
        {'level': country.id, 'code': {'$in': UE_COUNTRIES}},
        {'$addToSet': {'parents': 'country-group/ue'}})
    success('Added European Union as parent to {0} countries'.format(
        result.modified_count))
def extract_features(X_list, len_sample=5, sr=SAMPLING_RATE, use_power_db=False):
    X = get_fixed_array(X_list, len_sample=len_sample, sr=sr)
    info("note: exft get fix done.")
    # X = model.predict(X)
    X = kapre_extractor.predict(X)
    info("note: exft model kapre_extractor predict done, kapre predict shape={}".format(X.shape))
    # basic: (147, 30, 125, 1) to (147, 125, 30, 1) to (147, 125, 30)
    # X = X.transpose(0, 2, 1, 3)
    # X = np.squeeze(X)
    # basic + powertodb. squeeze->powertodb->transpose
    # squeeze.
    X = np.squeeze(X)
    info("note: exft model transpose and squeeze done, shape={}".format(X.shape))
    if use_power_db:
        X = np.asarray([librosa.power_to_db(r) for r in X])
        info("note: exft model kapre_extractor power_to_db done.")
    X = X.transpose(0, 2, 1)
    # info("note: X transpose shape={}".format(X.shape))
    return X
def lr_preprocess_update(x):
    x = [sample[0:MAX_AUDIO_DURATION * AUDIO_SAMPLE_RATE] for sample in x]
    info("note: lr_preprocess update, x type={}, len={}, x[0] shape={}, ele_type={}, value={}, x[-1] shape={}"
         .format(type(x), len(x), x[0].shape, type(x[0][0]), x[0][0], x[-1].shape))
    t1 = time.time()
    if FEET_MODE == "LIBROSA":
        x_mel = extract_melspectrogram_parallel(x, n_mels=30, use_power_db=True)
    elif FEET_MODE == "KAPRE":
        # x_mel = extract_features(x, model=kapre_extractor)
        x_mel = extract_features(x)
    # x_contrast = extract_bandwidth_parallel(x)
    t2 = time.time()
    info_log = list()
    x_feas = []
    for i in range(len(x_mel)):
        mel = np.mean(x_mel[i], axis=0).reshape(-1)
        mel_std = np.std(x_mel[i], axis=0).reshape(-1)
        # contrast = np.mean(x_contrast[i], axis=0).reshape(-1)
        # contrast_std = np.std(x_contrast[i], axis=0).reshape(-1)
        # contrast, contrast_std
        fea_item = np.concatenate([mel, mel_std], axis=-1)
        x_feas.append(fea_item)
        if i < 1:
            info_log.append("i={}, x_mel type={}, shape={}".format(i, type(x_mel[i]), x_mel[i].shape))
            info_log.append("i={}, mel type={}, shape={}".format(i, type(mel), mel.shape))
            info_log.append("i={}, mel_std type={}, shape={}".format(i, type(mel_std), mel_std.shape))
            info_log.append("i={}, fea_item type={}, shape={}".format(i, type(fea_item), fea_item.shape))
    x_feas = np.asarray(x_feas)
    scaler = StandardScaler()
    X = scaler.fit_transform(x_feas[:, :])
    info_log.append("FEET_MODE = {}, x_mel type={}, shape={}, cost_time={}s".format(
        FEET_MODE, type(x_mel), x_mel.shape, round(t2 - t1, 3)))
    info_log.append("x_feas type={}, shape={}".format(type(x_feas), x_feas.shape))
    info_log.append("X type={}, shape={}".format(type(X), X.shape))
    info(json.dumps(info_log, indent=4))
    return X
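# Hedged usage sketch: the "lr" prefix suggests these standardized mel-statistic
# features feed a simple linear classifier downstream. The snippet below is an
# assumption for illustration only (scikit-learn LogisticRegression, and the
# `train_x`/`train_y` names are hypothetical), not the original pipeline.
from sklearn.linear_model import LogisticRegression

def train_lr_baseline(train_x, train_y):
    # train_x: list of 1-D audio arrays; train_y: integer class labels
    features = lr_preprocess_update(train_x)  # shape: (n_samples, 2 * n_mels)
    clf = LogisticRegression(max_iter=1000)
    clf.fit(features, train_y)
    return clf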
def process_postal_codes(db, filename):
    '''
    Extract postal codes from
    https://www.data.gouv.fr/fr/datasets/base-officielle-des-codes-postaux/
    '''
    info('Processing french postal codes')
    processed = 0
    with open(filename, encoding='cp1252') as csvfile:
        reader = csv.reader(csvfile, delimiter=';')
        # skip header
        next(reader, None)
        for insee, _, postal, _, _ in reader:
            ops = {'$addToSet': {'keys.postal': postal}}
            if db.find_one_and_update({'level': town.id, 'code': insee}, ops):
                processed += 1
    success('Processed {0} french postal codes'.format(processed))
def compute_district_population(db, filename):
    info('Computing french district population by aggregation')
    processed = 0
    pipeline = [
        {'$match': {'level': town.id}},
        {'$unwind': '$parents'},
        {'$match': {'parents': {'$regex': district.id}}},
        {'$group': {'_id': '$parents', 'population': {'$sum': '$population'}}}
    ]
    for result in db.aggregate(pipeline):
        if result.get('population'):
            if db.find_one_and_update(
                    {'_id': result['_id']},
                    {'$set': {'population': result['population']}}):
                processed += 1
    success('Computed population for {0} french districts'.format(processed))
def summary_thread(coord, mngr, sess, path, rstrd):
    """ Summary thread entry point.
    Args:
        coord Coordinator to use
        mngr  Graph manager to use
        sess  Session to use
        path  Path to the manager to use
        rstrd Whether the model was just restored from a checkpoint
    """
    global args
    delta = args.summary_delta
    period = args.summary_period
    if delta < 0 and period < 0:  # Effectively disabled
        tools.info("Summary saving is effectively disabled")
        return
    if mngr.summary_tn is None:
        tools.warning("No summary to save")
        return
    if rstrd:
        last_step = sess.run(mngr.step)
        last_time = time.time()
    else:
        last_step = -delta
        last_time = -period
    # Save summaries
    with mngr.graph.as_default():
        with tf.summary.FileWriter(args.summary_dir, graph=mngr.graph) as writer:
            writer.add_session_log(tf.SessionLog(status=tf.SessionLog.START), sess.run(mngr.step))
            while True:
                time.sleep(config.thread_idle_delay)
                step = sess.run(mngr.step)
                now = time.time()
                stop = coord.should_stop()
                if stop or (delta >= 0 and step - last_step >= delta) or (period >= 0. and now - last_time >= period):
                    writer.add_summary(sess.run(mngr.summary_tn), step)
                    tools.info("Summaries saved (took " + repr(time.time() - now) + " s)")
                    last_step = sess.run(mngr.step)
                    last_time = time.time()
                if stop:
                    break
            writer.add_session_log(tf.SessionLog(status=tf.SessionLog.STOP), step)
def attach_canton_parents(db, filename):
    info('Attaching French Canton to their parents')
    canton_processed = 0
    for zone in db.find({'level': canton.id}):
        candidates_ids = [p for p in zone['parents'] if p.startswith(county.id)]
        if len(candidates_ids) < 1:
            warning('No parent candidate found for: {0}'.format(zone['_id']))
            continue
        county_id = candidates_ids[0]
        county_zone = db.find_one({'_id': county_id})
        ops = {
            '$addToSet': {'parents': {'$each': county_zone['parents']}},
            '$unset': {'_dep': 1}
        }
        if db.find_one_and_update({'_id': zone['_id']}, ops):
            canton_processed += 1
    success('Attached {0} french cantons to their parents'.format(canton_processed))
def attach_epci(db, filename):
    '''
    Attach EPCI towns to their EPCI from
    http://www.collectivites-locales.gouv.fr/liste-et-composition-2015
    '''
    info('Processing EPCI town list')
    processed = 0
    # epci_region = {}
    with open(filename, encoding='cp1252') as csvfile:
        reader = csv.DictReader(csvfile, delimiter=';')
        for row in reader:
            siren = row['siren_epci']
            insee = row['insee'].lower()
            # region = row['region']
            # epci_region[siren] = region
            epci_id = 'fr/epci/{0}'.format(siren)
            if db.find_one_and_update(
                    {'level': town.id, 'code': insee},
                    {'$addToSet': {'parents': epci_id}}):
                processed += 1
    success('Attached {0} french town to their EPCI'.format(processed))
def town_with_districts(db, filename):
    info('Attaching Paris town districts')
    paris = db.find_one({'_id': 'fr/town/75056'})
    parents = paris['parents']
    parents.append(paris['_id'])
    result = db.update_many(
        {'_id': {'$in': PARIS_DISTRICTS}},
        {'$addToSet': {'parents': {'$each': parents}}})
    success('Attached {0} districts to Paris'.format(result.modified_count))

    info('Attaching Marseille town districts')
    marseille = db.find_one({'_id': 'fr/town/13055'})
    parents = marseille['parents']
    parents.append(marseille['_id'])
    result = db.update_many(
        {'_id': {'$in': MARSEILLE_DISTRICTS}},
        {'$addToSet': {'parents': {'$each': parents}}})
    success('Attached {0} districts to Marseille'.format(result.modified_count))

    info('Attaching Lyon town districts')
    lyon = db.find_one({'_id': 'fr/town/69123'})
    parents = lyon['parents']
    parents.append(lyon['_id'])
    result = db.update_many(
        {'_id': {'$in': LYON_DISTRICTS}},
        {'$addToSet': {'parents': {'$each': parents}}})
    success('Attached {0} districts to Lyon'.format(result.modified_count))
def compute_county_area_and_population(db, filename):
    info('Computing french counties areas and population by aggregation')
    processed = 0
    pipeline = [
        {'$match': {'level': town.id}},
        {'$unwind': '$parents'},
        {'$match': {'parents': {'$regex': county.id}}},
        {'$group': {
            '_id': '$parents',
            'area': {'$sum': '$area'},
            'population': {'$sum': '$population'}
        }}
    ]
    for result in db.aggregate(pipeline):
        if db.find_one_and_update(
                {'_id': result['_id']},
                {'$set': {
                    'area': result['area'],
                    'population': result['population']
                }}):
            processed += 1
    success('Computed area and population for {0} french counties'.format(processed))
def checkpoint_thread(coord, mngr, sess, chck, rstrd):
    """ Checkpoint thread entry point.
    Args:
        coord Coordinator to use
        mngr  Graph manager to use
        sess  Session to use
        chck  Checkpoint manager to use
        rstrd Whether the model was just restored from a checkpoint
    """
    # Check arguments
    global args
    delta = args.checkpoint_delta
    period = args.checkpoint_period
    if delta < 0 and period < 0:  # Effectively disabled
        tools.info("Checkpoint saving is effectively disabled")
        return
    if rstrd:
        last_step = sess.run(mngr.step)
        last_time = time.time()
    else:
        last_step = -delta
        last_time = -period
    # Save checkpoints
    with mngr.graph.as_default():
        while True:
            time.sleep(config.thread_idle_delay)
            step = sess.run(mngr.step)
            now = time.time()
            stop = coord.should_stop()
            if stop or (delta >= 0 and step - last_step >= delta) or (period >= 0. and now - last_time >= period):
                chck.save(sess, step)
                tools.info("Checkpoint saved (took " + repr(time.time() - now) + " s)")
                last_step = sess.run(mngr.step)
                last_time = time.time()
            if stop:
                break
def fetch_missing_data_from_dbpedia(db, filename):
    info('Fetching DBPedia data')
    processed = 0
    for zone in db.find({
        'wikipedia': {'$exists': True, '$ne': None},
        '$or': [
            {'population': None},
            {'population': {'$exists': False}},
            {'area': None},
            {'area': {'$exists': False}},
        ]
    }, no_cursor_timeout=True):
        dbpedia = DBPedia(zone['wikipedia'])
        metadata = {
            'dbpedia': dbpedia.resource_url,
        }
        metadata.update(dbpedia.fetch_population_or_area())
        metadata.update(dbpedia.fetch_flag_or_blazon())
        if db.find_one_and_update({'_id': zone['_id']}, {'$set': metadata}):
            processed += 1
    success('Fetched DBPedia data for {0} zones'.format(processed))
def download(ctx):
    '''Download source datasets'''
    title('Downloading required datasets')
    if not exists(DL_DIR):
        os.makedirs(DL_DIR)
    urls = (level.urls for level in ctx.obj['levels'] if level.urls)
    urls = set([url for lst in urls for url in lst])
    for url in urls:
        info('Dealing with {0}'.format(url))
        filename, size = extract_meta_from_headers(url)
        info('Downloading {0}'.format(filename))
        with click.progressbar(length=size) as bar:
            def reporthook(blocknum, blocksize, totalsize):
                read = blocknum * blocksize
                if read <= 0:
                    return
                if read > totalsize:
                    bar.update(size)
                else:
                    bar.update(read)
            urlretrieve(url, join(DL_DIR, filename), reporthook=reporthook)
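# Hedged sketch of the undefined `extract_meta_from_headers` helper used above:
# a plausible reading is that it opens the URL and derives the target filename
# and size from the Content-Disposition and Content-Length headers, falling
# back to the URL basename. This is an assumption for illustration, not the
# original implementation.
from os.path import basename
from urllib.parse import urlparse
from urllib.request import urlopen

def extract_meta_from_headers(url):
    """Return (filename, size) guessed from the response headers."""
    with urlopen(url) as response:
        headers = response.headers
        filename = headers.get_filename() or basename(urlparse(url).path)
        size = int(headers.get('Content-Length') or 0)
    return filename, size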
def extract_for_one_sample(tuple, extract, use_power_db=False, **kwargs):
    data, idx = tuple
    r = extract(data, **kwargs)
    info("note: feee=librosa, extract r shape={}".format(r.shape))
    # for melspectrogram
    if use_power_db:
        r = librosa.power_to_db(r)
        info("note: feee=librosa, after power_to_db r shape={}".format(r.shape))
    r = r.transpose()
    info("note: feee=librosa, after transpose r shape={}".format(r.shape))
    return r, idx
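# Hedged sketch: `extract_melspectrogram_parallel`, referenced elsewhere in this
# file, is not shown here. A plausible wrapper maps `extract_for_one_sample`
# over a ThreadPool with librosa.feature.melspectrogram as the extractor; the
# pool size and the re-ordering by returned index are assumptions for
# illustration only.
from functools import partial
from multiprocessing.pool import ThreadPool

import librosa
import numpy as np

def extract_melspectrogram_parallel(samples, n_mels=30, use_power_db=False, n_jobs=4):
    worker = partial(extract_for_one_sample,
                     extract=librosa.feature.melspectrogram,
                     use_power_db=use_power_db,
                     n_mels=n_mels)
    with ThreadPool(n_jobs) as pool:
        results = pool.map(worker, [(np.asarray(s), i) for i, s in enumerate(samples)])
    # Sort by the returned index so the output follows the input sample order
    results.sort(key=lambda pair: pair[1])
    return [feat for feat, _ in results]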
def load(ctx, drop):
    '''Load zones from a folder of zip files containing shapefiles'''
    title('Extracting zones from datasets')
    zones = DB()
    if drop:
        info('Drop existing collection')
        zones.drop()
    with ok('Creating index (level,code)'):
        zones.create_index([('level', ASCENDING), ('code', ASCENDING)])
    info('Creating index (level,keys)')
    zones.create_index([('level', ASCENDING), ('keys', ASCENDING)])
    info('Creating index (parents)')
    zones.create_index('parents')
    total = 0
    for level in ctx.obj['levels']:
        info('Processing level "{0}"'.format(level.id))
        total += level.load(DL_DIR, zones)
    success('Done: Loaded {0} zones'.format(total))
def load(ctx, drop): """Load zones from a folder of zip files containing shapefiles""" title("Extracting zones from datasets") zones = DB() if drop: info("Drop existing collection") zones.drop() with ok("Creating index (level,code)"): zones.create_index([("level", ASCENDING), ("code", ASCENDING)]) info("Creating index (level,keys)") zones.create_index([("level", ASCENDING), ("keys", ASCENDING)]) info("Creating index (parents)") zones.create_index("parents") total = 0 for level in ctx.obj["levels"]: info('Processing level "{0}"'.format(level.id)) total += level.load(DL_DIR, zones) success("Done: Loaded {0} zones".format(total))
def compute_town_with_districts_population(db, filename):
    info('Computing Paris town districts population')
    districts = db.find({'_id': {'$in': PARIS_DISTRICTS}})
    population = sum(district['population'] for district in districts)
    db.find_one_and_update(
        {'_id': 'fr/town/75056'},
        {'$set': {'population': population}})
    success('Computed population for Paris')

    info('Computing Marseille town districts population')
    districts = db.find({'_id': {'$in': MARSEILLE_DISTRICTS}})
    population = sum(district['population'] for district in districts)
    db.find_one_and_update(
        {'_id': 'fr/town/13055'},
        {'$set': {'population': population}})
    success('Computed population for Marseille')

    info('Computing Lyon town districts population')
    districts = db.find({'_id': {'$in': LYON_DISTRICTS}})
    population = sum(district['population'] for district in districts)
    db.find_one_and_update(
        {'_id': 'fr/town/69123'},
        {'$set': {'population': population}})
    success('Computed population for Lyon')
def attach_counties_to_subcountries(db, filename):
    info('Attaching French Metropolitan counties')
    ids = ['fr/county/{0}'.format(c) for c in FR_METRO_COUNTIES]
    result = db.update_many(
        {'$or': [{'_id': {'$in': ids}}, {'parents': {'$in': ids}}]},
        {'$addToSet': {'parents': 'country-subset/fr/metro'}}
    )
    success('Attached {0} French Metropolitan children'.format(result.modified_count))

    info('Attaching French DOM counties')
    ids = ['fr/county/{0}'.format(c) for c in FR_DOM_COUNTIES]
    result = db.update_many(
        {'$or': [{'_id': {'$in': ids}}, {'parents': {'$in': ids}}]},
        {'$addToSet': {'parents': 'country-subset/fr/dom'}}
    )
    success('Attached {0} French DOM children'.format(result.modified_count))

    info('Attaching French DOM/TOM counties')
    ids = ['fr/county/{0}'.format(c) for c in FR_DOMTOM_COUNTIES]
    result = db.update_many(
        {'$or': [{'_id': {'$in': ids}}, {'parents': {'$in': ids}}]},
        {'$addToSet': {'parents': 'country-subset/fr/domtom'}}
    )
    success('Attached {0} French DOM/TOM children'.format(result.modified_count))
def get_phishing(root, url):
    """ Lazy-load the phishing dataset.
    Args:
        root Dataset cache root directory
        url  URL to fetch raw dataset from, if not already in cache (None for no download)
    Returns:
        Input tensor, Label tensor
    """
    global raw_phishing
    const_filename = "phishing.pt"
    const_features = 68
    const_datatype = torch.float32
    # Fast path: return loaded dataset
    if raw_phishing is not None:
        return raw_phishing
    # Make dataset path
    dataset_file = root / const_filename
    # Fast path: pre-processed dataset already locally available
    if dataset_file.exists():
        with dataset_file.open("rb") as fd:
            # Load, lazy-store and return dataset
            dataset = torch.load(fd)
            raw_phishing = dataset
            return dataset
    elif url is None:
        raise RuntimeError("Phishing dataset not in cache and download disabled")
    # Download dataset
    tools.info("Downloading dataset...", end="", flush=True)
    try:
        response = requests.get(url)
    except Exception as err:
        tools.warning(" fail.")
        raise RuntimeError(f"Unable to get dataset (at {url}): {err}")
    tools.info(" done.")
    if response.status_code != 200:
        raise RuntimeError(f"Unable to fetch raw dataset (at {url}): GET status code {response.status_code}")
    # Pre-process dataset
    tools.info("Pre-processing dataset...", end="", flush=True)
    entries = response.text.strip().split("\n")
    inputs = torch.zeros(len(entries), const_features, dtype=const_datatype)
    labels = torch.empty(len(entries), dtype=const_datatype)
    for index, entry in enumerate(entries):
        entry = entry.split(" ")
        # Set label
        labels[index] = 1 if entry[0] == "1" else 0
        # Set input
        line = inputs[index]
        for pos, setter in enumerate(entry[1:]):
            try:
                offset, value = setter.split(":")
                line[int(offset) - 1] = float(value)
            except Exception as err:
                tools.warning(" fail.")
                raise RuntimeError(f"Unable to parse dataset (line {index + 1}, position {pos + 1}): {err}")
    labels.unsqueeze_(1)
    tools.info(" done.")
    # (Try to) save pre-processed dataset
    try:
        with dataset_file.open("wb") as fd:
            torch.save((inputs, labels), fd)
    except Exception as err:
        tools.warning(f"Unable to save pre-processed dataset: {err}")
    # Lazy-store and return dataset
    dataset = (inputs, labels)
    raw_phishing = dataset
    return dataset
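# Hedged usage sketch: assumes the module also defines the `raw_phishing = None`
# cache the function expects. The cache directory name is a placeholder; the
# only format assumption carried over is the LIBSVM-style "label index:value"
# lines parsed above.
import pathlib

if __name__ == "__main__":
    cache_dir = pathlib.Path("datasets")  # hypothetical cache location
    cache_dir.mkdir(parents=True, exist_ok=True)
    inputs, labels = get_phishing(cache_dir, url=None)  # cache-only: raises if absent
    print(inputs.shape, labels.shape)  # expected: (N, 68) and (N, 1)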
def build_and_load_one(path, deps=[]):
    """ Check if the given directory is a module to build and load, and if yes
        recursively build and load its dependencies before it.
    Args:
        path Given directory path
        deps Dependent module paths
    Returns:
        True on success, False on failure, None if not a module
    """
    nonlocal done_modules
    nonlocal fail_modules
    with tools.Context(path.name, "info"):
        ident = path.name[:3]
        if ident in ident_to_is_python.keys():  # Is a module directory
            if len(path.name) <= 3 or path.name[3] == "_":
                tools.warning("Skipped invalid module directory name " + repr(path.name))
                return None
            if not path.exists():
                tools.warning("Unable to build and load " + repr(str(path.name)) + ": module does not exist")
                fail_modules.append(path)  # Mark as failed
                return False
            is_python_module = ident_to_is_python[ident]
            # Check if already built and loaded, or failed
            if path in done_modules:
                if len(deps) == 0 and debug_mode:
                    tools.info("Already built and loaded " + repr(str(path.name)))
                return True
            if path in fail_modules:
                if len(deps) == 0:
                    tools.warning("Was unable to build and load " + repr(str(path.name)))
                return False
            # Check for dependency cycle (disallowed as they may mess with the linker)
            if path in deps:
                tools.warning("Unable to build and load " + repr(str(path.name)) + ": dependency cycle found")
                fail_modules.append(path)  # Mark as failed
                return False
            # Build and load dependencies
            this_ldflags = list(extra_ldflags)
            depsfile = path / dependencies_file
            if depsfile.exists():
                for modname in depsfile.read_text().splitlines():
                    res = build_and_load_one(base_directory / modname, deps + [path])
                    if res == False:  # Unable to build a dependency
                        if len(deps) == 0:
                            tools.warning("Unable to build and load " + repr(str(path.name)) + ": dependency " + repr(modname) + " build and load failed")
                        fail_modules.append(path)  # Mark as failed
                        return False
                    elif res == True:  # Module and its sub-dependencies was/were built and loaded successfully
                        this_ldflags.append("-Wl,--library=:" + str((base_directory / modname / (modname + ".so")).resolve()))
            # List sources
            sources = []
            for subpath in path.iterdir():
                if subpath.is_file() and ("").join(subpath.suffixes) in source_suffixes:
                    sources.append(str(subpath))
            # Build and load this module
            try:
                res = torch.utils.cpp_extension.load(
                    name=path.name,
                    sources=sources,
                    extra_cflags=extra_cflags,
                    extra_cuda_cflags=extra_cuda_cflags,
                    extra_ldflags=this_ldflags,
                    extra_include_paths=extra_include_paths,
                    build_directory=str(path),
                    verbose=debug_mode,
                    is_python_module=is_python_module)
                if is_python_module:
                    glob[path.name[3:]] = res
            except Exception as err:
                tools.warning("Unable to build and load " + repr(str(path.name)) + ": " + str(err))
                fail_modules.append(path)  # Mark as failed
                return False
            done_modules.append(path)  # Mark as built and loaded
            return True
    }
    if not noopts:
        ticketdata['issue']['fields']['labels'] = TLABELS
        ticketdata['issue']['fields']['components'] = TCOMP
    return ticketdata

# Ticket Object Without Labels and Components
ticket2 = JiraTicket(generate_data(True))
# Ticket Object With Labels and Components
ticket = JiraTicket(generate_data())

# Tests
fails = 0
passes = 0
tools.info("Starting JIRAPy Tests")
tools.sep()

# Check Ticket Summary
if ticket.summary == TSUM:
    tools.info(tools.padstat("TEST Ticket Summary", "SUCCESS"))
    passes += 1
else:
    tools.error(tools.padstat("TEST Ticket Summary", "FAILED"))
    fails += 1

# Check Ticket Description
if ticket.description == TDESC:
    tools.info(tools.padstat("TEST Ticket Description", "SUCCESS"))
    passes += 1
else:
# Local helper modules; the datetime alias and `currency` import below are
# assumptions added so the calls further down resolve.
import datetime as date
import currency
import tools
import graphics

# week = ["Segunda", "Terca", "Quarta", "Quinta", "Sexta"]
# month = ["01", "02", "03", "04", "05", "06", "07", "08", "09", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31"]
# year = ["Janeiro", "Fevereiro", "Marco", "Abril", "Maio", "Junho", "Julho", "Agosto", "Setembro", "Outubro", "Novembro", "Dezembro"]

date_full = date.datetime.now().strftime("%Y-%m-%d")
date_day = date.datetime.now().strftime("%d")
date_month = date.datetime.now().strftime("%m")
date_year = date.datetime.now().strftime("%Y")

filename = "7"
compare_from = "BRL"
compare_to = "EUR"
start_date = "2019-01-01"
# end_date = date_full
end_date = "2019-10-11"

responseJSON = currency.periodData(compare_from, start_date, date_full)
rates = responseJSON['rates']

# generate_graph("1", rates, "BRL", "CAD")
graphics.generate('Images/' + filename, rates, compare_from, compare_to)

dados = tools.info(rates, compare_from, compare_to)
print("Minimo: ", dados[0], '\nMedia: ', dados[1], '\nMaximo: ', dados[2])
tools.fibonacci_graph(dados[0], dados[2], 200)
# -----------------------------------------------------------------------------
# From Numpy to Python
# Copyright (2017) Nicolas P. Rougier - BSD license
# More information at https://github.com/rougier/numpy-book
# -----------------------------------------------------------------------------
import numpy as np
from tools import info

if __name__ == '__main__':
    Z = np.arange(4 * 4).reshape(4, 4)
    Z = np.array(Z, order='C')
    info(Z)
    Z = np.array(Z, order='F')
    info(Z)
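# Illustration of what the snippet above inspects: for the same 4x4 integer
# array, C order is row-major and F order is column-major, which shows up in
# the strides (byte steps per dimension). The printed values assume the common
# 8-byte integer itemsize.
import numpy as np

A = np.arange(16).reshape(4, 4)
print(np.ascontiguousarray(A).strides)  # (32, 8): rows are contiguous in memory
print(np.asfortranarray(A).strides)     # (8, 32): columns are contiguous in memory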
from multiprocessing.pool import ThreadPool, Pool
import json
import librosa
import numpy as np
from tensorflow.python.keras.preprocessing import sequence

from CONSTANT import NUM_MFCC, FFT_DURATION, HOP_DURATION
import tools
print("tools file={}".format(tools.__file__))
from tools import timeit, log, info
import time
import tensorflow

info("tensorflow version = {}".format(tensorflow.__version__))


def ohe2cat(label):
    return np.argmax(label, axis=1)


@timeit
def get_max_length(x, ratio=0.95):
    """
    Get the max length cover 95% data.
    """
    lens = [len(_) for _ in x]
    max_len = max(lens)
    min_len = min(lens)
    lens.sort()
                     args.learning_rate, args.learning_rate_args, head="")
    tools.print_args("optimizer", args.optimizer, args.optimizer_args, head="")
    tools.print_args("attack", args.attack, args.attack_args, head="")
    if exit_pending:
        exit(0)

# ---------------------------------------------------------------------------- #
# Cluster management
tools.success("Cluster analysis and allocation phase...")
with tools.Context("cluster", "info"):
    # Cluster manager instantiation
    if args.server:
        # Assume the role of the parameter server, which allows the use of 'tf.py_func'
        tools.info("Acting as node " + args.ps_job_name + ":0 in the cluster")
        if args.MPI:
            proto = 'grpc+mpi'
            print("Using MPI...........................................................")
            sys.stdout.flush()
        else:
            proto = 'grpc'
        args.client = tf.train.Server(tf.train.ClusterSpec(args.server),
                                      job_name=args.ps_job_name,
                                      task_index=0,
                                      start=True,
                                      protocol=proto).target
        cluster_mgr = cluster.Manager(args.client, devs=device_prefs,
def evaluation_thread(coord, mngr, sess, path, first):
    """ Evaluation thread entry point.
    Args:
        coord Coordinator to use
        mngr  Graph manager to use
        sess  Session to use
        path  Path to the storage file
        first Event notifying first evaluation is complete
    """
    # Check arguments
    global args
    delta = args.evaluation_delta
    period = args.evaluation_period
    if delta < 0 and period < 0:  # Effectively disabled
        tools.info("Evaluation is effectively disabled")
        first.set()
        return
    last_step = -delta
    last_time = -period
    # Open file (if parent exists)
    if path:
        path = pathlib.Path(path)
        try:
            path.parent.mkdir(parents=True, exist_ok=True)
            fd = path.open("a")
        except Exception:
            fd = None
    else:
        fd = None
    # Evaluate (and save) accuracy
    with mngr.graph.as_default():
        while True:
            time.sleep(config.thread_idle_delay)
            step = sess.run(mngr.step)
            now = time.time()
            stop = coord.should_stop()
            if stop or (delta >= 0 and step - last_step >= delta) or (period >= 0. and now - last_time >= period):
                accuracies = sess.run(mngr.eval_tns)
                if fd is not None:
                    line = str(now) + "\t" + str(step)
                    for key, val in accuracies.items():
                        line += "\t" + key + ":" + str(val)
                    fd.write(line + os.linesep)
                    fd.flush()
                line = ""
                for key, val in accuracies.items():
                    if len(line) > 0:
                        line += ", "
                    line += key + " = " + str(val)
                tools.info(" Step " + str(step) + ": " + line + " (took " + repr(time.time() - now) + " s)")
                if first is not None:
                    first.set()
                    first = None
                last_step = sess.run(mngr.step)
                last_time = time.time()
            if stop:
                break
    # Close file (if any)
    if fd is not None:
        fd.close()
def __init__(self, experiment, aggregator, dev_tuples, optimizer, optimizer_args,
             learning_rate, learning_rate_args, regularizations=(-1., -1.), trace=False):
    """ Full graph (training + evaluation) constructor.
    Args:
        experiment         Experiment instance to use
        aggregator         Aggregator instance to use
        dev_tuples         Tuple of devices (i.e. tuples of strings (job name, task ID, device type, device ID))
                           for (parameter server, each workers' inference/loss/gradient computation, evaluator)
        optimizer          Optimizer name to use
        optimizer_args     Additional optimizer key-value arguments
        learning_rate      Learning rate name to use
        learning_rate_args Additional learning rate key-value arguments
        regularizations    Pair of (l1, l2) regularization values, non-positive values for no regularization
        trace              Whether to add trace prints for every important step of the computations
    """
    # Tuple extraction and device name reconstruction
    ps_tuple, wk_tuples, ev_tuple = dev_tuples
    ps_device = tools.device_from_tuple(*ps_tuple)
    wk_jobs = {}  # Map job -> taskid -> list of pairs of (devtype, devid)
    for job, taskid, devtype, devid in wk_tuples:
        if job in wk_jobs:
            wk_tasks = wk_jobs[job]
            if taskid in wk_tasks:
                wk_tasks[taskid].append((devtype, devid))
            else:
                wk_tasks[taskid] = [(devtype, devid)]
        else:
            wk_jobs[job] = {taskid: [(devtype, devid)]}
    # Graph building
    graph = tf.Graph()
    with graph.as_default():
        with tf.name_scope("ps/"):
            with tf.device(ps_device):
                # Instantiate global step counter, optimizer and learning rate
                global_step = tf.train.create_global_step()
                learning_rate = build(learning_rates, "learning rate decay", learning_rate,
                                      learning_rate_args, global_step=global_step)
                optimizer = build(optimizers, "optimizer", optimizer, optimizer_args,
                                  learning_rate=learning_rate)
                tf.summary.scalar("learning_rate", learning_rate)
                # Create workers' gradient computation
                totlosses = []  # List of losses, for summary (and printing) only
                gradients = []  # List of gradients, one per non-Byzantine worker
                flatmap = None  # Flat map used to flatten the gradients coherently
                with tf.name_scope("workers/"):
                    for job, wk_tasks in wk_jobs.items():
                        for taskid, models in wk_tasks.items():
                            device_dataset = tools.device_from_tuple(job, taskid, "CPU", "*")
                            device_models = [
                                replica_device_setter(ps_device, tools.device_from_tuple(job, taskid, devtype, devid))
                                for devtype, devid in models]
                            # Compute losses
                            losses = experiment.losses(device_dataset, device_models, trace=trace)
                            totlosses += losses
                            # Compute gradients
                            for i in range(len(device_models)):
                                with tf.device(device_models[i]):
                                    loss = losses[i]
                                    for norm in [1, 2]:
                                        strength = regularizations[norm - 1]  # 'norm - 1' is just a basic numbering trick...
                                        if strength > 0.:
                                            loss = loss + strength * regularization(norm)
                                    if trace:
                                        loss = tools.trace_graph(loss, "Worker " + str(len(gradients)) + ": loss computation")
                                    grad_vars = optimizer.compute_gradients(loss)
                                    if flatmap is None:
                                        gradient, flatmap = flatten(grad_vars)
                                    else:
                                        gradient = flatten(grad_vars, flatmap)
                                    if trace:
                                        gradient = tools.trace_graph(gradient, "Worker " + str(len(gradients)) + ": gradient computation")
                                    gradients.append(gradient)
                total_loss = tf.add_n(totlosses, name="total_loss")
                tools.info("Created workers' dataset, inference, loss and gradient computation nodes")
                # Aggregate and apply the workers' gradients
                with tf.name_scope("GAR"):
                    time1 = time.time()
                    aggregated = aggregator.aggregate(gradients)
                    time2 = time.time()
                    # print("ms=", (time2 - time1) * 1000)
                    if trace:
                        aggregated = tools.trace_graph(aggregated, "Master: aggregated gradient computation")
                    apply_op = optimizer.apply_gradients(inflate(aggregated, mapflat(flatmap)),
                                                         global_step=global_step)
                    if trace:
                        apply_op = tools.trace_graph(apply_op, "Master: aggregated gradient application")
                    tf.add_to_collection(tf.GraphKeys.UPDATE_OPS, apply_op)
                tools.info("Created parameter server's gradient aggregation and application nodes")
                # Create accuracy computation
                with tf.name_scope("eval/"):
                    device_dataset = tools.device_from_tuple(ev_tuple[0], ev_tuple[1], "CPU", "*")
                    device_model = tools.device_from_tuple(*ev_tuple)
                    accuracy_tns = experiment.accuracy(device_dataset,
                                                       [replica_device_setter(ps_device, device_model)],
                                                       trace=trace)
                    for key, val in accuracy_tns.items():
                        tf.add_to_collection(tf.GraphKeys.SUMMARIES, tf.summary.scalar("eval-" + key, val))
                tools.info("Created evaluator's dataset, inference and accuracy computation nodes")
                # Global summary protocol buffer
                summary_tn = tf.summary.merge(list(set(tf.get_collection(tf.GraphKeys.SUMMARIES))))
                # Full initialization operation
                rsrc_init_ops = []
                for resource in tf.get_collection(tf.GraphKeys.RESOURCES):
                    rsrc_init_ops.append(resource.initializer)
                for resource in tf.get_collection(tf.GraphKeys.LOCAL_RESOURCES):
                    rsrc_init_ops.append(resource.initializer)
                init_op = tf.group(tf.variables_initializer(tf.global_variables() + tf.local_variables()),
                                   tf.tables_initializer(), *rsrc_init_ops)
                # Build the training operation
                with tf.control_dependencies(tf.get_collection(tf.GraphKeys.UPDATE_OPS)):
                    train_tn = tf.identity(total_loss, name="train_tn")
    # Finalization
    self.graph = graph
    self.step = global_step
    self.rate = learning_rate
    self.optimizer = optimizer
    self.total_loss = total_loss
    self.summary_tn = summary_tn
    self.init_op = init_op
    self.train_tn = train_tn
    self.eval_tns = accuracy_tns