def main():
    metadata = util.get_metadata()

    # Extract audio features for every file in "songs" with pyAudioAnalysis
    F = audioFeatureExtraction.dirWavFeatureExtraction("songs", 1.0, 1.0, 0.025, 0.025)
    with open("F", "wb") as f:
        pickle.dump(F, f)
def main():
    dataset = None
    if len(sys.argv) > 1:
        dataset = sys.argv[1]

    metadata = util.get_metadata((dataset + "_metadata") if dataset else None)
    mfcc = dict(zip([metadata[i][0] for i in range(1, len(metadata))],
                    util.load_features((dataset + "_features") if dataset else None)))

    # Load pyAudioAnalysis features
    with open("F", "rb") as f:
        feats, files = pickle.load(f, encoding="latin1")
    files = [f.split(".")[0].split("XC")[-1] for f in files]
    F = dict(zip(files, feats))

    full_dataset = True
    for item in metadata[1:]:
        if item[0] not in F:
            full_dataset = False

    X2, X3 = [], []
    if full_dataset:
        X3 = [np.concatenate((F[item[0]], mfcc[item[0]]), axis=0) for item in metadata[1:]]
        X2 = [F[item[0]] for item in metadata[1:]]
    X1 = [mfcc[item[0]] for item in metadata[1:]]

    for X in [X1, X2]:
        NUM_RUNS = 50
        Y = util.load_labels((dataset + "_metadata") if dataset else None)
        samples = range(len(X))  # range(1, len(X), 12)  # random.sample(range(len(X)), 25)
        samps = samples  # range(len(X))
        x = [X[i] for i in samps]
        y = [Y[i] for i in samples]

        N_ESTIMATORS = 20
        avg_mat = None
        for run in range(NUM_RUNS):
            clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, max_features=20,
                                         oob_score=True).fit(X, Y)

            # Proximity: for each pair of samples, count the trees that place
            # them in the same leaf.
            similarity = dict()
            for dt in clf.estimators_:
                leaves = dt.apply(X)
                for i in samps:
                    for j in samps:
                        if leaves[i] == leaves[j]:
                            similarity[(i, j)] = similarity.get((i, j), 0) + 1

            # Distance = (1 - proximity / N_ESTIMATORS)^2, condensed for linkage()
            mat = np.array([[(1.0 - similarity.get((i, j), 0) / N_ESTIMATORS) ** 2
                             for j in samples] for i in samples])
            mat = squareform(mat)
            if avg_mat is None:
                avg_mat = mat
            else:
                avg_mat = np.add(avg_mat, mat)

        avg_mat = avg_mat / NUM_RUNS
        linkage_matrix = linkage(avg_mat, "single")
        matplotlib.rcParams['lines.linewidth'] = 2.5
        dendrogram(linkage_matrix, color_threshold=0.8, labels=y, show_leaf_counts=True)
        plt.xlabel("label")
        plt.ylabel("distance")
        plt.show()
def metadata() -> Response:
    # Validate path and construct an absolute path
    try:
        path = get_complete_path(request.args.get("path", "/"), root)
    except Exception as e:
        msg = "Error in get_complete_path: {}".format(str(e))
        app.logger.warning(msg)
        return error("Could not browse")

    try:
        result = json.dumps(get_metadata(path))
        return Response(result, mimetype='application/json')
    except Exception as e:
        msg = "Error getting metadata: {}".format(str(e))
        app.logger.error(msg)
        return error("Could not retrieve metadata")
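# A minimal client-side sketch for the endpoint above, assuming the Flask app is
# served locally on port 5000 and that the view is registered at "/metadata"
# (both assumptions; the route decorator is not shown in the snippet).
import requests

resp = requests.get("http://localhost:5000/metadata", params={"path": "/music"})
if resp.ok:
    print(resp.json())            # metadata dict for the requested path
else:
    print("request failed with status", resp.status_code)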
def __init__(self, path, parent=None):
    self.path = path
    if parent:
        self.parent = parent
    if util.is_supported(root_join(path)):
        self.supported = True
        tags = util.get_metadata(root_join(path))
        self.artist = tags['artist']
        self.album_artist = tags['performer']
        if not self.album_artist:
            self.album_artist = self.artist
        self.album = tags['album']
        self.track = tags['title']
    else:
        self.supported = False
    self.size = os.path.getsize(root_join(self.path))
    self.rel_path = root_join(self.path)
    self.change_alerted = False
def __init__(self, name, versionrange='', pkfile=None, search=True):
    metadata = None
    self.pkfile = pkfile or cf['pkfile']
    self.name, self.version = util.separate_fullname(name)

    if not self.version and search:
        if versionrange:
            match = util.get_match_version(self.name, versionrange, self.pkfile)
        else:
            match = util.get_latest_version(self.name, self.pkfile)
    else:
        match = name

    self.name, self.version = util.separate_fullname(match)
    self.fullname = util.format_pk_name(self.name, self.version)
    self.metadata = util.get_metadata(self.fullname)
    self.shortname = self.name

    self.base_path = join(cf['home'], self.fullname)
    self.install_path = join(self.base_path, "install")
    self.build_path = join(self.base_path, "build")
    self.log_path = join(self.base_path, "log")
    self.log = join(self.log_path, "chip.log")
    self.pkgfile = join(self.base_path, 'package.json')
    self.pkgpy = join(self.build_path, 'package.py')

    self.ptype = self.metadata.get('type')
    self.url = self.metadata.get('url')
    self.requirements = self.metadata.get("requires")
    self.data = self.metadata.get('data')

    self.env = {}
    self.activated = False
    self.deps = None
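# Hypothetical usage of the constructor above; the class name "Package" and the
# fullname "zlib-1.2.11" are illustrative only (neither appears in the snippet).
pkg = Package("zlib-1.2.11")
print(pkg.fullname)     # normalized name from util.format_pk_name
print(pkg.build_path)   # <cf['home']>/<fullname>/build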
def main():
    dataset = None
    if len(sys.argv) > 1:
        dataset = sys.argv[1]

    metadata = util.get_metadata((dataset + "_metadata") if dataset else None)
    mfcc = dict(zip([metadata[i][0] for i in range(1, len(metadata))],
                    util.load_features((dataset + "_features") if dataset else None)))

    # Load pyAudioAnalysis features
    feats, files = None, None
    with open("F", "rb") as f:
        feats, files = pickle.load(f, encoding="latin1")
    files = [f.split(".")[0].split("XC")[-1] for f in files]
    F = dict(zip(files, feats))

    full_dataset = True
    for item in metadata[1:]:
        if item[0] not in F:
            full_dataset = False

    X2, X3 = [], []
    if full_dataset:
        X3 = [np.concatenate((F[item[0]], mfcc[item[0]]), axis=0) for item in metadata[1:]]
        X2 = [F[item[0]] for item in metadata[1:]]
    X1 = [mfcc[item[0]] for item in metadata[1:]]

    Y = util.load_labels((dataset + "_metadata") if dataset else None)  # "bbsmd.csv"

    for X in ([X1, X2] if full_dataset else [X1]):
        print("------")
        classifiers = [
            RandomForestClassifier(n_estimators=50, max_features=15, oob_score=True),
            KNeighborsClassifier(3),
            svm.SVC(kernel='linear', C=1),
            svm.SVC(gamma=2, C=1),
            GaussianNB()
        ]
        for clf in classifiers:
            scores = cross_val_score(clf, X, Y, cv=5)
            score = sum(scores) / len(scores)
            print(type(clf).__name__, "\t", score)
def main():
    threads = []
    metadata = util.get_metadata()
    q = Queue(maxsize=7)   # bounds the number of concurrent workers
    oq = Queue()           # collects (item_id, feature) results
    X = []

    for item in metadata[1:]:
        q.put(1)
        time.sleep(0.1)
        t = threading.Thread(target=process, args=(item[0], q, oq))
        t.start()
        threads.append(t)
        print("{}/{}".format(oq.qsize(), len(metadata) - 1), end='\r', flush=True)

    for thread in threads:
        print("{}/{}".format(oq.qsize(), len(metadata) - 1), end='\r', flush=True)
        thread.join()

    features = dict()
    while not oq.empty():
        item_id, feature = oq.get()
        features[item_id] = feature

    for item in metadata[1:]:
        if item[0] in features and features[item[0]] is not None:
            X.append(features[item[0]])
        else:
            # Note: items with missing features will need to be removed from
            # metadata.csv to preserve (X, Y) order
            print("Error getting features for {}".format(item[0]))

    with open("features", "wb") as f:
        pickle.dump(X, f)
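# Hypothetical sketch of the `process` worker launched above; it is not shown in
# the snippet, so extract_features() is a placeholder. Based on how it is invoked,
# it should put an (item_id, feature) pair on `oq` and release its slot in the
# bounded queue `q` when done.
def process(item_id, q, oq):
    feature = None
    try:
        feature = extract_features(item_id)   # placeholder for the real extractor
    except Exception as e:
        print("Feature extraction failed for {}: {}".format(item_id, e))
    finally:
        oq.put((item_id, feature))   # consumed by the main thread
        q.get()                      # free a slot so another worker may start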
try:
    client = DropboxClient(session['access_token'])
except ErrorResponse:
    abort(401)

account = client.account_info()
session['username'] = account['display_name']

quota = float(account['quota_info']['quota'])
shared = float(account['quota_info']['shared'])
normal = float(account['quota_info']['normal'])
total_bytes = int(normal + shared)
session['used'] = human_readable(normal + shared)
session['quota'] = human_readable(quota)

job = q.enqueue(walk, client, get_metadata(client, '/'), 0, total_bytes)
job.meta['access_token'] = session['access_token']
job.save()
update_progress(job, 0, "/")
session['job'] = job.key

return render_template('display.html',
                       username=session['username'],
                       quota=session['quota'],
                       used=session['used'])


@app.route('/display_result')
def display_result():
    if 'job' not in session:
        return jsonify(ready=False, progress=0)
    job = get_job_from_key(session['job'], conn)
    if job is None:
        abort(400)
def main():
    dataset = None
    if len(sys.argv) > 1:
        dataset = sys.argv[1]

    metadata = util.get_metadata((dataset + "_metadata") if dataset else None)
    mfcc = dict(zip([metadata[i][0] for i in range(1, len(metadata))],
                    util.load_features((dataset + "_features") if dataset else None)))

    # Load pyAudioAnalysis features
    with open("F", "rb") as f:
        feats, files = pickle.load(f, encoding="latin1")
    files = [f.split(".")[0].split("XC")[-1] for f in files]
    F = dict(zip(files, feats))

    full_dataset = True
    for item in metadata[1:]:
        if item[0] not in F:
            full_dataset = False

    X2, X3 = [], []
    if full_dataset:
        X3 = [np.concatenate((F[item[0]], mfcc[item[0]]), axis=0) for item in metadata[1:]]
        X2 = [F[item[0]] for item in metadata[1:]]
    X1 = [mfcc[item[0]] for item in metadata[1:]]
    # X = util.load_features((dataset + "_features") if dataset else None)

    for X in [X1, X2]:
        labels = []
        avg_mat = None
        all_sims = dict()
        Y = util.load_labels((dataset + "_metadata") if dataset else None)
        samples = range(len(X))  # range(1, len(X), 12)  # random.sample(range(len(X)), 25)
        samps = range(len(X))    # samples
        x = [X[i] for i in samps]
        y = [Y[i] for i in samples]

        N_ESTIMATORS = 80
        NUM_RUNS = 5
        for run in range(NUM_RUNS):
            clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, max_features=25,
                                         oob_score=True).fit(X, Y)

            # Pairwise sample proximity: fraction of trees in which two samples
            # land in the same leaf.
            similarity = dict()
            for dt in clf.estimators_:
                leaves = dt.apply(X)
                for i in samps:
                    for j in samps:
                        if leaves[i] == leaves[j]:
                            similarity[(i, j)] = similarity.get((i, j), 0) + (1 / N_ESTIMATORS)

            # Aggregate sample proximities into class-level (species) similarities
            species_similarity = dict()
            for i in samps:
                for j in samps:
                    species_similarity[(Y[i], Y[j])] = species_similarity.get((Y[i], Y[j]), 0) + \
                        similarity.get((i, j), 0) ** 2 / (Y.count(Y[i]) * Y.count(Y[j]))
            for k in species_similarity:
                species_similarity[k] = species_similarity[k] ** 0.5

            # Normalize so each class has similarity 1.0 with itself
            labels = clf.classes_
            for i in range(len(labels)):
                normal = species_similarity[(labels[i], labels[i])]
                for j in range(i, len(labels)):
                    k = labels[i], labels[j]
                    species_similarity[k] /= normal
                    species_similarity[(k[1], k[0])] = species_similarity[k]
                    all_sims[k] = all_sims.get(k, 0) + species_similarity[k] / NUM_RUNS

            mat = np.array([[(1.0 - species_similarity.get((i, j), 0)) ** 2
                             for j in labels] for i in labels])
            print(mat)
            mat = squareform(mat)
            if avg_mat is None:
                avg_mat = mat
            else:
                avg_mat = np.add(avg_mat, mat)

        avg_mat = avg_mat / NUM_RUNS
        print(avg_mat)
        for k in all_sims:
            if k[0] != k[1] and all_sims[k] > 0.1:
                print("{}\t{}\t{}".format(k[0], k[1], all_sims[k]))

        linkage_matrix = linkage(avg_mat, "single")
        matplotlib.rcParams['lines.linewidth'] = 2.5
        dendrogram(linkage_matrix, color_threshold=0.65, labels=labels, show_leaf_counts=True)
        plt.xlabel("label")
        plt.ylabel("distance")
        plt.show()
def publish_initiator_pair(candidate_pair, publish_job_data, orbit_data, aoi_id,
                           wuid=None, job_num=None):
    logger.info("\nPUBLISH CANDIDATE PAIR : %s" % candidate_pair)

    master_ids_str = ""
    slave_ids_str = ""
    job_priority = 0

    master_acquisitions = candidate_pair["master_acqs"]
    slave_acquisitions = candidate_pair["slave_acqs"]
    union_geojson = candidate_pair["intersect_geojson"]
    starttime = candidate_pair["starttime"]
    endtime = candidate_pair["endtime"]
    orbitNumber = candidate_pair['orbitNumber']
    direction = candidate_pair['direction']
    platform = orbit_data['platform']
    logger.info("publish_data : orbitNumber : %s, direction : %s" % (orbitNumber, direction))

    project = publish_job_data["project"]
    '''
    spyddder_extract_version = job_data["spyddder_extract_version"]
    standard_product_ifg_version = job_data["standard_product_ifg_version"]
    acquisition_localizer_version = job_data["acquisition_localizer_version"]
    standard_product_localizer_version = job_data["standard_product_localizer_version"]
    '''
    #job_data["job_type"] = job_type
    #job_data["job_version"] = job_version
    job_priority = publish_job_data["job_priority"]

    logger.info("MASTER : %s " % master_acquisitions)
    logger.info("SLAVE : %s" % slave_acquisitions)
    logger.info("project: %s" % project)

    #version = get_version()
    version = "v2.0.0"

    # set job type and disk space reqs
    disk_usage = "300GB"

    # query doc
    uu = UrlUtils()
    es_url = uu.rest_url
    grq_index_prefix = "grq"
    rest_url = es_url[:-1] if es_url.endswith('/') else es_url
    url = "{}/{}/_search?search_type=scan&scroll=60&size=100".format(rest_url, grq_index_prefix)

    # get metadata
    master_md = {i: util.get_metadata(i, rest_url, url) for i in master_acquisitions}
    #logger.info("master_md: {}".format(json.dumps(master_md, indent=2)))
    slave_md = {i: util.get_metadata(i, rest_url, url) for i in slave_acquisitions}
    #logger.info("slave_md: {}".format(json.dumps(slave_md, indent=2)))

    # get tracks
    track = util.get_track(master_md)
    logger.info("master_track: {}".format(track))
    slave_track = util.get_track(slave_md)
    logger.info("slave_track: {}".format(slave_track))
    if track != slave_track:
        raise RuntimeError(
            "Slave track {} doesn't match master track {}.".format(slave_track, track))

    ref_scene = master_md
    if len(master_acquisitions) == 1:
        ref_scene = master_md
    elif len(slave_acquisitions) == 1:
        ref_scene = slave_md
    elif len(master_acquisitions) > 1 and len(slave_acquisitions) > 1:
        raise RuntimeError("Single Scene Reference Required.")

    # get dem_type
    dem_type = util.get_dem_type(master_md)
    logger.info("master_dem_type: {}".format(dem_type))
    slave_dem_type = util.get_dem_type(slave_md)
    logger.info("slave_dem_type: {}".format(slave_dem_type))
    if dem_type != slave_dem_type:
        dem_type = "SRTM+v3"

    job_queue = "%s-job_worker-large" % project
    logger.info("submit_localize_job : Queue : %s" % job_queue)
    #localizer_job_type = "job-standard_product_localizer:%s" % standard_product_localizer_version

    logger.info("master acq type : %s of length %s" % (type(master_acquisitions), len(master_acquisitions)))
    logger.info("slave acq type : %s of length %s" % (type(slave_acquisitions), len(slave_acquisitions)))

    if type(project) is list:
        project = project[0]

    for acq in sorted(master_acquisitions):
        #logger.info("master acq : %s" % acq)
        if master_ids_str == "":
            master_ids_str = acq
        else:
            master_ids_str += " " + acq

    for acq in sorted(slave_acquisitions):
        #logger.info("slave acq : %s" % acq)
        if slave_ids_str == "":
            slave_ids_str = acq
        else:
            slave_ids_str += " " + acq

    list_master_dt, list_slave_dt = util.get_scene_dates_from_metadata(master_md, slave_md)
    list_master_dt_str = list_master_dt.strftime('%Y%m%dT%H%M%S')
    list_slave_dt_str = list_slave_dt.strftime('%Y%m%dT%H%M%S')

    #ACQ_LIST_ID_TMPL = "acq_list-R{}_M{:d}S{:d}_TN{:03d}_{:%Y%m%dT%H%M%S}-{:%Y%m%dT%H%M%S}-{}-{}"
    id_hash = hashlib.md5(json.dumps([job_priority, master_ids_str, slave_ids_str,
                                      dem_type]).encode("utf8")).hexdigest()
    '''
    id_hash = hashlib.md5(json.dumps([
        ACQ_LIST_ID_TMPL,
        m,
        master_orbit_urls[-1],
        slave_zip_urls[-1],
        slave_orbit_urls[-1],
        projects[-1],
        filter_strength,
        dem_type
    ]).encode("utf8")).hexdigest()
    '''

    orbit_type = 'poeorb'
    aoi_id = aoi_id.strip().replace(' ', '_')
    id = ACQ_LIST_ID_TMPL.format('M', len(master_acquisitions), len(slave_acquisitions),
                                 track, list_master_dt, list_slave_dt, orbit_type,
                                 id_hash[0:4], aoi_id)
    #id = "acq-list-%s" % id_hash[0:4]

    prod_dir = id
    os.makedirs(prod_dir, 0o755)

    met_file = os.path.join(prod_dir, "{}.met.json".format(id))
    ds_file = os.path.join(prod_dir, "{}.dataset.json".format(id))

    logger.info("\n\nPUBLISHING %s : " % id)
    #with open(met_file) as f: md = json.load(f)
    md = {}
    md['id'] = id
    md['project'] = project
    md['master_acquisitions'] = master_ids_str
    md['slave_acquisitions'] = slave_ids_str
    '''
    md['spyddder_extract_version'] = spyddder_extract_version
    md['acquisition_localizer_version'] = acquisition_localizer_version
    md['standard_product_ifg_version'] = standard_product_ifg_version
    '''
    md['job_priority'] = job_priority
    md['_disk_usage'] = disk_usage
    md['soft_time_limit'] = 86400
    md['time_limit'] = 86700
    md['dem_type'] = dem_type
    md['track_number'] = track
    md['starttime'] = "%sZ" % starttime
    md['endtime'] = "%sZ" % endtime
    md['union_geojson'] = union_geojson
    md['master_scenes'] = master_acquisitions
    md['slave_scenes'] = slave_acquisitions
    md['orbitNumber'] = orbitNumber
    md['direction'] = direction
    md['platform'] = platform
    md['list_master_dt'] = list_master_dt_str
    md['list_slave_dt'] = list_slave_dt_str
    md['tags'] = aoi_id

    try:
        geom = ogr.CreateGeometryFromJson(json.dumps(union_geojson))
        env = geom.GetEnvelope()
        bbox = [
            [env[3], env[0]],
            [env[3], env[1]],
            [env[2], env[1]],
            [env[2], env[0]],
        ]
        md['bbox'] = bbox
    except Exception as e:
        logger.warn("Got exception creating bbox : {}".format(str(e)))
        traceback.print_exc()
        #logger.warn("Traceback: {}".format(traceback.format_exc()))

    with open(met_file, 'w') as f:
        json.dump(md, f, indent=2)

    print("creating dataset file : %s" % ds_file)
    util.create_dataset_json(id, version, met_file, ds_file)