def ExtractFeature(feature, target_spec_ids = None, data_dir = './repaired/*.pkl', nproc = 1):
    """Collect the requested feature (and related info) from the data files.

    feature         -- forwarded unchanged to FeatureExtractor.
    target_spec_ids -- forwarded unchanged to FeatureExtractor (default None;
                       presumably restricts extraction to those spectra --
                       confirm against FeatureExtractor).
    data_dir        -- wildcard pattern expanded by ExpandWildcard into the
                       list of input files.
    nproc           -- forwarded to mr.ReduceEngine (presumably the number of
                       worker processes -- confirm against the engine).

    Returns whatever the reduce engine's Start() call produces.
    """
    # Resolve the pattern first, so a bad pattern fails before any reducer
    # object is constructed.
    matched_files = ExpandWildcard(data_dir)
    extractor = FeatureExtractor(feature, target_spec_ids)
    return mr.ReduceEngine(extractor, nproc).Start(matched_files)
dummy, tag = dr7pca.DoPCA(None, feature, 0) pca_model_file = "pca_model_{0}.pkl".format(tag) if os.path.exists(pca_model_file): log.info( 'Using existing PCA model from {0}'.format(pca_model_file)) pca_model = ml.PCA(pca_model_file) else: pca_model = dr7pca.DoPCA(input_files, feature, 0, poolsize)[0] pca_model.Save(pca_model_file) pca_model.R = 2 # !! else: pca_model = None # clustering reducer = ClusteringReducer(feature, n_clusters, sample_rate, pca_model) engine = mr.ReduceEngine(reducer, poolsize) centers, distortion = engine.Start(input_files) run += '[{0:.3}]'.format(distortion) reducer = QuantizationReducer(feature, centers, pca_model if dopca else None) engine = mr.ReduceEngine(reducer, poolsize) result = engine.Start(input_files) coord = result['feature'].T f = figure() scatter(coord[0], coord[1], s=20, c=result['cluster_id']) show() # SavePickle("{0}_{1}.pkl".format(output_file, run), result)
# zmax = 0.36 log.info('Rebin: raw wave in [%0.1f,%0.1f], c in [%0.5f,%0.5f], z in [%0.2f,%0.2f], No O line = %s' % ( 10**rebin_c0, 10**(rebin_c0 + rebin_c1*rebin_nbin), rebin_c0, rebin_c0 + rebin_c1*rebin_nbin, zmin, zmax, remove_sky_absorption)) # fields to extract from the SDSS DB fields = ['specObjID', 'mjd', 'plate', 'fiberID', 'bestObjID', 'specClass', 'fiberMag_g', 'fiberMag_r', 'fiberMag_i', 'fiberMag_u', 'fiberMag_z'] # retrieving the plate list log.info('Retrieving object list...') db = GetDB(input_dir + '/sdss.db3', 1000) cmd = "SELECT {0}, fits_url from object_list where {1}".format(','.join(fields), selector) cur = db.execute(cmd) rows = [list(r) for r in cur] plates = [r[2] for r in rows] # update the path to the fits file for r in rows: r[-1] = GetFITSPath(input_dir, r[1], r[2], r[3]) log.info('{0} objects found'.format(len(rows))) reducer = PlateReducer(fields, rebin_c0, rebin_c1, rebin_nbin, zmin, zmax, remove_sky_absorption, {1215.67:'Ly\\d\\ga'}) engine = mr.ReduceEngine(reducer, nproc) jobs = zip(plates, rows) nobjs = engine.Start(jobs) log.info("{0} objects processed".format(nobjs))