def get_gal_pa(subject_id):
    """Combine the pitch angles of a subject's saved spiral arms.

    Every file in ``lib/spiral_arms`` named ``<subject_id>-<number>.pickle``
    is loaded with ``Arm.load``; arms whose ``FLAGGED_AS_BAD`` attribute is
    truthy are discarded. The remaining arms are combined into a single
    length-weighted pitch angle.

    Parameters
    ----------
    subject_id
        Identifier used as the prefix of the arm file names.

    Returns
    -------
    tuple
        ``(combined_pa, combined_sigma_pa, per_arm)`` where ``per_arm`` is an
        array with one row per retained arm and the columns
        ``(subject_id, pa, sigma_pa, length)``. When no arm is retained the
        first two entries are ``np.nan`` and ``per_arm`` has zero rows.

    Notes
    -----
    Earlier revisions also loaded (or rebuilt) a ``Pipeline`` for the
    subject, but its result was never used; that dead work has been removed.
    """
    arm_dir = 'lib/spiral_arms'
    # The dot is escaped so that only a literal ".pickle" suffix matches
    # (an unescaped "." would accept any character in that position).
    arm_file_pattern = re.compile(
        r'^{}-[0-9]+\.pickle$'.format(re.escape(str(subject_id)))
    )
    candidate_arms = (
        Arm.load(os.path.join(arm_dir, fname))
        for fname in os.listdir(arm_dir)
        if arm_file_pattern.match(fname)
    )
    arms = [arm for arm in candidate_arms if not arm.FLAGGED_AS_BAD]

    pa = np.array([arm.pa for arm in arms], dtype=float)
    sigma_pa = np.array([arm.sigma_pa for arm in arms], dtype=float)
    length = np.array([arm.length for arm in arms], dtype=float)

    # One row per arm: (subject_id, pa, sigma_pa, length).
    per_arm = np.stack(
        (np.tile(subject_id, len(pa)), pa, sigma_pa, length),
        axis=1,
    )
    if not arms:
        # Nothing to combine; avoids dividing by a zero total length.
        return np.nan, np.nan, per_arm

    total_length = length.sum()
    # Length-weighted mean of the per-arm pitch angles.
    combined_pa = (pa * length).sum() / total_length
    # Per-arm uncertainties propagated through the same weighting.
    combined_sigma_pa = (
        np.sqrt((length**2 * sigma_pa**2).sum()) / total_length
    )
    return combined_pa, combined_sigma_pa, per_arm
def get_log_spirals(subject_id, gal=None, angle=None, pic_array=None,
                    bar_length=0):
    """Return the reprojected log spirals fitted to a subject's drawn arms.

    Parameters
    ----------
    subject_id
        Subject whose drawn arms are clustered and fitted.
    gal, angle
        Galaxy metadata and rotation angle. If either is ``None`` both are
        fetched with ``gu.get_galaxy_and_angle``.
    pic_array
        Galaxy image; fetched with ``gu.get_image`` when ``None``. Only its
        first dimension is used (as the pipeline's ``image_size``).
    bar_length
        Forwarded unchanged to ``Pipeline.get_arms``.

    Returns
    -------
    list
        ``arm.reprojected_log_spiral`` for each arm found by the pipeline.
    """
    drawn_arms = gu.get_drawn_arms(subject_id, gu.classifications)
    if gal is None or angle is None:
        gal, angle = gu.get_galaxy_and_angle(subject_id)
    if pic_array is None:
        pic_array, _ = gu.get_image(gal, subject_id, angle)

    distance_file = './lib/distances/subject-{}.npy'.format(subject_id)
    distances = gu.get_distances(subject_id)
    # The stored matrix is reused only if it was found, has one row per
    # drawn arm, and its file is present on disk.
    cache_is_usable = (
        distances is not None
        and distances.shape[0] == len(drawn_arms)
        and os.path.exists(distance_file)
    )
    if not cache_is_usable:
        distances = metric.calculate_distance_matrix(drawn_arms)
        np.save(distance_file, distances)

    pipeline = Pipeline(
        drawn_arms,
        phi=angle,
        ba=gal['PETRO_BA90'],
        image_size=pic_array.shape[0],
        distances=distances,
    )
    fitted_arms = pipeline.get_arms(clean_points=True, bar_length=bar_length)
    log_spirals = []
    for fitted_arm in fitted_arms:
        log_spirals.append(fitted_arm.reprojected_log_spiral)
    return log_spirals
def make_arm_plots():
    """Save one PNG per galaxy with its saved spiral arms drawn on top.

    For every position in the module-level ``dr8ids`` sequence, the image of
    the matching ``ss_ids`` subject is shown and each arm loaded from
    ``lib/duplicate_spiral_arms`` (files named ``<dr8id>-<number>.pickle``)
    is plotted over it: colour ``'C1'`` when the arm's ``FLAGGED_AS_BAD`` is
    truthy, ``'C2'`` otherwise. Figures are written to
    ``lib/duplicate_comb_spirals/<ss_id>.png``. Returns ``None``.
    """
    outfile = 'lib/duplicate_comb_spirals'
    arm_loc = 'lib/duplicate_spiral_arms'
    bar = Bar('Plotting arms', max=len(dr8ids),
              suffix='%(percent).1f%% - %(eta)ds')
    # The directory listing does not depend on the galaxy, so read it once
    # instead of once per galaxy.
    arm_files = os.listdir(arm_loc)
    # A distinct index name is used here; previously the inner arm loop
    # reused (and shadowed) the outer loop variable.
    for gal_idx in range(len(dr8ids)):
        original_id = ss_ids[gal_idx]
        gal, angle = gu.get_galaxy_and_angle(original_id)
        pic_array, _ = gu.get_image(gal, original_id, angle)
        # Escape the dot so only a literal ".pickle" suffix is accepted.
        arm_file_pattern = re.compile(
            r'^{}-[0-9]+\.pickle$'.format(re.escape(str(dr8ids[gal_idx])))
        )
        arms = [
            Arm.load(os.path.join(arm_loc, fname))
            for fname in arm_files
            if arm_file_pattern.match(fname)
        ]
        plt.figure(figsize=(8, 8))
        try:
            plt.imshow(pic_array, cmap='gray')
            for arm in arms:
                plt.plot(
                    *arm.reprojected_log_spiral.T,
                    c=('C2' if not arm.FLAGGED_AS_BAD else 'C1')
                )
            plt.savefig(os.path.join(outfile, '{}.png'.format(original_id)))
        finally:
            # Always release the figure, even if plotting or saving fails.
            plt.close()
        bar.next()
    bar.finish()
def get_best_classification(subject_id, should_plot=False, should_save=False):
    """Return the classification whose rendered model best matches the data.

    Every classification of ``subject_id`` is parsed into a model, rendered,
    convolved with the PSF and compared (mean squared error over the masked
    pixels) against the galaxy image scaled by 0.8. The classification row
    with the smallest error is returned.

    Parameters
    ----------
    subject_id
        Subject to score classifications for.
    should_plot
        When true, a residual plot is written to
        ``best_residual/<subject_id>.pdf``.
    should_save
        When true, the best model is written as JSON to
        ``best_annotation/<subject_id>.json``.

    Returns
    -------
    The row of ``gu.classifications`` with the lowest score.
    """
    # Metadata required to render and score models for this galaxy.
    psf = gu.get_psf(subject_id)
    diff_data = gu.get_image_data(subject_id)
    # Mask and image are both reversed along their first axis.
    pixel_mask = 1 - np.array(diff_data['mask'])[::-1]
    galaxy_data = np.array(diff_data['imageData'])[::-1]
    size_diff = diff_data['width'] / diff_data['imageWidth']

    def _score(rendered_model, y=galaxy_data):
        # PSF-convolved model, restricted to the pixels kept by the mask.
        convolved = rg.convolve2d(
            rendered_model, psf, mode='same', boundary='symm'
        )
        masked_model = convolved * pixel_mask
        # NOTE(review): the 0.8 factor on the data is unexplained in this
        # block; confirm its meaning before changing it.
        masked_target = 0.8 * (y * pixel_mask)
        return mean_squared_error(
            masked_model.flatten(), masked_target.flatten()
        )

    subject_query = 'subject_ids == {}'.format(subject_id)
    classifications = gu.classifications.query(subject_query)
    annotations = classifications['annotations'].apply(json.loads)
    models = annotations.apply(pa.parse_annotation, size_diff=size_diff)
    rendered_models = models.apply(
        rg.calculate_model, args=(diff_data['width'],)
    )
    best_index = rendered_models.apply(_score).idxmin()

    best_cls = classifications.loc[best_index]
    best_model = models.loc[best_index]
    best_rendered_model = rendered_models.loc[best_index]

    if should_plot:
        gal, angle = gu.get_galaxy_and_angle(subject_id)
        pic_array, _ = gu.get_image(gal, subject_id, angle)
        # Both scales are an image side length divided by 4 * PETRO_THETA.
        # NOTE(review): if PETRO_THETA is in arcseconds these are pixels per
        # arcsecond (the previous comments said the inverse) - confirm.
        field_width = gal['PETRO_THETA'].iloc[0] * 4
        pix_size = pic_array.shape[0] / field_width    # zooniverse image
        pix_size2 = galaxy_data.shape[0] / field_width  # galaxy data
        half_width = pic_array.shape[0] / 2 / pix_size
        half_height = pic_array.shape[1] / 2 / pix_size
        imshow_kwargs = {
            'cmap': 'gray_r',
            'origin': 'lower',
            # (left, right, bottom, top), measured from the image centre.
            'extent': (-half_width, half_width, -half_height, half_height),
        }
        tc, tp = make_transforms(galaxy_data, pix_size2)
        plot_model(
            best_rendered_model, galaxy_data, psf, best_model, pixel_mask,
            imshow_kwargs, tc, tp, best_cls,
        )
        plt.savefig('best_residual/{}.pdf'.format(subject_id))
        plt.close()

    if should_save:
        serialised = json.dumps(pa.make_json(best_model))
        with open('best_annotation/{}.json'.format(subject_id), 'w') as f:
            f.write(serialised)

    return best_cls
def get_optimized_model(subject_id, mode='best'):
    """Fit a spiral-free version of a subject's model to the galaxy data.

    Parameters
    ----------
    subject_id
        Subject to fit.
    mode
        ``'agg'`` reads the aggregate model from
        ``../component-clustering/cluster-output/<subject_id>.json``;
        ``'best'`` parses the classification listed for the subject in the
        module-level ``best_cls`` mapping.

    Returns
    -------
    tuple or None
        ``(subject_id, starting_model, fitted_model, sd_row, pix_size2)``,
        where ``sd_row`` is the row of the module-level ``sd`` at the
        position of the closest entry in ``coords``. ``None`` is returned
        when that closest entry is not within 1 arcsecond, when a
        ``KeyError`` occurs while obtaining the model, or when fitting
        raises ``ValueError``.

    Raises
    ------
    ValueError
        If ``mode`` is neither ``'agg'`` nor ``'best'``.
    """
    gal, angle = gu.get_galaxy_and_angle(subject_id)
    galaxy_position = SkyCoord(
        ra=gal['RA'] * u.degree,
        dec=gal['DEC'] * u.degree,
    )
    sep = coords.separation(galaxy_position)
    idxmin_sep = np.argmin(sep)
    nearest_sep = sep[idxmin_sep]
    # Written as "not <" so that a NaN separation is rejected as well.
    if not nearest_sep < 1 * u.arcsec:
        return None

    pic_array, _ = gu.get_image(gal, subject_id, angle)
    psf = gu.get_psf(subject_id)
    diff_data = gu.get_image_data(subject_id)
    pixel_mask = 1 - np.array(diff_data['mask'])[::-1]
    galaxy_data = np.array(diff_data['imageData'])[::-1]
    size_diff = diff_data['width'] / diff_data['imageWidth']

    # Image side length divided by 4 * PETRO_THETA.
    # NOTE(review): the zooniverse-image scale is computed but never used in
    # this function; it is kept so the evaluation order is unchanged.
    zoo_pix_size = pic_array.shape[0] / (gal['PETRO_THETA'].iloc[0] * 4)
    pix_size2 = galaxy_data.shape[0] / (gal['PETRO_THETA'].iloc[0] * 4)

    try:
        if mode == 'agg':
            agg_fname = os.path.join(
                '..', 'component-clustering', 'cluster-output',
                '{}.json'.format(subject_id),
            )
            with open(agg_fname) as f:
                aggregate = json.load(f)
                model = pa.parse_aggregate_model(aggregate,
                                                 size_diff=size_diff)
        elif mode == 'best':
            best_query = 'classification_id == {}'.format(
                best_cls[str(subject_id)]
            )
            chosen = gu.classifications.query(best_query).iloc[0]
            annotation = json.loads(chosen['annotations'])
            model = pa.parse_annotation(annotation, size_diff=size_diff)
        else:
            raise ValueError('Invalid value for "mode"')
    except KeyError:
        print('\nFailed: {}'.format(subject_id))
        return None

    # Fit the same model with its spiral arms removed.
    no_spiral_model = deepcopy(model)
    no_spiral_model['spiral'] = []
    fitter = ModelFitter(no_spiral_model, galaxy_data, psf, pixel_mask)
    # NOTE(review): this attribute read is unused; retained from the
    # original in case it is a property with side effects.
    fitter_model = fitter.model
    try:
        new_nosp_model, _ = fitter.fit(options={'maxiter': 100})
    except ValueError:
        print('\nCould not fit: {}'.format(subject_id))
        return None

    m0_nosp = Model(no_spiral_model, galaxy_data, psf, pixel_mask)
    m1_nosp = Model(new_nosp_model, galaxy_data, psf, pixel_mask)
    return (subject_id, m0_nosp, m1_nosp, sd.iloc[idxmin_sep], pix_size2)
s = clf.score( X_test[:, :-1], t_test, sample_weight=point_weights[test] ) params.append(clf.coef_) score += s / n_splits return score, params if __name__ == '__main__': chosenId = 21097008 # chosenId = 21686558 gal, angle = gu.get_galaxy_and_angle(chosenId) pic_array, deprojected_image = gu.get_image( gal, chosenId, angle ) drawn_arms = gu.get_drawn_arms(chosenId, gu.classifications) galaxy_object = GalaxySpirals( drawn_arms, ba=gal['SERSIC_BA'].iloc[0], phi=-angle ) try: distances except NameError: distances = galaxy_object.calculate_distances() db = galaxy_object.cluster_lines(distances)
plt.title('Combined galaxy') plt.imshow(pic_array, origin='lower', cmap='gray_r') ax = plt.gca() for p in patches: ax.add_patch(p) plt.axis('off') if outfile is not None: plt.savefig(outfile) if __name__ == "__main__": sid_list = sorted(np.loadtxt('lib/subject-id-list.csv', dtype='u8')) to_iter = sid_list for subject_id in tqdm(to_iter): gal, angle = gu.get_galaxy_and_angle(subject_id) pic_array, deprojected_image = gu.get_image(gal, subject_id, angle) pix_size = pic_array.shape[0] / (gal['PETRO_THETA'].iloc[0] * 4 ) # pixels per arcsecond disk_res, bulge_res, bar_res = cluster_components(subject_id) spirals = get_log_spirals(subject_id, gal=gal, angle=angle, pic_array=pic_array, bar_length=10) xtick_labels = np.linspace(-100, 100, 11).astype(int) xtick_positions = xtick_labels * pix_size + pic_array.shape[0] / 2 xtick_mask = (xtick_positions > 0) & (xtick_positions < pic_array.shape[0])
import numpy as np import gzbuilder_analysis.parsing as parsing import gzbuilder_analysis.spirals as spirals from scipy.integrate import odeint from scipy.optimize import minimize import lib.galaxy_utilities as gu subject_id = 20902040 galaxy_classifcations = gu.classifications.query( 'subject_ids == {}'.format(subject_id)) drawn_arms = spirals.get_drawn_arms(galaxy_classifcations) gal, angle = gu.get_galaxy_and_angle(subject_id) ba = gal['PETRO_BA90'] im = gu.get_image(subject_id) psf = gu.get_psf(subject_id) diff_data = gu.get_diff_data(subject_id) pixel_mask = 1 - np.array(diff_data['mask'])[::-1] galaxy_data = np.array(diff_data['imageData'])[::-1] size_diff = diff_data['width'] / diff_data['imageWidth'] # functions for plotting # tv = lambda v: parsing.transform_val(v, np.array(im).shape[0], gal['PETRO_THETA']) # ts = lambda v: parsing.transform_shape(v, galaxy_data.shape[0], gal['PETRO_THETA']) # ts_a = lambda v: parsing.transform_shape(v, galaxy_data.shape[0], gal['PETRO_THETA']) # imshow_kwargs = dict(cmap='gray', origin='lower', extent=[tv(0), tv(np.array(im).shape[0])]*2) # Swing amplification model (not using sklearn pipelines) def _swing_amplification_dydt(r, theta, b):
def _label_arcsec_grid(axes, extent):
    """Apply shared limits and edge-only labels to a 2x2 grid of axes."""
    for i, ax in enumerate(axes):
        ax.set_xlim(extent[:2])
        ax.set_ylim(extent[2:])
        if i % 2 == 0:
            # Left-hand column only.
            ax.set_ylabel('Arcseconds from center')
        if i > 1:
            # Bottom row only.
            ax.set_xlabel('Arcseconds from center')


def plot_aggregation(subject_id, model=None, cluster_masks=None, arms=None):
    """Plot drawn, clustered and aggregate components for one subject.

    Three PDFs are written:

    * ``drawn_shapes/<subject_id>.pdf`` - every drawn disk, bulge, bar and
      spiral arm, one panel per component type;
    * ``clustered_shapes/<subject_id>.pdf`` - the shapes selected by
      ``cluster_masks`` with the aggregate shape on top, plus arm points and
      fitted log spirals;
    * ``aggregate_model/<subject_id>.pdf`` - the aggregate components and log
      spirals on a single panel.

    Parameters
    ----------
    subject_id
        Subject to plot.
    model
        Aggregate model; its ``'disk'``, ``'bulge'`` and ``'bar'`` entries
        are read.
    cluster_masks
        Indexable whose items 0, 1 and 2 select the clustered disk, bulge
        and bar geometries respectively.
    arms
        Spiral arm objects providing ``arms``, ``coords`` and
        ``reprojected_log_spiral``.

    If ANY of ``model``, ``cluster_masks`` or ``arms`` is ``None``, all three
    are (re)loaded: ``cluster-output/<subject_id>.json``,
    ``cluster_masks/<subject_id>.npy`` and ``get_spiral_arms``. The function
    returns ``None`` without plotting when either file is missing.
    """
    if model is None or cluster_masks is None or arms is None:
        model_path = os.path.join(
            'cluster-output', '{}.json'.format(subject_id)
        )
        masks_path = os.path.join('cluster_masks', '{}.npy'.format(subject_id))
        if not (os.path.exists(model_path) and os.path.exists(masks_path)):
            return
        with open(model_path) as f:
            model = json.load(f)
        # Load straight from the path: ``.npy`` is a binary format, so a
        # handle opened in text mode cannot be passed to ``np.load``.
        cluster_masks = np.load(masks_path)
        arms = get_spiral_arms(subject_id, should_recreate=False)

    annotations = gu.classifications[
        gu.classifications['subject_ids'] == subject_id
    ]['annotations'].apply(json.loads)
    models = annotations\
        .apply(ash.remove_scaling)\
        .apply(pa.parse_annotation)\
        .apply(sanitize_model)
    geoms = pd.DataFrame(
        models.apply(get_geoms).values.tolist(),
        columns=('disk', 'bulge', 'bar')
    )
    logsps = [arm.reprojected_log_spiral for arm in arms]

    disk_cluster_geoms = geoms['disk'][cluster_masks[0]]
    bulge_cluster_geoms = geoms['bulge'][cluster_masks[1]]
    bar_cluster_geoms = geoms['bar'][cluster_masks[2]]

    # NOTE(review): the "is not None" checks further down assume these
    # helpers return None for a missing component - confirm.
    aggregate_disk_geom = ash.make_ellipse(model['disk'])
    aggregate_bulge_geom = ash.make_ellipse(model['bulge'])
    aggregate_bar_geom = ash.make_box(model['bar'])

    gal, angle = gu.get_galaxy_and_angle(subject_id)
    pic_array, _ = gu.get_image(gal, subject_id, angle)

    def ts(s):
        return ash.transform_shape(s, pic_array.shape[0],
                                   gal['PETRO_THETA'].iloc[0])

    def tv(v):
        return ash.transform_val(v, pic_array.shape[0],
                                 gal['PETRO_THETA'].iloc[0])

    imshow_kwargs = {
        'cmap': 'gray',
        'origin': 'lower',
        'extent': [tv(0), tv(pic_array.shape[0])]*2,
    }
    extent = imshow_kwargs['extent']

    # Figure 1: every drawn shape, one panel per component type.
    fig, ((ax0, ax1), (ax2, ax3)) = plt.subplots(
        ncols=2, nrows=2,
        figsize=(10, 10),
        sharex=True, sharey=True
    )
    ax0.imshow(pic_array, **imshow_kwargs)
    for comp in geoms['disk'].values:
        if comp:
            ax0.add_patch(
                PolygonPatch(ts(comp), fc='C0', ec='k', alpha=0.2, zorder=3)
            )
    ax1.imshow(pic_array, **imshow_kwargs)
    for comp in geoms['bulge'].values:
        if comp:
            ax1.add_patch(
                PolygonPatch(ts(comp), fc='C1', ec='k', alpha=0.5, zorder=3)
            )
    ax2.imshow(pic_array, **imshow_kwargs)
    for comp in geoms['bar'].values:
        if comp:
            ax2.add_patch(
                PolygonPatch(ts(comp), fc='C2', ec='k', alpha=0.2, zorder=3)
            )
    ax3.imshow(pic_array, **imshow_kwargs)
    for arm in arms:
        for a in arm.arms:
            ax3.plot(*tv(a).T)

    _label_arcsec_grid((ax0, ax1, ax2, ax3), extent)
    fig.subplots_adjust(wspace=0.05, hspace=0.05)
    plt.savefig('drawn_shapes/{}.pdf'.format(subject_id), bbox_inches='tight')
    plt.close()

    # Figure 2: clustered shapes with the aggregate shape drawn on top.
    fig, ((ax0, ax1), (ax2, ax3)) = plt.subplots(
        ncols=2, nrows=2,
        figsize=(10, 10),
        sharex=True, sharey=True
    )
    ax0.imshow(pic_array, **imshow_kwargs)
    for comp in disk_cluster_geoms.values:
        ax0.add_patch(
            PolygonPatch(ts(comp), fc='C0', ec='k', alpha=0.1, zorder=3)
        )
    if model['disk'] is not None:
        ax0.add_patch(
            PolygonPatch(ts(aggregate_disk_geom), fc='C1', ec='k',
                         alpha=0.5, zorder=3)
        )
    ax1.imshow(pic_array, **imshow_kwargs)
    for comp in bulge_cluster_geoms.values:
        ax1.add_patch(
            PolygonPatch(ts(comp), fc='C1', ec='k', alpha=0.1, zorder=3)
        )
    if aggregate_bulge_geom is not None:
        ax1.add_patch(
            PolygonPatch(ts(aggregate_bulge_geom), fc='C2', ec='k',
                         alpha=0.5, zorder=3)
        )
    ax2.imshow(pic_array, **imshow_kwargs)
    for comp in bar_cluster_geoms.values:
        ax2.add_patch(
            PolygonPatch(ts(comp), fc='C2', ec='k', alpha=0.1, zorder=3)
        )
    if aggregate_bar_geom is not None:
        ax2.add_patch(
            PolygonPatch(ts(aggregate_bar_geom), fc='C3', ec='k',
                         alpha=0.5, zorder=3)
        )
    ax3.imshow(pic_array, **imshow_kwargs)
    # Draw on ax3 explicitly instead of relying on pyplot's notion of the
    # current axes.
    for arm in arms:
        ax3.plot(*tv(arm.coords).T, '.', alpha=0.5, markersize=0.5)
    for arm in logsps:
        ax3.plot(*tv(arm).T)

    _label_arcsec_grid((ax0, ax1, ax2, ax3), extent)
    fig.subplots_adjust(wspace=0.05, hspace=0.05)
    plt.savefig('clustered_shapes/{}.pdf'.format(subject_id),
                bbox_inches='tight')
    plt.close()

    # Figure 3: the aggregate model on a single panel.
    fig = plt.figure(figsize=(10, 10))
    ax = plt.gca()
    ax.imshow(pic_array, **imshow_kwargs)
    if aggregate_disk_geom is not None:
        ax.add_patch(
            PolygonPatch(ts(aggregate_disk_geom), fc='C0', ec='k',
                         alpha=0.25, zorder=3)
        )
    if aggregate_bulge_geom is not None:
        ax.add_patch(
            PolygonPatch(ts(aggregate_bulge_geom), fc='C1', ec='k',
                         alpha=0.25, zorder=3)
        )
    if aggregate_bar_geom is not None:
        ax.add_patch(
            PolygonPatch(ts(aggregate_bar_geom), fc='C2', ec='k',
                         alpha=0.25, zorder=3)
        )
    for arm in logsps:
        ax.plot(*tv(arm).T, c='C3')
    ax.set_xlim(extent[:2])
    ax.set_ylim(extent[2:])
    ax.set_ylabel('Arcseconds from center')
    ax.set_xlabel('Arcseconds from center')
    plt.savefig('aggregate_model/{}.pdf'.format(subject_id),
                bbox_inches='tight')
    plt.close()
def main(mangaid, subject_id):
    """Compare rotation-curve-predicted pitch angles with measured arms.

    Reads the rotation-curve table for ``mangaid``, fits a tanh model to each
    velocity column in ``keys``, derives shear and predicted pitch angle from
    both the fit and the raw data, and compares them with the pitch angle of
    the spiral arms obtained from ``subject_id``'s drawn arms.

    Side effects: prints a summary per velocity column, writes
    ``<mangaid>_shear.pdf``, ``pavr.npy`` and ``<mangaid>_varying-pa.pdf``.

    NOTE(review): a bare ``return`` after the varying-pa figure makes the
    code that would write ``<mangaid>_pa.pdf`` and
    ``<mangaid>_rotational-velocity_2.pdf`` unreachable.

    Parameters
    ----------
    mangaid
        Identifier passed to ``read_file`` and used in output file names.
    subject_id
        Subject whose metadata, image and drawn arms are used.

    Returns
    -------
    None
    """
    gal, angle = gu.get_galaxy_and_angle(subject_id)
    # presumably converts arcseconds to km for this galaxy (per the helper's
    # name) - TODO confirm
    unit_converter = convert_arcsec_to_km(gal)
    df = read_file(mangaid)
    # Rows containing the -9999.0 sentinel in any column are discarded.
    invalid_mask = df.values == -9999.0
    mask = np.any(invalid_mask, axis=1)
    df.iloc[mask] = np.nan
    df = df.dropna()
    # Keep the unconverted radius alongside the converted one.
    df['R-arcsec'] = df['R']
    df['R'] = unit_converter(df['R'])
    scale = 4 * float(gal['PETRO_THETA'])
    # Radius as a fraction of ``scale``.
    zoo_coords_r = df['R-arcsec'].values / scale
    # Velocity columns to fit, with the matching legend labels.
    keys = (
        'GAS_IC-V',
        'GAS___-V',
        'BTH_IC-V',
        'BTH___-V',
    )
    labels = (
        r'$H_\alpha$ velocity, fixed center & inclination',
        r'$H_\alpha$ velocity, varying center & inclination',
        r'$H_\alpha$ and stellar velocity, fixed center & inclination',
        r'$H_\alpha$ and stellar velocity, varying centre and inclination',
    )
    drawn_arms = gu.get_drawn_arms(subject_id, gu.classifications)
    arm_pipeline = Pipeline(drawn_arms, phi=angle, ba=gal['PETRO_BA90'],
                            image_size=512, parallel=True)
    arms = arm_pipeline.get_arms()
    gzb_pa, gzb_sigma_pa = arm_pipeline.get_pitch_angle(arms)
    # Per-arm pitch angle plus the radial range covered by its log spiral.
    # Radii are distances from (256, 256), the centre of the 512-pixel image
    # size given to the pipeline, rescaled by 4 * PETRO_THETA / 512.
    arm_details = [{
        'pa': arm.pa,
        'sigma_pa': arm.sigma_pa,
        'min_r': unit_converter(
            np.linalg.norm(arm.log_spiral - (256, 256), axis=1).min()
            * float(gal['PETRO_THETA']) * 4 / 512),
        'max_r': unit_converter(
            np.linalg.norm(arm.log_spiral - (256, 256), axis=1).max()
            * float(gal['PETRO_THETA']) * 4 / 512)
    } for arm in arms]
    min_r = min(a['min_r'] for a in arm_details)
    max_r = max(a['max_r'] for a in arm_details)
    fitted = {}
    fig, ax = plt.subplots(figsize=(8, 6))
    sa_pas = []
    sa_pa_datas = []
    for i, (key, label) in enumerate(zip(keys, labels)):
        f = tanh_model(df['R'].values, df[key].values)
        p = least_squares(f, (160, 1E-17), x_scale=(10, 1E-17))['x']
        # presumably f(p) is (model - data), so adding the data back gives
        # the fitted curve - TODO confirm against tanh_model
        fitted[key] = f(p) + df[key].values
        # Calculate shear from analytic solve of dln(Ω)/dln(R)
        shear = shear_from_tanh(p[1], df['R'].values)
        omega = df[key] / (2 * np.pi * df['R'])
        shear_data = get_shear(omega[:-1], df['R'].values[:-1])
        plt.plot(df['R'], shear, c='C{}'.format(i % 10), label=label)
        # The data-derived shear is plotted at the midpoints of consecutive
        # radii.
        plt.plot(np.stack((df['R'][:-1], df['R'][1:])).mean(axis=0),
                 shear_data, '--', c='C{}'.format(i % 10))
        sa_pa = np.rad2deg(get_predicted_pa(shear))
        sa_pa_data = np.rad2deg(get_predicted_pa(shear_data))
        sa_pas.append(sa_pa)
        sa_pa_datas.append(sa_pa_data)
        print('For key: {}'.format(key))
        # Only radii inside the range spanned by the measured arms.
        msk = (df['R'] > min_r) & (df['R'] < max_r)
        print('\tRotation-predicted: {:.4f}°'.format(sa_pa[msk].mean()))
        print('\tGZB measured PA: {:.4f} ± {:.4f}°'.format(
            gzb_pa, gzb_sigma_pa))
    # Empty plots that only add line-style entries to the legend.
    plt.plot([], [], 'k-', label=r'Analytic differentiation')
    plt.plot([], [], 'k--', label='Numerical differentiation')
    plt.xlabel('Distance from galaxy centre [km]')
    plt.ylabel(r'Shear rate, $\Gamma$')
    plt.legend()
    plt.savefig('{}_shear.pdf'.format(mangaid), bbox_inches='tight')
    plt.close()
    # Only the prediction for the first key is saved.
    np.save('pavr', np.stack((zoo_coords_r, sa_pas[0]), axis=1))
    imshow_kwargs = {
        'cmap': 'gray',
        'origin': 'lower',
        'extent': [-0.5 * scale, 0.5 * scale] * 2,
    }
    pic_array, _ = gu.get_image(gal, subject_id, angle)
    fig, ax = plt.subplots(ncols=1, figsize=(5, 5))
    plt.imshow(pic_array, **imshow_kwargs)
    for i, arm in enumerate(arms):
        # The varying pitch angle is the mean prediction over all keys.
        varying_arm_t = fit_varying_pa(arm, zoo_coords_r,
                                       np.stack(sa_pas).mean(axis=0))
        t_predict = np.linspace(varying_arm_t.min(), varying_arm_t.max(), 100)
        f = interp1d(varying_arm_t, zoo_coords_r)
        varying_arm = xy_from_r_theta(f(t_predict), t_predict)
        log_spiral = xy_from_r_theta(*np.flipud(arm.polar_logsp))
        plt.plot(*arm.deprojected_coords.T * scale, '.', markersize=1,
                 alpha=1)
        plt.plot(*log_spiral * scale, c='r', linewidth=3, alpha=0.8)
        plt.plot(*varying_arm * scale, c='g', linewidth=3, alpha=0.8)
    # plots for legend
    plt.plot([], [], c='g', linewidth=3, alpha=0.8,
             label='Swing-amplified spiral')
    plt.plot([], [], c='r', linewidth=3, alpha=0.8,
             label='Logarithmic spiral')
    plt.axis('equal')
    plt.xlabel('Arcseconds from galaxy centre')
    plt.ylabel('Arcseconds from galaxy centre')
    plt.xlim(-25, 25)
    plt.ylim(-25, 25)
    plt.legend()
    plt.savefig('{}_varying-pa.pdf'.format(mangaid), bbox_inches='tight')
    plt.close()
    # NOTE(review): everything after this return is unreachable, so the
    # pitch-angle and rotational-velocity figures below are never written.
    # Confirm whether the early exit is intentional.
    return
    fig, ax = plt.subplots(figsize=(8, 6))
    for sa_pa, label in zip(sa_pas, labels):
        plt.plot(df['R'], sa_pa, label=label)
    # Each measured arm: a horizontal line at its pitch angle over its
    # radial range, with a shaded band of +/- sigma_pa.
    for row in arm_details:
        plt.hlines(row['pa'], row['min_r'], row['max_r'])
        plt.fill_between(
            np.linspace(row['min_r'], row['max_r'], 2),
            row['pa'] - row['sigma_pa'],
            row['pa'] + row['sigma_pa'],
            color='k',
            alpha=0.2,
        )
    plt.legend()
    plt.xlabel('Distance from galaxy centre [km]')
    plt.ylabel('Pitch angle [degrees]')
    plt.savefig('{}_pa.pdf'.format(mangaid), bbox_inches='tight')
    plt.close()
    fig, ax = plt.subplots(figsize=(8, 6))
    # df.plot('R', keys, label=labels, ax=ax)
    for i, key in enumerate(keys):
        # The band uses the column named ``key + 'e'`` as the half-width.
        plt.fill_between(
            df['R'].values,
            df[key].values - df[key + 'e'].values,
            df[key].values + df[key + 'e'].values,
            color='C{}'.format(i % 10),
            alpha=0.1,
        )
        plt.plot(df['R'].values, df[key].values, '--', c='C{}'.format(i % 10))
        plt.plot(df['R'].values, fitted[key], c='C{}'.format(i % 10))
    # Empty plots that only add entries to the legend.
    for i, label in enumerate(labels):
        plt.plot([], [], c='C{}'.format(i % 10), label=label)
    plt.plot([], [], 'k-', label=r'$A\tanh(bR)$ model')
    plt.plot([], [], 'k--', label='Data')
    plt.legend()
    plt.xlabel('Distance from galaxy centre [km]')
    plt.ylabel(r'Rotational velocity [$\mathrm{km}\mathrm{s}^{-1}$]')
    plt.savefig('{}_rotational-velocity_2.pdf'.format(mangaid),
                bbox_inches='tight')
    plt.close()