def find_extra(cat_name,plot_cat,cat_data,image_cats,over_plots,source_names,handles,plotted_names,ra_main,dec_main,extra_lines,freq):
    '''Finds any sources in the specified catalogue that weren't in the original search,
    and plots them on the relevant over_plot subplot'''
    ##Select the correct plot and wcs
    plot_ind = image_cats.index(cat_name)
    extra_plot = over_plots[plot_ind]
    ##Search the catalogue and plot the found sources
    for line in cat_data[:-1]:
        info = line.split('|')[1:-1]
        name,ra,rerr,dec,derr,flux,ferr,major,minor,PA,flag,ID = info
        name = name.split()[0]  ##Gets around long strings
        ra,dec = float(ra[:10]),float(dec[:10])
        ##closeness is a module-level setting (half the base catalogue resolution)
        if (name not in source_names) and mkl.arcdist(ra_main,ra,dec_main,dec) < 4*closeness:
            flag = '-100000.0'
            ID = '-100000.0'
            edit_nums = [rerr,derr,flux,ferr,major,minor,PA]
            for i in xrange(len(edit_nums)):
                ##float() parses scientific notation such as '1.23E-02' directly, so no
                ##manual mantissa/exponent slicing is needed; anything unparseable
                ##becomes the missing-data value
                try:
                    edit_nums[i] = float(edit_nums[i])
                except ValueError:
                    edit_nums[i] = -100000.0
            rerr,derr,flux,ferr,major,minor,PA = edit_nums
            ##Columns: cat name RA RA_err Dec Dec_err freq flux flux_err major minor PA flag ID
            extra_line = "%s %s %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %.5f %s %s\n" %(plot_cat,name,ra,rerr,dec,derr,freq,flux,ferr,major,minor,PA,flag,ID)
            extra_lines.append(extra_line)
            ##matched_cats and marker_colours are module-level settings
            if plot_cat == 'gleam':
                ind = 0
            else:
                ind = matched_cats.index(plot_cat)
            extra_plot.show_markers(ra,dec,marker='x',s=50,facecolor=marker_colours[ind],edgecolor=marker_colours[ind],linewidth=2,label=name)
            if minor != -100000.0 and major != -100000.0:
                extra_plot.show_ellipses(ra,dec,minor,major,PA,linewidth=1.5,edgecolor=marker_colours[ind],facecolor='none',alpha=0.6)
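##The closeness cut above leans on mkl.arcdist. Below is a minimal sketch of the
##great-circle separation it is assumed to compute, with the same (ra1,ra2,dec1,dec2)
##argument order as the calls in this file and all angles in degrees. The real mkl
##implementation may differ - this helper is an illustration only and is not called
##by the pipeline.
def arcdist_sketch(ra1,ra2,dec1,dec2):
    '''Angular separation in degrees via the spherical law of cosines'''
    dr = np.pi/180.0
    cos_sep = np.sin(dec1*dr)*np.sin(dec2*dr) + np.cos(dec1*dr)*np.cos(dec2*dr)*np.cos((ra1 - ra2)*dr)
    ##Clip guards against floating point overshoot (e.g. cos_sep = 1.0000000001) before arccos
    return np.arccos(np.clip(cos_sep,-1.0,1.0))/dr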
def combine_flux(src_all,src_g,accepted_inds,num_matches):
    '''Takes a src_group() class that contains all group info. Identifies which
    catalogue is repeated, combines the fluxes and fits a new line. Repopulates
    src_g with the combined fluxes, weighted positions and fit statistics, and
    returns it in a list'''
    ##Find repeated catalogues
    repeated_cats = set([src_all.cats[ind] for ind in accepted_inds if src_all.cats.count(src_all.cats[ind])>1])
    ##This won't necessarily be in the order that the cats appear in src_all.cats, so reorder
    repeat_indexs = [src_all.cats.index(cat) for cat in repeated_cats]
    repeated_cats = [cat for ind,cat in sorted(zip(repeat_indexs,repeated_cats),key=lambda pair: pair[0])]
    ##These are used to test the combined spectrum
    temp_freqs = [src_all.freqs[i] for i in xrange(len(src_all.freqs)) if src_all.cats[i] not in repeated_cats]
    temp_fluxs = [src_all.fluxs[i] for i in xrange(len(src_all.fluxs)) if src_all.cats[i] not in repeated_cats]
    temp_ferrs = [src_all.ferrs[i] for i in xrange(len(src_all.ferrs)) if src_all.cats[i] not in repeated_cats]
    ##Will need these for fitting/passing on to plotting function
    comb_freqs = []
    comb_fluxs = []
    comb_ferrs = []
    ra_ws = []
    dec_ws = []
    rerr_ws = []
    derr_ws = []
    ##Need these in case of doing the split test
    resolved_diff_inds = []
    unrepeat_dists = []
    num_of_repeats = []
    for repeat_cat in repeated_cats:
        num_of_repeat = [i for i in xrange(len(src_all.names)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        num_of_repeats.append(num_of_repeat)
    for cat,name in zip(src_g.cats[1:],src_g.names[1:]):
        if cat not in repeated_cats and cat != '100000.0':
            if cat == 'vlssr':
                src_g.vlssr = name
            elif cat == 'mrc':
                src_g.mrc = name
            elif cat == 'sumss':
                src_g.sumss = name
            elif cat == 'nvss':
                src_g.nvss = name
            else:
                pass
    ##For each repeated catalogue:
    for repeat_cat in repeated_cats:
        ##Find the frequency/ies of the repeated cat
        comb_freq = src_all.freqs[src_all.cats.index(repeat_cat)]
        ##Find the flux/es of the repeat_cat sources that were accepted by retained_sources()
        flux_to_comb = [src_all.fluxs[i] for i in xrange(len(src_all.fluxs)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        ##Find the flux error/s of the repeat_cat sources that were accepted by retained_sources()
        ferr_to_comb = [src_all.ferrs[i] for i in xrange(len(src_all.ferrs)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        ##Also find all of the positional information to combine
        ras_to_comb = [src_all.ras[i] for i in xrange(len(src_all.ras)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        rerrs_to_comb = [src_all.rerrs[i] for i in xrange(len(src_all.rerrs)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        decs_to_comb = [src_all.decs[i] for i in xrange(len(src_all.decs)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        derrs_to_comb = [src_all.derrs[i] for i in xrange(len(src_all.derrs)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        ##This is to write down all the names of the sources combined
        names_to_comb = [src_all.names[i] for i in xrange(len(src_all.names)) if (src_all.cats[i]==repeat_cat) and (i in accepted_inds)]
        name_string = ''
        for name in names_to_comb:
            name_string += ','+name
        if repeat_cat == 'vlssr':
            src_g.vlssr = name_string[1:]
        elif repeat_cat == 'mrc':
            src_g.mrc = name_string[1:]
        elif repeat_cat == 'sumss':
            src_g.sumss = name_string[1:]
        elif repeat_cat == 'nvss':
            src_g.nvss = name_string[1:]
        elif repeat_cat == 'atg20':
            src_g.atg20 = name_string[1:]
        else:
            pass
        ##TEST TO SEE IF THE REPEATED SOURCES ARE RESOLVED (BY A GIVEN RESOLUTION THRESHOLD)
        ##Need to do the split test here even if not propagating to the final catalogue
        ##-------------------------------------------------------------------------------
        ##Give it a value even if not splitting
        dist_test = 0.020833333  ##1.25 arcmin in degrees
        big_inds = []
        n = len(ras_to_comb)
        ##Distance test every unique pair of repeated sources
        for i in range(n):
            for j in range(i+1,n):
                dist = mkl.arcdist(ras_to_comb[i],ras_to_comb[j],decs_to_comb[i],decs_to_comb[j])
                if dist>dist_test:
                    big_inds.append([i,j])
        resolved_diff_inds.append(big_inds)
        ##-------------------------------------------------------------------------------
        comb_flux = sum(flux_to_comb)  ##Sum the fluxes
        comb_ferr = np.zeros(1)
        for ferr in ferr_to_comb:
            comb_ferr += ferr**2  ##Add the errors in quadrature
        comb_ferr = comb_ferr**0.5
        ##Need these later to fit the combined flux and to populate a new src_g if
        ##the fit passes
        temp_freqs.append(comb_freq)
        temp_fluxs.append([comb_flux])
        temp_ferrs.append([comb_ferr[0]])
        comb_freqs.append(comb_freq)
        comb_fluxs.append(comb_flux)
        comb_ferrs.append(comb_ferr)
        ##Weight by the first flux in the flux list (in case a catalogue has multiple frequencies)
        flux_s = [flux[0] for flux in flux_to_comb]
        ##A little code in case sources are very close to RA=0, with some reporting 359.9
        ##and others 0.001 - if you don't account for this, the weighted position goes mental
        wrap = 'no'
        for combo in combinations(ras_to_comb,2):
            diff = combo[0]-combo[1]
            if abs(diff)>180.0: wrap='yes'
        if wrap=='yes':
            for i in xrange(len(ras_to_comb)):
                if ras_to_comb[i]<180.0: ras_to_comb[i]+=360.0
        ##Weight the sources by their flux
        weights = [flux/sum(flux_s) for flux in flux_s]
        ##Do the weighting; if the weighted RA is above 360 deg, rescale,
        ##and add the errors as shown in the write up
        ra_w = np.dot(ras_to_comb,weights)
        if ra_w>360.0: ra_w-=360.0
        dec_w = np.dot(decs_to_comb,weights)
        rerr_w = (np.dot(rerrs_to_comb,weights)**2)**0.5
        derr_w = (np.dot(derrs_to_comb,weights)**2)**0.5
        ra_ws.append(ra_w)
        dec_ws.append(dec_w)
        rerr_ws.append(rerr_w)
        derr_ws.append(derr_w)
    ##Flag distance between repeated cats as being larger than designated resolution
    big_flags = [0 for i in xrange(len(resolved_diff_inds))]
    for i in xrange(len(resolved_diff_inds)):
        if len(resolved_diff_inds[i])>0: big_flags[i] = 1
    ##Now each repeat_cat has a 1 flag if large separation, 0 if not
    set_freqs = []
    set_fluxs = []
    set_ferrs = []
    set_fits = []
    set_jstat = []
    set_bse = []
    set_red = []
    set_cats = []
    set_names = []
    big_sep = 'no'
    ##If all repeated cats have large separation, and they have the same number of repeated sources:
    if 0 not in big_flags and len(list(set([len(reap) for reap in num_of_repeats])))==1:
        ##If more than one repeated cat (need more than one data point to get some spectral info)
        if len(repeated_cats)>1:
            ##Find the 'sub' set matches, i.e. sources that could be combined to make components
            sets = []
            for src in num_of_repeats[0]:
                match = [src]
                for other_srcs in num_of_repeats[1:]:
                    for other_src in other_srcs:
                        if mkl.arcdist(src_all.ras[src],src_all.ras[other_src],src_all.decs[src],src_all.decs[other_src])<dist_test:
                            match.append(other_src)
                sets.append(match)
            ##If all the sets found have the same number of components, and only have one source from
            ##each repeated catalogue
            if len(list(set([len(sset) for sset in sets])))==1 and len(sets[0])==len(repeated_cats):
                big_sep = 'yes'
                for sset in sets:
                    freqs = [src_all.freqs[src][0] for src in sset]
                    fluxs = [src_all.fluxs[src][0] for src in sset]
                    ferrs = [src_all.ferrs[src][0] for src in sset]
                    names = [src_all.names[src] for src in sset]
                    cats = [src_all.cats[src] for src in sset]
                    set_freqs.append(freqs)
                    set_fluxs.append(fluxs)
                    set_ferrs.append(ferrs)
                    set_names.append(names)
                    set_cats.append(cats)
                flux_to_weight = [src_all.fluxs[i][0] for i in xrange(len(src_all.fluxs)) if (src_all.cats[i] not in repeated_cats)]
                freq_to_weight = [src_all.freqs[i][0] for i in xrange(len(src_all.freqs)) if (src_all.cats[i] not in repeated_cats)]
                ferr_to_weight = [src_all.ferrs[i][0] for i in xrange(len(src_all.ferrs)) if (src_all.cats[i] not in repeated_cats)]
                cats_to_weight = [src_all.cats[i] for i in xrange(len(src_all.cats)) if (src_all.cats[i] not in repeated_cats)]
                names_to_weight = [src_all.names[i] for i in xrange(len(src_all.names)) if (src_all.cats[i] not in repeated_cats)]
                ##Find all the fluxes of the repeated cats, come up with weights for the single sources based on each
                ##individual repeated catalogue, then take an average of these weights
                fluxs_for_weights = [[src_all.fluxs[sset[src]][0] for sset in sets] for src in xrange(len(sets[0]))]
                fluxs_weights = [[flux/sum(fluxs) for flux in fluxs] for fluxs in fluxs_for_weights]
                flux_weights = np.array([np.mean([weights[weight] for weights in fluxs_weights]) for weight in xrange(len(fluxs_weights[0]))])
                ##For each set of freq,fluxs in the new set matched, append the weighted freq, flux and ferr of the
                ##sources that have been split up
                for i in xrange(len(set_freqs)):
                    weighted_fluxs = np.array(flux_to_weight)*flux_weights[i]
                    weighted_errs = np.array(ferr_to_weight)*flux_weights[i]
                    for j in xrange(len(weighted_fluxs)):
                        set_freqs[i].append(freq_to_weight[j])
                        set_fluxs[i].append(weighted_fluxs[j])
                        set_ferrs[i].append(weighted_errs[j])
                        set_cats[i].append(cats_to_weight[j])
                        set_names[i].append(names_to_weight[j])
                ##For every set in the match, order the fluxes, ferrs, cats and names by frequency
                set_fluxs = [[flux for flux,freq in sorted(zip(fluxs,freqs),key=lambda pair: pair[1])] for fluxs,freqs in zip(set_fluxs,set_freqs)]
                set_ferrs = [[ferr for ferr,freq in sorted(zip(ferrs,freqs),key=lambda pair: pair[1])] for ferrs,freqs in zip(set_ferrs,set_freqs)]
                set_cats = [[cat for cat,freq in sorted(zip(cats,freqs),key=lambda pair: pair[1])] for cats,freqs in zip(set_cats,set_freqs)]
                set_names = [[name for name,freq in sorted(zip(names,freqs),key=lambda pair: pair[1])] for names,freqs in zip(set_names,set_freqs)]
                set_freqs = [sorted(freqs) for freqs in set_freqs]
                for i in xrange(len(set_fluxs)):
                    freqs = set_freqs[i]
                    fluxs = set_fluxs[i]
                    ferrs = set_ferrs[i]
                    fit,jstat,bse,red = mkl.fit_line(np.log(freqs),np.log(fluxs),np.array(ferrs)/np.array(fluxs))
                    set_fits.append(fit)
                    set_jstat.append(jstat)
                    set_bse.append(bse)
                    set_red.append(red)
    ##Get the sources out of the arrays in list format (used later when making the sources
    ##to add to the final table), skipping missing (-100000.0) or NaN fluxes
    log_temp_freqs = []
    log_temp_fluxs = []
    log_temp_ferrs = []
    for i in xrange(len(temp_freqs)):
        for j in xrange(len(temp_freqs[i])):
            if temp_fluxs[i][j] == -100000.0 or np.isnan(temp_fluxs[i][j]):
                pass
            else:
                log_temp_freqs.append(np.log(temp_freqs[i][j]))
                log_temp_fluxs.append(np.log(temp_fluxs[i][j]))
                log_temp_ferrs.append(temp_ferrs[i][j]/temp_fluxs[i][j])
    ##Fit and find residuals to the combined spectrum
    comb_fit,comb_jstat,comb_bse,comb_chi_red = mkl.fit_line(np.array(log_temp_freqs),np.array(log_temp_fluxs),np.array(log_temp_ferrs))
    ##Find out where in src_g the repeated cats appear
    repeat_cat_inds = [src_g.cats.index(cat) for cat in repeated_cats]
    split_flag=''
    ##Make labels for when we're plotting, and put them in combined_names
    combined_names = []
    ##Create the combined source no matter what, for plotting purposes.
    ##Loop over all the combined sources and repopulate the entries of src_g
    ##at the points where the repeated catalogues appear
    for i in xrange(len(comb_fluxs)):
        srcg_ind = repeat_cat_inds[i]
        src_g.ras[srcg_ind] = ra_ws[i]
        src_g.rerrs[srcg_ind] = rerr_ws[i]
        src_g.decs[srcg_ind] = dec_ws[i]
        src_g.derrs[srcg_ind] = derr_ws[i]
        src_g.PAs[srcg_ind] = -100000.0
        src_g.majors[srcg_ind] = -100000.0
        src_g.minors[srcg_ind] = -100000.0
        src_g.names[srcg_ind] = "Combined-%s" %src_g.cats[srcg_ind]
        combined_names.append("Combined-%s" %src_g.cats[srcg_ind])
        src_g.fluxs[srcg_ind] = [comb_fluxs[i]]
        src_g.ferrs[srcg_ind] = comb_ferrs[i]
    src_g.SI = float(comb_fit.params[0])
    src_g.intercept = comb_fit.params[1]
    src_g.SI_err = comb_bse[0]
    src_g.intercept_err = comb_bse[1]
    src_g.chi_resid = comb_chi_red
    src_g.epsilon_red = comb_jstat
    ##If good fit, report that in the final stats object
    if comb_chi_red<=2:
        src_g.low_resids = 0
    else:
        src_g.low_resids = 1
    dom_crit = 'Accepted -\ncombined'
    return [src_g]
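##A small worked example (hypothetical numbers) of the flux-weighted position logic
##in combine_flux, assuming np and itertools.combinations are imported as elsewhere
##in this file. Two components straddling RA=0 are unwrapped before weighting, so the
##weighted RA lands between them instead of near 180 deg. Illustrative only, not
##called by the pipeline.
def _weighted_pos_demo():
    ras_demo = [359.9,0.1]
    decs_demo = [-26.0,-26.2]
    flux_demo = [3.0,1.0]
    ##Unwrap if any pair of RAs differs by more than 180 deg
    if any(abs(a - b)>180.0 for a,b in combinations(ras_demo,2)):
        ras_demo = [ra + 360.0 if ra<180.0 else ra for ra in ras_demo]
    weights_demo = [f/sum(flux_demo) for f in flux_demo]
    ra_w_demo = np.dot(ras_demo,weights_demo)    ##-> 359.95
    if ra_w_demo>360.0: ra_w_demo -= 360.0
    dec_w_demo = np.dot(decs_demo,weights_demo)  ##-> -26.05
    return ra_w_demo,dec_w_demo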
def single_match_test(src_all,comp,accepted_matches,accepted_inds,g_stats,num_matches,repeated_cats,matches,i):
    '''Takes a combination of sources, one from each catalogue, with positional
    probabilities, and determines whether they are a match or not - Algorithm 2
    in the write up. Plotting variant: note it is shadowed by the table-writing
    version of the same name defined below'''
    match = accepted_matches[0]
    prob = float(match[-1])
    ##calculate_resids needs a list of matches - calculate parameters
    jstat_resids,params,bses,chi_resids = mkl.calculate_resids([match])
    src_g = mkl.get_srcg(match)
    ##Play the prob trick again to work out which match has been accepted
    match_probs = [float(m.split()[-1]) for m in matches]
    dom_num = match_probs.index(prob) + 1
    match_crit = "Combination (%d)\npossible\n%s repeated cats" %(dom_num,repeated_cats)
    ##Check to see if all matched sources are within the closeness test - create an
    ##error ellipse by combining closeness with the base cat error.
    ##Need to convert closeness into an RA offset, due to spherical trigonometry
    dr = np.pi/180.0
    delta_RA = np.arccos((np.cos(closeness*dr) - np.sin(src_all.decs[0]*dr)**2)/np.cos(src_all.decs[0]*dr)**2)/dr
    ##Make a list of the ras and decs of the sources to distance test
    ras_test = [ra for ra in src_g.ras if ra != -100000.0]
    dec_test = [dec for dec in src_g.decs if dec != -100000.0]
    small_test = []
    for ra,dec in zip(ras_test,dec_test):
        ##Even at the same dec, a 3 arcmin offset in RA isn't necessarily 3 arcmin of arc distance
        ra_dist = mkl.arcdist(src_all.ras[0],ra,src_all.decs[0],src_all.decs[0])
        dec_dist = src_all.decs[0] - dec
        ra_axis = src_all.rerrs[0] + abs(delta_RA)
        dec_axis = src_all.derrs[0] + closeness
        ##Test to see if the source lies within an error ellipse with semi-major
        ##and minor axes defined by the RA and Dec error of the base cat plus half
        ##the resolution of the base cat (closeness)
        ell_test = (ra_dist/ra_axis)**2 + (dec_dist/dec_axis)**2
        if ell_test <= 1:
            small_test.append('yes')
        else:
            ##Otherwise, fails
            small_test.append('no')
    ##Fail the positional test if a source is outside of the resolution plus position error
    close_test = 'passed'
    if 'no' in small_test: close_test = 'failed'
    ##If prob is higher than threshold, ignore positions of sources and accept the match
    if prob > high_prob:
        i = plot_accept_type(comp,accepted_inds,match_crit,'N/A','Pos. accepted\nby $P>P_u$',num_matches,plot_accept,src_all,'position',i)
    else:
        ##Look to see if all sources are within the resolution of the base catalogue
        ##or above some probability threshold; if so check with a spec test, else reject them
        if close_test == 'passed' or prob > low_prob:
            ##If below either threshold, append with the applicable fit label
            if jstat_resids[0] <= jstat_thresh or chi_resids[0] <= chi_thresh:
                i = plot_accept_type(comp,accepted_inds,match_crit,'Spec. passed','Accept by spec',num_matches,plot_accept,src_all,'spectral',i)
            else:
                i = plot_accept_type(comp,accepted_inds,match_crit,'Spec. failed','Reject by spec',num_matches,plot_reject,src_all,'spectral',i)
        else:
            i = plot_accept_type(comp,accepted_inds,match_crit,'N/A','pos reject by $P<P_l$',num_matches,plot_reject,src_all,'position',i)
    return i
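##Quick sanity check of the delta_RA conversion used in single_match_test: for two
##points at the same declination dec, the spherical law of cosines reduces to
##cos(sep) = sin(dec)**2 + cos(dec)**2*cos(dRA), which inverts to the arccos
##expression above. Hypothetical numbers; illustrative only, not called by the pipeline.
def _delta_RA_demo(closeness_demo=0.025,dec_demo=-60.0):
    '''At dec=-60 an on-sky separation of 0.025 deg (1.5 arcmin) needs an RA offset
    of ~0.05 deg - the expected 1/cos(dec) stretching towards the pole'''
    dr = np.pi/180.0
    return np.arccos((np.cos(closeness_demo*dr) - np.sin(dec_demo*dr)**2)/np.cos(dec_demo*dr)**2)/dr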
def single_match_test(src_all,comp,accepted_matches,accepted_inds,g_stats,num_matches,repeated_cats,matches):
    '''Takes a combination of sources, one from each catalogue, with positional
    probabilities, and determines whether they are a match or not - Algorithm 2
    in the write up. Table-writing variant: records accepted/rejected matches
    via make_entry, make_accept and make_rejection'''
    match = accepted_matches[0]
    prob = float(match[-1])
    ##calculate_resids needs a list of matches - calculate parameters
    jstat_resids,params,bses,chi_resids = mkl.calculate_resids([match])
    ##Gather source information
    src_g = mkl.get_srcg(match)
    ##Play the prob trick again to work out which match has been accepted
    match_probs = [float(m.split()[-1]) for m in matches]
    dom_num = match_probs.index(prob)+1
    match_crit = "Combination (%d)\npossible\n%s repeated cats" %(dom_num,repeated_cats)
    ##Check to see if all matched sources are within the closeness test - create an
    ##error ellipse by combining closeness with the base cat error.
    ##Need to convert closeness into an RA offset, due to spherical trigonometry
    dr = np.pi/180.0
    delta_RA = np.arccos((np.cos(closeness*dr) - np.sin(src_all.decs[0]*dr)**2)/np.cos(src_all.decs[0]*dr)**2)/dr
    ##Make a list of the ras and decs of the sources to distance test
    ras_test = [ra for ra in src_g.ras if ra != -100000.0]
    dec_test = [dec for dec in src_g.decs if dec != -100000.0]
    small_test = []
    for ra,dec in zip(ras_test,dec_test):
        ##Even at the same dec, a 3 arcmin offset in RA isn't necessarily 3 arcmin of arc distance
        ra_dist = mkl.arcdist(src_all.ras[0],ra,src_all.decs[0],src_all.decs[0])
        dec_dist = src_all.decs[0] - dec
        ra_axis = src_all.rerrs[0] + abs(delta_RA)
        dec_axis = src_all.derrs[0] + closeness
        ##Test to see if the source lies within an error ellipse with semi-major
        ##and minor axes defined by the RA and Dec error of the base cat plus half
        ##the resolution of the base cat (closeness)
        ell_test = (ra_dist/ra_axis)**2 + (dec_dist/dec_axis)**2
        if ell_test <= 1:
            small_test.append('yes')
        else:
            ##Otherwise, fails
            small_test.append('no')
    ##Fail the positional test if a source is outside of the resolution plus position error
    close_test = 'passed'
    if 'no' in small_test: close_test = 'failed'
    ##If prob is higher than threshold, ignore positions of sources and accept the match
    if prob>high_prob:
        ##Accept the source, recording it in a way that can be read when constructing the final table
        if chi_resids[0]<=2:
            make_entry(match,params[0][0],params[0][1],bses[0][0],bses[0][1],g_stats,'position',0,chi_resids[0])
        else:
            make_entry(match,params[0][0],params[0][1],bses[0][0],bses[0][1],g_stats,'position',1,chi_resids[0])
        make_accept(comp,g_stats,'position',accepted_inds)
    else:
        ##Look to see if all sources are within the resolution of the base catalogue
        ##or above some probability threshold; if so check with a spec test, else reject them
        if close_test=='passed' or prob>low_prob:
            ##If below either threshold, append with the applicable fit label
            if jstat_resids[0]<=jstat_thresh or chi_resids[0]<=chi_thresh:
                if chi_resids[0]<=2:
                    make_entry(match,params[0][0],params[0][1],bses[0][0],bses[0][1],g_stats,'spectral',0,chi_resids[0])
                else:
                    make_entry(match,params[0][0],params[0][1],bses[0][0],bses[0][1],g_stats,'spectral',1,chi_resids[0])
                make_accept(comp,g_stats,'spectral',accepted_inds)
            else:
                g_stats.retained_matches = 1
                ##Put accepted_inds as [0] just to have something output to the investigate
                ##text file - accepted_inds is empty if rejecting at this stage
                make_rejection(comp,g_stats,'spectral',[0])
        else:
            g_stats.retained_matches = 1
            make_rejection(comp,g_stats,'position',[0])
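##Both variants of single_match_test apply the same elliptical position cut; a minimal
##standalone version is sketched below (hypothetical helper, not called by the
##pipeline). Offsets and semi-axes are in degrees.
def _inside_error_ellipse(ra_dist,dec_dist,ra_axis,dec_axis):
    '''True if an offset (ra_dist,dec_dist) from the base source falls within the
    error ellipse (ra_dist/ra_axis)**2 + (dec_dist/dec_axis)**2 <= 1'''
    return (ra_dist/ra_axis)**2 + (dec_dist/dec_axis)**2 <= 1.0
##e.g. _inside_error_ellipse(0.01,0.02,0.03,0.03) -> True (0.556 <= 1)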