def compare_json_spectra(self, orig_file, new_file):
    """Compare two sets of spectra saved in json files.

    Both files are read with ``load_json`` and turned into ``Spectra``
    objects with ``Spectra.from_json``. The test fails if the two sets do
    not have the same size, or if the wavelength, flux or inverse-variance
    arrays of any pair of spectra are not close according to
    ``np.allclose`` (default tolerances).

    Parameters
    ----------
    orig_file : name of the json file holding the reference spectra
    new_file : name of the json file holding the spectra to be checked
    """
    orig_spectra = Spectra.from_json(load_json(orig_file))
    new_spectra = Spectra.from_json(load_json(new_file))

    # assertTrue(a, b) would treat b as the failure message and pass for
    # any non-zero size; the sizes have to be compared with assertEqual
    self.assertEqual(orig_spectra.size(), new_spectra.size())

    spectra_pairs = zip(orig_spectra.spectra_list(),
                        new_spectra.spectra_list())
    for index, (orig_spectrum, new_spectrum) in enumerate(spectra_pairs):
        self.assertTrue(
            np.allclose(orig_spectrum.wave(), new_spectrum.wave()),
            f"wave arrays differ for spectrum {index}")
        self.assertTrue(
            np.allclose(orig_spectrum.flux(), new_spectrum.flux()),
            f"flux arrays differ for spectrum {index}")
        self.assertTrue(
            np.allclose(orig_spectrum.ivar(), new_spectrum.ivar()),
            f"ivar arrays differ for spectrum {index}")
def main(cmdargs):
    """Run SQUEzE in test mode.

    Steps, in order: parse the options, optionally load the quasar
    catalogue (only with --check-statistics), load the model, build the
    Candidates object, optionally load previously found candidates, look
    for candidates in every input spectra file, classify them, optionally
    report completeness/purity and optionally save the catalogue.

    Parameters
    ----------
    cmdargs : list of command-line arguments handed to
        ``ArgumentParser.parse_args``

    Raises
    ------
    Error if ``Spectra.from_json`` returns something that is not a
    ``Spectra`` instance.
    """
    # load options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[TEST_PARSER])
    args = parser.parse_args(cmdargs)
    if args.check_statistics:
        quasar_parser_check(parser, args)

    # manage verbosity
    userprint = verboseprint if not args.quiet else quietprint

    t0 = time.time()

    # load quasar catalogue (only if --check-statistics is passed)
    if args.check_statistics:
        userprint("Loading quasar catalogue")
        if args.qso_dataframe is not None:
            quasar_catalogue = deserialize(load_json(args.qso_dataframe))
            quasar_catalogue["LOADED"] = True
        else:
            quasar_catalogue = QuasarCatalogue(
                args.qso_cat, args.qso_cols, args.qso_specid, args.qso_ztrue,
                args.qso_hdu).quasar_catalogue()
            # entries are flagged one by one while the spectra are read
            quasar_catalogue["LOADED"] = False
        t1 = time.time()
        userprint(
            f"INFO: time elapsed to load quasar catalogue: {(t1-t0)/60.0} minutes"
        )

    # load model
    userprint("Loading model")
    t2 = time.time()
    if args.model.endswith(".json"):
        model = Model.from_json(load_json(args.model))
    else:
        model = Model.from_fits(args.model)
    t3 = time.time()
    userprint(f"INFO: time elapsed to load model: {(t3-t2)/60.0} minutes")

    # initialize candidates object; the name is only passed when given so
    # that Candidates keeps its own default otherwise
    userprint("Initializing candidates object")
    candidates_kwargs = {
        "mode": "test",
        "model": model,
        "userprint": userprint,
    }
    if args.output_candidates is not None:
        candidates_kwargs["name"] = args.output_candidates
    candidates = Candidates(**candidates_kwargs)

    # load candidates dataframe if they have previously looked for
    if args.load_candidates:
        userprint("Loading existing candidates")
        t4 = time.time()
        candidates.load_candidates(args.input_candidates)
        t5 = time.time()
        userprint(
            f"INFO: time elapsed to load candidates: {(t5-t4)/60.0} minutes")

    # load spectra
    if args.input_spectra is not None:
        userprint("Loading spectra")
        t6 = time.time()
        columns_candidates = []
        num_files = len(args.input_spectra)
        userprint(f"There are {num_files} files with spectra to be loaded")
        for index, spectra_filename in enumerate(args.input_spectra):
            # report a 1-based counter so that the last file reads N/N
            userprint(
                f"Loading spectra from {spectra_filename} ({index + 1}/{num_files})"
            )
            t60 = time.time()
            spectra = Spectra.from_json(load_json(spectra_filename))
            if not isinstance(spectra, Spectra):
                raise Error("Invalid list of spectra")

            # the metadata columns are taken from the first spectrum of
            # the first file only
            if index == 0:
                columns_candidates += spectra.spectra_list()[0].metadata_names()

            # flag loaded quasars as such
            if args.check_statistics:
                for spec in spectra.spectra_list():
                    specid = spec.metadata_by_key("SPECID")
                    # build the selection once per spectrum
                    matches = quasar_catalogue.index[
                        quasar_catalogue["SPECID"] == specid]
                    if len(matches) > 0:
                        # only the first matching entry is flagged
                        quasar_catalogue.at[matches[0], "LOADED"] = True

            # look for candidates
            userprint("Looking for candidates")
            candidates.find_candidates(spectra.spectra_list(),
                                       columns_candidates)

            t61 = time.time()
            userprint(
                f"INFO: time elapsed to find candidates from {spectra_filename}:"
                f" {(t61-t60)/60.0} minutes")

        t7 = time.time()
        userprint(
            f"INFO: time elapsed to find candidates: {(t7-t6)/60.0} minutes")

        # convert to dataframe
        # NOTE(review): kept under the input_spectra branch because
        # columns_candidates is only defined here - confirm nesting
        userprint("Converting candidates to dataframe")
        t8 = time.time()
        candidates.candidates_list_to_dataframe(columns_candidates)
        t9 = time.time()
        userprint(
            f"INFO: time elapsed to convert candidates to dataframe: {(t9-t8)/60.0} minutes"
        )

    # compute probabilities
    userprint("Computing probabilities")
    t10 = time.time()
    candidates.classify_candidates()
    t11 = time.time()
    userprint(
        f"INFO: time elapsed to classify candidates: {(t11-t10)/60.0} minutes")

    # check completeness
    if args.check_statistics:
        if args.check_probs is not None:
            probs = args.check_probs
        else:
            probs = np.arange(0.9, 0.0, -0.05)
        userprint("Check statistics")
        data_frame = candidates.candidates()
        userprint("\n---------------")
        userprint("step 1")
        candidates.find_completeness_purity(quasar_catalogue.reset_index(),
                                            data_frame)
        for prob in probs:
            userprint("\n---------------")
            userprint(f"proba > {prob}")
            selection = ((data_frame["PROB"] > prob) &
                         ~(data_frame["DUPLICATED"]) &
                         (data_frame["Z_CONF_PERSON"] == 3))
            candidates.find_completeness_purity(
                quasar_catalogue.reset_index(),
                data_frame[selection],
            )

    # save the catalogue as a fits file
    if not args.no_save_catalogue:
        candidates.save_catalogue(args.output_catalogue, args.prob_cut)

    t12 = time.time()
    userprint(f"INFO: total elapsed time: {(t12-t0)/60.0} minutes")
    userprint("Done")
def main(cmdargs):
    """Run SQUEzE in operation mode.

    Steps, in order: parse the options, load the model, build the
    Candidates object, optionally load previously found candidates, look
    for candidates in every input spectra file, classify them and
    optionally save the catalogue.

    Parameters
    ----------
    cmdargs : list of command-line arguments handed to
        ``ArgumentParser.parse_args``

    Raises
    ------
    Error if ``Spectra.from_json`` returns something that is not a
    ``Spectra`` instance.
    """
    # load options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[OPERATION_PARSER])
    args = parser.parse_args(cmdargs)

    # manage verbosity
    userprint = verboseprint if not args.quiet else quietprint

    t0 = time.time()

    # load model
    userprint("Loading model")
    if args.model.endswith(".json"):
        model = Model.from_json(load_json(args.model))
    else:
        model = Model.from_fits(args.model)
    t1 = time.time()
    # single message, same layout as every other timing report
    userprint(f"INFO: time elapsed to load model: {(t1-t0)/60.0} minutes")

    # initialize candidates object; the name is only passed when given so
    # that Candidates keeps its own default otherwise
    userprint("Initializing candidates object")
    candidates_kwargs = {
        "mode": "operation",
        "model": model,
        "userprint": userprint,
    }
    if args.output_candidates is not None:
        candidates_kwargs["name"] = args.output_candidates
    candidates = Candidates(**candidates_kwargs)

    # load candidates dataframe if they have previously looked for
    if args.load_candidates:
        userprint("Loading existing candidates")
        t2 = time.time()
        candidates.load_candidates(args.input_candidates)
        t3 = time.time()
        userprint(
            f"INFO: time elapsed to load candidates: {(t3-t2)/60.0} minutes")

    # load spectra
    if args.input_spectra is not None:
        userprint("Loading spectra")
        t4 = time.time()
        columns_candidates = []
        num_files = len(args.input_spectra)
        userprint(f"There are {num_files} files with spectra to be loaded")
        for index, spectra_filename in enumerate(args.input_spectra):
            # report a 1-based counter so that the last file reads N/N
            userprint(
                f"Loading spectra from {spectra_filename} ({index + 1}/{num_files})"
            )
            t40 = time.time()
            spectra = Spectra.from_json(load_json(spectra_filename))
            if not isinstance(spectra, Spectra):
                raise Error("Invalid list of spectra")

            # the metadata columns are taken from the first spectrum of
            # the first file only
            if index == 0:
                columns_candidates += spectra.spectra_list()[0].metadata_names()

            # look for candidates
            userprint("Looking for candidates")
            candidates.find_candidates(spectra.spectra_list(),
                                       columns_candidates)

            t41 = time.time()
            userprint(
                f"INFO: time elapsed to find candidates from {spectra_filename}:"
                f" {(t41-t40)/60.0} minutes")

        t5 = time.time()
        userprint(
            f"INFO: time elapsed to find candidates: {(t5-t4)/60.0} minutes")

        # convert to dataframe
        # NOTE(review): kept under the input_spectra branch because
        # columns_candidates is only defined here - confirm nesting
        userprint("Converting candidates to dataframe")
        t6 = time.time()
        candidates.candidates_list_to_dataframe(columns_candidates)
        t7 = time.time()
        userprint(
            f"INFO: time elapsed to convert candidates to dataframe: {(t7-t6)/60.0} minutes"
        )

    # compute probabilities
    userprint("Computing probabilities")
    t8 = time.time()
    candidates.classify_candidates()
    t9 = time.time()
    userprint(
        f"INFO: time elapsed to classify candidates: {(t9-t8)/60.0} minutes")

    # save the catalogue as a fits file
    if not args.no_save_catalogue:
        candidates.save_catalogue(args.output_catalogue, args.prob_cut)

    t10 = time.time()
    userprint(f"INFO: total elapsed time: {(t10-t0)/60.0} minutes")
    userprint("Done")
def main(cmdargs):
    """Run SQUEzE in training mode.

    Steps, in order: parse the options, resolve every setting against its
    module-level default, build the Candidates object, optionally load
    previously found candidates, look for candidates in every input
    spectra file and train the model.

    Parameters
    ----------
    cmdargs : list of command-line arguments handed to
        ``ArgumentParser.parse_args``

    Raises
    ------
    Error if ``Spectra.from_json`` returns something that is not a
    ``Spectra`` instance.
    """
    # load options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[TRAINING_PARSER])
    args = parser.parse_args(cmdargs)

    # manage verbosity
    userprint = verboseprint if not args.quiet else quietprint

    t0 = time.time()

    # load lines
    userprint("Loading lines")
    lines = LINES if args.lines is None else deserialize(load_json(args.lines))

    # load try_line
    try_line = TRY_LINES if args.try_lines is None else args.try_lines

    # load redshift precision
    z_precision = Z_PRECISION if args.z_precision is None else args.z_precision

    # load peakfinder options
    peakfind_width = (PEAKFIND_WIDTH
                      if args.peakfind_width is None else args.peakfind_width)
    peakfind_sig = (PEAKFIND_SIG
                    if args.peakfind_sig is None else args.peakfind_sig)

    # load random forest options
    if args.random_forest_options is None:
        random_forest_options = RANDOM_FOREST_OPTIONS
    else:
        random_forest_options = load_json(args.random_forest_options)
    random_state = RANDOM_STATE if args.random_state is None else args.random_state

    # initialize candidates object; the name is only passed when given so
    # that Candidates keeps its own default otherwise
    userprint("Initializing candidates object")
    candidates_kwargs = {
        "lines_settings": (lines, try_line),
        "z_precision": z_precision,
        "mode": "training",
        "peakfind": (peakfind_width, peakfind_sig),
        "model": None,
        "userprint": userprint,
        "model_options": (random_forest_options, random_state,
                          args.pass_cols_to_rf),
    }
    if args.output_candidates is not None:
        candidates_kwargs["name"] = args.output_candidates
    candidates = Candidates(**candidates_kwargs)

    # load candidates dataframe if they have previously looked for
    if args.load_candidates:
        userprint("Loading existing candidates")
        t1 = time.time()
        candidates.load_candidates(args.input_candidates)
        t2 = time.time()
        userprint(
            f"INFO: time elapsed to load candidates: {(t2-t1)/60.0} minutes")

    # load spectra
    if args.input_spectra is not None:
        userprint("Loading spectra")
        t3 = time.time()
        columns_candidates = []
        num_files = len(args.input_spectra)
        userprint(f"There are {num_files} files with spectra to be loaded")
        for index, spectra_filename in enumerate(args.input_spectra):
            # report a 1-based counter so that the last file reads N/N
            userprint(
                f"Loading spectra from {spectra_filename} ({index + 1}/{num_files})"
            )
            t30 = time.time()
            spectra = Spectra.from_json(load_json(spectra_filename))
            if not isinstance(spectra, Spectra):
                raise Error("Invalid list of spectra")

            # the metadata columns are taken from the first spectrum of
            # the first file only
            if index == 0:
                columns_candidates += spectra.spectra_list()[0].metadata_names()

            # look for candidates
            userprint("Looking for candidates")
            candidates.find_candidates(spectra.spectra_list(),
                                       columns_candidates)

            t31 = time.time()
            userprint(
                f"INFO: time elapsed to find candidates from {spectra_filename}: "
                f"{(t31-t30)/60.0} minutes")

        t4 = time.time()
        userprint(
            f"INFO: time elapsed to find candidates: {(t4-t3)/60.0} minutes")

        # convert to dataframe
        # NOTE(review): kept under the input_spectra branch because
        # columns_candidates is only defined here - confirm nesting
        userprint("Converting candidates to dataframe")
        t5 = time.time()
        candidates.candidates_list_to_dataframe(columns_candidates)
        t6 = time.time()
        userprint(
            f"INFO: time elapsed to convert candidates to dataframe: {(t6-t5)/60.0} minutes"
        )

    # train model
    userprint("Training model")
    t7 = time.time()
    candidates.train_model(args.model_fits)
    t8 = time.time()
    userprint(f"INFO: time elapsed to train model: {(t8-t7)/60.0} minutes")

    userprint(f"INFO: total elapsed time: {(t8-t0)/60.0} minutes")
    userprint("Done")