예제 #1
0
    def compare_json_spectra(self, orig_file, new_file):
        """Assert that two JSON files contain numerically matching spectra.

        Each file is read with ``load_json`` and converted to a ``Spectra``
        object. The two objects must report the same ``size()``, and for
        every position the ``wave()``, ``flux()`` and ``ivar()`` arrays of
        the paired spectra must agree according to ``np.allclose``.

        Args:
            orig_file: Path of the JSON file holding the reference spectra.
            new_file: Path of the JSON file holding the spectra to check.

        Raises:
            AssertionError: If the number of spectra differs or any pair of
                arrays is not close.
        """
        orig_spectra = Spectra.from_json(load_json(orig_file))
        new_spectra = Spectra.from_json(load_json(new_file))

        # assertEqual, not assertTrue: assertTrue(a, b) treats ``b`` as the
        # failure message and passes for any truthy ``a``, so a size
        # mismatch would go unnoticed.
        self.assertEqual(orig_spectra.size(), new_spectra.size())

        orig_spectra_list = orig_spectra.spectra_list()
        new_spectra_list = new_spectra.spectra_list()
        for index in range(orig_spectra.size()):
            orig_spectrum = orig_spectra_list[index]
            new_spectrum = new_spectra_list[index]
            self.assertTrue(
                np.allclose(orig_spectrum.wave(), new_spectrum.wave()))
            self.assertTrue(
                np.allclose(orig_spectrum.flux(), new_spectrum.flux()))
            self.assertTrue(
                np.allclose(orig_spectrum.ivar(), new_spectrum.ivar()))
예제 #2
0
def main(cmdargs):
    """Run SQUEzE in test mode.

    Loads a model, optionally loads previously found candidates, looks for
    candidates in every input spectra file, classifies them and, unless
    ``args.no_save_catalogue`` is set, saves the resulting catalogue. When
    ``args.check_statistics`` is set, a quasar catalogue is also loaded and
    used to report completeness and purity at several probability cuts.

    Args:
        cmdargs: Command-line arguments, passed as-is to
            ``ArgumentParser.parse_args``.

    Raises:
        Error: If a spectra file does not deserialize to a ``Spectra``
            instance.
    """
    # load options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[TEST_PARSER])
    args = parser.parse_args(cmdargs)
    if args.check_statistics:
        quasar_parser_check(parser, args)

    # manage verbosity
    userprint = quietprint if args.quiet else verboseprint

    t0 = time.time()
    # load quasar catalogue (only if --check-statistics is passed)
    if args.check_statistics:
        userprint("Loading quasar catalogue")
        if args.qso_dataframe is not None:
            quasar_catalogue = deserialize(load_json(args.qso_dataframe))
            quasar_catalogue["LOADED"] = True
        else:
            quasar_catalogue = QuasarCatalogue(
                args.qso_cat, args.qso_cols, args.qso_specid, args.qso_ztrue,
                args.qso_hdu).quasar_catalogue()
            quasar_catalogue["LOADED"] = False
        t1 = time.time()
        userprint(
            f"INFO: time elapsed to load quasar catalogue: {(t1-t0)/60.0} minutes"
        )

    # load model
    userprint("Loading model")
    t2 = time.time()
    if args.model.endswith(".json"):
        model = Model.from_json(load_json(args.model))
    else:
        model = Model.from_fits(args.model)
    t3 = time.time()
    userprint(f"INFO: time elapsed to load model: {(t3-t2)/60.0} minutes")

    # initialize candidates object; ``name`` is only passed when the user
    # supplied one so that the Candidates default applies otherwise
    userprint("Initializing candidates object")
    candidates_kwargs = {
        "mode": "test",
        "model": model,
        "userprint": userprint,
    }
    if args.output_candidates is not None:
        candidates_kwargs["name"] = args.output_candidates
    candidates = Candidates(**candidates_kwargs)

    # load candidates dataframe if they have previously looked for
    if args.load_candidates:
        userprint("Loading existing candidates")
        t4 = time.time()
        candidates.load_candidates(args.input_candidates)
        t5 = time.time()
        userprint(
            f"INFO: time elapsed to load candidates: {(t5-t4)/60.0} minutes")

    # load spectra
    if args.input_spectra is not None:
        userprint("Loading spectra")
        t6 = time.time()
        columns_candidates = []
        num_files = len(args.input_spectra)
        userprint(f"There are {num_files} files with spectra to be loaded")
        for index, spectra_filename in enumerate(args.input_spectra):
            # report a 1-based counter so the last file reads (N/N)
            userprint(f"Loading spectra from {spectra_filename} "
                      f"({index + 1}/{num_files})")
            t60 = time.time()
            spectra = Spectra.from_json(load_json(spectra_filename))
            if not isinstance(spectra, Spectra):
                raise Error("Invalid list of spectra")

            # metadata column names are taken from the first spectrum of
            # the first file only
            if index == 0:
                columns_candidates += spectra.spectra_list()[0].metadata_names(
                )

            # flag loaded quasars as such
            if args.check_statistics:
                for spec in spectra.spectra_list():
                    # look the SPECID up once and build the mask once
                    # (assumes metadata_by_key has no side effects)
                    specid = spec.metadata_by_key("SPECID")
                    matches = quasar_catalogue.index[
                        quasar_catalogue["SPECID"] == specid]
                    if len(matches) > 0:
                        # only the first matching row is flagged
                        quasar_catalogue.at[matches[0], "LOADED"] = True

            # look for candidates
            userprint("Looking for candidates")
            candidates.find_candidates(spectra.spectra_list(),
                                       columns_candidates)

            t61 = time.time()
            userprint(
                f"INFO: time elapsed to find candidates from {spectra_filename}:"
                f" {(t61-t60)/60.0} minutes")

        t7 = time.time()
        userprint(
            f"INFO: time elapsed to find candidates: {(t7-t6)/60.0} minutes")

        # convert to dataframe
        userprint("Converting candidates to dataframe")
        t8 = time.time()
        candidates.candidates_list_to_dataframe(columns_candidates)
        t9 = time.time()
        userprint(
            f"INFO: time elapsed to convert candidates to dataframe: {(t9-t8)/60.0} minutes"
        )

    # compute probabilities
    userprint("Computing probabilities")
    t10 = time.time()
    candidates.classify_candidates()
    t11 = time.time()
    userprint(
        f"INFO: time elapsed to classify candidates: {(t11-t10)/60.0} minutes")

    # check completeness
    if args.check_statistics:
        if args.check_probs is not None:
            probs = args.check_probs
        else:
            probs = np.arange(0.9, 0.0, -0.05)
        userprint("Check statistics")
        data_frame = candidates.candidates()
        userprint("\n---------------")
        userprint("step 1")
        candidates.find_completeness_purity(quasar_catalogue.reset_index(),
                                            data_frame)
        for prob in probs:
            userprint("\n---------------")
            userprint(f"proba > {prob}")
            # keep non-duplicated candidates above the probability cut
            # whose Z_CONF_PERSON equals 3
            selection = ((data_frame["PROB"] > prob)
                         & ~(data_frame["DUPLICATED"]) &
                         (data_frame["Z_CONF_PERSON"] == 3))
            candidates.find_completeness_purity(
                quasar_catalogue.reset_index(),
                data_frame[selection],
            )

    # save the catalogue as a fits file
    if not args.no_save_catalogue:
        candidates.save_catalogue(args.output_catalogue, args.prob_cut)

    t12 = time.time()
    userprint(f"INFO: total elapsed time: {(t12-t0)/60.0} minutes")
    userprint("Done")
예제 #3
0
def main(cmdargs):
    """Run SQUEzE in operation mode.

    Loads a model, optionally loads previously found candidates, looks for
    candidates in every input spectra file, classifies them and, unless
    ``args.no_save_catalogue`` is set, saves the resulting catalogue.

    Args:
        cmdargs: Command-line arguments, passed as-is to
            ``ArgumentParser.parse_args``.

    Raises:
        Error: If a spectra file does not deserialize to a ``Spectra``
            instance.
    """
    # load options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[OPERATION_PARSER])
    args = parser.parse_args(cmdargs)

    # manage verbosity
    userprint = quietprint if args.quiet else verboseprint

    t0 = time.time()
    # load model
    userprint("Loading model")
    if args.model.endswith(".json"):
        model = Model.from_json(load_json(args.model))
    else:
        model = Model.from_fits(args.model)
    t1 = time.time()
    # single formatted string, matching every other timing message here
    userprint(f"INFO: time elapsed to load model: {(t1-t0)/60.0} minutes")

    # initialize candidates object; ``name`` is only passed when the user
    # supplied one so that the Candidates default applies otherwise
    userprint("Initializing candidates object")
    candidates_kwargs = {
        "mode": "operation",
        "model": model,
        "userprint": userprint,
    }
    if args.output_candidates is not None:
        candidates_kwargs["name"] = args.output_candidates
    candidates = Candidates(**candidates_kwargs)

    # load candidates dataframe if they have previously looked for
    if args.load_candidates:
        userprint("Loading existing candidates")
        t2 = time.time()
        candidates.load_candidates(args.input_candidates)
        t3 = time.time()
        userprint(
            f"INFO: time elapsed to load candidates: {(t3-t2)/60.0} minutes")

    # load spectra
    if args.input_spectra is not None:
        userprint("Loading spectra")
        t4 = time.time()
        columns_candidates = []
        num_files = len(args.input_spectra)
        userprint(f"There are {num_files} files with spectra to be loaded")
        for index, spectra_filename in enumerate(args.input_spectra):
            # report a 1-based counter so the last file reads (N/N)
            userprint(f"Loading spectra from {spectra_filename} "
                      f"({index + 1}/{num_files})")
            t40 = time.time()
            spectra = Spectra.from_json(load_json(spectra_filename))
            if not isinstance(spectra, Spectra):
                raise Error("Invalid list of spectra")

            # metadata column names are taken from the first spectrum of
            # the first file only
            if index == 0:
                columns_candidates += spectra.spectra_list()[0].metadata_names(
                )

            # look for candidates
            userprint("Looking for candidates")
            candidates.find_candidates(spectra.spectra_list(),
                                       columns_candidates)

            t41 = time.time()
            userprint(
                f"INFO: time elapsed to find candidates from {spectra_filename}:"
                f" {(t41-t40)/60.0} minutes")

        t5 = time.time()
        userprint(
            f"INFO: time elapsed to find candidates: {(t5-t4)/60.0} minutes")

        # convert to dataframe
        userprint("Converting candidates to dataframe")
        t6 = time.time()
        candidates.candidates_list_to_dataframe(columns_candidates)
        t7 = time.time()
        userprint(
            f"INFO: time elapsed to convert candidates to dataframe: {(t7-t6)/60.0} minutes"
        )

    # compute probabilities
    userprint("Computing probabilities")
    t8 = time.time()
    candidates.classify_candidates()
    t9 = time.time()
    userprint(
        f"INFO: time elapsed to classify candidates: {(t9-t8)/60.0} minutes")

    # save the catalogue as a fits file
    if not args.no_save_catalogue:
        candidates.save_catalogue(args.output_catalogue, args.prob_cut)

    t10 = time.time()
    userprint(f"INFO: total elapsed time: {(t10-t0)/60.0} minutes")
    userprint("Done")
예제 #4
0
def main(cmdargs):
    """Run SQUEzE in training mode.

    Resolves the training settings (lines, try lines, redshift precision,
    peak finder and random forest options), falling back to the module
    defaults for any option left as ``None``. It then optionally loads
    previously found candidates, looks for candidates in every input
    spectra file and trains the model via
    ``candidates.train_model(args.model_fits)``.

    Args:
        cmdargs: Command-line arguments, passed as-is to
            ``ArgumentParser.parse_args``.

    Raises:
        Error: If a spectra file does not deserialize to a ``Spectra``
            instance.
    """
    # load options
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
        parents=[TRAINING_PARSER])
    args = parser.parse_args(cmdargs)

    # manage verbosity
    userprint = quietprint if args.quiet else verboseprint

    t0 = time.time()
    # load lines
    userprint("Loading lines")
    lines = LINES if args.lines is None else deserialize(load_json(args.lines))

    # load try_line
    try_line = TRY_LINES if args.try_lines is None else args.try_lines

    # load redshift precision
    z_precision = Z_PRECISION if args.z_precision is None else args.z_precision

    # load peakfinder options
    if args.peakfind_width is None:
        peakfind_width = PEAKFIND_WIDTH
    else:
        peakfind_width = args.peakfind_width
    peakfind_sig = PEAKFIND_SIG if args.peakfind_sig is None else args.peakfind_sig

    # load random forest options
    if args.random_forest_options is None:
        random_forest_options = RANDOM_FOREST_OPTIONS
    else:
        random_forest_options = load_json(args.random_forest_options)
    random_state = RANDOM_STATE if args.random_state is None else args.random_state

    # initialize candidates object; ``name`` is only passed when the user
    # supplied one so that the Candidates default applies otherwise
    userprint("Initializing candidates object")
    candidates_kwargs = {
        "lines_settings": (lines, try_line),
        "z_precision": z_precision,
        "mode": "training",
        "peakfind": (peakfind_width, peakfind_sig),
        "model": None,
        "userprint": userprint,
        "model_options": (random_forest_options, random_state,
                          args.pass_cols_to_rf),
    }
    if args.output_candidates is not None:
        candidates_kwargs["name"] = args.output_candidates
    candidates = Candidates(**candidates_kwargs)

    # load candidates dataframe if they have previously looked for
    if args.load_candidates:
        userprint("Loading existing candidates")
        t1 = time.time()
        candidates.load_candidates(args.input_candidates)
        t2 = time.time()
        userprint(
            f"INFO: time elapsed to load candidates: {(t2-t1)/60.0} minutes")

    # load spectra
    if args.input_spectra is not None:
        userprint("Loading spectra")
        t3 = time.time()
        columns_candidates = []
        num_files = len(args.input_spectra)
        userprint(f"There are {num_files} files with spectra to be loaded")
        for index, spectra_filename in enumerate(args.input_spectra):
            # report a 1-based counter so the last file reads (N/N)
            userprint(f"Loading spectra from {spectra_filename} "
                      f"({index + 1}/{num_files})")
            t30 = time.time()
            spectra = Spectra.from_json(load_json(spectra_filename))
            if not isinstance(spectra, Spectra):
                raise Error("Invalid list of spectra")

            # metadata column names are taken from the first spectrum of
            # the first file only
            if index == 0:
                columns_candidates += spectra.spectra_list()[0].metadata_names(
                )

            # look for candidates
            userprint("Looking for candidates")
            candidates.find_candidates(spectra.spectra_list(),
                                       columns_candidates)

            t31 = time.time()
            userprint(
                f"INFO: time elapsed to find candidates from {spectra_filename}: "
                f"{(t31-t30)/60.0} minutes")

        t4 = time.time()
        userprint(
            f"INFO: time elapsed to find candidates: {(t4-t3)/60.0} minutes")

        # convert to dataframe
        userprint("Converting candidates to dataframe")
        t5 = time.time()
        candidates.candidates_list_to_dataframe(columns_candidates)
        t6 = time.time()
        userprint(
            f"INFO: time elapsed to convert candidates to dataframe: {(t6-t5)/60.0} minutes"
        )

    # train model
    userprint("Training model")
    t7 = time.time()
    candidates.train_model(args.model_fits)
    t8 = time.time()
    userprint(f"INFO: time elapsed to train model: {(t8-t7)/60.0} minutes")

    # training is the last step, so its end time doubles as the total
    userprint(f"INFO: total elapsed time: {(t8-t0)/60.0} minutes")
    userprint("Done")